Appendix

K-Fold code

# Import the Stata analysis file, keep only the listed study variables, and
# discard every row that has a missing value in any retained column.
crossdata <- na.omit(
  dplyr::select(
    read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta"),
    subjid, hba1c, age, totchol, htn, bmi, fpg, ecghr, nbmedhhincome
  )
)
# Histograms (bin width 2) of bmi, age and ecghr, with the bars filled
# according to as.factor(htn) and outlined in black.
plot1 <- ggplot(data = crossdata, mapping = aes(x = bmi)) +
  geom_histogram(
    mapping = aes(fill = as.factor(htn)),
    binwidth = 2,
    color = "black"
  )

plot2 <- ggplot(data = crossdata, mapping = aes(x = age)) +
  geom_histogram(
    mapping = aes(fill = as.factor(htn)),
    binwidth = 2,
    color = "black"
  )

plot3 <- ggplot(data = crossdata, mapping = aes(x = ecghr)) +
  geom_histogram(
    mapping = aes(fill = as.factor(htn)),
    binwidth = 2,
    color = "black"
  )

# Draw each histogram with its legend placed underneath the panel.
plot1 + theme(legend.position = "bottom")
plot2 + theme(legend.position = "bottom")
plot3 + theme(legend.position = "bottom")

#grid.arrange(grobs= list(plot1, plot2, plot3),
#             ncol=2, nrow=2,
#             top = ("Histograms"))
## Resampling scheme: k-fold cross-validation with k = 5 folds.
method <- trainControl(method = "cv", number = 5)

## Fit each candidate model with caret's "glm" method (the outcome is a
## factor, so this is a classification fit) and use k-fold CV to evaluate it.
## The RNG is re-seeded with the same value before every train() call so the
## random fold assignment is reproducible and identical for all four models;
## without this, each model is scored on a different random split and the
## resampled metrics are not directly comparable.
set.seed(123)
crossmodelfull <- train(as.factor(htn) ~ age + bmi + ecghr,
                        data = crossdata,
                        method = "glm",
                        trControl = method)

set.seed(123)
crossmodel1 <- train(as.factor(htn) ~ age + bmi,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

set.seed(123)
crossmodel2 <- train(as.factor(htn) ~ age + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

set.seed(123)
crossmodel3 <- train(as.factor(htn) ~ bmi + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

## Cross-validated performance summary of every model.
print(crossmodelfull)

print(crossmodel1)

print(crossmodel2)

print(crossmodel3)

## Per-fold metrics and the final fitted glm for the full model.
crossmodelfull$resample
crossmodelfull$finalModel

Holdout code

# Re-read the Stata analysis file for the hold-out section: keep the listed
# study variables and drop rows with a missing value in any of them.
crossdata <- na.omit(
  dplyr::select(
    read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta"),
    subjid, hba1c, age, totchol, htn, bmi, fpg, ecghr, nbmedhhincome
  )
)
## Hold-out validation: place about 80% of the rows in a training set and keep
## the remaining rows for testing.
# Fix the seed so the random split can be reproduced.
set.seed(123)

# The outcome is passed as a factor so createDataPartition() samples within
# each htn class. A numeric outcome is grouped by quantiles instead, which
# does not stratify by class when the variable takes only two values.
random_sample <- createDataPartition(as.factor(crossdata$htn),
                                     p = 0.8, list = FALSE)

# generating training dataset from the random_sample
# (list = FALSE yields a one-column matrix of row numbers; as.vector() flattens
# it so the index is valid for tibbles as well as plain data frames)
# NOTE(review): the name `train` shadows caret::train() as a variable; calls
# of the form train(...) still resolve to the function.
train <- crossdata[as.vector(random_sample), ]

# generating testing dataset from rows which are not included in the random_sample
test <- crossdata[-as.vector(random_sample), ]

# Row/column counts of the two splits.
dim(train)

dim(test)
## Logistic regression on the training split using all three predictors.
holdoutmodel1 <- glm(as.factor(htn) ~ age + bmi + ecghr, data = train, family = binomial)

summary(holdoutmodel1)

## Reduced model (age + bmi); this is the model evaluated on both splits below.
holdoutmodel2 <- glm(as.factor(htn) ~ age + bmi, data = train, family = binomial)

# type = "response" returns fitted probabilities. The default for a binomial
# glm is the linear-predictor (log-odds) scale, on which a 0.5 cut-off and an
# RMSE against the observed outcome are not meaningful.
train$predicted <- predict(holdoutmodel2, newdata = train, type = "response")

# Residuals are observed minus predicted probability, computed the same way
# for both splits so that rmse_train and rmse_test are comparable.
# NOTE(review): assumes htn is coded 0/1 -- confirm against the data.
train$residuals <- as.numeric(train$htn) - train$predicted
rmse_train <- sqrt(mean(train$residuals^2))

# Confusion matrix and misclassification error - training data.
# The prediction factor carries both levels so the table stays 2 x 2 (and
# diag() picks the agreeing cells) even if one class is never predicted.
pre1 <- ifelse(train$predicted > 0.5, 1, 0)
pretable <- table(Prediction = factor(pre1, levels = c(0, 1)),
                  Actual = train$htn)
pretable
1 - sum(diag(pretable)) / sum(pretable)

# Same evaluation on the held-out rows, again on the probability scale.
test$predicted <- predict(holdoutmodel2, newdata = test, type = "response")
test$residuals <- as.numeric(test$htn) - test$predicted
rmse_test <- sqrt(mean(test$residuals^2))

# Confusion matrix and misclassification error - testing data.
post1 <- ifelse(test$predicted > 0.5, 1, 0)
posttable <- table(Prediction = factor(post1, levels = c(0, 1)),
                   Actual = test$htn)
posttable
1 - sum(diag(posttable)) / sum(posttable)

LOOCV code

## Load the packages in this order.
library(lattice)
library(ggplot2)
library(caret)
library(tidyverse)
library(haven)
## Read the Stata file, keep the four variables used by the models in this
## section, and drop rows with any missing value. select() is qualified with
## its namespace, as in the other sections, so the call cannot be captured by
## a select() from another attached package.
crossdata <- read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta") %>%
  dplyr::select(ecghr, age, bmi, htn) %>%
  na.omit()
## Resampling scheme: leave-one-out cross-validation.
method <- trainControl(method = "LOOCV")

## Candidate models, each fitted with caret's "glm" method and evaluated by
## leave-one-out: the three-predictor model first, then the three
## two-predictor models.
crossmodelfull <- train(
  as.factor(htn) ~ age + bmi + ecghr,
  data = crossdata, method = "glm", trControl = method
)

crossmodel1 <- train(
  as.factor(htn) ~ age + bmi,
  data = crossdata, method = "glm", trControl = method
)

crossmodel2 <- train(
  as.factor(htn) ~ age + ecghr,
  data = crossdata, method = "glm", trControl = method
)

crossmodel3 <- train(
  as.factor(htn) ~ bmi + ecghr,
  data = crossdata, method = "glm", trControl = method
)

## Print the resampling summary of every fit, in the order they were built.
invisible(lapply(
  list(crossmodelfull, crossmodel1, crossmodel2, crossmodel3),
  print
))

## Final glm refitted on all rows for the three-predictor model.
crossmodelfull$finalModel