# Appendix: K-Fold code ----
# Attach the packages this section relies on so it can run on its own. The
# order matches the library() block used later in this file; attaching a
# package twice is harmless.
library(lattice)
library(ggplot2)
library(caret)
library(tidyverse)
library(haven)

# Read the Stata file, keep the analysis variables, and drop every row that has
# a missing value in any of them.
crossdata <- read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta") %>%
  dplyr::select(subjid, hba1c, age, totchol, htn, bmi, fpg, ecghr,
                nbmedhhincome) %>%
  na.omit()

# Histograms of each candidate predictor, with bars filled by htn.
plot1 <- ggplot(crossdata, aes(bmi)) +
  geom_histogram(aes(fill = as.factor(htn)), color = "black", binwidth = 2)
plot2 <- ggplot(crossdata, aes(age)) +
  geom_histogram(aes(fill = as.factor(htn)), color = "black", binwidth = 2)
plot3 <- ggplot(crossdata, aes(ecghr)) +
  geom_histogram(aes(fill = as.factor(htn)), color = "black", binwidth = 2)
plot1 + theme(legend.position = "bottom")
plot2 + theme(legend.position = "bottom")
plot3 + theme(legend.position = "bottom")
# grid.arrange(grobs = list(plot1, plot2, plot3),
#              ncol = 2, nrow = 2,
#              top = ("Histograms"))

## This sets the cross-validation method with k=5 folds
method <- trainControl(method = "cv", number = 5)

# Fold assignment is random. Resetting the same seed immediately before each
# train() call makes the results reproducible and, following the caret
# documentation's advice for comparing models, gives every model the same
# resampling folds so their accuracies are directly comparable.
cv_seed <- 123

## Fit logistic regression models and use k-fold CV to evaluate performance
set.seed(cv_seed)
crossmodelfull <- train(as.factor(htn) ~ age + bmi + ecghr,
                        data = crossdata,
                        method = "glm",
                        trControl = method)

set.seed(cv_seed)
crossmodel1 <- train(as.factor(htn) ~ age + bmi,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

set.seed(cv_seed)
crossmodel2 <- train(as.factor(htn) ~ age + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

set.seed(cv_seed)
crossmodel3 <- train(as.factor(htn) ~ bmi + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

# Cross-validated performance summary for each candidate model.
print(crossmodelfull)
print(crossmodel1)
print(crossmodel2)
print(crossmodel3)

# Per-fold metrics and the final fitted glm for the full model.
crossmodelfull$resample
crossmodelfull$finalModel

# Holdout code ----
# Re-read the analysis data so the holdout analysis starts from a fresh copy.
# Assumes the packages named in this file's library() calls (caret, tidyverse,
# haven) are already attached.
crossdata <- read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta") %>%
  dplyr::select(subjid, hba1c, age, totchol, htn, bmi, fpg, ecghr,
                nbmedhhincome) %>%
  na.omit()

## This sets the cross-validation method to holdout with an 80/20 split
# The partition is random, so fix the seed to make the split reproducible.
set.seed(123)
# The outcome is passed as a factor so createDataPartition() samples within
# each htn class; with a numeric outcome it groups by percentiles instead,
# which does not separate a two-valued outcome into its classes.
random_sample <- createDataPartition(as.factor(crossdata$htn),
                                     p = 0.8, list = FALSE)
# generating training dataset from the random_sample
train <- crossdata[random_sample, ]
# generating testing dataset from rows which are not included in the random_sample
test <- crossdata[-random_sample, ]
dim(train)
dim(test)

# Candidate logistic regression models, fitted on the training rows only.
holdoutmodel1 <- glm(as.factor(htn) ~ age + bmi + ecghr,
                     data = train, family = binomial)
summary(holdoutmodel1)
holdoutmodel2 <- glm(as.factor(htn) ~ age + bmi,
                     data = train, family = binomial)

# NOTE(review): everything below assumes htn is coded 0/1 with 1 as the modeled
# event (the second factor level) -- confirm against the data dictionary.
#
# predict() on a glm returns the linear predictor (log-odds) by default, so
# type = "response" is required to get probabilities that can be compared with
# the 0.5 cut-off and subtracted from the observed 0/1 outcome.
train$predicted <- predict(holdoutmodel2, newdata = train, type = "response")
# Residuals are observed minus predicted probability, computed the same way for
# the training and testing data so the two RMSE values are comparable.
train$residuals <- as.numeric(train$htn) - train$predicted
rmse_train <- sqrt(mean(train$residuals^2))
# confusion Matrix
# $Misclassification error -Training data
pre1 <- ifelse(train$predicted > 0.5, 1, 0)
# Fixed factor levels keep the table 2 x 2 even when only one class is
# predicted, so diag() always picks out the correctly classified counts.
pretable <- table(Prediction = factor(pre1, levels = c(0, 1)),
                  Actual = factor(as.numeric(train$htn), levels = c(0, 1)))
pretable
1 - sum(diag(pretable)) / sum(pretable)

test$predicted <- predict(holdoutmodel2, newdata = test, type = "response")
test$residuals <- as.numeric(test$htn) - test$predicted
rmse_test <- sqrt(mean(test$residuals^2))
# confusion Matrix
# $Misclassification error -Testing data
post1 <- ifelse(test$predicted > 0.5, 1, 0)
posttable <- table(Prediction = factor(post1, levels = c(0, 1)),
                   Actual = factor(as.numeric(test$htn), levels = c(0, 1)))
posttable
1 - sum(diag(posttable)) / sum(posttable)

# LOOCV code ----
## Load the packages in this order.
library(lattice)
library(ggplot2)
library(caret)
library(tidyverse)
library(haven)

# Read the Stata file, keep only the outcome (htn) and the three predictors,
# and drop rows with a missing value in any of them. select() is namespaced to
# match the other sections of this file and to avoid masking by other packages.
crossdata <- read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta") %>%
  dplyr::select(ecghr, age, bmi, htn) %>%
  na.omit()

## Sets method of cross-validation to use leave-one-out
method <- trainControl(method = "LOOCV")

## Example model created to demonstrate leave-one-out
crossmodelfull <- train(as.factor(htn) ~ age + bmi + ecghr,
                        data = crossdata,
                        method = "glm",
                        trControl = method)
crossmodel1 <- train(as.factor(htn) ~ age + bmi,
                     data = crossdata,
                     method = "glm",
                     trControl = method)
crossmodel2 <- train(as.factor(htn) ~ age + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)
crossmodel3 <- train(as.factor(htn) ~ bmi + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

# Leave-one-out performance summary for each candidate model.
print(crossmodelfull)
print(crossmodel1)
print(crossmodel2)
print(crossmodel3)

# Final glm for the full model.
crossmodelfull$finalModel