# Load the analysis dataset, keep only the variables used below, and drop
# rows with any missing values. (read_dta is from haven; select from dplyr.)
crossdata <- read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta") %>%
  dplyr::select(subjid, hba1c, age, totchol, htn, bmi, fpg, ecghr, nbmedhhincome) %>%
  na.omit()

# Appendix ----
# K-Fold code ----
## Histograms of each candidate predictor, filled by hypertension status (htn).
plot1 <- ggplot(crossdata, aes(bmi)) +
  geom_histogram(aes(fill = as.factor(htn)), color = "black", binwidth = 2)
plot2 <- ggplot(crossdata, aes(age)) +
  geom_histogram(aes(fill = as.factor(htn)), color = "black", binwidth = 2)
plot3 <- ggplot(crossdata, aes(ecghr)) +
  geom_histogram(aes(fill = as.factor(htn)), color = "black", binwidth = 2)

## Display each plot with the legend moved to the bottom.
plot1 + theme(legend.position = "bottom")
plot2 + theme(legend.position = "bottom")
plot3 + theme(legend.position = "bottom")

#grid.arrange(grobs= list(plot1, plot2, plot3),
# ncol=2, nrow=2,
# top = ("Histograms"))
## This sets the cross-validation method with k=5 folds
method <- trainControl(method = "cv", number = 5)

## Fit logistic-regression models (method = "glm") and use k-fold CV to
## evaluate out-of-sample performance for each candidate predictor set.
crossmodelfull <- train(as.factor(htn) ~ age + bmi + ecghr,
                        data = crossdata,
                        method = "glm",
                        trControl = method)
crossmodel1 <- train(as.factor(htn) ~ age + bmi,
                     data = crossdata,
                     method = "glm",
                     trControl = method)
crossmodel2 <- train(as.factor(htn) ~ age + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)
crossmodel3 <- train(as.factor(htn) ~ bmi + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

print(crossmodelfull)
print(crossmodel1)
print(crossmodel2)
print(crossmodel3)

## Per-fold resampling results and the final fitted glm for the full model.
crossmodelfull$resample
crossmodelfull$finalModel
# Holdout code ----

# Reload the analysis dataset for the holdout analysis.
crossdata <- read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta") %>%
  dplyr::select(subjid, hba1c, age, totchol, htn, bmi, fpg, ecghr, nbmedhhincome) %>%
  na.omit()

## This sets the cross-validation method to holdout with an 80/20 split
random_sample <- createDataPartition(crossdata$htn,
                                     p = 0.8, list = FALSE)

# generating training dataset from the random_sample
train <- crossdata[random_sample, ]

# generating testing dataset from rows which are not included in the random_sample
test <- crossdata[-random_sample, ]

dim(train)
dim(test)
# Logistic regression on the training split: full model and reduced model.
holdoutmodel1 <- glm(as.factor(htn) ~ age + bmi + ecghr,
                     data = train, family = binomial)
summary(holdoutmodel1)

holdoutmodel2 <- glm(as.factor(htn) ~ age + bmi,
                     data = train, family = binomial)

# Predicted probabilities on the training data.
# NOTE(review): type = "response" is required here — without it predict()
# returns log-odds, so the 0.5 cutoff below would be on the wrong scale.
train$predicted <- predict(holdoutmodel2, train, type = "response")
# Response-scale residuals (observed 0/1 minus predicted probability), so
# rmse_train is computed the same way as rmse_test below.
train$residuals <- train$htn - train$predicted
rmse_train <- sqrt(mean(train$residuals^2))
rmse_train

# confusion Matrix
# Misclassification error - Training data
pre1 <- ifelse(train$predicted > 0.5, 1, 0)
pretable <- table(Prediction = pre1,
                  Actual = train$htn)
1 - sum(diag(pretable)) / sum(pretable)

# Predicted probabilities and residuals on the held-out testing data.
test$predicted <- predict(holdoutmodel2, test, type = "response")
test$residuals <- test$htn - test$predicted
rmse_test <- sqrt(mean(test$residuals^2))
rmse_test

# confusion Matrix
# Misclassification error - Testing data
post1 <- ifelse(test$predicted > 0.5, 1, 0)
posttable <- table(Prediction = post1,
                   Actual = test$htn)
1 - sum(diag(posttable)) / sum(posttable)
# LOOCV code ----

## Load the packages in this order.
library(lattice)
library(ggplot2)
library(caret)
library(tidyverse)
library(haven)

# Reload the dataset, keeping only the variables used in the LOOCV models.
crossdata <- read_dta("C:/Users/buste/OneDrive/Desktop/Modeling/analysis1.dta") %>%
  select(ecghr, age, bmi, htn) %>%
  na.omit()

## Sets method of cross-validation to use leave-one-out
method <- trainControl(method = "LOOCV")
## Logistic-regression models evaluated with leave-one-out cross-validation.
crossmodelfull <- train(as.factor(htn) ~ age + bmi + ecghr,
                        data = crossdata,
                        method = "glm",
                        trControl = method)
crossmodel1 <- train(as.factor(htn) ~ age + bmi,
                     data = crossdata,
                     method = "glm",
                     trControl = method)
crossmodel2 <- train(as.factor(htn) ~ age + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)
crossmodel3 <- train(as.factor(htn) ~ bmi + ecghr,
                     data = crossdata,
                     method = "glm",
                     trControl = method)

print(crossmodelfull)
print(crossmodel1)
print(crossmodel2)
print(crossmodel3)

## Final fitted glm for the full model.
crossmodelfull$finalModel