Heart Disease Prediction Model
Heart Disease Prediction Model
Enock Bereka
2024-12-14
# Load the required library and the dataset
# NOTE(review): no library() call or data-import step is visible in this
# excerpt; `heart` is presumably created before this point - confirm.
view(heart)
# Data Exploration
# Print the first rows of `heart`.
head(heart)
## # A tibble: 6 × 14
## age sex cp trestbps chol fbs restecg thalach exang oldpeak
slope
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
<dbl>
## 1 52 1 0 125 212 0 1 168 0 1
2
## 2 53 1 0 140 203 1 0 155 1 3.1
0
## 3 70 1 0 145 174 0 1 125 1 2.6
0
## 4 61 1 0 148 203 0 1 161 0 0
2
## 5 62 0 0 138 294 1 1 106 0 1.9
1
## 6 58 0 0 100 248 0 0 122 0 1
1
## # ℹ 3 more variables: ca <dbl>, thal <dbl>, target <dbl>
# Print a compact per-column overview (type and leading values) of `heart`.
glimpse(heart)
## Rows: 1,025
## Columns: 14
## $ age <dbl> 52, 53, 70, 61, 62, 58, 58, 55, 46, 54, 71, 43, 34, 51,
52, 3…
## $ sex <dbl> 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0,
1, 1…
## $ cp <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0, 1,
2, 2…
## $ trestbps <dbl> 125, 140, 145, 148, 138, 100, 114, 160, 120, 122, 112,
132, 1…
## $ chol <dbl> 212, 203, 174, 203, 294, 248, 318, 289, 249, 286, 149,
341, 2…
## $ fbs <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
1, 0…
## $ restecg <dbl> 1, 0, 1, 1, 1, 0, 2, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,
0, 0…
## $ thalach <dbl> 168, 155, 125, 161, 106, 122, 140, 145, 144, 116, 125,
136, 1…
## $ exang <dbl> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0,
0, 0…
## $ oldpeak <dbl> 1.0, 3.1, 2.6, 0.0, 1.9, 1.0, 4.4, 0.8, 0.8, 3.2, 1.6,
3.0, 0…
## $ slope <dbl> 2, 0, 0, 2, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2,
2, 1…
## $ ca <dbl> 2, 0, 0, 1, 3, 0, 3, 1, 0, 2, 0, 0, 0, 3, 0, 0, 1, 1, 0,
0, 0…
## $ thal <dbl> 3, 3, 3, 3, 2, 2, 1, 3, 3, 2, 2, 3, 2, 3, 0, 2, 2, 3, 2,
2, 2…
## $ target <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
1, 0…
# Check whether `heart` contains any missing values (single TRUE/FALSE).
anyNA(heart)
## [1] FALSE
## .
## FALSE TRUE
## 302 723
# Pairwise correlation matrix of the columns of `heart`, rounded to two
# decimal places.
cr <- round(x = cor(x = heart), digits = 2)
##
## Call:
## glm(formula = target ~ ., family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.295200 4.353818 0.068 0.945943
## age 0.016480 0.030258 0.545 0.585989
## sex1 -2.624782 0.747849 -3.510 0.000448 ***
## cp1 1.306173 0.714180 1.829 0.067413 .
## cp2 2.596896 0.671185 3.869 0.000109 ***
## cp3 2.920285 0.861528 3.390 0.000700 ***
## trestbps -0.035263 0.013776 -2.560 0.010474 *
## chol -0.006496 0.005081 -1.278 0.201115
## fbs1 1.341979 0.831161 1.615 0.106401
## restecg1 0.074422 0.478785 0.155 0.876474
## restecg2 -0.243861 3.353627 -0.073 0.942032
## thalach 0.033606 0.017009 1.976 0.048174 *
## exang1 -0.619649 0.545508 -1.136 0.255993
## oldpeak -0.409794 0.267555 -1.532 0.125615
## slope1 -0.893167 1.000572 -0.893 0.372041
## slope2 0.861472 1.072122 0.804 0.421674
## ca1 -2.077715 0.649907 -3.197 0.001389 **
## ca2 -3.701530 0.989076 -3.742 0.000182 ***
## ca3 -1.845235 1.123495 -1.642 0.100506
## ca4 -1.084200 3.504617 -0.309 0.757045
## thal1 3.856076 2.968733 1.299 0.193980
## thal2 2.734698 2.856244 0.957 0.338342
## thal3 1.295089 2.852677 0.454 0.649835
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 126.82 on 218 degrees of freedom
## AIC: 172.82
##
## Number of Fisher Scoring iterations: 7
# Model optimization
# Refit the binomial GLM on `training` using every predictor except `age`,
# then print the coefficient table and fit statistics.
model <- glm(
  formula = target ~ . - age,
  family = binomial,
  data = training
)
summary(model)
##
## Call:
## glm(formula = target ~ . - age, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.325665 4.011251 0.330 0.741032
## sex1 -2.645056 0.744208 -3.554 0.000379 ***
## cp1 1.314534 0.712747 1.844 0.065136 .
## cp2 2.614185 0.672591 3.887 0.000102 ***
## cp3 2.879652 0.851004 3.384 0.000715 ***
## trestbps -0.033395 0.013231 -2.524 0.011602 *
## chol -0.006145 0.004981 -1.234 0.217310
## fbs1 1.315863 0.823644 1.598 0.110129
## restecg1 0.072474 0.479066 0.151 0.879754
## restecg2 -0.203098 3.107969 -0.065 0.947897
## thalach 0.030176 0.015683 1.924 0.054334 .
## exang1 -0.628631 0.543178 -1.157 0.247142
## oldpeak -0.423506 0.266105 -1.591 0.111497
## slope1 -0.879646 1.000413 -0.879 0.379248
## slope2 0.874920 1.071971 0.816 0.414398
## ca1 -2.029203 0.643416 -3.154 0.001612 **
## ca2 -3.555466 0.944992 -3.762 0.000168 ***
## ca3 -1.760305 1.124240 -1.566 0.117402
## ca4 -1.173104 3.359677 -0.349 0.726960
## thal1 3.911792 3.091294 1.265 0.205720
## thal2 2.791222 2.982312 0.936 0.349311
## thal3 1.356410 2.977998 0.455 0.648766
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 127.12 on 219 degrees of freedom
## AIC: 171.12
##
## Number of Fisher Scoring iterations: 7
##
## Call:
## glm(formula = target ~ . - chol, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.44404 4.24888 -0.105 0.916766
## age 0.01272 0.02959 0.430 0.667287
## sex1 -2.41489 0.71785 -3.364 0.000768 ***
## cp1 1.22038 0.70464 1.732 0.083288 .
## cp2 2.58580 0.66586 3.883 0.000103 ***
## cp3 2.87907 0.85219 3.378 0.000729 ***
## trestbps -0.03508 0.01383 -2.537 0.011182 *
## fbs1 1.31623 0.81371 1.618 0.105757
## restecg1 0.19375 0.46605 0.416 0.677608
## restecg2 -0.06578 3.01838 -0.022 0.982614
## thalach 0.02989 0.01624 1.840 0.065696 .
## exang1 -0.63343 0.53583 -1.182 0.237144
## oldpeak -0.42392 0.26841 -1.579 0.114249
## slope1 -0.97229 1.00300 -0.969 0.332354
## slope2 0.78275 1.07296 0.730 0.465681
## ca1 -2.09718 0.65763 -3.189 0.001428 **
## ca2 -3.62163 0.98329 -3.683 0.000230 ***
## ca3 -1.83598 1.10052 -1.668 0.095261 .
## ca4 -1.08181 3.47259 -0.312 0.755400
## thal1 3.75233 2.93266 1.279 0.200723
## thal2 2.56257 2.81508 0.910 0.362663
## thal3 1.02733 2.80375 0.366 0.714057
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 128.37 on 219 degrees of freedom
## AIC: 172.37
##
## Number of Fisher Scoring iterations: 7
##
## Call:
## glm(formula = target ~ . - fbs, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.707565 4.920003 0.144 0.885647
## age 0.014721 0.030413 0.484 0.628355
## sex1 -2.572096 0.747487 -3.441 0.000580 ***
## cp1 1.209278 0.694549 1.741 0.081666 .
## cp2 2.629193 0.654316 4.018 5.86e-05 ***
## cp3 3.003895 0.864801 3.474 0.000514 ***
## trestbps -0.032134 0.013221 -2.431 0.015075 *
## chol -0.006386 0.004972 -1.284 0.199035
## restecg1 0.125134 0.474661 0.264 0.792066
## restecg2 -0.301995 3.265169 -0.092 0.926309
## thalach 0.033734 0.016897 1.996 0.045886 *
## exang1 -0.651458 0.544731 -1.196 0.231725
## oldpeak -0.436764 0.266089 -1.641 0.100710
## slope1 -0.968080 1.006197 -0.962 0.335990
## slope2 0.679868 1.077629 0.631 0.528111
## ca1 -1.906226 0.618536 -3.082 0.002057 **
## ca2 -3.514953 0.973282 -3.611 0.000304 ***
## ca3 -1.524615 1.046876 -1.456 0.145297
## ca4 -0.522147 4.651809 -0.112 0.910628
## thal1 3.303207 3.755174 0.880 0.379054
## thal2 2.051047 3.663348 0.560 0.575559
## thal3 0.705558 3.670620 0.192 0.847572
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 129.54 on 219 degrees of freedom
## AIC: 173.54
##
## Number of Fisher Scoring iterations: 7
##
## Call:
## glm(formula = target ~ . - restecg, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.363025 4.353294 0.083 0.933541
## age 0.016401 0.030232 0.543 0.587459
## sex1 -2.635390 0.740914 -3.557 0.000375 ***
## cp1 1.315717 0.712078 1.848 0.064644 .
## cp2 2.597341 0.671275 3.869 0.000109 ***
## cp3 2.924994 0.859950 3.401 0.000671 ***
## trestbps -0.035573 0.013639 -2.608 0.009104 **
## chol -0.006644 0.004961 -1.339 0.180501
## fbs1 1.346928 0.827585 1.628 0.103622
## thalach 0.033933 0.016907 2.007 0.044748 *
## exang1 -0.621461 0.545736 -1.139 0.254804
## oldpeak -0.410861 0.265914 -1.545 0.122325
## slope1 -0.893861 0.996916 -0.897 0.369919
## slope2 0.870259 1.066995 0.816 0.414719
## ca1 -2.085554 0.647452 -3.221 0.001277 **
## ca2 -3.706346 0.986647 -3.757 0.000172 ***
## ca3 -1.848735 1.127038 -1.640 0.100933
## ca4 -1.088224 3.450364 -0.315 0.752463
## thal1 3.878334 3.008006 1.289 0.197281
## thal2 2.736131 2.900255 0.943 0.345471
## thal3 1.308902 2.894590 0.452 0.651133
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 126.85 on 220 degrees of freedom
## AIC: 168.85
##
## Number of Fisher Scoring iterations: 7
##
## Call:
## glm(formula = target ~ . - exang, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.451644 4.102964 -0.110 0.912348
## age 0.017589 0.029896 0.588 0.556301
## sex1 -2.566693 0.733236 -3.501 0.000464 ***
## cp1 1.525123 0.688089 2.216 0.026660 *
## cp2 2.853317 0.637938 4.473 7.72e-06 ***
## cp3 3.060928 0.857519 3.570 0.000358 ***
## trestbps -0.035481 0.013831 -2.565 0.010308 *
## chol -0.006545 0.004967 -1.318 0.187579
## fbs1 1.348230 0.813749 1.657 0.097557 .
## restecg1 0.083141 0.476601 0.174 0.861516
## restecg2 -0.215391 3.169351 -0.068 0.945817
## thalach 0.035485 0.016760 2.117 0.034239 *
## oldpeak -0.445472 0.267789 -1.664 0.096208 .
## slope1 -0.892251 0.989585 -0.902 0.367248
## slope2 0.892124 1.061350 0.841 0.400597
## ca1 -2.087751 0.649074 -3.217 0.001298 **
## ca2 -3.598974 0.974396 -3.694 0.000221 ***
## ca3 -1.855011 1.117170 -1.660 0.096823 .
## ca4 -1.238432 3.346158 -0.370 0.711304
## thal1 3.987878 2.734565 1.458 0.144752
## thal2 2.843159 2.615743 1.087 0.277063
## thal3 1.344503 2.609336 0.515 0.606367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 128.10 on 219 degrees of freedom
## AIC: 172.1
##
## Number of Fisher Scoring iterations: 7
##
## Call:
## glm(formula = target ~ . - oldpeak, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.381717 4.232311 -0.090 0.928135
## age 0.021295 0.030209 0.705 0.480861
## sex1 -2.711512 0.727086 -3.729 0.000192 ***
## cp1 1.406083 0.718866 1.956 0.050468 .
## cp2 2.398675 0.638389 3.757 0.000172 ***
## cp3 2.734715 0.858382 3.186 0.001443 **
## trestbps -0.036830 0.013751 -2.678 0.007401 **
## chol -0.006730 0.005041 -1.335 0.181857
## fbs1 1.367347 0.790969 1.729 0.083863 .
## restecg1 0.082123 0.473702 0.173 0.862365
## restecg2 -0.600034 2.904674 -0.207 0.836342
## thalach 0.034541 0.016853 2.049 0.040415 *
## exang1 -0.708292 0.538294 -1.316 0.188239
## slope1 -0.507411 0.901779 -0.563 0.573654
## slope2 1.501021 0.926555 1.620 0.105232
## ca1 -2.010400 0.639365 -3.144 0.001664 **
## ca2 -3.909845 0.959312 -4.076 4.59e-05 ***
## ca3 -1.944451 1.082769 -1.796 0.072524 .
## ca4 -0.666123 3.643758 -0.183 0.854945
## thal1 3.552278 2.822071 1.259 0.208121
## thal2 2.589238 2.710921 0.955 0.339520
## thal3 1.103418 2.708624 0.407 0.683735
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 129.31 on 219 degrees of freedom
## AIC: 173.31
##
## Number of Fisher Scoring iterations: 7
# oldpeak should be kept: removing it raises the AIC from 172.82 (full model) to 173.31.
##
## Call:
## glm(formula = target ~ . - slope, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.742790 3.584117 -0.207 0.835818
## age 0.015372 0.030434 0.505 0.613505
## sex1 -2.211146 0.676249 -3.270 0.001077 **
## cp1 1.347198 0.686410 1.963 0.049684 *
## cp2 2.397706 0.625744 3.832 0.000127 ***
## cp3 2.655163 0.806184 3.293 0.000990 ***
## trestbps -0.033253 0.013236 -2.512 0.011994 *
## chol -0.006654 0.004919 -1.353 0.176135
## fbs1 1.030259 0.764565 1.348 0.177816
## restecg1 0.186206 0.464385 0.401 0.688440
## restecg2 -0.357638 3.282078 -0.109 0.913229
## thalach 0.040990 0.015769 2.599 0.009337 **
## exang1 -0.695550 0.532766 -1.306 0.191708
## oldpeak -0.636878 0.253941 -2.508 0.012143 *
## ca1 -1.617110 0.584633 -2.766 0.005674 **
## ca2 -2.934552 0.873648 -3.359 0.000782 ***
## ca3 -1.492465 0.994232 -1.501 0.133324
## ca4 -1.135837 2.763365 -0.411 0.681048
## thal1 3.130701 2.100989 1.490 0.136196
## thal2 2.371865 1.949207 1.217 0.223667
## thal3 0.809129 1.946088 0.416 0.677577
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 136.30 on 220 degrees of freedom
## AIC: 178.3
##
## Number of Fisher Scoring iterations: 6
##
## Call:
## glm(formula = target ~ . - thal, family = binomial, data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.609153 3.473813 1.327 0.184566
## age 0.012875 0.029076 0.443 0.657895
## sex1 -2.873978 0.637989 -4.505 6.65e-06 ***
## cp1 1.608956 0.698737 2.303 0.021298 *
## cp2 2.507789 0.615335 4.075 4.59e-05 ***
## cp3 2.777243 0.775936 3.579 0.000345 ***
## trestbps -0.032936 0.013123 -2.510 0.012077 *
## chol -0.009421 0.004888 -1.928 0.053912 .
## fbs1 1.075935 0.724441 1.485 0.137492
## restecg1 -0.054437 0.450792 -0.121 0.903883
## restecg2 -0.729176 2.797157 -0.261 0.794336
## thalach 0.026316 0.014890 1.767 0.077164 .
## exang1 -0.868600 0.527297 -1.647 0.099503 .
## oldpeak -0.392701 0.244089 -1.609 0.107651
## slope1 -0.978026 0.932975 -1.048 0.294506
## slope2 0.769392 1.031594 0.746 0.455771
## ca1 -1.942310 0.589225 -3.296 0.000979 ***
## ca2 -3.318062 0.894370 -3.710 0.000207 ***
## ca3 -2.177570 1.060378 -2.054 0.040016 *
## ca4 -0.998000 2.652173 -0.376 0.706698
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 139.70 on 221 degrees of freedom
## AIC: 179.7
##
## Number of Fisher Scoring iterations: 6
##
## Call:
## glm(formula = target ~ . - age - chol - restecg - exang, family =
binomial,
## data = training)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.17499 3.65481 -0.048 0.961812
## sex1 -2.39086 0.69210 -3.454 0.000551 ***
## cp1 1.46593 0.67782 2.163 0.030562 *
## cp2 2.85714 0.63490 4.500 6.79e-06 ***
## cp3 2.98425 0.83328 3.581 0.000342 ***
## trestbps -0.03439 0.01311 -2.623 0.008729 **
## fbs1 1.30984 0.78043 1.678 0.093277 .
## thalach 0.02941 0.01465 2.008 0.044618 *
## oldpeak -0.47145 0.26303 -1.792 0.073080 .
## slope1 -0.96292 0.97900 -0.984 0.325326
## slope2 0.83988 1.05150 0.799 0.424442
## ca1 -2.08207 0.64692 -3.218 0.001289 **
## ca2 -3.38838 0.91279 -3.712 0.000206 ***
## ca3 -1.80585 1.09189 -1.654 0.098153 .
## ca4 -1.31700 3.06447 -0.430 0.667367
## thal1 3.97623 2.87657 1.382 0.166886
## thal2 2.73084 2.76286 0.988 0.322950
## thal3 1.14514 2.74812 0.417 0.676900
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.26 on 240 degrees of freedom
## Residual deviance: 130.19 on 223 degrees of freedom
## AIC: 166.19
##
## Number of Fisher Scoring iterations: 6
# Diagnostic checks on `model` using the performance package.
# NOTE(review): `model` is presumably the reduced fit summarised just above
# (target ~ . - age - chol - restecg - exang) - confirm.
library(performance)
# Check outliers
check_outliers(model)
# Check autocorrelation
check_autocorrelation(model)
##
## $sex
## # Predicted probabilities of target
##
## sex | Predicted | 95% CI
## ----------------------------
## 0 | 0.86 | 0.68, 0.95
## 1 | 0.37 | 0.24, 0.52
##
##
## $cp
## # Predicted probabilities of target
##
## cp | Predicted | 95% CI
## ---------------------------
## 0 | 0.25 | 0.15, 0.40
## 1 | 0.60 | 0.31, 0.83
## 2 | 0.86 | 0.69, 0.94
## 3 | 0.87 | 0.62, 0.96
##
##
## $trestbps
## # Predicted probabilities of target
##
## trestbps | Predicted | 95% CI
## ---------------------------------
## 94 | 0.82 | 0.62, 0.93
## 107 | 0.75 | 0.58, 0.87
## 121 | 0.65 | 0.52, 0.76
## 134 | 0.54 | 0.42, 0.65
## 147 | 0.43 | 0.29, 0.58
## 160 | 0.33 | 0.17, 0.54
## 173 | 0.24 | 0.09, 0.50
## 200 | 0.11 | 0.02, 0.43
##
## $fbs
## # Predicted probabilities of target
##
## fbs | Predicted | 95% CI
## ----------------------------
## 0 | 0.51 | 0.39, 0.63
## 1 | 0.80 | 0.48, 0.94
##
## $thalach
## # Predicted probabilities of target
##
## thalach | Predicted | 95% CI
## --------------------------------
## 70 | 0.11 | 0.01, 0.57
## 85 | 0.16 | 0.03, 0.57
## 105 | 0.25 | 0.08, 0.58
## 120 | 0.35 | 0.16, 0.59
## 135 | 0.45 | 0.30, 0.61
## 155 | 0.60 | 0.48, 0.70
## 170 | 0.70 | 0.53, 0.82
## 205 | 0.87 | 0.56, 0.97
## $oldpeak
## # Predicted probabilities of target
##
## oldpeak | Predicted | 95% CI
## --------------------------------
## 0.00 | 0.67 | 0.51, 0.80
## 0.50 | 0.62 | 0.49, 0.73
## 1.50 | 0.50 | 0.37, 0.64
## 2.50 | 0.39 | 0.20, 0.62
## 3.00 | 0.33 | 0.14, 0.62
## 3.50 | 0.28 | 0.09, 0.62
## 4.50 | 0.20 | 0.04, 0.62
## 6.00 | 0.11 | 0.01, 0.63
##
## $slope
## # Predicted probabilities of target
##
## slope | Predicted | 95% CI
## ------------------------------
## 0 | 0.57 | 0.17, 0.89
## 1 | 0.33 | 0.19, 0.51
## 2 | 0.75 | 0.58, 0.87
##
##
## $ca
## # Predicted probabilities of target
##
## ca | Predicted | 95% CI
## ---------------------------
## 0 | 0.78 | 0.65, 0.87
## 1 | 0.30 | 0.13, 0.55
## 2 | 0.11 | 0.02, 0.37
## 3 | 0.36 | 0.07, 0.81
## 4 | 0.48 | 0.00, 1.00
##
##
## $thal
## # Predicted probabilities of target
##
## thal | Predicted | 95% CI
## -----------------------------
## 0 | 0.13 | 0.00, 0.97
## 1 | 0.89 | 0.59, 0.98
## 2 | 0.69 | 0.54, 0.81
## 3 | 0.31 | 0.18, 0.50
##
## ## attr(,"class")
## [1] "ggalleffects" "list"
## attr(,"model.name")
## [1] "model"
##
## 0 1
## 0 116 22
## 1 141 23
##
## 0 1 2
## 0 12 91 35
## 1 9 49 106
##
## 0 1 2 3 4
## 0 45 44 31 17 1
## 1 130 21 7 3 3
##
## 0 1 2 3
## 0 1 12 36 89
## 1 1 6 129 28
# Term-level tests for `model` via car::Anova (output not shown here).
car::Anova(model)
# Render the model equation shown below.
# NOTE(review): extract_eq() presumably comes from the equatiomatic package;
# no library() call for it is visible in this excerpt - confirm.
extract_eq(model)
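$$
\begin{aligned}
\log\left[ \frac{P(\text{target} = 1)}{1 - P(\text{target} = 1)} \right]
  &= \alpha + \beta_{1}(\text{sex}_{1}) + \beta_{2}(\text{cp}_{1}) + \beta_{3}(\text{cp}_{2}) + \beta_{4}(\text{cp}_{3}) + \beta_{5}(\text{trestbps}) + \beta_{6}(\text{fbs}_{1}) + \beta_{7}(\text{thalach}) \\
  &\quad + \beta_{8}(\text{oldpeak}) + \beta_{9}(\text{slope}_{1}) + \beta_{10}(\text{slope}_{2}) + \beta_{11}(\text{ca}_{1}) + \beta_{12}(\text{ca}_{2}) + \beta_{13}(\text{ca}_{3}) + \beta_{14}(\text{ca}_{4}) \\
  &\quad + \beta_{15}(\text{thal}_{1}) + \beta_{16}(\text{thal}_{2}) + \beta_{17}(\text{thal}_{3})
\end{aligned}
$$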
# How well our model fits the data
library(performance)
# Report fit indices for `model` (output not shown in this excerpt).
performance(model)
library(effectsize)
# Translate an R2 value into a qualitative label.
# NOTE(review): 0.665 is hard-coded, presumably copied by hand from the
# performance(model) output - confirm, and update it if the model changes.
interpret_r2(0.665)
## [1] "substantial"
## (Rules: cohen1988)
#Making Predictions
## Actual Predicted
## 56 0 0.05760362
## 57 1 0.07685093
## 58 1 0.98546924
## 59 0 0.75525302
## 60 0 0.05270169
## 61 1 0.95253725
# Model Evaluation
# Cross-tabulate the actual class in `testing` against whether the predicted
# value exceeds the 0.3 cut-off.
# NOTE(review): `predictions` is not defined in the visible code; presumably
# it holds predicted probabilities for `testing` - confirm.
table(Actual = testing$target, Predicted = predictions > 0.3)
## Predicted
## Actual FALSE TRUE
## 0 21 7
## 1 4 29
# Counts read from the confusion matrix printed above (rows = actual,
# columns = predicted): TN = 21, FP = 7, FN = 4, TP = 29.
# Accuracy = (TN + TP) / total, in percent
((21+29)/(21+7+4+29))*100
## [1] 81.96721
# Precision = TP / (FP + TP), in percent
# (share of predicted positives that are truly positive)
((29)/(7+29))*100
## [1] 80.55556
# Recall (sensitivity) = TP / (TP + FN), in percent
# (share of actual positives that were detected)
((29)/(29+4))*100
## [1] 87.87879
# F1-Score = 2 * precision * recall / (precision + recall),
# computed from the two percentages above rounded to 2 decimals
((80.56*87.88)/(80.56+87.88))*2
## [1] 84.06095
# MCC = (TP*TN - FN*FP) / sqrt((TP+FP) * (TP+FN) * (TN+FP) * (TN+FN));
# the hard-coded 911.9 is approximately sqrt(36 * 33 * 28 * 25)
((29*21)-(4*7))/911.9
## [1] 0.6371313
library(pROC)
##
## Call:
## roc.formula(formula = target ~ fitted.values(model), data = training,
plot = TRUE, legacy.axes = TRUE, print.auc = TRUE, ci = TRUE)
##
## Data: fitted.values(model) in 110 controls (target 0) < 131 cases (target
1).
## Area under the curve: 0.9555
## 95% CI: 0.9318-0.9792 (DeLong)
#AUC
#0.956
# Generate model probabilities
# Response-scale predictions (probabilities) from `model` for the rows of
# `testing`.
probs <- predict(object = model, newdata = testing, type = "response")