# R version 4.0.4 (2021-02-15) -- "Lost Library Book"
# Copyright (C) 2021 The R Foundation for Statistical Computing

# Read the exercise data: a comma-separated file with a header row.
dataset <- read.table(
  file = "C:/Users/clsgsr12/Desktop/EXERCISES.csv",
  sep = ",",
  header = TRUE
)

# Put the columns on the search path so later calls can use bare names
# (AGE, BMI, GROUP, ...). attach() exposes the columns as they are right now;
# reassigning `dataset` later in the script does not refresh the attached
# copy, so bare names keep the original numeric coding.
attach(dataset)

# Overview of every column while all of them are still numeric.
summary(dataset)
#     GROUP         GENDER           AGE          PREOP_CCT     POSTOP_CCT_1M  
# Min.   :1.0   Min.   :1.000   Min.   :21.00   Min.   :378.0   Min.   :493.0  
# 1st Qu.:1.0   1st Qu.:1.000   1st Qu.:27.00   1st Qu.:397.0   1st Qu.:509.0  
# Median :1.5   Median :1.000   Median :33.50   Median :470.0   Median :518.0  
# Mean   :1.5   Mean   :1.422   Mean   :33.14   Mean   :459.3   Mean   :518.1  
# 3rd Qu.:2.0   3rd Qu.:2.000   3rd Qu.:39.00   3rd Qu.:518.0   3rd Qu.:527.0  
# Max.   :2.0   Max.   :2.000   Max.   :45.00   Max.   :541.0   Max.   :541.0  
# SMOKING_STATUS       BMI       
# Min.   :1.000   Min.   :18.90  
# 1st Qu.:1.000   1st Qu.:22.55  
# Median :2.000   Median :24.90  
# Mean   :1.967   Mean   :24.61  
# 3rd Qu.:3.000   3rd Qu.:26.40  
# Max.   :3.000   Max.   :32.40  

 # Recode the three categorical columns of `dataset` from numeric codes to
 # labelled factors; labels are assigned to the codes in ascending order.
 dataset$GROUP <- factor(dataset$GROUP, labels = c("STUDY", "CONTROL"))
 dataset$GENDER <- factor(dataset$GENDER, labels = c("MALE", "FEMALE"))
 dataset$SMOKING_STATUS <- factor(
   dataset$SMOKING_STATUS,
   labels = c("NEVER SMOKER", "CURRENT SMOKER", "EX-SMOKER")
 )

 # Same overview as before, now with counts per level for the factors.
 summary(dataset)
#     GROUP       GENDER        AGE          PREOP_CCT     POSTOP_CCT_1M  
# STUDY  :45   MALE  :52   Min.   :21.00   Min.   :378.0   Min.   :493.0  
# CONTROL:45   FEMALE:38   1st Qu.:27.00   1st Qu.:397.0   1st Qu.:509.0  
#                          Median :33.50   Median :470.0   Median :518.0  
#                          Mean   :33.14   Mean   :459.3   Mean   :518.1  
#                          3rd Qu.:39.00   3rd Qu.:518.0   3rd Qu.:527.0  
#                          Max.   :45.00   Max.   :541.0   Max.   :541.0  
#        SMOKING_STATUS      BMI       
# NEVER SMOKER  :32     Min.   :18.90  
# CURRENT SMOKER:29     1st Qu.:22.55  
# EX-SMOKER     :29     Median :24.90  
#                       Mean   :24.61  
#                       3rd Qu.:26.40  
#                       Max.   :32.40  
#####################################################
### GROUP vs GENDER
# Cross-tabulate group membership against gender and display the counts.
# GROUP and GENDER resolve to the attached columns, which is why the recorded
# output shows the numeric codes 1/2 rather than factor labels.
(GROUP.GENDER <- table(GROUP, GENDER))

#     GENDER
#GROUP  1  2
#    1 32 13
#    2 20 25

# Row-wise proportions: the gender split inside each group.
prop.table(GROUP.GENDER, margin = 1)

#     GENDER
#GROUP         1         2
#    1 0.7111111 0.2888889
#    2 0.4444444 0.5555556

# Chi-squared test of independence on the 2x2 table.
chisq.test(GROUP.GENDER)

#        Pearson's Chi-squared test with Yates' continuity correction

#data:  GROUP.GENDER
#X-squared = 5.5111, df = 1, p-value = 0.0189

# Fisher's exact test on the same table.
fisher.test(GROUP.GENDER)

#        Fisher's Exact Test for Count Data

#data:  GROUP.GENDER
#p-value = 0.01835
#alternative hypothesis: true odds ratio is not equal to 1
#95 percent confidence interval:
# 1.183925 8.107478
#sample estimates:
#odds ratio 
#    3.0371 

### GROUP vs SMOKING_STATUS

# Counts of each smoking category within each group (numeric codes, because
# the attached columns are used).
(GROUP.SMOKING_STATUS <- table(GROUP, SMOKING_STATUS))

#     SMOKING_STATUS
#GROUP  1  2  3
#    1 16 15 14
#    2 16 14 15

# Row-wise proportions (each row sums to 1).

prop.table(GROUP.SMOKING_STATUS, margin = 1)

#     SMOKING_STATUS
#GROUP         1         2         3
#    1 0.3555556 0.3333333 0.3111111
#    2 0.3555556 0.3111111 0.3333333

# Chi-squared test of independence on the 2x3 table.
chisq.test(GROUP.SMOKING_STATUS)

#        Pearson's Chi-squared test

#data:  GROUP.SMOKING_STATUS
#X-squared = 0.068966, df = 2, p-value = 0.9661

### GENDER vs SMOKING_STATUS

# Counts of each smoking category within each gender (numeric codes, because
# the attached columns are used).
(GENDER.SMOKING_STATUS <- table(GENDER, SMOKING_STATUS))

#      SMOKING_STATUS
#GENDER  1  2  3
#     1 21 12 19
#     2 11 17 10

# Row-wise proportions (each row sums to 1).

prop.table(GENDER.SMOKING_STATUS, margin = 1)

#      SMOKING_STATUS
#GENDER         1         2         3
#     1 0.4038462 0.2307692 0.3653846
#     2 0.2894737 0.4473684 0.2631579

# Chi-squared test of independence on the 2x3 table.
chisq.test(GENDER.SMOKING_STATUS)

#        Pearson's Chi-squared test

#data:  GENDER.SMOKING_STATUS
#X-squared = 4.7165, df = 2, p-value = 0.09458

### Normality test: AGE within each GROUP
# This call had been commented out together with its console output, so the
# check that motivates the rank-based test in this section never ran when the
# script was executed. It is restored here.

tapply(AGE, GROUP, shapiro.test)
#$`1`

#        Shapiro-Wilk normality test

#data:  X[[i]]
#W = 0.93537, p-value = 0.01459


#$`2`

#        Shapiro-Wilk normality test

#data:  X[[i]]
#W = 0.93621, p-value = 0.01565

## Wilcoxon rank-sum test
# Both recorded Shapiro-Wilk p-values are below 0.05, so AGE is compared
# between the two groups with a rank-based test instead of a t test.

wilcox.test(AGE ~ GROUP)

#        Wilcoxon rank sum test with continuity correction

#data:  AGE by GROUP
#W = 990.5, p-value = 0.8621
#alternative hypothesis: true location shift is not equal to 0

#Warning message:
#In wilcox.test.default(x = c(24L, 29L, 40L, 26L, 33L, 37L, 22L,  :
#  cannot compute exact p-value with ties
 
## Summary: AGE by GROUP

# Group means.
tapply(X = AGE, INDEX = GROUP, FUN = mean)

#       1        2 
#33.00000 33.28889 

# Group standard deviations.
tapply(X = AGE, INDEX = GROUP, FUN = sd)

#       1        2 
#7.019453 7.111521 

# Per-group descriptives, kept as separate vectors and then bound into one
# table with a row per group.
xbarA <- tapply(X = AGE, INDEX = GROUP, FUN = mean)
sA <- tapply(X = AGE, INDEX = GROUP, FUN = sd)
nA <- tapply(X = AGE, INDEX = GROUP, FUN = length)
mA <- tapply(X = AGE, INDEX = GROUP, FUN = min)
maA <- tapply(X = AGE, INDEX = GROUP, FUN = max)
cbind(
  mean = xbarA,
  std.dev = sA,
  n = nA,
  min = mA,
  max = maA
)

#      mean  std.dev  n min max
#1 33.00000 7.019453 45  21  44
#2 33.28889 7.111521 45  21  45
 
######################################################
### Normality test: BMI within each GROUP

tapply(BMI, GROUP, shapiro.test)
#$`1`

#        Shapiro-Wilk normality test

#data:  X[[i]]
#W = 0.96637, p-value = 0.2128


#$`2`

#        Shapiro-Wilk normality test

#data:  X[[i]]
#W = 0.96988, p-value = 0.2868

# Variance test: F test for equality of the two group variances.

var.test(BMI ~ GROUP)

#        F test to compare two variances

#data:  BMI by GROUP
#F = 0.71165, num df = 44, denom df = 44, p-value = 0.2631
#alternative hypothesis: true ratio of variances is not equal to 1
#95 percent confidence interval:
# 0.3910805 1.2949979
#sample estimates:
#ratio of variances 
#         0.7116519 

 
# Independent-samples t test with pooled variance (the F test above did not
# reject equal variances). TRUE is spelled out: `T` is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved word.

t.test(BMI ~ GROUP, var.equal = TRUE)

#        Two Sample t-test

#data:  BMI by GROUP
#t = -0.015689, df = 88, p-value = 0.9875
#alternative hypothesis: true difference in means is not equal to 0
#95 percent confidence interval:
# -1.134835  1.117057
#sample estimates:
#mean in group 1 mean in group 2 
#       24.60444        24.61333 
 
# Summary: BMI by GROUP

# Group means.
tapply(X = BMI, INDEX = GROUP, FUN = mean)

#       1        2 
#24.60444 24.61333 

# Group standard deviations.
tapply(X = BMI, INDEX = GROUP, FUN = sd)

#       1        2 
#2.450692 2.905058 


# Per-group descriptives, kept as separate vectors and then bound into one
# table with a row per group.
xbarBm <- tapply(X = BMI, INDEX = GROUP, FUN = mean)
sBm <- tapply(X = BMI, INDEX = GROUP, FUN = sd)
nBm <- tapply(X = BMI, INDEX = GROUP, FUN = length)
mBm <- tapply(X = BMI, INDEX = GROUP, FUN = min)
maBm <- tapply(X = BMI, INDEX = GROUP, FUN = max)
cbind(
  mean = xbarBm,
  std.dev = sBm,
  n = nBm,
  min = mBm,
  max = maBm
)

#      mean  std.dev  n  min  max
#1 24.60444 2.450692 45 19.4 32.4
#2 24.61333 2.905058 45 18.9 30.2

 ######################################################
 ### PAIRED COMPARISON: PREOP_CCT vs POSTOP_CCT_1M
 # This section was headed "paired t test", but the test actually run is the
 # Wilcoxon signed-rank test, chosen because PREOP_CCT fails the Shapiro-Wilk
 # check recorded below.
 # NOTE(review): for a paired design the usual normality check is on the
 # within-subject differences (POSTOP_CCT_1M - PREOP_CCT) -- confirm whether
 # that was intended.

 shapiro.test(dataset$PREOP_CCT)

#        Shapiro-Wilk normality test

#data:  dataset$PREOP_CCT
#W = 0.80609, p-value = 1.578e-09

 shapiro.test(dataset$POSTOP_CCT_1M)

#        Shapiro-Wilk normality test

#data:  dataset$POSTOP_CCT_1M
#W = 0.97366, p-value = 0.0642

 # `paired = TRUE` is spelled out: `T` is a reassignable variable, TRUE is a
 # reserved word.
 wilcox.test(PREOP_CCT, POSTOP_CCT_1M, paired = TRUE)

#        Wilcoxon signed rank test with continuity correction

#data:  PREOP_CCT and POSTOP_CCT_1M
#V = 1, p-value = 3.863e-09
#alternative hypothesis: true location shift is not equal to 0

#Warning messages:
#1: In wilcox.test.default(PREOP_CCT, POSTOP_CCT_1M, paired = TRUE) :
#  cannot compute exact p-value with ties
#2: In wilcox.test.default(PREOP_CCT, POSTOP_CCT_1M, paired = TRUE) :
#  cannot compute exact p-value with zeroes

 # Five-number summaries (plus mean) of the two measurements.
 summary(PREOP_CCT)

#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#  378.0   397.0   470.0   459.3   518.0   541.0 

 summary(POSTOP_CCT_1M)

#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#  493.0   509.0   518.0   518.1   527.0   541.0 

### ANOVA

# Shapiro-Wilk normality check of BMI within each smoking category.
tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = shapiro.test)

#$`1`

#        Shapiro-Wilk normality test

#data:  X[[i]]
#W = 0.97169, p-value = 0.5473


#$`2`
#        Shapiro-Wilk normality test

#data:  X[[i]]
#W = 0.94353, p-value = 0.1241


#$`3`

#        Shapiro-Wilk normality test

#data:  X[[i]]
#W = 0.97065, p-value = 0.5775

          
 # One-way ANOVA of BMI across the three smoking categories.
 # The model is fitted with `data = dataset` so that SMOKING_STATUS is the
 # three-level factor. The bare name SMOKING_STATUS resolves to the attached
 # column, which still holds the numeric codes 1/2/3; fitting on it gave a
 # straight-line regression on the code (1 Df) instead of a comparison of
 # three group means (2 Df).
 bmi_by_smoking <- lm(BMI ~ SMOKING_STATUS, data = dataset)

 anova(bmi_by_smoking)

# The output recorded before this fix came from the numeric coding and no
# longer applies:
#   SMOKING_STATUS  1   0.00  0.0003       0 0.9952
#   Residuals      88 635.59  7.2226
# Expect 2 and 87 Df now. Judging from the per-category means and SDs recorded
# in this file, F should be roughly 0.005 (p about 0.99) -- re-run to confirm.

 # The same model through aov(). It now receives a formula; previously it was
 # handed a fitted lm object in the `formula` argument.
 aov(BMI ~ SMOKING_STATUS, data = dataset)

 ## Coefficients
 # With R's default treatment contrasts the intercept is the mean BMI of the
 # first level and the remaining coefficients are differences from it.
 bmi_by_smoking
 
## Homogeneity of variances (Bartlett's test is used here; Levene's test is
## the alternative named in the original heading)

bartlett.test(BMI ~ SMOKING_STATUS)

#        Bartlett test of homogeneity of variances

#data:  BMI by SMOKING_STATUS
#Bartlett's K-squared = 1.0775, df = 2, p-value = 0.5835

## Post hoc test
# Pairwise t tests between smoking categories with Bonferroni-adjusted
# p-values. The columns are taken from `dataset`, so the factor labels appear
# in the result.

pairwise.t.test(
  dataset$BMI,
  dataset$SMOKING_STATUS,
  p.adjust.method = "bonferroni"
)

#        Pairwise comparisons using t tests with pooled SD 

#data:  dataset$BMI and dataset$SMOKING_STATUS 

#               NEVER SMOKER CURRENT SMOKER
#CURRENT SMOKER 1            -             
#EX-SMOKER      1            1             

#P value adjustment method: bonferroni 

## Summary: BMI by SMOKING_STATUS

# Standard deviation per smoking category.
tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = sd)

#       1        2        3 
#2.412835 2.808739 2.891780 

# Mean per smoking category.
tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = mean)

#       1        2        3 
#24.58750 24.65172 24.58966 

# Per-category descriptives, kept as separate vectors and then bound into one
# table with a row per category.
xbarBS <- tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = mean)
sBS <- tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = sd)
nBS <- tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = length)
mBS <- tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = min)
maBS <- tapply(X = BMI, INDEX = SMOKING_STATUS, FUN = max)
cbind(
  mean = xbarBS,
  std.dev = sBS,
  n = nBS,
  min = mBS,
  max = maBS
)

#      mean  std.dev  n  min  max
#1 24.58750 2.412835 32 20.2 30.2
#2 24.65172 2.808739 29 19.4 32.4
#3 24.58966 2.891780 29 18.9 30.1

########################################################
# CORRELATION & SIMPLE LINEAR REGRESSION

# Shapiro-Wilk normality check of each variable over all subjects.
shapiro.test(x = dataset$AGE)

#        Shapiro-Wilk normality test

#data:  dataset$AGE
#W = 0.93759, p-value = 0.0003098

shapiro.test(x = dataset$BMI)

#        Shapiro-Wilk normality test

#data:  dataset$BMI
#W = 0.98085, p-value = 0.207

## Correlation
# Spearman's rank correlation is used; AGE fails the normality check recorded
# above.

cor.test(BMI, AGE, method = "spearman")

#        Spearman's rank correlation rho

#data:  BMI and AGE
#S = 26463, p-value < 2.2e-16
#alternative hypothesis: true rho is not equal to 0
#sample estimates:
#      rho 
#0.7821707 

#Warning message:
#In cor.test.default(BMI, AGE, method = "spearman") :
#  Cannot compute exact p-value with ties

## Simple linear regression: BMI on AGE

# Print the fitted coefficients only.
lm(formula = BMI ~ AGE)

#Call:
#lm(formula = BMI ~ AGE)

#Coefficients:
#(Intercept)          AGE  
#    15.1810       0.2844  


# Full summary: residual quantiles, coefficient table, R-squared and F test.
summary(lm(formula = BMI ~ AGE))

#Call:
#lm(formula = BMI ~ AGE)

#Residuals:
#    Min      1Q  Median      3Q     Max 
#-4.8145 -1.0200 -0.2934  0.9077  5.8456 

#Coefficients:
#            Estimate Std. Error t value Pr(>|t|)    
#(Intercept) 15.18101    0.91136   16.66   <2e-16 ***
#AGE          0.28445    0.02691   10.57   <2e-16 ***
#---
#Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Residual standard error: 1.784 on 88 degrees of freedom
#Multiple R-squared:  0.5595,    Adjusted R-squared:  0.5545 
#F-statistic: 111.8 on 1 and 88 DF,  p-value: < 2.2e-16

 
 # Keep the BMI-on-AGE fit in a variable so it can be passed to predict().
 lm.velo <- lm(BMI ~ AGE)
 lm.velo

#Call:
#lm(formula = BMI ~ AGE)

#Coefficients:
#(Intercept)          AGE  
#    15.1810       0.2844  

 ### Fitted values
 # Fitted BMI for every observation together with confidence limits (the
 # fit / lwr / upr columns). The argument name and its value are written in
 # full; the previous `int = "c"` only worked through partial matching, which
 # breaks silently if another argument starting with "int" is ever added.

 predict(lm.velo, interval = "confidence")

#        fit      lwr      upr
#1  22.00777 21.39240 22.62313
#2  23.43001 22.99559 23.86443
#3  26.55894 26.03551 27.08237
#4  22.57666 22.04231 23.11102
#5  24.56780 24.19407 24.94153
#6  25.70559 25.27885 26.13234
#7  21.43887 20.73554 22.14221
#8  24.85225 24.47581 25.22869
#9  27.69673 27.00643 28.38703
#10 22.57666 22.04231 23.11102
#11 26.55894 26.03551 27.08237
#12 25.42115 25.01751 25.82479
#13 24.56780 24.19407 24.94153
#14 25.99004 25.53505 26.44503
#15 22.86111 22.36357 23.35865
#16 27.69673 27.00643 28.38703
#17 23.14556 22.68159 23.60954
#18 23.14556 22.68159 23.60954
#19 26.27449 25.78701 26.76197
#20 22.00777 21.39240 22.62313
#21 26.55894 26.03551 27.08237
#22 22.86111 22.36357 23.35865
#23 26.27449 25.78701 26.76197
#24 23.43001 22.99559 23.86443
#25 25.13670 24.75010 25.52329
#26 25.70559 25.27885 26.13234
#27 22.57666 22.04231 23.11102
#28 21.15442 20.40525 21.90360
#29 22.00777 21.39240 22.62313
#30 25.42115 25.01751 25.82479
#31 27.12784 26.52467 27.73100
#32 23.14556 22.68159 23.60954
#33 24.56780 24.19407 24.94153
#34 25.99004 25.53505 26.44503
#35 23.71446 23.30473 24.12419
#36 25.13670 24.75010 25.52329
#37 21.72332 21.06467 22.38197
#38 23.14556 22.68159 23.60954
#39 22.57666 22.04231 23.11102
#40 27.12784 26.52467 27.73100
#41 22.29222 21.71842 22.86602
#42 27.69673 27.00643 28.38703
#43 27.69673 27.00643 28.38703
#44 26.84339 26.28122 27.40556
#45 26.55894 26.03551 27.08237
#46 27.12784 26.52467 27.73100
#47 25.99004 25.53505 26.44503
#48 22.86111 22.36357 23.35865
#49 24.56780 24.19407 24.94153
#50 25.13670 24.75010 25.52329
#51 26.27449 25.78701 26.76197
#52 22.00777 21.39240 22.62313
#53 25.99004 25.53505 26.44503
#54 26.84339 26.28122 27.40556
#55 22.00777 21.39240 22.62313
#56 22.00777 21.39240 22.62313
#57 26.84339 26.28122 27.40556
#58 26.84339 26.28122 27.40556
#59 23.99891 23.60806 24.38975
#60 25.42115 25.01751 25.82479
#61 27.41228 26.76629 28.05827
#62 21.43887 20.73554 22.14221
#63 27.12784 26.52467 27.73100
#64 25.99004 25.53505 26.44503
#65 24.28335 23.90473 24.66198
#66 26.27449 25.78701 26.76197
#67 25.99004 25.53505 26.44503
#68 22.86111 22.36357 23.35865
#69 21.15442 20.40525 21.90360
#70 25.70559 25.27885 26.13234
#71 25.13670 24.75010 25.52329
#72 21.72332 21.06467 22.38197
#73 24.85225 24.47581 25.22869
#74 26.27449 25.78701 26.76197
#75 22.00777 21.39240 22.62313
#76 23.71446 23.30473 24.12419
#77 22.29222 21.71842 22.86602
#78 23.14556 22.68159 23.60954
#79 23.43001 22.99559 23.86443
#80 22.57666 22.04231 23.11102
#81 27.69673 27.00643 28.38703
#82 27.98118 27.24535 28.71701
#83 23.99891 23.60806 24.38975
#84 22.00777 21.39240 22.62313
#85 23.99891 23.60806 24.38975
#86 26.27449 25.78701 26.76197
#87 26.55894 26.03551 27.08237
#88 27.41228 26.76629 28.05827
#89 23.43001 22.99559 23.86443
#90 22.57666 22.04231 23.11102