Home > R, Statistic > Stepwise logistic regression using R(Ⅱ)

Stepwise logistic regression using R(Ⅱ)

Consider a clinical data set. You can download it from Here. The Diabetes data set consists nine variables. The variable Class is the response variable with a value of tested_positive and a value of tested_negative. The other eight variables are the explanatory variables.

Stepwise logistic regression
  1. #read into data
  2. diabetes<-read.csv(“d:/diabetes.csv”,head=TRUE)
  3. #get explanatory variables
  4. x<-c(rep(1,768),diabetes[2:769,1],diabetes[2:769,2],diabetes[2:769,3],diabetes[2:769,4],diabetes[2:769,5],diabetes[2:769,6],diabetes[2:769,7],diabetes[2:769,8])
  5. x<-matrix(x,nrow=768)
  6. #get response variable
  7. y<-diabetes[2:769,9]
  8. y<-ifelse(y==“tested_positive”,1,0)
  9. y<-matrix(y,nrow=768)
  10. #set explantory variable’s name
  11. x_name<-c(“INTERCEPT”,“PREGNANT”,“PLASMA”,“PRESSURE”,“TRICEPS”,“INSULIN”,“INDEX”,“PEDIGREE”,“AGE”)
    The logistic regression R code is as follows.
Stepwise logistic regression
  1. pi_fun<-function(x,Beta){
  2. Beta<-matrix(as.vector(Beta),ncol=1)
  3. x<-matrix(as.vector(x),ncol=nrow(Beta))
  4. g_fun<-x%*%Beta
  5. exp(g_fun)/(1+exp(g_fun))
  6. }
  7. funs<-function(x,y,Beta){
  8. Beta<-matrix(c(Beta),ncol=1)
  9. pi_value<- pi_fun(x,Beta)
  10. U<-t(x)%*%(y-pi_value);
  11. uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow= nrow(pi_value));
  12. H<-t(x)%*%diag(as.vector(pi_value*( uni_matrix -pi_value)))%*%x
  13. list(U=U,H=H)
  14. }
  15. Newtons<-function(fun,x,y,ep=1e-8,it_max=100){
  16. Beta=matrix(rep(0,ncol(x)),nrow=ncol(x))
  17. Index<-0;
  18. k<-1
  19. while(k<=it_max){
  20. x1<-Beta;obj<-fun(x,y,Beta);
  21. Beta<-Beta+solve(obj$H,obj$U);
  22. objTemp<-fun(x,y,Beta)
  23. if(any(is.nan(objTemp$H))){
  24. Beta<-x1
  25. print(“Warning:The maximum likelihood estimate does not exist.”)
  26. print(paste(“The LOGISTIC procedure continues in spite of the above warning.”,
  27. “Results shown are based on the last maximum likelihood iteration. Validity of the model fit is questionable.”));
  28. }
  29. norm<-sqrt(t((Beta-x1))%*%(Beta-x1))
  30. if(norm<ep){
  31. index<-1;break
  32. }
  33. k<-k+1
  34. }
  35. obj<-fun(x,y,Beta);
  36. list(Beta=Beta,it=k,U=obj$U,H=obj$H)
  37. }
  38. CheckZero<-function(x){
  39. for(i in 1:length(x)){
  40. if(x[i]==0)
  41. x[i]<-1e-300
  42. }
  43. x
  44. }
  45. ModelFitStat<-function(x,y,Beta){
  46. Beta<-matrix(as.vector(Beta),ncol=1)
  47. uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
  48. LOGL<–2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(CheckZero(uni_matrix-pi_fun(x,Beta))))
  49. #print(“—————–”)
  50. #print(LOGL)
  51. AIC<-LOGL+2*(ncol(x))
  52. SC<-LOGL+(ncol(x))*log(nrow(x))
  53. list(LOGL=LOGL,AIC=AIC,SC=SC)
  54. }
  55. GlobalNullTest<-function(x,y,Beta,BetaIntercept){
  56. Beta<-matrix(as.vector(Beta),ncol=1)
  57. pi_value<- pi_fun(x,Beta)
  58. df<-nrow(Beta)-1
  59. ##compute Likelihood ratio
  60. MF<-ModelFitStat(x,y,Beta)
  61. n1<-sum(y[y>0])
  62. n<-nrow(y)
  63. LR<–2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
  64. LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
  65. LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
  66. ##compute Score
  67. BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
  68. obj<-funs(x,y,BetaIntercept)
  69. Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
  70. Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
  71. Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
  72. ##compute Wald test
  73. obj<-funs(x,y,Beta)
  74. I_Diag<-diag((solve(obj$H)))
  75. Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
  76. Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
  77. Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
  78. Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
  79. list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
  80. }
  81. WhichEqual1<-function(x){
  82. a<-NULL
  83. for(i in 1:length(x)){
  84. if(x[i]==1){
  85. a<-c(a,i)
  86. }
  87. }
  88. a
  89. }
  90. CheckOut<-function(source,check){
  91. for(j in 1:length(source)){
  92. for(k in 1:length(check)){
  93. if(source[j]==check[k])
  94. source[j]<-0
  95. }
  96. }
  97. source[source>0]
  98. }
  99. NegativeCheck<-function(x){
  100. for(i in length(x):1){
  101. if(x[i]>0)
  102. }
  103. i
  104. }
  105. CycleCheck<-function(x){
  106. NegativeFlg<-NegativeCheck(x)
  107. if(NegativeFlg==length(x)){
  108. return(FALSE)
  109. }
  110. NegVec<-x[(NegativeFlg+1):length(x)]
  111. PosVec<-x[(2*NegativeFlg-length(x)+1):NegativeFlg]
  112. NegVec<-sort(-1*NegVec)
  113. PosVec<-sort(PosVec)
  114. if(all((NegVec-PosVec)==0))
  115. return(TRUE)
  116. return(FALSE)
  117. }
  118. ##stepwise
  119. Stepwise<-function(x,y,checkin_pvalue=0.05,checkout_pvalue=0.05){
  120. ##as matrix
  121. ##indication of variable
  122. indict<-rep(0,ncol(x)) ##which column enter
  123. ##intercept entered
  124. indict[1]<-1
  125. Beta<-NULL
  126. print(“Intercept Entered…”)
  127. Result<-Newtons(funs,x[,1],y)
  128. Beta<-Result$Beta
  129. BetaIntercept<-Result$Beta
  130. uni_matrix<-matrix(rep(1,nrow(y)),nrow= nrow(y));
  131. LOGL<–2*(t(y)%*%log(pi_fun(x[,1],Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x[,1],Beta)))
  132. print(paste(“-2Log=”,LOGL))
  133. indexVector<-WhichEqual1(indict)
  134. ##check other variable
  135. VariableFlg<-NULL
  136. Terminate<-FALSE
  137. if(Terminate==TRUE){
  138. print(“Model building terminates because the last effect entered is removed by the Wald statistic criterion. “)
  139. }
  140. pvalue<-rep(1,ncol(x))
  141. k<-2:length(indict)
  142. k<-CheckOut(k,indexVector)
  143. for(i in k){
  144. obj<-funs(c(x[,indexVector],x[,i]),y,c(as.vector(Beta),0))
  145. Score<-t(obj$U)%*%solve(obj$H,obj$U)
  146. Score_pvalue<-pchisq(Score,1,lower.tail=FALSE)
  147. pvalue[i]<-Score_pvalue
  148. }
  149. #print(“Score pvalue for variable enter”)
  150. #print(pvalue)
  151. pvalue_min<-min(pvalue)
  152. if(pvalue_min<checkin_pvalue){
  153. j<-which.min(pvalue)
  154. print(paste(x_name[j],” entered:”))
  155. ##set indication of variable
  156. indict[j]<-1
  157. VariableFlg<-c(VariableFlg,j)
  158. indexVector<-WhichEqual1(indict)
  159. print(“indexVector–test”)
  160. print(indexVector)
  161. Result<-Newtons(funs,x[,indexVector],y)
  162. Beta<-NULL
  163. Beta<-Result$Beta
  164. ##compute model fit statistics
  165. print(“Model Fit Statistics”)
  166. MFStat<-ModelFitStat(x[,indexVector],y,Beta)
  167. print(MFStat)
  168. ##test globel null hypothesis:Beta=0
  169. print(“Testing Global Null Hypothese:Beta=0″)
  170. GNTest<-GlobalNullTest(x[,indexVector],y,Beta,BetaIntercept)
  171. print(GNTest)
  172. ##compute Wald test in order to remove variable
  173. indexVector<-WhichEqual1(indict)
  174. obj<-funs(x[,indexVector],y,Beta)
  175. H_Diag<-sqrt(diag(solve(obj$H)))
  176. WaldChisq<-(as.vector(Beta)/H_Diag)^2
  177. WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
  178. WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
  179. print(“Wald chisq pvalue for variable remove”)
  180. print(WaldChisqPvalue)
  181. ##check wald to decide to which variable to be removed
  182. pvalue_max<-max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
  183. if(pvalue_max>checkout_pvalue){
  184. n<-which.max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##not check for intercept
  185. print(paste(x_name[indexVector[n+1]],” is removed:”))
  186. ##set indication of variable
  187. indict[indexVector[n+1]]<-0
  188. m<- indexVector[n+1]
  189. VariableFlg<-c(VariableFlg,-m)
  190. ##renew Beta
  191. indexVector<-WhichEqual1(indict)
  192. Result<-Newtons(funs,x[,indexVector],y)
  193. Beta<-NULL
  194. Beta<-Result$Beta
  195. if(CycleCheck(VariableFlg)==TRUE){
  196. Terminate<-TRUE
  197. }
  198. }
  199. else {
  200. print(paste(“No (additional) effects met the” ,checkout_pvalue,“significance level for removal from the model.”))
  201. }
  202. }##repeat end
  203. }
  204. else {
  205. print(paste(“No effects met the” ,checkin_pvalue,” significance level for entry into the model”))
  206. }
  207. }##repeat end
  208. ##
  209. print(“Beta”)
  210. print(Beta)
  211. print(“Analysis of Maximum Likelihood Estimates”)
  212. obj<-funs(x[,indexVector],y,Beta)
  213. H_Diag<-sqrt(diag(solve(obj$H)))
  214. WaldChisq<-(as.vector(Beta)/H_Diag)^2
  215. WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
  216. WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
  217. print(WaldChisqTest)
  218. }

Finally, revoke the stepwise function.

Stepwise logistic regression
  1. Stepwise(x,y)
Advertisement
Categories: R, Statistic Tags: ,
  1. No comments yet.
  1. No trackbacks yet.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Connecting to %s

Follow

Get every new post delivered to your Inbox.