Stepwise logistic regression using R (II)

Consider a clinical data set (you can download it from here). The Diabetes data set consists of nine variables. The variable Class is the response variable, taking the values tested_positive and tested_negative; the other eight variables are the explanatory variables.

Stepwise logistic regression
#read in the data
diabetes<-read.csv("d:/diabetes.csv",header=TRUE)
#get explanatory variables: a column of 1s for the intercept plus the eight predictors
x<-c(rep(1,768),diabetes[,1],diabetes[,2],diabetes[,3],diabetes[,4],diabetes[,5],diabetes[,6],diabetes[,7],diabetes[,8])
x<-matrix(x,nrow=768)
#get response variable
y<-diabetes[,9]
y<-ifelse(y=="tested_positive",1,0)
y<-matrix(y,nrow=768)
#set explanatory variables' names
x_name<-c("INTERCEPT","PREGNANT","PLASMA","PRESSURE","TRICEPS","INSULIN","INDEX","PEDIGREE","AGE")
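Before going further, a quick sanity check that the data were read in as expected (this assumes the file layout described above):

#quick sanity check of the objects just created
dim(diabetes)         #should be 768 rows and 9 columns
table(diabetes[,9])   #counts of tested_negative and tested_positive
dim(x); dim(y)        #768 x 9 design matrix and 768 x 1 response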
    The logistic regression R code is as follows.
Stepwise logistic regression
pi_fun<-function(x,Beta){
  Beta<-matrix(as.vector(Beta),ncol=1)
  x<-matrix(as.vector(x),ncol=nrow(Beta))
  g_fun<-x%*%Beta
  exp(g_fun)/(1+exp(g_fun))
}
funs<-function(x,y,Beta){
  Beta<-matrix(c(Beta),ncol=1)
  x<-matrix(as.vector(x),ncol=nrow(Beta))  #reshape so x can also be passed as a plain vector
  pi_value<-pi_fun(x,Beta)
  U<-t(x)%*%(y-pi_value);
  uni_matrix<-matrix(rep(1,nrow(pi_value)),nrow=nrow(pi_value));
  H<-t(x)%*%diag(as.vector(pi_value*(uni_matrix-pi_value)))%*%x
  list(U=U,H=H)
}
Newtons<-function(fun,x,y,ep=1e-8,it_max=100){
  x<-matrix(as.vector(x),nrow=nrow(y))  #reshape so x can also be passed as a plain vector
  Beta<-matrix(rep(0,ncol(x)),nrow=ncol(x))
  Index<-0;
  k<-1
  while(k<=it_max){
    x1<-Beta;obj<-fun(x,y,Beta);
    Beta<-Beta+solve(obj$H,obj$U);
    objTemp<-fun(x,y,Beta)
    if(any(is.nan(objTemp$H))){
      Beta<-x1
      print("Warning: The maximum likelihood estimate does not exist.")
      print(paste("The LOGISTIC procedure continues in spite of the above warning.",
        "Results shown are based on the last maximum likelihood iteration. Validity of the model fit is questionable."));
    }
    norm<-sqrt(t(Beta-x1)%*%(Beta-x1))
    if(norm<ep){
      Index<-1;break
    }
    k<-k+1
  }
  obj<-fun(x,y,Beta);
  list(Beta=Beta,it=k,U=obj$U,H=obj$H)
}
CheckZero<-function(x){
  #replace zeros with a tiny positive number so that log() does not return -Inf
  for(i in 1:length(x)){
    if(x[i]==0)
      x[i]<-1e-300
  }
  x
}
ModelFitStat<-function(x,y,Beta){
  Beta<-matrix(as.vector(Beta),ncol=1)
  uni_matrix<-matrix(rep(1,nrow(y)),nrow=nrow(y));
  LOGL<- -2*(t(y)%*%log(pi_fun(x,Beta))+t(uni_matrix-y)%*%log(CheckZero(uni_matrix-pi_fun(x,Beta))))
  #print("-----------------")
  #print(LOGL)
  AIC<-LOGL+2*(ncol(x))
  SC<-LOGL+(ncol(x))*log(nrow(x))
  list(LOGL=LOGL,AIC=AIC,SC=SC)
}
GlobalNullTest<-function(x,y,Beta,BetaIntercept){
  Beta<-matrix(as.vector(Beta),ncol=1)
  pi_value<-pi_fun(x,Beta)
  df<-nrow(Beta)-1
  ##compute likelihood ratio
  MF<-ModelFitStat(x,y,Beta)
  n1<-sum(y[y>0])
  n<-nrow(y)
  LR<- -2*(n1*log(n1)+(n-n1)*log(n-n1)-n*log(n))-MF$LOGL
  LR_p_value<-pchisq(LR,df,lower.tail=FALSE)
  LR_Test<-list(LR=LR,DF=df,LR_p_value=LR_p_value)
  ##compute score
  BetaIntercept<-matrix(c(as.vector(BetaIntercept),rep(0,ncol(x)-1)),ncol=1)
  obj<-funs(x,y,BetaIntercept)
  Score<-t(obj$U)%*%solve(obj$H)%*%obj$U
  Score_p_value<-pchisq(Score,df,lower.tail=FALSE)
  Score_Test<-list(Score=Score,DF=df,Score_p_value=Score_p_value)
  ##compute Wald test
  obj<-funs(x,y,Beta)
  I_Diag<-diag((solve(obj$H)))
  Q<-matrix(c(rep(0,nrow(Beta)-1),as.vector(diag(rep(1,nrow(Beta)-1)))),nrow=nrow(Beta)-1)
  Wald<-t(Q%*%Beta)%*%solve(Q%*%diag(I_Diag)%*%t(Q))%*%(Q%*%Beta)
  Wald_p_value<-pchisq(Wald,df,lower.tail=FALSE)
  Wald_Test<-list(Wald=Wald,DF=df,Wald_p_value=Wald_p_value)
  list(LR_Test=LR_Test,Score_Test=Score_Test,Wald_Test=Wald_Test)
}
WhichEqual1<-function(x){
  a<-NULL
  for(i in 1:length(x)){
    if(x[i]==1){
      a<-c(a,i)
    }
  }
  a
}
CheckOut<-function(source,check){
  for(j in 1:length(source)){
    for(k in 1:length(check)){
      if(source[j]==check[k])
        source[j]<-0
    }
  }
  source[source>0]
}
NegativeCheck<-function(x){
  #return the position of the last positive entry, scanning from the end
  for(i in length(x):1){
    if(x[i]>0)
      break
  }
  i
}
CycleCheck<-function(x){
  #detect an entry/removal cycle: the variables removed since the last entry
  #are exactly the variables that entered just before
  NegativeFlg<-NegativeCheck(x)
  if(NegativeFlg==length(x)){
    return(FALSE)
  }
  NegVec<-x[(NegativeFlg+1):length(x)]
  PosVec<-x[(2*NegativeFlg-length(x)+1):NegativeFlg]
  NegVec<-sort(-1*NegVec)
  PosVec<-sort(PosVec)
  if(all((NegVec-PosVec)==0))
    return(TRUE)
  return(FALSE)
}
##stepwise
Stepwise<-function(x,y,checkin_pvalue=0.05,checkout_pvalue=0.05){
  ##indication of variables: which columns have entered the model
  indict<-rep(0,ncol(x))
  ##intercept entered
  indict[1]<-1
  Beta<-NULL
  print("Intercept Entered...")
  Result<-Newtons(funs,x[,1],y)
  Beta<-Result$Beta
  BetaIntercept<-Result$Beta
  uni_matrix<-matrix(rep(1,nrow(y)),nrow=nrow(y));
  LOGL<- -2*(t(y)%*%log(pi_fun(x[,1],Beta))+t(uni_matrix-y)%*%log(uni_matrix-pi_fun(x[,1],Beta)))
  print(paste("-2Log=",LOGL))
  indexVector<-WhichEqual1(indict)
  ##check the other variables
  VariableFlg<-NULL
  Terminate<-FALSE
  repeat{
    if(Terminate==TRUE){
      print("Model building terminates because the last effect entered is removed by the Wald statistic criterion.")
      break
    }
    ##score test for each variable not yet in the model
    pvalue<-rep(1,ncol(x))
    k<-2:length(indict)
    k<-CheckOut(k,indexVector)
    for(i in k){
      obj<-funs(c(x[,indexVector],x[,i]),y,c(as.vector(Beta),0))
      Score<-t(obj$U)%*%solve(obj$H,obj$U)
      Score_pvalue<-pchisq(Score,1,lower.tail=FALSE)
      pvalue[i]<-Score_pvalue
    }
    #print("Score pvalue for variable entry")
    #print(pvalue)
    pvalue_min<-min(pvalue)
    if(pvalue_min<checkin_pvalue){
      j<-which.min(pvalue)
      print(paste(x_name[j]," entered:"))
      ##set indication of variable
      indict[j]<-1
      VariableFlg<-c(VariableFlg,j)
      indexVector<-WhichEqual1(indict)
      print("indexVector -- test")
      print(indexVector)
      Result<-Newtons(funs,x[,indexVector],y)
      Beta<-NULL
      Beta<-Result$Beta
      ##compute model fit statistics
      print("Model Fit Statistics")
      MFStat<-ModelFitStat(x[,indexVector],y,Beta)
      print(MFStat)
      ##test global null hypothesis: Beta=0
      print("Testing Global Null Hypothesis: Beta=0")
      GNTest<-GlobalNullTest(x[,indexVector],y,Beta,BetaIntercept)
      print(GNTest)
      ##compute Wald tests in order to remove variables
      repeat{
        indexVector<-WhichEqual1(indict)
        obj<-funs(x[,indexVector],y,Beta)
        H_Diag<-sqrt(diag(solve(obj$H)))
        WaldChisq<-(as.vector(Beta)/H_Diag)^2
        WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
        WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
        print("Wald chisq pvalue for variable removal")
        print(WaldChisqPvalue)
        ##check Wald to decide which variable is to be removed
        pvalue_max<-max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##do not check the intercept
        if(pvalue_max>checkout_pvalue){
          n<-which.max(WaldChisqPvalue[2:length(WaldChisqPvalue)])##do not check the intercept
          print(paste(x_name[indexVector[n+1]]," is removed:"))
          ##set indication of variable
          indict[indexVector[n+1]]<-0
          m<-indexVector[n+1]
          VariableFlg<-c(VariableFlg,-m)  ##removed variables are recorded with a negative sign
          ##renew Beta
          indexVector<-WhichEqual1(indict)
          Result<-Newtons(funs,x[,indexVector],y)
          Beta<-NULL
          Beta<-Result$Beta
          if(CycleCheck(VariableFlg)==TRUE){
            Terminate<-TRUE
          }
        }
        else{
          print(paste("No (additional) effects met the",checkout_pvalue,"significance level for removal from the model."))
          break
        }
      }##repeat end
    }
    else{
      print(paste("No effects met the",checkin_pvalue,"significance level for entry into the model."))
      break
    }
  }##repeat end
  ##final estimates
  print("Beta")
  print(Beta)
  print("Analysis of Maximum Likelihood Estimates")
  obj<-funs(x[,indexVector],y,Beta)
  H_Diag<-sqrt(diag(solve(obj$H)))
  WaldChisq<-(as.vector(Beta)/H_Diag)^2
  WaldChisqPvalue<-pchisq(WaldChisq,1,lower.tail=FALSE)
  WaldChisqTest<-list(WaldChisq=WaldChisq,WaldChisqPvalue=WaldChisqPvalue)
  print(WaldChisqTest)
}

Finally, invoke the Stepwise function.

Stepwise logistic regression
Stepwise(x,y)
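For comparison, the result can be cross-checked with base R's glm and step functions. This is only a sketch: step selects by AIC rather than by the score/Wald significance criteria used above, so the selected model is not guaranteed to be identical.

#cross-check with base R (AIC-based stepwise selection), using the x, y and x_name objects built earlier
dat<-data.frame(Class01=as.vector(y),x[,-1])   #drop our explicit intercept column; glm adds its own
names(dat)<-c("Class01",x_name[-1])
full<-glm(Class01~.,data=dat,family=binomial)
both<-step(full,direction="both",trace=FALSE)
summary(both)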

Stepwise Logistic Regression Using R (I)

1. Logistic regression model

We assume that we have $p$ explanatory variables $x_1, x_2, \ldots, x_p$. The logistic regression model is

$$\pi(x)=P(y=1\mid x)=\frac{\exp(\beta_0+\beta_1 x_1+\cdots+\beta_p x_p)}{1+\exp(\beta_0+\beta_1 x_1+\cdots+\beta_p x_p)},$$

where $\pi(x)$ is the probability of $y=1$ given $x_1,\ldots,x_p$, and $\beta_0,\beta_1,\ldots,\beta_p$ are the coefficients to be estimated. The right-hand side is usually referred to as the logistic function, whose graph is the familiar S-shaped curve.

We can also define

$$1-\pi(x)=P(y=0\mid x)=\frac{1}{1+\exp(\beta_0+\beta_1 x_1+\cdots+\beta_p x_p)},$$

which is the probability of $y=0$, in other words, the probability that the event does not occur.

The odds of an event that occurs with probability $\pi(x)$ are defined as

$$\mathrm{odds}=\frac{\pi(x)}{1-\pi(x)}=\exp(\beta_0+\beta_1 x_1+\cdots+\beta_p x_p).$$

Taking the logarithm of both sides, we get a linear function, the logit:

$$\log\frac{\pi(x)}{1-\pi(x)}=\beta_0+\beta_1 x_1+\cdots+\beta_p x_p.$$
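As a quick numeric illustration of these formulas (the coefficients and the observation below are made up, not estimates from the diabetes data):

#made-up coefficients and one observation, just to illustrate the formulas
beta<-c(-5,0.1,0.03)              #beta0, beta1, beta2 (hypothetical values)
xobs<-c(1,3,120)                  #leading 1 for the intercept, then x1 and x2
eta<-sum(beta*xobs)               #linear predictor beta0+beta1*x1+beta2*x2
pi_x<-exp(eta)/(1+exp(eta))       #probability pi(x)
odds<-pi_x/(1-pi_x)               #equals exp(eta)
log(odds)                         #recovers eta: the logit is linear in x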

2. Maximum likelihood estimation

The next goal is to estimate the $p+1$ parameters $\beta_0,\beta_1,\ldots,\beta_p$. This can be achieved by maximum likelihood estimation. Since the probability that the event occurs for observation $i$ is $\pi(x_i)$, the probability of observing $y_i=1$ can be written as $\pi(x_i)^{y_i}$. Likewise, the probability of observing $y_i=0$ can be written as $(1-\pi(x_i))^{1-y_i}$. Thus, for $n$ independent observations we get the likelihood function

$$L(\beta)=\prod_{i=1}^{n}\pi(x_i)^{y_i}\big(1-\pi(x_i)\big)^{1-y_i}.$$

Replacing $\pi(x_i)$ with its expression in terms of the coefficients, it can be rewritten as

$$L(\beta)=\prod_{i=1}^{n}\frac{\exp\!\big(y_i(\beta_0+\beta_1 x_{i1}+\cdots+\beta_p x_{ip})\big)}{1+\exp(\beta_0+\beta_1 x_{i1}+\cdots+\beta_p x_{ip})}.$$

In order to maximize the likelihood function, we take the logarithm of both sides:

$$l(\beta)=\log L(\beta)=\sum_{i=1}^{n}\Big(y_i\log\pi(x_i)+(1-y_i)\log\big(1-\pi(x_i)\big)\Big).$$

To maximize $l(\beta)$, we set its first-order partial derivatives to zero:

$$\frac{\partial l(\beta)}{\partial\beta_j}=\sum_{i=1}^{n}\big(y_i-\pi(x_i)\big)x_{ij}=0,$$

for $j=0,1,2,\ldots,p$, where $x_{i0}=1$ corresponds to the intercept.
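These formulas translate directly into R. The sketch below assumes a design matrix x whose first column is all 1s (as built in Part II above) and a 0/1 response y:

#log-likelihood and score vector, written directly from the formulas above
loglik<-function(Beta,x,y){
  eta<-as.vector(x%*%Beta)
  sum(y*eta-log(1+exp(eta)))      #equals sum of y*log(pi)+(1-y)*log(1-pi)
}
score<-function(Beta,x,y){
  pi_x<-1/(1+exp(-as.vector(x%*%Beta)))
  t(x)%*%(y-pi_x)                 #the p+1 partial derivatives
}
#maximizing loglik numerically, e.g. optim(rep(0,ncol(x)),function(b) -loglik(b,x,y),method="BFGS"),
#gives the same estimates as the Newton-Raphson iteration in the next section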

3. Newton-Raphson iteration method

In order to solve this set of $p+1$ nonlinear equations $\partial l(\beta)/\partial\beta_j=0$, $j=0,1,\ldots,p$, we can use the Newton-Raphson iteration method. The matrix of second derivatives, called the Hessian, is

$$\frac{\partial^{2} l(\beta)}{\partial\beta_j\,\partial\beta_{j'}}=-\sum_{i=1}^{n}x_{ij}\,x_{ij'}\,\pi(x_i)\big(1-\pi(x_i)\big).$$

In matrix form, let $X$ denote the $n\times(p+1)$ design matrix (whose first column is all 1s), $y$ the response vector, and $\pi$ the vector of fitted probabilities. Then the score vector and the Hessian matrix are

$$U(\beta)=X^{T}(y-\pi),\qquad \frac{\partial^{2} l(\beta)}{\partial\beta\,\partial\beta^{T}}=-X^{T}WX,$$

where $W$ is an $n$ by $n$ diagonal matrix with elements $\pi(x_i)\big(1-\pi(x_i)\big)$. If we write

$$H=X^{T}WX,$$

we have the Newton-Raphson update formula

$$\beta^{(t+1)}=\beta^{(t)}+H^{-1}U(\beta^{(t)})=\beta^{(t)}+(X^{T}WX)^{-1}X^{T}(y-\pi^{(t)}),$$

which is iterated until the change in $\beta$ is negligible.

In fact, we do not need to compute $H^{-1}$ directly. Since $X^{T}WX$ is a positive definite matrix, the linear system $H\,\delta=U$ can be solved efficiently, for example via the Cholesky decomposition of $X^{T}WX$.

Let $X^{T}WX$ be denoted by $I(\beta)$; this matrix is called the observed information matrix. The variances and covariances of the estimated coefficients can be obtained from its inverse, $I(\hat\beta)^{-1}$. That is to say, the variance of $\hat\beta_j$ is the $j$-th diagonal element of $I(\hat\beta)^{-1}$, and the covariance of $\hat\beta_j$ and $\hat\beta_{j'}$, which can be denoted by $\widehat{\mathrm{Cov}}(\hat\beta_j,\hat\beta_{j'})$, is the corresponding off-diagonal element. So the estimated standard errors of the estimated coefficients are

$$\widehat{\mathrm{SE}}(\hat\beta_j)=\sqrt{\big[I(\hat\beta)^{-1}\big]_{jj}},\qquad j=0,1,2,\ldots,p.$$
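The whole iteration fits in a few lines of R. The sketch below is a compact version of the Newtons function from Part II, again assuming x contains the intercept column of 1s and y is the 0/1 response:

#Newton-Raphson (IRLS) for logistic regression, with standard errors from the
#inverse of the observed information matrix
newton_logistic<-function(x,y,tol=1e-8,it_max=25){
  Beta<-rep(0,ncol(x))
  for(it in 1:it_max){
    eta<-as.vector(x%*%Beta)
    p<-exp(eta)/(1+exp(eta))
    W<-p*(1-p)
    U<-t(x)%*%(y-p)              #score vector X'(y-pi)
    H<-t(x)%*%(x*W)              #information matrix X'WX
    step<-solve(H,U)             #solve the linear system H step = U
    Beta<-Beta+step
    if(sqrt(sum(step^2))<tol) break
  }
  se<-sqrt(diag(solve(H)))       #standard errors from I(beta)^{-1}
  list(Beta=as.vector(Beta),se=se,iterations=it)
}
#fit<-newton_logistic(x,y); cbind(estimate=fit$Beta,std_error=fit$se)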

4. Significance test

A Wald test is used to test the statistical significance of each coefficient in the model. The hypothesis tested is $H_0:\beta_j=0$, and for a single coefficient we can calculate

$$W_j=\left(\frac{\hat\beta_j}{\widehat{\mathrm{SE}}(\hat\beta_j)}\right)^{2},$$

which is distributed as chi-square with one degree of freedom. To test all $p$ slope coefficients at once, we can calculate

$$W=\hat\beta_{*}^{T}\,\big[\widehat{\mathrm{Var}}(\hat\beta_{*})\big]^{-1}\,\hat\beta_{*},$$

where the intercept $\hat\beta_0$ is not included in $\hat\beta_{*}$ and the relevant row and column are not included in the covariance matrix. This value is distributed as chi-square with $p$ degrees of freedom.
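As an illustration, the same per-coefficient Wald chi-square statistics can be obtained from base R's glm; the sketch below re-uses the x and y matrices built in Part II (x[,-1] drops our explicit intercept column, since glm adds its own):

#illustrative only: per-coefficient Wald chi-squares from a glm fit
fit<-glm(as.vector(y)~x[,-1],family=binomial)
est<-summary(fit)$coefficients                 #Estimate, Std. Error, z value, Pr(>|z|)
wald_chisq<-(est[,"Estimate"]/est[,"Std. Error"])^2
wald_pvalue<-pchisq(wald_chisq,df=1,lower.tail=FALSE)
cbind(wald_chisq,wald_pvalue)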

However, there are some problems with the use of the Wald statistic. Large coefficients and inflated standard errors lower the Wald statistic value, making it easy to fail to reject the null hypothesis when it is false (a Type II error). So the likelihood ratio test is more reliable for the small sample sizes often used in practice.

The likelihood-ratio test uses the ratio of the maximized value of the likelihood function for the unrestricted model ($L_1$) over the maximized value of the likelihood function for the restricted model in which the tested coefficients are set to zero ($L_0$). The test statistic

$$G=-2\log\frac{L_0}{L_1}=-2\big(\log L_0-\log L_1\big)$$

yields a chi-squared distribution, with degrees of freedom equal to the number of coefficients set to zero.
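Continuing the glm illustration above, the likelihood-ratio statistic for testing all eight slope coefficients at once is just the difference in deviances between the intercept-only model and the full model:

#illustrative only: likelihood-ratio test of the full model against the null model
fit0<-glm(as.vector(y)~1,family=binomial)        #restricted model, gives L0
fit1<-glm(as.vector(y)~x[,-1],family=binomial)   #unrestricted model, gives L1
G<-fit0$deviance-fit1$deviance                   #-2*(log L0 - log L1)
df<-fit0$df.residual-fit1$df.residual            #number of coefficients set to zero
pchisq(G,df,lower.tail=FALSE)
#equivalently: anova(fit0,fit1,test="Chisq")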

The score test is introduced to test the same kind of hypothesis:

$$H_0:\beta_j=0.$$

The score test statistic is

$$S=U(\tilde\beta)^{T}\,I(\tilde\beta)^{-1}\,U(\tilde\beta),$$

where the score vector $U$ and the information matrix $I$ are evaluated at the restricted estimate $\tilde\beta$ (the fit with $\beta_j=0$). It yields a chi-squared distribution with $df=1$ when a single coefficient is tested. Because it only requires fitting the smaller model, the score test is often used in forward selection to decide which variable to add to the model.
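A minimal sketch of the score test for adding one variable, written directly from the formula above; it mirrors what the Stepwise function in Part II does during forward selection and assumes the x, y and funs objects defined there:

#score test for adding column j of x to a model that currently contains the
#columns in 'current'; Beta_current are the estimates of the current model
score_test_add<-function(x,y,current,j,Beta_current){
  xx<-cbind(x[,current],x[,j])                      #candidate design matrix
  obj<-funs(xx,y,c(as.vector(Beta_current),0))      #score and information evaluated at beta_j=0
  S<-t(obj$U)%*%solve(obj$H,obj$U)
  c(score=S,p_value=pchisq(S,df=1,lower.tail=FALSE))
}
#example: score test for adding PLASMA (column 3) to the intercept-only model
#score_test_add(x,y,current=1,j=3,Beta_current=Newtons(funs,x[,1],y)$Beta)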

5. Variable selection: stepwise

A stepwise procedure for selection or deletion of variables from a model is based on a statistical algorithm that checks for the "importance" of variables. The "importance" of a variable is defined in terms of a measure of the statistical significance of its coefficient. The flow, as implemented in the Stepwise function above, is roughly:

1. Fit the intercept-only model.
2. For each variable not yet in the model, compute a score test; if the smallest p-value is below the entry threshold (0.05 by default), add that variable and refit.
3. For each variable in the model, compute a Wald chi-square test; while the largest p-value exceeds the removal threshold (0.05 by default), remove that variable and refit.
4. Repeat steps 2 and 3 until no variable qualifies for entry or an entry/removal cycle is detected.

Optimizing the Schaffer F6 function using a basic genetic algorithm

The Schaffer F6 function, together with its graph, is shown here:

$$f(x,y)=0.5+\frac{\sin^{2}\!\big(\sqrt{x^{2}+y^{2}}\big)-0.5}{\big(1+0.001\,(x^{2}+y^{2})\big)^{2}}$$

[3D surface plot of the Schaffer F6 function]

If we set y = 0, we get a 2D graph.

[2D plot of the Schaffer F6 function along y = 0]

These figures illustrate the fact that the Schaffer F6 function has many local optima.

The basic genetic algorithm (written for Python 2) is as follows; matplotlib is used to plot the function graphs.

import random
from math import sin, sqrt, pow
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
from pylab import *

MAXIMIZE, MINIMIZE = 11, 22

def SchafferF6(x, y):
    temp1 = sin(sqrt(x * x + y * y))
    temp2 = 1 + 0.001 * (x * x + y * y)
    return 0.5 + (temp1 * temp1 - 0.5) / (temp2 * temp2)

#
#plot the 3D figure
#
fig = plt.figure()
ax = Axes3D(fig)
X = np.arange(-100, 100, 1)
Y = np.arange(-100, 100, 1)
X, Y = np.meshgrid(X, Y)

R = np.sin(np.sqrt(X * X + Y * Y))
Q = 1 + 0.001 * (X * X + Y * Y)
Z = 0.5 + (R * R - 0.5) / (Q * Q)
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet)

plt.show()
#end

#
#plot the 2D figure
#

t = arange(-100, 100, 1)
s = 0.5 + (sin(t) * sin(t) - 0.5) / ((1 + 0.001 * (t * t)) ** 2)
plot(t, s, linewidth=1.0)

xlabel('x')
ylabel('y')
title('Schaffer F6 2D figure')
grid(True)
show()

class Individual:
    alleles = (0, 1)
    seperator = ' '
    optimization = MINIMIZE
    length = 20
    chromosome = None
    score = None
    def __init__(self, chromosome=None):
        self.chromosome = chromosome or self._makechromosome()
        self.score = None # set during evaluation

    def _makechromosome(self):
        "makes a chromosome from randomly selected alleles."
        return [random.choice(self.alleles) for gene in range(0, self.length)]

    def evaluate(self):
        gene_x = self.chromosome[0: int(self.length / 2) + 1]
        gene_y = self.chromosome[int(self.length / 2) + 1:]
        x = 0
        y = 0
        for i in range(0, len(gene_x)):
            x += gene_x[i] * pow(2, i - 2)
        for j in range(0, len(gene_y)):
            y += gene_y[j] * pow(2, j - 2)
        self.score = SchafferF6(x, y)   

    def crossover(self, other):
       return self._twopoint(other)

    def mutate(self, gene):
       self.chromosome[gene] = 1 - self.chromosome[gene]

    # sample crossover method
    def _twopoint(self, other):
        "creates offspring via two-point crossover between mates."
        left, right = self._pickpivots()
        def mate(p0, p1):
            chromosome = p0.chromosome[:]
            chromosome[left:right] = p1.chromosome[left:right]
            child = p0.__class__(chromosome)
            return child
        return mate(self, other), mate(other, self)  

    def _pickpivots(self):
        left = random.randrange(1, self.length - 2)
        right = random.randrange(left, self.length - 1)
        return left, right

    def __repr__(self):
        "returns string representation of self"
        return '<%s chromosome="%s" score=%s>' % \
               (self.__class__.__name__,
                self.seperator.join(map(str, self.chromosome)),
                self.score)

    def __cmp__(self, other):
        if self.optimization == MINIMIZE:
            return cmp(self.score, other.score)
        else: # MAXIMIZE
            return cmp(other.score, self.score)

    def copy(self):
        twin = self.__class__(self.chromosome[:])
        twin.score = self.score
        return twin

class Environment(object):
    def __init__(self, individual, population=None, size=100,
                 maxgenerations=100, crossover_rate=0.90,
                 mutation_rate=0.1, optimum=0):
        self.individual = individual
        self.size = size
        self.optimum = optimum
        self.population = population or self._makepopulation()
        for individual in self.population:
            individual.evaluate()
        self.crossover_rate = crossover_rate
        self.mutation_rate = mutation_rate
        self.maxgenerations = maxgenerations
        self.generation = 0

        self.population.sort()
        self.best = self.population[0]
        self.report()

    def _makepopulation(self):
        return [self.individual() for i in range(self.size)]

    def run(self):
        # run until the generation limit is reached or the optimum score is found
        while self.generation < self.maxgenerations and \
              self.best.score > self.optimum:
            next_population = [self.best.copy()]
            while len(next_population) < self.size:
                mate1 = self._select()
                if random.random() < self.crossover_rate:
                    mate2 = self._select()
                    offspring = mate1.crossover(mate2)
                else:
                    offspring = [mate1.copy()]
                for individual in offspring:
                    for gene in range(individual.length):
                        if random.random() < self.mutation_rate:
                            individual.mutate(gene)
                    individual.evaluate()
                    next_population.append(individual)
            self.population = next_population[:self.size]

            self.population.sort()
            self.best = self.population[0]
            self.generation += 1
            self.report()
        self.report()

    def _select(self):
        "override this to use your preferred selection method"
        return self._tournament()

    #
    # sample selection method
    #
    def _tournament(self, size=8, choosebest=0.90):
        competitors = [random.choice(self.population)
                       for i in range(size)]
        competitors.sort()
        if random.random() < choosebest:
            return competitors[0]
        else:
            return random.choice(competitors[1:])

    def report(self):
        print "=" * 70
        print "generation: ", self.generation
        print "best:       ", self.best

if __name__ == "__main__":
    env = Environment(Individual, maxgenerations=1000)
    env.run()