# Define three functions # clr--runs the EM algorithm for given pi; outputs pi and fit.table # clr.plot--plots the likelihood ratio statistic as a function of pi # It can be used to find bracketing values for uniroot # clr.root--Finds pi for which the LR statistic is either 0 (default) or # a chi-bar-squared critical value (e.g., 2.70) # Note: these functions haven't been "robustified"--they'll have trouble with 0 # cells for instance. # # EM function clr=function(pi0,Table1){ rdim=dim(Table1)[1] cdim=dim(Table1)[2] #Unconstrained MLEs pij=Table1/sum(Table1) qijplus=pij qpp1=pi0 qpp2=(1-pi0)/(rdim*cdim) #Select starting values #Starting values for independence model qij10=qpp1*rowSums(Table1)%o%colSums(Table1)/sum(Table1)^2 #Use the same start value for all cells of the unspecified model qij20=rep(1,rdim)%*%t(rep(1,cdim))*qpp2 #Relabel in preparation for looping qij1s=qij10 qij2s=qij20 diff=1.0 #Start the iterations while(abs(diff)>1.e-7){ #Weight cells by posterior probability of membership in each group gijk1=pij*qij1s/(qij1s+qij2s) gijk2=pij*qij2s/(qij1s+qij2s) #Cell counts that follow independence model qij1n=pi0*rowSums(gijk1)%o%colSums(gijk1)/sum(gijk1)^2 #Cell counts for unspecified model qij2n=(1-pi0)*gijk2/sum(gijk2) diff=sum(abs((qij1s+qij2s)-(qij1n+qij2n))) qij1s=qij1n qij2s=qij2n } out=list(pi=pi0,fit.table=sum(Table1)*(qij1n+qij2n)) out } # Plotting function clr.plot=function(table.obs){ L2=NULL for(i in 1:19){ p=i/20 table.exp=clr(p,table.obs)$fit.table L2[i]=2*sum(table.obs*log(table.obs/table.exp)) } plot(seq(.05,.95,by=.05),L2,type="l",ylab="LR",xlab="Independence fit proportion") title("Likelihood Ratio Statistic for Mixed Independence Model") abline(h=0,col="red") } # Root-finding function clr.root=function(pi0,table.obs,chistat=0){ table.exp=clr(pi0,table.obs)$fit.table # Subtract epsilon from the LR stat so that uniroot will work L2=2*sum(table.obs*log(table.obs/table.exp))-chistat-1.e-6 L2 } # Sample session # Set up the table data frame Rcolor=c("Brown","Blue","Hazel","Green") Ccolor=c("Black","Brunette","Red","Blonde") Table1.df=expand.grid(Eye.color=Rcolor,Hair.color=Ccolor) Table1.df=cbind(Table1.df,count=c(68,20,15,5,119,84,54,29,26,17,14,14,7,94,10,16)) #Convert the data frame to a 2-way table Table1=xtabs(data=Table1.df,count~Eye.color+Hair.color) #Compute some marginals Eplus=rowSums(Table1) Hplus=colSums(Table1) # EM for a couple different values of pi clr(.7,Table1) clr(.8,Table1) clr(.9,Table1) # LR plot clr.plot(Table1) # Find max. possible value of pi uniroot(clr.root,c(.6,.8),table.obs=Table1) # Find lower confidence limit for pi uniroot(clr.root,c(.6,.8),table.obs=Table1,chistat=2.70)