/* Fictional data linking student GPA to Overall Instructor's */ /* rating; 1=Poor, 2=Fair, 3=Good, 4=Very Good, 5=Excellent */ /* The class size is typical, but the data is really too sparse */ /* for the tests that follow */ data eval; input GPA OIR; datalines; 2.0 3 2.0 4 2.0 1 2.5 1 2.5 4 2.5 5 2.5 4 2.5 3 2.5 3 3.0 4 3.0 2 3.0 4 3.0 4 3.0 3 3.0 5 3.0 5 3.0 4 3.0 4 3.0 3 3.5 4 3.5 4 3.5 5 3.5 5 3.5 3 3.5 4 4.0 5 4.0 4 ; run; /* LOGISTIC automatically uses cumulative logits and tests for */ /* parallel slopes. Nonparallel slopes have to be fit in other */ /* PROCs. */ proc logistic data=eval; model OIR=GPA; run; /* GENMOD runs the same default model */ proc genmod data=eval; model OIR=GPA/dist=multinomial; run; /* CATMOD can test separate slopes, but doesn't like missing */ /* values. To fix this, we include the SPARSE option to save missing combinations */ proc freq data=eval; table GPA*OIR/out=outa sparse; run; /* Now we'll add small nominal */ /* counts to each cell so that CATMOD can analyze the data */ data evalp; set outa; if count=. then count=.01; else count=count+.01; proc print data=evalp; run; /* CATMOD fits the non-proportional odds model. Note that CATMOD */ /* defines cumulative odds as the inverse of Agresti's definition. */ /* Note the test results on GPA. */ proc catmod data=evalp; weight count; direct GPA; response clogits; model OIR=GPA; run; /* Here's CATMOD code for the parallel slopes model. Coefficients */ /* are WLS solutions, not ML solutions--their signs are reversed */ /* because CATMOD defines cumulative logits differently from */ /* Agresti and LOGISTIC. The default chi-squared test is */ /* uninteresting; we need to specify combinations of design */ /* columns to test in order to test interesting hypotheses. */ proc catmod data=evalp; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0, 0 1 0 0 2.0, 0 0 1 0 2.0, 0 0 0 1 2.0, 1 0 0 0 2.5, 0 1 0 0 2.5, 0 0 1 0 2.5, 0 0 0 1 2.5, 1 0 0 0 3.0, 0 1 0 0 3.0, 0 0 1 0 3.0, 0 0 0 1 3.0, 1 0 0 0 3.5, 0 1 0 0 3.5, 0 0 1 0 3.5, 0 0 0 1 3.5, 1 0 0 0 4.0, 0 1 0 0 4.0, 0 0 1 0 4.0, 0 0 0 1 4.0) (1 2 3 4='Intercept', 5='GPA'); run; /* Here's another way of fitting the non-parallel slopes model */ /* We set this up so that we can add columns 5 and 6 in response */ /* to a student observation that the slopes for two of the logits */ /* appeared to be equal */ proc catmod data=evalp; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0 0 0 0, 0 1 0 0 0 2.0 0 0, 0 0 1 0 0 0 2.0 0, 0 0 0 1 0 0 0 2.0, 1 0 0 0 2.5 0 0 0, 0 1 0 0 0 2.5 0 0, 0 0 1 0 0 0 2.5 0, 0 0 0 1 0 0 0 2.5, 1 0 0 0 3.0 0 0 0, 0 1 0 0 0 3.0 0 0, 0 0 1 0 0 0 3.0 0, 0 0 0 1 0 0 0 3.0, 1 0 0 0 3.5 0 0 0, 0 1 0 0 0 3.5 0 0, 0 0 1 0 0 0 3.5 0, 0 0 0 1 0 0 0 3.5, 1 0 0 0 4.0 0 0 0, 0 1 0 0 0 4.0 0 0, 0 0 1 0 0 0 4.0 0, 0 0 0 1 0 0 0 4.0) (1 2 3 4='Intercept', 5 6 7 8='GPA'); /* We can set up a parallel slopes lack of fit test by hand--results */ /* are quite different from the likelihood-based test,likely due to the /* numerous small cell counts */ contrast 'LOF' all_parms 0 0 0 0 1 0 0 -1, all_parms 0 0 0 0 0 1 0 -1, all_parms 0 0 0 0 0 0 1 -1; run; /* And here's the model with the logits for the first two slopes */ /* collapsed--the model chi-square changed by less than .01 */ proc catmod data=evalp; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0 0 0, 0 1 0 0 2.0 0 0, 0 0 1 0 0 2.0 0, 0 0 0 1 0 0 2.0, 1 0 0 0 2.5 0 0, 0 1 0 0 2.5 0 0, 0 0 1 0 0 2.5 0, 0 0 0 1 0 0 2.5, 1 0 0 0 3.0 0 0, 0 1 0 0 3.0 0 0, 0 0 1 0 0 3.0 0, 0 0 0 1 0 0 3.0, 1 0 0 0 3.5 0 0, 0 1 0 0 3.5 0 0, 0 0 1 0 0 3.5 0, 0 0 0 1 0 0 3.5, 1 0 0 0 4.0 0 0, 0 1 0 0 4.0 0 0, 0 0 1 0 0 4.0 0, 0 0 0 1 0 0 4.0) (1 2 3 4='Intercept', 5 6 7='GPA'); run; /* Run the R code on the website to generate a larger simulated data set. */ /* Then read it into SAS and study the output for the nonproportional odds model */ data eval; infile '/home/grego1/STAT 770/cumlogsim.txt'; input GPA OIR; run; /* Inspect the frequency table for patterns */ proc freq data=eval; table GPA*OIR; run; proc catmod data=eval; population GPA; response clogits; model OIR=(1 0 0 0 2.0 0 0 0, 0 1 0 0 0 2.0 0 0, 0 0 1 0 0 0 2.0 0, 0 0 0 1 0 0 0 2.0, 1 0 0 0 2.5 0 0 0, 0 1 0 0 0 2.5 0 0, 0 0 1 0 0 0 2.5 0, 0 0 0 1 0 0 0 2.5, 1 0 0 0 3.0 0 0 0, 0 1 0 0 0 3.0 0 0, 0 0 1 0 0 0 3.0 0, 0 0 0 1 0 0 0 3.0, 1 0 0 0 3.5 0 0 0, 0 1 0 0 0 3.5 0 0, 0 0 1 0 0 0 3.5 0, 0 0 0 1 0 0 0 3.5, 1 0 0 0 4.0 0 0 0, 0 1 0 0 0 4.0 0 0, 0 0 1 0 0 0 4.0 0, 0 0 0 1 0 0 0 4.0) (1 2 3 4='Intercept', 5 6 7 8='GPA'); /* We can set up a parallel slopes lack of fit test by hand--results */ /* are now similar to the likelihood-based test */ contrast 'LOF' all_parms 0 0 0 0 1 0 0 -1, all_parms 0 0 0 0 0 1 0 -1, all_parms 0 0 0 0 0 0 1 -1; run; /* Run proc logistic to compare LOF tests */ proc logistic data=eval; model OIR=GPA; run; *Proportional odds in PROC CATMOD; proc catmod data=eval; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0, 0 1 0 0 2.0, 0 0 1 0 2.0, 0 0 0 1 2.0, 1 0 0 0 2.5, 0 1 0 0 2.5, 0 0 1 0 2.5, 0 0 0 1 2.5, 1 0 0 0 3.0, 0 1 0 0 3.0, 0 0 1 0 3.0, 0 0 0 1 3.0, 1 0 0 0 3.5, 0 1 0 0 3.5, 0 0 1 0 3.5, 0 0 0 1 3.5, 1 0 0 0 4.0, 0 1 0 0 4.0, 0 0 1 0 4.0, 0 0 0 1 4.0) (1 2 3 4='Intercept', 5='GPA'); run; /* Since we know the logits for the first two slopes are equal, we */ /* collapse on them--the model chi-square changed by less than .01 */ proc catmod data=eval; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0 0 0, 0 1 0 0 2.0 0 0, 0 0 1 0 0 2.0 0, 0 0 0 1 0 0 2.0, 1 0 0 0 2.5 0 0, 0 1 0 0 2.5 0 0, 0 0 1 0 0 2.5 0, 0 0 0 1 0 0 2.5, 1 0 0 0 3.0 0 0, 0 1 0 0 3.0 0 0, 0 0 1 0 0 3.0 0, 0 0 0 1 0 0 3.0, 1 0 0 0 3.5 0 0, 0 1 0 0 3.5 0 0, 0 0 1 0 0 3.5 0, 0 0 0 1 0 0 3.5, 1 0 0 0 4.0 0 0, 0 1 0 0 4.0 0 0, 0 0 1 0 0 4.0 0, 0 0 0 1 0 0 4.0) (1 2 3 4='Intercept', 5 6 7='GPA'); run;