/* Fictional data linking student GPA to Overall Instructor's */ /* rating; 1=Poor, 2=Fair, 3=Good, 4=Very Good, 5=Excellent */ /* The class size is typical, but the data is really too sparse */ /* for the tests that follow */ data eval; input GPA OIR; datalines; 2.0 3 2.0 4 2.0 1 2.5 1 2.5 4 2.5 5 2.5 4 2.5 3 2.5 3 3.0 4 3.0 2 3.0 4 3.0 4 3.0 3 3.0 5 3.0 5 3.0 4 3.0 4 3.0 3 3.5 4 3.5 4 3.5 5 3.5 5 3.5 3 3.5 4 4.0 5 4.0 4 ; run; /* LOGISTIC automatically uses cumulative logits and tests for */ /* parallel slopes. Nonparallel slopes have to be fit in other */ /* PROCs. */ proc logistic data=eval; model OIR=GPA; run; /* GENMOD runs the same default model */ proc genmod data=eavl; model OIR=GPA/dist=multinomial; run; /* CATMOD can test separate slopes, but doesn't like missing */ /* values. To fix this, we start by converting the raw data */ /* to counts */ proc freq data=eval; table GPA*OIR/out=outa; run; /* The output data set does not list missing cells */ proc print data=outa; run; proc sort data=outa; by GPA OIR; /* We need to generate the indices of the missing cells */ data eval1; do GPA=2.0 to 4.0 by 0.5; do OIR=1 to 5; output; end; end; proc sort data=eval1; by GPA OIR; run; /* Now we'll create a complete table and add small nominal */ /* counts to each cell so that CATMOD can analyze the data */ data evalp; merge eval1 outa; by GPA OIR; if count=. then count=.01; else count=count+.01; proc print data=evalp; run; /* CATMOD fits the non-proportional odds model. Note that CATMOD */ /* defines cumulative odds as the inverse of Agresti's definition. */ /* Note the test results on GPA. */ proc catmod data=evalp; weight count; direct GPA; response clogits; model OIR=GPA; run; /* Here's CATMOD code for the parallel slopes model. Coefficients */ /* are WLS solutions, not ML solutions--their signs are reversed */ /* because CATMOD defines cumulative logits differently from */ /* Agresti and LOGISTIC. The default chi-squared test is */ /* uninteresting; we need to specify combinations of design */ /* columns to test in order to test interesting hypotheses. */ proc catmod data=evalp; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0, 0 1 0 0 2.0, 0 0 1 0 2.0, 0 0 0 1 2.0, 1 0 0 0 2.5, 0 1 0 0 2.5, 0 0 1 0 2.5, 0 0 0 1 2.5, 1 0 0 0 3.0, 0 1 0 0 3.0, 0 0 1 0 3.0, 0 0 0 1 3.0, 1 0 0 0 3.5, 0 1 0 0 3.5, 0 0 1 0 3.5, 0 0 0 1 3.5, 1 0 0 0 4.0, 0 1 0 0 4.0, 0 0 1 0 4.0, 0 0 0 1 4.0) (1 2 3 4='Intercept', 5='GPA'); run; /* Here's another way of fitting the non-parallel slopes model */ /* We set this up so that we can add columns 5 and 6 in response */ /* to a student observation that the slopes for two of the logits */ /* appeared to be equal */ proc catmod data=evalp; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0 0 0 0, 0 1 0 0 0 2.0 0 0, 0 0 1 0 0 0 2.0 0, 0 0 0 1 0 0 0 2.0, 1 0 0 0 2.5 0 0 0, 0 1 0 0 0 2.5 0 0, 0 0 1 0 0 0 2.5 0, 0 0 0 1 0 0 0 2.5, 1 0 0 0 3.0 0 0 0, 0 1 0 0 0 3.0 0 0, 0 0 1 0 0 0 3.0 0, 0 0 0 1 0 0 0 3.0, 1 0 0 0 3.5 0 0 0, 0 1 0 0 0 3.5 0 0, 0 0 1 0 0 0 3.5 0, 0 0 0 1 0 0 0 3.5, 1 0 0 0 4.0 0 0 0, 0 1 0 0 0 4.0 0 0, 0 0 1 0 0 0 4.0 0, 0 0 0 1 0 0 0 4.0) (1 2 3 4='Intercept', 5 6 7 8='GPA'); /* We can set up a parallel slopes lack of fit test by hand--results */ /* are quite different from the likelihood-based test,likely due to the /* numerous small cell counts */ contrast 'LOF' all_parms 0 0 0 0 1 0 0 -1, all_parms 0 0 0 0 0 1 0 -1, all_parms 0 0 0 0 0 0 1 -1; run; /* And here's the model with the logits for the first two slopes */ /* collapsed--the model chi-square changed by less than .01 */ proc catmod data=evalp; weight count; population GPA; response clogits; model OIR=(1 0 0 0 2.0 0 0, 0 1 0 0 2.0 0 0, 0 0 1 0 0 2.0 0, 0 0 0 1 0 0 2.0, 1 0 0 0 2.5 0 0, 0 1 0 0 2.5 0 0, 0 0 1 0 0 2.5 0, 0 0 0 1 0 0 2.5, 1 0 0 0 3.0 0 0, 0 1 0 0 3.0 0 0, 0 0 1 0 0 3.0 0, 0 0 0 1 0 0 3.0, 1 0 0 0 3.5 0 0, 0 1 0 0 3.5 0 0, 0 0 1 0 0 3.5 0, 0 0 0 1 0 0 3.5, 1 0 0 0 4.0 0 0, 0 1 0 0 4.0 0 0, 0 0 1 0 0 4.0 0, 0 0 0 1 0 0 4.0) (1 2 3 4='Intercept', 5 6 7='GPA'); run; /* Run the R code on the website to generate a larger simulated data set. */ /* Then read it into SAS and study the output for the nonproportional odds model */ data eval; infile 'z:\stat 770\cumlogsim.txt'; input GPA OIR; run; /* Inspect the frequency table for patterns */ proc freq data=eval; table GPA*OIR; run; proc catmod data=eval; population GPA; response clogits; model OIR=(1 0 0 0 2.0 0 0 0, 0 1 0 0 0 2.0 0 0, 0 0 1 0 0 0 2.0 0, 0 0 0 1 0 0 0 2.0, 1 0 0 0 2.5 0 0 0, 0 1 0 0 0 2.5 0 0, 0 0 1 0 0 0 2.5 0, 0 0 0 1 0 0 0 2.5, 1 0 0 0 3.0 0 0 0, 0 1 0 0 0 3.0 0 0, 0 0 1 0 0 0 3.0 0, 0 0 0 1 0 0 0 3.0, 1 0 0 0 3.5 0 0 0, 0 1 0 0 0 3.5 0 0, 0 0 1 0 0 0 3.5 0, 0 0 0 1 0 0 0 3.5, 1 0 0 0 4.0 0 0 0, 0 1 0 0 0 4.0 0 0, 0 0 1 0 0 0 4.0 0, 0 0 0 1 0 0 0 4.0) (1 2 3 4='Intercept', 5 6 7 8='GPA'); /* We can set up a parallel slopes lack of fit test by hand--results */ /* are quite different from the likelihood-based test */ contrast 'LOF' all_parms 0 0 0 0 1 0 0 -1, all_parms 0 0 0 0 0 1 0 -1, all_parms 0 0 0 0 0 0 1 -1; run; /* Run proc logistic to compare LOF tests */ proc logistic data=eval; model OIR=GPA; run;