/* SAS code to analyze a 1 by 2 contingency table        */
/* and make inference about the binomial probability.    */

/* ------------------------------------------------------------------ */
/* Example 1: driver's test data set                                   */
/* ------------------------------------------------------------------ */

/* The raw data as 0's and 1's (1 = passed, 0 = failed).               */
/* The trailing @@ holds the input line so that SAS reads multiple     */
/* observations from each data line.                                   */
/* NOTE: in-stream data must begin on the line AFTER the DATALINES     */
/* statement and end with a line containing only a semicolon.          */
DATA driver;
    INPUT results_01 @@;
    DATALINES;
1 0 1 0 1 1 1 0 1 1 0 0 1 0 1 0 0 1 1 0 1 1 1 1 1
0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0
;
RUN;

/* Labeling the raw data as "PASS" or "FAIL".                          */
/* The LENGTH statement fixes the character width up front instead of  */
/* letting it be inferred from the first assignment.                   */
DATA driver;
    SET driver;
    LENGTH drive_test_results $ 4;
    IF results_01 = 1 THEN drive_test_results = "PASS";
    ELSE drive_test_results = "FAIL";
RUN;

PROC PRINT DATA=driver;
    VAR drive_test_results;
RUN;

/* Creating a 1 by 2 contingency table based on the raw data.          */
/* ORDER=FREQ lists the most frequently occurring category ("PASS")    */
/* first in the table.  LEVEL="PASS" names the success category        */
/* explicitly, so the binomial inference does not silently switch to   */
/* "FAIL" if the counts in the data ever change.                       */
PROC FREQ DATA=driver ORDER=FREQ;
    TABLES drive_test_results / BINOMIAL(LEVEL="PASS");
RUN;

/* Inference about pi:                                                          */
/* 95% CI for pi (the probability of a random individual passing the test):    */
/* The 95% Wald CI is (.529, .791).                                             */
/* (Slightly different from R's large-sample "score" CI).                       */

/* **************************************************************************** */

/* ------------------------------------------------------------------ */
/* Example 2: small-sample inference about pi                          */
/* ------------------------------------------------------------------ */

/* Suppose a sample of 10 trees revealed two diseased trees.           */
/* The data are entered as category counts, one category per line.     */
DATA trees;
    INPUT disease $ number;
    DATALINES;
yes 2
no 8
;
RUN;

/* Defining a diseased tree as a "success".                            */
/* ORDER=DATA lists the categories in the order they appear in the     */
/* data set ("yes" first).  LEVEL="yes" names the success category     */
/* explicitly rather than relying on the row order of the data.        */
/* WEIGHT tells PROC FREQ that each row represents NUMBER trees.       */
/* Exact 95% CI for the population proportion of trees that are        */
/* diseased:                                                           */
PROC FREQ DATA=trees ORDER=DATA;
    TABLES disease / BINOMIAL(LEVEL="yes");
    WEIGHT number;
RUN;

/* The 95% CI is (.025, .556). */