## ## Code for simulating data for the STAT 518 project ## ################################# ## Example for paired data ################################# my.differences <- ## fill in your vector of differences using c() and separating values with commas## y <- my.differences my.n <- length(y) my.sd.diffs <- sd(y) my.var.diffs <- var(y) shape.est <- length(y)*(mean(y)^2)/(sum(y^2) - length(y)*(mean(y)^2)) scale.est <- (sum(y^2) - length(y)*(mean(y)^2))/(length(y)*mean(y)) paired.data.power = function(nsamp,nsim=50000,mu.d,sd.diff){ param.ps <- rep(0,nsim); nonparam.ps <- rep(0,nsim); for (i in 1:nsim){ sim.data <- rnorm(nsamp,mean=mu.d,sd=sd.diff) # replace this line if you are simulating non-normal data (see below) param.ps[i] = t.test(sim.data, alternative="two.sided")$p.value; nonparam.ps[i] = wilcox.test(sim.data, alternative="two.sided")$p.value; } out<- paste("power of t:", sum(param.ps < .05)/nsim, "power of signed rank:", sum(nonparam.ps < .05)/nsim ) return(out) } # If simulating right-skewed data, use: sim.data <- rgamma(nsamp,shape=shape.est,scale=scale.est)-mean(y)+mu.d # If simulating left-skewed data, use: sim.data <- -rgamma(nsamp,shape=shape.est,scale=scale.est)+mean(y)+mu.d # If simulating heavy-tailed data, use: sim.data <- rt(nsamp,df=max(1,round(2*my.var.diffs/(my.var.diffs-1))))+mu.d # If simulating light-tailed data, use: sim.data <- runif(nsamp,min=(1+1/length(y))*min(y), max=(1+1/length(y))*max(y) )-mean(y)+mu.d # With mu.d = 0, H0 is true in all these cases. # Try the code for a variety of values of mu.d, and make a plot of the power functions of each test: paired.data.power(mu.d=0, nsamp=my.n, sd.diff=my.sd.diffs) paired.data.power(mu.d=0.1, nsamp=my.n, sd.diff=my.sd.diffs) # and so on... ################################# ## Example for two-sample data ################################# my.sample1 <- ## fill in your sample 1 vector using c() and separating values with commas## my.sample2 <- ## fill in your sample 2 vector using c() and separating values with commas## y1 <- my.sample1 y2 <- my.sample2 my.n1 <- length(y1) my.n2 <- length(y2) my.sd.1 <- sd(y1) my.var.1 <- var(y1) my.sd.2 <- sd(y2) my.var.2 <- var(y2) shape.est1 <- length(y1)*(mean(y1)^2)/(sum(y1^2) - length(y1)*(mean(y1)^2)) scale.est1 <- (sum(y1^2) - length(y1)*(mean(y1)^2))/(length(y1)*mean(y1)) shape.est2 <- length(y2)*(mean(y2)^2)/(sum(y2^2) - length(y2)*(mean(y2)^2)) scale.est2 <- (sum(y2^2) - length(y2)*(mean(y2)^2))/(length(y2)*mean(y2)) twosample.data.power = function(nsamp1,nsamp2,nsim=50000,mu.d,sd1,sd2){ param.ps <- rep(0,nsim); nonparam.ps <- rep(0,nsim); for (i in 1:nsim){ sim.data1 <- rnorm(nsamp1,mean=0,sd=sd1) # replace at least one of these lines if you are simulating non-normal data (see below) sim.data2 <- rnorm(nsamp2,mean=mu.d,sd=sd2) param.ps[i] = t.test(sim.data1, sim.data2, paired=F, alternative="two.sided")$p.value; nonparam.ps[i] = wilcox.test(sim.data1, sim.data2, paired=F, alternative="two.sided")$p.value; } out<- paste("power of t:", sum(param.ps < .05)/nsim, "power of M-W:", sum(nonparam.ps < .05)/nsim ) return(out) } # If simulating right-skewed data, use something like (e.g., for sample 2): sim.data2 <- rgamma(nsamp2,shape=shape.est2,scale=scale.est2)-mean(y2)+mu.d # If simulating left-skewed data, use: sim.data2 <- -rgamma(nsamp2,shape=shape.est2,scale=scale.est2)+mean(y2)+mu.d # If simulating heavy-tailed data, use: sim.data2 <- rt(nsamp2,df=max(1,round(2*my.var.2/(my.var.2-1))))+mu.d # If simulating light-tailed data, use: sim.data2 <- runif(nsamp2,min=(1+1/length(y2))*min(y2), max=(1+1/length(y2))*max(y2) )-mean(y2)+mu.d # With mu.d = 0, H0 is true in all these cases. # Try the code for a variety of values of mu.d, and make a plot of the power functions of each test: twosample.data.power(mu.d=0,nsamp1=my.n1,nsamp2=my.n2,sd1=my.sd.1,sd2=my.sd.2) twosample.data.power(mu.d=0.1,nsamp1=my.n1,nsamp2=my.n2,sd1=my.sd.1,sd2=my.sd.2) # and so on... ################################################################################### ## Example for 3-sample data (for more than 3, it just extends in a similar way) ################################################################################### my.sample1 <- ## fill in your sample 1 vector using c() and separating values with commas## my.sample2 <- ## fill in your sample 2 vector using c() and separating values with commas## my.sample3 <- ## fill in your sample 3 vector using c() and separating values with commas## y1 <- my.sample1 y2 <- my.sample2 y3 <- my.sample3 my.n1 <- length(y1) my.n2 <- length(y2) my.n3 <- length(y3) my.sd.1 <- sd(y1) my.var.1 <- var(y1) my.sd.2 <- sd(y2) my.var.2 <- var(y2) my.sd.3 <- sd(y3) my.var.3 <- var(y3) shape.est1 <- length(y1)*(mean(y1)^2)/(sum(y1^2) - length(y1)*(mean(y1)^2)) scale.est1 <- (sum(y1^2) - length(y1)*(mean(y1)^2))/(length(y1)*mean(y1)) shape.est2 <- length(y2)*(mean(y2)^2)/(sum(y2^2) - length(y2)*(mean(y2)^2)) scale.est2 <- (sum(y2^2) - length(y2)*(mean(y2)^2))/(length(y2)*mean(y2)) shape.est3 <- length(y3)*(mean(y3)^2)/(sum(y3^2) - length(y3)*(mean(y3)^2)) scale.est3 <- (sum(y3^2) - length(y3)*(mean(y3)^2))/(length(y3)*mean(y3)) threesample.data.power = function(nsamp1,nsamp2,nsamp3,nsim=50000,mu.d1,mu.d2,sd1,sd2,sd3){ # There will be one fewer "mu.d" numbers than there are samples param.ps <- rep(0,nsim); nonparam.ps <- rep(0,nsim); for (i in 1:nsim){ sim.data1 <- rnorm(nsamp1,mean=0,sd=sd1) # replace at least one of these lines if you are simulating non-normal data (see below) sim.data2 <- rnorm(nsamp2,mean=mu.d1,sd=sd2) sim.data3 <- rnorm(nsamp3,mean=mu.d2,sd=sd3) param.ps[i] = anova(lm(c(sim.data1,sim.data2,sim.data3)~factor(rep(1:3,times=c(my.n1,my.n2,my.n3)))))[1,"Pr(>F)"]; nonparam.ps[i] = kruskal.test(list(sim.data1, sim.data2, sim.data3))$p.value; } out<- paste("power of F:", sum(param.ps < .05)/nsim, "power of K-W:", sum(nonparam.ps < .05)/nsim ) return(out) } # If simulating right-skewed data, use something like (e.g., for sample 2): sim.data2 <- rgamma(nsamp2,shape=shape.est2,scale=scale.est2)-mean(y2)+mu.d1 # If simulating left-skewed data, use: sim.data2 <- -rgamma(nsamp2,shape=shape.est2,scale=scale.est2)+mean(y2)+mu.d1 # If simulating heavy-tailed data, use: sim.data2 <- rt(nsamp2,df=max(1,round(2*my.var.2/(my.var.2-1))))+mu.d1 # If simulating light-tailed data, use: sim.data2 <- runif(nsamp2,min=(1+1/length(y2))*min(y2), max=(1+1/length(y2))*max(y2) )-mean(y2)+mu.d # With mu.d1 = mu.d2 = ... = 0, H0 is true in all these cases. # Try the code for a variety of values of mu.d1, mu.d2, etc., and make a plot of the power functions of each test: threesample.data.power(mu.d1=0,mu.d2=0,nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3) threesample.data.power(mu.d1=0.1,mu.d2=0.2,nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3) # and so on... # A nice way to get values for a power curve might be to put random spacings between the means, # and vary the amount of spacing: k <- 0 threesample.data.power(mu.d1=runif(1,-k,k),mu.d2=runif(1,-k,k),nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3) k <- 0.2 threesample.data.power(mu.d1=runif(1,-k,k),mu.d2=runif(1,-k,k),nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3) k <- 0.4 threesample.data.power(mu.d1=runif(1,-k,k),mu.d2=runif(1,-k,k),nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3) # and so on...