##
## Code for simulating data for the STAT 518 project
##

#################################
## Example for paired data
#################################

my.differences <- ## fill in your vector of differences using c() and separating values with commas##

y <- my.differences

my.n <- length(y) 
my.sd.diffs <- sd(y)
my.var.diffs <- var(y)

shape.est <- length(y)*(mean(y)^2)/(sum(y^2) - length(y)*(mean(y)^2))
scale.est <- (sum(y^2) - length(y)*(mean(y)^2))/(length(y)*mean(y))

paired.data.power = function(nsamp,nsim=50000,mu.d,sd.diff){
param.ps <- rep(0,nsim); nonparam.ps <- rep(0,nsim);
for (i in 1:nsim){
sim.data <- rnorm(nsamp,mean=mu.d,sd=sd.diff) # replace this line if you are simulating non-normal data (see below)

    param.ps[i] = t.test(sim.data, alternative="two.sided")$p.value; 
    nonparam.ps[i] = wilcox.test(sim.data, alternative="two.sided")$p.value;
}
    out<- paste("power of t:", sum(param.ps < .05)/nsim, "power of signed rank:", sum(nonparam.ps < .05)/nsim )
   return(out)
}

# If simulating right-skewed data, use:

sim.data <- rgamma(nsamp,shape=shape.est,scale=scale.est)-mean(y)+mu.d

# If simulating left-skewed data, use:

sim.data <- -rgamma(nsamp,shape=shape.est,scale=scale.est)+mean(y)+mu.d

# If simulating heavy-tailed data, use:

sim.data <- rt(nsamp,df=max(1,round(2*my.var.diffs/(my.var.diffs-1))))+mu.d

# If simulating light-tailed data, use:

sim.data <- runif(nsamp,min=(1+1/length(y))*min(y), max=(1+1/length(y))*max(y) )-mean(y)+mu.d

# With mu.d = 0, H0 is true in all these cases.
# Try the code for a variety of values of mu.d, and make a plot of the power functions of each test:

paired.data.power(mu.d=0, nsamp=my.n, sd.diff=my.sd.diffs)
paired.data.power(mu.d=0.1, nsamp=my.n, sd.diff=my.sd.diffs)
# and so on...


#################################
## Example for two-sample data
#################################

my.sample1 <- ## fill in your sample 1 vector using c() and separating values with commas##

my.sample2 <- ## fill in your sample 2 vector using c() and separating values with commas##

y1 <- my.sample1
y2 <- my.sample2

my.n1 <- length(y1)
my.n2 <- length(y2)



my.sd.1 <- sd(y1)
my.var.1 <- var(y1)
my.sd.2 <- sd(y2)
my.var.2 <- var(y2)

shape.est1 <- length(y1)*(mean(y1)^2)/(sum(y1^2) - length(y1)*(mean(y1)^2))
scale.est1 <- (sum(y1^2) - length(y1)*(mean(y1)^2))/(length(y1)*mean(y1))

shape.est2 <- length(y2)*(mean(y2)^2)/(sum(y2^2) - length(y2)*(mean(y2)^2))
scale.est2 <- (sum(y2^2) - length(y2)*(mean(y2)^2))/(length(y2)*mean(y2))



twosample.data.power = function(nsamp1,nsamp2,nsim=50000,mu.d,sd1,sd2){
param.ps <- rep(0,nsim); nonparam.ps <- rep(0,nsim);
for (i in 1:nsim){
sim.data1 <- rnorm(nsamp1,mean=0,sd=sd1) # replace at least one of these lines if you are simulating non-normal data (see below)
sim.data2 <- rnorm(nsamp2,mean=mu.d,sd=sd2)
    param.ps[i] = t.test(sim.data1, sim.data2, paired=F, alternative="two.sided")$p.value; 
    nonparam.ps[i] = wilcox.test(sim.data1, sim.data2, paired=F, alternative="two.sided")$p.value;
}
    out<- paste("power of t:", sum(param.ps < .05)/nsim, "power of M-W:", sum(nonparam.ps < .05)/nsim )
   return(out)
}

# If simulating right-skewed data, use something like (e.g., for sample 2):

sim.data2 <- rgamma(nsamp2,shape=shape.est2,scale=scale.est2)-mean(y2)+mu.d

# If simulating left-skewed data, use:

sim.data2 <- -rgamma(nsamp2,shape=shape.est2,scale=scale.est2)+mean(y2)+mu.d

# If simulating heavy-tailed data, use:

sim.data2 <- rt(nsamp2,df=max(1,round(2*my.var.2/(my.var.2-1))))+mu.d

# If simulating light-tailed data, use:

sim.data2 <- runif(nsamp2,min=(1+1/length(y2))*min(y2), max=(1+1/length(y2))*max(y2) )-mean(y2)+mu.d

# With mu.d = 0, H0 is true in all these cases.
# Try the code for a variety of values of mu.d, and make a plot of the power functions of each test:

twosample.data.power(mu.d=0,nsamp1=my.n1,nsamp2=my.n2,sd1=my.sd.1,sd2=my.sd.2)
twosample.data.power(mu.d=0.1,nsamp1=my.n1,nsamp2=my.n2,sd1=my.sd.1,sd2=my.sd.2)
# and so on...




###################################################################################
## Example for 3-sample data (for more than 3, it just extends in a similar way)
###################################################################################

my.sample1 <- ## fill in your sample 1 vector using c() and separating values with commas##

my.sample2 <- ## fill in your sample 2 vector using c() and separating values with commas##
my.sample3 <- ## fill in your sample 3 vector using c() and separating values with commas##

y1 <- my.sample1
y2 <- my.sample2
y3 <- my.sample3

my.n1 <- length(y1)
my.n2 <- length(y2)
my.n3 <- length(y3)


my.sd.1 <- sd(y1)
my.var.1 <- var(y1)
my.sd.2 <- sd(y2)
my.var.2 <- var(y2)
my.sd.3 <- sd(y3)
my.var.3 <- var(y3)

shape.est1 <- length(y1)*(mean(y1)^2)/(sum(y1^2) - length(y1)*(mean(y1)^2))
scale.est1 <- (sum(y1^2) - length(y1)*(mean(y1)^2))/(length(y1)*mean(y1))

shape.est2 <- length(y2)*(mean(y2)^2)/(sum(y2^2) - length(y2)*(mean(y2)^2))
scale.est2 <- (sum(y2^2) - length(y2)*(mean(y2)^2))/(length(y2)*mean(y2))

shape.est3 <- length(y3)*(mean(y3)^2)/(sum(y3^2) - length(y3)*(mean(y3)^2))
scale.est3 <- (sum(y3^2) - length(y3)*(mean(y3)^2))/(length(y3)*mean(y3))

threesample.data.power = function(nsamp1,nsamp2,nsamp3,nsim=50000,mu.d1,mu.d2,sd1,sd2,sd3){
# There will be one fewer "mu.d" numbers than there are samples
param.ps <- rep(0,nsim); nonparam.ps <- rep(0,nsim);
for (i in 1:nsim){
sim.data1 <- rnorm(nsamp1,mean=0,sd=sd1) # replace at least one of these lines if you are simulating non-normal data (see below)
sim.data2 <- rnorm(nsamp2,mean=mu.d1,sd=sd2)
sim.data3 <- rnorm(nsamp3,mean=mu.d2,sd=sd3)

    param.ps[i] = anova(lm(c(sim.data1,sim.data2,sim.data3)~factor(rep(1:3,times=c(my.n1,my.n2,my.n3)))))[1,"Pr(>F)"]; 
    nonparam.ps[i] = kruskal.test(list(sim.data1, sim.data2, sim.data3))$p.value;
}
    out<- paste("power of F:", sum(param.ps < .05)/nsim, "power of K-W:", sum(nonparam.ps < .05)/nsim )
   return(out)
}

# If simulating right-skewed data, use something like (e.g., for sample 2):

sim.data2 <- rgamma(nsamp2,shape=shape.est2,scale=scale.est2)-mean(y2)+mu.d1

# If simulating left-skewed data, use:

sim.data2 <- -rgamma(nsamp2,shape=shape.est2,scale=scale.est2)+mean(y2)+mu.d1

# If simulating heavy-tailed data, use:

sim.data2 <- rt(nsamp2,df=max(1,round(2*my.var.2/(my.var.2-1))))+mu.d1

# If simulating light-tailed data, use:

sim.data2 <- runif(nsamp2,min=(1+1/length(y2))*min(y2), max=(1+1/length(y2))*max(y2) )-mean(y2)+mu.d

# With mu.d1 = mu.d2 = ... = 0, H0 is true in all these cases.
# Try the code for a variety of values of mu.d1, mu.d2, etc., and make a plot of the power functions of each test:

threesample.data.power(mu.d1=0,mu.d2=0,nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3)
threesample.data.power(mu.d1=0.1,mu.d2=0.2,nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3)
# and so on...


# A nice way to get values for a power curve might be to put random spacings between the means, 
# and vary the amount of spacing:

k <- 0
threesample.data.power(mu.d1=runif(1,-k,k),mu.d2=runif(1,-k,k),nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3)
k <- 0.2
threesample.data.power(mu.d1=runif(1,-k,k),mu.d2=runif(1,-k,k),nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3)
k <- 0.4
threesample.data.power(mu.d1=runif(1,-k,k),mu.d2=runif(1,-k,k),nsamp1=my.n1,nsamp2=my.n2,nsamp3=my.n3,sd1=my.sd.1,sd2=my.sd.2,sd3=my.sd.3)
# and so on...