###################################################################### # R commands Stat588 Lab7 # University of South Carolina ###################################################################### # This file contains the R commands for the lab. # # Lines beginning with the symbol '#' are comments in R. All other # lines contain code. # # In R for Windows, you may wish to open this file from the menu bar # (File:Display file); you can then copy commands into the command # window. (Use the mouse to highlight one or more lines; then # right-click and select "Paste to console".) ###################################################################### BiocManager::install("Biobase") BiocManager::install("annotate") BiocManager::install("hgu95av2.db") ########### Access ALL data library("Biobase") library("annotate") library("hgu95av2.db") library(ALL) data(ALL) bcell = grep("^B", as.character(ALL$BT)) moltyp = which(as.character(ALL$mol.biol) %in% c("NEG", "BCR/ABL")) ALL_bcrneg = ALL[, intersect(bcell, moltyp)] ALL_bcrneg$mol.biol = factor(ALL_bcrneg$mol.biol) data<-exprs(ALL_bcrneg) probename<-rownames(data) ########### get gene symbol genename<-mget(probename, hgu95av2SYMBOL) genename[1:5] plot(dat[4,], dat[5,], pch=16) ################################### # calculate Pearson Correlation ################################## cor(data[4,], data[5,]) ###################################### # Fit a simple linear regression model ###################################### fit<-lm(data[4,] ~ data[5,]) summary(fit) ########################################################## # Fit a multiple regression model with a interaction term ###################################################### int<-as.numeric(ALL_bcrneg$mol.biol)*data[5,] fit2<-lm(data[4,] ~ data[5,] + ALL_bcrneg$mol.biol + int) #### equivalently #### fit2<-lm(data[4,] ~ data[5,]*ALL_bcrneg$mol.biol ) fit2out<-summary(fit2) fit2out ############################################################# ### Compare two regression models ###################################################### anova(fit, fit2) ############################################################# ### Fit a logistic regression model ###################################################### y<-rbinom(1000, size=1, prob=0.3) x<-rbinom(1000, size=1, prob=0.2) fitlogistic<-glm(y ~ x, family=binomial(link=logit)) summary(fitlogistic) ############################################################# ### Fit a logistic regression model ###################################################### fmsURL<-"http://people.stat.sc.edu/hoyen/STAT588/Data/FMS_data.txt" fms<-read.delim(file=fmsURL, header=TRUE, sep="\t", stringsAsFactors=F) dim(fms) ## check the dimension of the data whbmi<-grep(colnames(fms), pattern="BMI") colnames(fms)[whbmi] y<-fms[, "pre.BMI"] bmi25<-ifelse(y > 25, 1, 0) table(bmi25) x<-fms[,"actn3_1671064"] str(x) table(x) ##################### # Titanic dataset ###################### library(titanic) train<-titanic_train fit1 <- glm(Survived ~ Pclass+Sex+Age+SibSp+Parch+Embarked, family=binomial(("logit"))) summary(fit1) ########################## # Inheritance mode: linear ########################### x<-sample(c("AA", "GA", "GG"), 1000, replace=T) xGA<-ifelse(x=="GA", 1, 0) y<-5*xGA+rnorm(1000) m1<-mean(y[x=="AA"]) m2<-mean(y[x=="GA"]) m3<-mean(y[x=="GG"]) stripchart(y ~ x, method="jitter", jitter=0.2, vertical=T, ylab="Weight", xlab="Gentype", pch=16) lines(c(0.8, 1.2), c(m1, m1), col=2, lwd=3) lines(c(1.8, 2.2), c(m2, m2), col=3, lwd=3) lines(c(2.8, 3.2), c(m3, m3), col=4, lwd=3) xlinear<-x xlinear<-ifelse(x=="AA", 0, x) xlinear<-ifelse(x=="GA", 1, xlinear) xlinear<-ifelse(x=="GG", 2, xlinear) xlinear<-as.numeric(xlinear) fitlinear<-lm(y ~ xlinear) summary(fitlinear) plot(jitter(xlinear), y, pch=16, xlab="Genotype", ylab="Weight") lines(c(-0.2, 0.2), c(m1, m1), col=2, lwd=3) lines(c(0.8, 1.2), c(m2, m2), col=3, lwd=3) lines(c(1.8, 2.2), c(m3, m3), col=4, lwd=3) abline(fitlinear, col="gray", lty=2, lwd=3) ############################# # Inheritance mode: dominant ############################# xD<-rep(1, length(x)) xD<-ifelse(x=="AA", 0, xD) fitD<-lm(y ~ xD) summary(fitD) plot(jitter(xlinear), y, pch=16, xlab="Genotype", ylab="Weight", main="Dominant") lines(c(-0.2, 0.2), c(m1, m1), col=2, lwd=3) lines(c(0.8, 1.2), c(m2, m2), col=3, lwd=3) lines(c(1.8, 2.2), c(m3, m3), col=4, lwd=3) abline(fitD, col="gray", lty=2, lwd=3) ################################ # Inheritance mode: recessive ################################ xR<-rep(0, length(x)) xR<-ifelse(x=="GG", 1, xR) fitR<-lm(y ~ xR) summary(fitR) plot(jitter(xlinear), y, pch=16, xlab="Genotype", ylab="Weight", main="Recessive") lines(c(-0.2, 0.2), c(m1, m1), col=2, lwd=3) lines(c(0.8, 1.2), c(m2, m2), col=3, lwd=3) lines(c(1.8, 2.2), c(m3, m3), col=4, lwd=3) abline(fitR, col="gray", lty=2, lwd=3) ########################## # dummy variables ########################### xDummy<-factor(x) fitDummy<-lm(y ~ xDummy) summary(fitDummy) plot(jitter(xlinear), y, pch=16, xlab="Genotype", ylab="Weight", main="Dummy Variable") lines(c(-0.2, 0.2), c(m1, m1), col=2, lwd=3) lines(c(0.8, 1.2), c(m2, m2), col=3, lwd=3) lines(c(1.8, 2.2), c(m3, m3), col=4, lwd=3) fitted<-predict(fitDummy) points(xlinear, fitted, col="pink", cex=3, pch=16) ########################## # Logistic Regression ########################### x<-as.factor(x) str(x) fit3<-glm(bmi25 ~ x, family=binomial(link=logit)) summary(fit3) summary(fit3)$coefficient exp(summary(fit3)$coefficient[2,1]) ### the odds of having bmi >25 is 1.13 times higher in individuals with GA versus AA genotype. #### fit a linear regression model plot(y ~x, ylab="BMI") fit4<-lm( y ~ x) summary(fit4)