# Single-cell RNA-seq - normalization # Load libraries library(Seurat) library(tidyverse) library(RCurl) library(cowplot) setwd("/Users/hoyen/Desktop/STAT718/Notes/Lecture21-SingleCell2") load("filtered_seurat.RData") seurat_phase <- NormalizeData(filtered_seurat) ## simple normalization to explore and observe trend first ##### scRNAseq data exploration example ## Evaluation the effects of cell cycle load("single_cell_rnaseq/data/cycle.rda") seurat_phase <- CellCycleScoring(seurat_phase, g2m.features = g2m_genes, s.features = s_genes) # View cell cycle scores and phases assigned to cells View(seurat_phase@meta.data) # Identify the most variable genes seurat_phase <- FindVariableFeatures(seurat_phase, selection.method = "vst", nfeatures = 2000, verbose = FALSE) # Scale the counts seurat_phase <- ScaleData(seurat_phase) # Identify the 15 most highly variable genes ranked_variable_genes <- VariableFeatures(seurat_phase) top_genes <- ranked_variable_genes[1:15] # Plot the average expression and variance of these genes # With labels to indicate which genes are in the top 15 p <- VariableFeaturePlot(seurat_phase) LabelPoints(plot = p, points = top_genes, repel = TRUE) # Perform PCA seurat_phase <- RunPCA(seurat_phase) # Plot the PCA colored by cell cycle phase DimPlot(seurat_phase, reduction = "pca", group.by= "Phase", split.by = "Phase") #### Evaluation effects of mitochondrial expression # Check quartile values summary(seurat_phase@meta.data$mitoRatio) # Turn mitoRatio into categorical factor vector based on quartile values seurat_phase@meta.data$mitoFr <- cut(seurat_phase@meta.data$mitoRatio, breaks=c(-Inf, 0.0144, 0.0199, 0.0267, Inf), labels=c("Low","Medium","Medium high", "High")) ### scTransform # Split seurat object by condition to perform cell cycle scoring and SCT on all samples split_seurat <- SplitObject(seurat_phase, split.by = "sample") #ctrl_reps <- split_seurat[c("ctrl_1", "ctrl_2")] options(future.globals.maxSize = 4000 * 1024^2) for (i in 1:length(split_seurat)) { split_seurat[[i]] <- SCTransform(split_seurat[[i]], vars.to.regress = c("mitoRatio"), vst.flavor = "v2") } split_seurat$ctrl@assays saveRDS(split_seurat, "data/split_seurat.rds") #split_seurat <- readRDS("data/split_seurat.rds") ###### Integration # Select the most variable features to use for integration integ_features <- SelectIntegrationFeatures(object.list = split_seurat, nfeatures = 3000) split_seurat <- PrepSCTIntegration(object.list = split_seurat, anchor.features = integ_features) # Find best buddies - can take a while to run integ_anchors <- FindIntegrationAnchors(object.list = split_seurat, normalization.method = "SCT", anchor.features = integ_features) # Integrate across conditions seurat_integrated <- IntegrateData(anchorset = integ_anchors, normalization.method = "SCT") ##### Clustering # Determine the K-nearest neighbor graph seurat_integrated <- FindNeighbors(object = seurat_integrated, dims = 1:40) # Determine the clusters for various resolutions seurat_integrated <- FindClusters(object = seurat_integrated, resolution = c(0.4, 0.6, 0.8, 1.0, 1.4)) seurat_integrated@meta.data %>% View() #### choose a resolution to start Idents(object = seurat_integrated) <- "integrated_snn_res.0.8" ## Calculation of UMAP ## DO NOT RUN (calculated in the last lesson) # seurat_integrated <- RunUMAP(seurat_integrated, # reduction = "pca", # dims = 1:40) DimPlot(seurat_integrated, reduction = "umap", label = TRUE, label.size = 6) ### explore other resolution # Assign identity of clusters Idents(object = seurat_integrated) <- "integrated_snn_res.0.4" # Plot the UMAP DimPlot(seurat_integrated, reduction = "umap", label = TRUE, label.size = 6) ### # Assign identity of clusters Idents(object = seurat_integrated) <- "integrated_snn_res.0.8" # Plot the UMAP DimPlot(seurat_integrated, reduction = "umap", label = TRUE, label.size = 6)