## new chunk # Load libraries library(SingleCellExperiment) library(Seurat) library(tidyverse) library(Matrix) library(scales) library(cowplot) library(RCurl) ## new chunk # How to read in 10X data for a single sample (output is a sparse matrix) ctrl_counts <- Read10X(data.dir = "single_cell_rnaseq/data/ctrl_raw_feature_bc_matrix") # Turn count matrix into a Seurat object (output is a Seurat object) ctrl <- CreateSeuratObject(counts = ctrl_counts, min.features = 100) ## new chunk # Explore the metadata head(ctrl@meta.data) ## new chunk ## DO NOT RUN for (variable in input){ command1 command2 command3 } ## new chunk # Create a Seurat object for each sample for (file in c("ctrl_raw_feature_bc_matrix", "stim_raw_feature_bc_matrix")){ seurat_data <- Read10X(data.dir = paste0("single_cell_rnaseq/data/", file)) seurat_obj <- CreateSeuratObject(counts = seurat_data, min.features = 100, project = file) assign(file, seurat_obj) } ## new chunk # Check the metadata in the new Seurat objects head(ctrl_raw_feature_bc_matrix@meta.data) head(stim_raw_feature_bc_matrix@meta.data) ## new chunk # Create a merged Seurat object merged_seurat <- merge(x = ctrl_raw_feature_bc_matrix, y = stim_raw_feature_bc_matrix, add.cell.id = c("ctrl", "stim")) ## new chunk > ## DO NOT RUN > > merged_seurat <- merge(x = ctrl_raw_feature_bc_matrix, > y = c(stim1_raw_feature_bc_matrix, stim2_raw_feature_bc_matrix, stim3_raw_feature_bc_matrix), > add.cell.id = c("ctrl", "stim1", "stim2", "stim3")) > ## new chunk # Check that the merged object has the appropriate sample-specific prefixes head(merged_seurat@meta.data) tail(merged_seurat@meta.data) #################### ## ----Novelty------------------------------------------------------------------------------------------------ # Add number of genes per UMI for each cell to metadata merged_seurat$log10GenesPerUMI <- log10(merged_seurat$nFeature_RNA) / log10(merged_seurat$nCount_RNA) ## ----Mitochondrial Ratio------------------------------------------------------------------------------------ # Compute percent mito ratio merged_seurat$mitoRatio <- PercentageFeatureSet(object = merged_seurat, pattern = "^MT-") merged_seurat$mitoRatio <- merged_seurat@meta.data$mitoRatio / 100 ## ----addition, echo=FALSE----------------------------------------------------------------------------------- # Create metadata dataframe metadata <- merged_seurat@meta.data metadata$cells <- rownames(metadata) metadata$sample <- NA metadata$sample[which(str_detect(metadata$cells, "^ctrl_"))] <- "ctrl" metadata$sample[which(str_detect(metadata$cells, "^stim_"))] <- "stim" metadata <- metadata %>% dplyr::rename(seq_folder = orig.ident, nUMI = nCount_RNA, nGene = nFeature_RNA) # Add metadata back to Seurat object merged_seurat@meta.data <- metadata # Create .RData object to load at any time save(merged_seurat, file="single_cell_rnaseq/data/merged_filtered_seurat.RData") ## ----CellCounts, echo=FALSE, out.height="65%", out.width="65%", fig.align = 'center'------------------------ # Visualize the number of cell counts per sample metadata %>% ggplot(aes(x=sample, fill=sample)) + geom_bar() + theme_classic() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + theme(plot.title = element_text(hjust=0.5, face="bold")) + ggtitle("NCells") ## ----UMI, echo=FALSE, out.height="55%", out.width="55%", fig.align = 'center'------------------------------- # Visualize the number UMIs/transcripts per cell metadata %>% ggplot(aes(color=sample, x=nUMI, fill= sample)) + geom_density(alpha = 0.2) + scale_x_log10() + theme_classic() + ylab("Cell density") + geom_vline(xintercept = 500) ## ----GenePerCell, echo=FALSE, out.height="65%", out.width="65%", fig.align = 'center'----------------------- # Visualize the distribution of genes detected per cell via histogram metadata %>% ggplot(aes(color=sample, x=nGene, fill= sample)) + geom_density(alpha = 0.2) + theme_classic() + scale_x_log10() + geom_vline(xintercept = 300) ## ----Complexity, echo=FALSE, out.height="65%", out.width="65%", fig.align = 'center'------------------------ # Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI (novelty score) metadata %>% ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) + geom_density(alpha = 0.2) + theme_classic() + geom_vline(xintercept = 0.8) ## ----Mito, echo=FALSE, out.height="65%", out.width="65%", fig.align = 'center'----------------------------- # Compute percent mito ratio merged_seurat$mitoRatio <- PercentageFeatureSet(object = merged_seurat, pattern = "^MT-") merged_seurat$mitoRatio <- merged_seurat@meta.data$mitoRatio / 100 metadata %>% ggplot(aes(color=sample, x=mitoRatio, fill=sample)) + geom_density(alpha = 0.2) + scale_x_log10() + theme_classic() + geom_vline(xintercept = 0.2) ## ----Joint, echo=FALSE, fig.align="center", message=FALSE, warning=FALSE, out.height="65%", out.width="65%"---- # Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs metadata %>% ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) + geom_point() + scale_colour_gradient(low = "gray90", high = "black") + stat_smooth(method=lm) + scale_x_log10() + scale_y_log10() + theme_classic() + geom_vline(xintercept = 500) + geom_hline(yintercept = 250) + facet_wrap(~sample) ## ----filter, eval=FALSE, include=FALSE---------------------------------------------------------------------- ## # Filter out low quality cells using selected thresholds - these will change with experiment filtered_seurat <- subset(x = merged_seurat, subset= (nUMI >= 500) & (nGene >= 250) & (log10GenesPerUMI > 0.80) & (mitoRatio < 0.20)) ## ----GeneFilter, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE------------------------------------ ## # Extract counts ## counts <- GetAssayData(object = filtered_seurat, slot = "counts") ## ## # Output a logical matrix specifying for each gene on whether or not there are more than zero counts per cell ## nonzero <- counts > 0 ## # Sums all TRUE values and returns TRUE if more than 10 TRUE values per gene ## keep_genes <- Matrix::rowSums(nonzero) >= 10 ## ## # Only keeping those genes expressed in more than 10 cells ## filtered_counts <- counts[keep_genes, ] ## filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = filtered_seurat@meta.data)