# Single-cell RNA-seq - normalization

# Load libraries
library(Seurat)
library(tidyverse)
library(RCurl)
library(cowplot)

setwd("/Users/hoyen/Desktop/STAT718/Notes/Lecture21-SingleCell2")
load("filtered_seurat.RData")
seurat_phase <- NormalizeData(filtered_seurat) ## simple normalization to explore and observe trend first 

##### scRNAseq data exploration example 

## Evaluation the effects of cell cycle 

load("single_cell_rnaseq/data/cycle.rda")
seurat_phase <- CellCycleScoring(seurat_phase, 
                                 g2m.features = g2m_genes, 
                                 s.features = s_genes)

# View cell cycle scores and phases assigned to cells                                 
View(seurat_phase@meta.data)
# Identify the most variable genes
seurat_phase <- FindVariableFeatures(seurat_phase, 
                     selection.method = "vst",
                     nfeatures = 2000, 
                     verbose = FALSE)
		     
# Scale the counts
seurat_phase <- ScaleData(seurat_phase)    

# Identify the 15 most highly variable genes
ranked_variable_genes <- VariableFeatures(seurat_phase)
top_genes <- ranked_variable_genes[1:15]

# Plot the average expression and variance of these genes
# With labels to indicate which genes are in the top 15
p <- VariableFeaturePlot(seurat_phase)
LabelPoints(plot = p, points = top_genes, repel = TRUE)       # Perform PCA


seurat_phase <- RunPCA(seurat_phase)

# Plot the PCA colored by cell cycle phase
DimPlot(seurat_phase,
        reduction = "pca",
        group.by= "Phase",
        split.by = "Phase")
        
        
#### Evaluation effects of mitochondrial expression 


# Check quartile values
summary(seurat_phase@meta.data$mitoRatio)

# Turn mitoRatio into categorical factor vector based on quartile values
seurat_phase@meta.data$mitoFr <- cut(seurat_phase@meta.data$mitoRatio, 
                   breaks=c(-Inf, 0.0144, 0.0199, 0.0267, Inf), 
                   labels=c("Low","Medium","Medium high", "High"))
				

### scTransform 


# Split seurat object by condition to perform cell cycle scoring and SCT on all samples
split_seurat <- SplitObject(seurat_phase, split.by = "sample")

#ctrl_reps <- split_seurat[c("ctrl_1", "ctrl_2")]

options(future.globals.maxSize = 4000 * 1024^2)


for (i in 1:length(split_seurat)) {
    split_seurat[[i]] <- SCTransform(split_seurat[[i]], vars.to.regress = c("mitoRatio"), vst.flavor = "v2")
    }


split_seurat$ctrl@assays


saveRDS(split_seurat, "data/split_seurat.rds")


#split_seurat <- readRDS("data/split_seurat.rds")


###### Integration 


# Select the most variable features to use for integration
integ_features <- SelectIntegrationFeatures(object.list = split_seurat, 
                                            nfeatures = 3000) 


split_seurat <- PrepSCTIntegration(object.list = split_seurat, 
                                   anchor.features = integ_features)


# Find best buddies - can take a while to run
integ_anchors <- FindIntegrationAnchors(object.list = split_seurat, 
                                        normalization.method = "SCT", 
                                        anchor.features = integ_features)
                                        
                                       
# Integrate across conditions
seurat_integrated <- IntegrateData(anchorset = integ_anchors, 
                                   normalization.method = "SCT")
                                   
                                   
##### Clustering 

# Determine the K-nearest neighbor graph
seurat_integrated <- FindNeighbors(object = seurat_integrated, 
                                dims = 1:40)
                                
                                
                                   # Determine the clusters for various resolutions                                
seurat_integrated <- FindClusters(object = seurat_integrated,
                               resolution = c(0.4, 0.6, 0.8, 1.0, 1.4))
                               
                               
                               seurat_integrated@meta.data %>% 
        View()
#### choose a resolution to start         
Idents(object = seurat_integrated) <- "integrated_snn_res.0.8"

## Calculation of UMAP
## DO NOT RUN (calculated in the last lesson)

# seurat_integrated <- RunUMAP(seurat_integrated, 
#                  reduction = "pca", 
#                  dims = 1:40)

DimPlot(seurat_integrated,
        reduction = "umap",
        label = TRUE,
        label.size = 6)
        
### explore other resolution 
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.4"

# Plot the UMAP
DimPlot(seurat_integrated,
        reduction = "umap",
        label = TRUE,
        label.size = 6)


### 
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.8"

# Plot the UMAP
DimPlot(seurat_integrated,
        reduction = "umap",
        label = TRUE,
        label.size = 6)