RNA-Seq R-scripts for MKL-1 and WaGa cell lines

huang 0 like s 906 view s
Tags: R
The pipeline finished successfully, but the following samples were skipped,

due to very low alignment (less than 5%):
#  - 0505_MKL-1_wt_EV,EV.RNA



#### -------
setwd("/home/jhuang/DATA/Data_Ute/Data_RNA-Seq_MKL-1_WaGa/results/featureCounts")

library("AnnotationDbi")
library("clusterProfiler")
library("ReactomePA")
#library("org.Mm.eg.db")
library(DESeq2)
library(gplots)


 [1] gene_name                                           
 [2] X042_MKL.1_wt_EV       
 [3] MKL.1_RNA_147          
 [4] X042_MKL.1_sT_DMSO     #EIGENTLICH _EV
 [5] MKL.1_RNA              
 [6] X0505_MKL.1_scr_DMSO_EV
 [7] X0505_MKL.1_sT_DMSO_EV 
 [8] MKL.1_EV.RNA_87        
 [9] MKL.1_EV.RNA_27        
[10] X042_MKL.1_scr_Dox_EV  
[11] X042_MKL.1_scr_DMSO_EV 
[12] MKL.1_EV.RNA_118       
[13] X0505_MKL.1_scr_Dox_EV 
[14] MKL.1_EV.RNA           
[15] X042_MKL.1_sT_Dox      #EIGENTLICH _EV  
[16] X0505_MKL.1_sT_Dox_EV  
[17] MKL.1_RNA_118          
[18] MKL.1_EV.RNA_2

 [1] gene_name                                           
 [2] X042_MKL.1_wt_EV       
 [3] MKL.1_RNA_147          
 [4] X042_MKL.1_sT_DMSO     #EIGENTLICH _EV
 [5] MKL.1_RNA              
 [6] X0505_MKL.1_scr_DMSO_EV
 [7] X0505_MKL.1_sT_DMSO_EV 
 [8] MKL.1_EV.RNA_87        
 [9] MKL.1_EV.RNA_27        
[10] X042_MKL.1_scr_Dox_EV  
[11] X042_MKL.1_scr_DMSO_EV 
[12] MKL.1_EV.RNA_118       
[13] X0505_MKL.1_scr_Dox_EV 
[14] MKL.1_EV.RNA           
[15] X042_MKL.1_sT_Dox      #EIGENTLICH _EV  
[16] X0505_MKL.1_sT_Dox_EV  
[17] MKL.1_RNA_118          
[18] MKL.1_EV.RNA_2

[19] Geneid.1                                            
[20] gene_name.1                                         
[21] X1605_WaGa_sT_DMSO_EV  
[22] WaGa_EV.RNA_118        
[23] WaGa_EV.RNA            
[24] X2706_WaGa_scr_Dox_EV  
[25] X2706_WaGa_sT_DMSO_EV  
[26] X1107_WaGa_wt_EV       
[27] X1107_WaGa_sT_Dox_EV   
[28] WaGa_RNA               
[29] X1605_WaGa_scr_DMSO_EV 
[30] X2706_WaGa_scr_DMSO_EV 
[31] WaGa_EV.RNA_226        
[32] X2706_WaGa_sT_Dox_EV   
[33] X1605_WaGa_scr_Dox_EV  
[34] X1605_WaGa_wt_EV       
[35] X1605_WaGa_sT_Dox_EV   
[36] X1107_WaGa_scr_DMSO_EV 
[37] WaGa_EV.RNA_2          
[38] WaGa_EV.RNA_147        
[39] WaGa_RNA_118           
[40] X1107_WaGa_scr_Dox_EV  
[41] X1107_WaGa_sT_DMSO_EV  
[42] WaGa_RNA_147           
[43] X2706_WaGa_wt_EV





d.raw<- read.delim2("merged_gene_counts.txt",sep="\t", header=TRUE, row.names=1)
colnames(d.raw)<- c("gene_name","X042_MKL.1_wt_EV","MKL.1_RNA_147","X042_MKL.1_sT_DMSO","MKL.1_RNA","X0505_MKL.1_scr_DMSO_EV","X0505_MKL.1_sT_DMSO_EV","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","X042_MKL.1_scr_Dox_EV","X042_MKL.1_scr_DMSO_EV","MKL.1_EV.RNA_118","X0505_MKL.1_scr_Dox_EV","MKL.1_EV.RNA","X042_MKL.1_sT_Dox","X0505_MKL.1_sT_Dox_EV","MKL.1_RNA_118","MKL.1_EV.RNA_2",      "Geneid.1","gene_name.1","X1605_WaGa_sT_DMSO_EV","WaGa_EV.RNA_118","WaGa_EV.RNA","X2706_WaGa_scr_Dox_EV","X2706_WaGa_sT_DMSO_EV","X1107_WaGa_wt_EV","X1107_WaGa_sT_Dox_EV","WaGa_RNA","X1605_WaGa_scr_DMSO_EV","X2706_WaGa_scr_DMSO_EV","WaGa_EV.RNA_226","X2706_WaGa_sT_Dox_EV","X1605_WaGa_scr_Dox_EV","X1605_WaGa_wt_EV","X1605_WaGa_sT_Dox_EV","X1107_WaGa_scr_DMSO_EV","WaGa_EV.RNA_2","WaGa_EV.RNA_147","WaGa_RNA_118","X1107_WaGa_scr_Dox_EV","X1107_WaGa_sT_DMSO_EV","WaGa_RNA_147","X2706_WaGa_wt_EV")

col_order <- c("gene_name",  "MKL.1_RNA","MKL.1_RNA_118","MKL.1_RNA_147","MKL.1_EV.RNA","MKL.1_EV.RNA_2","MKL.1_EV.RNA_118","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","X042_MKL.1_wt_EV","X042_MKL.1_sT_DMSO","X0505_MKL.1_sT_DMSO_EV","X042_MKL.1_scr_DMSO_EV","X0505_MKL.1_scr_DMSO_EV","X042_MKL.1_sT_Dox","X0505_MKL.1_sT_Dox_EV","X042_MKL.1_scr_Dox_EV","X0505_MKL.1_scr_Dox_EV",     "Geneid.1","gene_name.1",    "WaGa_RNA","WaGa_RNA_118","WaGa_RNA_147",    "WaGa_EV.RNA","WaGa_EV.RNA_2","WaGa_EV.RNA_118","WaGa_EV.RNA_147","WaGa_EV.RNA_226","X1107_WaGa_wt_EV","X1605_WaGa_wt_EV","X2706_WaGa_wt_EV",    "X1107_WaGa_sT_DMSO_EV","X1605_WaGa_sT_DMSO_EV","X2706_WaGa_sT_DMSO_EV",    "X1107_WaGa_scr_DMSO_EV","X1605_WaGa_scr_DMSO_EV","X2706_WaGa_scr_DMSO_EV",     "X1107_WaGa_sT_Dox_EV","X1605_WaGa_sT_Dox_EV","X2706_WaGa_sT_Dox_EV",     "X1107_WaGa_scr_Dox_EV","X1605_WaGa_scr_Dox_EV","X2706_WaGa_scr_Dox_EV")
reordered.raw <- d.raw[,col_order]

#rename 
colnames(reordered.raw) <- c("gene_name", "MKL-1 RNA","MKL-1 RNA 118","MKL-1 RNA 147","MKL-1 EV","MKL-1 EV 2","MKL-1 EV 118","MKL-1 EV 87","MKL-1 EV 27","MKL-1 EV 042","MKL-1 EV sT DMSO 042","MKL-1 EV sT DMSO 0505","MKL-1 EV scr DMSO 042","MKL-1 EV scr DMSO 0505","MKL-1 EV sT Dox 042","MKL-1 EV sT Dox 0505","MKL-1 EV scr Dox 042","MKL-1 EV scr Dox 0505",   "Geneid.1","gene_name.1",    "WaGa RNA","WaGa RNA 118","WaGa RNA 147",    "WaGa EV","WaGa EV 2","WaGa EV 118","WaGa EV 147","WaGa EV 226","WaGa EV 1107","WaGa EV 1605","WaGa EV 2706",    "WaGa EV sT DMSO 1107","WaGa EV sT DMSO 1605","WaGa EV sT DMSO 2706",     "WaGa EV scr DMSO 1107","WaGa EV scr DMSO 1605","WaGa EV scr DMSO 2706",     "WaGa EV sT Dox 1107","WaGa EV sT Dox 1605","WaGa EV sT Dox 2706",     "WaGa EV scr Dox 1107","WaGa EV scr Dox 1605","WaGa EV scr Dox 2706")

reordered.raw$gene_name <- NULL
reordered.raw$Geneid.1 <- NULL
reordered.raw$gene_name.1 <- NULL
write.csv(reordered.raw, file="counts.txt")
#IMPORTANT that we should filter the data with the counts in the STEP!
d <- reordered.raw[rowSums(reordered.raw>3)>2,]





condition_for_pca = as.factor(c("RNA","RNA","RNA","EV","EV","EV","EV","EV","EV","sT.DMSO","sT.DMSO","scr.DMSO","scr.DMSO","sT.Dox","sT.Dox","scr.Dox","scr.Dox",    "RNA","RNA","RNA","EV","EV","EV","EV","EV","EV","EV","EV","sT.DMSO","sT.DMSO","sT.DMSO","scr.DMSO","scr.DMSO","scr.DMSO","sT.Dox","sT.Dox","sT.Dox","scr.Dox","scr.Dox","scr.Dox"))
condition = as.factor(c("MKL1.RNA","MKL1.RNA","MKL1.RNA","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.EV","MKL1.sT.DMSO","MKL1.sT.DMSO","MKL1.scr.DMSO","MKL1.scr.DMSO","MKL1.sT.Dox","MKL1.sT.Dox","MKL1.scr.Dox","MKL1.scr.Dox",    "WaGa.RNA","WaGa.RNA","WaGa.RNA","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.EV","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.sT.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.scr.DMSO","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.sT.Dox","WaGa.scr.Dox","WaGa.scr.Dox","WaGa.scr.Dox"))
#not sure if they rep1 in the first time is the same to the second time.
donor = as.factor(c("1","118","147","1","2","118","87","27","042","042","0505","042","0505","042","0505","042","0505",    "1","118","147",  "1","2","118","147","226","1107","1605","2706",   "1107","1605","2706","1107","1605","2706","1107","1605","2706","1107","1605","2706"))
batch = as.factor(c("2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08","2022.08",    "2021.08","2021.09","2021.09","2021.08","2021.08","2021.09","2021.09","2021.09","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11","2022.11"))
cell.line = as.factor(c("MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1","MKL-1",    "WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa"))
ids = as.factor(c("MKL1.RNA","MKL1.RNA.118","MKL1.RNA.147",  "MKL1.EV","MKL1.EV.2","MKL1.EV.118","MKL1.EV.87","MKL1.EV.27","MKL1.EV.042",  "MKL1.EV.sT.DMSO.042","MKL1.EV.sT.DMSO.0505",  "MKL1.EV.scr.DMSO.042","MKL1.EV.scr.DMSO.0505",  "MKL1.EV.sT.Dox.042","MKL1.EV.sT.Dox.0505",  "MKL1.EV.scr.Dox.042","MKL1.EV.scr.Dox.0505",      "WaGa.RNA","WaGa.RNA.118","WaGa.RNA.147",  "WaGa.EV","WaGa.EV.2","WaGa.EV.118","WaGa.EV.147","WaGa.EV.226","WaGa.EV.1107","WaGa.EV.1605","WaGa.EV.2706",  "WaGa.EV.sT.DMSO.1107","WaGa.EV.sT.DMSO.1605","WaGa.EV.sT.DMSO.2706",  "WaGa.EV.scr.DMSO.1107","WaGa.EV.scr.DMSO.1605","WaGa.EV.scr.DMSO.2706",  "WaGa.EV.sT.Dox.1107","WaGa.EV.sT.Dox.1605","WaGa.EV.sT.Dox.2706",  "WaGa.EV.scr.Dox.1107","WaGa.EV.scr.Dox.1605","WaGa.EV.scr.Dox.2706"))
#DEL ids = as.factor(c("MKL.1_RNA","MKL.1_RNA_118","MKL.1_RNA_147","MKL.1_EV.RNA","MKL.1_EV.RNA_2","MKL.1_EV.RNA_118","MKL.1_EV.RNA_87","MKL.1_EV.RNA_27","042_MKL.1_wt_EV","042_MKL.1_sT_DMSO","0505_MKL.1_sT_DMSO_EV","042_MKL.1_scr_DMSO_EV","0505_MKL.1_scr_DMSO_EV","042_MKL.1_sT_Dox","0505_MKL.1_sT_Dox_EV","042_MKL.1_scr_Dox_EV","0505_MKL.1_scr_Dox_EV"))

#IMPORTANT: using d instead of reordered.raw.
#cData = data.frame(row.names=colnames(d), condition=condition,  batch=batch, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~batch+condition)
#cData = data.frame(row.names=colnames(d), condition=condition, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~condition)
cData = data.frame(row.names=colnames(d), condition=condition,  donor=donor, batch=batch, cell.line=cell.line, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~batch+condition_for_pca)
dds<-DESeqDataSetFromMatrix(countData=d, colData=cData, design=~batch+condition)

#rld <- vst(dds)
rld <- rlogTransformation(dds)



# -- before pca --
png("pca_before_batch_correction.png", 1200, 800)
#plotPCA(rld, intgroup=c("condition"))
plotPCA(rld, intgroup = c("condition", "batch"))
#plotPCA(rld, intgroup = c("condition", "ids"))
#plotPCA(rld, "batch")
dev.off()

# -- before heatmap --
## generate the pairwise comparison between samples
library(gplots) 
library("RColorBrewer")
png("heatmap_before_donor_correction.png", 1200, 800)
distsRL <- dist(t(assay(rld)))
mat <- as.matrix(distsRL)
#paste( rld$dex, rld$cell, sep="-" )
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,batch, sep=":"))
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,ids, sep=":"))
hc <- hclust(distsRL)
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
dev.off()

# -- remove batch effect --
mat <- assay(rld)
mm <- model.matrix(~condition, colData(rld))
mat <- limma::removeBatchEffect(mat, batch=rld$batch, design=mm)
assay(rld) <- mat

# -- after pca --
png("pca_after_batch_correction.png", 1200, 800)
svg("pca_after_batch_correction.svg")
plotPCA(rld, intgroup=c("condition"))
dev.off()


library(ggplot2)
data <- plotPCA(rld, intgroup=c("condition", "cell.line"), returnData=TRUE)
colnames(data) <- c("PC1","PC2","group2","Group","Cell.line","name")
percentVar <- round(100 * attr(data, "percentVar"))
#, shape=donor
#png("pca6.png", 1000, 1000)
svg("pca6.svg", 10, 8)
ggplot(data, aes(PC1, PC2, color=Group, shape=Cell.line)) +
  geom_point(size=3) +
  scale_color_manual(values = c("RNA" = "grey",
                                "EV"="cyan",
                                "scr.DMSO"="#b2df8a",
                                "sT.DMSO"="#33a02c",
                                "scr.Dox"="#fb9a99",
                                "sT.Dox"="#e31a1c")) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) +
  coord_fixed()
dev.off()

ggplot(data, aes(PC1, PC2, color=condition, shape=donor)) +
  geom_point(size=8) +
  scale_color_manual(values = c("untreated" = "grey",
                                "mCh d3"="#a6cee3",
                                "mCh d8"="#1f78b4",
                                "GFP+mCh d9/12"="cyan",
                                "GFP d3"="#b2df8a",
                                "GFP d8"="#33a02c",
                                "sT d3"="#fb9a99",
                                "sT d8"="#e31a1c",
                                "LT d3"="#fdbf6f",
                                "LT d8"="#ff7f00",
                                "LTtr d3"="#cab2d6",
                                "LTtr d8"="#6a3d9a",
                                "sT+LT d3"="#ffff99",                        
                                "sT+LTtr d9/12"="#a14a1a")) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + theme(axis.text = element_text(face="bold",size = 21), axis.title = element_text(face="bold",size = 21)) + theme(legend.text = element_text(size = 20)) + theme(legend.title = element_text(size = 22)) + guides(color = guide_legend(override.aes = list(size = 10)), shape = guide_legend(override.aes = list(size = 10)), alpha = guide_legend(override.aes = list(size = 10)))
#axis.title = element_text(face="bold",size = 20)
#p + theme(axis.text.x = element_text(face="bold",size=14), axis.text.y = element_text(face="bold",size=14))
#+ coord_fixed()
#+ theme(
#    # Set the width to 6 inches
#    fig.width = 6,
#    # Set the height to 4 inches
#    fig.height = 4
#  )

# -- after heatmap --
## generate the pairwise comparison between samples
png("heatmap_after_donor_correction.png", 1200, 800)
distsRL <- dist(t(assay(rld)))
mat <- as.matrix(distsRL)
rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,donor, sep=":"))
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(condition,ids, sep=":"))
hc <- hclust(distsRL)
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
dev.off()




#convert bam to bigwig using deepTools by feeding inverse of DESeq’s size Factor
sizeFactors(dds)
#NULL
dds <- estimateSizeFactors(dds)
> sizeFactors(dds)
             MKL-1 RNA          MKL-1 RNA 118          MKL-1 RNA 147 
             2.5698732              1.6270903              1.9233161 
              MKL-1 EV             MKL-1 EV 2           MKL-1 EV 118 
             1.5298145              1.2829153              0.6786873 
           MKL-1 EV 87            MKL-1 EV 27           MKL-1 EV 042 
             0.6741877              0.4176214              1.3361018 
  MKL-1 EV sT DMSO 042  MKL-1 EV sT DMSO 0505  MKL-1 EV scr DMSO 042 
             0.9539241              1.7357673              1.2957555 
MKL-1 EV scr DMSO 0505    MKL-1 EV sT Dox 042   MKL-1 EV sT Dox 0505 
             1.0908451              1.3658071              1.1222026 
  MKL-1 EV scr Dox 042  MKL-1 EV scr Dox 0505               WaGa RNA 
             1.4708224              1.1242917              2.1093923 
          WaGa RNA 118           WaGa RNA 147                WaGa EV 
             1.6923137              1.6712181              0.6021000 
             WaGa EV 2            WaGa EV 118            WaGa EV 147 
             1.2486671              0.4519720              0.5693254 
           WaGa EV 226           WaGa EV 1107           WaGa EV 1605 
             0.5449690              1.4884895              1.2630959 
          WaGa EV 2706   WaGa EV sT DMSO 1107   WaGa EV sT DMSO 1605 
             0.7678406              0.7981256              0.9055005 
  WaGa EV sT DMSO 2706  WaGa EV scr DMSO 1107  WaGa EV scr DMSO 1605 
             1.0602975              0.7928814              0.8116066 
 WaGa EV scr DMSO 2706    WaGa EV sT Dox 1107    WaGa EV sT Dox 1605 
             1.0660429              0.8551613              0.9547309 
   WaGa EV sT Dox 2706   WaGa EV scr Dox 1107   WaGa EV scr Dox 1605 
             0.9081678              0.7440897              0.8735994 
  WaGa EV scr Dox 2706 
             1.0146042

raw_counts <- counts(dds)
normalized_counts <- counts(dds, normalized=TRUE)
write.table(raw_counts, file="raw_counts.txt", sep="\t", quote=F, col.names=NA)
write.table(normalized_counts, file="normalized_counts.txt", sep="\t", quote=F, col.names=NA)


#bamCoverage --bam ../markDuplicates/${sample}Aligned.sortedByCoord.out.bam -o ${sample}_norm.bw --binSize 10 --scaleFactor  --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_RNAAligned.sortedByCoord.out.markDups.bam -o WaGa_RNA.bw --binSize 10 --scaleFactor 0.4958732    --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_RNA_118Aligned.sortedByCoord.out.markDups.bam -o WaGa_RNA_118.bw --binSize 10 --scaleFactor 0.6013898        --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_RNA_147Aligned.sortedByCoord.out.markDups.bam -o WaGa_RNA_147.bw --binSize 10 --scaleFactor 0.6154516      --effectiveGenomeSize 2864785220

bamCoverage --bam ../markDuplicates/MKL1_RNAAligned.sortedByCoord.out.markDups.bam -o MKL1_RNA.bw --binSize 10 --scaleFactor  0.4078775      --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_RNA_118Aligned.sortedByCoord.out.markDups.bam -o MKL1_RNA_118.bw --binSize 10 --scaleFactor 0.6525297       --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_RNA_147Aligned.sortedByCoord.out.markDups.bam -o MKL1_RNA_147.bw --binSize 10 --scaleFactor 0.5331748       --effectiveGenomeSize 2864785220

bamCoverage --bam ../markDuplicates/WaGa_EV_RNAAligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA.bw --binSize 10 --scaleFactor 1.733964       --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_2Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_2.bw --binSize 10 --scaleFactor  0.7761222      --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_118Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_118.bw --binSize 10 --scaleFactor  2.222971      --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_147Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_147.bw --binSize 10 --scaleFactor  1.679485     --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/WaGa_EV_RNA_226Aligned.sortedByCoord.out.markDups.bam -o WaGa_EV_RNA_226.bw --binSize 10 --scaleFactor  1.901282     --effectiveGenomeSize 2864785220

bamCoverage --bam ../markDuplicates/MKL1_EV_RNAAligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA.bw --binSize 10 --scaleFactor 0.6469583       --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_2Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_2.bw --binSize 10 --scaleFactor 0.6877478       --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_27Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_27.bw --binSize 10 --scaleFactor 2.462424       --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_87Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_87.bw --binSize 10 --scaleFactor 1.411154       --effectiveGenomeSize 2864785220
bamCoverage --bam ../markDuplicates/MKL1_EV_RNA_118Aligned.sortedByCoord.out.markDups.bam -o MKL1_EV_RNA_118.bw --binSize 10 --scaleFactor 1.544239       --effectiveGenomeSize 2864785220



setwd("/home/jhuang/DATA/Data_Ute/Data_RNA-Seq_MKL-1_WaGa/results/featureCounts/degenes")
#---- * to untreated ----
dds$condition <- relevel(dds$condition, "WaGa.RNA")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.EV_vs_WaGa.RNA")

dds$condition <- relevel(dds$condition, "WaGa.EV")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.scr.DMSO_vs_WaGa.EV", "WaGa.sT.DMSO_vs_WaGa.EV", "WaGa.scr.Dox_vs_WaGa.EV", "WaGa.sT.Dox_vs_WaGa.EV")

dds$condition <- relevel(dds$condition, "WaGa.scr.Dox")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.sT.Dox_vs_WaGa.scr.Dox")

dds$condition <- relevel(dds$condition, "WaGa.scr.DMSO")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("WaGa.sT.DMSO_vs_WaGa.scr.DMSO")



dds$condition <- relevel(dds$condition, "MKL1.RNA")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.EV_vs_MKL1.RNA",  "WaGa.RNA_vs_MKL1.RNA")

dds$condition <- relevel(dds$condition, "MKL1.EV")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.scr.DMSO_vs_MKL1.EV", "MKL1.sT.DMSO_vs_MKL1.EV", "MKL1.scr.Dox_vs_MKL1.EV", "MKL1.sT.Dox_vs_MKL1.EV",  "WaGa.EV_vs_MKL1.EV")

dds$condition <- relevel(dds$condition, "MKL1.scr.Dox")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.sT.Dox_vs_MKL1.scr.Dox")

dds$condition <- relevel(dds$condition, "MKL1.scr.DMSO")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("MKL1.sT.DMSO_vs_MKL1.scr.DMSO")



##https://bioconductor.statistik.tu-dortmund.de/packages/3.7/data/annotation/
#BiocManager::install("EnsDb.Mmusculus.v79")
#library(EnsDb.Mmusculus.v79)
#edb <- EnsDb.Mmusculus.v79


#https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#selecting-an-ensembl-biomart-database-and-dataset
#https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#selecting-an-ensembl-biomart-database-and-dataset
library(biomaRt)
listEnsembl()
listMarts()
#ensembl <- useEnsembl(biomart = "genes", mirror="asia")  # default is Mouse strains 104
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "mmusculus_gene_ensembl", mirror = "www")
#ensembl = useMart("ensembl_mart_44", dataset="hsapiens_gene_ensembl",archive=TRUE, mysql=TRUE)
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "mmusculus_gene_ensembl", version="104")
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", version="86")
#ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", version="GRCh37")
#--> total 69, 27  GRCh38.p7 and 39  GRCm38.p4
ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl", version="104")
datasets <- listDatasets(ensembl)
#--> total 202   80                         GRCh38.p13         107                            GRCm39
#80           hsapiens_gene_ensembl                                      Human genes (GRCh38.p13)                         GRCh38.p13
#107         mmusculus_gene_ensembl                                        Mouse genes (GRCm39)                            GRCm39

> listEnsemblArchives()
             name     date                                url version
1  Ensembl GRCh37 Feb 2014          http://grch37.ensembl.org  GRCh37  *
2     Ensembl 104 May 2021 http://may2021.archive.ensembl.org     104  *
3     Ensembl 103 Feb 2021 http://feb2021.archive.ensembl.org     103
4     Ensembl 102 Nov 2020 http://nov2020.archive.ensembl.org     102

attributes = listAttributes(ensembl)
attributes[1:25,]


#library("dplyr")
for (i in clist) {
#i<-clist[1]
  contrast = paste("condition", i, sep="_")
  res = results(dds, name=contrast)
  res <- res[!is.na(res$log2FoldChange),]
  #geness <- AnnotationDbi::select(edb86, keys = rownames(res), keytype = "GENEID", columns = c("ENTREZID","EXONID","GENEBIOTYPE","GENEID","GENENAME","PROTEINDOMAINSOURCE","PROTEINID","SEQNAME","SEQSTRAND","SYMBOL","TXBIOTYPE","TXID","TXNAME","UNIPROTID"))
  #geness <- AnnotationDbi::select(edb86, keys = rownames(res), keytype = "GENEID", columns = c("GENEID", "ENTREZID", "SYMBOL", "GENENAME","GENEBIOTYPE","TXBIOTYPE","SEQSTRAND","UNIPROTID"))
  # In the ENSEMBL-database, GENEID is ENSEMBL-ID.
  #geness <- AnnotationDbi::select(edb86, keys = rownames(res), keytype = "GENEID", columns = c("GENEID", "SYMBOL", "GENEBIOTYPE"))  #  "ENTREZID", "TXID","TXBIOTYPE","TXSEQSTART","TXSEQEND"
  #geness <- geness[!duplicated(geness$GENEID), ]

  #using getBM replacing AnnotationDbi::select
  #filters = 'ensembl_gene_id' means the records should always have a valid ensembl_gene_ids.
  geness <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'entrezgene_id', 'chromosome_name', 'start_position', 'end_position', 'strand', 'description'),
      filters = 'ensembl_gene_id',
      values = rownames(res), 
      mart = ensembl)
  geness_uniq <- distinct(geness, ensembl_gene_id, .keep_all= TRUE)

  #merge by column by common colunmn name, in the case "GENEID"
  res$ENSEMBL = rownames(res)
  identical(rownames(res), rownames(geness_uniq))
  res_df <- as.data.frame(res)
  geness_res <- merge(geness_uniq, res_df, by.x="ensembl_gene_id", by.y="ENSEMBL")
  dim(geness_res)
  rownames(geness_res) <- geness_res$ensembl_gene_id
  geness_res$ensembl_gene_id <- NULL
  write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
  up <- subset(geness_res, padj<=0.05 & log2FoldChange>=2)
  down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-2)
  write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
  write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}


#   49751 scr_DMSO_vs_scr_Dox-all.txt
#       1 scr_DMSO_vs_scr_Dox-down.txt
#       7 scr_DMSO_vs_scr_Dox-up.txt
#   49751 sT_DMSO_vs_sT_Dox-all.txt
#       1 sT_DMSO_vs_sT_Dox-down.txt
#       1 sT_DMSO_vs_sT_Dox-up.txt
#   99512 total

#   846 EV.RNA_vs_RNA-down.id
# 18874 EV.RNA_vs_RNA-up.id

#1803 wt_EV_vs_RNA-down.txt
#20538 wt_EV_vs_RNA-up.txt



# --------------------------------------------------------
# ---- Volcano Plots with automatically finding top_g ----
#A canonical visualization for interpreting differential gene expression results is the volcano plot.
library(ggrepel)

#"EV.RNA_vs_RNA"  "scr.DMSO_vs_EV.RNA" "sT.DMSO_vs_EV.RNA" "scr.Dox_vs_EV.RNA" "sT.Dox_vs_EV.RNA"  "sT.Dox_vs_scr.Dox"  "sT.DMSO_vs_scr.DMSO"
geness_res <- read.csv(file = paste("WaGa.EV_vs_WaGa.RNA", "all.txt", sep="-"), row.names=1)
geness_res$Color <- "NS or log2FC < 2.0"
geness_res$Color[geness_res$pvalue < 0.05] <- "P < 0.05"
geness_res$Color[geness_res$padj < 0.05] <- "P-adj < 0.05"
geness_res$Color[geness_res$padj < 0.001] <- "P-adj < 0.001"
geness_res$Color[abs(geness_res$log2FoldChange) < 2.0] <- "NS or log2FC < 2.0"
geness_res$Color <- factor(geness_res$Color,
                        levels = c("NS or log2FC < 2.0", "P < 0.05",
                                   "P-adj < 0.05", "P-adj < 0.001"))

geness_res$invert_P <- (-log10(geness_res$pvalue)) * sign(geness_res$log2FoldChange)
top_g <- c()
top_g <- c(top_g, geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = TRUE)[1:200]], geness_res[, 'external_gene_name'][order(geness_res[, 'invert_P'], decreasing = FALSE)[1:200]])
top_g <- unique(top_g)
geness_res <- geness_res[, -1*ncol(geness_res)]  #remove invert_P from matrix


png("WaGa.EV_vs_WaGa.RNA.png",width=1400, height=1000)
ggplot(geness_res,
       aes(x = log2FoldChange, y = -log10(pvalue),
           color = Color, label = external_gene_name)) +
  geom_vline(xintercept = c(2.0, -2.0), lty = "dashed") +
  geom_hline(yintercept = -log10(0.05), lty = "dashed") +
  geom_point() +
  labs(x = "log2(FC)",
       y = "Significance, -log10(P)",
       color = "Significance") +
  scale_color_manual(values = c(`P-adj < 0.001` = "dodgerblue",
                                `P-adj < 0.05` = "lightblue",
                                `P < 0.05` = "orange2",
                                `NS or log2FC < 2.0` = "gray"),
                     guide = guide_legend(override.aes = list(size = 4))) +
  scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
  geom_text_repel(data = subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0)),
                  size = 4, point.padding = 0.15, color = "black",
                  min.segment.length = .1, box.padding = .2, lwd = 2) +
  theme_bw(base_size = 16) +
  theme(legend.position = "bottom")
dev.off()


#sed -i -e 's/Color/Category/g' *_Category.csv







#x <- data.frame(k1 = c(NA,3,4,5,2), k2 = c(1,NA,4,5,2), data = 6:10)
#merge(x, y, by = "k2")

for cmp in "EV.RNA_vs_RNA"  "scr.DMSO_vs_EV.RNA" "sT.DMSO_vs_EV.RNA" "scr.Dox_vs_EV.RNA" "sT.Dox_vs_EV.RNA"  "sT.Dox_vs_scr.Dox"  "sT.DMSO_vs_scr.DMSO"; do
  echo "~/Tools/csv2xls-0.4/csv_to_xls.py ${cmp}-all.txt ${cmp}-up.txt ${cmp}-down.txt -d$',' -o ${cmp}.xls" 
done
~/Tools/csv2xls-0.4/csv_to_xls.py EV.RNA_vs_RNA-all.txt EV.RNA_vs_RNA-up.txt EV.RNA_vs_RNA-down.txt -d$',' -o EV.RNA_vs_RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py scr.DMSO_vs_EV.RNA-all.txt scr.DMSO_vs_EV.RNA-up.txt scr.DMSO_vs_EV.RNA-down.txt -d$',' -o scr.DMSO_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.DMSO_vs_EV.RNA-all.txt sT.DMSO_vs_EV.RNA-up.txt sT.DMSO_vs_EV.RNA-down.txt -d$',' -o sT.DMSO_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py scr.Dox_vs_EV.RNA-all.txt scr.Dox_vs_EV.RNA-up.txt scr.Dox_vs_EV.RNA-down.txt -d$',' -o scr.Dox_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.Dox_vs_EV.RNA-all.txt sT.Dox_vs_EV.RNA-up.txt sT.Dox_vs_EV.RNA-down.txt -d$',' -o sT.Dox_vs_EV.RNA.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.Dox_vs_scr.Dox-all.txt sT.Dox_vs_scr.Dox-up.txt sT.Dox_vs_scr.Dox-down.txt -d$',' -o sT.Dox_vs_scr.Dox.xls
~/Tools/csv2xls-0.4/csv_to_xls.py sT.DMSO_vs_scr.DMSO-all.txt sT.DMSO_vs_scr.DMSO-up.txt sT.DMSO_vs_scr.DMSO-down.txt -d$',' -o sT.DMSO_vs_scr.DMSO.xls

~/Tools/csv2xls-0.4/csv_to_xls.py WaGa.EV_vs_WaGa.RNA-all.txt WaGa.EV_vs_WaGa.RNA-up.txt WaGa.EV_vs_WaGa.RNA-down.txt -d$',' -o WaGa.EV_vs_WaGa.RNA.xls



# --------------------------------------------------------
# -- clustering the genes and draw heatmap --
./MKL1.EV_vs_MKL1.RNA-up.txt
./MKL1.sT.DMSO_vs_MKL1.EV-up.txt
./WaGa.scr.DMSO_vs_WaGa.EV-up.txt
./WaGa.sT.Dox_vs_WaGa.EV-up.txt
./WaGa.sT.DMSO_vs_WaGa.scr.DMSO-up.txt
./MKL1.scr.Dox_vs_MKL1.EV-up.txt
./MKL1.sT.Dox_vs_MKL1.EV-up.txt
./WaGa.scr.Dox_vs_WaGa.EV-up.txt
./MKL1.sT.DMSO_vs_MKL1.scr.DMSO-up.txt
./WaGa.sT.DMSO_vs_WaGa.EV-up.txt
./WaGa.EV_vs_MKL1.EV-up.txt
./WaGa.RNA_vs_MKL1.RNA-up.txt
./MKL1.sT.Dox_vs_MKL1.scr.Dox-up.txt
./WaGa.EV_vs_WaGa.RNA-up.txt
./WaGa.sT.Dox_vs_WaGa.scr.Dox-up.txt
./MKL1.scr.DMSO_vs_MKL1.EV-up.txt
./WaGa.sT.Dox_vs_WaGa.scr.Dox-down.txt
./WaGa.sT.Dox_vs_WaGa.EV-down.txt
./MKL1.sT.Dox_vs_MKL1.scr.Dox-down.txt
./WaGa.sT.DMSO_vs_WaGa.scr.DMSO-down.txt
./MKL1.sT.DMSO_vs_MKL1.scr.DMSO-down.txt
./WaGa.EV_vs_MKL1.EV-down.txt
./MKL1.scr.DMSO_vs_MKL1.EV-down.txt
./WaGa.scr.Dox_vs_WaGa.EV-down.txt
./MKL1.EV_vs_MKL1.RNA-down.txt
./MKL1.sT.Dox_vs_MKL1.EV-down.txt
./MKL1.sT.DMSO_vs_MKL1.EV-down.txt
./WaGa.scr.DMSO_vs_WaGa.EV-down.txt
./WaGa.RNA_vs_MKL1.RNA-down.txt
./WaGa.sT.DMSO_vs_WaGa.EV-down.txt
./WaGa.EV_vs_WaGa.RNA-down.txt
./MKL1.scr.Dox_vs_MKL1.EV-down.txt


install.packages("gplots")
library("gplots")

#Option3: as paper described, A heatmap showing expression values of all DEGs which are significant between any pair conditions.
all_genes <- c(rownames(mock_sT_d8_vs_mock_sT_d3_sig),rownames(sT_d3_vs_mock_sT_d3_sig),rownames(sT_d8_vs_mock_sT_d8_sig),rownames(sT_d8_vs_sT_d3_sig))     #873
all_genes <- unique(all_genes)   #663
#all_genes2 <- c(rownames(WAC_vs_mock_sig),rownames(WAP_vs_mock_sig),rownames(WAC_vs_WAP_sig))   #3917
#all_genes2 <- unique(all_genes2)   #2608
#intersected_genes <- intersect(all_genes, all_genes2)  # 2608
#RNASeq.NoCellLine <- read.csv(file ="gene_expression_keeping_condition.txt", row.names=1)
RNASeq.NoCellLine_  <- RNASeq.NoCellLine[all_genes,]
write.csv(as.data.frame(RNASeq.NoCellLine_), file ="gene_expression_keeping_condition.txt")

RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, mock_sT_d3 = rowMeans(RNASeq.NoCellLine_[, 1:2]))
RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, mock_sT_d8 = rowMeans(RNASeq.NoCellLine_[, 3:4]))
RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, sT_d3 = rowMeans(RNASeq.NoCellLine_[, 5:6]))
RNASeq.NoCellLine_ <- cbind(RNASeq.NoCellLine_, sT_d8 = rowMeans(RNASeq.NoCellLine_[, 7:8]))
RNASeq.NoCellLine_ <- RNASeq.NoCellLine_[,c(-1:-8)]        #663x4
#RNASeq.NoCellLine__ <- read.csv(file ="gene_expression_keeping_condition.txt", row.names=1)
write.csv(as.data.frame(RNASeq.NoCellLine_), file ="gene_expression_merging_condition.txt")


cut -d',' -f1-1 ./sT.DMSO_vs_scr.DMSO-up.txt > sT.DMSO_vs_scr.DMSO-up.id
cut -d',' -f1-1 ./scr.DMSO_vs_EV.RNA-up.txt > scr.DMSO_vs_EV.RNA-up.id
cut -d',' -f1-1 ./sT.DMSO_vs_EV.RNA-up.txt > sT.DMSO_vs_EV.RNA-up.id
cut -d',' -f1-1 ./sT.Dox_vs_scr.Dox-up.txt > sT.Dox_vs_scr.Dox-up.id
cut -d',' -f1-1 ./sT.Dox_vs_EV.RNA-up.txt > sT.Dox_vs_EV.RNA-up.id
cut -d',' -f1-1 ./scr.Dox_vs_EV.RNA-up.txt > scr.Dox_vs_EV.RNA-up.id
cut -d',' -f1-1 ./EV.RNA_vs_RNA-up.txt > EV.RNA_vs_RNA-up.id
cut -d',' -f1-1 ./sT.DMSO_vs_scr.DMSO-down.txt > sT.DMSO_vs_scr.DMSO-down.id
cut -d',' -f1-1 ./scr.DMSO_vs_EV.RNA-down.txt > scr.DMSO_vs_EV.RNA-down.id
cut -d',' -f1-1 ./sT.DMSO_vs_EV.RNA-down.txt > sT.DMSO_vs_EV.RNA-down.id
cut -d',' -f1-1 ./sT.Dox_vs_scr.Dox-down.txt > sT.Dox_vs_scr.Dox-down.id
cut -d',' -f1-1 ./sT.Dox_vs_EV.RNA-down.txt > sT.Dox_vs_EV.RNA-down.id
cut -d',' -f1-1 ./scr.Dox_vs_EV.RNA-down.txt > scr.Dox_vs_EV.RNA-down.id
cut -d',' -f1-1 ./EV.RNA_vs_RNA-down.txt > EV.RNA_vs_RNA-down.id


#MKL-1    571 MKL1_EV_RNA_vs_MKL1_RNA-down.id
#MKL-1  16443 MKL1_EV_RNA_vs_MKL1_RNA-up.id
    92 WaGa_EV_RNA_vs_MKL1_EV_RNA-down.id
   114 WaGa_EV_RNA_vs_MKL1_EV_RNA-up.id
   324 WaGa_EV_RNA_vs_WaGa_RNA-down.id
 19911 WaGa_EV_RNA_vs_WaGa_RNA-up.id
#-- MKL-1 vs WaGa --
 #  690 WaGa_RNA_vs_MKL1_RNA-down.id
   761 WaGa_RNA_vs_MKL1_RNA-up.id
 38906 total
38906 ids










#-- MKL-1 --
#add Gene_Id in the first line.
GOI <- read.csv("ids")$Gene_Id
RNASeq.NoCellLine <- assay(rld)

#clustering methods: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).  pearson or spearman
datamat = RNASeq.NoCellLine[GOI, ]
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
mycl = cutree(hr, h=max(hr$height)/1.05)
mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");

mycol = mycol[as.vector(mycl)]
sampleCols <- rep('GREY',ncol(datamat))
names(sampleCols) <- c("RNA","RNA 118","RNA 147","EV RNA","EV RNA 2","EV RNA 118","EV RNA 87","EV RNA 27","EV RNA 042","sT DMSO 042","sT DMSO 0505","scr DMSO 042","scr DMSO 0505","sT Dox 042","sT Dox 0505","scr Dox 042","scr Dox 0505")
#sampleCols[substr(colnames(RNASeq.NoCellLine_),1,4)=='mock'] <- 'GREY'

sampleCols["RNA"] <- 'DARKBLUE'
sampleCols["RNA 118"] <- 'DARKBLUE'
sampleCols["RNA 147"] <- 'DARKBLUE'

sampleCols["EV RNA"] <- 'DARKRED'
sampleCols["EV RNA 2"] <- 'DARKRED'
sampleCols["EV RNA 118"] <- 'DARKRED'
sampleCols["EV RNA 87"] <- 'DARKRED'
sampleCols["EV RNA 27"] <- 'DARKRED'
sampleCols["EV RNA 042"] <- 'DARKRED'

sampleCols["sT DMSO 042"] <- 'DARKORANGE'
sampleCols["sT DMSO 0505"] <- 'DARKORANGE'
sampleCols["scr DMSO 042"] <- 'DARKORANGE'
sampleCols["scr DMSO 0505"] <- 'DARKORANGE'

sampleCols["sT Dox 042"] <- 'DARKGREEN'
sampleCols["sT Dox 0505"] <- 'DARKGREEN'
sampleCols["scr Dox 042"] <- 'DARKGREEN'
sampleCols["scr Dox 0505"] <- 'DARKGREEN'

png("DEGs_heatmap.png", width=1000, height=1200)
heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
            scale='row',trace='none',col=bluered(75), 
            RowSideColors = mycol, ColSideColors = sampleCols, labRow="", margins=c(22,10), cexRow=8, cexCol=2, srtCol=45, lwid=c(1,7), lhei = c(1, 8))
legend("top", title = "",legend=c("WaGa_RNA","MKL1_RNA","WaGa_EV_RNA","MKL1_EV_RNA"), fill=c("DARKBLUE","DARKRED","DARKORANGE","DARKGREEN"), cex=0.8, box.lty=0)
dev.off()






#-- MKL-1 vs WaGa --
#add Gene_Id in the first line.
GOI <- read.csv("ids")$Gene_Id
RNASeq.NoCellLine <- assay(rld)

#clustering methods: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).  pearson or spearman
datamat = RNASeq.NoCellLine[GOI, ]
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
mycl = cutree(hr, h=max(hr$height)/1.05)
mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");

mycol = mycol[as.vector(mycl)]
sampleCols <- rep('GREY',ncol(datamat))
names(sampleCols) <- c("WaGa_RNA","WaGa_RNA_118","WaGa_RNA_147",  "MKL1_RNA","MKL1_RNA_118","MKL1_RNA_147",  "WaGa_EV_RNA","WaGa_EV_RNA_2","WaGa_EV_RNA_118","WaGa_EV_RNA_147","WaGa_EV_RNA_226",  "MKL1_EV_RNA","MKL1_EV_RNA_2","MKL1_EV_RNA_27","MKL1_EV_RNA_87","MKL1_EV_RNA_118")
#sampleCols[substr(colnames(RNASeq.NoCellLine_),1,4)=='mock'] <- 'GREY'

sampleCols["WaGa_RNA"] <- 'DARKBLUE'
sampleCols["WaGa_RNA_118"] <- 'DARKBLUE'
sampleCols["WaGa_RNA_147"] <- 'DARKBLUE'

sampleCols["MKL1_RNA"] <- 'DARKRED'
sampleCols["MKL1_RNA_118"] <- 'DARKRED'
sampleCols["MKL1_RNA_147"] <- 'DARKRED'

sampleCols["WaGa_EV_RNA"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_2"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_118"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_147"] <- 'DARKORANGE'
sampleCols["WaGa_EV_RNA_226"] <- 'DARKORANGE'

sampleCols["MKL1_EV_RNA"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_2"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_27"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_87"] <- 'DARKGREEN'
sampleCols["MKL1_EV_RNA_118"] <- 'DARKGREEN'

png("DEGs_heatmap.png", width=1000, height=1200)
heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
            scale='row',trace='none',col=bluered(75), 
            RowSideColors = mycol, ColSideColors = sampleCols, labRow="", margins=c(22,10), cexRow=8, cexCol=2, srtCol=45, lwid=c(1,7), lhei = c(1, 8))
legend("top", title = "",legend=c("WaGa_RNA","MKL1_RNA","WaGa_EV_RNA","MKL1_EV_RNA"), fill=c("DARKBLUE","DARKRED","DARKORANGE","DARKGREEN"), cex=0.8, box.lty=0)
dev.off()





#-- cluster members --
#c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
write.csv(names(subset(mycl, mycl == '1')),file='cluster1_YELLOW.txt')
write.csv(names(subset(mycl, mycl == '2')),file='cluster2_DARKBLUE.txt') 
write.csv(names(subset(mycl, mycl == '3')),file='cluster3_DARKORANGE.txt')  
write.csv(names(subset(mycl, mycl == '4')),file='cluster4_DARKMAGENTA.txt') 
write.csv(names(subset(mycl, mycl == '5')),file='cluster5_DARKCYAN.txt')  
#~/Tools/csv2xls-0.4/csv_to_xls.py cluster*.txt -d',' -o genelist_clusters.xls






# ------------------
# ---- Pathways ----
mkdir pathways

#--continue from BREAK POINT--
##
#source("https://bioconductor.org/biocLite.R") 
#biocLite("AnnotationDbi")
library("clusterProfiler")
library("ReactomePA")
setwd("~/DATA/Data_Anastasia_RNASeq/results/featureCounts/pathways")

for sample in WaGa_RNA_vs_MKL1_RNA MKL1_EV_RNA_vs_MKL1_RNA WaGa_EV_RNA_vs_MKL1_EV_RNA WaGa_EV_RNA_vs_WaGa_RNA; do \
echo "${sample}_up <- read.csv('../degenes/${sample}-up.txt', row.names=1)"
echo "${sample}_up_KEGG <- enrichKEGG(${sample}_up\$entrezgene_id)"
echo "write.table(as.data.frame(${sample}_up_KEGG), file = '${sample}_up_KEGG.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_down <- read.csv('../degenes/${sample}-down.txt', row.names=1)"
echo "${sample}_down_KEGG <- enrichKEGG(${sample}_down\$entrezgene_id)"
echo "write.table(as.data.frame(${sample}_down_KEGG), file = '${sample}_down_KEGG.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_sig <- rbind(${sample}_up, ${sample}_down)"
echo "${sample}_sig_KEGG <- enrichKEGG(${sample}_sig\$entrezgene_id)"
echo "write.table(as.data.frame(${sample}_sig_KEGG), file = '${sample}_sig_KEGG.txt', sep = '\t', row.names = FALSE)"
done


WaGa_RNA_vs_MKL1_RNA_up <- read.csv('../degenes/WaGa_RNA_vs_MKL1_RNA-up.txt', row.names=1)                                                                                                               
WaGa_RNA_vs_MKL1_RNA_up_KEGG <- enrichKEGG(WaGa_RNA_vs_MKL1_RNA_up$entrezgene_id)                                                                                                                        
write.table(as.data.frame(WaGa_RNA_vs_MKL1_RNA_up_KEGG), file = 'WaGa_RNA_vs_MKL1_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)                                                                       
WaGa_RNA_vs_MKL1_RNA_down <- read.csv('../degenes/WaGa_RNA_vs_MKL1_RNA-down.txt', row.names=1)                                                                                                           
WaGa_RNA_vs_MKL1_RNA_down_KEGG <- enrichKEGG(WaGa_RNA_vs_MKL1_RNA_down$entrezgene_id)                                                                                                                    
write.table(as.data.frame(WaGa_RNA_vs_MKL1_RNA_down_KEGG), file = 'WaGa_RNA_vs_MKL1_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)                                                                   
WaGa_RNA_vs_MKL1_RNA_sig <- rbind(WaGa_RNA_vs_MKL1_RNA_up, WaGa_RNA_vs_MKL1_RNA_down)
WaGa_RNA_vs_MKL1_RNA_sig_KEGG <- enrichKEGG(WaGa_RNA_vs_MKL1_RNA_sig$entrezgene_id)
write.table(as.data.frame(WaGa_RNA_vs_MKL1_RNA_sig_KEGG), file = 'WaGa_RNA_vs_MKL1_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)
MKL1_EV_RNA_vs_MKL1_RNA_up <- read.csv('../degenes/MKL1_EV_RNA_vs_MKL1_RNA-up.txt', row.names=1)
MKL1_EV_RNA_vs_MKL1_RNA_up_KEGG <- enrichKEGG(MKL1_EV_RNA_vs_MKL1_RNA_up$entrezgene_id)
write.table(as.data.frame(MKL1_EV_RNA_vs_MKL1_RNA_up_KEGG), file = 'MKL1_EV_RNA_vs_MKL1_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)
MKL1_EV_RNA_vs_MKL1_RNA_down <- read.csv('../degenes/MKL1_EV_RNA_vs_MKL1_RNA-down.txt', row.names=1)
MKL1_EV_RNA_vs_MKL1_RNA_down_KEGG <- enrichKEGG(MKL1_EV_RNA_vs_MKL1_RNA_down$entrezgene_id)
write.table(as.data.frame(MKL1_EV_RNA_vs_MKL1_RNA_down_KEGG), file = 'MKL1_EV_RNA_vs_MKL1_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)
MKL1_EV_RNA_vs_MKL1_RNA_sig <- rbind(MKL1_EV_RNA_vs_MKL1_RNA_up, MKL1_EV_RNA_vs_MKL1_RNA_down)
MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG <- enrichKEGG(MKL1_EV_RNA_vs_MKL1_RNA_sig$entrezgene_id)
write.table(as.data.frame(MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG), file = 'MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_MKL1_EV_RNA_up <- read.csv('../degenes/WaGa_EV_RNA_vs_MKL1_EV_RNA-up.txt', row.names=1)
WaGa_EV_RNA_vs_MKL1_EV_RNA_up_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_MKL1_EV_RNA_up$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_MKL1_EV_RNA_up_KEGG), file = 'WaGa_EV_RNA_vs_MKL1_EV_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_MKL1_EV_RNA_down <- read.csv('../degenes/WaGa_EV_RNA_vs_MKL1_EV_RNA-down.txt', row.names=1)
WaGa_EV_RNA_vs_MKL1_EV_RNA_down_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_MKL1_EV_RNA_down$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_MKL1_EV_RNA_down_KEGG), file = 'WaGa_EV_RNA_vs_MKL1_EV_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_MKL1_EV_RNA_sig <- rbind(WaGa_EV_RNA_vs_MKL1_EV_RNA_up, WaGa_EV_RNA_vs_MKL1_EV_RNA_down)
WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_MKL1_EV_RNA_sig$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG), file = 'WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_WaGa_RNA_up <- read.csv('../degenes/WaGa_EV_RNA_vs_WaGa_RNA-up.txt', row.names=1)
WaGa_EV_RNA_vs_WaGa_RNA_up_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_WaGa_RNA_up$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_WaGa_RNA_up_KEGG), file = 'WaGa_EV_RNA_vs_WaGa_RNA_up_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_WaGa_RNA_down <- read.csv('../degenes/WaGa_EV_RNA_vs_WaGa_RNA-down.txt', row.names=1)
WaGa_EV_RNA_vs_WaGa_RNA_down_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_WaGa_RNA_down$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_WaGa_RNA_down_KEGG), file = 'WaGa_EV_RNA_vs_WaGa_RNA_down_KEGG.txt', sep = '\t', row.names = FALSE)
WaGa_EV_RNA_vs_WaGa_RNA_sig <- rbind(WaGa_EV_RNA_vs_WaGa_RNA_up, WaGa_EV_RNA_vs_WaGa_RNA_down)
WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG <- enrichKEGG(WaGa_EV_RNA_vs_WaGa_RNA_sig$entrezgene_id)
write.table(as.data.frame(WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG), file = 'WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG.txt', sep = '\t', row.names = FALSE)


png("pathways_KEGG.png",width=1260, height=1000)
merged_list <- merge_result(list('WaGa_RNA vs MKL1_RNA'=WaGa_RNA_vs_MKL1_RNA_sig_KEGG, 'MKL1_EV_RNA vs MKL1_RNA'=MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG, 'WaGa_EV_RNA vs MKL1_EV_RNA'=WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG, 'WaGa_EV_RNA vs WaGa_RNA'=WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG))
dotplot(merged_list, showCategory=1000)    #, font.size=6, srt = 35
dev.off()


# under CONSOLE
cd pathways_KEGG
~/Tools/csv2xls-0.4/csv_to_xls.py WaGa_RNA_vs_MKL1_RNA_sig_KEGG.txt MKL1_EV_RNA_vs_MKL1_RNA_sig_KEGG.txt WaGa_EV_RNA_vs_MKL1_EV_RNA_sig_KEGG.txt WaGa_EV_RNA_vs_WaGa_RNA_sig_KEGG.txt -d$'\t' -o pathways_KEGG.xls




# -------------
# ---- GOs ----
setwd("~/DATA/Data_Anastasia_RNASeq/results/featureCounts/GOs")
for sample in IKK1_vs_Cre IKK2_vs_Cre Nemo_vs_Cre NIK_vs_Cre; do \
echo "${sample}_up <- read.csv('../degenes/${sample}-up.txt', row.names=1)"
echo "${sample}_up_GO_BP <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='BP')"
echo "${sample}_up_GO_MF <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='MF')"
echo "${sample}_up_GO_CC <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='CC')"
#echo "${sample}_up_GO_ALL <- enrichGO(${sample}_up\$entrezgene_id, 'org.Mm.eg.db', ont='ALL')"
echo "write.table(as.data.frame(${sample}_up_GO_BP), file = '${sample}_up_GO_BP.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_up_GO_MF), file = '${sample}_up_GO_MF.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_up_GO_CC), file = '${sample}_up_GO_CC.txt', sep = '\t', row.names = FALSE)"
#echo "write.table(as.data.frame(${sample}_up_GO_ALL), file = '${sample}_up_GO_ALL.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_down <- read.csv('../degenes/${sample}-down.txt', row.names=1)"
echo "${sample}_down_GO_BP <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='BP')"
echo "${sample}_down_GO_MF <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='MF')"
echo "${sample}_down_GO_CC <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='CC')"
#echo "${sample}_down_GO_ALL <- enrichGO(${sample}_down\$entrezgene_id, 'org.Mm.eg.db', ont='ALL')"
echo "write.table(as.data.frame(${sample}_down_GO_BP), file = '${sample}_down_GO_BP.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_down_GO_MF), file = '${sample}_down_GO_MF.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_down_GO_CC), file = '${sample}_down_GO_CC.txt', sep = '\t', row.names = FALSE)"
#echo "write.table(as.data.frame(${sample}_down_GO_ALL), file = '${sample}_down_GO_ALL.txt', sep = '\t', row.names = FALSE)"
echo "${sample}_sig <- rbind(${sample}_up, ${sample}_down)"
echo "${sample}_sig_GO_BP <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='BP')"
echo "${sample}_sig_GO_MF <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='MF')"
echo "${sample}_sig_GO_CC <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='CC')"
#echo "${sample}_sig_GO_ALL <- enrichGO(${sample}_sig\$entrezgene_id, 'org.Mm.eg.db', ont='ALL')"
echo "write.table(as.data.frame(${sample}_sig_GO_BP), file = '${sample}_sig_GO_BP.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_sig_GO_MF), file = '${sample}_sig_GO_MF.txt', sep = '\t', row.names = FALSE)"
echo "write.table(as.data.frame(${sample}_sig_GO_CC), file = '${sample}_sig_GO_CC.txt', sep = '\t', row.names = FALSE)"
#echo "write.table(as.data.frame(${sample}_sig_GO_ALL), file = '${sample}_sig_GO_ALL.txt', sep = '\t', row.names = FALSE)"
done

png("GOs_BP.png",width=3000, height=24000)
merged_list <- merge_result(list('IKK1_vs_Cre_up'=IKK1_vs_Cre_up_GO_BP, 'IKK1_vs_Cre_down'=IKK1_vs_Cre_down_GO_BP, 'IKK1_vs_Cre_sig'=IKK1_vs_Cre_sig_GO_BP, 'IKK2_vs_Cre_up'=IKK2_vs_Cre_up_GO_BP, 'IKK2_vs_Cre_down'=IKK2_vs_Cre_down_GO_BP, 'IKK2_vs_Cre_sig'=IKK2_vs_Cre_sig_GO_BP, 'Nemo_vs_Cre_up'= Nemo_vs_Cre_up_GO_BP, 'Nemo_vs_Cre_down'=Nemo_vs_Cre_down_GO_BP, 'Nemo_vs_Cre_sig'=Nemo_vs_Cre_sig_GO_BP, 'NIK_vs_Cre_up'=NIK_vs_Cre_up_GO_BP, 'NIK_vs_Cre_down'=NIK_vs_Cre_down_GO_BP, 'NIK_vs_Cre_sig'=NIK_vs_Cre_sig_GO_BP))
dotplot(merged_list, showCategory=10000)
dev.off()

# under CONSOLE
#   
cd GOs
~/Tools/csv2xls-0.4/csv_to_xls.py IKK1_vs_Cre_up_GO_BP.txt IKK1_vs_Cre_down_GO_BP.txt IKK1_vs_Cre_sig_GO_BP.txt  IKK2_vs_Cre_up_GO_BP.txt IKK2_vs_Cre_down_GO_BP.txt IKK2_vs_Cre_sig_GO_BP.txt  Nemo_vs_Cre_up_GO_BP.txt Nemo_vs_Cre_down_GO_BP.txt Nemo_vs_Cre_sig_GO_BP.txt  NIK_vs_Cre_up_GO_BP.txt NIK_vs_Cre_down_GO_BP.txt NIK_vs_Cre_sig_GO_BP.txt  -d$'\t' -o GOs_BP.xls
~/Tools/csv2xls-0.4/csv_to_xls.py IKK1_vs_Cre_up_GO_MF.txt IKK1_vs_Cre_down_GO_MF.txt IKK1_vs_Cre_sig_GO_MF.txt  IKK2_vs_Cre_up_GO_MF.txt IKK2_vs_Cre_down_GO_MF.txt IKK2_vs_Cre_sig_GO_MF.txt  Nemo_vs_Cre_up_GO_MF.txt Nemo_vs_Cre_down_GO_MF.txt Nemo_vs_Cre_sig_GO_MF.txt  NIK_vs_Cre_up_GO_MF.txt NIK_vs_Cre_down_GO_MF.txt NIK_vs_Cre_sig_GO_MF.txt  -d$'\t' -o GOs_MF.xls
~/Tools/csv2xls-0.4/csv_to_xls.py IKK1_vs_Cre_up_GO_CC.txt IKK1_vs_Cre_down_GO_CC.txt IKK1_vs_Cre_sig_GO_CC.txt  IKK2_vs_Cre_up_GO_CC.txt IKK2_vs_Cre_down_GO_CC.txt IKK2_vs_Cre_sig_GO_CC.txt  Nemo_vs_Cre_up_GO_CC.txt Nemo_vs_Cre_down_GO_CC.txt Nemo_vs_Cre_sig_GO_CC.txt  NIK_vs_Cre_up_GO_CC.txt NIK_vs_Cre_down_GO_CC.txt NIK_vs_Cre_sig_GO_CC.txt  -d$'\t' -o GOs_CC.xls
like unlike
点赞本文的读者
还没有人对此文章表态
本文有评论

没有评论
RNA-Seq R-scripts for MKL-1 and WaGa cell lines

The pipeline finished successfully, but the following samples were skipped,

本文有评论

看文章，发评论，不要沉默

最受欢迎文章

最新文章

最多评论文章

推荐相似文章