gene_x 0 like s 347 view s
Tags: sequencing, pipeline
Draw plots for piRNA generated by COMPSRA
Generate the following files according to STEPS 1-4 from http://xgenes.com/article/article-content/239/small-rna-sequencing-processing-in-the-example-of-smallrna-7/, http://xgenes.com/article/article-content/232/small-rna-sequencing-processing-in-the-example-of-smallrna-7/, and http://xgenes.com/article/article-content/156/small-rna-processing/. For COMPSRA_MERGE_0_miRNA.txt, we also need STEP 5 to add the read numbers of MCPyV-M1.
COMPSRA_MERGE_0_miRNA.txt *
COMPSRA_MERGE_0_piRNA.txt *
COMPSRA_MERGE_0_snRNA.txt *
COMPSRA_MERGE_0_tRNA.txt
COMPSRA_MERGE_0_snoRNA.txt
COMPSRA_MERGE_0_circRNA.txt
Input files for piRNA are two files: COMPSRA_MERGE_0_piRNA.txt and ids
COMPSRA_MERGE_0_piRNA.txt
#The former are more precise due to the reads from virus will be mapped on the virus-genome diff ./our_out_on_hg38+JN707599/COMPSRA_MERGE_0_piRNA.txt ./our_out_on_hg38/COMPSRA_MERGE_0_piRNA.txt diff ./our_out_on_hg38+JN707599/COMPSRA_MERGE_0_snRNA.txt ./our_out_on_hg38/COMPSRA_MERGE_0_snRNA.txt cp ../our_out_on_hg38+JN707599/COMPSRA_MERGE_0_piRNA.txt .
prepare the file ids
#see Option4: manully defining
Draw plots with R using DESeq2
#BiocManager::install("AnnotationDbi")
#BiocManager::install("clusterProfiler")
#BiocManager::install(c("ReactomePA","org.Hs.eg.db"))
#BiocManager::install("limma")
library("AnnotationDbi")
library("clusterProfiler")
library("ReactomePA")
library("org.Hs.eg.db")
library(DESeq2)
library(gplots)
library(limma)
# Check the current library paths
.libPaths()
setwd("/home/jhuang/DATA/Data_Ute/Data_Ute_smallRNA_7/piRNA_WaGa_2024")
d.raw<- read.delim2("COMPSRA_MERGE_0_piRNA.txt",sep="\t", header=TRUE, row.names=1)
d.raw$X <- NULL
d.raw[] <- lapply(d.raw, as.numeric)
EV_or_parental = as.factor(c("EV","EV", "EV","EV", "EV","EV", "EV","EV", "EV","EV", "parental","parental"))
donor = as.factor(c("0505","1905", "0505","1905", "0505","1905", "0505","1905", "0505","1905", "0505","1905"))
replicates = as.factor(c("sT_DMSO","sT_DMSO", "sT_Dox","sT_Dox", "scr_DMSO","scr_DMSO", "scr_Dox","scr_Dox", "wt","wt", "control","control"))
ids = as.factor(c("0505_WaGa_sT_DMSO","1905_WaGa_sT_DMSO","0505_WaGa_sT_Dox","1905_WaGa_sT_Dox","0505_WaGa_scr_DMSO","1905_WaGa_scr_DMSO","0505_WaGa_scr_Dox","1905_WaGa_scr_Dox","0505_WaGa_wt","1905_WaGa_wt","control_MKL1","control_WaGa"))
cData = data.frame(row.names=colnames(d.raw), replicates=replicates, ids=ids, donor=donor, EV_or_parental=EV_or_parental)
dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~replicates+donor)
rld <- rlogTransformation(dds)
# -- before pca --
png("pca.png", 1200, 800)
plotPCA(rld, intgroup=c("replicates"))
#plotPCA(rld, intgroup = c("replicates", "batch"))
#plotPCA(rld, intgroup = c("replicates", "ids"))
#plotPCA(rld, "batch")
dev.off()
png("pca2.png", 1200, 800)
plotPCA(rld, intgroup=c("donor"))
dev.off()
#### STEP2: DEGs ####
#convert bam to bigwig using deepTools by feeding inverse of DESeq’s size Factor
sizeFactors(dds)
#NULL
dds <- estimateSizeFactors(dds)
sizeFactors(dds)
normalized_counts <- counts(dds, normalized=TRUE)
write.table(normalized_counts, file="normalized_counts.txt", sep="\t", quote=F, col.names=NA)
#---- * to untreated ----
dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~EV_or_parental+donor)
dds$EV_or_parental <- relevel(dds$EV_or_parental, "parental")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("EV_vs_parental")
for (i in clist) {
contrast = paste("EV_or_parental", i, sep="_")
res = results(dds, name=contrast)
res <- res[!is.na(res$log2FoldChange),]
#https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
res$padj <- ifelse(is.na(res$padj), 1, res$padj)
res_df <- as.data.frame(res)
write.csv(as.data.frame(res_df[order(res_df$pvalue),]), file = paste(i, "all.txt", sep="-"))
up <- subset(res_df, padj<=0.1 & log2FoldChange>=2)
down <- subset(res_df, padj<=0.1 & log2FoldChange<=-2)
write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}
#~/Tools/csv2xls-0.4/csv_to_xls.py EV_vs_parental-all.txt EV_vs_parental-up.txt EV_vs_parental-down.txt -d$',' -o EV_vs_parental.xls;
dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~replicates+donor)
dds$replicates <- relevel(dds$replicates, "sT_DMSO")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("sT_Dox_vs_sT_DMSO")
dds$replicates <- relevel(dds$replicates, "scr_Dox")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("sT_Dox_vs_scr_Dox")
dds$replicates <- relevel(dds$replicates, "scr_DMSO")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("scr_Dox_vs_scr_DMSO", "sT_DMSO_vs_scr_DMSO")
for (i in clist) {
contrast = paste("replicates", i, sep="_")
res = results(dds, name=contrast)
res <- res[!is.na(res$log2FoldChange),]
#https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
res$padj <- ifelse(is.na(res$padj), 1, res$padj)
res_df <- as.data.frame(res)
write.csv(as.data.frame(res_df[order(res_df$pvalue),]), file = paste(i, "all.txt", sep="-"))
up <- subset(res_df, padj<=0.1 & log2FoldChange>=2)
down <- subset(res_df, padj<=0.1 & log2FoldChange<=-2)
write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}
~/Tools/csv2xls-0.4/csv_to_xls.py \
sT_Dox_vs_sT_DMSO-all.txt \
sT_Dox_vs_sT_DMSO-up.txt \
sT_Dox_vs_sT_DMSO-down.txt \
-d$',' -o sT_Dox_vs_sT_DMSO.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
sT_Dox_vs_scr_Dox-all.txt \
sT_Dox_vs_scr_Dox-up.txt \
sT_Dox_vs_scr_Dox-down.txt \
-d$',' -o sT_Dox_vs_scr_Dox.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
scr_Dox_vs_scr_DMSO-all.txt \
scr_Dox_vs_scr_DMSO-up.txt \
scr_Dox_vs_scr_DMSO-down.txt \
-d$',' -o scr_Dox_vs_scr_DMSO.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
sT_DMSO_vs_scr_DMSO-all.txt \
sT_DMSO_vs_scr_DMSO-up.txt \
sT_DMSO_vs_scr_DMSO-down.txt \
-d$',' -o sT_DMSO_vs_scr_DMSO.xls;
##### STEP3: prepare all_genes #####
rld <- rlogTransformation(dds)
mat <- assay(rld)
mm <- model.matrix(~replicates, colData(rld))
mat <- limma::removeBatchEffect(mat, batch=rld$donor, design=mm)
assay(rld) <- mat
RNASeq.NoCellLine <- assay(rld)
# reorder the columns
colnames(RNASeq.NoCellLine) = c("0505 WaGa sT DMSO","1905 WaGa sT DMSO","0505 WaGa sT Dox","1905 WaGa sT Dox","0505 WaGa scr DMSO","1905 WaGa scr DMSO","0505 WaGa scr Dox","1905 WaGa scr Dox","0505 WaGa wt","1905 WaGa wt","control MKL1","control WaGa")
col.order <-c("control MKL1", "control WaGa","0505 WaGa wt","1905 WaGa wt","0505 WaGa sT DMSO","1905 WaGa sT DMSO","0505 WaGa sT Dox","1905 WaGa sT Dox","0505 WaGa scr DMSO","1905 WaGa scr DMSO","0505 WaGa scr Dox","1905 WaGa scr Dox")
RNASeq.NoCellLine <- RNASeq.NoCellLine[,col.order]
#Option4: manully defining
#for i in EV_vs_parental sT_Dox_vs_sT_DMSO sT_Dox_vs_scr_Dox scr_Dox_vs_scr_DMSO sT_DMSO_vs_scr_DMSO; do echo "cut -d',' -f1-1 ${i}-up.txt > ${i}-up.id"; echo "cut -d',' -f1-1 ${i}-down.txt > ${i}-down.id"; done
#cat *.id | sort -u > ids
##add Gene_Id in the first line, delete the ""
GOI <- read.csv("ids")$Gene_Id
datamat = RNASeq.NoCellLine[GOI, ]
##### STEP4: clustering the genes and draw heatmap #####
datamat <- datamat[,-1] #delete the sample "control MKL1"
colnames(datamat)[1] <- "WaGa control" #rename the isolate names according to the style of RNA-seq as follows?
colnames(datamat)[2] <- "WaGa wildtype 0505"
colnames(datamat)[3] <- "WaGa wildtype 1905"
colnames(datamat)[4] <- "WaGa sT DMSO 0505"
colnames(datamat)[5] <- "WaGa sT DMSO 1905"
colnames(datamat)[6] <- "WaGa sT Dox 0505"
colnames(datamat)[7] <- "WaGa sT Dox 1905"
colnames(datamat)[8] <- "WaGa scr DMSO 0505"
colnames(datamat)[9] <- "WaGa scr DMSO 1905"
colnames(datamat)[10] <- "WaGa scr Dox 0505"
colnames(datamat)[11] <- "WaGa scr Dox 1905"
write.csv(datamat, file ="gene_expression_keeping_replicates.txt")
# In order to 100% reproduce the plot, load the data from the txt-file as datamat1.
#"ward.D"’, ‘"ward.D2"’,‘"single"’, ‘"complete"’, ‘"average"’ (= UPGMA), ‘"mcquitty"’(= WPGMA), ‘"median"’ (= WPGMC) or ‘"centroid"’ (= UPGMC)
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
mycl = cutree(hr, h=max(hr$height)/1.5)
mycol = c("YELLOW", "BLUE", "ORANGE", "CYAN", "GREEN", "MAGENTA", "GREY", "LIGHTCYAN", "RED", "PINK", "DARKORANGE", "MAROON", "LIGHTGREEN", "DARKBLUE", "DARKRED", "LIGHTBLUE", "DARKCYAN", "DARKGREEN", "DARKMAGENTA");
mycol = mycol[as.vector(mycl)]
png("piRNA_heatmap_keeping_replicates.png", width=800, height=1000)
#svg("DEGs_heatmap_keeping_replicates.svg", width=6, height=8)
heatmap.2(as.matrix(datamat),
Rowv=as.dendrogram(hr),
Colv=NA,
dendrogram='row',
labRow="",
scale='row',
trace='none',
col=bluered(75),
RowSideColors=mycol,
srtCol=20,
lhei=c(1,8),
#cexRow=1.2, # Increase row label font size
cexCol=1.7, # Increase column label font size
margin=c(7,1)
)
dev.off()
#### cluster members #####
write.csv(names(subset(mycl, mycl == '1')),file='YELLOW.txt')
write.csv(names(subset(mycl, mycl == '2')),file='BLUE.txt')
write.csv(names(subset(mycl, mycl == '3')),file='ORANGE.txt')
#~/Tools/csv2xls-0.4/csv_to_xls.py gene_expression_keeping_replicates.txt YELLOW.txt ORANGE.txt BLUE.txt -d',' -o piRNA_heatmap_keeping_replicates.xls
mv piRNA_heatmap_keeping_replicates.png piRNA_heatmap.png
mv piRNA_heatmap_keeping_replicates.xls piRNA_heatmap.xls
mv pca.png piRNA_pca.png
mv EV_vs_parental.xls piRNA_EV_vs_parental.xls
mv sT_DMSO_vs_scr_DMSO.xls piRNA_sT_DMSO_vs_scr_DMSO.xls
mv sT_Dox_vs_scr_Dox.xls piRNA_sT_Dox_vs_scr_Dox.xls
mv sT_Dox_vs_sT_DMSO.xls piRNA_sT_Dox_vs_sT_DMSO.xls
mv scr_Dox_vs_scr_DMSO.xls piRNA_scr_Dox_vs_scr_DMSO.xls
# --> SENDING piRNA_*.png, piRNA_EV_vs_parental.xls, and piRNA_heatmap.xls
# ---- NOT WORKING WELL due to actualColors --> DEBUG see the README_miRNA_WaGa ERROR-commented-lines in the heatmap.2-block ----
# merging replicates
datamat <- cbind(datamat, "WaGa wildtype" = rowMeans(datamat[, 2:3]))
datamat <- cbind(datamat, "WaGa sT DMSO" = rowMeans(datamat[, 4:5]))
datamat <- cbind(datamat, "WaGa sT Dox" = rowMeans(datamat[, 6:7]))
datamat <- cbind(datamat, "WaGa scr DMSO" = rowMeans(datamat[, 8:9]))
datamat <- cbind(datamat, "WaGa scr Dox" = rowMeans(datamat[, 10:11]))
datamat <- datamat[,c(-2:-11)]
write.csv(datamat, file ="gene_expression_merging_replicates.txt")
# Ensure 'mycl' is calculated properly.
mycl <- cutree(hr, h=max(hr$height)/2.9)
# mycol = c("YELLOW", "BLUE", "ORANGE", "CYAN", "GREEN", "MAGENTA", "GREY", "LIGHTCYAN", "RED", "PINK", "DARKORANGE", "MAROON", "LIGHTGREEN", "DARKBLUE", "DARKRED", "LIGHTBLUE", "DARKCYAN", "DARKGREEN", "DARKMAGENTA");
# Now map your clusters to colors, making sure that there's one color for each row:
<- mycol[mycl] # Assign colors based on cluster assignment
# Then use these 'actualColors' in your heatmap:
png("piRNA_heatmap_merging_replicates.png", width=800, height=1000)
heatmap.2(as.matrix(datamat),
Rowv=as.dendrogram(hr),
Colv=NA,
dendrogram='row',
labRow="",
scale='row',
trace='none',
col=bluered(75),
RowSideColors=actualColors, # Update this part
srtCol=20,
lhei=c(1,8),
cexCol=1.7, # Increase column label font size
margin=c(7,1)
)
dev.off()
#### cluster members #####
write.csv(names(subset(mycl, mycl == '1')),file='YELLOW.txt')
write.csv(names(subset(mycl, mycl == '2')),file='BLUE.txt')
write.csv(names(subset(mycl, mycl == '3')),file='ORANGE.txt')
write.csv(names(subset(mycl, mycl == '4')),file='CYAN.txt')
write.csv(names(subset(mycl, mycl == '5')),file='GREEN.txt')
write.csv(names(subset(mycl, mycl == '6')),file='MAGENTA.txt')
write.csv(names(subset(mycl, mycl == '7')),file='GREY.txt')
write.csv(names(subset(mycl, mycl == '8')),file='LIGHTCYAN.txt')
#write.csv(names(subset(mycl, mycl == '9')),file='RED.txt')
#~/Tools/csv2xls-0.4/csv_to_xls.py gene_expression_merging_replicates.txt YELLOW.txt BLUE.txt ORANGE.txt CYAN.txt MAGENTA.txt GREEN.txt LIGHTCYAN.txt GREY.txt -d',' -o piRNA_heatmap_merging_replicates.xls
4, do separate shRNA and treatment analysis (input d.raw)
d_WaGa <- d.raw[, !(grepl("control|wt|MKL1", names(d.raw)))]
cell.line = as.factor(c("WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa","WaGa"))
shRNA = as.factor(c("sT","sT","sT","sT","scr","scr","scr","scr"))
treatment = as.factor(c("DMSO","DMSO","Dox","Dox","DMSO","DMSO","Dox","Dox"))
cData = data.frame(row.names=colnames(d_WaGa), shRNA=shRNA, treatment=treatment, cell.line=cell.line)
dds_WaGa<-DESeqDataSetFromMatrix(countData=d_WaGa, colData=cData, design=~shRNA+treatment+shRNA:treatment)
dds_WaGa = DESeq(dds_WaGa, betaPrior=FALSE)
resultsNames(dds_WaGa)
contrasts <- c("shRNA_sT_vs_scr", "treatment_Dox_vs_DMSO", "shRNAsT.treatmentDox")
for (i in contrasts) {
res = results(dds_WaGa, name=i)
res <- res[!is.na(res$log2FoldChange),]
res$padj <- ifelse(is.na(res$padj), 1, res$padj)
res_df <- as.data.frame(res)
write.csv(as.data.frame(res_df[order(res_df$pvalue),]), file = paste(i, "all.txt", sep="-"))
up <- subset(res_df, padj<=0.1 & log2FoldChange>=2)
down <- subset(res_df, padj<=0.1 & log2FoldChange<=-2)
write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}
mv shRNA_sT_vs_scr-all.txt WaGa_shRNA_sT_vs_scr-all.txt
mv shRNA_sT_vs_scr-up.txt WaGa_shRNA_sT_vs_scr-up.txt
mv shRNA_sT_vs_scr-down.txt WaGa_shRNA_sT_vs_scr-down.txt
mv treatment_Dox_vs_DMSO-all.txt WaGa_treatment_Dox_vs_DMSO-all.txt
mv treatment_Dox_vs_DMSO-up.txt WaGa_treatment_Dox_vs_DMSO-up.txt
mv treatment_Dox_vs_DMSO-down.txt WaGa_treatment_Dox_vs_DMSO-down.txt
mv shRNAsT.treatmentDox-all.txt WaGa_shRNAsT.treatmentDox-all.txt
mv shRNAsT.treatmentDox-up.txt WaGa_shRNAsT.treatmentDox-up.txt
mv shRNAsT.treatmentDox-down.txt WaGa_shRNAsT.treatmentDox-down.txt
~/Tools/csv2xls-0.4/csv_to_xls.py WaGa_shRNA_sT_vs_scr-up.txt WaGa_shRNA_sT_vs_scr-down.txt WaGa_shRNA_sT_vs_scr-all.txt -d$',' -o WaGa_shRNA_sT_vs_scr.xls
~/Tools/csv2xls-0.4/csv_to_xls.py WaGa_treatment_Dox_vs_DMSO-up.txt WaGa_treatment_Dox_vs_DMSO-down.txt WaGa_treatment_Dox_vs_DMSO-all.txt -d$',' -o WaGa_treatment_Dox_vs_DMSO.xls
~/Tools/csv2xls-0.4/csv_to_xls.py WaGa_shRNAsT.treatmentDox-up.txt WaGa_shRNAsT.treatmentDox-down.txt WaGa_shRNAsT.treatmentDox-all.txt -d$',' -o WaGa_shRNAsT.treatmentDox.xls
点赞本文的读者
还没有人对此文章表态
没有评论
Tn-seq analysis pipeline (improved)
Step-by-Step Analysis of Transposon Insertion Sites
GSVA calculation for NanoString data
How to use H3K27ac, H3K4me1, and RNA-seq to identify enhancers and their target genes?
© 2023 XGenes.com Impressum