gene_x 0 like s 474 view s
Tags: R, RNA-seq
goal
#p604 and p601 to sT transcript
#p602 and p600 to LT transcript
#p605 and p600 to LTtr transcript (or LT transcript is also fine)
p602+604 compared to sT transcript (p602and604_d3 vs. p601_d3?)
p605+604 compared to sT transcript (p604and605_d9/d12 vs. p600and601_d9/d12?)
p602+604 compared to LT transcript (p602and604_d3 vs. p601_d3?)
p605+604 compared to LT transcript (p604and605_d9/d12 vs. p600and601_d9/d12?)
under ~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq_Merged_Corrected_28+2
library("AnnotationDbi")
library("clusterProfiler")
library("ReactomePA")
library(gplots)
library(tximport)
library(DESeq2)
library("org.Hs.eg.db")
#d.raw<- read.delim2("gene_counts_hg38+virus_on_LT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
d.raw<- read.delim2("gene_counts_hg38+virus_on_sT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
# [1] V_8_0_mock_DonorI V_8_0_mock_DonorII
# [3] V_8_1_5_p601_d3_DonorII V_8_1_5_p604_d3_DonorII
# [5] V_8_1_5_p601_d8_DonorII V_8_1_5_p604_d8_DonorII
# [7] V_8_1_6_p601_d3_DonorI V_8_1_6_p604_d3_DonorI
# [9] V_8_1_6_p601_d8_DonorI V_8_1_6_p604_d8_DonorI
#[11] V_8_2_3_p600_d3_DonorII V_8_2_3_p605_d3_DonorII
#[13] V_8_2_3_p600_d8_DonorII V_8_2_3_p605_d8_DonorII
#[15] V_8_2_4_p600_d3_DonorI V_8_2_4_p605_d3_DonorI
#[17] V_8_2_4_p600_d8_DonorI V_8_2_4_p605_d8_DonorI
#[19] V_8_4_1_p602_d8_DonorII V_8_4_1_p602_d8_DonorI
#[21] V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI
#[23] V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII
#[25] V_8_4_2_p602_d3_DonorI V_8_4_2_p602_d3_DonorII
#[27] V_8_5_1_p602and604_d3_DonorI V_8_5_2_p602and604_d3_DonorII
col_order <- c("V_8_0_mock_DonorI","V_8_0_mock_DonorII","V_8_1_5_p601_d3_DonorII", "V_8_1_5_p604_d3_DonorII", "V_8_1_5_p601_d8_DonorII","V_8_1_5_p604_d8_DonorII", "V_8_1_6_p601_d3_DonorI","V_8_1_6_p604_d3_DonorI","V_8_1_6_p601_d8_DonorI","V_8_1_6_p604_d8_DonorI", "V_8_2_3_p600_d3_DonorII","V_8_2_3_p605_d3_DonorII","V_8_2_3_p600_d8_DonorII", "V_8_2_3_p605_d8_DonorII", "V_8_2_4_p600_d3_DonorI","V_8_2_4_p605_d3_DonorI","V_8_2_4_p600_d8_DonorI","V_8_2_4_p605_d8_DonorI", "V_8_4_1_p602_d8_DonorII","V_8_4_1_p602_d8_DonorI", "V_8_3_1_p600and601_d12_DonorI", "V_8_3_1_p604and605_d12_DonorI","V_8_3_2_p600and601_d9_DonorII","V_8_3_2_p604and605_d9_DonorII", "V_8_4_2_p602_d3_DonorI","V_8_4_2_p602_d3_DonorII", "V_8_5_1_p602and604_d3_DonorI", "V_8_5_2_p602and604_d3_DonorII")
identical(colnames(d.raw),col_order)
#reordered.raw <- d.raw[,col_order]
replicates = as.factor(c("untreated","untreated", "p601_d3","p604_d3", "p601_d8","p604_d8", "p601_d3","p604_d3","p601_d8","p604_d8", "p600_d3","p605_d3","p600_d8", "p605_d8", "p600_d3","p605_d3","p600_d8","p605_d8", "p602_d8","p602_d8", "p600and601_d9d12", "p604and605_d9d12","p600and601_d9d12","p604and605_d9d12", "p602_d3","p602_d3", "p602and604_d3","p602and604_d3"))
batch = as.factor(c("200420", "200420", "190927", "190927", "190927", "190927", "190228", "190228", "190228", "190228", "191008", "191008", "191008", "191008", "190228", "190228", "190228", "190228", "200817", "200817", "200420", "200420", "200817", "200817", "210302", "210302", "210504","210504"))
ids = as.factor(c("untreated_DonorI","untreated_DonorII", "p601_d3_DonorII","p604_d3_DonorII", "p601_d8_DonorII","p604_d8_DonorII", "p601_d3_DonorI","p604_d3_DonorI","p601_d8_DonorI","p604_d8_DonorI", "p600_d3_DonorII","p605_d3_DonorII","p600_d8_DonorII", "p605_d8_DonorII", "p600_d3_DonorI","p605_d3_DonorI","p600_d8_DonorI","p605_d8_DonorI", "p602_d8_DonorII","p602_d8_DonorI", "p600and601_d12_DonorI", "p604and605_d12_DonorI","p600and601_d9_DonorII","p604and605_d9_DonorII", "p602_d3_DonorI","p602_d3_DonorII", "p602and604_d3_DonorI","p602and604_d3_DonorII"))
donor = as.factor(c("DonorI","DonorII", "DonorII","DonorII", "DonorII","DonorII", "DonorI","DonorI","DonorI","DonorI", "DonorII","DonorII","DonorII","DonorII", "DonorI","DonorI","DonorI","DonorI", "DonorII","DonorI", "DonorI", "DonorI","DonorII","DonorII", "DonorI","DonorII", "DonorI","DonorII"))
cData = data.frame(row.names=colnames(d.raw), replicates=replicates, donor=donor, batch=batch, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~batch+replicates) # ERROR due to the factor 'batch'
dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~donor+replicates)
sizeFactors(dds)
>NULL
dds <- estimateSizeFactors(dds)
sizeFactors(dds)
# V_8_0_mock_DonorI V_8_0_mock_DonorII
# 0.6152727 0.8401306
# V_8_1_5_p601_d3_DonorII V_8_1_5_p604_d3_DonorII
# 1.0196629 1.0293038
# V_8_1_5_p601_d8_DonorII V_8_1_5_p604_d8_DonorII
# 0.8901261 0.9062621
# V_8_1_6_p601_d3_DonorI V_8_1_6_p604_d3_DonorI
# 1.3752853 1.3504344
# V_8_1_6_p601_d8_DonorI V_8_1_6_p604_d8_DonorI
# 1.2316027 1.3085877
# V_8_2_3_p600_d3_DonorII V_8_2_3_p605_d3_DonorII
# 0.9323440 0.9381003
# V_8_2_3_p600_d8_DonorII V_8_2_3_p605_d8_DonorII
# 0.7179836 0.9400712
# V_8_2_4_p600_d3_DonorI V_8_2_4_p605_d3_DonorI
# 1.3953243 1.4836134
# V_8_2_4_p600_d8_DonorI V_8_2_4_p605_d8_DonorI
# 1.2135489 1.8270271
# V_8_4_1_p602_d8_DonorII V_8_4_1_p602_d8_DonorI
# 1.5281322 1.1579240
#V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI
# 0.9895927 0.8560524
#V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII
# 1.3462308 1.1142589
# V_8_4_2_p602_d3_DonorI V_8_4_2_p602_d3_DonorII
# 0.9675338 1.1800615
# V_8_5_1_p602and604_d3_DonorI V_8_5_2_p602and604_d3_DonorII
# 0.3940273 0.3490526
rld <- rlogTransformation(dds)
# -- before pca --
png("pca.png", 1200, 800)
plotPCA(rld, intgroup=c("replicates"))
#plotPCA(rld, intgroup = c("replicates", "batch"))
#plotPCA(rld, intgroup = c("replicates", "ids"))
#plotPCA(rld, "batch")
dev.off()
# -- before heatmap --
## generate the pairwise comparison between samples
library(gplots)
library("RColorBrewer")
png("heatmap.png", 1200, 800)
distsRL <- dist(t(assay(rld)))
mat <- as.matrix(distsRL)
#paste( rld$dex, rld$cell, sep="-" )
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,batch, sep=":"))
rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates))
hc <- hclust(distsRL)
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
dev.off()
#---- 3 ----
dds$replicates <- relevel(dds$replicates, "p601_d3")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("p602and604_d3_vs_p601_d3")
dds$replicates <- relevel(dds$replicates, "p600and601_d9d12")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("p604and605_d9d12_vs_p600and601_d9d12")
for (i in clist) {
contrast = paste("replicates", i, sep="_")
res = results(dds, name=contrast)
res <- res[!is.na(res$log2FoldChange),]
geness <- AnnotationDbi::select(org.Hs.eg.db, keys = rownames(res), keytype = "SYMBOL", columns = c("ENSEMBL", "ENTREZID", "SYMBOL", "GENENAME"))
geness <- geness[!duplicated(geness$SYMBOL), ]
res$SYMBOL = rownames(res)
rownames(geness) <- geness$SYMBOL
identical(rownames(res), rownames(geness))
res_df <- as.data.frame(res)
geness_res <- merge(geness, res_df)
dim(geness_res)
write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
#write.csv(res_df, file = paste(i, "background.txt", sep="_"))
up <- subset(geness_res, padj<=0.05 & log2FoldChange>=1)
down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-1)
write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}
~/Tools/csv2xls-0.4/csv_to_xls.py \
p602and604_d3_vs_p601_d3-all.txt \
p602and604_d3_vs_p601_d3-up.txt \
p602and604_d3_vs_p601_d3-down.txt \
-d$',' -o p602and604_d3_vs_p601_d3_on_LT.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
p604and605_d9d12_vs_p600and601_d9d12-all.txt \
p604and605_d9d12_vs_p600and601_d9d12-up.txt \
p604and605_d9d12_vs_p600and601_d9d12-down.txt \
-d$',' -o p604and605_d9d12_vs_p600and601_d9d12_on_LT.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
p602and604_d3_vs_p601_d3-all.txt \
p602and604_d3_vs_p601_d3-up.txt \
p602and604_d3_vs_p601_d3-down.txt \
-d$',' -o p602and604_d3_vs_p601_d3_on_sT.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
p604and605_d9d12_vs_p600and601_d9d12-all.txt \
p604and605_d9d12_vs_p600and601_d9d12-up.txt \
p604and605_d9d12_vs_p600and601_d9d12-down.txt \
-d$',' -o p604and605_d9d12_vs_p600and601_d9d12_on_sT.xls;
goal
p783 compared to LT transcript (p783_d8 vs. p600_d8?)
Under ~/DATA/Data_Denise_LT_K331A_RNASeq/Data_Denise_RNASeq_Merged_Corrected_8
d.raw<- read.delim2("gene_name_gene_counts_hg38+virus.csv",sep=",", header=TRUE, row.names=1)
library(dplyr)
# Remove duplicate rows based on gene_name
d.raw.unique <- d.raw %>% distinct(gene_name, .keep_all = TRUE)
# Convert to a traditional data frame
d.raw.df <- as.data.frame(d.raw.unique)
# Set gene_name as row names
rownames(d.raw.df) <- d.raw.df$gene_name
# Remove the now redundant gene_name column
d.raw.df$gene_name <- NULL
#d.summarized <- d.raw %>% group_by(gene_name) %>% summarize(across(everything(), sum, na.rm = TRUE))
#rownames(d.raw.df) <- d.raw.df$gene_name
## Remove the now redundant gene_name column
#d.raw.df$gene_name <- NULL
replicates = as.factor(c("p600_d8","p600_d8", "p602_d8","p602_d8", "p605_d8","p605_d8", "p783_d8","p783_d8"))
ids = as.factor(c("p600_d8_DonorI","p600_d8_DonorII", "p602_d8_DonorI","p602_d8_DonorII", "p605_d8_DonorI","p605_d8_DonorII", "p783_d8_DonorI","p783_d8_DonorII"))
donor = as.factor(c("DonorI","DonorII", "DonorI","DonorII", "DonorI","DonorII", "DonorI","DonorII"))
cData = data.frame(row.names=colnames(d.raw.df), replicates=replicates, donor=donor, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d.raw.df, colData=cData, design=~batch+replicates) # ERROR due to the factor 'batch'
dds<-DESeqDataSetFromMatrix(countData=d.raw.df, colData=cData, design=~donor+replicates)
sizeFactors(dds)
>NULL
dds <- estimateSizeFactors(dds)
sizeFactors(dds)
# control_DI control_DII LT_DI LT_DII LTtr_DI LTtr_DII
# 0.9393201 0.5490296 0.9066729 1.1874750 1.4258546 0.7305989
# LT_K331A_DI LT_K331A_DII
# 1.4819629 1.2744066
rld <- rlogTransformation(dds)
dds$replicates <- relevel(dds$replicates, "p600_d8")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("p783_d8_vs_p600_d8")
for (i in clist) {
contrast = paste("replicates", i, sep="_")
res = results(dds, name=contrast)
res <- res[!is.na(res$log2FoldChange),]
geness <- AnnotationDbi::select(org.Hs.eg.db, keys = rownames(res), keytype = "SYMBOL", columns = c("ENSEMBL", "ENTREZID", "SYMBOL", "GENENAME"))
geness <- geness[!duplicated(geness$SYMBOL), ]
res$SYMBOL = rownames(res)
rownames(geness) <- geness$SYMBOL
identical(rownames(res), rownames(geness))
res_df <- as.data.frame(res)
geness_res <- merge(geness, res_df)
dim(geness_res)
write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
#write.csv(res_df, file = paste(i, "background.txt", sep="_"))
up <- subset(geness_res, padj<=0.05 & log2FoldChange>=1)
down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-1)
write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}
~/Tools/csv2xls-0.4/csv_to_xls.py \
p783_d8_vs_p600_d8-all.txt \
p783_d8_vs_p600_d8-up.txt \
p783_d8_vs_p600_d8-down.txt \
-d$',' -o p783_d8_vs_p600_d8_on_LT.xls;
Deduplication is not performed until UMI-tools are used.
UMI-tools dedup
picard MarkDuplicates
This step will be skipped automatically when using the --with_umi option or explicitly via the --skip_markduplicates parameter.
compare the STAR bam-files with markDuplicates bam-files. They are the same, since in the markDuplicates step, the duplicated reads were only marked, not physically removed! jhuang@hamburg:~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq/results/STAR$ samtools flagstat ./V_8_4_2_p602_d3_DonorIIAligned.sortedByCoord.out.bam 19968762 + 0 in total (QC-passed reads + QC-failed reads) 2323677 + 0 secondary 0 + 0 supplementary 0 + 0 duplicates 19968762 + 0 mapped (100.00% : N/A) jhuang@hamburg:~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq/results/markDuplicates$ samtools flagstat V_8_4_2_p602_d3_DonorIIAligned.sortedByCoord.out.markDups.bam 19968762 + 0 in total (QC-passed reads + QC-failed reads) 2323677 + 0 secondary 0 + 0 supplementary 15013903 + 0 duplicates 19968762 + 0 mapped (100.00% : N/A)
goal
#| p600and601_d9_DonorII | GFP+mCherry control | Day 9 |
#| p600and601_d12_DonorI | GFP+mCherry control | Day 12 |
#-->| p604and605_d9_DonorII | sT+LTtr | Day 9 |
#-->| p604and605_d12_DonorI | sT+LTtr | Day 12 |
#-->| p602and604_d3_DonorI | sT+LT | Day 3 |
#-->| p602and604_d3_DonorII | sT+LT | Day 3 |
p602+604 d3 versus p602 d3 on sT+LT transcript
p602+p604 d3 versus p604 d3 on sT+LT transcript
p604+605 d9/12 versus p604 d8 on sT+LT transcript
p604+605 d9/12 versus p605 d8 on sT+LT transcript
under ~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq_Merged_Corrected_28+2
library("AnnotationDbi")
library("clusterProfiler")
library("ReactomePA")
library(gplots)
library(tximport)
library(DESeq2)
library("org.Hs.eg.db")
#d.raw<- read.delim2("gene_counts_hg38+virus_on_LT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
#d.raw<- read.delim2("gene_counts_hg38+virus_on_sT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
d.raw<- read.delim2("gene_counts_hg38+virus_on_sT+LT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
# [1] V_8_0_mock_DonorI V_8_0_mock_DonorII
# [3] V_8_1_5_p601_d3_DonorII V_8_1_5_p604_d3_DonorII
# [5] V_8_1_5_p601_d8_DonorII V_8_1_5_p604_d8_DonorII
# [7] V_8_1_6_p601_d3_DonorI V_8_1_6_p604_d3_DonorI
# [9] V_8_1_6_p601_d8_DonorI V_8_1_6_p604_d8_DonorI
#[11] V_8_2_3_p600_d3_DonorII V_8_2_3_p605_d3_DonorII
#[13] V_8_2_3_p600_d8_DonorII V_8_2_3_p605_d8_DonorII
#[15] V_8_2_4_p600_d3_DonorI V_8_2_4_p605_d3_DonorI
#[17] V_8_2_4_p600_d8_DonorI V_8_2_4_p605_d8_DonorI
#[19] V_8_4_1_p602_d8_DonorII V_8_4_1_p602_d8_DonorI
#[21] V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI
#[23] V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII
#[25] V_8_4_2_p602_d3_DonorI V_8_4_2_p602_d3_DonorII
#[27] V_8_5_1_p602and604_d3_DonorI V_8_5_2_p602and604_d3_DonorII
col_order <- c("V_8_0_mock_DonorI","V_8_0_mock_DonorII","V_8_1_5_p601_d3_DonorII", "V_8_1_5_p604_d3_DonorII", "V_8_1_5_p601_d8_DonorII","V_8_1_5_p604_d8_DonorII", "V_8_1_6_p601_d3_DonorI","V_8_1_6_p604_d3_DonorI","V_8_1_6_p601_d8_DonorI","V_8_1_6_p604_d8_DonorI", "V_8_2_3_p600_d3_DonorII","V_8_2_3_p605_d3_DonorII","V_8_2_3_p600_d8_DonorII", "V_8_2_3_p605_d8_DonorII", "V_8_2_4_p600_d3_DonorI","V_8_2_4_p605_d3_DonorI","V_8_2_4_p600_d8_DonorI","V_8_2_4_p605_d8_DonorI", "V_8_4_1_p602_d8_DonorII","V_8_4_1_p602_d8_DonorI", "V_8_3_1_p600and601_d12_DonorI", "V_8_3_1_p604and605_d12_DonorI","V_8_3_2_p600and601_d9_DonorII","V_8_3_2_p604and605_d9_DonorII", "V_8_4_2_p602_d3_DonorI","V_8_4_2_p602_d3_DonorII", "V_8_5_1_p602and604_d3_DonorI", "V_8_5_2_p602and604_d3_DonorII")
identical(colnames(d.raw),col_order)
#reordered.raw <- d.raw[,col_order]
replicates = as.factor(c("untreated","untreated", "p601_d3","p604_d3", "p601_d8","p604_d8", "p601_d3","p604_d3","p601_d8","p604_d8", "p600_d3","p605_d3","p600_d8", "p605_d8", "p600_d3","p605_d3","p600_d8","p605_d8", "p602_d8","p602_d8", "p600and601_d9d12", "p604and605_d9d12","p600and601_d9d12","p604and605_d9d12", "p602_d3","p602_d3", "p602and604_d3","p602and604_d3"))
batch = as.factor(c("200420", "200420", "190927", "190927", "190927", "190927", "190228", "190228", "190228", "190228", "191008", "191008", "191008", "191008", "190228", "190228", "190228", "190228", "200817", "200817", "200420", "200420", "200817", "200817", "210302", "210302", "210504","210504"))
ids = as.factor(c("untreated_DonorI","untreated_DonorII", "p601_d3_DonorII","p604_d3_DonorII", "p601_d8_DonorII","p604_d8_DonorII", "p601_d3_DonorI","p604_d3_DonorI","p601_d8_DonorI","p604_d8_DonorI", "p600_d3_DonorII","p605_d3_DonorII","p600_d8_DonorII", "p605_d8_DonorII", "p600_d3_DonorI","p605_d3_DonorI","p600_d8_DonorI","p605_d8_DonorI", "p602_d8_DonorII","p602_d8_DonorI", "p600and601_d12_DonorI", "p604and605_d12_DonorI","p600and601_d9_DonorII","p604and605_d9_DonorII", "p602_d3_DonorI","p602_d3_DonorII", "p602and604_d3_DonorI","p602and604_d3_DonorII"))
donor = as.factor(c("DonorI","DonorII", "DonorII","DonorII", "DonorII","DonorII", "DonorI","DonorI","DonorI","DonorI", "DonorII","DonorII","DonorII","DonorII", "DonorI","DonorI","DonorI","DonorI", "DonorII","DonorI", "DonorI", "DonorI","DonorII","DonorII", "DonorI","DonorII", "DonorI","DonorII"))
cData = data.frame(row.names=colnames(d.raw), replicates=replicates, donor=donor, batch=batch, ids=ids)
#dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~batch+replicates) # ERROR due to the factor 'batch'
dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~donor+replicates)
sizeFactors(dds)
>NULL
dds <- estimateSizeFactors(dds)
sizeFactors(dds)
# V_8_0_mock_DonorI V_8_0_mock_DonorII
# 0.6152727 0.8401306
# V_8_1_5_p601_d3_DonorII V_8_1_5_p604_d3_DonorII
# 1.0196629 1.0293038
# V_8_1_5_p601_d8_DonorII V_8_1_5_p604_d8_DonorII
# 0.8901261 0.9062621
# V_8_1_6_p601_d3_DonorI V_8_1_6_p604_d3_DonorI
# 1.3752853 1.3504344
# V_8_1_6_p601_d8_DonorI V_8_1_6_p604_d8_DonorI
# 1.2316027 1.3085877
# V_8_2_3_p600_d3_DonorII V_8_2_3_p605_d3_DonorII
# 0.9323440 0.9381003
# V_8_2_3_p600_d8_DonorII V_8_2_3_p605_d8_DonorII
# 0.7179836 0.9400712
# V_8_2_4_p600_d3_DonorI V_8_2_4_p605_d3_DonorI
# 1.3953243 1.4836134
# V_8_2_4_p600_d8_DonorI V_8_2_4_p605_d8_DonorI
# 1.2135489 1.8270271
# V_8_4_1_p602_d8_DonorII V_8_4_1_p602_d8_DonorI
# 1.5281322 1.1579240
#V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI
# 0.9895927 0.8560524
#V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII
# 1.3462308 1.1142589
# V_8_4_2_p602_d3_DonorI V_8_4_2_p602_d3_DonorII
# 0.9675338 1.1800615
# V_8_5_1_p602and604_d3_DonorI V_8_5_2_p602and604_d3_DonorII
# 0.3940273 0.3490526
rld <- rlogTransformation(dds)
# -- before pca --
png("pca.png", 1200, 800)
plotPCA(rld, intgroup=c("replicates"))
#plotPCA(rld, intgroup = c("replicates", "batch"))
#plotPCA(rld, intgroup = c("replicates", "ids"))
#plotPCA(rld, "batch")
dev.off()
# -- before heatmap --
## generate the pairwise comparison between samples
library(gplots)
library("RColorBrewer")
png("heatmap.png", 1200, 800)
distsRL <- dist(t(assay(rld)))
mat <- as.matrix(distsRL)
#paste( rld$dex, rld$cell, sep="-" )
#rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,batch, sep=":"))
rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates))
hc <- hclust(distsRL)
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
dev.off()
#---- 3 ----
dds$replicates <- relevel(dds$replicates, "p602_d3")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("p602and604_d3_vs_p602_d3")
dds$replicates <- relevel(dds$replicates, "p604_d3")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("p602and604_d3_vs_p604_d3")
dds$replicates <- relevel(dds$replicates, "p604_d8")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("p604and605_d9d12_vs_p604_d8")
dds$replicates <- relevel(dds$replicates, "p605_d8")
dds = DESeq(dds, betaPrior=FALSE)
resultsNames(dds)
clist <- c("p604and605_d9d12_vs_p605_d8")
for (i in clist) {
contrast = paste("replicates", i, sep="_")
res = results(dds, name=contrast)
res <- res[!is.na(res$log2FoldChange),]
geness <- AnnotationDbi::select(org.Hs.eg.db, keys = rownames(res), keytype = "SYMBOL", columns = c("ENSEMBL", "ENTREZID", "SYMBOL", "GENENAME"))
geness <- geness[!duplicated(geness$SYMBOL), ]
res$SYMBOL = rownames(res)
rownames(geness) <- geness$SYMBOL
identical(rownames(res), rownames(geness))
res_df <- as.data.frame(res)
geness_res <- merge(geness, res_df)
dim(geness_res)
write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
#write.csv(res_df, file = paste(i, "background.txt", sep="_"))
up <- subset(geness_res, padj<=0.05 & log2FoldChange>=1)
down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-1)
write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
}
~/Tools/csv2xls-0.4/csv_to_xls.py \
p602and604_d3_vs_p602_d3-all.txt \
p602and604_d3_vs_p602_d3-up.txt \
p602and604_d3_vs_p602_d3-down.txt \
-d$',' -o p602and604_d3_vs_p602_d3.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
p602and604_d3_vs_p604_d3-all.txt \
p602and604_d3_vs_p604_d3-up.txt \
p602and604_d3_vs_p604_d3-down.txt \
-d$',' -o p602and604_d3_vs_p604_d3.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
p604and605_d9d12_vs_p604_d8-all.txt \
p604and605_d9d12_vs_p604_d8-up.txt \
p604and605_d9d12_vs_p604_d8-down.txt \
-d$',' -o p604and605_d9d12_vs_p604_d8.xls;
~/Tools/csv2xls-0.4/csv_to_xls.py \
p604and605_d9d12_vs_p605_d8-all.txt \
p604and605_d9d12_vs_p605_d8-up.txt \
p604and605_d9d12_vs_p605_d8-down.txt \
-d$',' -o p604and605_d9d12_vs_p605_d8.xls;
点赞本文的读者
还没有人对此文章表态
没有评论
RNA-seq data analysis (R-part) for S. epidermidis 1585
Bubble plot for 1457∆atlE vs 1457-M10 vs 1457 vs mock
RNA-seq analysis for characterizing HSV-1 infection of human skin organoid
© 2023 XGenes.com Impressum