RNA-seq recalculate p-values on sT- and LT-transcripts

gene_x 0 like s 474 view s

Tags: R, RNA-seq

  1. goal

    #p604 and p601 to sT transcript
    #p602 and p600 to LT transcript
    #p605 and p600 to LTtr transcript (or LT transcript is also fine)
    
    p602+604 compared to sT transcript (p602and604_d3 vs. p601_d3?)
    p605+604 compared to sT transcript (p604and605_d9/d12 vs. p600and601_d9/d12?)
    p602+604 compared to LT transcript (p602and604_d3 vs. p601_d3?)
    p605+604 compared to LT transcript (p604and605_d9/d12 vs. p600and601_d9/d12?)
    
  2. under ~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq_Merged_Corrected_28+2

    library("AnnotationDbi")
    library("clusterProfiler")
    library("ReactomePA")
    library(gplots)
    library(tximport)
    library(DESeq2)
    library("org.Hs.eg.db")
    
    #d.raw<- read.delim2("gene_counts_hg38+virus_on_LT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
    d.raw<- read.delim2("gene_counts_hg38+virus_on_sT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
    
    # [1] V_8_0_mock_DonorI             V_8_0_mock_DonorII           
    # [3] V_8_1_5_p601_d3_DonorII       V_8_1_5_p604_d3_DonorII      
    # [5] V_8_1_5_p601_d8_DonorII       V_8_1_5_p604_d8_DonorII      
    # [7] V_8_1_6_p601_d3_DonorI        V_8_1_6_p604_d3_DonorI       
    # [9] V_8_1_6_p601_d8_DonorI        V_8_1_6_p604_d8_DonorI       
    #[11] V_8_2_3_p600_d3_DonorII       V_8_2_3_p605_d3_DonorII      
    #[13] V_8_2_3_p600_d8_DonorII       V_8_2_3_p605_d8_DonorII      
    #[15] V_8_2_4_p600_d3_DonorI        V_8_2_4_p605_d3_DonorI       
    #[17] V_8_2_4_p600_d8_DonorI        V_8_2_4_p605_d8_DonorI       
    #[19] V_8_4_1_p602_d8_DonorII       V_8_4_1_p602_d8_DonorI       
    #[21] V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI
    #[23] V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII
    #[25] V_8_4_2_p602_d3_DonorI        V_8_4_2_p602_d3_DonorII      
    #[27] V_8_5_1_p602and604_d3_DonorI  V_8_5_2_p602and604_d3_DonorII
    
    col_order <- c("V_8_0_mock_DonorI","V_8_0_mock_DonorII","V_8_1_5_p601_d3_DonorII", "V_8_1_5_p604_d3_DonorII", "V_8_1_5_p601_d8_DonorII","V_8_1_5_p604_d8_DonorII",   "V_8_1_6_p601_d3_DonorI","V_8_1_6_p604_d3_DonorI","V_8_1_6_p601_d8_DonorI","V_8_1_6_p604_d8_DonorI",  "V_8_2_3_p600_d3_DonorII","V_8_2_3_p605_d3_DonorII","V_8_2_3_p600_d8_DonorII", "V_8_2_3_p605_d8_DonorII",  "V_8_2_4_p600_d3_DonorI","V_8_2_4_p605_d3_DonorI","V_8_2_4_p600_d8_DonorI","V_8_2_4_p605_d8_DonorI",  "V_8_4_1_p602_d8_DonorII","V_8_4_1_p602_d8_DonorI",  "V_8_3_1_p600and601_d12_DonorI", "V_8_3_1_p604and605_d12_DonorI","V_8_3_2_p600and601_d9_DonorII","V_8_3_2_p604and605_d9_DonorII",    "V_8_4_2_p602_d3_DonorI","V_8_4_2_p602_d3_DonorII",    "V_8_5_1_p602and604_d3_DonorI", "V_8_5_2_p602and604_d3_DonorII")
    identical(colnames(d.raw),col_order)
    
    #reordered.raw <- d.raw[,col_order]
    
    replicates = as.factor(c("untreated","untreated", "p601_d3","p604_d3", "p601_d8","p604_d8",   "p601_d3","p604_d3","p601_d8","p604_d8",  "p600_d3","p605_d3","p600_d8", "p605_d8",  "p600_d3","p605_d3","p600_d8","p605_d8",  "p602_d8","p602_d8",  "p600and601_d9d12", "p604and605_d9d12","p600and601_d9d12","p604and605_d9d12",     "p602_d3","p602_d3",    "p602and604_d3","p602and604_d3"))
    
    batch = as.factor(c("200420", "200420", "190927", "190927",    "190927", "190927", "190228", "190228",    "190228", "190228", "191008", "191008",    "191008", "191008", "190228", "190228",     "190228", "190228", "200817", "200817",       "200420", "200420", "200817", "200817",      "210302", "210302",  "210504","210504"))
    
    ids = as.factor(c("untreated_DonorI","untreated_DonorII", "p601_d3_DonorII","p604_d3_DonorII", "p601_d8_DonorII","p604_d8_DonorII",   "p601_d3_DonorI","p604_d3_DonorI","p601_d8_DonorI","p604_d8_DonorI",  "p600_d3_DonorII","p605_d3_DonorII","p600_d8_DonorII", "p605_d8_DonorII",  "p600_d3_DonorI","p605_d3_DonorI","p600_d8_DonorI","p605_d8_DonorI",  "p602_d8_DonorII","p602_d8_DonorI",  "p600and601_d12_DonorI", "p604and605_d12_DonorI","p600and601_d9_DonorII","p604and605_d9_DonorII",        "p602_d3_DonorI","p602_d3_DonorII",      "p602and604_d3_DonorI","p602and604_d3_DonorII"))
    
    donor = as.factor(c("DonorI","DonorII",  "DonorII","DonorII", "DonorII","DonorII",   "DonorI","DonorI","DonorI","DonorI",  "DonorII","DonorII","DonorII","DonorII",  "DonorI","DonorI","DonorI","DonorI",  "DonorII","DonorI",   "DonorI", "DonorI","DonorII","DonorII",        "DonorI","DonorII",    "DonorI","DonorII"))
    
    cData = data.frame(row.names=colnames(d.raw), replicates=replicates, donor=donor, batch=batch, ids=ids)
    #dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~batch+replicates) # ERROR due to the factor 'batch'
    dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~donor+replicates)
    
    sizeFactors(dds)
    >NULL
    dds <- estimateSizeFactors(dds)
    sizeFactors(dds)
    #            V_8_0_mock_DonorI            V_8_0_mock_DonorII 
    #                    0.6152727                     0.8401306 
    #      V_8_1_5_p601_d3_DonorII       V_8_1_5_p604_d3_DonorII 
    #                    1.0196629                     1.0293038 
    #      V_8_1_5_p601_d8_DonorII       V_8_1_5_p604_d8_DonorII 
    #                    0.8901261                     0.9062621 
    #       V_8_1_6_p601_d3_DonorI        V_8_1_6_p604_d3_DonorI 
    #                    1.3752853                     1.3504344 
    #       V_8_1_6_p601_d8_DonorI        V_8_1_6_p604_d8_DonorI 
    #                    1.2316027                     1.3085877 
    #      V_8_2_3_p600_d3_DonorII       V_8_2_3_p605_d3_DonorII 
    #                    0.9323440                     0.9381003 
    #      V_8_2_3_p600_d8_DonorII       V_8_2_3_p605_d8_DonorII 
    #                    0.7179836                     0.9400712 
    #       V_8_2_4_p600_d3_DonorI        V_8_2_4_p605_d3_DonorI 
    #                    1.3953243                     1.4836134 
    #       V_8_2_4_p600_d8_DonorI        V_8_2_4_p605_d8_DonorI 
    #                    1.2135489                     1.8270271 
    #      V_8_4_1_p602_d8_DonorII        V_8_4_1_p602_d8_DonorI 
    #                    1.5281322                     1.1579240 
    #V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI 
    #                    0.9895927                     0.8560524 
    #V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII 
    #                    1.3462308                     1.1142589 
    #       V_8_4_2_p602_d3_DonorI       V_8_4_2_p602_d3_DonorII 
    #                    0.9675338                     1.1800615 
    # V_8_5_1_p602and604_d3_DonorI V_8_5_2_p602and604_d3_DonorII 
    #                    0.3940273                     0.3490526
    
    rld <- rlogTransformation(dds)
    
    # -- before pca --
    png("pca.png", 1200, 800)
    plotPCA(rld, intgroup=c("replicates"))
    #plotPCA(rld, intgroup = c("replicates", "batch"))
    #plotPCA(rld, intgroup = c("replicates", "ids"))
    #plotPCA(rld, "batch")
    dev.off()
    
    # -- before heatmap --
    ## generate the pairwise comparison between samples
    library(gplots) 
    library("RColorBrewer")
    png("heatmap.png", 1200, 800)
    distsRL <- dist(t(assay(rld)))
    mat <- as.matrix(distsRL)
    #paste( rld$dex, rld$cell, sep="-" )
    #rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,batch, sep=":"))
    rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates))
    hc <- hclust(distsRL)
    hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
    heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
    dev.off()
    
    #---- 3 ----
    dds$replicates <- relevel(dds$replicates, "p601_d3")
    dds = DESeq(dds, betaPrior=FALSE)
    resultsNames(dds)
    clist <- c("p602and604_d3_vs_p601_d3")
    
    dds$replicates <- relevel(dds$replicates, "p600and601_d9d12")
    dds = DESeq(dds, betaPrior=FALSE)
    resultsNames(dds)
    clist <- c("p604and605_d9d12_vs_p600and601_d9d12")
    
    for (i in clist) {
      contrast = paste("replicates", i, sep="_")
      res = results(dds, name=contrast)
      res <- res[!is.na(res$log2FoldChange),]
      geness <- AnnotationDbi::select(org.Hs.eg.db, keys = rownames(res), keytype = "SYMBOL", columns = c("ENSEMBL", "ENTREZID", "SYMBOL", "GENENAME"))
      geness <- geness[!duplicated(geness$SYMBOL), ]
      res$SYMBOL = rownames(res)
      rownames(geness) <- geness$SYMBOL
      identical(rownames(res), rownames(geness))
      res_df <- as.data.frame(res)
      geness_res <- merge(geness, res_df)
      dim(geness_res)
      write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
      #write.csv(res_df, file = paste(i, "background.txt", sep="_"))
      up <- subset(geness_res, padj<=0.05 & log2FoldChange>=1)
      down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-1)
      write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
      write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
    }
    
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p602and604_d3_vs_p601_d3-all.txt \
    p602and604_d3_vs_p601_d3-up.txt \
    p602and604_d3_vs_p601_d3-down.txt \
    -d$',' -o p602and604_d3_vs_p601_d3_on_LT.xls;
    
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p604and605_d9d12_vs_p600and601_d9d12-all.txt \
    p604and605_d9d12_vs_p600and601_d9d12-up.txt \
    p604and605_d9d12_vs_p600and601_d9d12-down.txt \
    -d$',' -o p604and605_d9d12_vs_p600and601_d9d12_on_LT.xls;
    
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p602and604_d3_vs_p601_d3-all.txt \
    p602and604_d3_vs_p601_d3-up.txt \
    p602and604_d3_vs_p601_d3-down.txt \
    -d$',' -o p602and604_d3_vs_p601_d3_on_sT.xls;
    
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p604and605_d9d12_vs_p600and601_d9d12-all.txt \
    p604and605_d9d12_vs_p600and601_d9d12-up.txt \
    p604and605_d9d12_vs_p600and601_d9d12-down.txt \
    -d$',' -o p604and605_d9d12_vs_p600and601_d9d12_on_sT.xls;
    
  3. goal

    p783 compared to LT transcript (p783_d8 vs. p600_d8?)
    
  4. Under ~/DATA/Data_Denise_LT_K331A_RNASeq/Data_Denise_RNASeq_Merged_Corrected_8

    d.raw<- read.delim2("gene_name_gene_counts_hg38+virus.csv",sep=",", header=TRUE, row.names=1)
    library(dplyr)
    # Remove duplicate rows based on gene_name
    d.raw.unique <- d.raw %>% distinct(gene_name, .keep_all = TRUE)
    # Convert to a traditional data frame
    d.raw.df <- as.data.frame(d.raw.unique)
    # Set gene_name as row names
    rownames(d.raw.df) <- d.raw.df$gene_name
    # Remove the now redundant gene_name column
    d.raw.df$gene_name <- NULL
    
    #d.summarized <- d.raw %>% group_by(gene_name) %>% summarize(across(everything(), sum, na.rm = TRUE))
    #rownames(d.raw.df) <- d.raw.df$gene_name
    ## Remove the now redundant gene_name column
    #d.raw.df$gene_name <- NULL
    
    replicates = as.factor(c("p600_d8","p600_d8", "p602_d8","p602_d8",  "p605_d8","p605_d8",    "p783_d8","p783_d8"))
    ids = as.factor(c("p600_d8_DonorI","p600_d8_DonorII", "p602_d8_DonorI","p602_d8_DonorII",  "p605_d8_DonorI","p605_d8_DonorII",    "p783_d8_DonorI","p783_d8_DonorII"))
    donor = as.factor(c("DonorI","DonorII",   "DonorI","DonorII",   "DonorI","DonorII",    "DonorI","DonorII"))
    cData = data.frame(row.names=colnames(d.raw.df), replicates=replicates, donor=donor, ids=ids)
    #dds<-DESeqDataSetFromMatrix(countData=d.raw.df, colData=cData, design=~batch+replicates) # ERROR due to the factor 'batch'
    dds<-DESeqDataSetFromMatrix(countData=d.raw.df, colData=cData, design=~donor+replicates)
    
    sizeFactors(dds)
    >NULL
    dds <- estimateSizeFactors(dds)
    sizeFactors(dds)
    
    #  control_DI  control_DII        LT_DI       LT_DII      LTtr_DI     LTtr_DII 
    #   0.9393201    0.5490296    0.9066729    1.1874750    1.4258546    0.7305989 
    # LT_K331A_DI LT_K331A_DII 
    #   1.4819629    1.2744066
    
    rld <- rlogTransformation(dds)
    
    dds$replicates <- relevel(dds$replicates, "p600_d8")
    dds = DESeq(dds, betaPrior=FALSE)
    resultsNames(dds)
    clist <- c("p783_d8_vs_p600_d8")
    
    for (i in clist) {
      contrast = paste("replicates", i, sep="_")
      res = results(dds, name=contrast)
      res <- res[!is.na(res$log2FoldChange),]
      geness <- AnnotationDbi::select(org.Hs.eg.db, keys = rownames(res), keytype = "SYMBOL", columns = c("ENSEMBL", "ENTREZID", "SYMBOL", "GENENAME"))
      geness <- geness[!duplicated(geness$SYMBOL), ]
      res$SYMBOL = rownames(res)
      rownames(geness) <- geness$SYMBOL
      identical(rownames(res), rownames(geness))
      res_df <- as.data.frame(res)
      geness_res <- merge(geness, res_df)
      dim(geness_res)
      write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
      #write.csv(res_df, file = paste(i, "background.txt", sep="_"))
      up <- subset(geness_res, padj<=0.05 & log2FoldChange>=1)
      down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-1)
      write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
      write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
    }
    
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p783_d8_vs_p600_d8-all.txt \
    p783_d8_vs_p600_d8-up.txt \
    p783_d8_vs_p600_d8-down.txt \
    -d$',' -o p783_d8_vs_p600_d8_on_LT.xls;
    
  5. Deduplication is not performed until UMI-tools are used.

    • UMI-tools dedup

      • After extracting the UMI information from the read sequence (see UMI-tools extract), the second step in the removal of UMI barcodes involves deduplicating the reads based on both mapping and UMI barcode information using the UMI-tools dedup command.
      • This will generate a filtered BAM file after the removal of PCR duplicates.
    • picard MarkDuplicates

      • Unless you are using UMIs it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation.
      • By default, the pipeline uses picard MarkDuplicates to mark the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples.
      • However, for RNA-seq data it is not recommended to physically remove duplicate reads from the alignments (unless you are using UMIs) because you expect a significant level of true biological duplication that arises from the same fragments being sequenced from for example highly expressed genes.
      • This step will be skipped automatically when using the --with_umi option or explicitly via the --skip_markduplicates parameter.

        compare the STAR bam-files with markDuplicates bam-files. They are the same, since in the markDuplicates step, the duplicated reads were only marked, not physically removed! jhuang@hamburg:~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq/results/STAR$ samtools flagstat ./V_8_4_2_p602_d3_DonorIIAligned.sortedByCoord.out.bam 19968762 + 0 in total (QC-passed reads + QC-failed reads) 2323677 + 0 secondary 0 + 0 supplementary 0 + 0 duplicates 19968762 + 0 mapped (100.00% : N/A) jhuang@hamburg:~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq/results/markDuplicates$ samtools flagstat V_8_4_2_p602_d3_DonorIIAligned.sortedByCoord.out.markDups.bam 19968762 + 0 in total (QC-passed reads + QC-failed reads) 2323677 + 0 secondary 0 + 0 supplementary 15013903 + 0 duplicates 19968762 + 0 mapped (100.00% : N/A)

  6. goal

    #| p600and601_d9_DonorII | GFP+mCherry control | Day 9 | 
    #| p600and601_d12_DonorI | GFP+mCherry control | Day 12 | 
    #-->| p604and605_d9_DonorII | sT+LTtr | Day 9 | 
    #-->| p604and605_d12_DonorI | sT+LTtr | Day 12 | 
    #-->| p602and604_d3_DonorI  | sT+LT | Day 3 | 
    #-->| p602and604_d3_DonorII | sT+LT | Day 3 |
    p602+604 d3 versus p602 d3 on sT+LT transcript
    p602+p604 d3 versus p604 d3 on sT+LT transcript
    p604+605 d9/12 versus p604 d8 on sT+LT transcript
    p604+605 d9/12 versus p605 d8 on sT+LT transcript
    
  7. under ~/DATA/Data_Denise_sT_PUBLISHING/Data_Denise_RNASeq_Merged_Corrected_28+2

    library("AnnotationDbi")
    library("clusterProfiler")
    library("ReactomePA")
    library(gplots)
    library(tximport)
    library(DESeq2)
    library("org.Hs.eg.db")
    #d.raw<- read.delim2("gene_counts_hg38+virus_on_LT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
    #d.raw<- read.delim2("gene_counts_hg38+virus_on_sT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
    d.raw<- read.delim2("gene_counts_hg38+virus_on_sT+LT_transcripts_28samples.csv",sep=",", header=TRUE, row.names=1)
    # [1] V_8_0_mock_DonorI             V_8_0_mock_DonorII           
    # [3] V_8_1_5_p601_d3_DonorII       V_8_1_5_p604_d3_DonorII      
    # [5] V_8_1_5_p601_d8_DonorII       V_8_1_5_p604_d8_DonorII      
    # [7] V_8_1_6_p601_d3_DonorI        V_8_1_6_p604_d3_DonorI       
    # [9] V_8_1_6_p601_d8_DonorI        V_8_1_6_p604_d8_DonorI       
    #[11] V_8_2_3_p600_d3_DonorII       V_8_2_3_p605_d3_DonorII      
    #[13] V_8_2_3_p600_d8_DonorII       V_8_2_3_p605_d8_DonorII      
    #[15] V_8_2_4_p600_d3_DonorI        V_8_2_4_p605_d3_DonorI       
    #[17] V_8_2_4_p600_d8_DonorI        V_8_2_4_p605_d8_DonorI       
    #[19] V_8_4_1_p602_d8_DonorII       V_8_4_1_p602_d8_DonorI       
    #[21] V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI
    #[23] V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII
    #[25] V_8_4_2_p602_d3_DonorI        V_8_4_2_p602_d3_DonorII      
    #[27] V_8_5_1_p602and604_d3_DonorI  V_8_5_2_p602and604_d3_DonorII
    col_order <- c("V_8_0_mock_DonorI","V_8_0_mock_DonorII","V_8_1_5_p601_d3_DonorII", "V_8_1_5_p604_d3_DonorII", "V_8_1_5_p601_d8_DonorII","V_8_1_5_p604_d8_DonorII",   "V_8_1_6_p601_d3_DonorI","V_8_1_6_p604_d3_DonorI","V_8_1_6_p601_d8_DonorI","V_8_1_6_p604_d8_DonorI",  "V_8_2_3_p600_d3_DonorII","V_8_2_3_p605_d3_DonorII","V_8_2_3_p600_d8_DonorII", "V_8_2_3_p605_d8_DonorII",  "V_8_2_4_p600_d3_DonorI","V_8_2_4_p605_d3_DonorI","V_8_2_4_p600_d8_DonorI","V_8_2_4_p605_d8_DonorI",  "V_8_4_1_p602_d8_DonorII","V_8_4_1_p602_d8_DonorI",  "V_8_3_1_p600and601_d12_DonorI", "V_8_3_1_p604and605_d12_DonorI","V_8_3_2_p600and601_d9_DonorII","V_8_3_2_p604and605_d9_DonorII",    "V_8_4_2_p602_d3_DonorI","V_8_4_2_p602_d3_DonorII",    "V_8_5_1_p602and604_d3_DonorI", "V_8_5_2_p602and604_d3_DonorII")
    identical(colnames(d.raw),col_order)
    #reordered.raw <- d.raw[,col_order]
    replicates = as.factor(c("untreated","untreated", "p601_d3","p604_d3", "p601_d8","p604_d8",   "p601_d3","p604_d3","p601_d8","p604_d8",  "p600_d3","p605_d3","p600_d8", "p605_d8",  "p600_d3","p605_d3","p600_d8","p605_d8",  "p602_d8","p602_d8",  "p600and601_d9d12", "p604and605_d9d12","p600and601_d9d12","p604and605_d9d12",     "p602_d3","p602_d3",    "p602and604_d3","p602and604_d3"))
    batch = as.factor(c("200420", "200420", "190927", "190927",    "190927", "190927", "190228", "190228",    "190228", "190228", "191008", "191008",    "191008", "191008", "190228", "190228",     "190228", "190228", "200817", "200817",       "200420", "200420", "200817", "200817",      "210302", "210302",  "210504","210504"))
    ids = as.factor(c("untreated_DonorI","untreated_DonorII", "p601_d3_DonorII","p604_d3_DonorII", "p601_d8_DonorII","p604_d8_DonorII",   "p601_d3_DonorI","p604_d3_DonorI","p601_d8_DonorI","p604_d8_DonorI",  "p600_d3_DonorII","p605_d3_DonorII","p600_d8_DonorII", "p605_d8_DonorII",  "p600_d3_DonorI","p605_d3_DonorI","p600_d8_DonorI","p605_d8_DonorI",  "p602_d8_DonorII","p602_d8_DonorI",  "p600and601_d12_DonorI", "p604and605_d12_DonorI","p600and601_d9_DonorII","p604and605_d9_DonorII",        "p602_d3_DonorI","p602_d3_DonorII",      "p602and604_d3_DonorI","p602and604_d3_DonorII"))
    donor = as.factor(c("DonorI","DonorII",  "DonorII","DonorII", "DonorII","DonorII",   "DonorI","DonorI","DonorI","DonorI",  "DonorII","DonorII","DonorII","DonorII",  "DonorI","DonorI","DonorI","DonorI",  "DonorII","DonorI",   "DonorI", "DonorI","DonorII","DonorII",        "DonorI","DonorII",    "DonorI","DonorII"))
    cData = data.frame(row.names=colnames(d.raw), replicates=replicates, donor=donor, batch=batch, ids=ids)
    #dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~batch+replicates) # ERROR due to the factor 'batch'
    dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~donor+replicates)
    sizeFactors(dds)
    >NULL
    dds <- estimateSizeFactors(dds)
    sizeFactors(dds)
    #            V_8_0_mock_DonorI            V_8_0_mock_DonorII 
    #                    0.6152727                     0.8401306 
    #      V_8_1_5_p601_d3_DonorII       V_8_1_5_p604_d3_DonorII 
    #                    1.0196629                     1.0293038 
    #      V_8_1_5_p601_d8_DonorII       V_8_1_5_p604_d8_DonorII 
    #                    0.8901261                     0.9062621 
    #       V_8_1_6_p601_d3_DonorI        V_8_1_6_p604_d3_DonorI 
    #                    1.3752853                     1.3504344 
    #       V_8_1_6_p601_d8_DonorI        V_8_1_6_p604_d8_DonorI 
    #                    1.2316027                     1.3085877 
    #      V_8_2_3_p600_d3_DonorII       V_8_2_3_p605_d3_DonorII 
    #                    0.9323440                     0.9381003 
    #      V_8_2_3_p600_d8_DonorII       V_8_2_3_p605_d8_DonorII 
    #                    0.7179836                     0.9400712 
    #       V_8_2_4_p600_d3_DonorI        V_8_2_4_p605_d3_DonorI 
    #                    1.3953243                     1.4836134 
    #       V_8_2_4_p600_d8_DonorI        V_8_2_4_p605_d8_DonorI 
    #                    1.2135489                     1.8270271 
    #      V_8_4_1_p602_d8_DonorII        V_8_4_1_p602_d8_DonorI 
    #                    1.5281322                     1.1579240 
    #V_8_3_1_p600and601_d12_DonorI V_8_3_1_p604and605_d12_DonorI 
    #                    0.9895927                     0.8560524 
    #V_8_3_2_p600and601_d9_DonorII V_8_3_2_p604and605_d9_DonorII 
    #                    1.3462308                     1.1142589 
    #       V_8_4_2_p602_d3_DonorI       V_8_4_2_p602_d3_DonorII 
    #                    0.9675338                     1.1800615 
    # V_8_5_1_p602and604_d3_DonorI V_8_5_2_p602and604_d3_DonorII 
    #                    0.3940273                     0.3490526
    rld <- rlogTransformation(dds)
    # -- before pca --
    png("pca.png", 1200, 800)
    plotPCA(rld, intgroup=c("replicates"))
    #plotPCA(rld, intgroup = c("replicates", "batch"))
    #plotPCA(rld, intgroup = c("replicates", "ids"))
    #plotPCA(rld, "batch")
    dev.off()
    # -- before heatmap --
    ## generate the pairwise comparison between samples
    library(gplots) 
    library("RColorBrewer")
    png("heatmap.png", 1200, 800)
    distsRL <- dist(t(assay(rld)))
    mat <- as.matrix(distsRL)
    #paste( rld$dex, rld$cell, sep="-" )
    #rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,batch, sep=":"))
    rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates))
    hc <- hclust(distsRL)
    hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
    heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
    dev.off()
    
    #---- 3 ----
    dds$replicates <- relevel(dds$replicates, "p602_d3")
    dds = DESeq(dds, betaPrior=FALSE)
    resultsNames(dds)
    clist <- c("p602and604_d3_vs_p602_d3")
    dds$replicates <- relevel(dds$replicates, "p604_d3")
    dds = DESeq(dds, betaPrior=FALSE)
    resultsNames(dds)
    clist <- c("p602and604_d3_vs_p604_d3")
    dds$replicates <- relevel(dds$replicates, "p604_d8")
    dds = DESeq(dds, betaPrior=FALSE)
    resultsNames(dds)
    clist <- c("p604and605_d9d12_vs_p604_d8")
    dds$replicates <- relevel(dds$replicates, "p605_d8")
    dds = DESeq(dds, betaPrior=FALSE)
    resultsNames(dds)
    clist <- c("p604and605_d9d12_vs_p605_d8")
    for (i in clist) {
    contrast = paste("replicates", i, sep="_")
    res = results(dds, name=contrast)
    res <- res[!is.na(res$log2FoldChange),]
    geness <- AnnotationDbi::select(org.Hs.eg.db, keys = rownames(res), keytype = "SYMBOL", columns = c("ENSEMBL", "ENTREZID", "SYMBOL", "GENENAME"))
    geness <- geness[!duplicated(geness$SYMBOL), ]
    res$SYMBOL = rownames(res)
    rownames(geness) <- geness$SYMBOL
    identical(rownames(res), rownames(geness))
    res_df <- as.data.frame(res)
    geness_res <- merge(geness, res_df)
    dim(geness_res)
    write.csv(as.data.frame(geness_res[order(geness_res$pvalue),]), file = paste(i, "all.txt", sep="-"))
    #write.csv(res_df, file = paste(i, "background.txt", sep="_"))
    up <- subset(geness_res, padj<=0.05 & log2FoldChange>=1)
    down <- subset(geness_res, padj<=0.05 & log2FoldChange<=-1)
    write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
    write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
    }
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p602and604_d3_vs_p602_d3-all.txt \
    p602and604_d3_vs_p602_d3-up.txt \
    p602and604_d3_vs_p602_d3-down.txt \
    -d$',' -o p602and604_d3_vs_p602_d3.xls;
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p602and604_d3_vs_p604_d3-all.txt \
    p602and604_d3_vs_p604_d3-up.txt \
    p602and604_d3_vs_p604_d3-down.txt \
    -d$',' -o p602and604_d3_vs_p604_d3.xls;
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p604and605_d9d12_vs_p604_d8-all.txt \
    p604and605_d9d12_vs_p604_d8-up.txt \
    p604and605_d9d12_vs_p604_d8-down.txt \
    -d$',' -o p604and605_d9d12_vs_p604_d8.xls;
    ~/Tools/csv2xls-0.4/csv_to_xls.py \
    p604and605_d9d12_vs_p605_d8-all.txt \
    p604and605_d9d12_vs_p605_d8-up.txt \
    p604and605_d9d12_vs_p605_d8-down.txt \
    -d$',' -o p604and605_d9d12_vs_p605_d8.xls;
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum