ChIP-seq using HOMER (-style factor, findPeaks + default getDifferentialPeaksReplicates.pl)

gene_x 0 like s 591 view s

Tags:

  1. nextflow ChIP-seq run for NHDF_p783

    #under Raw_Data for ChIP-seq 
    ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf859/3_NHDF_Donor_1_p783_input_S5_R1_001.fastq.gz p783_input_DonorI.fastq.gz
    ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf860/4_NHDF_Donor_2_p783_input_S6_R1_001.fastq.gz p783_input_DonorII.fastq.gz
    ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf861/5_NHDF_Donor_1_p783_ChIP_S7_R1_001.fastq.gz p783_ChIP_DonorI.fastq.gz
    ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf862/6_NHDF_Donor_2_p783_ChIP_S8_R1_001.fastq.gz p783_ChIP_DonorII.fastq.gz
    
    #'hg38'      { bwa = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/"
    #          blacklist = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/blacklists/hg38-blacklist.bed"
    #          gtf = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
    #        }
    ln -s /home/jhuang/Tools/NGI-ChIPseq/ .
    (chipseq) nextflow run NGI-ChIPseq/main.nf --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data/*.fastq.gz' --genome hg38 --macsconfig macs.config --singleEnd --blacklist_filtering -profile standard --project Denise_LT_DNA_Bindung --outdir results_LT_DNA_Bindung_hg38 -resume
    
    #By the way: nextflow RNA-seq run for NHDF_p783 (NOT the topic of this post).
    #under Raw_Data for RNA-seq
    cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz ./
    cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ./
    #under Raw_Data_p783_RNAseq for RNA-seq
    ln -s ../Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz  ctrl_DonorI.fastq.gz   
    ln -s ../Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ctrl_DonorII.fastq.gz
    ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf857/1_NHDF_Donor_1_p783_S1_R1_001.fastq.gz p783_DonorI.fastq.gz
    ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf858/2_NHDF_Donor_2_p783_S2_R1_001.fastq.gz p783_DonorII.fastq.gz
    #Note: MultiQC.html has to be regenerated with the 'Biotype Counts' section ignored, because --fcGroupFeaturesType gene_name cannot produce the real biotype counts!
    (rnaseq_2021) nextflow run rnaseq --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data_p783/RNA_seq/*.fastq.gz'  --fasta "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" --gtf "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"  --bed12 "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" --singleEnd -profile standard --aligner star --saveReference -resume --saveAlignedIntermediates --skip_rseqc --skip_dupradar --skip_genebody_coverage --skip_preseq --skip_edger --fcGroupFeaturesType gene_name
    
  2. nextflow ChIP-seq run for the truncated LT-Ag + sT expression data from WaGa and HEK293

    #160719_SN7001212_0156_AC8K76ACXX
    
    cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L003_R1_001.fastq.gz > HEK293_Input_p197_r1.fastq.gz
    cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L003_R1_001.fastq.gz > HEK293_Input_p197_r2.fastq.gz
    cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L003_R1_001.fastq.gz > HEK293_Input_p197_r3.fastq.gz
    
    cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L003_R1_001.fastq.gz > HEK293_LT_p197_r1.fastq.gz
    cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L003_R1_001.fastq.gz > HEK293_LT_p197_r2.fastq.gz
    cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L003_R1_001.fastq.gz > HEK293_LT_p197_r3.fastq.gz
    
    #140117_SN7001212_0097_AC3ECBACXX
    
    cat ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_igg/waga_igg_TAGCTT_L003_R1_001.fastq.gz ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_igg/waga_igg_TAGCTT_L004_R1_001.fastq.gz > WaGa_IgG.fastq.gz
    
    cat ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_lt/waga_lt_GGTAGC_L003_R1_001.fastq.gz ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_lt/waga_lt_GGTAGC_L004_R1_001.fastq.gz > WaGa_LT.fastq.gz
    
    ln -s /home/jhuang/Tools/NGI-ChIPseq/ .
    (chipseq) nextflow run NGI-ChIPseq/main.nf --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/LTtr-ChIP/Raw_Data/*.fastq.gz' --genome hg38 --macsconfig macs.config --singleEnd --blacklist_filtering -profile standard --project Denise_LTtr_DNA_Bindung --outdir results_LTtr_DNA_Bindung_hg38 -resume
    
  3. makeTagDirectory

    conda activate myperl
    mkdir results_ChIPseq_K331A_hg38/homer; cd results_ChIPseq_K331A_hg38/homer
    
    #makeTagDirectory <output directory> <input file> -genome hg38
    for sample in p783_ChIP_DonorI p783_ChIP_DonorII p783_input_DonorI p783_input_DonorII; do
      makeTagDirectory ${sample} ../picard/${sample}.dedup.sorted.bam -genome hg38
    done
    
  4. generate bigwigs

    #makeUCSCfile peaks.txt -f peaks.bed -o auto -noadj -bigWig sample.bw -genome hg38
    for sample in p783_ChIP_DonorI p783_ChIP_DonorII p783_input_DonorI p783_input_DonorII; do
    makeUCSCfile ${sample} -pseudo 1 -bigWig /home/jhuang/REFs/hg38.chromSizes -o auto -style chipseq    -norm 1e7 -normLength 100 -fsize 1
    done
    mv ./p783_ChIP_DonorI/p783_ChIP_DonorI.ucsc.bigWig     ./p783_ChIP_DonorI/LT_K331A_DI.bigWig
    mv ./p783_ChIP_DonorII/p783_ChIP_DonorII.ucsc.bigWig   ./p783_ChIP_DonorII/LT_K331A_DII.bigWig
    mv ./p783_input_DonorI/p783_input_DonorI.ucsc.bigWig   ./p783_input_DonorI/LT_K331A_DI_input.bigWig
    mv ./p783_input_DonorII/p783_input_DonorII.ucsc.bigWig ./p783_input_DonorII/LT_K331A_DII_input.bigWig
    
  5. peak calling, get peaks.txt

      #findPeaks <tag directory> -i <input (control) tag directory> -o <output file> -genome hg38
      findPeaks p783_ChIP_DonorI  -style factor    -o auto -i p783_input_DonorI
      findPeaks p783_ChIP_DonorII -style factor    -o auto -i p783_input_DonorII
      cp -r ../reproduce_2023/tagDirectories ./
      cd homer
      ln -s ../tagDirectories/NHDF_LT_Donor1 ./
      ln -s ../tagDirectories/NHDF_LT_Donor2 ./
      ln -s ../tagDirectories/NHDF_LT_Donor1_Input ./
      ln -s ../tagDirectories/NHDF_LT_Donor2_Input ./
      ln -s ../tagDirectories/Pfsk-1B_LT+sT_r1 ./
      ln -s ../tagDirectories/Pfsk-1B_LT+sT_r2 ./
      ln -s ../tagDirectories/Pfsk-1B_LT+sT_r1_Input ./
      ln -s ../tagDirectories/Pfsk-1B_LT+sT_r2_Input ./
      ln -s ../tagDirectories/HEK293_LT+sT_r2 ./
      ln -s ../tagDirectories/HEK293_LT+sT_r3 ./
      ln -s ../tagDirectories/HEK293_LT+sT_r2_Input ./
      ln -s ../tagDirectories/HEK293_LT+sT_r3_Input ./
    
      findPeaks NHDF_LT_Donor1  -style factor      -o auto -i NHDF_LT_Donor1_Input
      findPeaks NHDF_LT_Donor2  -style factor      -o auto -i NHDF_LT_Donor2_Input
    
      findPeaks Pfsk-1B_LT+sT_r1  -style factor    -o auto -i Pfsk-1B_LT+sT_r1_Input
      findPeaks Pfsk-1B_LT+sT_r2  -style factor    -o auto -i Pfsk-1B_LT+sT_r2_Input
    
      findPeaks HEK293_LT+sT_r2 -style factor      -o auto -i HEK293_LT+sT_r2_Input
      findPeaks HEK293_LT+sT_r3 -style factor      -o auto -i HEK293_LT+sT_r3_Input
    
  6. peak calling using getDifferentialPeaksReplicates.pl

    cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor1_Input ./
    cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor2_Input ./
    cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor1 ./
    cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor2 ./
    #-annStats annStats.txt 
    conda activate myperl
    getDifferentialPeaksReplicates.pl -t p783_ChIP_DonorI p783_ChIP_DonorII -i p783_input_DonorI p783_input_DonorII      -genome hg38 -use peaks.txt > peaks_K331A_LT.txt
    mv peaks_K331A_LT.txt peaks_NHDF_K331A_LT.txt
    getDifferentialPeaksReplicates.pl -t NHDF_LT_Donor1 NHDF_LT_Donor2      -i NHDF_LT_Donor1_Input NHDF_LT_Donor2_Input -genome hg38 -use peaks.txt > peaks_NHDF_LT.txt
    getDifferentialPeaksReplicates.pl -t Pfsk-1B_LT+sT_r1 Pfsk-1B_LT+sT_r2  -i Pfsk-1B_LT+sT_r1_Input Pfsk-1B_LT+sT_r2_Input -genome hg38 -use peaks.txt > peaks_PFSK-1_LT+sT.txt
    getDifferentialPeaksReplicates.pl -t HEK293_LT+sT_r2 HEK293_LT+sT_r3  -i HEK293_LT+sT_r2_Input HEK293_LT+sT_r3_Input -genome hg38 -use peaks.txt > peaks_HEK293_LT+sT.txt
    
  7. merge peaks: tried -d values of 0, 200, 500, 1000 and 2000 (the command below uses -d 1000)

    #http://homer.ucsd.edu/homer/ngs/mergePeaks.html
    mergePeaks -d 1000 peaks_PFSK-1_LT+sT.txt peaks_HEK293_LT+sT.txt peaks_NHDF_LT.txt -prefix celllines -venn celllines.txt -matrix celllines
    
    #-- generate bed files --
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' peaks_NHDF_LT.txt > peaks_NHDF.bed;        
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' peaks_HEK293_LT+sT.txt > peaks_HEK293.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' peaks_PFSK-1_LT+sT.txt > peaks_PFSK-1.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_HEK293_LT+sT.txt > peaks_HEK293_only.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt > peaks_HEK293_NHDF.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_NHDF_LT.txt > peaks_NHDF_only.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt > peaks_PFSK-1_only.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt > peaks_PFSK-1_HEK293.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt > peaks_PFSK-1_HEK293_NHDF.bed;
    awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt_peaks_NHDF_LT.txt > peaks_PFSK-1_NHDF.bed;
    
    #-- annotate the peaks --
    annotatePeaks.pl peaks_NHDF_LT.txt hg38 > annotatedPeaks_NHDF.txt
    annotatePeaks.pl peaks_HEK293_LT+sT.txt hg38 > annotatedPeaks_HEK293.txt
    annotatePeaks.pl peaks_PFSK-1_LT+sT.txt hg38 > annotatedPeaks_PFSK-1.txt
    annotatePeaks.pl celllines_peaks_HEK293_LT+sT.txt hg38 > annotatedPeaks_HEK293_only.txt
    annotatePeaks.pl celllines_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt hg38 > annotatedPeaks_HEK293_NHDF.txt
    annotatePeaks.pl celllines_peaks_NHDF_LT.txt hg38 > annotatedPeaks_NHDF_only.txt
    annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt hg38 > annotatedPeaks_PFSK-1_only.txt
    annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt hg38 > annotatedPeaks_PFSK-1_HEK293.txt
    annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt hg38 > annotatedPeaks_PFSK-1_HEK293_NHDF.txt
    annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt_peaks_NHDF_LT.txt hg38 > annotatedPeaks_PFSK-1_NHDF.txt
    
    mkdir ../beds_PFSK-1_HEK293_NHDF;
    for sample in peaks_HEK293_only peaks_PFSK-1_only peaks_NHDF_only    peaks_HEK293 peaks_PFSK-1 peaks_NHDF    peaks_PFSK-1_HEK293 peaks_PFSK-1_NHDF peaks_HEK293_NHDF     peaks_PFSK-1_HEK293_NHDF; do
      grep -v "cmd" ${sample}.bed > ../beds_PFSK-1_HEK293_NHDF/${sample}_.bed
    done
    
    #Chr     Start   End     PeakID (cmd=annotatePeaks.pl common_peaks_NHDF.txt hg38)        Peak Score      Strand
    ~/Tools/csv2xls-0.4/csv_to_xls.py celllines.txt annotatedPeaks_HEK293_only.txt annotatedPeaks_PFSK-1_only.txt annotatedPeaks_NHDF_only.txt    annotatedPeaks_HEK293.txt annotatedPeaks_PFSK-1.txt annotatedPeaks_NHDF.txt    annotatedPeaks_PFSK-1_HEK293.txt annotatedPeaks_PFSK-1_NHDF.txt annotatedPeaks_HEK293_NHDF.txt     annotatedPeaks_PFSK-1_HEK293_NHDF.txt  -d$'\t' -o  annotatedPeaks_PFSK-1_HEK293_NHDF.xls
    
    #IMPORTANT: DELETE the column 'Strand' marked with '+' in the merged Excel file!
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum