Tn-seq analysis pipeline (improved)

gene_x 0 like s 343 view s

Tags: sequencing, pipeline

  1. quality control

    ./240606_VH00358_96_AAFCFJGM5/kr11/initial_mutants_a_2_S6_R1_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr11/initial_mutants_a_2_S6_R2_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr13/LB_culture_a_2_S7_R1_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr13/LB_culture_a_2_S7_R2_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr15/growthout_control_24h_a_2_S8_R1_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr15/growthout_control_24h_a_2_S8_R2_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr17/extracellular_mutants_24h_a_2_S9_R1_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr17/extracellular_mutants_24h_a_2_S9_R2_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr19/intracellular_mutants_24h_a_2_S10_R1_001.fastq.gz
    ./240606_VH00358_96_AAFCFJGM5/kr19/intracellular_mutants_24h_a_2_S10_R2_001.fastq.gz
    
    #from fastqc results of initial_mutants
    49821406
    35-161
    49821406
    35-161
    
  2. modify the tpp scripts

    vim ~/.local/lib/python3.10/site-packages/pytpp/tpp_tools.py
    #search for "DEBUG"
    #-maxreads 10000 or not_given for take all!
    #-primer AGATGTGTATAAGAGACAG     the default primer of Tn5 is TAAGAGACAG!
    #-primer-start-window 0,159  set 0,159 as default!
    
  3. run Transposon Position Profiling (TPP) https://transit.readthedocs.io/en/latest/transit_running.html; https://orca2.tamu.edu/tom/iLab.html

    # Break-down of total reads (49821406):
    #  29481783 reads (59.2%) lack the expected Tn prefix
    # Break-down of trimmed reads with valid Tn prefix (20339623):
    
    #primer AGCTTCAGGGTTGAGATGTGTATAAGAGACAG
    #  primer_matches: 0 reads (0.0%) contain CTAGAGGGCCCAATTCGCCCTATAGTGAGT (Himar1)
    #  vector_matches: 0 reads (0.0%) contain CTAGACCGTCCAGTCTGGCAGGCCGGAAAC (phiMycoMarT7)
    #  adapter_matches: 0 reads (0.0%) contain GATCGGAAGAGCACACGTCTGAACTCCAGTCAC (Illumina/TruSeq index)
    #  misprimed_reads: 0 reads (0.0%) contain Himar1 prefix but don't end in TGTTA
    
    #kr11.trimmed1_failed_trim    22072406
    #-rw-rw-r-- 1 jhuang jhuang  2,7G Jun 11 15:44 kr11.trimmed1    20339623
    #-rw-rw-r-- 1 jhuang jhuang  2,9G Jun 11 15:46 kr11.trimmed2    20339623
    #cat initial_mutants_a_2_S6_R1_001.fastq | echo $((`wc -l` / 4))  49821406=29481783 reads (59.2%) + 20339623 = 49821406
    
    #vs.
    
    # Break-down of total reads (49821406):
    #  29356859 reads (58.9%) lack the expected Tn prefix
    # Break-down of trimmed reads with valid Tn prefix (20464547):
    #  primer_matches: 0 reads (0.0%) contain CTAGAGGGCCCAATTCGCCCTATAGTGAGT (Himar1)
    #  vector_matches: 0 reads (0.0%) contain CTAGACCGTCCAGTCTGGCAGGCCGGAAAC (phiMycoMarT7)
    #  adapter_matches: 0 reads (0.0%) contain GATCGGAAGAGCACACGTCTGAACTCCAGTCAC (Illumina/TruSeq index)
    #  misprimed_reads: 0 reads (0.0%) contain Himar1 prefix but don't end in TGTTA
    # read_length: 100 bp
    # mean_R1_genomic_length: 73.4 bp
    # mean_R2_genomic_length: 86.4 bp
    
    #conda deactivate
    ##Test AGCTTCAGGGTTGAGATGTGTATAAGAGACAG --> TAAGAGACAG, the results are similar!
    #python3 ~/.local/bin/tpp -bwa /usr/bin/bwa -protocol Tn5 -ref WA-314_m_.fasta -reads1 240606_VH00358_96_AAFCFJGM5/kr11/initial_mutants_a_2_S6_R1_001.fastq.gz -reads2 240606_VH00358_96_AAFCFJGM5/kr11/initial_mutants_a_2_S6_R2_001.fastq.gz -output kr11_10nt_primer -primer TAAGAGACAG -mismatches 1 -bwa-alg mem -replicon-ids contig_2_10,contig_2_9,contig_2_8,contig_2_7,contig_2_6,contig_2_5,contig_2_3,contig_2_2,contig_5_10,contig_5_11,contig_5_12,contig_5_13,contig_5_15,contig_5_16,contig_5_17,contig_5_18,contig_5_9,contig_5_8,contig_5_7,contig_5_6,contig_5_5,contig_5_4,contig_5_3,contig_5_2,contig_5_1,contig_4_2,contig_4_1,contig_3_59,contig_3_58,contig_3_57,contig_3_56,contig_3_55,contig_3_54,contig_3_53,contig_3_52,contig_3_51,contig_3_50,contig_3_49,contig_3_48,contig_3_47,contig_3_46,contig_3_44,contig_3_43,contig_3_42,contig_3_41,contig_3_40,contig_3_39,contig_3_38,contig_3_37,contig_3_36,contig_3_35,contig_3_34,contig_3_33,contig_3_32,contig_3_31,contig_3_30,contig_3_29,contig_3_28,contig_3_27,contig_3_26,contig_3_25,contig_3_24,contig_3_23,contig_3_22,contig_3_21,contig_3_20,contig_3_17,contig_3_16,contig_3_15,contig_3_14,contig_3_13,contig_3_12,contig_3_11,contig_3_9,contig_3_8,contig_3_7,contig_3_6,contig_3_5,contig_3_3,contig_3_2,contig_3_1,contig_1_48,contig_1_47,contig_1_46,contig_1_45,contig_1_44,contig_1_43,contig_1_42,contig_1_41,contig_1_40,contig_1_39,contig_1_38,contig_1_37,contig_1_34,contig_1_33,contig_1_32,contig_1_31,contig_1_28,contig_1_27,contig_1_26,contig_1_25,contig_1_24,contig_1_22,contig_1_20,contig_1_19,contig_1_18,contig_1_17,contig_1_16,contig_1_15,contig_1_14,contig_1_13,contig_1_12,contig_1_10,contig_1_8,contig_1_7,contig_1_6,contig_1_5,contig_1_4,contig_1_3,contig_1_2,contig_1_1,contig_C8715,contig_C8943,contig_C9371,contig_C8939,contig_C9357,contig_C8991,contig_C9445,contig_C8689
    #mv tpp.cfg kr11_10nt_primer_tpp.cfg
    
    #for initial_mutants, note that "-primer-start-window 0,161" is invalid!
    python3 ~/.local/bin/tpp -bwa /usr/bin/bwa -protocol Tn5 -ref WA-314_m_.fasta -reads1 240606_VH00358_96_AAFCFJGM5/kr11/initial_mutants_a_2_S6_R1_001.fastq.gz -reads2 240606_VH00358_96_AAFCFJGM5/kr11/initial_mutants_a_2_S6_R2_001.fastq.gz -output initial_mutants -primer AGCTTCAGGGTTGAGATGTGTATAAGAGACAG -mismatches 1 -primer-start-window 0,161 -bwa-alg mem -replicon-ids contig_2_10,contig_2_9,contig_2_8,contig_2_7,contig_2_6,contig_2_5,contig_2_3,contig_2_2,contig_5_10,contig_5_11,contig_5_12,contig_5_13,contig_5_15,contig_5_16,contig_5_17,contig_5_18,contig_5_9,contig_5_8,contig_5_7,contig_5_6,contig_5_5,contig_5_4,contig_5_3,contig_5_2,contig_5_1,contig_4_2,contig_4_1,contig_3_59,contig_3_58,contig_3_57,contig_3_56,contig_3_55,contig_3_54,contig_3_53,contig_3_52,contig_3_51,contig_3_50,contig_3_49,contig_3_48,contig_3_47,contig_3_46,contig_3_44,contig_3_43,contig_3_42,contig_3_41,contig_3_40,contig_3_39,contig_3_38,contig_3_37,contig_3_36,contig_3_35,contig_3_34,contig_3_33,contig_3_32,contig_3_31,contig_3_30,contig_3_29,contig_3_28,contig_3_27,contig_3_26,contig_3_25,contig_3_24,contig_3_23,contig_3_22,contig_3_21,contig_3_20,contig_3_17,contig_3_16,contig_3_15,contig_3_14,contig_3_13,contig_3_12,contig_3_11,contig_3_9,contig_3_8,contig_3_7,contig_3_6,contig_3_5,contig_3_3,contig_3_2,contig_3_1,contig_1_48,contig_1_47,contig_1_46,contig_1_45,contig_1_44,contig_1_43,contig_1_42,contig_1_41,contig_1_40,contig_1_39,contig_1_38,contig_1_37,contig_1_34,contig_1_33,contig_1_32,contig_1_31,contig_1_28,contig_1_27,contig_1_26,contig_1_25,contig_1_24,contig_1_22,contig_1_20,contig_1_19,contig_1_18,contig_1_17,contig_1_16,contig_1_15,contig_1_14,contig_1_13,contig_1_12,contig_1_10,contig_1_8,contig_1_7,contig_1_6,contig_1_5,contig_1_4,contig_1_3,contig_1_2,contig_1_1,contig_C8715,contig_C8943,contig_C9371,contig_C8939,contig_C9357,contig_C8991,contig_C9445,contig_C8689
    mv tpp.cfg initial_mutants_tpp.cfg
    
    #primer_start_window 0,159
    # Break-down of total reads (49821406):
    #  29481783 reads (59.2%) lack the expected Tn prefix
    # Break-down of trimmed reads with valid Tn prefix (20339623):
    #  primer_matches: 0 reads (0.0%) contain CTAGAGGGCCCAATTCGCCCTATAGTGAGT (Himar1)
    #  vector_matches: 0 reads (0.0%) contain CTAGACCGTCCAGTCTGGCAGGCCGGAAAC (phiMycoMarT7)
    #  adapter_matches: 0 reads (0.0%) contain GATCGGAAGAGCACACGTCTGAACTCCAGTCAC (Illumina/TruSeq index)
    #  misprimed_reads: 0 reads (0.0%) contain Himar1 prefix but don't end in TGTTA
    
    #for LB_culture
    python3 ~/.local/bin/tpp -bwa /usr/bin/bwa -protocol Tn5 -ref WA-314_m_.fasta -reads1 ./240606_VH00358_96_AAFCFJGM5/kr13/LB_culture_a_2_S7_R1_001.fastq.gz -reads2 ./240606_VH00358_96_AAFCFJGM5/kr13/LB_culture_a_2_S7_R2_001.fastq.gz -output LB_culture -primer AGCTTCAGGGTTGAGATGTGTATAAGAGACAG -mismatches 1 -primer-start-window 0,161 -bwa-alg mem -replicon-ids contig_2_10,contig_2_9,contig_2_8,contig_2_7,contig_2_6,contig_2_5,contig_2_3,contig_2_2,contig_5_10,contig_5_11,contig_5_12,contig_5_13,contig_5_15,contig_5_16,contig_5_17,contig_5_18,contig_5_9,contig_5_8,contig_5_7,contig_5_6,contig_5_5,contig_5_4,contig_5_3,contig_5_2,contig_5_1,contig_4_2,contig_4_1,contig_3_59,contig_3_58,contig_3_57,contig_3_56,contig_3_55,contig_3_54,contig_3_53,contig_3_52,contig_3_51,contig_3_50,contig_3_49,contig_3_48,contig_3_47,contig_3_46,contig_3_44,contig_3_43,contig_3_42,contig_3_41,contig_3_40,contig_3_39,contig_3_38,contig_3_37,contig_3_36,contig_3_35,contig_3_34,contig_3_33,contig_3_32,contig_3_31,contig_3_30,contig_3_29,contig_3_28,contig_3_27,contig_3_26,contig_3_25,contig_3_24,contig_3_23,contig_3_22,contig_3_21,contig_3_20,contig_3_17,contig_3_16,contig_3_15,contig_3_14,contig_3_13,contig_3_12,contig_3_11,contig_3_9,contig_3_8,contig_3_7,contig_3_6,contig_3_5,contig_3_3,contig_3_2,contig_3_1,contig_1_48,contig_1_47,contig_1_46,contig_1_45,contig_1_44,contig_1_43,contig_1_42,contig_1_41,contig_1_40,contig_1_39,contig_1_38,contig_1_37,contig_1_34,contig_1_33,contig_1_32,contig_1_31,contig_1_28,contig_1_27,contig_1_26,contig_1_25,contig_1_24,contig_1_22,contig_1_20,contig_1_19,contig_1_18,contig_1_17,contig_1_16,contig_1_15,contig_1_14,contig_1_13,contig_1_12,contig_1_10,contig_1_8,contig_1_7,contig_1_6,contig_1_5,contig_1_4,contig_1_3,contig_1_2,contig_1_1,contig_C8715,contig_C8943,contig_C9371,contig_C8939,contig_C9357,contig_C8991,contig_C9445,contig_C8689
    mv tpp.cfg LB_culture_tpp.cfg
    
    #for growthout_control_24h
    python3 ~/.local/bin/tpp -bwa /usr/bin/bwa -protocol Tn5 -ref WA-314_m_.fasta -reads1 ./240606_VH00358_96_AAFCFJGM5/kr15/growthout_control_24h_a_2_S8_R1_001.fastq.gz -reads2 ./240606_VH00358_96_AAFCFJGM5/kr15/growthout_control_24h_a_2_S8_R2_001.fastq.gz -output growthout_control_24h -primer AGCTTCAGGGTTGAGATGTGTATAAGAGACAG -mismatches 1 -primer-start-window 0,161 -bwa-alg mem -replicon-ids contig_2_10,contig_2_9,contig_2_8,contig_2_7,contig_2_6,contig_2_5,contig_2_3,contig_2_2,contig_5_10,contig_5_11,contig_5_12,contig_5_13,contig_5_15,contig_5_16,contig_5_17,contig_5_18,contig_5_9,contig_5_8,contig_5_7,contig_5_6,contig_5_5,contig_5_4,contig_5_3,contig_5_2,contig_5_1,contig_4_2,contig_4_1,contig_3_59,contig_3_58,contig_3_57,contig_3_56,contig_3_55,contig_3_54,contig_3_53,contig_3_52,contig_3_51,contig_3_50,contig_3_49,contig_3_48,contig_3_47,contig_3_46,contig_3_44,contig_3_43,contig_3_42,contig_3_41,contig_3_40,contig_3_39,contig_3_38,contig_3_37,contig_3_36,contig_3_35,contig_3_34,contig_3_33,contig_3_32,contig_3_31,contig_3_30,contig_3_29,contig_3_28,contig_3_27,contig_3_26,contig_3_25,contig_3_24,contig_3_23,contig_3_22,contig_3_21,contig_3_20,contig_3_17,contig_3_16,contig_3_15,contig_3_14,contig_3_13,contig_3_12,contig_3_11,contig_3_9,contig_3_8,contig_3_7,contig_3_6,contig_3_5,contig_3_3,contig_3_2,contig_3_1,contig_1_48,contig_1_47,contig_1_46,contig_1_45,contig_1_44,contig_1_43,contig_1_42,contig_1_41,contig_1_40,contig_1_39,contig_1_38,contig_1_37,contig_1_34,contig_1_33,contig_1_32,contig_1_31,contig_1_28,contig_1_27,contig_1_26,contig_1_25,contig_1_24,contig_1_22,contig_1_20,contig_1_19,contig_1_18,contig_1_17,contig_1_16,contig_1_15,contig_1_14,contig_1_13,contig_1_12,contig_1_10,contig_1_8,contig_1_7,contig_1_6,contig_1_5,contig_1_4,contig_1_3,contig_1_2,contig_1_1,contig_C8715,contig_C8943,contig_C9371,contig_C8939,contig_C9357,contig_C8991,contig_C9445,contig_C8689
    mv tpp.cfg growthout_control_24h_tpp.cfg
    
    #for extracellular_mutants_24h
    python3 ~/.local/bin/tpp -bwa /usr/bin/bwa -protocol Tn5 -ref WA-314_m_.fasta -reads1 ./240606_VH00358_96_AAFCFJGM5/kr17/extracellular_mutants_24h_a_2_S9_R1_001.fastq.gz -reads2 ./240606_VH00358_96_AAFCFJGM5/kr17/extracellular_mutants_24h_a_2_S9_R2_001.fastq.gz -output initial_mutants -primer AGCTTCAGGGTTGAGATGTGTATAAGAGACAG -mismatches 1 -primer-start-window 0,161 -bwa-alg mem -replicon-ids contig_2_10,contig_2_9,contig_2_8,contig_2_7,contig_2_6,contig_2_5,contig_2_3,contig_2_2,contig_5_10,contig_5_11,contig_5_12,contig_5_13,contig_5_15,contig_5_16,contig_5_17,contig_5_18,contig_5_9,contig_5_8,contig_5_7,contig_5_6,contig_5_5,contig_5_4,contig_5_3,contig_5_2,contig_5_1,contig_4_2,contig_4_1,contig_3_59,contig_3_58,contig_3_57,contig_3_56,contig_3_55,contig_3_54,contig_3_53,contig_3_52,contig_3_51,contig_3_50,contig_3_49,contig_3_48,contig_3_47,contig_3_46,contig_3_44,contig_3_43,contig_3_42,contig_3_41,contig_3_40,contig_3_39,contig_3_38,contig_3_37,contig_3_36,contig_3_35,contig_3_34,contig_3_33,contig_3_32,contig_3_31,contig_3_30,contig_3_29,contig_3_28,contig_3_27,contig_3_26,contig_3_25,contig_3_24,contig_3_23,contig_3_22,contig_3_21,contig_3_20,contig_3_17,contig_3_16,contig_3_15,contig_3_14,contig_3_13,contig_3_12,contig_3_11,contig_3_9,contig_3_8,contig_3_7,contig_3_6,contig_3_5,contig_3_3,contig_3_2,contig_3_1,contig_1_48,contig_1_47,contig_1_46,contig_1_45,contig_1_44,contig_1_43,contig_1_42,contig_1_41,contig_1_40,contig_1_39,contig_1_38,contig_1_37,contig_1_34,contig_1_33,contig_1_32,contig_1_31,contig_1_28,contig_1_27,contig_1_26,contig_1_25,contig_1_24,contig_1_22,contig_1_20,contig_1_19,contig_1_18,contig_1_17,contig_1_16,contig_1_15,contig_1_14,contig_1_13,contig_1_12,contig_1_10,contig_1_8,contig_1_7,contig_1_6,contig_1_5,contig_1_4,contig_1_3,contig_1_2,contig_1_1,contig_C8715,contig_C8943,contig_C9371,contig_C8939,contig_C9357,contig_C8991,contig_C9445,contig_C8689
    mv tpp.cfg extracellular_mutants_24h_tpp.cfg
    
    #for intracellular_mutants_24h
    python3 ~/.local/bin/tpp -bwa /usr/bin/bwa -protocol Tn5 -ref WA-314_m_.fasta -reads1 ./240606_VH00358_96_AAFCFJGM5/kr19/intracellular_mutants_24h_a_2_S10_R1_001.fastq.gz -reads2 ./240606_VH00358_96_AAFCFJGM5/kr19/intracellular_mutants_24h_a_2_S10_R2_001.fastq.gz -output initial_mutants -primer AGCTTCAGGGTTGAGATGTGTATAAGAGACAG -mismatches 1 -primer-start-window 0,161 -bwa-alg mem -replicon-ids contig_2_10,contig_2_9,contig_2_8,contig_2_7,contig_2_6,contig_2_5,contig_2_3,contig_2_2,contig_5_10,contig_5_11,contig_5_12,contig_5_13,contig_5_15,contig_5_16,contig_5_17,contig_5_18,contig_5_9,contig_5_8,contig_5_7,contig_5_6,contig_5_5,contig_5_4,contig_5_3,contig_5_2,contig_5_1,contig_4_2,contig_4_1,contig_3_59,contig_3_58,contig_3_57,contig_3_56,contig_3_55,contig_3_54,contig_3_53,contig_3_52,contig_3_51,contig_3_50,contig_3_49,contig_3_48,contig_3_47,contig_3_46,contig_3_44,contig_3_43,contig_3_42,contig_3_41,contig_3_40,contig_3_39,contig_3_38,contig_3_37,contig_3_36,contig_3_35,contig_3_34,contig_3_33,contig_3_32,contig_3_31,contig_3_30,contig_3_29,contig_3_28,contig_3_27,contig_3_26,contig_3_25,contig_3_24,contig_3_23,contig_3_22,contig_3_21,contig_3_20,contig_3_17,contig_3_16,contig_3_15,contig_3_14,contig_3_13,contig_3_12,contig_3_11,contig_3_9,contig_3_8,contig_3_7,contig_3_6,contig_3_5,contig_3_3,contig_3_2,contig_3_1,contig_1_48,contig_1_47,contig_1_46,contig_1_45,contig_1_44,contig_1_43,contig_1_42,contig_1_41,contig_1_40,contig_1_39,contig_1_38,contig_1_37,contig_1_34,contig_1_33,contig_1_32,contig_1_31,contig_1_28,contig_1_27,contig_1_26,contig_1_25,contig_1_24,contig_1_22,contig_1_20,contig_1_19,contig_1_18,contig_1_17,contig_1_16,contig_1_15,contig_1_14,contig_1_13,contig_1_12,contig_1_10,contig_1_8,contig_1_7,contig_1_6,contig_1_5,contig_1_4,contig_1_3,contig_1_2,contig_1_1,contig_C8715,contig_C8943,contig_C9371,contig_C8939,contig_C9357,contig_C8991,contig_C9445,contig_C8689
    mv tpp.cfg intracellular_mutants_24h_tpp.cfg
    
    ##Ignoring the reads_R1
    #python3 ~/.local/bin/tpp -bwa /usr/bin/bwa -protocol Tn5 -ref contig_1_1.fna -reads1 original_reads_rc_R2.fastq -output #contig_1_1_rc_only_reads2 -primer CCTACAACAAAGCTCTCATCAAC -mismatches 1 -bwa-alg mem -replicon-ids contig_1_1
    #mv tpp.cfg contig_1_1_rc_only_reads2.tpp.cfg
    
  4. generate statistics tables in Excel-format

    cp initial_mutants_rep1.tn_stats initial_mutants_rep1.tn_stats_
    #Delete all general statistics before the table data in initial_mutants_rep1.tn_stats_; delete the content after "# FR_corr (Fwd templates vs. Rev templates):"
    sed -i 's/read_count (TA sites only, for Himar1)/read_counts/g' *.tn_stats_
    sed -i 's/NZ_mean (among templates)/NZ_mean (mean template count over non-zero TA sites)/g' *.tn_stats_
    python3 ../parse_tn_stats.py initial_mutants_rep1.tn_stats_ initial_mutants_rep1.tn_stats.xlsx
    python3 ../parse_tn_stats.py LB_culture_rep1.tn_stats_ LB_culture_rep1.tn_stats.xlsx
    #calculate the sum of the first and second columns by "=SUM(B2:B130)" and "=SUM(C2:C130)"  441057 and 276060
    mkdir initial_mutants_rep1_wig
    mv *.wig initial_mutants_rep1_wig/
    zip -r initial_mutants_rep1_wig.zip initial_mutants_rep1_wig/
    #The counts-files are too big, not nessasary to send:
    #~/Tools/csv2xls-0.4/csv_to_xls.py initial_mutants_rep1.tn_stats_ *.counts -d$',' -o initial_mutants_rep1.stats.xls;
    #   contig_1_10 699 411
    #   contig_1_8  3206    2031
    #   contig_1_7  3787    2376
    #   contig_1_6  4604    2871
    #   contig_1_5  2794    1765
    #   contig_1_4  83  58
    #   contig_1_3  2944    1882
    #   contig_1_2  15446   9678
    #   contig_1_1  14391   8954
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum