Mapping reads on reference genomes

gene_x 0 like s 50 view s

Tags: processing

  1. Search for the reference complete genome at https://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi: Enterovirus type B sequences, Parvovirus B19 sequences, Rhinovirus A sequences and Parainfluenza type 3

    #- Sort and Index the BAM File:
    #    samtools sort -o sorted.bam input.bam
    #    samtools index sorted.bam
    #- Call Variants:
    #    bcftools mpileup -Ou -f reference.fasta sorted.bam | bcftools call -mv -Oz -o calls.vcf.gz
    #    bcftools index calls.vcf.gz
    #- Generate Consensus FASTA:
    #    bcftools consensus -f reference.fasta calls.vcf.gz -o consensus.fasta
    
  2. Align Reads to Reference Genome

    First, align the reads from isolate 2 to the genome of isolate 1. For example, using BWA:

    bwa index Enterovirus_B.fasta
    bwa index Parvovirus_B19.fasta
    bwa index Rhinovirus_A.fasta
    bwa index Parainfluenza_3.fasta
    #bwa mem Enterovirus_B.fasta isolate2_reads.fastq > aligned.sam
    #bwa mem Enterovirus_B.fasta isolate2_reads_1.fastq isolate2_reads_2.fastq > aligned.sam
    bwa mem -t 64 Enterovirus_B.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Enterovirus_B.bam
    bwa mem -t 64 Enterovirus_B.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Enterovirus_B.bam
    bwa mem -t 64 Enterovirus_B.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Enterovirus_B.bam
    bwa mem -t 64 Enterovirus_B.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Enterovirus_B.bam
    bwa mem -t 64 Enterovirus_B.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Enterovirus_B.bam
    bwa mem -t 64 Enterovirus_B.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Enterovirus_B.bam
    
    bwa mem -t 64 Parvovirus_B19.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Parvovirus_B19.bam
    bwa mem -t 64 Parvovirus_B19.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Parvovirus_B19.bam
    bwa mem -t 64 Parvovirus_B19.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Parvovirus_B19.bam
    bwa mem -t 64 Parvovirus_B19.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Parvovirus_B19.bam
    bwa mem -t 64 Parvovirus_B19.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Parvovirus_B19.bam
    bwa mem -t 64 Parvovirus_B19.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Parvovirus_B19.bam
    
    bwa mem -t 64 Rhinovirus_A.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Rhinovirus_A.bam
    bwa mem -t 64 Rhinovirus_A.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Rhinovirus_A.bam
    bwa mem -t 64 Rhinovirus_A.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Rhinovirus_A.bam
    bwa mem -t 64 Rhinovirus_A.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Rhinovirus_A.bam
    bwa mem -t 64 Rhinovirus_A.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Rhinovirus_A.bam
    bwa mem -t 64 Rhinovirus_A.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Rhinovirus_A.bam
    
    bwa mem -t 64 Parainfluenza_3.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Parainfluenza_3.bam
    bwa mem -t 64 Parainfluenza_3.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Parainfluenza_3.bam
    bwa mem -t 64 Parainfluenza_3.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Parainfluenza_3.bam
    bwa mem -t 64 Parainfluenza_3.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Parainfluenza_3.bam
    bwa mem -t 64 Parainfluenza_3.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Parainfluenza_3.bam
    bwa mem -t 64 Parainfluenza_3.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Parainfluenza_3.bam
    

    or, use vrap to generate input.bam

    #for bowtie (1 DB): --host Bacillus_cereus.fasta
    #for blast (5 DBs): --virus=Acinetobacter_baumannii.fasta -n /mnt/h1/jhuang/blast/nt -a /mnt/h1/jhuang/blast/nr, and two default download_db.py
    #A6_chr_plasmid.fasta reads filtered, should only the contamination reads remaining!
    vrap/vrap.py -1 ./results_ATCC19606/trimmed/A10_CraA_HQ_trimmed_P_1.fastq.gz -2 ./results_ATCC19606/trimmed/A10_CraA_HQ_trimmed_P_2.fastq.gz -o A10_vrap_host_A6 --host A6_chr_plasmid.fasta --noblast -t 55
    
  3. Convert SAM to BAM, Sort and Index

    Convert the SAM file to BAM, sort it, and create an index:

    samtools view -Sb mapped > aligned.bam
    samtools sort aligned.bam -o sorted.bam
    samtools index sorted.bam
    
  4. Coverage Analysis

    Check the coverage of the genome regions. Regions with no coverage will be masked later.

    #bedtools genomecov -ibam sorted.bam -bg > coverage.bed
    #BUG above: By default, Bedtools doesn't explicitly list regions with zero coverage in the output of genomecov. To get around this, you can use the -bga option with genomecov. This option ensures that regions with zero coverage are also included in the output. Here's how you can modify your command:
    bedtools genomecov -ibam sorted.bam -bga > coverage.bed
    
  5. Create a Bed File for Masking

    Create a BED file that specifies regions with no or low coverage. Assume that any region with coverage less than a certain threshold (e.g., 1x) should be masked:

    awk '$4 < 1 {print $1 "\t" $2 "\t" $3}' coverage.bed > low_coverage.bed
    
  6. Generate Consensus Sequence

    Generate a consensus sequence, using bcftools, and mask regions with low or no coverage:

    bcftools mpileup -Ou -f ../../CP059040.fasta sorted.bam | bcftools call -mv --ploidy 1 -Oz -o calls.vcf.gz
    bcftools index calls.vcf.gz
    bcftools consensus -f ../../CP059040.fasta -m low_coverage.bed calls.vcf.gz -o consensus.fasta
    #The site CP059040:4 overlaps with another variant, skipping...
    #The site CP059040:3125036 overlaps with another variant, skipping...
    #Applied 58 variants
    
    bcftools mpileup -Ou -f ../../A6_chr_plasmid.fasta sorted.bam | bcftools call -mv --ploidy 1 -Oz -o calls.vcf.gz
    bcftools index calls.vcf.gz
    bcftools consensus -f ../../A6_chr_plasmid.fasta -m low_coverage.bed calls.vcf.gz -o consensus.fasta
    #The site seq00000000:4 overlaps with another variant, skipping...
    #The site seq00000000:3125036 overlaps with another variant, skipping...
    #Applied 61 variants
    #--> TODO: could report the following regions are not covered in A10 comparing to A6.
    #seq00000000     364666  365765
    #seq00000000     2810907 2813323
    #seq00000000     2813552 2813779
    #seq00000000     2813923 2819663
    #seq00000000     2821839 2831921
    #seq00000000     2832750 2832753
    #seq00000000     2833977 2860354
    #seq00000000     2860505 2861780
    #seq00000000     2861931 2861951
    #seq00000000     2862102 2862234
    #seq00000000     2862385 2863434
    #seq00000000     3124939 3124943
    #seq00000000     3125009 3125010
    

    In this command, the -m option in bcftools consensus is used to specify a BED file for masking low-coverage regions.

  7. Coverage plot

    for sample in 4132-Leber_CAP28 4132-Leber_CAP45 4132-Leber_NODE328; do
        bowtie2-build ${sample}.fasta ${sample}.fasta
        bowtie2 -1 ../trimmed_paired/4132-Leber_S3_L001_R1.fastq.gz -2 ../trimmed_paired/4132-Leber_S3_L001_R2.fastq.gz -x ${sample}.fasta --fast --threads 16 -S ${sample}.sam
        samtools view -h -Sb ${sample}.sam > ${sample}.bam
        samtools sort ${sample}.bam > ${sample}_sorted.bam
        samtools index ${sample}_sorted.bam
        samtools depth -d 1000000 ${sample}_sorted.bam | gzip > ${sample}.cov.gz
        ~/Tools/damian_extended/lib_py/vipr_cov_pdf_sample.py -c ${sample}.cov.gz -p ${sample}.pdf
    done
    
    #calculate average
    samtools depth pool_rind_9bis26_94_S7_sorted.bam | awk '{sum+=$3} END { print "Average = ",sum/NR}'
    #and with stdev:
    samtools depth pool_rind_9bis26_94_S7_sorted.bam | awk '{sum+=$3; sumsq+=$3*$3} END { print "Average = ",sum/NR; print "Stdev = ",sqrt(sumsq/NR - (sum/NR)**2)}'
    
  8. SNVs table + plots

    --R drawing plot--
    par(mfrow=c(1,1))
    cl <- rainbow(12)
    #x=c(438382,30718,15216,68712,41092,28916,107922,428720,57002,171788,2458486,113470,21348,276406,13329824,1785322,159416,9282986,68990)
    #y=c(0.96, 0.33, 0.11, 0.62,0.72,0.05,0.86,0.96,0.76,0.19,0.99,0.85,0.29,0.94,0.99,0.98,0.91,0.99,0.80)
    #x=c(581226,238044,151492,305816,185102,249346,269852,603476,203016,167814,2590728,291946,191906,404050,13448328,1938032,295890,9387234,232008)
    #x=c(423036,10004,1682,42520,29618,1460,93110,412024,43440,3320,2422088,95996,6168,260932,13222572,1747544,144448,9199232,55275)
    #y=c(0.73,0.04,0.01,0.14,0.16,  0.01,0.35,0.68,0.21,0.02,  0.93,0.33,0.03,0.65,0.98,  0.90,0.49,0.98,0.24)
    x=c(182419,3769,970,15974,18432,563,25898,188666,13129,479,598522,12672,3272,21948,5832429,121861,942,32889,4851987,25907)
    y=c(0.314,0.016,0.006,0.232,0.099,  0.002,0.096,0.313,0.065,0.003,  0.231,0.043,0.017,0.054,0.434,  0.063,0.005,0.111,0.517,0.112)
    logmod <- lm(log(y)~log(x))
    logypred <- predict(logmod)
    png(filename = "HCV_no_vs_enrichment.png", width = 800, height = 800)
    plot(y~x,log="xy",pch=1,type="p",cex=2,col=cl[1],main="", xlab="Number of HCV reads",ylab="Enrichment rate",las=1,xaxt="n",yaxt="n")
    points(y2~x,pch=8,type="p",cex=2,col=cl[8],las=1,xaxt="n",yaxt="n")
    lines(exp(logypred)~x, col=cl[1], lwd=2)
    
    #mtext(quote(X),1,line=3,cex=2)
    #mtext(quote(P(X)),2,line=4,cex=2)
    sfsmisc::eaxis(1,at=c(10^3,10^4,10^5,10^6,10^7),cex.axis=1.5)
    sfsmisc::eaxis(2,at=c(10^-1,10^0,10^1,10^2,10^3,10^4,10^5),cex.axis=1.5)
    #fit
    #legend(3000,6000, c("PE","SE","PE_unenriched"), lty=c(1,1),col=c(cl[1],cl[8],cl[5]))
    legend(3000,100, c("PE","SE"), lty=c(1,1),col=c(cl[1],cl[8]))
    legend("topright", bty="n", legend=paste("R2 =", format(summary(logmod)$adj.r.squared, digits=4)))
    dev.off()
    
    mkdir phylogenetics; cd phylogenetics
    
    mkdir assembly; cd assembly
    for sample in rsv-on1-h1-e1231-05ng 84660-tsek-50ng; do
        cp ~/rtpd_files/${sample}/idba_ud_assembly/gapped_contig.fa ${sample}.fa
    done
    
    #mkdir fasta_summary; cd fasta_summary
    #for sample in 838_S1 840_S2 820_S3 828_S4 815_S5 834_S6 808_S7 811_S8 837_S9 768_S10 773_S11 767_S12 810_S13 814_S14 10121-16_S15 7510-15_S16   8806-15_S18 9881-16_S19 8981-14_S20; do
    #   cp ../viral-ngs/data/02_assembly/${sample}.fasta ${sample}.fa     #828-17_S17 not inclusive, since they rRNA.
    #done
    cat 838_S1.fa 840_S2.fa 820_S3.fa 828_S4.fa 815_S5.fa 834_S6.fa 808_S7.fa 811_S8.fa 837_S9.fa 768_S10.fa 773_S11.fa 767_S12.fa 810_S13.fa 814_S14.fa 10121-16_S15.fa 7510-15_S16.fa  828-17_S17.fa  8806-15_S18.fa 9881-16_S19.fa 8981-14_S20.fa > all.fa
    cat
    mafft --adjustdirection --clustalout all.fa > all.aln
    cons
    mv all.fasta consensus.fa
    
    #under ~/rtpd_files/
    for sample in rsv-on1-h1-e1231-05ng 84660-tsek-50ng; do
    seqtk comp ${sample}/idba_ud_assembly/gapped_contig.fa | awk '{x=$3+$4+$5+$6;y=$2;print $1,y,x,y-x,(y-x)/y}'
    done
    
    #--
    mkdir interhost_variants_ref_p947; cd interhost_variants_ref_p947
    
    #mkdir multialign_to_ref; cd multialign_to_ref
    #mafft --adjustdirection --clustalout ref_plus_samples.fa > ref_plus_samples.aln
    
    #--
    mkdir intrahost_variants; cd intrahost_variants
    for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942  p944 p938 p953 p940; do
        cp ~/rtpd_files/${sample}_dedup/vphaser2_cut_filtered_groupref.annot.csv ${sample}_dedu
        cp ~/rtpd_files/${sample}/vphaser2_cut_filtered_groupref.annot.csv ${sample}_du
        #grep "#Chrom" ${sample}_dedu > ${sample}_dedup
        #grep "#Chrom" ${sample}_du > ${sample}_dup
        echo -e "#Chrom\tPosition\tREF\tAllele\tFrequency\tCoverage\tAnnotation\tAnnotation_Impact\tGene_Name\tFeature_Type\tTranscript_BioType\tCodon_Change\tAmino_Acid_Change\tCDS.pos/CDS.length\tAA.pos/AA.length" > ${sample}_dedup
        echo -e "#Chrom\tPosition\tREF\tAllele\tFrequency\tCoverage\tAnnotation\tAnnotation_Impact\tGene_Name\tFeature_Type\tTranscript_BioType\tCodon_Change\tAmino_Acid_Change\tCDS.pos/CDS.length\tAA.pos/AA.length" > ${sample}_dup
        awk 'length($3) == 1 && length($4) == 1' ${sample}_dedu >> ${sample}_dedup
    awk 'length($3) == 1 && length($4) == 1' ${sample}_du >> ${sample}_dup
    done
    mv p938_dedup _p938_dedup
    ~/Tools/csv2xls-0.4/csv_to_xls.py p946_dup p954_dup p952_dup p948_dup p945_dup p947_dup p955_dup p943_dup p951_dup p942_dup  p944_dup p938_dup p953_dup p940_dup  p946_dedup p954_dedup p952_dedup p948_dedup p945_dedup p947_dedup p955_dedup p943_dedup p951_dedup p942_dedup  p944_dedup _p938_dedup p953_dedup p940_dedup  -d$'\t' -o  SNVs.xls
    
    #---- collect plots and bams for final results ----
    #under ~/DATA/Data_Pietschmann_RSV_Probe/
    # replot plot if vphaser2_cut_filtered_groupref.vcf is empty.
    gzip vphaser2_cut_filtered_groupref.vcf
    ~/Tools/damian_extended/lib_py/vipr_af_vs_cov_pdf_1sample.py --vcf vphaser2_cut_filtered_groupref.vcf.gz --cov cov_groupref.gz --plot af-vs-cov_groupref.svg
    inkscape -z -e af-vs-cov_groupref.png af-vs-cov_groupref.svg -w 1600
    gunzip vphaser2_cut_filtered_groupref.vcf.gz
    
    for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942  p944 p938 p953 p940; do
    cp ~/rtpd_files/${sample}_dedup/af-vs-cov_groupref.png ${sample}_dedup.png
    cp ~/rtpd_files/${sample}/af-vs-cov_groupref.png ${sample}_dup.png
    done
    
    #under DIR results
    mkdir bams; cd bams
    for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942  p944 p938 p953 p940; do
    cp ~/rtpd_files/${sample}_dedup/mapped.bam ${sample}_mapped_dedup.bam
    done
    for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942  p944 p938 p953 p940; do
    cp ~/rtpd_files/${sample}/mapped.bam ${sample}_mapped_dup.bam
    done
    for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942  p944 p938 p953 p940; do
    samtools index ${sample}_mapped_dedup.bam
    samtools index ${sample}_mapped_dup.bam
    done
    
    #---- phylogenetic tree using ML ----
    "" > all.fa
    for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942  p944 p938 p953 p940; do
    cp ~/rtpd_files/${sample}/idba_ud_assembly/polished_contig.fa ${sample}.fa
    cp ../data/02_assembly/REF${sample}/REF${sample}.gbf ${sample}.gbk
    cat ~/rtpd_files/${sample}/idba_ud_assembly/polished_contig.fa >> all.fa
    done
    mafft --adjustdirection all.fa > all_aln.fa
    raxml-ng --all --model GTR+G --prefix RSV_GTR_G --threads 1 --msa all_aln.fa --bs-trees 200 --redo
    raxml-ng --all --model GTR+G+I --prefix RSV_GTR_G_I --threads 1 --msa all_aln.fa --bs-trees 1000 --redo
    #GTR+G+I --> GTR+FO+I+G4m
    #GTR+G -->  GTR+FO+G4m
    
    -- python merging plots--
    #under viral3
    import cv2 as cv
    from matplotlib import pyplot as plt
    import numpy as np
    import subprocess
    image1 = cv.imread('p955_dup.png')
    image2 = cv.imread('p943_dup.png')
    image3 = cv.imread('p954_dup.png')
    image4 = cv.imread('p946_dup.png')
    image5 = cv.imread('p942_dup.png')
    image6 = cv.imread('p945_dup.png')
    image7 = cv.imread('p948_dup.png')
    image8 = cv.imread('p947_dup.png')
    image9 = cv.imread('p951_dup.png')
    image10 = cv.imread('p952_dup.png')
    image11 = cv.imread('p944_dup.png')
    image12 = cv.imread('p938_dup.png')
    image13 = cv.imread('p953_dup.png')
    image14 = cv.imread('p940_dup.png')
    final = np.concatenate((image1, image2, image3, image4, image5, image6, image7, image8, image9, image10, image11, image12, image13, image14), axis = 0)
    cv.imwrite("coverages_and_SNVs_dup.png", final)
    
    import cv2 as cv
    from matplotlib import pyplot as plt
    import numpy as np
    import subprocess
    image1 = cv.imread('p3/af-vs-cov_groupref.png')
    image2 = cv.imread('p4/af-vs-cov_groupref.png')
    image3 = cv.imread('p5_700ng/af-vs-cov_groupref.png')
    final = np.concatenate((image1, image2, image3), axis = 0)
    cv.imwrite("coverages_and_SNVs_dedup_HEV.png", final)
    
    import cv2 as cv
    from matplotlib import pyplot as plt
    import numpy as np
    import subprocess
    image1 = cv.imread('p955_dedup.png')
    image2 = cv.imread('p943_dedup.png')
    image3 = cv.imread('p954_dedup.png')
    image4 = cv.imread('p946_dedup.png')
    image5 = cv.imread('p942_dedup.png')
    image6 = cv.imread('p945_dedup.png')
    image7 = cv.imread('p948_dedup.png')
    image8 = cv.imread('p947_dedup.png')
    image9 = cv.imread('p951_dedup.png')
    image10 = cv.imread('p952_dedup.png')
    image11 = cv.imread('p944_dedup.png')
    image12 = cv.imread('p938_dedup.png')
    image13 = cv.imread('p953_dedup.png')
    image14 = cv.imread('p940_dedup.png')
    final = np.concatenate((image1, image2, image3, image4, image5, image6, image7, image8, image9, image10, image11, image12, image13, image14), axis = 0)
    cv.imwrite("coverages_and_SNVs_dedup.png", final)
    
    rm p*dup.png
    
    image1 = cv.imread('838_S1.png')
    image2 = cv.imread('840_S2.png')
    image3 = cv.imread('837_S9.png')
    final = np.concatenate((image1, image2, image3), axis = 0)
    cv.imwrite("genotype_4d.png", final)
    
    image1 = cv.imread('820_S3.png')
    image2 = cv.imread('808_S7.png')
    image3 = cv.imread('811_S8.png')
    image4 = cv.imread('810_S13.png')
    image5 = cv.imread('10121-16_S15.png')
    image6 = cv.imread('7510-15_S16.png')
    image7 = cv.imread('9881-16_S19.png')
    final = np.concatenate((image1, image2, image3, image4, image5, image6, image7), axis = 0)
    cv.imwrite("genotype_3a.png", final)
    
    image1 = cv.imread('768_S10.png')
    image2 = cv.imread('773_S11.png')
    image3 = cv.imread('814_S14.png')
    image4 = cv.imread('8806-15_S18.png')
    final = np.concatenate((image1, image2, image3, image4), axis = 0)
    cv.imwrite("genotype_1a.png", final)
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum