-
Search for the reference complete genome at https://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi: Enterovirus type B sequences, Parvovirus B19 sequences, Rhinovirus A sequences and Parainfluenza type 3
#- Sort and Index the BAM File: # samtools sort -o sorted.bam input.bam # samtools index sorted.bam #- Call Variants: # bcftools mpileup -Ou -f reference.fasta sorted.bam | bcftools call -mv -Oz -o calls.vcf.gz # bcftools index calls.vcf.gz #- Generate Consensus FASTA: # bcftools consensus -f reference.fasta calls.vcf.gz -o consensus.fasta
-
Align Reads to Reference Genome
First, align the reads from isolate 2 to the genome of isolate 1. For example, using BWA:
bwa index Enterovirus_B.fasta bwa index Parvovirus_B19.fasta bwa index Rhinovirus_A.fasta bwa index Parainfluenza_3.fasta #bwa mem Enterovirus_B.fasta isolate2_reads.fastq > aligned.sam #bwa mem Enterovirus_B.fasta isolate2_reads_1.fastq isolate2_reads_2.fastq > aligned.sam bwa mem -t 64 Enterovirus_B.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Enterovirus_B.bam bwa mem -t 64 Enterovirus_B.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Enterovirus_B.bam bwa mem -t 64 Enterovirus_B.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Enterovirus_B.bam bwa mem -t 64 Enterovirus_B.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Enterovirus_B.bam bwa mem -t 64 Enterovirus_B.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Enterovirus_B.bam bwa mem -t 64 Enterovirus_B.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Enterovirus_B.bam bwa mem -t 64 Parvovirus_B19.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Parvovirus_B19.bam bwa mem -t 64 Parvovirus_B19.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Parvovirus_B19.bam bwa mem -t 64 Parvovirus_B19.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Parvovirus_B19.bam bwa mem -t 64 Parvovirus_B19.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Parvovirus_B19.bam bwa mem -t 64 Parvovirus_B19.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Parvovirus_B19.bam bwa mem -t 64 Parvovirus_B19.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Parvovirus_B19.bam bwa mem -t 64 Rhinovirus_A.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Rhinovirus_A.bam bwa mem -t 64 Rhinovirus_A.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Rhinovirus_A.bam bwa mem -t 64 Rhinovirus_A.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Rhinovirus_A.bam bwa mem -t 64 Rhinovirus_A.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Rhinovirus_A.bam bwa mem -t 64 Rhinovirus_A.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Rhinovirus_A.bam bwa mem -t 64 Rhinovirus_A.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Rhinovirus_A.bam bwa mem -t 64 Parainfluenza_3.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_1/AW_S22_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_AW_on_Parainfluenza_3.bam bwa mem -t 64 Parainfluenza_3.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_2/GE_S23_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_GE_on_Parainfluenza_3.bam bwa mem -t 64 Parainfluenza_3.fasta ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R1_001.fastq.gz ./240822_VH00358_106_AAFKTG7M5/p20562_3/IF_S24_R2_001.fastq.gz | samtools view -Sb - > MiSeq_p20562_IF_on_Parainfluenza_3.bam bwa mem -t 64 Parainfluenza_3.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/42_S1_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_42_S1_on_Parainfluenza_3.bam bwa mem -t 64 Parainfluenza_3.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/43_S2_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_43_S2_on_Parainfluenza_3.bam bwa mem -t 64 Parainfluenza_3.fasta ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R1_001.fastq.gz ./20240822_FS10003086_54_BTR99504-1132/Alignment_Imported_1/20240823_184235/Fastq/44_S3_L001_R2_001.fastq.gz | samtools view -Sb - > iSeq_44_S3_on_Parainfluenza_3.bam
or, use vrap to generate input.bam
#for bowtie (1 DB): --host Bacillus_cereus.fasta #for blast (5 DBs): --virus=Acinetobacter_baumannii.fasta -n /mnt/h1/jhuang/blast/nt -a /mnt/h1/jhuang/blast/nr, and two default download_db.py #A6_chr_plasmid.fasta reads filtered, should only the contamination reads remaining! vrap/vrap.py -1 ./results_ATCC19606/trimmed/A10_CraA_HQ_trimmed_P_1.fastq.gz -2 ./results_ATCC19606/trimmed/A10_CraA_HQ_trimmed_P_2.fastq.gz -o A10_vrap_host_A6 --host A6_chr_plasmid.fasta --noblast -t 55
-
Convert SAM to BAM, Sort and Index
Convert the SAM file to BAM, sort it, and create an index:
samtools view -Sb mapped > aligned.bam samtools sort aligned.bam -o sorted.bam samtools index sorted.bam
-
Coverage Analysis
Check the coverage of the genome regions. Regions with no coverage will be masked later.
#bedtools genomecov -ibam sorted.bam -bg > coverage.bed #BUG above: By default, Bedtools doesn't explicitly list regions with zero coverage in the output of genomecov. To get around this, you can use the -bga option with genomecov. This option ensures that regions with zero coverage are also included in the output. Here's how you can modify your command: bedtools genomecov -ibam sorted.bam -bga > coverage.bed
-
Create a Bed File for Masking
Create a BED file that specifies regions with no or low coverage. Assume that any region with coverage less than a certain threshold (e.g., 1x) should be masked:
awk '$4 < 1 {print $1 "\t" $2 "\t" $3}' coverage.bed > low_coverage.bed
-
Generate Consensus Sequence
Generate a consensus sequence, using bcftools, and mask regions with low or no coverage:
bcftools mpileup -Ou -f ../../CP059040.fasta sorted.bam | bcftools call -mv --ploidy 1 -Oz -o calls.vcf.gz bcftools index calls.vcf.gz bcftools consensus -f ../../CP059040.fasta -m low_coverage.bed calls.vcf.gz -o consensus.fasta #The site CP059040:4 overlaps with another variant, skipping... #The site CP059040:3125036 overlaps with another variant, skipping... #Applied 58 variants bcftools mpileup -Ou -f ../../A6_chr_plasmid.fasta sorted.bam | bcftools call -mv --ploidy 1 -Oz -o calls.vcf.gz bcftools index calls.vcf.gz bcftools consensus -f ../../A6_chr_plasmid.fasta -m low_coverage.bed calls.vcf.gz -o consensus.fasta #The site seq00000000:4 overlaps with another variant, skipping... #The site seq00000000:3125036 overlaps with another variant, skipping... #Applied 61 variants #--> TODO: could report the following regions are not covered in A10 comparing to A6. #seq00000000 364666 365765 #seq00000000 2810907 2813323 #seq00000000 2813552 2813779 #seq00000000 2813923 2819663 #seq00000000 2821839 2831921 #seq00000000 2832750 2832753 #seq00000000 2833977 2860354 #seq00000000 2860505 2861780 #seq00000000 2861931 2861951 #seq00000000 2862102 2862234 #seq00000000 2862385 2863434 #seq00000000 3124939 3124943 #seq00000000 3125009 3125010
In this command, the -m option in bcftools consensus is used to specify a BED file for masking low-coverage regions.
-
Coverage plot
for sample in 4132-Leber_CAP28 4132-Leber_CAP45 4132-Leber_NODE328; do bowtie2-build ${sample}.fasta ${sample}.fasta bowtie2 -1 ../trimmed_paired/4132-Leber_S3_L001_R1.fastq.gz -2 ../trimmed_paired/4132-Leber_S3_L001_R2.fastq.gz -x ${sample}.fasta --fast --threads 16 -S ${sample}.sam samtools view -h -Sb ${sample}.sam > ${sample}.bam samtools sort ${sample}.bam > ${sample}_sorted.bam samtools index ${sample}_sorted.bam samtools depth -d 1000000 ${sample}_sorted.bam | gzip > ${sample}.cov.gz ~/Tools/damian_extended/lib_py/vipr_cov_pdf_sample.py -c ${sample}.cov.gz -p ${sample}.pdf done #calculate average samtools depth pool_rind_9bis26_94_S7_sorted.bam | awk '{sum+=$3} END { print "Average = ",sum/NR}' #and with stdev: samtools depth pool_rind_9bis26_94_S7_sorted.bam | awk '{sum+=$3; sumsq+=$3*$3} END { print "Average = ",sum/NR; print "Stdev = ",sqrt(sumsq/NR - (sum/NR)**2)}'
-
SNVs table + plots
--R drawing plot-- par(mfrow=c(1,1)) cl <- rainbow(12) #x=c(438382,30718,15216,68712,41092,28916,107922,428720,57002,171788,2458486,113470,21348,276406,13329824,1785322,159416,9282986,68990) #y=c(0.96, 0.33, 0.11, 0.62,0.72,0.05,0.86,0.96,0.76,0.19,0.99,0.85,0.29,0.94,0.99,0.98,0.91,0.99,0.80) #x=c(581226,238044,151492,305816,185102,249346,269852,603476,203016,167814,2590728,291946,191906,404050,13448328,1938032,295890,9387234,232008) #x=c(423036,10004,1682,42520,29618,1460,93110,412024,43440,3320,2422088,95996,6168,260932,13222572,1747544,144448,9199232,55275) #y=c(0.73,0.04,0.01,0.14,0.16, 0.01,0.35,0.68,0.21,0.02, 0.93,0.33,0.03,0.65,0.98, 0.90,0.49,0.98,0.24) x=c(182419,3769,970,15974,18432,563,25898,188666,13129,479,598522,12672,3272,21948,5832429,121861,942,32889,4851987,25907) y=c(0.314,0.016,0.006,0.232,0.099, 0.002,0.096,0.313,0.065,0.003, 0.231,0.043,0.017,0.054,0.434, 0.063,0.005,0.111,0.517,0.112) logmod <- lm(log(y)~log(x)) logypred <- predict(logmod) png(filename = "HCV_no_vs_enrichment.png", width = 800, height = 800) plot(y~x,log="xy",pch=1,type="p",cex=2,col=cl[1],main="", xlab="Number of HCV reads",ylab="Enrichment rate",las=1,xaxt="n",yaxt="n") points(y2~x,pch=8,type="p",cex=2,col=cl[8],las=1,xaxt="n",yaxt="n") lines(exp(logypred)~x, col=cl[1], lwd=2) #mtext(quote(X),1,line=3,cex=2) #mtext(quote(P(X)),2,line=4,cex=2) sfsmisc::eaxis(1,at=c(10^3,10^4,10^5,10^6,10^7),cex.axis=1.5) sfsmisc::eaxis(2,at=c(10^-1,10^0,10^1,10^2,10^3,10^4,10^5),cex.axis=1.5) #fit #legend(3000,6000, c("PE","SE","PE_unenriched"), lty=c(1,1),col=c(cl[1],cl[8],cl[5])) legend(3000,100, c("PE","SE"), lty=c(1,1),col=c(cl[1],cl[8])) legend("topright", bty="n", legend=paste("R2 =", format(summary(logmod)$adj.r.squared, digits=4))) dev.off() mkdir phylogenetics; cd phylogenetics mkdir assembly; cd assembly for sample in rsv-on1-h1-e1231-05ng 84660-tsek-50ng; do cp ~/rtpd_files/${sample}/idba_ud_assembly/gapped_contig.fa ${sample}.fa done #mkdir fasta_summary; cd fasta_summary #for sample in 838_S1 840_S2 820_S3 828_S4 815_S5 834_S6 808_S7 811_S8 837_S9 768_S10 773_S11 767_S12 810_S13 814_S14 10121-16_S15 7510-15_S16 8806-15_S18 9881-16_S19 8981-14_S20; do # cp ../viral-ngs/data/02_assembly/${sample}.fasta ${sample}.fa #828-17_S17 not inclusive, since they rRNA. #done cat 838_S1.fa 840_S2.fa 820_S3.fa 828_S4.fa 815_S5.fa 834_S6.fa 808_S7.fa 811_S8.fa 837_S9.fa 768_S10.fa 773_S11.fa 767_S12.fa 810_S13.fa 814_S14.fa 10121-16_S15.fa 7510-15_S16.fa 828-17_S17.fa 8806-15_S18.fa 9881-16_S19.fa 8981-14_S20.fa > all.fa cat mafft --adjustdirection --clustalout all.fa > all.aln cons mv all.fasta consensus.fa #under ~/rtpd_files/ for sample in rsv-on1-h1-e1231-05ng 84660-tsek-50ng; do seqtk comp ${sample}/idba_ud_assembly/gapped_contig.fa | awk '{x=$3+$4+$5+$6;y=$2;print $1,y,x,y-x,(y-x)/y}' done #-- mkdir interhost_variants_ref_p947; cd interhost_variants_ref_p947 #mkdir multialign_to_ref; cd multialign_to_ref #mafft --adjustdirection --clustalout ref_plus_samples.fa > ref_plus_samples.aln #-- mkdir intrahost_variants; cd intrahost_variants for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942 p944 p938 p953 p940; do cp ~/rtpd_files/${sample}_dedup/vphaser2_cut_filtered_groupref.annot.csv ${sample}_dedu cp ~/rtpd_files/${sample}/vphaser2_cut_filtered_groupref.annot.csv ${sample}_du #grep "#Chrom" ${sample}_dedu > ${sample}_dedup #grep "#Chrom" ${sample}_du > ${sample}_dup echo -e "#Chrom\tPosition\tREF\tAllele\tFrequency\tCoverage\tAnnotation\tAnnotation_Impact\tGene_Name\tFeature_Type\tTranscript_BioType\tCodon_Change\tAmino_Acid_Change\tCDS.pos/CDS.length\tAA.pos/AA.length" > ${sample}_dedup echo -e "#Chrom\tPosition\tREF\tAllele\tFrequency\tCoverage\tAnnotation\tAnnotation_Impact\tGene_Name\tFeature_Type\tTranscript_BioType\tCodon_Change\tAmino_Acid_Change\tCDS.pos/CDS.length\tAA.pos/AA.length" > ${sample}_dup awk 'length($3) == 1 && length($4) == 1' ${sample}_dedu >> ${sample}_dedup awk 'length($3) == 1 && length($4) == 1' ${sample}_du >> ${sample}_dup done mv p938_dedup _p938_dedup ~/Tools/csv2xls-0.4/csv_to_xls.py p946_dup p954_dup p952_dup p948_dup p945_dup p947_dup p955_dup p943_dup p951_dup p942_dup p944_dup p938_dup p953_dup p940_dup p946_dedup p954_dedup p952_dedup p948_dedup p945_dedup p947_dedup p955_dedup p943_dedup p951_dedup p942_dedup p944_dedup _p938_dedup p953_dedup p940_dedup -d$'\t' -o SNVs.xls #---- collect plots and bams for final results ---- #under ~/DATA/Data_Pietschmann_RSV_Probe/ # replot plot if vphaser2_cut_filtered_groupref.vcf is empty. gzip vphaser2_cut_filtered_groupref.vcf ~/Tools/damian_extended/lib_py/vipr_af_vs_cov_pdf_1sample.py --vcf vphaser2_cut_filtered_groupref.vcf.gz --cov cov_groupref.gz --plot af-vs-cov_groupref.svg inkscape -z -e af-vs-cov_groupref.png af-vs-cov_groupref.svg -w 1600 gunzip vphaser2_cut_filtered_groupref.vcf.gz for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942 p944 p938 p953 p940; do cp ~/rtpd_files/${sample}_dedup/af-vs-cov_groupref.png ${sample}_dedup.png cp ~/rtpd_files/${sample}/af-vs-cov_groupref.png ${sample}_dup.png done #under DIR results mkdir bams; cd bams for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942 p944 p938 p953 p940; do cp ~/rtpd_files/${sample}_dedup/mapped.bam ${sample}_mapped_dedup.bam done for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942 p944 p938 p953 p940; do cp ~/rtpd_files/${sample}/mapped.bam ${sample}_mapped_dup.bam done for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942 p944 p938 p953 p940; do samtools index ${sample}_mapped_dedup.bam samtools index ${sample}_mapped_dup.bam done #---- phylogenetic tree using ML ---- "" > all.fa for sample in p946 p954 p952 p948 p945 p947 p955 p943 p951 p942 p944 p938 p953 p940; do cp ~/rtpd_files/${sample}/idba_ud_assembly/polished_contig.fa ${sample}.fa cp ../data/02_assembly/REF${sample}/REF${sample}.gbf ${sample}.gbk cat ~/rtpd_files/${sample}/idba_ud_assembly/polished_contig.fa >> all.fa done mafft --adjustdirection all.fa > all_aln.fa raxml-ng --all --model GTR+G --prefix RSV_GTR_G --threads 1 --msa all_aln.fa --bs-trees 200 --redo raxml-ng --all --model GTR+G+I --prefix RSV_GTR_G_I --threads 1 --msa all_aln.fa --bs-trees 1000 --redo #GTR+G+I --> GTR+FO+I+G4m #GTR+G --> GTR+FO+G4m -- python merging plots-- #under viral3 import cv2 as cv from matplotlib import pyplot as plt import numpy as np import subprocess image1 = cv.imread('p955_dup.png') image2 = cv.imread('p943_dup.png') image3 = cv.imread('p954_dup.png') image4 = cv.imread('p946_dup.png') image5 = cv.imread('p942_dup.png') image6 = cv.imread('p945_dup.png') image7 = cv.imread('p948_dup.png') image8 = cv.imread('p947_dup.png') image9 = cv.imread('p951_dup.png') image10 = cv.imread('p952_dup.png') image11 = cv.imread('p944_dup.png') image12 = cv.imread('p938_dup.png') image13 = cv.imread('p953_dup.png') image14 = cv.imread('p940_dup.png') final = np.concatenate((image1, image2, image3, image4, image5, image6, image7, image8, image9, image10, image11, image12, image13, image14), axis = 0) cv.imwrite("coverages_and_SNVs_dup.png", final) import cv2 as cv from matplotlib import pyplot as plt import numpy as np import subprocess image1 = cv.imread('p3/af-vs-cov_groupref.png') image2 = cv.imread('p4/af-vs-cov_groupref.png') image3 = cv.imread('p5_700ng/af-vs-cov_groupref.png') final = np.concatenate((image1, image2, image3), axis = 0) cv.imwrite("coverages_and_SNVs_dedup_HEV.png", final) import cv2 as cv from matplotlib import pyplot as plt import numpy as np import subprocess image1 = cv.imread('p955_dedup.png') image2 = cv.imread('p943_dedup.png') image3 = cv.imread('p954_dedup.png') image4 = cv.imread('p946_dedup.png') image5 = cv.imread('p942_dedup.png') image6 = cv.imread('p945_dedup.png') image7 = cv.imread('p948_dedup.png') image8 = cv.imread('p947_dedup.png') image9 = cv.imread('p951_dedup.png') image10 = cv.imread('p952_dedup.png') image11 = cv.imread('p944_dedup.png') image12 = cv.imread('p938_dedup.png') image13 = cv.imread('p953_dedup.png') image14 = cv.imread('p940_dedup.png') final = np.concatenate((image1, image2, image3, image4, image5, image6, image7, image8, image9, image10, image11, image12, image13, image14), axis = 0) cv.imwrite("coverages_and_SNVs_dedup.png", final) rm p*dup.png image1 = cv.imread('838_S1.png') image2 = cv.imread('840_S2.png') image3 = cv.imread('837_S9.png') final = np.concatenate((image1, image2, image3), axis = 0) cv.imwrite("genotype_4d.png", final) image1 = cv.imread('820_S3.png') image2 = cv.imread('808_S7.png') image3 = cv.imread('811_S8.png') image4 = cv.imread('810_S13.png') image5 = cv.imread('10121-16_S15.png') image6 = cv.imread('7510-15_S16.png') image7 = cv.imread('9881-16_S19.png') final = np.concatenate((image1, image2, image3, image4, image5, image6, image7), axis = 0) cv.imwrite("genotype_3a.png", final) image1 = cv.imread('768_S10.png') image2 = cv.imread('773_S11.png') image3 = cv.imread('814_S14.png') image4 = cv.imread('8806-15_S18.png') final = np.concatenate((image1, image2, image3, image4), axis = 0) cv.imwrite("genotype_1a.png", final)
Mapping reads on reference genomes
Leave a reply