-
Install and test qiime2-docker
#Cannot run under QIIME1, switch to QIIME2: pick_open_reference_otus.py -r/home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna -i test.fna -o clustering_test/ -p clustering_params.txt --parallel --verbose docker pull quay.io/qiime2/core:2023.9 docker run -it --rm \ -v /mnt/md1/DATA/Data_Childrensclinic_16S_2025:/data \ -v /home/jhuang/REFs:/home/jhuang/REFs \ quay.io/qiime2/core:2023.9 bash cd /data -
Import the fastq-files to paired-end-demux.qza
#https://docs.qiime2.org/2018.8/tutorials/importing/ #Moving the following fastq.gz to raw_data_NOT_USED: 643805249,Kinderklinik Lauf 2 Pl.B,B04,N720-B,CGGAGCCT,S503-B,TATCCTCT,nan,nan 7909160377,Kinderkllinik Lauf 2 Pl.D,D11,N728-D,TGCAGCTA,S517-D,GCGTAAGA,nan,nan 7909188256,Kinderklinik Lauf 2 Pl.C,C10,N712-C,GTAGAGGA,S516-C,CCTAGAGT,nan,nan O231000101,Kinderkllinik Lauf 2 Pl.D,F11,N728-D,TGCAGCTA,S520-D,AAGGCTAT,nan,nan O231000102,Kinderkllinik Lauf 2 Pl.D,C09,N726-D,CCTAAGAC,S516-D,CCTAGAGT,nan,nan O231000103,Kinderkllinik Lauf 2 Pl.D,C11,N728-D,TGCAGCTA,S516-D,CCTAGAGT,nan,nan O231000902,Kinderkllinik Lauf 2 Pl.D,C12,N729-D,TCGACGTC,S516-D,CCTAGAGT,nan,nan O23100201,Kinderklinik Lauf 2 Pl.B,D10,N727-B,CGATCAGT,S506-B,ACTGCATA,nan,nan O23100805,Kinderklinik Lauf 2 Pl.B,F10,N727-B,CGATCAGT,S508-B,CTAAGCCT,nan,nan O23100901,Kinderkllinik Lauf 2 Pl.D,F08,N724-D,ACTGAGCG,S520-D,AAGGCTAT,nan,nan O23100903,Kinderkllinik Lauf 2 Pl.D,B08,N724-D,ACTGAGCG,S515-D,TTCTAGCT,nan,nan O23100904,Kinderkllinik Lauf 2 Pl.D,C08,N724-D,ACTGAGCG,S516-D,CCTAGAGT,nan,nan O23100905,Kinderkllinik Lauf 2 Pl.D,D10,N727-D,CGATCAGT,S517-D,GCGTAAGA,nan,nan O23111503,Kinderkllinik Lauf 2 Pl.D,A12,N729-D,TCGACGTC,S513-D,TCGACTAG,nan,nan O23120902,Kinderklinik Lauf 2 Pl.C,C03,N703-C,AGGCAGAA,S516-C,CCTAGAGT,nan,nan O24022002,Kinderklinik Lauf 2 Pl.C,H12,N715-C,ATCTCAGG,S522-C,TTATGCGA,nan,nan U23071801,Kinderklinik Lauf 2 Pl.B,B02,N718-B,GGAGCTAC,S503-B,TATCCTCT,nan,nan rename -n 's/^([A-Z]\d+)_S\d+_L\d+_R([12])_001\.fastq\.gz$/$1_R$2.fastq.gz/' *.fastq.gz rename 's/^([A-Z]\d+)_S\d+_L\d+_R([12])_001\.fastq\.gz$/$1_R$2.fastq.gz/' *.fastq.gz #for file in *.fastq.gz; do echo "mv $file $(echo $file | cut -d'_' -f1 | cut -d'-' -f1-1)_$(echo $file | cut -d'_' -f4).fastq.gz"; done for file in *.fastq.gz; do echo "mv $file $(echo $file | cut -d'_' -f1)_$(echo $file | cut -d'_' -f4).fastq.gz"; done #MANUALLY correct several filename errors and performing in the generated commands above #MANUALLY correct several filename errors and performing in the generated commands below NTC_2 NTC_3 NTC_4 NTC_5 NTC_6 NTC_7 NTC_8 NTC_9 NTC_10 NTC_11 NTC_12 NTC_13 NTC_14 NTC_15 NTC_16 mv NTC-1_R1.fastq.gz NTC_1_R1.fastq.gz mv NTC-1_R2.fastq.gz NTC_1_R2.fastq.gz mv NTC-2_R1.fastq.gz NTC_2_R1.fastq.gz mv NTC-2_R2.fastq.gz NTC_2_R2.fastq.gz mv NTC-3_R1.fastq.gz NTC_3_R1.fastq.gz mv NTC-3_R2.fastq.gz NTC_3_R2.fastq.gz mv NTC-4_R1.fastq.gz NTC_4_R1.fastq.gz mv NTC-4_R2.fastq.gz NTC_4_R2.fastq.gz mv NTC-5_R1.fastq.gz NTC_5_R1.fastq.gz mv NTC-5_R2.fastq.gz NTC_5_R2.fastq.gz mv NTC-6_R1.fastq.gz NTC_6_R1.fastq.gz mv NTC-6_R2.fastq.gz NTC_6_R2.fastq.gz mv NTC-7_R1.fastq.gz NTC_7_R1.fastq.gz mv NTC-7_R2.fastq.gz NTC_7_R2.fastq.gz mv NTC-8_R1.fastq.gz NTC_8_R1.fastq.gz mv NTC-8_R2.fastq.gz NTC_8_R2.fastq.gz mv NTC-9_R1.fastq.gz NTC_9_R1.fastq.gz mv NTC-9_R2.fastq.gz NTC_9_R2.fastq.gz mv NTC-10_R1.fastq.gz NTC_10_R1.fastq.gz mv NTC-10_R2.fastq.gz NTC_10_R2.fastq.gz mv NTC-11_R1.fastq.gz NTC_11_R1.fastq.gz mv NTC-11_R2.fastq.gz NTC_11_R2.fastq.gz mv NTC-12_R1.fastq.gz NTC_12_R1.fastq.gz mv NTC-12_R2.fastq.gz NTC_12_R2.fastq.gz mv NTC-13_R1.fastq.gz NTC_13_R1.fastq.gz mv NTC-13_R2.fastq.gz NTC_13_R2.fastq.gz mv NTC-14_R1.fastq.gz NTC_14_R1.fastq.gz mv NTC-14_R2.fastq.gz NTC_14_R2.fastq.gz mv NTC-15_R1.fastq.gz NTC_15_R1.fastq.gz mv NTC-15_R2.fastq.gz NTC_15_R2.fastq.gz mv NTC-16_R1.fastq.gz NTC_16_R1.fastq.gz mv NTC-16_R2.fastq.gz NTC_16_R2.fastq.gz PC_1 PC_2 PC_3 PC_4 PC_5 PC_6 PC_7 PC_8 mv PC-1_R1.fastq.gz PC_1_R1.fastq.gz mv PC-1_R2.fastq.gz PC_1_R2.fastq.gz mv PC-2_R1.fastq.gz PC_2_R1.fastq.gz mv PC-2_R2.fastq.gz PC_2_R2.fastq.gz mv PC-3_R1.fastq.gz PC_3_R1.fastq.gz mv PC-3_R2.fastq.gz PC_3_R2.fastq.gz mv PC-4_R1.fastq.gz PC_4_R1.fastq.gz mv PC-4_R2.fastq.gz PC_4_R2.fastq.gz mv PC-5_R1.fastq.gz PC_5_R1.fastq.gz mv PC-5_R2.fastq.gz PC_5_R2.fastq.gz mv PC-6_R1.fastq.gz PC_6_R1.fastq.gz mv PC-6_R2.fastq.gz PC_6_R2.fastq.gz mv PC-7_R1.fastq.gz PC_7_R1.fastq.gz mv PC-7_R2.fastq.gz PC_7_R2.fastq.gz mv PC-8_R1.fastq.gz PC_8_R1.fastq.gz mv PC-8_R2.fastq.gz PC_8_R2.fastq.gz #MOVE the used fastq.gz to the directory raw_data #PREPARE the conf-file --> "-rw-rw-r-- 1 jhuang jhuang 9,3K Nov 20 12:22 pe-33-manifest" qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path pe-33-manifest --output-path paired-end-demux.qza --input-format PairedEndFastqManifestPhred33 #--> "-rw-r--r-- 1 root root 3,5G Nov 20 12:24 paired-end-demux.qza" qiime demux summarize \ --i-data paired-end-demux.qza \ --o-visualization demux_pe.qzv #--> "-rw-r--r-- 1 root root 320K Nov 20 12:34 demux_pe.qzv" #qiime tools view demux_pe.qzv --> Error: Visualization viewing is currently not supported in headless environments. You can view Visualizations (and Artifacts) at https://view.qiime2.org, or move the Visualization to an environment with a display and view it with `qiime tools view`. #Open demux_pe.gzv on https://view.qiime2.org -
Optimizing the parameters trunc-len-f and trunc-len-r and denoising with DADA2: optimized parameters is f240_r240
#Your amplicon (V3–V4 region) is ~464 bp, so you need ≥20–30 bp overlap #464-38=426; 440 is the longst +12 nt for overlapping=we need 452 nt! #Optimize the parameters --p-trunc-len-f and --p-trunc-len-r (qiime2-amplicon-2023.9) root@4379fea45cf7:/data# ./dada2_batch_test.sh #!/bin/bash # Set your base inputs INPUT=paired-end-demux.qza TRIM_LEFT_F=17 TRIM_LEFT_R=21 # Output base OUTPUT_DIR=dada2_tests mkdir -p $OUTPUT_DIR # Loop over trunc-len-f and trunc-len-r combinations # Forward: from 220 to 260 # Reverse: from 210 to 260 #225 220 215 i=1 for f in 260 255 250 245 240 235 230 225 220; do for r in 260 255 250 245 240 235 230 225 220 215 210; do OUT=test_${i}_f${f}_r${r} echo "Running: $OUT" mkdir -p $OUTPUT_DIR/$OUT qiime dada2 denoise-paired \ --i-demultiplexed-seqs $INPUT \ --p-trim-left-f $TRIM_LEFT_F \ --p-trim-left-r $TRIM_LEFT_R \ --p-max-ee-f 3 --p-max-ee-r 5 \ --p-trunc-len-f $f \ --p-trunc-len-r $r \ --p-n-threads 32 \ --o-table $OUTPUT_DIR/$OUT/table.qza \ --o-representative-sequences $OUTPUT_DIR/$OUT/rep-seqs.qza \ --o-denoising-stats $OUTPUT_DIR/$OUT/denoising-stats.qza \ --verbose > $OUTPUT_DIR/$OUT/log.txt 2>&1 ((i++)) done done for f in dada2_tests2/test_*/denoising-stats.qza; do qiime metadata tabulate \ --m-input-file $f \ --o-visualization ${f%.qza}.qzv done #Manually convert denoising-stats.qza to denoising-stats.tsv using https://view.qiime2.org #Downloads to jhuang@WS-2290C:~/DATA/Data_Childrensclinic_16S_2025/dada2_tests/test_59_f235_r245$ mv ~/Downloads/data_stats.tsv . #NTC01 33463 1732 5.18 1723 1650 4.93 1589 4.75 #O23091304 54831 7477 13.64 7409 7133 13.01 7101 12.95 #NTC01 33463 1719 5.14 1711 1648 4.92 1588 4.75 #O23091304 54831 7419 13.53 7345 7068 12.89 7046 12.85 #pandaseq.out: grep ">" A1_R1.fastq.gz_merged.fasta | wc -l #8229; grep ">" A10_R1.fastq.gz_merged.fasta | wc -l #9165 sudo chown -R jhuang:jhuang dada2_tests cd dada2_tests python3 rank_dada2_params.py # Top parameter sets by % input non-chimeric (overall): # test_69_f230_r250: f=230, r=250, %non-chimeric=81.75, median%=84.57, merged%=83.86, samples_nonchim=253/253 # * (CHOOSEN) test_59_f235_r245: f=235, r=245, %non-chimeric=81.57, median%=84.46, merged%=83.79, samples_nonchim=253/253 # test_58_f235_r250: f=235, r=250, %non-chimeric=81.49, median%=84.35, merged%=83.61, samples_nonchim=253/253 # test_49_f240_r240: f=240, r=240, %non-chimeric=81.30, median%=84.02, merged%=83.46, samples_nonchim=253/253 # test_48_f240_r245: f=240, r=245, %non-chimeric=81.21, median%=83.95, merged%=83.33, samples_nonchim=253/253 # test_47_f240_r250: f=240, r=250, %non-chimeric=81.12, median%=83.94, merged%=83.19, samples_nonchim=253/253 # test_39_f245_r235: f=245, r=235, %non-chimeric=80.84, median%=83.59, merged%=82.98, samples_nonchim=253/253 # test_38_f245_r240: f=245, r=240, %non-chimeric=80.78, median%=83.52, merged%=82.91, samples_nonchim=253/253 # test_37_f245_r245: f=245, r=245, %non-chimeric=80.72, median%=83.36, merged%=82.81, samples_nonchim=253/253 # test_36_f245_r250: f=245, r=250, %non-chimeric=80.62, median%=83.41, merged%=82.68, samples_nonchim=253/253 -
Visualize outputs (Using pandaseq.out, since qiime2_metadata.tsv contains the pathway of pandaseq.out)
4.1. mkdir fastqc_out fastqc -t 4 raw_data/* -o fastqc_out/ 4.2. mkdir trim_data trimmed_unpaired cd raw_data for file in O24021602_R1.fastq.gz O24021601_R1.fastq.gz O23100705_R1.fastq.gz PC_5_R1.fastq.gz O24010402_R1.fastq.gz U23090801_R1.fastq.gz O24012402_R1.fastq.gz NTC_7_R1.fastq.gz O23121301_R1.fastq.gz O24013104_R1.fastq.gz U25020701_R1.fastq.gz O23100401_R1.fastq.gz O24020901_R1.fastq.gz NTC_1_R1.fastq.gz O24020903_R1.fastq.gz O23092002_R1.fastq.gz UR009768_R1.fastq.gz O23100402_R1.fastq.gz O23091304_R1.fastq.gz O23100703_R1.fastq.gz O23110904_R1.fastq.gz O23110903_R1.fastq.gz O23121304_R1.fastq.gz O23083101_R1.fastq.gz O23100502_R1.fastq.gz O23110301_R1.fastq.gz O24020805_R1.fastq.gz A23051102_R1.fastq.gz O23102604_R1.fastq.gz A23051103_R1.fastq.gz O23102504_R1.fastq.gz O24011205_R1.fastq.gz O23111602_R1.fastq.gz O24020905_R1.fastq.gz O23090701_R1.fastq.gz O24020702_R1.fastq.gz NTC_4_R1.fastq.gz A24072901_R1.fastq.gz NTC_10_R1.fastq.gz O23100704_R1.fastq.gz NTC_8_R1.fastq.gz O24011901_R1.fastq.gz O23111501_R1.fastq.gz O23112303_R1.fastq.gz O23112902_R1.fastq.gz O24011203_R1.fastq.gz O23092006_R1.fastq.gz O24010404_R1.fastq.gz O23090803_R1.fastq.gz O23091501_R1.fastq.gz U25022101_R1.fastq.gz O23121305_R1.fastq.gz U24080201_R1.fastq.gz U24111801_R1.fastq.gz A24040201_R1.fastq.gz O23120702_R1.fastq.gz NTC_5_R1.fastq.gz O24013105_R1.fastq.gz O23100706_R1.fastq.gz O23083102_R1.fastq.gz O23091305_R1.fastq.gz PC_7_R1.fastq.gz O24011005_R1.fastq.gz PC01_R1.fastq.gz O24011004_R1.fastq.gz O23110902_R1.fastq.gz O24020904_R1.fastq.gz A24071701_R1.fastq.gz O24021501_R1.fastq.gz O23120101_R1.fastq.gz U25011702_R1.fastq.gz O23092103_R1.fastq.gz UR009909_R1.fastq.gz O23091403_R1.fastq.gz NTC_2_R1.fastq.gz PC_1_R1.fastq.gz O24013102_R1.fastq.gz O23102704_R1.fastq.gz O24010401_R1.fastq.gz O24012501_R1.fastq.gz U24121801_R1.fastq.gz O24031304_R1.fastq.gz O23090802_R1.fastq.gz NTC_12_R1.fastq.gz O23100702_R1.fastq.gz O24011202_R1.fastq.gz O23100601_R1.fastq.gz O23100802_R1.fastq.gz O24021404_R1.fastq.gz O23112301_R1.fastq.gz O24011103_R1.fastq.gz O24030601_R1.fastq.gz U23052201_R1.fastq.gz O23100803_R1.fastq.gz O23102502_R1.fastq.gz O24011206_R1.fastq.gz O23091401_R1.fastq.gz O23092001_R1.fastq.gz O24020801_R1.fastq.gz O23082302_R1.fastq.gz O23112204_R1.fastq.gz O23100701_R1.fastq.gz O23112304_R1.fastq.gz A24031201_R1.fastq.gz O24022801_R1.fastq.gz PC_3_R1.fastq.gz O24022901_R1.fastq.gz O23092005_R1.fastq.gz O23121502_R1.fastq.gz O24021401_R1.fastq.gz U25011701_R1.fastq.gz NTC01_R1.fastq.gz O23082401_R1.fastq.gz O24022202_R1.fastq.gz NTC_6_R1.fastq.gz O24031302_R1.fastq.gz NTC_15_R1.fastq.gz NTC_13_R1.fastq.gz O24022302_R1.fastq.gz O23121303_R1.fastq.gz O24021502_R1.fastq.gz O24020704_R1.fastq.gz O23092202_R1.fastq.gz A23112002_R1.fastq.gz O23092803_R1.fastq.gz O23112203_R1.fastq.gz NTC_11_R1.fastq.gz O23091302_R1.fastq.gz A24062801_R1.fastq.gz O23121302_R1.fastq.gz O23092104_R1.fastq.gz O24020802_R1.fastq.gz O24021403_R1.fastq.gz O23112201_R1.fastq.gz O23082402_R1.fastq.gz U24070401_R1.fastq.gz O24020902_R1.fastq.gz O24030602_R1.fastq.gz NTC_3_R1.fastq.gz O24011201_R1.fastq.gz O23091301_R1.fastq.gz O23112202_R1.fastq.gz O24021503_R1.fastq.gz O23102601_R1.fastq.gz O24011002_R1.fastq.gz O23121503_R1.fastq.gz O23092004_R1.fastq.gz O23091402_R1.fastq.gz O23092701_R1.fastq.gz O24031301_R1.fastq.gz O24020703_R1.fastq.gz O24013103_R1.fastq.gz A24080201_R1.fastq.gz U23071201_R1.fastq.gz U23052401_R1.fastq.gz U23071901_R1.fastq.gz O24011101_R1.fastq.gz U23091101_R1.fastq.gz O23110204_R1.fastq.gz O23102702_R1.fastq.gz O24020101_R1.fastq.gz O24012401_R1.fastq.gz O23100501_R1.fastq.gz O24020705_R1.fastq.gz A23060602_R1.fastq.gz O23102701_R1.fastq.gz O24011001_R1.fastq.gz O24022102_R1.fastq.gz O23110302_R1.fastq.gz O23100503_R1.fastq.gz O24020803_R1.fastq.gz O23110203_R1.fastq.gz A24030402_R1.fastq.gz O23092102_R1.fastq.gz O24011003_R1.fastq.gz O23121306_R1.fastq.gz O23110101_R1.fastq.gz O24020804_R1.fastq.gz O23112901_R1.fastq.gz A23072501_R1.fastq.gz O24030604_R1.fastq.gz O23112302_R1.fastq.gz O24010301_R1.fastq.gz O24021402_R1.fastq.gz O24011104_R1.fastq.gz O23102602_R1.fastq.gz O23102703_R1.fastq.gz O23091303_R1.fastq.gz O24011204_R1.fastq.gz A24071901_R1.fastq.gz O23102603_R1.fastq.gz O23082303_R1.fastq.gz O24011801_R1.fastq.gz O24030603_R1.fastq.gz O23092802_R1.fastq.gz O23090801_R1.fastq.gz A24030401_R1.fastq.gz O23083001_R1.fastq.gz A23060601_R1.fastq.gz O23092203_R1.fastq.gz NTC_14_R1.fastq.gz O23090602_R1.fastq.gz A23111301_R1.fastq.gz O23120103_R1.fastq.gz O24011105_R1.fastq.gz U23071701_R1.fastq.gz O23111601_R1.fastq.gz O24010501_R1.fastq.gz O23110202_R1.fastq.gz O23121501_R1.fastq.gz O24010302_R1.fastq.gz O24011207_R1.fastq.gz O23092702_R1.fastq.gz O23112206_R1.fastq.gz PC_2_R1.fastq.gz O23102503_R1.fastq.gz O23092201_R1.fastq.gz NTC_9_R1.fastq.gz A23080101_R1.fastq.gz PC_6_R1.fastq.gz O23120701_R1.fastq.gz U24101801_R1.fastq.gz A23112001_R1.fastq.gz O23120104_R1.fastq.gz O23100804_R1.fastq.gz O24031305_R1.fastq.gz O23111502_R1.fastq.gz O24020103_R1.fastq.gz O23082301_R1.fastq.gz NTC_16_R1.fastq.gz PC_8_R1.fastq.gz O24022101_R1.fastq.gz O24020102_R1.fastq.gz O23081801_R1.fastq.gz O24022902_R1.fastq.gz O23112205_R1.fastq.gz O24021603_R1.fastq.gz O23102501_R1.fastq.gz O24010403_R1.fastq.gz O24012403_R1.fastq.gz O23092003_R1.fastq.gz O24013101_R1.fastq.gz O24011802_R1.fastq.gz O24011102_R1.fastq.gz PC_4_R1.fastq.gz O23100801_R1.fastq.gz O23092101_R1.fastq.gz O24020701_R1.fastq.gz O23092801_R1.fastq.gz O24022201_R1.fastq.gz O23110901_R1.fastq.gz O23090601_R1.fastq.gz O24022301_R1.fastq.gz; do java -jar /home/jhuang/Tools/Trimmomatic-0.36/trimmomatic-0.36.jar PE -threads 16 $file ${file/_R1/_R2} ../trim_data/$file ../trimmed_unpaired/$file ../trim_data/${file/_R1/_R2} ../trimmed_unpaired/${file/_R1/_R2} ILLUMINACLIP:/home/jhuang/Tools/Trimmomatic-0.36/adapters/TruSeq3-PE-2.fa:2:30:10:8:TRUE LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 AVGQUAL:20; done 2> trimmomatic_pe.log
4.3. mkdir pandaseq.out conda activate /home/jhuang/miniconda3/envs/qiime1 for file in trim_data/*_R1.fastq.gz; do pandaseq -f ${file} -r ${file/_R1.fastq.gz/R2.fastq.gz} -l 300 -p CCTACGGGNGGCWGCAG -q GACTACHVGGGTATCTAATCC -w pandaseq.out/$(echo $file | cut -d’/’ -f2 | cut -d’‘ -f1-3)_merged.fasta >> LOG_pandaseq; done conda deactivate
4.4. prepare qiime2_metadata.tsv
4.5. run qiime feature-table summarize \ –i-table dada2_tests/test_59_f235_r245/table.qza \ –o-visualization table.qzv \ –m-sample-metadata-file qiime2_metadata.tsv
#Table summary #Metric Sample #Number of samples 137-->96 #Number of features 3,039-->21,893 #Total frequency 1,641,484-->9,246,546 # #Frequency per sample #Minimum frequency 413.0-->41,764.0 #1st quartile 10,319.0-->100,017.5 #Median frequency 11,530.0-->100,017.5 #3rd quartile 13,146.0-->110,183.5 #Maximum frequency 40,022.0-->143,563.0 #Mean frequency 11,981.635036496351-->96,318.1875 # #Frequency per feature #Minimum frequency 1.0 #1st quartile 3.0 #Median frequency 8.0-->4.0 #3rd quartile 95.5-->14.0 #Maximum frequency 56,472.0-->983,499.0 #Mean frequency 540.1395195788089-->422.35 #qiime tools peek dada2_tests/test_59_f235_r245/table.qza #qiime tools peek qiime2_metadata.tsv qiime feature-table tabulate-seqs \ --i-data dada2_tests/test_59_f235_r245/rep-seqs.qza \ --o-visualization rep-seqs.qzv qiime metadata tabulate \ --m-input-file dada2_tests/test_59_f235_r245/denoising-stats.qza \ --o-visualization denoising-stats.qzv -
Import reference sequences and taxonomy (SILVA 132)
qiime tools import \ --type 'FeatureData[Sequence]' \ --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna \ --output-path silva_132_99_otus.qza \ --input-format DNAFASTAFormat qiime tools import \ --type 'FeatureData[Taxonomy]' \ --input-format HeaderlessTSVTaxonomyFormat \ --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/taxonomy/16S_only/99/consensus_taxonomy_7_levels.txt \ --output-path silva_132_99_taxonomy.qza -
Assign taxonomy
qiime feature-classifier classify-consensus-vsearch \ --i-query dada2_tests/test_59_f235_r245/rep-seqs.qza \ --i-reference-reads silva_132_99_otus.qza \ --i-reference-taxonomy silva_132_99_taxonomy.qza \ --p-perc-identity 0.97 \ --p-threads 64 \ --o-classification taxonomy.qza \ --o-search-results search-results.qza -
Visualize taxonomy
qiime taxa barplot \ --i-table dada2_tests/test_59_f235_r245/table.qza \ --i-taxonomy taxonomy.qza \ --m-metadata-file qiime2_metadata.tsv \ --o-visualization taxa-bar-plots.qzv -
Build phylogenetic tree
qiime alignment mafft \ --i-sequences dada2_tests/test_59_f235_r245/rep-seqs.qza \ --o-alignment aligned-rep-seqs.qza qiime alignment mask \ --i-alignment aligned-rep-seqs.qza \ --o-masked-alignment masked-aligned-rep-seqs.qza qiime phylogeny fasttree \ --i-alignment masked-aligned-rep-seqs.qza \ --o-tree unrooted-tree.qza # (*) The rooted-tree is generated from unrooted-tree, and will be used in the next step! qiime phylogeny midpoint-root \ --i-tree unrooted-tree.qza \ --o-rooted-tree rooted-tree.qza -
Core diversity analysis
#The -e 6389 flag sets the even sampling depth (rarefaction depth) to 6,389 reads for diversity analyses. #All samples will be rarefied to 4,753 reads. #Samples with fewer reads are excluded. qiime diversity core-metrics-phylogenetic \ --i-phylogeny rooted-tree.qza \ --i-table dada2_tests/test_59_f235_r245/table.qza \ --p-sampling-depth 6389 \ --m-metadata-file qiime2_metadata.tsv \ --output-dir core_metrics_results qiime diversity alpha \ --i-table dada2_tests/test_59_f235_r245/table.qza \ --p-metric chao1 \ --o-alpha-diversity core_metrics_results/chao1_vector.qza qiime tools export --input-path core_metrics_results/shannon_vector.qza --output-path exported_alpha/shannon qiime tools export --input-path core_metrics_results/faith_pd_vector.qza --output-path exported_alpha/faith_pd qiime tools export --input-path core_metrics_results/observed_features_vector.qza --output-path exported_alpha/observed_features qiime tools export --input-path core_metrics_results/chao1_vector.qza --output-path exported_alpha/chao1 qiime tools export \ --input-path core_metrics_results/unweighted_unifrac_distance_matrix.qza \ --output-path exported_unweighted_unifrac qiime tools export \ --input-path core_metrics_results/weighted_unifrac_distance_matrix.qza \ --output-path exported_weighted_unifrac qiime diversity beta-group-significance \ --i-distance-matrix core_metrics_results/weighted_unifrac_distance_matrix.qza \ --m-metadata-file qiime2_metadata.tsv \ --m-metadata-column Group \ --p-pairwise \ --p-method permanova \ --o-visualization beta_group_significance.qzv qiime tools export \ --input-path beta_group_significance.qzv \ --output-path exported_beta_group -
Prepare three files feeding to Phyloseq.Rmd: table.qza (see above with ), rooted-tree.qza (see above with ), qiime2_metadata_for_qza_to_phyloseq.tsv edited from qiime2_metadata.tsv.
# Rarefying can be performed here, or in Phyloseq.Rmd (default), therefore, we don't need this step any more. qiime feature-table summarize \ --i-table core_metrics_results/rarefied_table.qza \ --o-visualization rarefied_table.qzv \ --m-sample-metadata-file qiime2_metadata.tsv #Table summary #Metric Sample #Number of samples 136 #Number of features 2,781 #Total frequency 868,904 # In QIIME2, we need table.qza, not biom-file, therefore, we don't need this step any more. qiime tools export \ --input-path core_metrics_results/rarefied_table.qza \ --output-path exported_rarefied_table #--> exported_rarefied_table/feature-table.biom biom convert \ -i exported_rarefied_table/feature-table.biom \ -o exported_rarefied_table/feature-table.tsv \ --to-tsv #✅ Old QIIME 1 table with GenBank IDs (like EF603722.1.1487) as feature labels. #✅ QIIME 2 table where feature IDs are hashes (like 0b438323a296b5f2ce2c8bbe3949ee8d). # Visulaize the taxonomy.qza qiime tools export \ --input-path taxonomy.qza \ --output-path exported-taxonomy #Feature ID Taxon Confidence #0b4383... k__Bacteria; p__Proteobacteria... 0.98 #dfa833... k__Bacteria; p__Firmicutes... 0.87 #... # ---- I used the following to generate two file for feeding in the Phyloseq.Rmd ---- #-1- exported_table/feature-table.biom corresesponds to table_even6389.biom in QIIME1, but in QIIME2, we don't need biom-file, instead of table.qza. qiime tools export \ --input-path dada2_tests/test_59_f235_r245/table.qza \ --output-path exported_table #--> exported_table/feature-table.biom #-2- exported-tree/tree.nwk corresesponds to rep_set.tre in QIIME1 qiime tools export \ --input-path rooted-tree.qza \ --output-path exported-tree #--> exported-tree/tree.nwk # END # ---- The code in Phyloseq.Rmd ---- #install.packages("remotes") #remotes::install_github("jbisanz/qiime2R") #"core_metrics_results/rarefied_table.qza", rarefying performed in the code, therefore import the raw table. library(qiime2R) ps.ng.tax <- qza_to_phyloseq( features = "dada2_tests/test_59_f235_r245/table.qza", tree = "rooted-tree.qza", metadata = "qiime2_metadata_for_qza_to_phyloseq.tsv" ) # or #biom convert \ # -i ./exported_table/feature-table.biom \ # -o ./exported_table/feature-table-v1.biom \ # --to-json #ps.ng.tax <- import_biom("./exported_table/feature-table-v1.biom", treefilename="./exported-tree/tree.nwk") #Note that the alpha- and beta-diversity-files needed in Phyloseq.Rmd has been prepared in the step 9. -
Figures generated by Phyloseq.Rmd and MicrobiotaProcess_*.R
The following files can be found under server.
./Phyloseq.Rmd (Result Phyloseq.html) ./MicrobiotaProcess_cluster1_Group9-11_vs_cluster2_Group12-14_orig.R ./MicrobiotaProcess_Group1_vs_Group2.R ./MicrobiotaProcess_Group3_vs_Group4.R ./MicrobiotaProcess_PCA_Group1-4.R ./MicrobiotaProcess_PCA_Group9-14.R -
Generating relative_abundance_phylum_order_family.xls Convert the Excel file for Phylum, Order, and Family relative abundances for all samples to csv file after correcting the family due to the two redundance species “=SUM(EG10+EG18)” Edit the csv file, merge then to a file.
~/Tools/csv2xls-0.4/csv_to_xls.py relative_abundance_phylum.csv relative_abundance_order.csv relative_abundance_family.csv -d$'\t' -o relative_abundance_phylum_order_family.xls;
Main workflow using QIIME2 for Data_Childrensclinic_16S_2025
Leave a reply