Main workflow using QIIME2 for Data_Childrensclinic_16S_2025

  1. Install and test qiime2-docker

     #Cannot run under QIIME1, switch to QIIME2: pick_open_reference_otus.py -r/home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna -i test.fna -o clustering_test/ -p clustering_params.txt --parallel --verbose
    
     docker pull quay.io/qiime2/core:2023.9
    
     docker run -it --rm \
     -v /mnt/md1/DATA/Data_Childrensclinic_16S_2025:/data \
     -v /home/jhuang/REFs:/home/jhuang/REFs \
     quay.io/qiime2/core:2023.9 bash
     cd /data
  2. Import the fastq-files to paired-end-demux.qza

     #https://docs.qiime2.org/2018.8/tutorials/importing/
    
     #Moving the following fastq.gz to raw_data_NOT_USED:
     643805249,Kinderklinik Lauf 2 Pl.B,B04,N720-B,CGGAGCCT,S503-B,TATCCTCT,nan,nan
     7909160377,Kinderkllinik Lauf 2 Pl.D,D11,N728-D,TGCAGCTA,S517-D,GCGTAAGA,nan,nan
     7909188256,Kinderklinik Lauf 2 Pl.C,C10,N712-C,GTAGAGGA,S516-C,CCTAGAGT,nan,nan
     O231000101,Kinderkllinik Lauf 2 Pl.D,F11,N728-D,TGCAGCTA,S520-D,AAGGCTAT,nan,nan
     O231000102,Kinderkllinik Lauf 2 Pl.D,C09,N726-D,CCTAAGAC,S516-D,CCTAGAGT,nan,nan
     O231000103,Kinderkllinik Lauf 2 Pl.D,C11,N728-D,TGCAGCTA,S516-D,CCTAGAGT,nan,nan
     O231000902,Kinderkllinik Lauf 2 Pl.D,C12,N729-D,TCGACGTC,S516-D,CCTAGAGT,nan,nan
     O23100201,Kinderklinik Lauf 2 Pl.B,D10,N727-B,CGATCAGT,S506-B,ACTGCATA,nan,nan
     O23100805,Kinderklinik Lauf 2 Pl.B,F10,N727-B,CGATCAGT,S508-B,CTAAGCCT,nan,nan
     O23100901,Kinderkllinik Lauf 2 Pl.D,F08,N724-D,ACTGAGCG,S520-D,AAGGCTAT,nan,nan
     O23100903,Kinderkllinik Lauf 2 Pl.D,B08,N724-D,ACTGAGCG,S515-D,TTCTAGCT,nan,nan
     O23100904,Kinderkllinik Lauf 2 Pl.D,C08,N724-D,ACTGAGCG,S516-D,CCTAGAGT,nan,nan
     O23100905,Kinderkllinik Lauf 2 Pl.D,D10,N727-D,CGATCAGT,S517-D,GCGTAAGA,nan,nan
     O23111503,Kinderkllinik Lauf 2 Pl.D,A12,N729-D,TCGACGTC,S513-D,TCGACTAG,nan,nan
     O23120902,Kinderklinik Lauf 2 Pl.C,C03,N703-C,AGGCAGAA,S516-C,CCTAGAGT,nan,nan
     O24022002,Kinderklinik Lauf 2 Pl.C,H12,N715-C,ATCTCAGG,S522-C,TTATGCGA,nan,nan
     U23071801,Kinderklinik Lauf 2 Pl.B,B02,N718-B,GGAGCTAC,S503-B,TATCCTCT,nan,nan
    
     rename -n 's/^([A-Z]\d+)_S\d+_L\d+_R([12])_001\.fastq\.gz$/$1_R$2.fastq.gz/' *.fastq.gz
     rename 's/^([A-Z]\d+)_S\d+_L\d+_R([12])_001\.fastq\.gz$/$1_R$2.fastq.gz/' *.fastq.gz
    
     #for file in *.fastq.gz; do echo "mv $file $(echo $file | cut -d'_' -f1 | cut -d'-' -f1-1)_$(echo $file | cut -d'_' -f4).fastq.gz"; done
     for file in *.fastq.gz; do echo "mv $file $(echo $file | cut -d'_' -f1)_$(echo $file | cut -d'_' -f4).fastq.gz"; done
     #MANUALLY correct several filename errors and performing in the generated commands above
     #MANUALLY correct several filename errors and performing in the generated commands below
    
     NTC_2
     NTC_3
     NTC_4
     NTC_5
     NTC_6
     NTC_7
     NTC_8
     NTC_9
     NTC_10
     NTC_11
     NTC_12
     NTC_13
     NTC_14
     NTC_15
     NTC_16
    
     mv NTC-1_R1.fastq.gz NTC_1_R1.fastq.gz
     mv NTC-1_R2.fastq.gz NTC_1_R2.fastq.gz
     mv NTC-2_R1.fastq.gz NTC_2_R1.fastq.gz
     mv NTC-2_R2.fastq.gz NTC_2_R2.fastq.gz
     mv NTC-3_R1.fastq.gz NTC_3_R1.fastq.gz
     mv NTC-3_R2.fastq.gz NTC_3_R2.fastq.gz
     mv NTC-4_R1.fastq.gz NTC_4_R1.fastq.gz
     mv NTC-4_R2.fastq.gz NTC_4_R2.fastq.gz
     mv NTC-5_R1.fastq.gz NTC_5_R1.fastq.gz
     mv NTC-5_R2.fastq.gz NTC_5_R2.fastq.gz
     mv NTC-6_R1.fastq.gz NTC_6_R1.fastq.gz
     mv NTC-6_R2.fastq.gz NTC_6_R2.fastq.gz
     mv NTC-7_R1.fastq.gz NTC_7_R1.fastq.gz
     mv NTC-7_R2.fastq.gz NTC_7_R2.fastq.gz
     mv NTC-8_R1.fastq.gz NTC_8_R1.fastq.gz
     mv NTC-8_R2.fastq.gz NTC_8_R2.fastq.gz
     mv NTC-9_R1.fastq.gz NTC_9_R1.fastq.gz
     mv NTC-9_R2.fastq.gz NTC_9_R2.fastq.gz
     mv NTC-10_R1.fastq.gz NTC_10_R1.fastq.gz
     mv NTC-10_R2.fastq.gz NTC_10_R2.fastq.gz
     mv NTC-11_R1.fastq.gz NTC_11_R1.fastq.gz
     mv NTC-11_R2.fastq.gz NTC_11_R2.fastq.gz
     mv NTC-12_R1.fastq.gz NTC_12_R1.fastq.gz
     mv NTC-12_R2.fastq.gz NTC_12_R2.fastq.gz
     mv NTC-13_R1.fastq.gz NTC_13_R1.fastq.gz
     mv NTC-13_R2.fastq.gz NTC_13_R2.fastq.gz
     mv NTC-14_R1.fastq.gz NTC_14_R1.fastq.gz
     mv NTC-14_R2.fastq.gz NTC_14_R2.fastq.gz
     mv NTC-15_R1.fastq.gz NTC_15_R1.fastq.gz
     mv NTC-15_R2.fastq.gz NTC_15_R2.fastq.gz
     mv NTC-16_R1.fastq.gz NTC_16_R1.fastq.gz
     mv NTC-16_R2.fastq.gz NTC_16_R2.fastq.gz
    
     PC_1
     PC_2
     PC_3
     PC_4
     PC_5
     PC_6
     PC_7
     PC_8
    
     mv PC-1_R1.fastq.gz PC_1_R1.fastq.gz
     mv PC-1_R2.fastq.gz PC_1_R2.fastq.gz
     mv PC-2_R1.fastq.gz PC_2_R1.fastq.gz
     mv PC-2_R2.fastq.gz PC_2_R2.fastq.gz
     mv PC-3_R1.fastq.gz PC_3_R1.fastq.gz
     mv PC-3_R2.fastq.gz PC_3_R2.fastq.gz
     mv PC-4_R1.fastq.gz PC_4_R1.fastq.gz
     mv PC-4_R2.fastq.gz PC_4_R2.fastq.gz
     mv PC-5_R1.fastq.gz PC_5_R1.fastq.gz
     mv PC-5_R2.fastq.gz PC_5_R2.fastq.gz
     mv PC-6_R1.fastq.gz PC_6_R1.fastq.gz
     mv PC-6_R2.fastq.gz PC_6_R2.fastq.gz
     mv PC-7_R1.fastq.gz PC_7_R1.fastq.gz
     mv PC-7_R2.fastq.gz PC_7_R2.fastq.gz
     mv PC-8_R1.fastq.gz PC_8_R1.fastq.gz
     mv PC-8_R2.fastq.gz PC_8_R2.fastq.gz
    
     #MOVE the used fastq.gz to the directory raw_data
     #PREPARE the conf-file --> "-rw-rw-r-- 1 jhuang jhuang 9,3K Nov 20 12:22  pe-33-manifest"
     qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path pe-33-manifest --output-path paired-end-demux.qza --input-format PairedEndFastqManifestPhred33
     #--> "-rw-r--r-- 1 root   root   3,5G Nov 20 12:24  paired-end-demux.qza"
    
     qiime demux summarize \
     --i-data paired-end-demux.qza \
     --o-visualization demux_pe.qzv
     #--> "-rw-r--r-- 1 root   root   320K Nov 20 12:34  demux_pe.qzv"
    
     #qiime tools view demux_pe.qzv --> Error: Visualization viewing is currently not supported in headless environments. You can view Visualizations (and Artifacts) at https://view.qiime2.org, or move the Visualization to an environment with a display and view it with `qiime tools view`.
     #Open demux_pe.gzv on https://view.qiime2.org
  3. Optimizing the parameters trunc-len-f and trunc-len-r and denoising with DADA2: optimized parameters is f240_r240

     #Your amplicon (V3–V4 region) is ~464 bp, so you need ≥20–30 bp overlap
     #464-38=426; 440 is the longst +12 nt for overlapping=we need 452 nt!
    
     #Optimize the parameters --p-trunc-len-f and --p-trunc-len-r
     (qiime2-amplicon-2023.9) root@4379fea45cf7:/data# ./dada2_batch_test.sh
    
             #!/bin/bash
    
             # Set your base inputs
             INPUT=paired-end-demux.qza
             TRIM_LEFT_F=17
             TRIM_LEFT_R=21
    
             # Output base
             OUTPUT_DIR=dada2_tests
             mkdir -p $OUTPUT_DIR
    
             # Loop over trunc-len-f and trunc-len-r combinations
             # Forward: from 220 to 260
             # Reverse: from 210 to 260
             #225 220 215
             i=1
             for f in 260 255 250 245 240 235 230 225 220; do
                     for r in 260 255 250 245 240 235 230 225 220 215 210; do
                     OUT=test_${i}_f${f}_r${r}
                     echo "Running: $OUT"
                     mkdir -p $OUTPUT_DIR/$OUT
    
                     qiime dada2 denoise-paired \
                     --i-demultiplexed-seqs $INPUT \
                     --p-trim-left-f $TRIM_LEFT_F \
                     --p-trim-left-r $TRIM_LEFT_R \
                     --p-max-ee-f 3 --p-max-ee-r 5 \
                     --p-trunc-len-f $f \
                     --p-trunc-len-r $r \
                     --p-n-threads 32 \
                     --o-table $OUTPUT_DIR/$OUT/table.qza \
                     --o-representative-sequences $OUTPUT_DIR/$OUT/rep-seqs.qza \
                     --o-denoising-stats $OUTPUT_DIR/$OUT/denoising-stats.qza \
                     --verbose > $OUTPUT_DIR/$OUT/log.txt 2>&1
    
                     ((i++))
                     done
             done
    
     for f in dada2_tests2/test_*/denoising-stats.qza; do
     qiime metadata tabulate \
         --m-input-file $f \
         --o-visualization ${f%.qza}.qzv
     done
    
     #Manually convert denoising-stats.qza to denoising-stats.tsv using https://view.qiime2.org
     #Downloads to jhuang@WS-2290C:~/DATA/Data_Childrensclinic_16S_2025/dada2_tests/test_59_f235_r245$ mv ~/Downloads/data_stats.tsv .
     #NTC01  33463   1732    5.18    1723    1650    4.93    1589    4.75
     #O23091304  54831   7477    13.64   7409    7133    13.01   7101    12.95
     #NTC01  33463   1719    5.14    1711    1648    4.92    1588    4.75
     #O23091304  54831   7419    13.53   7345    7068    12.89   7046    12.85
    
     #pandaseq.out: grep ">" A1_R1.fastq.gz_merged.fasta | wc -l #8229;  grep ">" A10_R1.fastq.gz_merged.fasta | wc -l #9165
    
     sudo chown -R jhuang:jhuang dada2_tests
     cd dada2_tests
     python3 rank_dada2_params.py
    
     # Top parameter sets by % input non-chimeric (overall):
     # test_69_f230_r250: f=230, r=250, %non-chimeric=81.75, median%=84.57, merged%=83.86, samples_nonchim=253/253
     # * (CHOOSEN) test_59_f235_r245: f=235, r=245, %non-chimeric=81.57, median%=84.46, merged%=83.79, samples_nonchim=253/253
     # test_58_f235_r250: f=235, r=250, %non-chimeric=81.49, median%=84.35, merged%=83.61, samples_nonchim=253/253
     # test_49_f240_r240: f=240, r=240, %non-chimeric=81.30, median%=84.02, merged%=83.46, samples_nonchim=253/253
    
     # test_48_f240_r245: f=240, r=245, %non-chimeric=81.21, median%=83.95, merged%=83.33, samples_nonchim=253/253
     # test_47_f240_r250: f=240, r=250, %non-chimeric=81.12, median%=83.94, merged%=83.19, samples_nonchim=253/253
     # test_39_f245_r235: f=245, r=235, %non-chimeric=80.84, median%=83.59, merged%=82.98, samples_nonchim=253/253
     # test_38_f245_r240: f=245, r=240, %non-chimeric=80.78, median%=83.52, merged%=82.91, samples_nonchim=253/253
     # test_37_f245_r245: f=245, r=245, %non-chimeric=80.72, median%=83.36, merged%=82.81, samples_nonchim=253/253
     # test_36_f245_r250: f=245, r=250, %non-chimeric=80.62, median%=83.41, merged%=82.68, samples_nonchim=253/253
  4. Visualize outputs (Using pandaseq.out, since qiime2_metadata.tsv contains the pathway of pandaseq.out)

    4.1. mkdir fastqc_out fastqc -t 4 raw_data/* -o fastqc_out/ 4.2. mkdir trim_data trimmed_unpaired cd raw_data for file in O24021602_R1.fastq.gz O24021601_R1.fastq.gz O23100705_R1.fastq.gz PC_5_R1.fastq.gz O24010402_R1.fastq.gz U23090801_R1.fastq.gz O24012402_R1.fastq.gz NTC_7_R1.fastq.gz O23121301_R1.fastq.gz O24013104_R1.fastq.gz U25020701_R1.fastq.gz O23100401_R1.fastq.gz O24020901_R1.fastq.gz NTC_1_R1.fastq.gz O24020903_R1.fastq.gz O23092002_R1.fastq.gz UR009768_R1.fastq.gz O23100402_R1.fastq.gz O23091304_R1.fastq.gz O23100703_R1.fastq.gz O23110904_R1.fastq.gz O23110903_R1.fastq.gz O23121304_R1.fastq.gz O23083101_R1.fastq.gz O23100502_R1.fastq.gz O23110301_R1.fastq.gz O24020805_R1.fastq.gz A23051102_R1.fastq.gz O23102604_R1.fastq.gz A23051103_R1.fastq.gz O23102504_R1.fastq.gz O24011205_R1.fastq.gz O23111602_R1.fastq.gz O24020905_R1.fastq.gz O23090701_R1.fastq.gz O24020702_R1.fastq.gz NTC_4_R1.fastq.gz A24072901_R1.fastq.gz NTC_10_R1.fastq.gz O23100704_R1.fastq.gz NTC_8_R1.fastq.gz O24011901_R1.fastq.gz O23111501_R1.fastq.gz O23112303_R1.fastq.gz O23112902_R1.fastq.gz O24011203_R1.fastq.gz O23092006_R1.fastq.gz O24010404_R1.fastq.gz O23090803_R1.fastq.gz O23091501_R1.fastq.gz U25022101_R1.fastq.gz O23121305_R1.fastq.gz U24080201_R1.fastq.gz U24111801_R1.fastq.gz A24040201_R1.fastq.gz O23120702_R1.fastq.gz NTC_5_R1.fastq.gz O24013105_R1.fastq.gz O23100706_R1.fastq.gz O23083102_R1.fastq.gz O23091305_R1.fastq.gz PC_7_R1.fastq.gz O24011005_R1.fastq.gz PC01_R1.fastq.gz O24011004_R1.fastq.gz O23110902_R1.fastq.gz O24020904_R1.fastq.gz A24071701_R1.fastq.gz O24021501_R1.fastq.gz O23120101_R1.fastq.gz U25011702_R1.fastq.gz O23092103_R1.fastq.gz UR009909_R1.fastq.gz O23091403_R1.fastq.gz NTC_2_R1.fastq.gz PC_1_R1.fastq.gz O24013102_R1.fastq.gz O23102704_R1.fastq.gz O24010401_R1.fastq.gz O24012501_R1.fastq.gz U24121801_R1.fastq.gz O24031304_R1.fastq.gz O23090802_R1.fastq.gz NTC_12_R1.fastq.gz O23100702_R1.fastq.gz O24011202_R1.fastq.gz O23100601_R1.fastq.gz O23100802_R1.fastq.gz O24021404_R1.fastq.gz O23112301_R1.fastq.gz O24011103_R1.fastq.gz O24030601_R1.fastq.gz U23052201_R1.fastq.gz O23100803_R1.fastq.gz O23102502_R1.fastq.gz O24011206_R1.fastq.gz O23091401_R1.fastq.gz O23092001_R1.fastq.gz O24020801_R1.fastq.gz O23082302_R1.fastq.gz O23112204_R1.fastq.gz O23100701_R1.fastq.gz O23112304_R1.fastq.gz A24031201_R1.fastq.gz O24022801_R1.fastq.gz PC_3_R1.fastq.gz O24022901_R1.fastq.gz O23092005_R1.fastq.gz O23121502_R1.fastq.gz O24021401_R1.fastq.gz U25011701_R1.fastq.gz NTC01_R1.fastq.gz O23082401_R1.fastq.gz O24022202_R1.fastq.gz NTC_6_R1.fastq.gz O24031302_R1.fastq.gz NTC_15_R1.fastq.gz NTC_13_R1.fastq.gz O24022302_R1.fastq.gz O23121303_R1.fastq.gz O24021502_R1.fastq.gz O24020704_R1.fastq.gz O23092202_R1.fastq.gz A23112002_R1.fastq.gz O23092803_R1.fastq.gz O23112203_R1.fastq.gz NTC_11_R1.fastq.gz O23091302_R1.fastq.gz A24062801_R1.fastq.gz O23121302_R1.fastq.gz O23092104_R1.fastq.gz O24020802_R1.fastq.gz O24021403_R1.fastq.gz O23112201_R1.fastq.gz O23082402_R1.fastq.gz U24070401_R1.fastq.gz O24020902_R1.fastq.gz O24030602_R1.fastq.gz NTC_3_R1.fastq.gz O24011201_R1.fastq.gz O23091301_R1.fastq.gz O23112202_R1.fastq.gz O24021503_R1.fastq.gz O23102601_R1.fastq.gz O24011002_R1.fastq.gz O23121503_R1.fastq.gz O23092004_R1.fastq.gz O23091402_R1.fastq.gz O23092701_R1.fastq.gz O24031301_R1.fastq.gz O24020703_R1.fastq.gz O24013103_R1.fastq.gz A24080201_R1.fastq.gz U23071201_R1.fastq.gz U23052401_R1.fastq.gz U23071901_R1.fastq.gz O24011101_R1.fastq.gz U23091101_R1.fastq.gz O23110204_R1.fastq.gz O23102702_R1.fastq.gz O24020101_R1.fastq.gz O24012401_R1.fastq.gz O23100501_R1.fastq.gz O24020705_R1.fastq.gz A23060602_R1.fastq.gz O23102701_R1.fastq.gz O24011001_R1.fastq.gz O24022102_R1.fastq.gz O23110302_R1.fastq.gz O23100503_R1.fastq.gz O24020803_R1.fastq.gz O23110203_R1.fastq.gz A24030402_R1.fastq.gz O23092102_R1.fastq.gz O24011003_R1.fastq.gz O23121306_R1.fastq.gz O23110101_R1.fastq.gz O24020804_R1.fastq.gz O23112901_R1.fastq.gz A23072501_R1.fastq.gz O24030604_R1.fastq.gz O23112302_R1.fastq.gz O24010301_R1.fastq.gz O24021402_R1.fastq.gz O24011104_R1.fastq.gz O23102602_R1.fastq.gz O23102703_R1.fastq.gz O23091303_R1.fastq.gz O24011204_R1.fastq.gz A24071901_R1.fastq.gz O23102603_R1.fastq.gz O23082303_R1.fastq.gz O24011801_R1.fastq.gz O24030603_R1.fastq.gz O23092802_R1.fastq.gz O23090801_R1.fastq.gz A24030401_R1.fastq.gz O23083001_R1.fastq.gz A23060601_R1.fastq.gz O23092203_R1.fastq.gz NTC_14_R1.fastq.gz O23090602_R1.fastq.gz A23111301_R1.fastq.gz O23120103_R1.fastq.gz O24011105_R1.fastq.gz U23071701_R1.fastq.gz O23111601_R1.fastq.gz O24010501_R1.fastq.gz O23110202_R1.fastq.gz O23121501_R1.fastq.gz O24010302_R1.fastq.gz O24011207_R1.fastq.gz O23092702_R1.fastq.gz O23112206_R1.fastq.gz PC_2_R1.fastq.gz O23102503_R1.fastq.gz O23092201_R1.fastq.gz NTC_9_R1.fastq.gz A23080101_R1.fastq.gz PC_6_R1.fastq.gz O23120701_R1.fastq.gz U24101801_R1.fastq.gz A23112001_R1.fastq.gz O23120104_R1.fastq.gz O23100804_R1.fastq.gz O24031305_R1.fastq.gz O23111502_R1.fastq.gz O24020103_R1.fastq.gz O23082301_R1.fastq.gz NTC_16_R1.fastq.gz PC_8_R1.fastq.gz O24022101_R1.fastq.gz O24020102_R1.fastq.gz O23081801_R1.fastq.gz O24022902_R1.fastq.gz O23112205_R1.fastq.gz O24021603_R1.fastq.gz O23102501_R1.fastq.gz O24010403_R1.fastq.gz O24012403_R1.fastq.gz O23092003_R1.fastq.gz O24013101_R1.fastq.gz O24011802_R1.fastq.gz O24011102_R1.fastq.gz PC_4_R1.fastq.gz O23100801_R1.fastq.gz O23092101_R1.fastq.gz O24020701_R1.fastq.gz O23092801_R1.fastq.gz O24022201_R1.fastq.gz O23110901_R1.fastq.gz O23090601_R1.fastq.gz O24022301_R1.fastq.gz; do java -jar /home/jhuang/Tools/Trimmomatic-0.36/trimmomatic-0.36.jar PE -threads 16 $file ${file/_R1/_R2} ../trim_data/$file ../trimmed_unpaired/$file ../trim_data/${file/_R1/_R2} ../trimmed_unpaired/${file/_R1/_R2} ILLUMINACLIP:/home/jhuang/Tools/Trimmomatic-0.36/adapters/TruSeq3-PE-2.fa:2:30:10:8:TRUE LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 AVGQUAL:20; done 2> trimmomatic_pe.log

    4.3. mkdir pandaseq.out conda activate /home/jhuang/miniconda3/envs/qiime1 for file in trim_data/*_R1.fastq.gz; do pandaseq -f ${file} -r ${file/_R1.fastq.gz/R2.fastq.gz} -l 300 -p CCTACGGGNGGCWGCAG -q GACTACHVGGGTATCTAATCC -w pandaseq.out/$(echo $file | cut -d’/’ -f2 | cut -d’‘ -f1-3)_merged.fasta >> LOG_pandaseq; done conda deactivate

    4.4. prepare qiime2_metadata.tsv

    4.5. run qiime feature-table summarize \ –i-table dada2_tests/test_59_f235_r245/table.qza \ –o-visualization table.qzv \ –m-sample-metadata-file qiime2_metadata.tsv

     #Table summary
     #Metric Sample
     #Number of samples  137-->96
     #Number of features 3,039-->21,893
     #Total frequency    1,641,484-->9,246,546
     #
     #Frequency per sample
     #Minimum frequency  413.0-->41,764.0
     #1st quartile   10,319.0-->100,017.5
     #Median frequency   11,530.0-->100,017.5
     #3rd quartile   13,146.0-->110,183.5
     #Maximum frequency  40,022.0-->143,563.0
     #Mean frequency 11,981.635036496351-->96,318.1875
     #
     #Frequency per feature
     #Minimum frequency  1.0
     #1st quartile   3.0
     #Median frequency   8.0-->4.0
     #3rd quartile   95.5-->14.0
     #Maximum frequency  56,472.0-->983,499.0
     #Mean frequency 540.1395195788089-->422.35
    
     #qiime tools peek dada2_tests/test_59_f235_r245/table.qza
     #qiime tools peek qiime2_metadata.tsv
    
     qiime feature-table tabulate-seqs \
     --i-data dada2_tests/test_59_f235_r245/rep-seqs.qza \
     --o-visualization rep-seqs.qzv
    
     qiime metadata tabulate \
     --m-input-file dada2_tests/test_59_f235_r245/denoising-stats.qza \
     --o-visualization denoising-stats.qzv
  5. Import reference sequences and taxonomy (SILVA 132)

     qiime tools import \
     --type 'FeatureData[Sequence]' \
     --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna \
     --output-path silva_132_99_otus.qza \
     --input-format DNAFASTAFormat
    
     qiime tools import \
     --type 'FeatureData[Taxonomy]' \
     --input-format HeaderlessTSVTaxonomyFormat \
     --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/taxonomy/16S_only/99/consensus_taxonomy_7_levels.txt \
     --output-path silva_132_99_taxonomy.qza
  6. Assign taxonomy

     qiime feature-classifier classify-consensus-vsearch \
     --i-query  dada2_tests/test_59_f235_r245/rep-seqs.qza \
     --i-reference-reads silva_132_99_otus.qza \
     --i-reference-taxonomy silva_132_99_taxonomy.qza \
     --p-perc-identity 0.97 \
     --p-threads 64 \
     --o-classification taxonomy.qza \
     --o-search-results search-results.qza
  7. Visualize taxonomy

     qiime taxa barplot \
     --i-table  dada2_tests/test_59_f235_r245/table.qza \
     --i-taxonomy taxonomy.qza \
     --m-metadata-file qiime2_metadata.tsv \
     --o-visualization taxa-bar-plots.qzv
  8. Build phylogenetic tree

     qiime alignment mafft \
     --i-sequences  dada2_tests/test_59_f235_r245/rep-seqs.qza \
     --o-alignment aligned-rep-seqs.qza
    
     qiime alignment mask \
     --i-alignment aligned-rep-seqs.qza \
     --o-masked-alignment masked-aligned-rep-seqs.qza
    
     qiime phylogeny fasttree \
     --i-alignment masked-aligned-rep-seqs.qza \
     --o-tree unrooted-tree.qza
    
     # (*) The rooted-tree is generated from unrooted-tree, and will be used in the next step!
     qiime phylogeny midpoint-root \
     --i-tree unrooted-tree.qza \
     --o-rooted-tree rooted-tree.qza
  9. Core diversity analysis

     #The -e 6389 flag sets the even sampling depth (rarefaction depth) to 6,389 reads for diversity analyses.
     #All samples will be rarefied to 4,753 reads.
     #Samples with fewer reads are excluded.
     qiime diversity core-metrics-phylogenetic \
     --i-phylogeny rooted-tree.qza \
     --i-table  dada2_tests/test_59_f235_r245/table.qza \
     --p-sampling-depth 6389 \
     --m-metadata-file qiime2_metadata.tsv \
     --output-dir core_metrics_results
    
     qiime diversity alpha \
     --i-table dada2_tests/test_59_f235_r245/table.qza \
     --p-metric chao1 \
     --o-alpha-diversity core_metrics_results/chao1_vector.qza
    
     qiime tools export --input-path core_metrics_results/shannon_vector.qza --output-path exported_alpha/shannon
     qiime tools export --input-path core_metrics_results/faith_pd_vector.qza --output-path exported_alpha/faith_pd
     qiime tools export --input-path core_metrics_results/observed_features_vector.qza --output-path exported_alpha/observed_features
     qiime tools export --input-path core_metrics_results/chao1_vector.qza --output-path exported_alpha/chao1
    
     qiime tools export \
     --input-path core_metrics_results/unweighted_unifrac_distance_matrix.qza \
     --output-path exported_unweighted_unifrac
     qiime tools export \
     --input-path core_metrics_results/weighted_unifrac_distance_matrix.qza \
     --output-path exported_weighted_unifrac
    
     qiime diversity beta-group-significance \
     --i-distance-matrix core_metrics_results/weighted_unifrac_distance_matrix.qza \
     --m-metadata-file qiime2_metadata.tsv \
     --m-metadata-column Group \
     --p-pairwise \
     --p-method permanova \
     --o-visualization beta_group_significance.qzv
    
     qiime tools export \
     --input-path beta_group_significance.qzv \
     --output-path exported_beta_group
  10. Prepare three files feeding to Phyloseq.Rmd: table.qza (see above with ), rooted-tree.qza (see above with ), qiime2_metadata_for_qza_to_phyloseq.tsv edited from qiime2_metadata.tsv.

     # Rarefying can be performed here, or in Phyloseq.Rmd (default), therefore, we don't need this step any more.
     qiime feature-table summarize \
     --i-table core_metrics_results/rarefied_table.qza \
     --o-visualization rarefied_table.qzv \
     --m-sample-metadata-file qiime2_metadata.tsv
    
     #Table summary
     #Metric Sample
     #Number of samples  136
     #Number of features 2,781
     #Total frequency    868,904
    
     # In QIIME2, we need table.qza, not biom-file, therefore, we don't need this step any more.
     qiime tools export \
     --input-path core_metrics_results/rarefied_table.qza \
     --output-path exported_rarefied_table
     #--> exported_rarefied_table/feature-table.biom
    
     biom convert \
     -i exported_rarefied_table/feature-table.biom \
     -o exported_rarefied_table/feature-table.tsv \
     --to-tsv
    
     #✅ Old QIIME 1 table with GenBank IDs (like EF603722.1.1487) as feature labels.
     #✅ QIIME 2 table where feature IDs are hashes (like 0b438323a296b5f2ce2c8bbe3949ee8d).
    
     # Visulaize the taxonomy.qza
     qiime tools export \
     --input-path taxonomy.qza \
     --output-path exported-taxonomy
    
     #Feature ID    Taxon                               Confidence
     #0b4383...     k__Bacteria; p__Proteobacteria...   0.98
     #dfa833...     k__Bacteria; p__Firmicutes...       0.87
     #...
    
     # ---- I used the following to generate two file for feeding in the Phyloseq.Rmd ----
    
     #-1- exported_table/feature-table.biom corresesponds to table_even6389.biom in QIIME1, but in QIIME2, we don't need biom-file, instead of table.qza.
    
     qiime tools export \
     --input-path  dada2_tests/test_59_f235_r245/table.qza \
     --output-path exported_table
     #--> exported_table/feature-table.biom
    
     #-2- exported-tree/tree.nwk corresesponds to rep_set.tre in QIIME1
    
     qiime tools export \
     --input-path rooted-tree.qza \
     --output-path exported-tree
     #--> exported-tree/tree.nwk
    
     # END
    
     # ---- The code in Phyloseq.Rmd ----
    
     #install.packages("remotes")
     #remotes::install_github("jbisanz/qiime2R")
     #"core_metrics_results/rarefied_table.qza", rarefying performed in the code, therefore import the raw table.
     library(qiime2R)
     ps.ng.tax <- qza_to_phyloseq(
         features =  "dada2_tests/test_59_f235_r245/table.qza",
         tree = "rooted-tree.qza",
         metadata = "qiime2_metadata_for_qza_to_phyloseq.tsv"
     )
     # or
     #biom convert \
     #      -i ./exported_table/feature-table.biom \
     #      -o ./exported_table/feature-table-v1.biom \
     #      --to-json
     #ps.ng.tax <- import_biom("./exported_table/feature-table-v1.biom", treefilename="./exported-tree/tree.nwk")
    
     #Note that the alpha- and beta-diversity-files needed in Phyloseq.Rmd has been prepared in the step 9.
  11. Figures generated by Phyloseq.Rmd and MicrobiotaProcess_*.R

    The following files can be found under server.

     ./Phyloseq.Rmd (Result Phyloseq.html)
     ./MicrobiotaProcess_cluster1_Group9-11_vs_cluster2_Group12-14_orig.R
     ./MicrobiotaProcess_Group1_vs_Group2.R
     ./MicrobiotaProcess_Group3_vs_Group4.R
     ./MicrobiotaProcess_PCA_Group1-4.R
     ./MicrobiotaProcess_PCA_Group9-14.R
  12. Generating relative_abundance_phylum_order_family.xls Convert the Excel file for Phylum, Order, and Family relative abundances for all samples to csv file after correcting the family due to the two redundance species “=SUM(EG10+EG18)” Edit the csv file, merge then to a file.

     ~/Tools/csv2xls-0.4/csv_to_xls.py relative_abundance_phylum.csv relative_abundance_order.csv relative_abundance_family.csv -d$'\t' -o relative_abundance_phylum_order_family.xls;

Leave a Reply

Your email address will not be published. Required fields are marked *