-
Install and test qiime2-docker
#Cannot run under QIIME1, switch to QIIME2: pick_open_reference_otus.py -r/home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna -i test.fna -o clustering_test/ -p clustering_params.txt --parallel --verbose docker pull quay.io/qiime2/core:2023.9 docker run -it --rm \ -v /mnt/md1/DATA/Data_Marius_16S_2025:/data \ -v /home/jhuang/REFs:/home/jhuang/REFs \ quay.io/qiime2/core:2023.9 bash cd /data
-
Import the fastq-files to paired-end-demux.qza
#https://docs.qiime2.org/2018.8/tutorials/importing/ qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path pe-33-manifest --output-path paired-end-demux.qza --input-format PairedEndFastqManifestPhred33 #--> 1095204304 Mai 27 15:11 paired-end-demux.qza qiime demux summarize \ --i-data paired-end-demux.qza \ --o-visualization demux_pe.qzv #https://view.qiime2.org #qiime tools view demux_pe.qzv
-
Optimizing the parameters trunc-len-f and trunc-len-r and denoising with DADA2: optimized parameters is f240_r240
#Your amplicon (V3–V4 region) is ~464 bp, so you need ≥20–30 bp overlap #464-38=426; 440 is the longst +12 nt for overlapping=we need 452 nt! #Optimize the parameters --p-trunc-len-f and --p-trunc-len-r ./dada2_batch_test.sh #!/bin/bash # Set your base inputs INPUT=paired-end-demux.qza TRIM_LEFT_F=17 TRIM_LEFT_R=21 # Output base OUTPUT_DIR=dada2_tests mkdir -p $OUTPUT_DIR # Loop over trunc-len-f and trunc-len-r combinations # Forward: from 220 to 240 # Reverse: from 210 to 230 i=1 for f in 240 235 230 225; do for r in 225 220 215; do OUT=test_${i}_f${f}_r${r} echo "Running: $OUT" mkdir -p $OUTPUT_DIR/$OUT qiime dada2 denoise-paired \ --i-demultiplexed-seqs $INPUT \ --p-trim-left-f $TRIM_LEFT_F \ --p-trim-left-r $TRIM_LEFT_R \ --p-max-ee-f 3 --p-max-ee-r 5 \ --p-trunc-len-f $f \ --p-trunc-len-r $r \ --p-n-threads 32 \ --o-table $OUTPUT_DIR/$OUT/table.qza \ --o-representative-sequences $OUTPUT_DIR/$OUT/rep-seqs.qza \ --o-denoising-stats $OUTPUT_DIR/$OUT/denoising-stats.qza \ --verbose > $OUTPUT_DIR/$OUT/log.txt 2>&1 ((i++)) done done for f in dada2_tests2/test_*/denoising-stats.qza; do qiime metadata tabulate \ --m-input-file $f \ --o-visualization ${f%.qza}.qzv done #pandaseq.out: grep ">" A1_R1.fastq.gz_merged.fasta | wc -l #8229; grep ">" A10_R1.fastq.gz_merged.fasta | wc -l #9165 # The best choice is f251_r251, since the first 17 and 21 bases with bad quality are anyway removed! #f251_r251: sample-A1 18299 10989 60.05 10609 7129 38.96 6535 35.71; sample-A10 18736 12249 65.38 11778 7444 39.73 6978 37.24 #f251_r250: sample-A1 18299 11855 64.78 11435 7431 40.61 6823 37.29; sample-A10 18736 13092 69.88 12590 7714 41.17 7206 38.46 #f251_r245: sample-A1 18299 12642 69.09 12180 7860 42.95 7193 39.31; sample-A10 18736 13981 74.62 13457 8218 43.86 7649 40.83 #f251_r240: sample-A1 18299 12678 69.28 12214 8060 44.05 7387 40.37; sample-A10 18736 14018 74.82 13498 8412 44.9 7758 41.41 #f250_r240: sample-A1 18299 13705 74.89 13191 8796 48.07 7984 43.63 #f250_r235: sample-A1 18299 13716 74.95 13198 8793 48.05 7969 43.55 #f250_r230: sample-A1 18299 13739 75.08 13217 9023 49.31 8113 44.34; sample-A10 18736 14838 79.2 14159 8895 47.48 8101 43.24 #f245_r240: sample-A1 18299 14513 79.31 14019 9472 51.76 8739 47.76; sample-A10 18736 15609 83.31 15102 9605 51.26 8880 47.4 #f245_r235: sample-A1 18299 14524 79.37 14026 9485 51.83 8746 47.79; sample-A10 18736 15634 83.44 15127 9685 51.69 8869 47.34 #f245_r230: sample-A1 18299 14547 79.5 14045 8845 48.34 8058 44.04; sample-A10 18736 15664 83.6 15156 8812 47.03 7999 42.69 #f240_r240: sample-A1 18299 14647 80.04 14164 9869 53.93 8932 48.81; sample-A10 18736 15728 83.95 15213 10488 55.98 9081 48.47 * #f240_r235: sample-A1 18299 14661 80.12 14172 9125 49.87 8194 44.78; sample-A10 18736 15755 84.09 15242 9579 51.13 8105 43.26 #f240_r230: sample-A1 18299 14686 80.26 14191 4952 27.06 4666 25.5; sample-A10 18736 15785 84.25 15267 3489 18.62 3341 17.83 #f240_r225: sample-A1 18299 14701 80.34 14206 4575 25 4297 23.48 #f240_r220: sample-A1 18299 14720 80.44 14223 4588 25.07 4310 23.55 #f240_r225: sample-A1 18299 14747 80.59 14250 3976 21.73 3758 20.54 #f230_r220: sample-A1 18299 14972 81.82 14514 3 0.02 3 0.02 #The output of the optimized denoising: (*) dada2_tests2/test_7_f240_r240/table.qza and dada2_tests2/test_7_f240_r240/rep-seqs.qza.
-
Visualize outputs
qiime feature-table summarize \ --i-table dada2_tests2/test_7_f240_r240/table.qza \ --o-visualization table.qzv \ --m-sample-metadata-file qiime2_metadata.tsv #Table summary #Metric Sample #Number of samples 137 #Number of features 3,039 #Total frequency 1,641,484 # #Frequency per sample #Minimum frequency 413.0 #1st quartile 10,319.0 #Median frequency 11,530.0 #3rd quartile 13,146.0 #Maximum frequency 40,022.0 #Mean frequency 11,981.635036496351 # #Frequency per feature #Minimum frequency 1.0 #1st quartile 3.0 #Median frequency 8.0 #3rd quartile 95.5 #Maximum frequency 56,472.0 #Mean frequency 540.1395195788089 #qiime tools peek table.qza #qiime tools peek qiime2_metadata.tsv qiime feature-table tabulate-seqs \ --i-data dada2_tests2/test_7_f240_r240/rep-seqs.qza \ --o-visualization rep-seqs.qzv qiime metadata tabulate \ --m-input-file dada2_tests2/test_7_f240_r240/denoising-stats.qza \ --o-visualization denoising-stats.qzv
-
Import reference sequences and taxonomy (SILVA 132)
qiime tools import \ --type 'FeatureData[Sequence]' \ --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna \ --output-path silva_132_99_otus.qza \ --input-format DNAFASTAFormat qiime tools import \ --type 'FeatureData[Taxonomy]' \ --input-format HeaderlessTSVTaxonomyFormat \ --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/taxonomy/16S_only/99/consensus_taxonomy_7_levels.txt \ --output-path silva_132_99_taxonomy.qza
-
Assign taxonomy
qiime feature-classifier classify-consensus-vsearch \ --i-query dada2_tests2/test_7_f240_r240/rep-seqs.qza \ --i-reference-reads silva_132_99_otus.qza \ --i-reference-taxonomy silva_132_99_taxonomy.qza \ --p-perc-identity 0.97 \ --p-threads 64 \ --o-classification taxonomy.qza \ --o-search-results search-results.qza
-
Visualize taxonomy
qiime taxa barplot \ --i-table dada2_tests2/test_7_f240_r240/table.qza \ --i-taxonomy taxonomy.qza \ --m-metadata-file qiime2_metadata.tsv \ --o-visualization taxa-bar-plots.qzv
-
Build phylogenetic tree
qiime alignment mafft \ --i-sequences dada2_tests2/test_7_f240_r240/rep-seqs.qza \ --o-alignment aligned-rep-seqs.qza qiime alignment mask \ --i-alignment aligned-rep-seqs.qza \ --o-masked-alignment masked-aligned-rep-seqs.qza qiime phylogeny fasttree \ --i-alignment masked-aligned-rep-seqs.qza \ --o-tree unrooted-tree.qza (*) qiime phylogeny midpoint-root \ --i-tree unrooted-tree.qza \ --o-rooted-tree rooted-tree.qza
-
Core diversity analysis
#The -e 6389 flag sets the even sampling depth (rarefaction depth) to 6,389 reads for diversity analyses. #All samples will be rarefied to 4,753 reads. #Samples with fewer reads are excluded. qiime diversity core-metrics-phylogenetic \ --i-phylogeny rooted-tree.qza \ --i-table dada2_tests2/test_7_f240_r240/table.qza \ --p-sampling-depth 6389 \ --m-metadata-file qiime2_metadata.tsv \ --output-dir core_metrics_results qiime diversity alpha \ --i-table table.qza \ --p-metric chao1 \ --o-alpha-diversity core_metrics_results/chao1_vector.qza qiime tools export --input-path core_metrics_results/shannon_vector.qza --output-path exported_alpha/shannon qiime tools export --input-path core_metrics_results/faith_pd_vector.qza --output-path exported_alpha/faith_pd qiime tools export --input-path core_metrics_results/observed_features_vector.qza --output-path exported_alpha/observed_features qiime tools export --input-path core_metrics_results/chao1_vector.qza --output-path exported_alpha/chao1 qiime tools export \ --input-path core_metrics_results/unweighted_unifrac_distance_matrix.qza \ --output-path exported_unweighted_unifrac qiime tools export \ --input-path core_metrics_results/weighted_unifrac_distance_matrix.qza \ --output-path exported_weighted_unifrac qiime diversity beta-group-significance \ --i-distance-matrix core_metrics_results/weighted_unifrac_distance_matrix.qza \ --m-metadata-file qiime2_metadata.tsv \ --m-metadata-column Group \ --p-pairwise \ --p-method permanova \ --o-visualization beta_group_significance.qzv qiime tools export \ --input-path beta_group_significance.qzv \ --output-path exported_beta_group #✅ Group 1 / Group 2 — The pairwise comparisons. #✅ Sample size — Number of samples used in the test. #✅ Permutations — Number of permutations in the PERMANOVA test. #✅ pseudo-F — The test statistic from PERMANOVA. #✅ p-value — The unadjusted p-value. #✅ q-value — The adjusted p-value (Bonferroni in QIIME2). #The q-value column (also sometimes called p-adj) is the multiple-testing corrected p-value. #👉 q < 0.05 means the difference is statistically significant between those two groups, even after correction. #“There is a significant difference in community composition between Group 1 and Group 2 (p=0.002).” #The --p-sampling-depth 6389 parameter is directly equivalent to QIIME 1’s -e 6389! #The QIIME 2 command will compute: #✅ Alpha diversity metrics (Observed OTUs, Shannon, Faith PD, Evenness) #✅ Beta diversity distance matrices (UniFrac, Bray-Curtis) #✅ PCoA plots #✅ Excludes samples with fewer than 6,389 reads. #📦 Output Folders and Files #Output Description #table.qzv Visual of feature table (sample counts) #rep-seqs.qzv Sequences per ASV #denoising-stats.qzv DADA2 read tracking #taxonomy.qza/.qzv Taxonomic classification of ASVs #taxa-bar-plots.qzv Interactive bar plots #core_metrics_results/ Alpha/beta diversity metrics + PCoA plots
-
Prepare three files feeding to Phyloseq.Rmd: table.qza (see above with ), rooted-tree.qza (see above with ), qiime2_metadata_for_qza_to_phyloseq.tsv edited from qiime2_metadata.tsv.
# Rarefying can be performed here, or in Phyloseq.Rmd (default), therefore, we don't need this step any more. qiime feature-table summarize \ --i-table core_metrics_results/rarefied_table.qza \ --o-visualization rarefied_table.qzv \ --m-sample-metadata-file qiime2_metadata.tsv #Table summary #Metric Sample #Number of samples 136 #Number of features 2,781 #Total frequency 868,904 # In QIIME2, we need table.qza, not biom-file, therefore, we don't need this step any more. qiime tools export \ --input-path core_metrics_results/rarefied_table.qza \ --output-path exported_rarefied_table #--> exported_rarefied_table/feature-table.biom biom convert \ -i exported_rarefied_table/feature-table.biom \ -o exported_rarefied_table/feature-table.tsv \ --to-tsv #✅ Old QIIME 1 table with GenBank IDs (like EF603722.1.1487) as feature labels. #✅ QIIME 2 table where feature IDs are hashes (like 0b438323a296b5f2ce2c8bbe3949ee8d). # Visulaize the taxonomy.qza qiime tools export \ --input-path taxonomy.qza \ --output-path exported-taxonomy #Feature ID Taxon Confidence #0b4383... k__Bacteria; p__Proteobacteria... 0.98 #dfa833... k__Bacteria; p__Firmicutes... 0.87 #... # ---- I used the following to generate two file for feeding in the Phyloseq.Rmd ---- #-1- exported_table/feature-table.biom corresesponds to table_even6389.biom in QIIME1, but in QIIME2, we don't need biom-file, instead of table.qza. qiime tools export \ --input-path dada2_tests2/test_7_f240_r240/table.qza \ --output-path exported_table #--> exported_table/feature-table.biom #-2- exported-tree/tree.nwk corresesponds to rep_set.tre in QIIME1 qiime tools export \ --input-path rooted-tree.qza \ --output-path exported-tree #--> exported-tree/tree.nwk # ---- The code in Phyloseq.Rmd ---- #install.packages("remotes") #remotes::install_github("jbisanz/qiime2R") #"core_metrics_results/rarefied_table.qza", rarefying performed in the code, therefore import the raw table. library(qiime2R) ps.ng.tax <- qza_to_phyloseq( features = "dada2_tests2/test_7_f240_r240/table.qza", tree = "rooted-tree.qza", metadata = "qiime2_metadata_for_qza_to_phyloseq.tsv" ) # or #biom convert \ # -i ./exported_table/feature-table.biom \ # -o ./exported_table/feature-table-v1.biom \ # --to-json #ps.ng.tax <- import_biom("./exported_table/feature-table-v1.biom", treefilename="./exported-tree/tree.nwk") #Note that the alpha- and beta-diversity-files needed in Phyloseq.Rmd has been prepared in the step 9.
-
Figures generated by Phyloseq.Rmd and MicrobiotaProcess_*.R
The following files can be found under server.
./Phyloseq.Rmd (Result Phyloseq.html) ./MicrobiotaProcess_cluster1_Group9-11_vs_cluster2_Group12-14_orig.R ./MicrobiotaProcess_Group1_vs_Group2.R ./MicrobiotaProcess_Group3_vs_Group4.R ./MicrobiotaProcess_PCA_Group1-4.R ./MicrobiotaProcess_PCA_Group9-14.R
Workflow using QIIME2 for Data_Karoline_16S_2025
Leave a reply