Author Archives: gene_x

Variant calling (inter-host + intra-host) for Data_Pietschmann_229ECoronavirus_Mutations_2025 (via docker own_viral_ngs) v2

  1. Input data:

     ln -s ../raw_data_2024/hCoV229E_Rluc_R1.fastq.gz hCoV229E_Rluc_R1.fastq.gz
     ln -s ../raw_data_2024/hCoV229E_Rluc_R2.fastq.gz hCoV229E_Rluc_R2.fastq.gz
     ln -s ../raw_data_2024/p10_DMSO_R1.fastq.gz p10_DMSO_R1.fastq.gz
     ln -s ../raw_data_2024/p10_DMSO_R2.fastq.gz p10_DMSO_R2.fastq.gz
     ln -s ../raw_data_2024/p10_K22_R1.fastq.gz p10_K22_R1.fastq.gz
     ln -s ../raw_data_2024/p10_K22_R2.fastq.gz p10_K22_R2.fastq.gz
     ln -s ../raw_data_2024/p10_K7523_R1.fastq.gz p10_K7523_R1.fastq.gz
     ln -s ../raw_data_2024/p10_K7523_R2.fastq.gz p10_K7523_R2.fastq.gz
     ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20606/p16_DMSO_S29_R1_001.fastq.gz p16_DMSO_R1.fastq.gz
     ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20606/p16_DMSO_S29_R2_001.fastq.gz p16_DMSO_R2.fastq.gz
     ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20607/p16_K22_S30_R1_001.fastq.gz p16_K22_R1.fastq.gz
     ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20607/p16_K22_S30_R2_001.fastq.gz p16_K22_R2.fastq.gz
     ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20608/p16_X7523_S31_R1_001.fastq.gz p16_X7523_R1.fastq.gz
     ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20608/p16_X7523_S31_R2_001.fastq.gz p16_X7523_R2.fastq.gz
  2. Call variants using snippy

     ln -s ~/Tools/bacto/db/ .;
     ln -s ~/Tools/bacto/envs/ .;
     ln -s ~/Tools/bacto/local/ .;
     cp ~/Tools/bacto/Snakefile .;
     cp ~/Tools/bacto/bacto-0.1.json .;
     cp ~/Tools/bacto/cluster.json .;
    
     #download the reference PP810610.gb from GenBank
     mv ~/Downloads/sequence\(2\).gb db/PP810610.gb
    
     #setting the following in bacto-0.1.json
         "fastqc": false,
         "taxonomic_classifier": false,
         "assembly": true,
         "typing_ariba": false,
         "typing_mlst": true,
         "pangenome": true,
         "variants_calling": true,
         "phylogeny_fasttree": true,
         "phylogeny_raxml": true,
         "recombination": false, (due to gubbins-error set false)
         "genus": "Alphacoronavirus",
         "kingdom": "Viruses",
         "species": "Human coronavirus 229E",
         "mykrobe": {
             "species": "corona"
         },
         "reference": "db/PP810610.gb"
    
     mamba activate /home/jhuang/miniconda3/envs/bengal3_ac3
     (bengal3_ac3) /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds
  3. Summarize all SNPs and Indels from the snippy result directory.

     #Output: snippy/summary_snps_indels.csv
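     # IMPORTANT_ADAPT: adapt the array "isolates" to the sample names of this project before running (the list shown here appears to be left over from another project -- TODO confirm): isolates = ["AYE-S", "AYE-Q", "AYE-WT on Tig4", "AYE-craA on Tig4", "AYE-craA-1 on Cm200", "AYE-craA-2 on Cm200"]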
     python3 ~/Scripts/summarize_snippy_res.py snippy
     cd snippy
     #grep -v "None,,,,,,None,None" summary_snps_indels.csv > summary_snps_indels_.csv
  4. Call variants using spandx (results are almost the same as those from viral-ngs!)

     mamba activate /home/jhuang/miniconda3/envs/spandx
     mkdir ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/PP810610
     cp PP810610.gb  ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/PP810610/genes.gbk
     vim ~/miniconda3/envs/spandx/share/snpeff-5.1-2/snpEff.config
     /home/jhuang/miniconda3/envs/spandx/bin/snpEff build PP810610    #-d
     ~/Scripts/genbank2fasta.py PP810610.gb
     mv PP810610.gb_converted.fna PP810610.fasta    #rename "PP810610.1 xxxxx" to "PP810610" in the fasta-file
     ln -s /home/jhuang/Tools/spandx/ spandx
     (spandx) nextflow run spandx/main.nf --fastq "trimmed/*_P_{1,2}.fastq" --ref PP810610.fasta --annotation --database PP810610 -resume
    
     # Rerun SNP_matrix.sh due to the error ERROR_CHROMOSOME_NOT_FOUND in the variants annotation
     cd Outputs/Master_vcf
     (spandx) cp -r ../../snippy/hCoV229E_Rluc/reference .
     (spandx) cp ../../spandx/bin/SNP_matrix.sh ./
     #Note that ${variant_genome_path}=PP810610 in the following command, but it is no longer used after the command has been replaced.
     #Adapt "snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -v ${variant_genome_path} out.vcf > out.annotated.vcf" to
     "/home/jhuang/miniconda3/envs/bengal3_ac3/bin/snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -c reference/snpeff.config -dataDir . ref out.vcf > out.annotated.vcf" in SNP_matrix.sh
     (spandx) bash SNP_matrix.sh PP810610 .
  5. Calling inter-host variants by merging the results from snippy+spandx (Manually!)

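     # Inter-host variants: the same virus carries different genetic variants in two different hosts; such variants may be related to the host immune response, the disease presentation, or the mode of viral transmission.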
     cp All_SNPs_indels_annotated.txt All_SNPs_indels_annotated_backup.txt
     vim All_SNPs_indels_annotated.txt
    
     #in the file ids: grep "$(echo -e '\t')353$(echo -e '\t')" All_SNPs_indels_annotated.txt >> All_SNPs_indels_annotated_.txt
     #Replace \n with " All_SNPs_indels_annotated.txt >> All_SNPs_indels_annotated_.txt\ngrep "
     #Replace grep " --> grep "$(echo -e '\t')
     #Replace " All_ --> $(echo -e '\t')" All_
    
     # Potential intra-host variants: 10871, 19289, 23435.
     CHROM   POS     REF     ALT     TYPE    hCoV229E_Rluc_trimmed   p10_DMSO_trimmed        p10_K22_trimmed p10_K7523_trimmed       p16_DMSO_trimmed        p16_K22_trimmed p16_X7523_trimmed       Effect  Impact  Functional_Class        Codon_change    Protein_and_nucleotide_change   Amino_Acid_Length       Gene_name       Biotype
     PP810610        1464    T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        gTt/gCt p.Val416Ala/c.1247T>C   6757    CDS_1   protein_coding
     PP810610        1699    C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  gtC/gtT p.Val494Val/c.1482C>T   6757    CDS_1   protein_coding
     PP810610        6691    C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  tgC/tgT p.Cys2158Cys/c.6474C>T  6757    CDS_1   protein_coding
     PP810610        6919    C       G       SNP     G       G       G       G       G       G       G       synonymous_variant      LOW     SILENT  ggC/ggG p.Gly2234Gly/c.6702C>G  6757    CDS_1   protein_coding
     PP810610        7294    T       A       SNP     A       A       A       A       A       A       A       missense_variant        MODERATE        MISSENSE        agT/agA p.Ser2359Arg/c.7077T>A  6757    CDS_1   protein_coding
     * PP810610       10871   C       T       SNP     C       C/T     T       C/T     C/T     T       C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu3552Phe/c.10654C>T 6757    CDS_1   protein_coding
     PP810610        14472   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        aTg/aCg p.Met4752Thr/c.14255T>C 6757    CDS_1   protein_coding
     PP810610        15458   T       C       SNP     C       C       C       C       C       C       C       synonymous_variant      LOW     SILENT  Ttg/Ctg p.Leu5081Leu/c.15241T>C 6757    CDS_1   protein_coding
     PP810610        16035   C       A       SNP     A       A       A       A       A       A       A       stop_gained     HIGH    NONSENSE        tCa/tAa p.Ser5273*/c.15818C>A   6757    CDS_1   protein_coding
     PP810610        17430   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        tTa/tCa p.Leu5738Ser/c.17213T>C 6757    CDS_1   protein_coding
     * PP810610       19289   G       T       SNP     G       G       T       G       G       G/T     G       missense_variant        MODERATE        MISSENSE        Gtt/Ttt p.Val6358Phe/c.19072G>T 6757    CDS_1   protein_coding
     PP810610        21183   T       G       SNP     G       G       G       G       G       G       G       missense_variant        MODERATE        MISSENSE        tTt/tGt p.Phe230Cys/c.689T>G    1173    CDS_2   protein_coding
     PP810610        22636   T       G       SNP     G       G       G       G       G       G       G       missense_variant        MODERATE        MISSENSE        aaT/aaG p.Asn714Lys/c.2142T>G   1173    CDS_2   protein_coding
     PP810610        23022   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        tTa/tCa p.Leu843Ser/c.2528T>C   1173    CDS_2   protein_coding
     * PP810610       23435   C       T       SNP     C       C       T       C/T     C       C/T     C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu981Phe/c.2941C>T   1173    CDS_2   protein_coding
     PP810610        24512   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        Ctc/Ttc p.Leu36Phe/c.106C>T     88      CDS_4   protein_coding
     PP810610        24781   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        aCt/aTt p.Thr36Ile/c.107C>T     77      CDS_5   protein_coding
     PP810610        25163   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu82Phe/c.244C>T     225     CDS_6   protein_coding
     PP810610        25264   C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  gtC/gtT p.Val115Val/c.345C>T    225     CDS_6   protein_coding
     PP810610        26838   G       T       SNP     T       T       T       T       T       T       T
  6. Calling intra-host variants using viral-ngs

     # Intra-host variants: a single individual is infected with a virus, but several different viral variants may coexist in different cells or organs of that individual.
    
     #How to run and debug the viral-ngs docker?
     # ---- DEBUG_2025_1: using docker instead ----
     mkdir viralngs; cd viralngs
     ln -s ~/Tools/viral-ngs_docker/Snakefile Snakefile
     ln -s  ~/Tools/viral-ngs_docker/bin bin
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/refsel.acids refsel.acids
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/lastal.acids lastal.acids
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/config.yaml config.yaml
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-runs.txt samples-runs.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-depletion.txt samples-depletion.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-metagenomics.txt samples-metagenomics.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly.txt samples-assembly.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly-failures.txt samples-assembly-failures.txt
     # Adapt the samples-*.txt files
    
     mkdir viralngs/data
     mkdir viralngs/data/00_raw
    
     mkdir bams
     ref_fa="PP810610.fasta";
     #for sample in hCoV229E_Rluc p10_DMSO p10_K22; do
     for sample in p10_K7523 p16_DMSO p16_K22 p16_X7523; do
         bwa index ${ref_fa}; \
         bwa mem -M -t 16 ${ref_fa} trimmed/${sample}_trimmed_P_1.fastq trimmed/${sample}_trimmed_P_2.fastq | samtools view -bS - > bams/${sample}_genome_alignment.bam; \
     done
    
     conda activate viral-ngs4
     #for sample in hCoV229E_Rluc p10_DMSO p10_K22; do
     #for sample in p10_K7523 p16_DMSO p16_K22 p16_X7523; do
     for sample in p16_K22; do
         picard AddOrReplaceReadGroups I=bams/${sample}_genome_alignment.bam O=~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2025/viralngs/data/00_raw/${sample}.bam SORT_ORDER=coordinate CREATE_INDEX=true RGPL=illumina RGID=$sample RGSM=$sample RGLB=standard RGPU=$sample VALIDATION_STRINGENCY=LENIENT; \
     done
     conda deactivate
    
     # -- ! Firstly, leave samples-assembly.txt empty so that only the depletion steps are run!
     docker run -it -v /mnt/md1/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2025/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk   own_viral_ngs bash
     cd /work
     snakemake --directory /work --printshellcmds --cores 40
    
     # -- ! Secondly, manually run the assembly steps
     # --> Iteratively add the unfinished assemblies to the list, replacing one each time, and run "snakemake --directory /work --printshellcmds --cores 40"
    
         # # ---- NOTE that the following steps need rerun --> DOES NOT WORK, USE STRATEGY ABOVE ----
         # #for sample in p10_K22 p10_K7523; do
         # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
         #     bin/read_utils.py merge_bams data/01_cleaned/${sample}.cleaned.bam tmp/01_cleaned/${sample}.cleaned.bam --picardOptions SORT_ORDER=queryname
         #     bin/read_utils.py rmdup_mvicuna_bam tmp/01_cleaned/${sample}.cleaned.bam data/01_per_sample/${sample}.cleaned.bam --JVMmemory 30g
         # done
         #
         # #Note that the error generated by nextflow is from the step gapfill_gap2seq!
         # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
         #     bin/assembly.py assemble_spades data/01_per_sample/${sample}.taxfilt.bam /home/jhuang/REFs/viral_ngs_dbs/trim_clip/contaminants.fasta tmp/02_assembly/${sample}.assembly1-spades.fasta --nReads 10000000 --threads 15 --memLimitGb 12
         # done
         # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
         # for sample in p10_K22 p10_K7523; do
         #     bin/assembly.py order_and_orient tmp/02_assembly/${sample}.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/${sample}.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/${sample}.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta --threads 15
         # done
         #
         # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
         #     bin/assembly.py gapfill_gap2seq tmp/02_assembly/${sample}.assembly2-scaffolded.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly2-gapfilled.fasta --memLimitGb 12 --maskErrors --randomSeed 0 --loglevel DEBUG
         # done
    
     #IMPORTANT: Rerun the following commands!
     for sample in hCoV229E_Rluc  p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
         bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index  --loglevel DEBUG
     done
    
         # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
         #     bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly3-modify.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly4-refined.fasta --outVcf tmp/02_assembly/${sample}.assembly3.vcf.gz --min_coverage 2 --novo_params '-r Random -l 20 -g 40 -x 20 -t 502' --threads 15  --loglevel DEBUG
         #     bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly4-refined.fasta data/01_per_sample/${sample}.cleaned.bam data/02_assembly/${sample}.fasta --outVcf tmp/02_assembly/${sample}.assembly4.vcf.gz --min_coverage 3 --novo_params '-r Random -l 20 -g 40 -x 20 -t 100' --threads 15  --loglevel DEBUG
         # done
    
     # -- ! Thirdly, fill in samples-assembly.txt completely and run "snakemake --directory /work --printshellcmds --cores 40"
    
     # ---------------------------- BUG list of the docker pipeline, mostly due to version incompatibility ----------------------------
     #BUG_1: FileNotFoundError: [Errno 2] No such file or directory: '/home/jhuang/Tools/samtools-1.9/samtools': '/home/jhuang/Tools/samtools-1.9/samtools'
     #DEBUG_1 (DEPRECATED):
             # - In docker install independent samtools
             conda create -n samtools-1.9-env samtools=1.9 -c bioconda -c conda-forge
             # - persist the modified docker container as an image; next time run the own docker image
             docker ps
             #CONTAINER ID   IMAGE                              COMMAND   CREATED         STATUS         PORTS     NAMES
             #881a1ad6a990   quay.io/broadinstitute/viral-ngs   "bash"    8 minutes ago   Up 8 minutes             intelligent_yalow
             docker commit 881a1ad6a990 own_viral_ngs
             docker image ls
             docker run -it own_viral_ngs bash
             #Change the path as "/opt/miniconda/envs/samtools-1.9-env/bin/samtools" in /work/bin/tools/samtools.py
             #         If another tool except for samtools could not be installed, also use the same method above to install it on own_viral_ngs!
     #DEBUG_1_BETTER_SIMPLE: TOOL_VERSION = '1.6' --> '1.9' in ~/Tools/viral-ngs_docker/bin/tools/samtools.py
    
     #BUG_2:
             bin/taxon_filter.py deplete data/00_raw/2040_04.bam tmp/01_cleaned/2040_04.raw.bam tmp/01_cleaned/2040_04.bmtagger_depleted.bam tmp/01_cleaned/2040_04.rmdup.bam data/01_cleaned/2040_04.cleaned.bam --bmtaggerDbs /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/hg19 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/metagenomics_contaminants_v3 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA --blastDbs /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/hybsel_probe_adapters /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/metag_v3.ncRNA.mRNA.mitRNA.consensus --threads 15 --srprismMemory 14250 --JVMmemory 50g --loglevel DEBUG
             #2025-05-23 09:58:45,326 - __init__:445:_attempt_install - DEBUG - Currently installed version of blast: 2.7.1-h4422958_6
             #2025-05-23 09:58:45,327 - __init__:448:_attempt_install - DEBUG - Expected version of blast:            2.6.0
             #2025-05-23 09:58:45,327 - __init__:449:_attempt_install - DEBUG - Incorrect version of blast installed. Removing it...
     #DEBUG_2: TOOL_VERSION = "2.6.0" --> "2.7.1" in ~/Tools/viral-ngs_docker/bin/tools/blast.py
    
     #BUG_3:
             bin/read_utils.py bwamem_idxstats data/01_cleaned/1762_04.cleaned.bam /home/jhuang/REFs/viral_ngs_dbs/spikeins/ercc_spike-ins.fasta --outStats reports/spike_count/1762_04.spike_count.txt --minScoreToFilter 60 --loglevel DEBUG
     #DEBUG_3: TOOL_VERSION = "0.7.15" --> "0.7.17" in ~/Tools/viral-ngs_docker/bin/tools/bwa.py
    
     #BUG_4: FileNotFoundError: [Errno 2] No such file or directory: '/usr/local/bin/trimmomatic': '/usr/local/bin/trimmomatic'
     #DEBUG_4: TOOL_VERSION = "0.36" --> "0.38" in ~/Tools/viral-ngs_docker/bin/tools/trimmomatic.py
    
     #BUG_5: FileNotFoundError: [Errno 2] No such file or directory: '/usr/bin/spades.py': '/usr/bin/spades.py'
     #DEBUG_5:  TOOL_VERSION = "0.36" --> "0.38" in ~/Tools/viral-ngs_docker/bin/tools/trimmomatic.py
     #                def install_and_get_path(self):
     #                        # the conda version wraps the jar file with a shell script
     #                        return 'trimmomatic'
    
     #BUG_6: bin/assembly.py order_and_orient tmp/02_assembly/2039_04.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/2039_04.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/2039_04.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta --threads 15 --loglevel DEBUG
     2025-05-23 17:40:19,526 - __init__:445:_attempt_install - DEBUG - Currently installed version of mummer4: 4.0.0beta2-pl526hf484d3e_4
     2025-05-23 17:40:19,527 - __init__:448:_attempt_install - DEBUG - Expected version of mummer4:            4.0.0rc1
     2025-05-23 17:40:19,527 - __init__:449:_attempt_install - DEBUG - Incorrect version of mummer4 installed. Removing it..
     DEBUG_6:  TOOL_VERSION = "4.0.0rc1" --> "4.0.0beta2" in ~/Tools/viral-ngs_docker/bin/tools/mummer.py
    
     #BUG_7: bin/assembly.py order_and_orient tmp/02_assembly/2039_04.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/2039_04.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/2039_04.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta --threads 15 --loglevel DEBUG
             File "bin/assembly.py", line 549, in 
             base_counts = [sum([len(seg.seq.replace("N", "")) for seg in scaffold]) \
             AttributeError: 'Seq' object has no attribute 'replace'
     #DEBUG_7: base_counts = [sum([len(seg.seq.replace("N", "")) for seg in scaffold]) --> base_counts = [sum([len(seg.seq.ungap('N')) for seg in scaffold]) in ~/Tools/viral-ngs_docker/bin/assembly.py

     #BUG_8: bin/assembly.py refine_assembly tmp/02_assembly/1243_2.assembly3-modify.fasta data/01_per_sample/1243_2.cleaned.bam tmp/02_assembly/1243_2.assembly4-refined.fasta --outVcf tmp/02_assembly/1243_2.assembly3.vcf.gz --min_coverage 2 --novo_params '-r Random -l 20 -g 40 -x 20 -t 502' --threads 15 --loglevel DEBUG
             File "/work/bin/tools/gatk.py", line 75, in execute
             FileNotFoundError: [Errno 2] No such file or directory: '/usr/local/bin/gatk': '/usr/local/bin/gatk'
     #DEBUG_8: -v /usr/local/bin/gatk:/usr/local/bin/gatk in 'docker run' and change default python in the script via a shebang; TOOL_VERSION = "3.8" --> "3.6" in ~/Tools/viral-ngs_docker/bin/tools/gatk.py

     #BUG_9: pyyaml is missing!
     #DEBUG_9: NO_ERROR if rerun!
             bin/assembly.py impute_from_reference tmp/02_assembly/2039_04.assembly2-gapfilled.fasta tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta tmp/02_assembly/2039_04.assembly3-modify.fasta --newName 2039_04 --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index --loglevel DEBUG
             #for sample in 2039_04 2040_04; do
             for sample in 1762_04 1243_2 875_04; do
                 bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index --loglevel DEBUG
             done

     #BUG_10: bin/reports.py consolidate_fastqc reports/fastqc/2039_04/align_to_self reports/fastqc/2040_04/align_to_self reports/fastqc/1762_04/align_to_self reports/fastqc/1243_2/align_to_self reports/fastqc/875_04/align_to_self reports/summary.fastqc.align_to_self.txt
     #DEBUG_10: File "bin/intrahost.py", line 527 and line 579 in merge_to_vcf
             #
             #MODIFIED_BACK
             samp_to_seqIndex[sampleName] = seq.seq.ungap('-')
             #samp_to_seqIndex[sampleName] = seq.seq.replace("-", "")

     #BUG_11: bin/interhost.py multichr_mafft ref_genome/reference.fasta data/02_assembly/2039_04.fasta data/02_assembly/2040_04.fasta data/02_assembly/1762_04.fasta data/02_assembly/1243_2.fasta data/02_assembly/875_04.fasta data/03_multialign_to_ref --ep 0.123 --maxiters 1000 --preservecase --localpair --outFilePrefix aligned --sampleNameListFile data/03_multialign_to_ref/sampleNameList.txt --threads 15 --loglevel DEBUG
             #2025-05-26 15:04:19,014 - cmd:195:main_argparse - INFO - command: bin/interhost.py multichr_mafft inFastas=['ref_genome/reference.fasta', 'data/02_assembly/2039_04.fasta', 'data/02_assembly/2040_04.fasta', 'data/02_assembly/1762_04.fasta', 'data/02_assembly/1243_2.fasta', 'data/02_assembly/875_04.fasta'] localpair=True globalpair=None preservecase=True reorder=None gapOpeningPenalty=1.53 ep=0.123 verbose=False outputAsClustal=None maxiters=1000 outDirectory=data/03_multialign_to_ref outFilePrefix=aligned sampleRelationFile=None sampleNameListFile=data/03_multialign_to_ref/sampleNameList.txt threads=15 loglevel=DEBUG tmp_dir=/tmp tmp_dirKeep=False
             #2025-05-26 15:04:19,014 - cmd:209:main_argparse - DEBUG - using tempDir: /tmp/tmp-interhost-multichr_mafft-nuws9mhp
             #2025-05-26 15:04:21,085 - __init__:445:_attempt_install - DEBUG - Currently installed version of mafft: 7.402-0
             #2025-05-26 15:04:21,085 - __init__:448:_attempt_install - DEBUG - Expected version of mafft: 7.221
             #2025-05-26 15:04:21,085 - __init__:449:_attempt_install - DEBUG - Incorrect version of mafft installed. Removing it...
     #DEBUG_11: TOOL_VERSION = "7.221" --> "7.402" in ~/Tools/viral-ngs_docker/bin/tools/mafft.py

     #BUG_12: bin/interhost.py snpEff data/04_intrahost/isnvs.vcf.gz PP810610.1 data/04_intrahost/isnvs.annot.vcf.gz j.huang@uke.de --loglevel DEBUG
             #2025-06-10 13:14:07,526 - __init__:445:_attempt_install - DEBUG - Currently installed version of snpeff: 4.3.1t-3
             #2025-06-10 13:14:07,527 - __init__:448:_attempt_install - DEBUG - Expected version of snpeff: 4.1l
     #DEBUG_12: -v /usr/local/bin/gatk:/usr/local/bin/gatk in 'docker run' and change default python in the script via a shebang; TOOL_VERSION = "4.1l" --> "4.3.1t" in ~/Tools/viral-ngs_docker/bin/tools/snpeff.py
  7. Comparing intra- and inter-host variants, and comparing the variants to the alignments of the assemblies to confirm their correctness.

     From step 5, only 3 inter-host variants were confirmed: they are 10871, 19289, 23435.
    
     PP810610    10871   hCoV229E_Rluc   hCoV229E_Rluc       C,T 0.0057070386810399495   0.011348936781066188    1.0 missense_variant    10654C>T    Leu3552Phe  3552    6758    Gene_217_20492  XBA84229.1
     PP810610    10871   p10_DMSO    p10_DMSO        C,T 0.0577716643741403  0.10886819833916395 1.0 missense_variant    10654C>T    Leu3552Phe  3552    6758    Gene_217_20492  XBA84229.1
     PP810610    10871   p10_K22 p10_K22     C,T 1.0 0.0 1.0 missense_variant    10654C>T    Leu3552Phe  3552    6758    Gene_217_20492  XBA84229.1
     PP810610    10871   p10_K7523   p10_K7523       C,T 0.8228321896444167  0.2915587546587828  1.0 missense_variant    10654C>T    Leu3552Phe  3552    6758    Gene_217_20492  XBA84229.1
     PP810610    10871   p16_DMSO    p16_DMSO        C,T 0.02927088877062267 0.05682820768240093 1.0 missense_variant    10654C>T    Leu3552Phe  3552    6758    Gene_217_20492  XBA84229.1
     PP810610    10871   p16_K22 p16_K22     C,T 0.9911209766925638  0.017600372505084394    1.0 missense_variant    10654C>T    Leu3552Phe  3552    6758    Gene_217_20492  XBA84229.1
     PP810610    10871   p16_X7523   p16_X7523       C,T 0.8776699029126214  0.21473088886794223 1.0 missense_variant    10654C>T    Leu3552Phe  3552    6758    Gene_217_20492  XBA84229.1
    
     PP810610    19289   hCoV229E_Rluc   hCoV229E_Rluc       G,T 0.0 0.0 1.0 missense_variant    19073G>T    Gly6358Val  6358    6758    Gene_217_20492  XBA84229.1
     PP810610    19289   p10_DMSO    p10_DMSO        G,T 0.0 0.0 1.0 missense_variant    19073G>T    Gly6358Val  6358    6758    Gene_217_20492  XBA84229.1
     PP810610    19289   p10_K22 p10_K22     G,T 1.0 0.0 1.0 missense_variant    19073G>T    Gly6358Val  6358    6758    Gene_217_20492  XBA84229.1
     PP810610    19289   p10_K7523   p10_K7523       G,T 0.0 0.0 1.0 missense_variant    19073G>T    Gly6358Val  6358    6758    Gene_217_20492  XBA84229.1
     PP810610    19289   p16_DMSO    p16_DMSO        G,T 0.0 0.0 1.0 missense_variant    19073G>T    Gly6358Val  6358    6758    Gene_217_20492  XBA84229.1
     PP810610    19289   p16_K22 p16_K22     G,T 0.9884823848238482  0.02276991943361173 1.0 missense_variant    19073G>T    Gly6358Val  6358    6758    Gene_217_20492  XBA84229.1
     PP810610    19289   p16_X7523   p16_X7523       G,T 0.0 0.0 1.0 missense_variant    19073G>T    Gly6358Val  6358    6758    Gene_217_20492  XBA84229.1
    
     PP810610    23435   hCoV229E_Rluc   hCoV229E_Rluc       C,T 0.0 0.0 1.0 missense_variant    2941C>T Leu981Phe   981 1173    Gene_20494_24015    XBA84230.1
     PP810610    23435   p10_DMSO    p10_DMSO        C,T 0.031912415560214305    0.061788026586653055    1.0 missense_variant    2941C>T Leu981Phe   981 1173    Gene_20494_24015    XBA84230.1
     PP810610    23435   p10_K22 p10_K22     C,T 1.0 0.0 1.0 missense_variant    2941C>T Leu981Phe   981 1173    Gene_20494_24015    XBA84230.1
     PP810610    23435   p10_K7523   p10_K7523       C,T 0.8352090032154341  0.27526984832663026 1.0 missense_variant    2941C>T Leu981Phe   981 1173    Gene_20494_24015    XBA84230.1
     PP810610    23435   p16_DMSO    p16_DMSO        C,T 0.0 0.0 1.0 missense_variant    2941C>T Leu981Phe   981 1173    Gene_20494_24015    XBA84230.1
     PP810610    23435   p16_K22 p16_K22     C,T 0.958498023715415   0.07955912449811753 1.0 missense_variant    2941C>T Leu981Phe   981 1173    Gene_20494_24015    XBA84230.1
     PP810610    23435   p16_X7523   p16_X7523       C,T 0.13175164058556285 0.22878629157715102 1.0 missense_variant    2941C>T Leu981Phe   981 1173    Gene_20494_24015    XBA84230.1
  8. Generate variant_annot.xls and coverages.xls

     sudo chown -R jhuang:jhuang data
     # -- generate isnvs_annot_complete__.txt, isnvs_annot_0.05.txt from ~/DATA/Data_Pietschmann_RSV_Probe3/data/04_intrahost
     cp isnvs.annot.txt isnvs.annot_complete.txt
     ~/Tools/csv2xls-0.4/csv_to_xls.py isnvs.annot_complete.txt -d$'\t' -o isnvs.annot_complete.xls
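     #delete the columns patient, time, Hw and Hs and the header in the xls and save as a tab-separated text file (presumably isnvs.annot_complete.csv, which the following commands read -- TODO confirm).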
    
     awk '{printf "%.3f\n", $5}' isnvs.annot_complete.csv > f5
     cut -f1-4 isnvs.annot_complete.csv > f1_4
     cut -f6- isnvs.annot_complete.csv > f6_
     paste f1_4 f5 > f1_5
     paste f1_5 f6_ > isnvs_annot_complete_.txt
     #correct f5 in header of isnvs_annot_complete_.txt to iSNV_freq
     #header: chr    pos sample  alleles iSNV_freq   eff_type    eff_codon_dna   eff_aa  eff_aa_pos  eff_prot_len    eff_gene    eff_protein
     ~/Tools/csv2xls-0.4/csv_to_xls.py isnvs_annot_complete_.txt -d$'\t' -o variant_annot.xls
    
     #MANUALLY generate variant_annot_0.01.csv variant_annot_0.05.csv
     awk ' $5 >= 0.05 ' isnvs_annot_complete_.txt > 0.05.csv
     cut -f2 0.05.csv
    
     awk ' $5 >= 0.01 ' isnvs_annot_complete_.txt > 0.01.csv
     cut -f2 0.05.csv | uniq > ids_0.05
     cut -f2 0.01.csv | uniq > ids_0.01
    
     #Replace '\n' with '\\t" isnvs_annot_complete_.txt >> isnvs_annot_0.05.txt\ngrep -P "PP810610\\t' in ids_0.05 and then deleting the 'pos' line
     #Replace '\n' with '\\t" isnvs_annot_complete_.txt >> isnvs_annot_0.01.txt\ngrep -P "PP810610\\t'  in ids_0.01 and then deleting the 'pos' line
     #Run ids_0.05 and ids_0.01
    
     cp ../../Outputs/Master_vcf/All_SNPs_indels_annotated.txt ../../Outputs/Master_vcf/All_SNPs_indels_annotated.txt hCoV229E_Rluc_variants
     # Delete the three records which were already reported in the intra-host results from hCoV229E_Rluc_variants: they are 10871, 19289, 23435.
     PP810610       10871   C       T       SNP     C       C/T     T       C/T     C/T     T       C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu3552Phe/c.10654C>T 6757    CDS_1   protein_coding
     PP810610       19289   G       T       SNP     G       G       T       G       G       G/T     G       missense_variant        MODERATE        MISSENSE        Gtt/Ttt p.Val6358Phe/c.19072G>T 6757    CDS_1   protein_coding
     PP810610       23435   C       T       SNP     C       C       T       C/T     C       C/T     C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu981Phe/c.2941C>T   1173    CDS_2   protein_coding
    
     ~/Tools/csv2xls-0.4/csv_to_xls.py isnvs_annot_0.05.txt isnvs_annot_0.01.txt hCoV229E_Rluc_variants -d$'\t' -o variant_annot.xls
     #Modify sheetname to variant_annot_0.05 and variant_annot_0.01 and add the header in Excel file.
     #Note in the complete list, Set 2024 is NOT a subset of Set 2025 because the element 26283 is in set 2024 but missing from set 2025.
    
     # -- calculate the coverage
     # Per-base depth of the 2024 samples: one <sample>_cov.txt per BAM, then all of them in one workbook.
     for smp in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523; do
       samtools depth "./data/02_align_to_self/${smp}.mapped.bam" > "${smp}_cov.txt"
     done
     ~/Tools/csv2xls-0.4/csv_to_xls.py hCoV229E_Rluc_cov.txt p10_DMSO_cov.txt p10_K22_cov.txt p10_K7523_cov.txt -d$'\t' -o coverages.xls
     #draw coverage and see if they are continuous?
     # Same for the p16 samples; note that the BAM of p16_X7523 is written to p16_K7523_cov.txt.
     for smp in p16_DMSO p16_K22; do
       samtools depth "./data/02_align_to_self/${smp}.mapped.bam" > "${smp}_cov.txt"
     done
     samtools depth ./data/02_align_to_self/p16_X7523.mapped.bam > p16_K7523_cov.txt
     ~/Tools/csv2xls-0.4/csv_to_xls.py p16_DMSO_cov.txt p16_K22_cov.txt p16_K7523_cov.txt -d$'\t' -o coverages_p16.xls
    
             # Plot the per-base coverage of one sample and save it as a PNG.
             # Positions that are absent from the depth file are drawn with coverage 0.
             library(ggplot2)
             library(dplyr)
    
             # Sample to plot; input (<sample>_cov.txt) and output (<sample>_coverage_filled.png)
             # file names are derived from it, so other samples only need this line changed.
             sample_name <- "p16_K7523"
    
             # Read the coverage data (three tab-separated columns, no header line)
             cov_data <- read.table(paste0(sample_name, "_cov.txt"), header = FALSE, sep = "\t",
                                    col.names = c("Chromosome", "Position", "Coverage"))
    
             # Full position range between the first and the last covered position.
             # NOTE(review): uncovered regions before the first / after the last reported
             # position are not part of this range and therefore not plotted.
             full_range <- data.frame(Position = seq(min(cov_data$Position), max(cov_data$Position)))
    
             # Merge with the actual coverage data and fill missing positions with 0
             cov_full <- full_range %>%
               left_join(cov_data[, c("Position", "Coverage")], by = "Position") %>%
               mutate(Coverage = ifelse(is.na(Coverage), 0, Coverage))
    
             coverage_plot <- ggplot(cov_full, aes(x = Position, y = Coverage)) +
               geom_line(color = "steelblue", size = 0.3) +
               labs(title = paste0("Coverage Plot for ", sample_name, " (Missing = 0)"),
                    x = "Genomic Position",
                    y = "Coverage Depth") +
               theme_minimal() +
               theme(
                 plot.title = element_text(hjust = 0.5),
                 axis.text = element_text(size = 10),
                 axis.title = element_text(size = 12)
               )
    
             # Save the plot to PNG. print() is required: a ggplot object is only drawn
             # automatically at the interactive top level, not when the code is source()d
             # or placed inside a function/loop, which would leave an empty PNG.
             png(paste0(sample_name, "_coverage_filled.png"), width = 1200, height = 600)
             print(coverage_plot)
             dev.off()
  9. (Optional) Consensus sequences of each and of all isolates

     cat PP810610.1.fa OZ035258.1.fa MZ712010.1.fa OK662398.1.fa OK625404.1.fa KF293664.1.fa NC_002645.1.fa > all.fa
     cp data/02_assembly/*.fasta ./
     # Rename each consensus assembly to .fa and append it to the collection of reference genomes.
     for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523; do
       mv "${sample}.fasta" "${sample}.fa"
       # Append ONLY the sample: reading all.fa while appending to it makes cat abort
       # ("input file is output file") or duplicates the references on every iteration.
       cat "${sample}.fa" >> all.fa
     done
    
     cat RSV_dedup.fa all.fa > RSV_all.fa
     mafft --clustalout --adjustdirection RSV_all.fa > RSV_all.aln
     snp-sites RSV_all.aln -o RSV_all_.aln
  10. Report

     Please find attached the variant analysis results for Thomas. Variant frequencies in the new samples are highlighted in yellow.
    
     Although PP810610 is used as the reference, only differences observed in the samples p10_DMSO, p10_K22, p10_K7523, p16_DMSO, p16_K22, and p16_X7523 compared to hCoV229E_Rluc are reported in the sheets variant_annot_0.05 and variant_annot_0.01 (see variant_annot.xls). Variants already present in hCoV229E_Rluc are excluded from these sheets. In total, 17 mutations were found in hCoV229E_Rluc relative to PP810610, detailed in the sheet “hCoV229E_Rluc_variants” (see variant_annot.xls).
    
     ------ Explanation of iSNV_freq in the sheets variant_annot_0.05 and variant_annot_0.01 ------
    
     The iSNV_freq column shows the frequency of the second allele at each position. For example, at position 23435 on chr PP810610:
    
     chr               Position    Sample            Alleles    iSNV_freq
     PP810610    23435    hCoV229E_Rluc    C,T        0
     PP810610    23435    p10_DMSO           C,T       0.032
     PP810610    23435    p10_K22              C,T       0.995
     PP810610    23435    p10_K7523          C,T       0.835
     PP810610    23435    p16_DMSO          C,T        0
     PP810610    23435    p16_K22              C,T       0.958
     PP810610    23435    p16_X7523          C,T       0.132
    
     The second allele (T) frequencies are:
     0 (only C)
     0.032 (3.2% T)
     0.995 (99.5% T)
     0.835 (83.5% T)
     0 (only C)
     0.958 (95.8% T)
     0.132 (13.2% T)
    
     # --
    
     Regarding the mutation at position 19289 — you're absolutely right, and I had also noticed the discrepancy.
    
     In the 2024 analysis, I performed intra-host variant calling, which detects only those variants with frequencies strictly between 0% and 100% within a single sample. Since position 19289 showed 100% G in p10_DMSO, 100% T in p10_K22, and 100% G in p10_K7523, it was not identified as an intra-host variant at that time. Rather, it's a clear example of an inter-host variant — a fixed difference between samples.
    
     In the 2025 analysis, I again used intra-host variant calling. This time, the mutation at position 19289 in p16_K22 was detected at 98.8% T, which lies strictly between 0% and 100% and therefore appears in the intra-host variant table.
    
     After noticing this, I also ran a dedicated inter-host variant calling analysis, which specifically highlights differences between samples rather than within them. The results can be found in the third table ("hCoV229E_Rluc_variants") of the variant_annot.xls file I sent you previously. As you’ll see, all 17 positions are identical across the 7 samples, indicating that no additional inter-host variants were detected beyond what we had already observed.
    
     Lastly, please find the coverage data in the attached files.
    
     # --
    
     Just following up on the mutation at position 19289. By tweaking some settings in the inter-host variant calling, we can also detect variants at positions like 19289. However, in these results, a “/” indicates intra-host variants that require further validation through intra-host variant calling. The intra-host variant calling uses a more precise mapping strategy, enabling a more accurate estimation of allele frequencies.
    
     Here’s an example from the inter-host variant table showing the mutation at 19289 with the adjusted settings:
    
             CHROM       POS      REF    ALT    TYPE    hCoV229E_Rluc    p10_DMSO    p10_K22    p10_K7523    p16_DMSO    p16_K22    p16_X7523
             PP810610    19289    G      T      SNP          G               G           T           G          G           G/T          G

Workflow using QIIME2 for Data_Karoline_16S_2025

  1. Install and test qiime2-docker

     #Cannot run under QIIME1, switch to QIIME2: pick_open_reference_otus.py -r/home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna -i test.fna -o clustering_test/ -p clustering_params.txt --parallel --verbose
    
     docker pull quay.io/qiime2/core:2023.9
    
     # Start an interactive QIIME2 container; the project directory is mounted as /data and
     # the reference databases under the same path as on the host.
     # The mounted project is Data_Karoline_16S_2025, the data set this workflow is written for.
     docker run -it --rm \
     -v /mnt/md1/DATA/Data_Karoline_16S_2025:/data \
     -v /home/jhuang/REFs:/home/jhuang/REFs \
     quay.io/qiime2/core:2023.9 bash
     cd /data
  2. Import the fastq-files to paired-end-demux.qza

     #https://docs.qiime2.org/2018.8/tutorials/importing/
    
     qiime tools import --type 'SampleData[PairedEndSequencesWithQuality]' --input-path pe-33-manifest --output-path paired-end-demux.qza --input-format PairedEndFastqManifestPhred33
     #--> 1095204304 Mai 27 15:11 paired-end-demux.qza
    
     qiime demux summarize \
     --i-data paired-end-demux.qza \
     --o-visualization demux_pe.qzv
    
     #https://view.qiime2.org
     #qiime tools view demux_pe.qzv
  3. Optimize the parameters trunc-len-f and trunc-len-r and denoise with DADA2: the optimized setting is f240_r240

     #Your amplicon (V3–V4 region) is ~464 bp, so you need ≥20–30 bp overlap
     #464-38=426; 440 is the longest + 12 nt for overlapping = we need 452 nt!
    
     #Optimize the parameters --p-trunc-len-f and --p-trunc-len-r
     ./dada2_batch_test.sh
    
             #!/bin/bash
             # dada2_batch_test.sh - run `qiime dada2 denoise-paired` for a grid of
             # --p-trunc-len-f / --p-trunc-len-r values so that the denoising statistics
             # of the combinations can be compared.
             # Each combination gets its own directory <OUTPUT_DIR>/test_<i>_f<F>_r<R>/
             # holding table.qza, rep-seqs.qza, denoising-stats.qza and log.txt.
             # No `set -e` on purpose: a failing combination must not abort the remaining ones.
    
             # Set your base inputs
             INPUT=paired-end-demux.qza
             TRIM_LEFT_F=17
             TRIM_LEFT_R=21
    
             # Output base; can be overridden from the environment, e.g.
             #   OUTPUT_DIR=dada2_tests2 ./dada2_batch_test.sh
             # NOTE(review): later steps of these notes read from dada2_tests2/ - confirm which run they refer to.
             OUTPUT_DIR=${OUTPUT_DIR:-dada2_tests}
             mkdir -p "$OUTPUT_DIR"
    
             # Loop over trunc-len-f and trunc-len-r combinations
             # Forward: 240, 235, 230, 225
             # Reverse: 225, 220, 215
             i=1
             for f in 240 235 230 225; do
               for r in 225 220 215; do
                 OUT="test_${i}_f${f}_r${r}"
                 echo "Running: $OUT"
                 mkdir -p "$OUTPUT_DIR/$OUT"
    
                 # stdout and stderr of the run are kept next to its artifacts in log.txt
                 qiime dada2 denoise-paired \
                   --i-demultiplexed-seqs "$INPUT" \
                   --p-trim-left-f "$TRIM_LEFT_F" \
                   --p-trim-left-r "$TRIM_LEFT_R" \
                   --p-max-ee-f 3 --p-max-ee-r 5 \
                   --p-trunc-len-f "$f" \
                   --p-trunc-len-r "$r" \
                   --p-n-threads 32 \
                   --o-table "$OUTPUT_DIR/$OUT/table.qza" \
                   --o-representative-sequences "$OUTPUT_DIR/$OUT/rep-seqs.qza" \
                   --o-denoising-stats "$OUTPUT_DIR/$OUT/denoising-stats.qza" \
                   --verbose > "$OUTPUT_DIR/$OUT/log.txt" 2>&1
    
                 i=$((i + 1))
               done
             done
    
     # Turn the denoising statistics of every test run into a viewable .qzv next to the .qza.
     for f in dada2_tests2/test_*/denoising-stats.qza; do
       # If nothing matches, the glob stays literal; skip it instead of handing it to qiime.
       [ -e "$f" ] || continue
       qiime metadata tabulate \
         --m-input-file "$f" \
         --o-visualization "${f%.qza}.qzv"
     done
    
     #pandaseq.out: grep ">" A1_R1.fastq.gz_merged.fasta | wc -l #8229;  grep ">" A10_R1.fastq.gz_merged.fasta | wc -l #9165
    
     # The best choice is f251_r251, since the first 17 and 21 bases with bad quality are anyway removed!
     #f251_r251: sample-A1   18299   10989   60.05   10609   7129    38.96   6535    35.71;  sample-A10  18736   12249   65.38   11778   7444    39.73   6978    37.24
     #f251_r250: sample-A1   18299   11855   64.78   11435   7431    40.61   6823    37.29;  sample-A10  18736   13092   69.88   12590   7714    41.17   7206    38.46
     #f251_r245: sample-A1   18299   12642   69.09   12180   7860    42.95   7193    39.31;  sample-A10  18736   13981   74.62   13457   8218    43.86   7649    40.83
     #f251_r240: sample-A1   18299   12678   69.28   12214   8060    44.05   7387    40.37;  sample-A10  18736   14018   74.82   13498   8412    44.9    7758    41.41
    
     #f250_r240: sample-A1   18299   13705   74.89   13191   8796    48.07   7984    43.63
     #f250_r235: sample-A1   18299   13716   74.95   13198   8793    48.05   7969    43.55
     #f250_r230: sample-A1   18299   13739   75.08   13217   9023    49.31   8113    44.34;  sample-A10  18736   14838   79.2    14159   8895    47.48   8101    43.24
     #f245_r240: sample-A1   18299   14513   79.31   14019   9472    51.76   8739    47.76;  sample-A10  18736   15609   83.31   15102   9605    51.26   8880    47.4
     #f245_r235: sample-A1   18299   14524   79.37   14026   9485    51.83   8746    47.79;  sample-A10  18736   15634   83.44   15127   9685    51.69   8869    47.34
     #f245_r230: sample-A1   18299   14547   79.5    14045   8845    48.34   8058    44.04;  sample-A10  18736   15664   83.6    15156   8812    47.03   7999    42.69
     #f240_r240: sample-A1   18299   14647   80.04   14164   9869    53.93   8932    48.81;  sample-A10  18736   15728   83.95   15213   10488   55.98   9081    48.47 *
     #f240_r235: sample-A1   18299   14661   80.12   14172   9125    49.87   8194    44.78;  sample-A10  18736   15755   84.09   15242   9579    51.13   8105    43.26
     #f240_r230: sample-A1   18299   14686   80.26   14191   4952    27.06   4666    25.5;   sample-A10  18736   15785   84.25   15267   3489    18.62   3341    17.83
    
     #f240_r225: sample-A1   18299   14701   80.34   14206   4575    25  4297    23.48
     #f240_r220: sample-A1   18299   14720   80.44   14223   4588    25.07   4310    23.55
     #f240_r215: sample-A1   18299   14747   80.59   14250   3976    21.73   3758    20.54
     #f230_r220: sample-A1   18299   14972   81.82   14514   3   0.02    3   0.02
    
     #The output of the optimized denoising: (*) dada2_tests2/test_7_f240_r240/table.qza and dada2_tests2/test_7_f240_r240/rep-seqs.qza.
  4. Visualize outputs

     qiime feature-table summarize \
     --i-table dada2_tests2/test_7_f240_r240/table.qza \
     --o-visualization table.qzv \
     --m-sample-metadata-file qiime2_metadata.tsv
    
     #Table summary
     #Metric Sample
     #Number of samples  137
     #Number of features 3,039
     #Total frequency    1,641,484
     #
     #Frequency per sample
     #Minimum frequency  413.0
     #1st quartile   10,319.0
     #Median frequency   11,530.0
     #3rd quartile   13,146.0
     #Maximum frequency  40,022.0
     #Mean frequency 11,981.635036496351
     #
     #Frequency per feature
     #Minimum frequency  1.0
     #1st quartile   3.0
     #Median frequency   8.0
     #3rd quartile   95.5
     #Maximum frequency  56,472.0
     #Mean frequency 540.1395195788089
    
     #qiime tools peek table.qza
     #qiime tools peek qiime2_metadata.tsv
    
     qiime feature-table tabulate-seqs \
     --i-data dada2_tests2/test_7_f240_r240/rep-seqs.qza \
     --o-visualization rep-seqs.qzv
    
     qiime metadata tabulate \
     --m-input-file dada2_tests2/test_7_f240_r240/denoising-stats.qza \
     --o-visualization denoising-stats.qzv
  5. Import reference sequences and taxonomy (SILVA 132)

     qiime tools import \
     --type 'FeatureData[Sequence]' \
     --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna \
     --output-path silva_132_99_otus.qza \
     --input-format DNAFASTAFormat
    
     qiime tools import \
     --type 'FeatureData[Taxonomy]' \
     --input-format HeaderlessTSVTaxonomyFormat \
     --input-path /home/jhuang/REFs/SILVA_132_QIIME_release/taxonomy/16S_only/99/consensus_taxonomy_7_levels.txt \
     --output-path silva_132_99_taxonomy.qza
  6. Assign taxonomy

     qiime feature-classifier classify-consensus-vsearch \
     --i-query dada2_tests2/test_7_f240_r240/rep-seqs.qza \
     --i-reference-reads silva_132_99_otus.qza \
     --i-reference-taxonomy silva_132_99_taxonomy.qza \
     --p-perc-identity 0.97 \
     --p-threads 64 \
     --o-classification taxonomy.qza \
     --o-search-results search-results.qza
  7. Visualize taxonomy

     qiime taxa barplot \
     --i-table dada2_tests2/test_7_f240_r240/table.qza \
     --i-taxonomy taxonomy.qza \
     --m-metadata-file qiime2_metadata.tsv \
     --o-visualization taxa-bar-plots.qzv
  8. Build phylogenetic tree

     qiime alignment mafft \
     --i-sequences dada2_tests2/test_7_f240_r240/rep-seqs.qza \
     --o-alignment aligned-rep-seqs.qza
    
     qiime alignment mask \
     --i-alignment aligned-rep-seqs.qza \
     --o-masked-alignment masked-aligned-rep-seqs.qza
    
     qiime phylogeny fasttree \
     --i-alignment masked-aligned-rep-seqs.qza \
     --o-tree unrooted-tree.qza
    
     #(*) rooted-tree.qza produced here is one of the input files of Phyloseq.Rmd.
     qiime phylogeny midpoint-root \
     --i-tree unrooted-tree.qza \
     --o-rooted-tree rooted-tree.qza
  9. Core diversity analysis

     #The -e 6389 flag sets the even sampling depth (rarefaction depth) to 6,389 reads for diversity analyses.
     #All samples will be rarefied to 6,389 reads.
     #Samples with fewer reads are excluded.
     qiime diversity core-metrics-phylogenetic \
     --i-phylogeny rooted-tree.qza \
     --i-table dada2_tests2/test_7_f240_r240/table.qza \
     --p-sampling-depth 6389 \
     --m-metadata-file qiime2_metadata.tsv \
     --output-dir core_metrics_results
    
     # Chao1 is computed separately and stored next to the other alpha-diversity vectors.
     # Input is the rarefied table written by core-metrics-phylogenetic, i.e. the same table
     # the other alpha metrics in core_metrics_results/ are based on (no plain table.qza is
     # created anywhere in this workflow).
     # NOTE(review): if Chao1 on the unrarefied counts is wanted, use dada2_tests2/test_7_f240_r240/table.qza instead.
     qiime diversity alpha \
     --i-table core_metrics_results/rarefied_table.qza \
     --p-metric chao1 \
     --o-alpha-diversity core_metrics_results/chao1_vector.qza
    
     qiime tools export --input-path core_metrics_results/shannon_vector.qza --output-path exported_alpha/shannon
     qiime tools export --input-path core_metrics_results/faith_pd_vector.qza --output-path exported_alpha/faith_pd
     qiime tools export --input-path core_metrics_results/observed_features_vector.qza --output-path exported_alpha/observed_features
     qiime tools export --input-path core_metrics_results/chao1_vector.qza --output-path exported_alpha/chao1
    
     qiime tools export \
     --input-path core_metrics_results/unweighted_unifrac_distance_matrix.qza \
     --output-path exported_unweighted_unifrac
     qiime tools export \
     --input-path core_metrics_results/weighted_unifrac_distance_matrix.qza \
     --output-path exported_weighted_unifrac
    
     qiime diversity beta-group-significance \
     --i-distance-matrix core_metrics_results/weighted_unifrac_distance_matrix.qza \
     --m-metadata-file qiime2_metadata.tsv \
     --m-metadata-column Group \
     --p-pairwise \
     --p-method permanova \
     --o-visualization beta_group_significance.qzv
    
     qiime tools export \
     --input-path beta_group_significance.qzv \
     --output-path exported_beta_group
    
     #✅ Group 1 / Group 2 — The pairwise comparisons.
     #✅ Sample size — Number of samples used in the test.
     #✅ Permutations — Number of permutations in the PERMANOVA test.
     #✅ pseudo-F — The test statistic from PERMANOVA.
     #✅ p-value — The unadjusted p-value.
     #✅ q-value — The adjusted p-value (Benjamini–Hochberg FDR correction in QIIME2).
     #The q-value column (also sometimes called p-adj) is the multiple-testing corrected p-value.
     #👉 q < 0.05 means the difference is statistically significant between those two groups, even after correction.
     #“There is a significant difference in community composition between Group 1 and Group 2 (p=0.002).”
    
     #The --p-sampling-depth 6389 parameter is directly equivalent to QIIME 1’s -e 6389!
     #The QIIME 2 command will compute:
     #✅ Alpha diversity metrics (Observed OTUs, Shannon, Faith PD, Evenness)
     #✅ Beta diversity distance matrices (UniFrac, Bray-Curtis)
     #✅ PCoA plots
     #✅ Excludes samples with fewer than 6,389 reads.
    
     #📦 Output Folders and Files
     #Output Description
     #table.qzv  Visual of feature table (sample counts)
     #rep-seqs.qzv   Sequences per ASV
     #denoising-stats.qzv    DADA2 read tracking
     #taxonomy.qza/.qzv  Taxonomic classification of ASVs
     #taxa-bar-plots.qzv Interactive bar plots
     #core_metrics_results/  Alpha/beta diversity metrics + PCoA plots
  10. Prepare the three input files for Phyloseq.Rmd: table.qza (see above, marked with (*)), rooted-tree.qza (see above, marked with (*)), and qiime2_metadata_for_qza_to_phyloseq.tsv (edited from qiime2_metadata.tsv).

     # Rarefying can be performed here, or in Phyloseq.Rmd (default), therefore, we don't need this step any more.
     qiime feature-table summarize \
     --i-table core_metrics_results/rarefied_table.qza \
     --o-visualization rarefied_table.qzv \
     --m-sample-metadata-file qiime2_metadata.tsv
    
     #Table summary
     #Metric Sample
     #Number of samples  136
     #Number of features 2,781
     #Total frequency    868,904
    
     # In QIIME2, we need table.qza, not biom-file, therefore, we don't need this step any more.
     # Export the rarefied table to BIOM and convert it to a tab-separated text table.
     qiime tools export \
     --input-path core_metrics_results/rarefied_table.qza \
     --output-path exported_rarefied_table
     #--> exported_rarefied_table/feature-table.biom
    
     biom convert \
     -i exported_rarefied_table/feature-table.biom \
     -o exported_rarefied_table/feature-table.tsv \
     --to-tsv
    
     #✅ Old QIIME 1 table with GenBank IDs (like EF603722.1.1487) as feature labels.
     #✅ QIIME 2 table where feature IDs are hashes (like 0b438323a296b5f2ce2c8bbe3949ee8d).
    
     # Visualize the taxonomy.qza
     qiime tools export \
     --input-path taxonomy.qza \
     --output-path exported-taxonomy
    
     #Feature ID    Taxon                               Confidence
     #0b4383...     k__Bacteria; p__Proteobacteria...   0.98
     #dfa833...     k__Bacteria; p__Firmicutes...       0.87
     #...
    
     # ---- I used the following to generate two file for feeding in the Phyloseq.Rmd ----
    
     #-1- exported_table/feature-table.biom corresponds to table_even6389.biom in QIIME1; in QIIME2 we do not need the biom file and use table.qza instead.
    
     qiime tools export \
     --input-path dada2_tests2/test_7_f240_r240/table.qza \
     --output-path exported_table
     #--> exported_table/feature-table.biom
    
     #-2- exported-tree/tree.nwk corresponds to rep_set.tre in QIIME1
    
     qiime tools export \
     --input-path rooted-tree.qza \
     --output-path exported-tree
     #--> exported-tree/tree.nwk
    
     # ---- The code in Phyloseq.Rmd ----
    
     #install.packages("remotes")
     #remotes::install_github("jbisanz/qiime2R")
     #"core_metrics_results/rarefied_table.qza", rarefying performed in the code, therefore import the raw table.
     library(qiime2R)
     ps.ng.tax <- qza_to_phyloseq(
         features =  "dada2_tests2/test_7_f240_r240/table.qza",
         tree = "rooted-tree.qza",
         metadata = "qiime2_metadata_for_qza_to_phyloseq.tsv"
     )
     # or
     #biom convert \
     #      -i ./exported_table/feature-table.biom \
     #      -o ./exported_table/feature-table-v1.biom \
     #      --to-json
     #ps.ng.tax <- import_biom("./exported_table/feature-table-v1.biom", treefilename="./exported-tree/tree.nwk")
    
     #Note that the alpha- and beta-diversity-files needed in Phyloseq.Rmd has been prepared in the step 9.
  11. Figures generated by Phyloseq.Rmd and MicrobiotaProcess_*.R

    The following files can be found on the server.

     ./Phyloseq.Rmd (Result Phyloseq.html)
     ./MicrobiotaProcess_cluster1_Group9-11_vs_cluster2_Group12-14_orig.R
     ./MicrobiotaProcess_Group1_vs_Group2.R
     ./MicrobiotaProcess_Group3_vs_Group4.R
     ./MicrobiotaProcess_PCA_Group1-4.R
     ./MicrobiotaProcess_PCA_Group9-14.R

Workflow using PICRUSt2 for Data_Karoline_16S_2025

  1. Environment Setup: Create a Conda environment named picrust2 with mamba create, then activate it with mamba activate.

     #https://github.com/picrust/picrust2/wiki/PICRUSt2-Tutorial-(v2.2.0-beta)#minimum-requirements-to-run-full-tutorial
     mamba create -n picrust2 -c bioconda -c conda-forge picrust2    #2.5.3  #=2.2.0_b
     mamba activate /home/jhuang/miniconda3/envs/picrust2

Under env (qiime2-amplicon-2023.9)

  1. Export QIIME2 feature table and representative sequences

     #docker pull quay.io/qiime2/core:2023.9
     #docker run -it --rm \
     #-v /mnt/md1/DATA/Data_Karoline_16S_2025:/data \
     #-v /home/jhuang/REFs:/home/jhuang/REFs \
     #quay.io/qiime2/core:2023.9 bash
     #cd /data
     # === SETTINGS ===
     FEATURE_TABLE_QZA="dada2_tests2/test_7_f240_r240/table.qza"
     REP_SEQS_QZA="dada2_tests2/test_7_f240_r240/rep-seqs.qza"
    
     # === STEP 1: EXPORT QIIME2 ARTIFACTS ===
     mkdir -p qiime2_export
     qiime tools export --input-path $FEATURE_TABLE_QZA --output-path qiime2_export
     qiime tools export --input-path $REP_SEQS_QZA --output-path qiime2_export
  2. Convert BIOM to TSV for Picrust2 input

     biom convert \
     -i qiime2_export/feature-table.biom \
     -o qiime2_export/feature-table.tsv \
     --to-tsv

Under env (picrust2): mamba activate /home/jhuang/miniconda3/envs/picrust2

  1. Run PICRUSt2 pipeline

     tail -n +2 qiime2_export/feature-table.tsv > qiime2_export/feature-table-fixed.tsv
     picrust2_pipeline.py \
     -s qiime2_export/dna-sequences.fasta \
     -i qiime2_export/feature-table-fixed.tsv \
     -o picrust2_out \
     -p 100
    
     #This will:
     #* Place sequences in the reference tree (using EPA-NG),
     #* Predict gene family abundances (e.g., EC, KO, PFAM, TIGRFAM),
     #* Predict pathway abundances.
    
     #In current PICRUSt2 (with picrust2_pipeline.py), you do not run hsp.py separately.
     #Instead, picrust2_pipeline.py internally runs the HSP step for all functional categories automatically. It outputs all the prediction files (16S_predicted_and_nsti.tsv.gz, COG_predicted.tsv.gz, PFAM_predicted.tsv.gz, KO_predicted.tsv.gz, EC_predicted.tsv.gz, TIGRFAM_predicted.tsv.gz, PHENO_predicted.tsv.gz) in the output directory.
    
     mkdir picrust2_out_advanced; cd picrust2_out_advanced
     #If you still want to run hsp.py manually (advanced use / debugging), the commands correspond directly:
     hsp.py -i 16S -t ../picrust2_out/out.tre -o 16S_predicted_and_nsti.tsv.gz -p 100 -n
     hsp.py -i COG -t ../picrust2_out/out.tre -o COG_predicted.tsv.gz -p 100
     hsp.py -i PFAM -t ../picrust2_out/out.tre -o PFAM_predicted.tsv.gz -p 100
     hsp.py -i KO -t ../picrust2_out/out.tre -o KO_predicted.tsv.gz -p 100
     hsp.py -i EC -t ../picrust2_out/out.tre -o EC_predicted.tsv.gz -p 100
     hsp.py -i TIGRFAM -t ../picrust2_out/out.tre -o TIGRFAM_predicted.tsv.gz -p 100
     hsp.py -i PHENO -t ../picrust2_out/out.tre -o PHENO_predicted.tsv.gz -p 100
  2. Metagenome prediction per functional category (if needed separately)

     #cd picrust2_out_advanced
     metagenome_pipeline.py -i ../qiime2_export/feature-table.biom -m 16S_predicted_and_nsti.tsv.gz -f COG_predicted.tsv.gz -o COG_metagenome_out --strat_out
     metagenome_pipeline.py -i ../qiime2_export/feature-table.biom -m 16S_predicted_and_nsti.tsv.gz -f EC_predicted.tsv.gz -o EC_metagenome_out --strat_out
     metagenome_pipeline.py -i ../qiime2_export/feature-table.biom -m 16S_predicted_and_nsti.tsv.gz -f KO_predicted.tsv.gz -o KO_metagenome_out --strat_out
     metagenome_pipeline.py -i ../qiime2_export/feature-table.biom -m 16S_predicted_and_nsti.tsv.gz -f PFAM_predicted.tsv.gz -o PFAM_metagenome_out --strat_out
     metagenome_pipeline.py -i ../qiime2_export/feature-table.biom -m 16S_predicted_and_nsti.tsv.gz -f TIGRFAM_predicted.tsv.gz -o TIGRFAM_metagenome_out --strat_out
    
     # Add descriptions in gene family tables
     add_descriptions.py -i COG_metagenome_out/pred_metagenome_unstrat.tsv.gz -m COG -o COG_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
     add_descriptions.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -m EC -o EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
     add_descriptions.py -i KO_metagenome_out/pred_metagenome_unstrat.tsv.gz -m KO -o KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz   # EC and METACYC is a pair, EC for gene_annotation and METACYC for pathway_annotation
     add_descriptions.py -i PFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m PFAM -o PFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
     add_descriptions.py -i TIGRFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m TIGRFAM -o TIGRFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
  3. Pathway inference (MetaCyc pathways from EC numbers)

     #cd picrust2_out_advanced
     # Pathway inference from the EC contribution table (pred_metagenome_contrib); results go to EC_pathways_out.
     pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_contrib.tsv.gz -o EC_pathways_out -p 100
     # Pathway inference from the unstratified EC table with the --per_sequence_* options; results go to EC_pathways_out_per_seq.
     pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -o EC_pathways_out_per_seq -p 100 --per_sequence_contrib --per_sequence_abun EC_metagenome_out/seqtab_norm.tsv.gz --per_sequence_function EC_predicted.tsv.gz
     #ERROR due to missing .../pathway_mapfiles/KEGG_pathways_to_KO.tsv
     # NOTE(review): both commands below write to the same KEGG_pathways_out directory, and --map points into
     # an anaconda3/python3.6 installation while the environment activated in these notes is
     # /home/jhuang/miniconda3/envs/picrust2 - confirm the path.
     # NOTE(review): this command feeds the COG table to a KO-based map file; presumably KO_metagenome_out was intended - confirm.
     pathway_pipeline.py -i COG_metagenome_out/pred_metagenome_contrib.tsv.gz -o KEGG_pathways_out -p 100 --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
     # NOTE(review): the input is pred_metagenome_strat.tsv.gz, whereas the EC command above uses pred_metagenome_contrib.tsv.gz - verify that this file exists.
     pathway_pipeline.py -i KO_metagenome_out/pred_metagenome_strat.tsv.gz -o KEGG_pathways_out -p 100 --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
    
     add_descriptions.py -i EC_pathways_out/path_abun_unstrat.tsv.gz -m METACYC -o EC_pathways_out/path_abun_unstrat_descrip.tsv.gz
     gunzip EC_pathways_out/path_abun_unstrat_descrip.tsv.gz
    
     #Error - no rows remain after regrouping input table. The default pathway and regroup mapfiles are meant for EC numbers. Note that KEGG pathways are not supported since KEGG is a closed-source database, but you can input custom pathway mapfiles if you have access. If you are using a custom function database did you mean to set the --no-regroup flag and/or change the default pathways mapfile used?
     #If ERROR --> USE the METACYC for downstream analyses!!!
    
     #ERROR due to missing .../description_mapfiles/KEGG_pathways_info.tsv.gz
     #add_descriptions.py -i KO_pathways_out/path_abun_unstrat.tsv.gz -o KEGG_pathways_out/path_abun_unstrat_descrip.tsv.gz --custom_map_table /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles/KEGG_pathways_info.tsv.gz
    
     #NOTE: Target-analysis for the pathway "mixed acid fermentation"
  4. Visualization

     #7.1 STAMP
     #https://github.com/picrust/picrust2/wiki/STAMP-example
     #Note that STAMP can only be opened under Windows
    
     # It needs two files: path_abun_unstrat_descrip.tsv.gz as "Profile file" and metadata.tsv as "Group metadata file".
     cp ~/DATA/Data_Karoline_16S_2025/picrust2_out_advanced/EC_pathways_out/path_abun_unstrat_descrip.tsv ~/DATA/Access_to_Win7/
    
     # Build the STAMP "Group metadata file": columns 1, 3, 5 and 6 of qiime2_metadata.tsv,
     # selected in a single pass (no intermediate files 1, 3, 5_6 and 1_3 are left behind).
     cut -d$'\t' -f1,3,5-6 qiime2_metadata.tsv > metadata.tsv
     #SampleID --> SampleID
     # Expected first lines of metadata.tsv:
     #   SampleID        Group   pre_post        Sex_age
     #   sample-A1       Group1  3d.post.stroke  male.aged
     #   sample-A2       Group1  3d.post.stroke  male.aged
     #   sample-A3       Group1  3d.post.stroke  male.aged
     cp ~/DATA/Data_Karoline_16S_2025/metadata.tsv ~/DATA/Access_to_Win7/
    
     #7.2. ALDEx2
     https://bioconductor.org/packages/release/bioc/html/ALDEx2.html

Under env (qiime2-amplicon-2023.9)

  1. (NOT_NEEDED) Convert pathway output to BIOM and re-import to QIIME2

     gunzip picrust2_out/pathways_out/path_abun_unstrat.tsv.gz
     biom convert \
     -i picrust2_out/pathways_out/path_abun_unstrat.tsv \
     -o picrust2_out/path_abun_unstrat.biom \
     --table-type="Pathway table" \
     --to-hdf5

     qiime tools import \
     --input-path picrust2_out/path_abun_unstrat.biom \
     --type 'FeatureTable[Frequency]' \
     --input-format BIOMV210Format \
     --output-path path_abun.qza
    
     #qiime tools export --input-path path_abun.qza --output-path exported_path_abun
     #qiime tools peek path_abun.qza
     echo "✅ PICRUSt2 pipeline complete. Output in: picrust2_out"

For QIIME1

  1. Environment Setup: Create a Conda environment named picrust2 with mamba create, then activate it with mamba activate.

     #https://github.com/picrust/picrust2/wiki/PICRUSt2-Tutorial-(v2.2.0-beta)#minimum-requirements-to-run-full-tutorial
     mamba create -n picrust2 -c bioconda -c conda-forge picrust2    #2.5.3  #=2.2.0_b
     mamba activate /home/jhuang/miniconda3/envs/picrust2
  2. Data Preparation: Create a new working directory (picrust2_out_2024_2) and enter it with mkdir and cd. Then identify the input files needed for the analysis (metadata.tsv, seqs.fna, table.biom) and use the biom commands to inspect and convert the BIOM-format table.

     mkdir picrust2_out_2024_2
     cd picrust2_out_2024_2
    
     # Identifying input data
     # Note: Replace the paths and filenames with your actual data if different
     # metadata.tsv == ../map_corrected.txt
     # seqs.fna     == ../clustering/seqs.fna
     # table.biom   == ../core_diversity_e42369/table_even42369.biom
    
     # Inspect and convert the BIOM format files
     biom head -i ../core_diversity_e42369/table_even42369.biom
     biom summarize-table -i ../core_diversity_e42369/table_even42369.biom
     biom convert -i ../core_diversity_e42369/table_even42369.biom -o table_even42369.tsv --to-tsv
     #For QIIME2: exported_rarefied_table/feature-table.tsv
  3. Running PICRUSt2: The place_seqs.py command places the input sequences into a reference tree. The hsp.py commands generate hidden-state predictions for multiple functional categories.

     #insert reads into reference tree using EPA-NG
     cp ../clustering/rep_set.fna ./
     grep ">" rep_set.fna | wc -l  #40990
     vim table_even42369.tsv       #40596-2
    
     samtools faidx rep_set.fna
     cut -f1-1 table_even42369.tsv > table_even42369.id
     #manually modify table_even42369.id by replacing "\n" with " >> seqs.fna\nsamtools faidx rep_set.fna "
     run table_even42369.id
    
     rm -rf intermediate/
     place_seqs.py -s seqs.fna -o out.tre -p 4 --intermediate intermediate/place_seqs
    
     #castor: Efficient Phylogenetics on Large Trees
     #https://github.com/picrust/picrust2/wiki/Hidden-state-prediction
    
     hsp.py -i 16S -t out.tre -o 16S_predicted_and_nsti.tsv.gz -p 100 -n
     hsp.py -i COG -t out.tre -o COG_predicted.tsv.gz -p 100
     hsp.py -i PFAM -t out.tre -o PFAM_predicted.tsv.gz -p 15
     hsp.py -i KO -t out.tre -o KO_predicted.tsv.gz -p 15
     hsp.py -i EC -t out.tre -o EC_predicted.tsv.gz -p 15
     hsp.py -i TIGRFAM -t out.tre -o TIGRFAM_predicted.tsv.gz -p 15
     hsp.py -i PHENO -t out.tre -o PHENO_predicted.tsv.gz -p 15
    
     #>In this table the predicted copy number of all Enzyme Classification (EC) numbers is shown for each ASV. The NSTI values per ASV are not in this table since we did not specify the -n option. EC numbers are a type of gene family defined based on the chemical reactions they catalyze. For instance, EC:1.1.1.1 corresponds to alcohol dehydrogenase. In this tutorial we are focusing on EC numbers since they can be used to infer MetaCyc pathway levels (see below).
    
     zless -S EC_predicted.tsv.gz
     sequence        EC:1.1.1.1      EC:1.1.1.10     EC:1.1.1.100    ...
     20e568023c10eaac834f1c110aacea18        2       0       3    ...
     23fe12a325dfefcdb23447f43b6b896e        0       0       1    ...
     288c8176059111c4c7fdfb0cd5afce64        1       0       1    ...
     ...
    
     ##Why import the tsv file to MyData?
     #MyData <- read.csv(file="./COG_predicted.tsv", header=TRUE, sep="\t", row.names=1)   #6806 4598  e.g. COG5665
     #MyData <- read.csv(file="./PFAM_predicted.tsv", header=TRUE, sep="\t", row.names=1)  #6806 11089 e.g. PF17225
     #MyData <- read.csv(file="./KO_predicted.tsv", header=TRUE, sep="\t", row.names=1)    #6806 10543 e.g. K19791
     #MyData <- read.csv(file="./EC_predicted.tsv", header=TRUE, sep="\t", row.names=1)    #6806 2913  e.g. EC.6.6.1.2
     #MyData <- read.csv(file="./16S_predicted.tsv", header=TRUE, sep="\t", row.names=1)   #6806    1     e.g. X16S_rRNA_Count
     #MyData <- read.csv(file="./TIGRFAM_predicted.tsv", header=TRUE, sep="\t", row.names=1)  #6806 4287  e.g. TIGR04571
     #MyData <- read.csv(file="./PHENO_predicted.tsv", header=TRUE, sep="\t", row.names=1)    #6806   41  e.g. Use_of_nitrate_as_electron_acceptor, Xylose_utilizing
  4. The metagenome_pipeline.py commands perform metagenomic prediction for several functional categories. The predicted gene families are weighted by the relative abundance of ASVs in their community; in other words, we are interested in inferring the metagenomes of the communities.

     #Generate metagenome predictions using EC numbers https://en.wikipedia.org/wiki/List_of_enzymes#Category:EC_1.1_(act_on_the_CH-OH_group_of_donors)
     metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f COG_predicted.tsv.gz -o COG_metagenome_out --strat_out
     metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f EC_predicted.tsv.gz -o EC_metagenome_out --strat_out
     metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f KO_predicted.tsv.gz -o KO_metagenome_out --strat_out
     metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f PFAM_predicted.tsv.gz -o PFAM_metagenome_out --strat_out
     metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f TIGRFAM_predicted.tsv.gz -o TIGRFAM_metagenome_out --strat_out
  5. Pathway-level inference: By default this script infers MetaCyc pathway abundances based on EC number abundances, although different gene families and pathways can also be optionally specified. This script performs a number of steps by default, which are based on the approach implemented in HUMAnN2:

     #- Regroups EC numbers to MetaCyc reactions.
     #- Infers which MetaCyc pathways are present based on these reactions with MinPath.
     #- Calculates and returns the abundance of pathways identified as present.
    
     pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_contrib.tsv.gz -o pathways_out -p 15
    
     #Note that the path of map files is under /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles
     pathway_pipeline.py -i COG_metagenome_out/pred_metagenome_contrib.tsv.gz -o KEGG_pathways_out -p 15 --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
    
     #Mapping predicted KO abundances to legacy KEGG pathways (with stratified output that represents contributions to community-wide abundances):
     pathway_pipeline.py -i KO_metagenome_out/pred_metagenome_strat.tsv.gz -o KEGG_pathways_out --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
    
     #Map EC numbers to MetaCyc pathways and get stratified output corresponding to contribution of predicted gene family abundances within each predicted genome:
     pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -o pathways_out_per_seq --per_sequence_contrib --per_sequence_abun EC_metagenome_out/seqtab_norm.tsv.gz --per_sequence_function EC_predicted.tsv.gz
  6. Add functional descriptions: Finally, it can be useful to have a description of each functional id in the output abundance tables. The commands below add these descriptions as a new column in the gene family and pathway abundance tables.

     #--6.1. Add descriptions in gene family tables
     add_descriptions.py -i COG_metagenome_out/pred_metagenome_unstrat.tsv.gz -m COG -o COG_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
     add_descriptions.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -m EC -o EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
     add_descriptions.py -i KO_metagenome_out/pred_metagenome_unstrat.tsv.gz -m KO -o KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz   # EC and METACYC is a pair, EC for gene_annotation and METACYC for pathway_annotation
     add_descriptions.py -i PFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m PFAM -o PFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
     add_descriptions.py -i TIGRFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m TIGRFAM -o TIGRFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
    
     #--6.2. Add descriptions in pathway abundance tables
     add_descriptions.py -i pathways_out/path_abun_unstrat.tsv.gz -m METACYC -o pathways_out/path_abun_unstrat_descrip.tsv.gz
     gunzip path_abun_unstrat_descrip.tsv.gz
    
     #Error - no rows remain after regrouping input table. The default pathway and regroup mapfiles are meant for EC numbers. Note that KEGG pathways are not supported since KEGG is a closed-source database, but you can input custom pathway mapfiles if you have access. If you are using a custom function database did you mean to set the --no-regroup flag and/or change the default pathways mapfile used?
     #If ERROR --> USE the METACYC for downstream analyses!!!
    
     add_descriptions.py -i pathways_out/path_abun_unstrat.tsv.gz -o KEGG_pathways_out/path_abun_unstrat_descrip.tsv.gz --custom_map_table /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles/KEGG_pathways_info.tsv.gz
  7. Visualization

     #7.1 STAMP
     #https://github.com/picrust/picrust2/wiki/STAMP-example
     conda deactivate
     conda install -c bioconda stamp
    
     #conda install -c bioconda stamp
     #sudo pip install pyqi
     #sudo apt-get install libblas-dev liblapack-dev gfortran
     #sudo apt-get install freetype* python-pip python-dev python-numpy python-scipy python-matplotlib
     #sudo pip install STAMP
     #conda install -c bioconda stamp
    
     conda create -n stamp -c bioconda/label/cf201901 stamp
     brew install pyqt
    
     #DEBUG the environment
     conda install pyqt=4
     #conda install icu=56
    
     #Prepare the STAMP input files (e.g. path_abun_unstrat_descrip.tsv.gz and metadata.tsv from the tutorial)
     cut -d$'\t' -f1 map_corrected.txt > 1
     cut -d$'\t' -f5 map_corrected.txt > 5
     cut -d$'\t' -f6 map_corrected.txt > 6
     paste -d$'\t' 1 5 > 1_5
     paste -d$'\t' 1_5 6 > metadata.tsv
     #Rename the header "#SampleID" --> "SampleID"
     SampleID    Facility    Genotype
     100CHE6KO   PaloAlto    KO
     101CHE6WT   PaloAlto    WT
    
     #7.2. ALDEx2
     https://bioconductor.org/packages/release/bioc/html/ALDEx2.html

Viral genome assembly and recombination analysis for Data_Sophie_HDV_Sequences

  1. Prepare input raw data

     /mnt/md1/DATA/Data_Sophie_HDV_Sequences/raw_data
     for f in *_R[12]_001.fastq.gz; do newname="$(echo "$f" | awk -F_ '{print $1 "_" $4 ".fastq.gz"}')"; echo mv "$f" "$newname"; done
     for f in *_R[12]_001.fastq.gz; do newname="$(echo "$f" | awk -F_ '{print $1 "_" $4 ".fastq.gz"}')"; mv "$f" "$newname"; done
  2. Call variant calling using snippy

     ln -s ~/Tools/bacto/db/ .;
     ln -s ~/Tools/bacto/envs/ .;
     ln -s ~/Tools/bacto/local/ .;
     cp ~/Tools/bacto/Snakefile .;
     cp ~/Tools/bacto/bacto-0.1.json .;
     cp ~/Tools/bacto/cluster.json .;
     #download NC_001653.gb from GenBank
     mv ~/Downloads/sequence\(2\).gb db/NC_001653.gb
     #setting the following in bacto-0.1.json
         "fastqc": false,
         "taxonomic_classifier": false,
         "assembly": true,
         "typing_ariba": false,
         "typing_mlst": true,
         "pangenome": true,
         "variants_calling": true,
         "phylogeny_fasttree": true,
         "phylogeny_raxml": true,
         "recombination": false, (due to gubbins-error set false)
         "genus": "Alphacoronavirus",
         "kingdom": "Viruses",
         "species": "Human coronavirus 229E",
         "mykrobe": {
             "species": "corona"
         },
         "reference": "db/PP810610.gb"
     mamba activate /home/jhuang/miniconda3/envs/bengal3_ac3
     (bengal3_ac3) /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds
  3. Prepare virus database

     # ---- Date is 16.06.2025. ----
     #Taxonomy ID: 12475
     esearch -db nucleotide -query "txid12475[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_12475_ncbi.fasta
     python ~/Scripts/filter_fasta.py genome_12475_ncbi.fasta complete_genome_12475_ncbi.fasta  #4208-->760
    
     #https://de.wikipedia.org/wiki/Hepatitis-D-Virus
     Hepatitis delta virus, complete genome
     NCBI Reference Sequence: NC_001653.2
  4. (Deprecated) Calling intra-host variants using viral-ngs

     #How to run and debug the viral-ngs docker?
    
     mkdir viralngs; cd viralngs
     ln -s ~/Tools/viral-ngs_docker/Snakefile Snakefile
     ln -s  ~/Tools/viral-ngs_docker/bin bin
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/refsel.acids refsel.acids
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/lastal.acids lastal.acids
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/config.yaml config.yaml
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-runs.txt samples-runs.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-depletion.txt samples-depletion.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-metagenomics.txt samples-metagenomics.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly.txt samples-assembly.txt
     cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly-failures.txt samples-assembly-failures.txt
    
     # Adapt the sample-*.txt
     mkdir viralngs/data
     mkdir viralngs/data/00_raw
    
     mkdir bams
     ref_fa="NC_001653.fasta";
    
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bwa index ${ref_fa}; \
         bwa mem -M -t 16 ${ref_fa} trimmed/${sample}_trimmed_P_1.fastq trimmed/${sample}_trimmed_P_2.fastq | samtools view -bS - > bams/${sample}_genome_alignment.bam; \
     done
     conda activate viral-ngs4
    
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         picard AddOrReplaceReadGroups I=bams/${sample}_genome_alignment.bam O=~/DATA/Data_Sophie_HDV_Sequences/viralngs/data/00_raw/${sample}.bam SORT_ORDER=coordinate CREATE_INDEX=true RGPL=illumina RGID=$sample RGSM=$sample RGLB=standard RGPU=$sample VALIDATION_STRINGENCY=LENIENT; \
     done
     conda deactivate
    
     # Activate the docker viralngs environment
     docker run -it --rm -v /mnt/md1/DATA/Data_Sophie_HDV_Sequences/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk   own_viral_ngs_gap2seq bash
     cd /work
    
     # -- ! First, manually run the following to generate all ${sample}.cleaned.bam and ${sample}.taxfilt.bam files in 01_cleaned and 01_per_sample
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
    
         # -- generating data/01_cleaned/${sample}.cleaned.bam --
         bin/taxon_filter.py deplete data/00_raw/${sample}.bam tmp/01_cleaned/${sample}.raw.bam tmp/01_cleaned/${sample}.bmtagger_depleted.bam tmp/01_cleaned/${sample}.rmdup.bam data/01_cleaned/${sample}.cleaned.bam --bmtaggerDbs /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/metagenomics_contaminants_v3 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/hg19 --blastDbs /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/hybsel_probe_adapters /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/metag_v3.ncRNA.mRNA.mitRNA.consensus --threads 60 --srprismMemory 14250 --JVMmemory 50g
    
         # -- data/01_cleaned/073.cleaned.bam --> data/01_cleaned/073.taxfilt.bam --
         bin/taxon_filter.py filter_lastal_bam data/01_cleaned/${sample}.cleaned.bam lastal_db/lastal.fasta data/01_cleaned/${sample}.taxfilt.bam
         bin/read_utils.py bwamem_idxstats data/01_cleaned/${sample}.cleaned.bam /home/jhuang/REFs/viral_ngs_dbs/spikeins/ercc_spike-ins.fasta --outStats reports/spike_count/${sample}.spike_count.txt --minScoreToFilter 60
    
         fastqc -f bam data/01_cleaned/${sample}.cleaned.bam -o reports/fastqc/${sample}
         unzip reports/fastqc/${sample}/${sample}.cleaned_fastqc.zip -d reports/fastqc/${sample}
         fastqc -f bam data/01_cleaned/${sample}.taxfilt.bam -o reports/fastqc/${sample}
         unzip reports/fastqc/${sample}/${sample}.taxfilt_fastqc.zip -d reports/fastqc/${sample}
    
         # -- data/01_cleaned/${sample}.cleaned.bam --> data/01_per_sample/${sample}.cleaned.bam --
         bin/read_utils.py merge_bams data/01_cleaned/${sample}.cleaned.bam tmp/01_cleaned/${sample}.cleaned.bam --picardOptions SORT_ORDER=queryname
         bin/read_utils.py rmdup_mvicuna_bam tmp/01_cleaned/${sample}.cleaned.bam data/01_per_sample/${sample}.cleaned.bam --JVMmemory 30g
    
         # -- data/01_cleaned/${sample}.taxfilt.bam --> data/01_per_sample/${sample}.taxfilt.bam --
         bin/read_utils.py merge_bams data/01_cleaned/${sample}.taxfilt.bam tmp/01_cleaned/${sample}.taxfilt.bam --picardOptions SORT_ORDER=queryname
         bin/read_utils.py rmdup_mvicuna_bam tmp/01_cleaned/${sample}.taxfilt.bam data/01_per_sample/${sample}.taxfilt.bam --JVMmemory 30g
     done
    
     # -- ! Secondly --
     #Running snakemake directly (snakemake --directory /work --printshellcmds --cores 40) fails with the following error, so the steps are run as bash commands instead.
     #Error in rule orient_and_impute:
     #jobid: 0
     #output: tmp/02_assembly/HE290.assembly3-modify.fasta
     #DEBUG: --memLimitGb 12 --> --memLimitGb 960, if threads=60: 256M / 58G, how big the memory needed when threads=120: 492M / 468G.
     ##ASSEMBLY_1_SPADES: data/01_per_sample/010.taxfilt.bam ----> 010.assembly1-spades.fasta in tmp/02_assembly/
    
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bin/assembly.py assemble_spades data/01_per_sample/${sample}.taxfilt.bam /home/jhuang/REFs/viral_ngs_dbs/trim_clip/contaminants.fasta tmp/02_assembly/${sample}.assembly1-spades.fasta --nReads 10000000 --threads 120 --memLimitGb 960
     done
    
     #ASSEMBLY_2_SCAFFOLDED: 010.assembly1-spades.fasta ----> 010.assembly2-scaffolded[_ref].fasta + 010.assembly2-alternate_sequences.fasta
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bin/assembly.py order_and_orient tmp/02_assembly/${sample}.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/${sample}.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/${sample}.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta --threads 60
     done
    
     #DEBUG: the tool gap2seq is missing, installing package gap2seq to root@bcd2c36b083c:/opt/miniconda/envs/viral-ngs-env/bin
     #
     #        #https://www.cs.helsinki.fi/u/lmsalmel/Gap2Seq/
     #        apt-get update
     #        apt-get install -y cmake
     #        mkdir build;  cd build;  cmake ..;  make
     #
     #        #cp /work/Gap2Seq-2.1/build/Gap2Seq /opt/miniconda/envs/viral-ngs-env/bin/gap2seq
     #        cp /work/Gap2Seq-2.1/build/Gap2Seq.sh /opt/miniconda/envs/viral-ngs-env/bin/
     #        cp /work/Gap2Seq-2.1/build/Gap2Seq /opt/miniconda/envs/viral-ngs-env/bin/
     #        cp /work/Gap2Seq-2.1/build/GapCutter /opt/miniconda/envs/viral-ngs-env/bin/
     #        cp /work/Gap2Seq-2.1/build/GapMerger /opt/miniconda/envs/viral-ngs-env/bin/
     #        cp -r /work/Gap2Seq-2.1/build/ext /opt/miniconda/envs/viral-ngs-env/bin/
     #        Gap2Seq.sh --help
     #
     #        #MODIFIED1 in bin/tools/gap2seq.py
     #        #TOOL_VERSION = '2.1'
     #        TOOL_VERSION = '3.1.1a2'
     #
     #        root@544789adb8b6:/work# for sample in 010; do             bin/assembly.py gapfill_gap2seq tmp/02_assembly/${sample}.assembly2-scaffolded.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly2-gapfilled.fasta --memLimitGb 960 --maskErrors --randomSeed 0 --loglevel DEBUG;         done
     #        2025-06-20 11:12:41,165 - gap2seq:44:execute - DEBUG - running gap2seq: /opt/miniconda/envs/viral-ngs-env/bin/Gap2Seq.sh -scaffolds /work/tmp/02_assembly/010.assembly2-scaffolded.fasta -filled /tmp/tmp-assembly-gapfill_gap2seq-vz_n3tkp/tmpkt8508du_gap2seq_dir/gap2seq-filled.s3.k90.fasta -reads /tmp/tmp-assembly-gapfill_gap2seq-vz_n3tkp/tmpfj62s6n5.1.fq,/tmp/tmp-assembly-gapfill_gap2seq-vz_n3tkp/tmpu5eu1n1r.2.fq -all-upper -verbose -solid 3 -k 90 -nb-cores 0 -max-mem 960 -randseed 0
     #        /opt/miniconda/envs/viral-ngs-env/bin/Gap2Seq.sh: Unrecognized option -randseed
     #
     #        #MODIFIED2 in bin/tools/gap2seq.py: delete 'randseed=random_seed' in solid=solid_kmer_threshold, k=kmer_size, nb_cores=threads, max_mem=mem_limit_gb, randseed=random_seed) so that solid=solid_kmer_threshold, k=kmer_size, nb_cores=threads, max_mem=mem_limit_gb)
     #
     #        docker commit 3f9f9507ab31 viral_ngs_with_gap2seq
     #        docker image ls or docker images
     #
     #        #NOTE that the image cannot be deleted, since it is linked to other images!
     #        docker rmi own_viral_ngs_with_gap2seq
     #        #Error response from daemon: conflict: unable to remove repository reference "own_viral_ngs_gap2seq" (must force) - container 3f9f9507ab31 is using its referenced image 7ffc275c57cc
    
     #NOTE: --memLimitGb 12 --> --memLimitGb 960
     #ASSEMBLY_2_GAPFILLED: 010.assembly2-scaffolded.fasta + data/01_per_sample/010.cleaned.bam ----> 010.assembly2-gapfilled.fasta
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bin/assembly.py gapfill_gap2seq tmp/02_assembly/${sample}.assembly2-scaffolded.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly2-gapfilled.fasta --memLimitGb 960 --maskErrors --randomSeed 0 --loglevel DEBUG
     done
    
     #ASSEMBLY_3_MODIFY: 010.assembly2-gapfilled.fasta + 010.assembly2-scaffold_ref.fasta ----> 010.assembly3-modify.[fasta|fasta.fai|dict|nix]
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index
     done
    
     #ASSEMBLY_4_REFINED: 010.assembly3-modify.fasta + data/01_per_sample/010.cleaned.bam ----> 010.assembly4-refined.[fasta|fasta.fai|dict|nix] + 010.assembly3.[vcf.gz|vcf.gz.tbi]
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly3-modify.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly4-refined.fasta --outVcf tmp/02_assembly/${sample}.assembly3.vcf.gz --min_coverage 2 --novo_params '-r Random -l 20 -g 40 -x 20 -t 502' --threads 60
     done
    
     #ASSEMBLY_5_REFINED2_GENERATE_ASSEMBLY_IN_DATA_DIR: tmp/02_assembly/010.assembly4-refined.fasta + data/01_per_sample/010.cleaned.bam ----> data/02_assembly/010.[fasta|fasta.fai|dict|nix] + tmp/02_assembly/010.assembly4.[vcf.gz|vcf.gz.tbi]
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly4-refined.fasta data/01_per_sample/${sample}.cleaned.bam data/02_assembly/${sample}.fasta --outVcf tmp/02_assembly/${sample}.assembly4.vcf.gz --min_coverage 3 --novo_params '-r Random -l 20 -g 40 -x 20 -t 100' --threads 60
     done
    
     #ALIGN_CLEANED_BAM_GENERATE_MAPPED_BAM: data/02_assembly/010.fasta + data/01_per_sample/010.cleaned.bam ----> data/02_align_to_self/010.bam + data/02_align_to_self/010.mapped.bam
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
         bin/read_utils.py align_and_fix data/01_per_sample/${sample}.cleaned.bam data/02_assembly/${sample}.fasta --outBamAll data/02_align_to_self/${sample}.bam --outBamFiltered data/02_align_to_self/${sample}.mapped.bam --aligner novoalign --aligner_options '-r Random -l 20 -g 40 -x 20 -t 100 -k' --threads 60
     done
    
     # -- ! Thirdly set the samples-assembly.txt full
     snakemake --directory /work --printshellcmds --cores 40
     #Error in rule orient_and_impute:
     #jobid: 0
     #output: tmp/02_assembly/HE290.assembly3-modify.fasta
    
                         # # ---- The snakemake pipeline contains the following remaining steps ----
                         #
                         # fastqc -f bam data/02_align_to_self/093.bam -o reports/fastqc/093
                         # unzip reports/fastqc/093/093_fastqc.zip -d reports/fastqc/093
                         # fastqc -f bam data/01_cleaned/093.cleaned.bam -o reports/fastqc/093
                         # unzip reports/fastqc/093/093.cleaned_fastqc.zip -d reports/fastqc/093
                         # fastqc -f bam data/01_cleaned/093.taxfilt.bam -o reports/fastqc/093
                         # unzip reports/fastqc/093/093.taxfilt_fastqc.zip -d reports/fastqc/093
                         #
                         # bin/intrahost.py vphaser_one_sample data/02_align_to_self/093.mapped.bam data/02_assembly/093.fasta data/04_intrahost/vphaser2.093.txt.gz --vphaserNumThreads 15 --removeDoublyMappedReads --minReadsEach 5 --maxBias 10
                         # bin/reports.py consolidate_fastqc reports/fastqc/093/taxfilt reports/summary.fastqc.taxfilt.txt
                         # bin/reports.py consolidate_fastqc reports/fastqc/093/align_to_self reports/summary.fastqc.align_to_self.txt
                         # bin/reports.py consolidate_fastqc reports/fastqc/093/cleaned reports/summary.fastqc.cleaned.txt
                         # bin/interhost.py multichr_mafft ref_genome/reference.fasta data/02_assembly/093.fasta data/03_multialign_to_ref --ep 0.123 --maxiters 1000 --preservecase --localpair --outFilePrefix aligned --sampleNameListFile data/03_multialign_to_ref/sampleNameList.txt --threads 60
                         # bin/intrahost.py merge_to_vcf ref_genome/reference.fasta data/04_intrahost/isnvs.vcf.gz --samples 093 --isnvs data/04_intrahost/vphaser2.093.txt.gz --alignments data/03_multialign_to_ref/aligned_1.fasta --strip_chr_version --parse_accession
                         # bin/interhost.py snpEff data/04_intrahost/isnvs.vcf.gz NC_001653.2 data/04_intrahost/isnvs.annot.vcf.gz j.huang@uke.de
                         # bin/intrahost.py iSNV_table data/04_intrahost/isnvs.annot.vcf.gz data/04_intrahost/isnvs.annot.txt.gz
                         # bin/reports.py consolidate_spike_count reports/spike_count reports/summary.spike_count.txt
  5. vrap-calling

     ln -s /home/jhuang/Tools/vrap/ .
     mamba activate /home/jhuang/miniconda3/envs/vrap
    
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
             vrap/vrap.py  -1 trimmed/${sample}_trimmed_P_1.fastq -2 trimmed/${sample}_trimmed_P_2.fastq -o vrap_${sample}  --bt2idx=/home/jhuang/REFs/genome  --host=/home/jhuang/REFs/genome.fa --virus=/mnt/md1/DATA/Data_Sophie_HDV_Sequences/complete_genome_12475_ncbi.fasta --nt=/mnt/nvme0n1p1/blast/nt --nr=/mnt/nvme0n1p1/blast/nr  -t 100 -l 200  -g
     done
  6. (Deprecated) Process the vrap results with the docker viral-ngs scripts. Be careful: this approach does not produce good results.

     mv vrap_010 viralngs/
     mkdir tmp/02_assembly data/02_assembly
    
     #refsel_db/refsel.fasta
    
     The longest contig genome-calling is
     010: HQ005371
    
     # Using viral-ngs to improve the assembly; the results are poor because the reference is too divergent --> not used!
     # viral-ngs refinement chain, run once per sample: order/orient the vrap contigs
     # against HQ005371.fasta, gap-fill, impute from the scaffold reference, refine the
     # assembly twice, then align the cleaned reads back to the final assembly.
     for sample in 010  048  073  083  093  1021  104  108  129  253  282  301  357  383  405  444  446  450  494  503  69   738  81  879  94   995    HE290  HE554  HE695  HSVM 020  068  079  090  097  103   107  109  141  279  293  341  370  394  442  445  449  478  497  550  691  771  82  88   973  HE284  HE511  HE566  HE748; do
     # Single-sample test run. Kept commented out: with both 'for' headers active the
     # loop has two 'do' but only one 'done' and does not parse. To test on one sample,
     # swap which of the two headers is commented.
     #for sample in 010; do
         bin/assembly.py order_and_orient vrap_${sample}/virus_user_db_contigs.fasta HQ005371.fasta  tmp/02_assembly/${sample}.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/${sample}.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta --threads 60
         bin/assembly.py gapfill_gap2seq tmp/02_assembly/${sample}.assembly2-scaffolded.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly2-gapfilled.fasta --memLimitGb 960 --maskErrors --randomSeed 0 --loglevel DEBUG
         bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index
         # First refinement pass (min coverage 2), then a stricter second pass (min coverage 3)
         # that writes the final per-sample assembly to data/02_assembly/.
         bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly3-modify.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly4-refined.fasta --outVcf tmp/02_assembly/${sample}.assembly3.vcf.gz --min_coverage 2 --novo_params '-r Random -l 20 -g 40 -x 20 -t 502' --threads 60
         bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly4-refined.fasta data/01_per_sample/${sample}.cleaned.bam data/02_assembly/${sample}.fasta --outVcf tmp/02_assembly/${sample}.assembly4.vcf.gz --min_coverage 3 --novo_params '-r Random -l 20 -g 40 -x 20 -t 100' --threads 60

         # Map the cleaned reads back onto the sample's own final assembly.
         bin/read_utils.py align_and_fix data/01_per_sample/${sample}.cleaned.bam data/02_assembly/${sample}.fasta --outBamAll data/02_align_to_self/${sample}.bam --outBamFiltered data/02_align_to_self/${sample}.mapped.bam --aligner novoalign --aligner_options '-r Random -l 20 -g 40 -x 20 -t 100 -k' --threads 60
     done
  7. Filtering the contig from the vrap results

     # Sample IDs; each one has a vrap_<sample> result directory.
     samples=(
         010 048 073 083 093 1021 104 108 129 253 282 301 357 383 405 444 446 450 494 503
         69 738 81 879 94 995 HE290 HE554 HE695 HSVM
         020 068 079 090 097 103 107 109 141 279 293 341 370 394 442 445 449 478 497 550
         691 771 82 88 973 HE284 HE511 HE566 HE748
     )

     # Pass 1: run extract_virus_user_db_contigs.py on every vrap directory.
     for sample in "${samples[@]}"; do
         python ~/Scripts/extract_virus_user_db_contigs.py "vrap_${sample}"
     done

     # Pass 2: run extract_longest_contig.py on each sample's virus_user_db_contigs.fasta,
     # writing <sample>_raw.fasta into the current directory.
     for sample in "${samples[@]}"; do
         python ~/Scripts/extract_longest_contig.py "vrap_${sample}/virus_user_db_contigs.fasta" "${sample}_raw.fasta"
     done
  8. Circularity checking of the contigs: method_1 using ccfind

     #Install ccfind
             #Detects circular genomes via terminal redundancy using BLAST or FASTA Smith–Waterman.
             git clone https://github.com/yosuken/ccfind
             cd ccfind
             # Ensure ssearch36, blastn, prodigal are installed
             ./ccfind <input.fasta> <output_dir>
    
             #https://github.com/wrpearson/fasta3
             wget http://faculty.virginia.edu/wrpearson/fasta/fasta36/fasta-36.3.8h.tar.gz
             tar -xzf fasta-36.3.8h.tar.gz
             cd fasta-36.3.8h
             make -f Makefile.linux64_sse2
             # After compiling, add it to your PATH:
             export PATH=$PWD:$PATH
    
     # Using ccfind check circularity of the contigs
     #docker pull sangerpathogens/circlator
     #docker run -it --rm -v /home/jhuang/DATA/Data_Sophie_HDV_Sequences:/data sangerpathogens/circlator bash
     #cd /data
     # Run ccfind on each sample's <sample>_raw.fasta; results go to ccfind_<sample>/.
     ccfind_samples=(
         010 048 073 083 093 1021 104 108 129 253 282 301 357 383 405 444 446 450 494 503
         69 738 81 879 94 995 HE290 HE554 HE695 HSVM
         020 068 079 090 097 103 107 109 141 279 293 341 370 394 442 445 449 478 497 550
         691 771 82 88 973 HE284 HE511 HE566 HE748
     )
     for sample in "${ccfind_samples[@]}"; do
         ~/Tools/ccfind/ccfind "${sample}_raw.fasta" "ccfind_${sample}"
         # Alternative via circlator (not used):
         #seqtk mergepe trimmed/${sample}_trimmed_P_1.fastq trimmed/${sample}_trimmed_P_2.fastq > ${sample}_interleaved.fq
         #circlator all --threads 16 vrap_010/virus_user_db_contigs.fasta 010_interleaved.fq 010_circlator_out
     done
    
     #print file size of all files circ.noTR.fasta under "ccfind_*/result/"
     find ccfind_*/result/ -name "circ.noTR.fasta" -exec ls -lh {} \; | awk '{print $5, $9}'
     find ccfind_*/result/ -name "circ.noTR.fasta" -exec stat -c "%s %n" {} \;
  9. Circularity checking of the contigs: method_2 using blastn

     # Manual inspection of the mate-pair read mapping at the start and end confirmed that three of the contigs were circular plasmids.
    
     # -- Circularity_verification: Confirm the contigs are circular using blastn --
     makeblastdb -in virus_user_db_contigs.fasta -dbtype nucl -out contigs_db
     blastn -query virus_user_db_contigs.fasta -db contigs_db -outfmt 6 -evalue 1e-10 > blast_results.txt
    
         CAP_1_length_1851       CAP_1_length_1851       100.000 166     0       0       1686    1851    1       166     4.14e-86        307
         CAP_1_length_1851       CAP_1_length_1851       100.000 166     0       0       1       166     1686    1851    4.14e-86        307
    
     samtools faidx virus_user_db_contigs.fasta CAP_1_length_1851 > CAP_1.fasta
     python3 ~/Scripts/process_circular.py CAP_1.fasta CAP_1_circular.fasta --overlap_len 166
     #Modify the sequence header to 010
  10. Calculate coverage

     # Map the trimmed paired-end reads of sample 010 to the circularised contig and
     # report the mean per-position depth.
     bwa index CAP_1_circular.fasta
     bwa mem CAP_1_circular.fasta ../trimmed/010_trimmed_P_1.fastq ../trimmed/010_trimmed_P_2.fastq > aligned_reads.sam
     # SAM -> sorted BAM, then index it.
     samtools view -bS aligned_reads.sam | samtools sort -o aligned_reads.sorted.bam
     samtools index aligned_reads.sorted.bam
     # coverage.txt: one line per reported position, depth in column 3.
     # NOTE(review): depth is run without -a here (the later coverage plots use "-m 0 -a"),
     # so zero-coverage positions are presumably not listed and the mean below is taken
     # over the reported positions only — confirm.
     samtools depth aligned_reads.sorted.bam > coverage.txt
     # Mean of column 3 over all lines (NR = number of lines in coverage.txt).
     awk '{sum+=$3} END {print "Average coverage:", sum/NR}' coverage.txt
     #Average coverage: 7754.49
    
     # For assemblies 010 and 068: index the fasta, map the trimmed read pair to it,
     # and produce a sorted, indexed BAM.
     for id in 010 068; do
         bwa index "${id}.fasta"
         bwa mem "${id}.fasta" "trimmed/${id}_trimmed_P_1.fastq" "trimmed/${id}_trimmed_P_2.fastq" > "${id}_aligned_reads.sam"
         samtools view -bS "${id}_aligned_reads.sam" | samtools sort -o "${id}_aligned_reads.sorted.bam"
         samtools index "${id}_aligned_reads.sorted.bam"
     done
  11. Copy and update the fasta-headers

     #!/bin/bash
     # Copy each sample's ccfind result (ccfind_<id>/result/circ.noTR.fasta) to <id>.fasta
     # and rewrite the copy as a single FASTA record whose header is the sample ID.
     # Samples without a ccfind result are reported on stderr and skipped.

     # List of IDs to process
     ids=(
     010 020 048 073 079 083 093 104 253 279 282 341 383 394 405 446 449 503 550 69 738 879 88
     HE290 HE511 HE554 HE695 HE748
     )

     for id in "${ids[@]}"; do
         src="ccfind_${id}/result/circ.noTR.fasta"
         dst="${id}.fasta"

         if [[ -f "$src" ]]; then
             cp "$src" "$dst"

             # Update header in the copied fasta.
             # The ID and path are handed to Ruby as arguments (ARGV) instead of being
             # spliced into the Ruby source, so unusual characters in them cannot break
             # or alter the script.
             # All header lines are dropped and every sequence line is joined into one
             # string, i.e. a multi-record input would be merged into a single record.
             ruby -e '
                 name, path = ARGV
                 seq = ""
                 File.foreach(path) do |line|
                     next if line.start_with?(">")
                     seq += line.strip
                 end
                 File.open(path, "w") do |f|
                     f.puts ">" + name
                     f.puts seq
                 end
             ' "$id" "$dst"

             echo "Processed $id"
         else
             # Diagnostics go to stderr so they are not mixed into captured stdout.
             echo "Warning: source file $src not found!" >&2
         fi
     done
  12. Generate coverage plot filtering some sequences, then align qualified sequences as input of RDP4

     #69 88 HE290 HE511 HE554 HE695 HE748 --> 069 088 290 511 554 695 748
     mv 69_my.fasta 069_my.fasta
     mv 88_my.fasta 088_my.fasta
     mv HE290_my.fasta 290_my.fasta
     mv HE511_my.fasta 511_my.fasta
     mv HE554_my.fasta 554_my.fasta
     mv HE695_my.fasta 695_my.fasta
     mv HE748_my.fasta 748_my.fasta
     for sample in  010 020 048 073 079 083 093 104 253 279 282 341 383 394 405 446 449 503 550 738 879    069 088 290 511 554 695 748; do
     mv ~/Downloads/${sample}.fasta .
     done
    
     # Create aliases for the trimmed reads under the renamed sample IDs
     # (zero-padded numbers, "HE" prefix dropped) so they match the renamed fasta files.
     # Runs in a subshell: the working directory of the calling shell is left unchanged,
     # because the commands that follow use paths relative to the parent directory
     # (trimmed_/..., HDV_genomes_/...).
     (
         cd trimmed_ || exit 1   # leaves only the subshell if trimmed_ is missing
         # old_id:new_id pairs; both mates (P_1, P_2) are linked for each pair.
         for pair in \
             69:069 88:088 HE290:290 HE511:511 HE554:554 HE695:695 HE748:748 \
             81:081 82:082 94:094 HE284:284 HE566:566; do
             old=${pair%%:*}
             new=${pair##*:}
             for mate in 1 2; do
                 ln -s "${old}_trimmed_P_${mate}.fastq" "${new}_trimmed_P_${mate}.fastq"
             done
         done
     )
    
     #update_file_header.sh
     update_fasta.py  # generate the HDV_genomes_
     conda activate plot-numpy1
     # map_and_plot <name> <sample>
     #   Maps trimmed_/<sample>_trimmed_P_{1,2}.fastq to HDV_genomes_/<name>.fasta and
     #   writes <name>_aligned_reads.sam, <name>_aligned_reads.sorted.bam (+ index),
     #   <name>_coverage.txt (samtools depth -m 0 -a) and <name>_cov.png.
     map_and_plot() {
         local name=$1
         local sample=$2
         local ref="HDV_genomes_/${name}.fasta"
         local sam="${name}_aligned_reads.sam"
         local bam="${name}_aligned_reads.sorted.bam"

         bwa index "$ref"
         bwa mem "$ref" "trimmed_/${sample}_trimmed_P_1.fastq" "trimmed_/${sample}_trimmed_P_2.fastq" > "$sam"
         samtools view -bS "$sam" | samtools sort -o "$bam"
         samtools index "$bam"
         samtools depth -m 0 -a "$bam" > "${name}_coverage.txt"
         python ~/Scripts/plot_coverage.py "${name}_coverage.txt" "${name}_cov.png"
     }

     # Coverage against HDV_genomes_/<sample>.fasta for all samples.
     for sample in \
         010 020 048 073 079 083 093 104 253 279 282 341 383 394 405 446 449 503 550 738 879 \
         069 088 290 511 554 695 748 \
         068 081 082 090 094 097 103 107 108 109 129 141 284 357 370 442 444 445 497 566 691 771 973 995 1021 \
         293 450 478 494; do
         map_and_plot "$sample" "$sample"
     done

     # Coverage against HDV_genomes_/<sample>_my.fasta for the samples that have one.
     for sample in \
         010 020 048 073 079 083 093 104 253 279 282 341 383 394 405 446 449 503 550 738 879 \
         069 088 290 511 554 695 748; do
         map_and_plot "${sample}_my" "$sample"
     done
    
     #-- Note: the following two assemblies were not sent to me because of their poor coverage --
     #301_coverage.txt
     #HSVM_coverage.txt
    
     # Convert every coverage plot to PDF (the glob matches both <id>_cov.png and <id>_my_cov.png).
     for png in *_cov.png; do
         convert "$png" "${png%.png}.pdf"
     done

     # Samples that have both <id>_cov.pdf and <id>_my_cov.pdf.
     with_my=(
         010 020 048 073 079 083 093 104 253 279 282 341 383 394 405 446 449 503 550 738 879
         069 088 290 511 554 695 748
     )
     # Samples that only have <id>_cov.pdf.
     cov_only=(
         068 081 082 090 094 097 103 107 108 109 129 141 284 357 370 442 444 445 497 566 691 771 973 995 1021
         293 450 478 494
     )

     # Document 1: <id>_cov.pdf followed by <id>_my_cov.pdf for each sample in with_my,
     # then the cov-only samples. (Output file name spelled as in the original command.)
     pages_all=()
     for id in "${with_my[@]}"; do
         pages_all+=("${id}_cov.pdf" "${id}_my_cov.pdf")
     done
     for id in "${cov_only[@]}"; do
         pages_all+=("${id}_cov.pdf")
     done
     pdftk "${pages_all[@]}" cat output coverges_all.pdf

     # Document 2: only the <id>_cov.pdf pages, same sample order.
     pages_cov=()
     for id in "${with_my[@]}" "${cov_only[@]}"; do
         pages_cov+=("${id}_cov.pdf")
     done
     pdftk "${pages_cov[@]}" cat output coverages.pdf
    
     #Not good quality: 082,094,129,284,566,691,293,450,478,494
    
     cat 010.fasta 020.fasta 048.fasta 073.fasta 079.fasta 083.fasta 093.fasta 104.fasta 253.fasta 279.fasta 282.fasta 341.fasta 383.fasta 394.fasta 405.fasta 446.fasta 449.fasta 503.fasta 550.fasta 738.fasta 879.fasta    069.fasta 088.fasta 290.fasta 511.fasta 554.fasta 695.fasta 748.fasta      068.fasta 081.fasta   090.fasta   097.fasta 103.fasta 107.fasta 108.fasta 109.fasta   141.fasta   357.fasta 370.fasta 442.fasta 444.fasta 445.fasta 497.fasta      771.fasta 973.fasta 995.fasta 1021.fasta               > all.fasta
     awk '/^>/ {print $1} !/^>/ {print}' all.fasta > all_.fasta
    
     mafft --adjustdirection --clustalout all_.fasta > all.aln
     mafft --auto all_.fasta > aligned.fasta
     #iqtree -s aligned.fasta -m GTR+G -bb 1000 -nt AUTO
     FastTree -gtr -gamma aligned.fasta > tree.nwk
  13. (NOT_NEEDED) rotate (fixstart) the genomes (not needed, since the genome provided has the same starting point)

     #python rotate_circular_genome.py ${sample}.fasta ${sample}_rotated.fasta ATGAGC
  14. (TODO) Draw plotTreeHeatmap

     #http://xgenes.com/article/article-content/383/presence-absence-table-and-graphics-for-selected-genes-in-data-patricia-sepi-7samples/#
  15. Report

     Several assemblies (082, 094, 129, 284, 566, 691, 293, 450, 478, 494) show poor quality. Please refer to the attached coverages.pdf for an overview of read mapping. I excluded these from the recombination analysis, which was carried out using RDP4, applying nine detection methods (RDP, GENECONV, Bootscan, MaxChi, Chimaera, SiScan, PhylPro, LARD, and 3Seq).
     Please note that the analysis assumes accurate assemblies—misassemblies can lead to false positives.
     The results, summarized in the attached Excel files, identify three sequences (503, 109, 394) as potential recombinants. However, I recommend interpreting these findings with caution.
         * Events 2 and 3: Breakpoints occur near the genome ends (1602–12 and 1451–134), where alignment artifacts are common.
         * Event 1: The recombinant region is less than 100 nucleotides, which may be below biological relevance.
         * Method flags: RDP flags Events 1 and 2 (~) as possibly caused by other evolutionary processes. All events are flagged (^) to indicate that the recombinant sequence may have been misidentified (one of the identified parents might be the recombinant).

Comprehensive smallRNA-7 profiling using exceRpt pipeline with full reference databases (v3)

  1. Input data

     # name                         condition
     # ----------------------------------------------
     # 0403_WaGa_wt                 parental_cells_1.fastq.gz
     # #0505_WaGa_wt_EV_RNA         untreated_1.fastq.gz
     # #0505_WaGa_sT_DMSO_EV_RNA    DMSO_control_1.fastq.gz
     # #0505_WaGa_sT_Dox_EV_RNA     sT_knockdown_1.fastq.gz
     # #0505_WaGa_scr_DMSO_EV_RNA   scr_DMSO_control_1.fastq.gz
     # #0505_WaGa_scr_Dox_EV_RNA    scr_control_1.fastq.gz
     # #1905_WaGa_wt_EV_RNA         untreated_2.fastq.gz
     # #1905_WaGa_sT_DMSO_EV_RNA    DMSO_control_2.fastq.gz
     # #1905_WaGa_sT_Dox_EV_RNA     sT_knockdown_2.fastq.gz
     # #1905_WaGa_scr_DMSO_EV_RNA   scr_DMSO_control_2.fastq.gz
     # #1905_WaGa_scr_Dox_EV_RNA    scr_control_2.fastq.gz
     #
     # WaGa_wt_cells_1              parental_cells_2.fastq.gz
     # WaGa_wt_cells_2              parental_cells_3.fastq.gz
     # #2001_WaGa_sT_DMSO           DMSO_control_3.fastq.gz
     # #2001_WaGa_sT_Dox            sT_knockdown_3.fastq.gz
     # #2001_WaGa_scr_DMSO          scr_DMSO_control_3.fastq.gz
     # #2001_WaGa_scr_Dox           scr_control_3.fastq.gz
     #
     # WaGa_wt_cells_1              parental_cells_2_R2.fastq.gz
     # WaGa_wt_cells_2              parental_cells_3_R2.fastq.gz
     # #2001_WaGa_sT_DMSO           DMSO_control_3_R2.fastq.gz
     # #2001_WaGa_sT_Dox            sT_knockdown_3_R2.fastq.gz
     # #2001_WaGa_scr_DMSO          scr_DMSO_control_3_R2.fastq.gz
     # #2001_WaGa_scr_Dox           scr_control_3_R2.fastq.gz
    
     # Create the raw_data directory (-p: no error if it already exists) and enter it via
     # the same absolute path, so the symlinks below are created there regardless of the
     # directory these commands are started from.
     mkdir -p ~/DATA/Data_Ute/Data_Ute_smallRNA_7/raw_data
     cd ~/DATA/Data_Ute/Data_Ute_smallRNA_7/raw_data
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_3/220617_NB501882_0371_AH7572BGXM/nf774/0403_WaGa_wt_S20_R1_001.fastq.gz parental_cells_1.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf930/01_0505_WaGa_wt_EV_RNA_S1_R1_001.fastq.gz untreated_1.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf931/02_0505_WaGa_sT_DMSO_EV_RNA_S2_R1_001.fastq.gz DMSO_control_1.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf932/03_0505_WaGa_sT_Dox_EV_RNA_S3_R1_001.fastq.gz sT_knockdown_1.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf933/04_0505_WaGa_scr_DMSO_EV_RNA_S4_R1_001.fastq.gz scr_DMSO_control_1.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf934/05_0505_WaGa_scr_Dox_EV_RNA_S5_R1_001.fastq.gz scr_control_1.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf935/06_1905_WaGa_wt_EV_RNA_S6_R1_001.fastq.gz untreated_2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf936/07_1905_WaGa_sT_DMSO_EV_RNA_S7_R1_001.fastq.gz DMSO_control_2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf937/08_1905_WaGa_sT_Dox_EV_RNA_S8_R1_001.fastq.gz sT_knockdown_2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf938/09_1905_WaGa_scr_DMSO_EV_RNA_S9_R1_001.fastq.gz scr_DMSO_control_2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/231016_NB501882_0435_AHG7HMBGXV/nf939/10_1905_WaGa_scr_Dox_EV_RNA_S10_R1_001.fastq.gz scr_control_2.fastq.gz
    
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf961/WaGaWTcells_1_S1_R1_001.fastq.gz parental_cells_2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf962/WaGaWTcells_2_S2_R1_001.fastq.gz parental_cells_3.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf971/2001_WaGa_sT_DMSO_S3_R1_001.fastq.gz DMSO_control_3.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf972/2001_WaGa_sT_Dox_S4_R1_001.fastq.gz sT_knockdown_3.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf973/2001_WaGa_scr_DMSO_S5_R1_001.fastq.gz scr_DMSO_control_3.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf974/2001_WaGa_scr_Dox_S6_R1_001.fastq.gz scr_control_3.fastq.gz
    
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf961/WaGaWTcells_1_S1_R2_001.fastq.gz parental_cells_2_R2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf962/WaGaWTcells_2_S2_R2_001.fastq.gz parental_cells_3_R2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf971/2001_WaGa_sT_DMSO_S3_R2_001.fastq.gz DMSO_control_3_R2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf972/2001_WaGa_sT_Dox_S4_R2_001.fastq.gz sT_knockdown_3_R2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf973/2001_WaGa_scr_DMSO_S5_R2_001.fastq.gz scr_DMSO_control_3_R2.fastq.gz
     ln -s ~/DATA/Data_Ute/Data_Ute_smallRNA_7/250411_VH00358_135_AAGKGLHM5/nf974/2001_WaGa_scr_Dox_S6_R2_001.fastq.gz scr_control_3_R2.fastq.gz
    
     #awk '{print $2}' temp3
  2. Adapter trimming

     #some common adapter sequences from different kits for reference:
     #    - TruSeq Small RNA (Illumina): TGGAATTCTCGGGTGCCAAGG
     #    - Small RNA Kits V1 (Illumina): TCGTATGCCGTCTTCTGCTTGT
     #    - Small RNA Kits V1.5 (Illumina): ATCTCGTATGCCGTCTTCTGCTTG
     #    - NEXTflex Small RNA Sequencing Kit v3 for Illumina Platforms (Bioo Scientific): TGGAATTCTCGGGTGCCAAGG
     #    - LEXOGEN Small RNA-Seq Library Prep Kit (Illumina): TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC *
     # Start from the project directory: the previous step ends inside raw_data/, and both
     # the relative input path ../raw_data/ and the exceRpt docker mount
     # (~/DATA/Data_Ute/Data_Ute_smallRNA_7/trimmed) expect trimmed/ to sit next to raw_data/.
     cd ~/DATA/Data_Ute/Data_Ute_smallRNA_7
     mkdir -p trimmed; cd trimmed
     # Trim the 3' adapter from every library (R2 files are treated as independent
     # single-end inputs here); the cutadapt reports are appended to LOG.
     for sample in parental_cells_1 untreated_1 DMSO_control_1 sT_knockdown_1 scr_DMSO_control_1 scr_control_1 untreated_2 DMSO_control_2 sT_knockdown_2 scr_DMSO_control_2 scr_control_2 parental_cells_2 parental_cells_3 DMSO_control_3 sT_knockdown_3 scr_DMSO_control_3 scr_control_3 parental_cells_2_R2 parental_cells_3_R2 DMSO_control_3_R2 sT_knockdown_3_R2 scr_DMSO_control_3_R2 scr_control_3_R2; do
       echo "------------------------------------ cutadapting the ${sample} -----------------------------------" >> LOG
       cutadapt -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -q 20 --minimum-length 5 --trim-n -o "${sample}.fastq.gz" "../raw_data/${sample}.fastq.gz" >> LOG
     done
    
     # Compare the trimming statistics in the LOG file to see how the R1 and R2 reads differ.
    
     #Reads with adapters:                10,114,799 (79.9%)
     #Reads with adapters:                   240,366 (1.9%)
     #Reads with adapters:                   233,380 (1.6%)
     #Reads with adapters:                   230,664 (1.3%)
     #Reads with adapters:                   207,717 (1.3%)
     #Reads with adapters:                   186,080 (1.2%)
     #Reads with adapters:                   577,429 (1.5%)
     #Reads with adapters:                   268,867 (1.7%)
     #Reads with adapters:                   325,300 (1.4%)
     #Reads with adapters:                   314,540 (1.5%)
     #Reads with adapters:                   264,349 (1.5%)
    
     #Reads with adapters:                   299,677 (0.7%)
     #Reads with adapters:                   108,801 (0.6%)
     #Reads with adapters:                     5,095 (0.0%)
     #Reads with adapters:                     6,989 (0.0%)
     #Reads with adapters:                     3,868 (0.0%)
     #Reads with adapters:                     2,173 (0.0%)
    
     #Reads with adapters:                   615,334 (1.4%)
     #Reads with adapters:                   258,388 (1.5%)
     #Reads with adapters:                   294,325 (1.4%)
     #Reads with adapters:                   336,932 (1.8%)
     #Reads with adapters:                   239,288 (2.0%)
     #Reads with adapters:                   117,544 (1.5%)
    
     #Alternatively, the adapter can be trimmed by exceRpt's built-in step, since 'grep "TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC" /mnt/nvme0n1p1/MyexceRptDatabase/adapters/adapters.fa | wc -l' returns 48 records. However, trimming the adapter explicitly beforehand is the safer option.
    
     #TODO: check whether R1 and R2 have a similar data distribution, then decide whether only R1 or both should be used for the downstream analysis.
     # Concatenate the trimmed R1 and R2 files of each paired library into <name>_merged.fastq.gz.
     for lib in parental_cells_2 parental_cells_3 DMSO_control_3 sT_knockdown_3 scr_DMSO_control_3 scr_control_3; do
         cat "${lib}.fastq.gz" "${lib}_R2.fastq.gz" > "${lib}_merged.fastq.gz"
     done
    
     #Scenario   Option to use
     #-----------------------------
     #Trimming Read 1 only   -a
     #Trimming Read 2 only   -a
     #Trimming paired-end together   -a and -A
     #cutadapt -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -q 20 --minimum-length 5 --trim-n -o ${sample}_R2_trimmed.fastq.gz ../raw_data/${sample}_R2.fastq.gz
     # Paired-end trimming of the six libraries that have an R2 file.
     # Written as a loop because outside one ${sample} is just the stale value left by the
     # previous loop; the inputs use the names created in raw_data/
     # (<sample>.fastq.gz for R1, <sample>_R2.fastq.gz for R2).
     #   -a / -A : 3' adapter for read 1 / read 2
     #   -o / -p : trimmed output for read 1 / read 2
     # NOTE(review): the same adapter sequence is given for -A as for -a; the TODO in the
     # exceRpt section mentions a separate read-2 adapter — confirm which sequence to use.
     for sample in parental_cells_2 parental_cells_3 DMSO_control_3 sT_knockdown_3 scr_DMSO_control_3 scr_control_3; do
       cutadapt \
         -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC \
         -A TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC \
         -q 20 --minimum-length 5 --trim-n \
         -o "${sample}_R1_trimmed.fastq.gz" -p "${sample}_R2_trimmed.fastq.gz" \
         "../raw_data/${sample}.fastq.gz" "../raw_data/${sample}_R2.fastq.gz"
     done
    
     # -- check if it is necessary to remove adapter from 5'-end --
     #(Option_1) cutadapt -g TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -o /dev/null --report=minimal 0505_WaGa_wt_cutadapted.fastq.gz --> The trimming statistics in the output will show how often 5'-end adapters were removed.
     #(Option 2) zcat your_sample.fastq.gz | grep 'TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC' | head -n 20
     #(Option 3) fastqc your_sample.fastq.gz
     #Open the generated HTML report and check:
     #    The "Overrepresented sequences" section for adapter sequences.
     #    The "Per base sequence content" plot to see if there are unexpected sequences at the start of reads.
     #(If check results shows both ends contain adapter) cutadapt -g TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -q 20 --minimum-length 10 -o ${sample}_trimmed.fastq.gz ${sample}.fastq.gz >> LOG2
     #    -g → Trims 5'-end adapters
     #    -a → Trims 3'-end adapters; -a TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC → Specifies the adapter sequence to be removed from the 3' end of the reads. The sequence provided is common in RNA-seq libraries (e.g., Illumina small RNA sequencing).
     #    -q 20 → Performs quality trimming at both read ends, removing bases with a Phred quality score below 20.
  3. Install exceRpt (https://github.gersteinlab.org/exceRpt/)

     # Pull the exceRpt image and download/unpack its databases into
     # /mnt/nvme0n1p1/MyexceRptDatabase (the directory later mounted as /exceRpt_DB).
     docker pull rkitchen/excerpt
     # Create the directory at the same absolute path that is entered on the next line
     # (a bare "mkdir MyexceRptDatabase" would create it in the current directory instead).
     mkdir -p /mnt/nvme0n1p1/MyexceRptDatabase
     cd /mnt/nvme0n1p1/MyexceRptDatabase
     # Main organism genome database (hg38).
     wget http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_hg38_lowmem.tgz
     tar -xvf exceRptDB_v4_hg38_lowmem.tgz
     # Other available genome databases:
     #http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_hg19_lowmem.tgz
     #http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_hg38_lowmem.tgz
     #http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_mm10_lowmem.tgz
     # Exogenous miRNA/rRNA and exogenous genome databases.
     wget http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_EXOmiRNArRNA.tgz
     tar -xvf exceRptDB_v4_EXOmiRNArRNA.tgz
     wget http://org.gersteinlab.excerpt.s3-website-us-east-1.amazonaws.com/exceRptDB_v4_EXOGenomes.tgz
     tar -xvf exceRptDB_v4_EXOGenomes.tgz
  4. Run exceRpt

     #[---- REAL_RUNNING_COMPLETE_DB ---->]
     #NOTE: if the input files are not renamed beforehand, all files inside _CORE_RESULTS_v4.6.3.tgz have to be renamed recursively by removing "_cutadapted.fastq" from their names (unpack first, rename, re-pack, then mv to ../results_g).
     cd trimmed
     # Earlier variant for inputs named *_cutadapted.fastq.gz (kept for reference):
     #for file in *_cutadapted.fastq.gz; do
     #    echo "mv \"$file\" \"${file/_cutadapted.fastq/}\""
     #done
     # Dry run: only PRINTS one mv command per file, nothing is renamed here.
     # ${file/.fastq/} removes the first ".fastq" from the name (X.fastq.gz -> X.gz),
     # which is the <sample>.gz form that INPUT_FILE_PATH uses in the exceRpt run below.
     # Review the printed commands and execute them to actually rename the files.
     for file in *.fastq.gz; do
         echo "mv \"$file\" \"${file/.fastq/}\""
     done
    
     # Create the output directory at the path that is mounted into the container below.
     # (After "cd trimmed" a bare "mkdir results_exo6" would create trimmed/results_exo6,
     # which is not the directory docker mounts as /exceRptOutput.)
     mkdir -p ~/DATA/Data_Ute/Data_Ute_smallRNA_7/results_exo6
     # Run exceRpt once per library (hg38 as main genome, exogenous mapping on).
     # Mounts: trimmed/ -> /exceRptInput, results_exo6/ -> /exceRptOutput, database -> /exceRpt_DB.
     # Input files are expected as <sample>.gz inside trimmed/.
     for sample in parental_cells_2 parental_cells_3 DMSO_control_3 sT_knockdown_3 scr_DMSO_control_3 scr_control_3    parental_cells_2_R2 parental_cells_3_R2 DMSO_control_3_R2 sT_knockdown_3_R2 scr_DMSO_control_3_R2 scr_control_3_R2    parental_cells_2_merged parental_cells_3_merged DMSO_control_3_merged sT_knockdown_3_merged scr_DMSO_control_3_merged scr_control_3_merged    parental_cells_1 untreated_1 DMSO_control_1 sT_knockdown_1 scr_DMSO_control_1 scr_control_1 untreated_2 DMSO_control_2 sT_knockdown_2 scr_DMSO_control_2 scr_control_2; do
         docker run -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/trimmed:/exceRptInput \
                    -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/results_exo6:/exceRptOutput \
                    -v /mnt/nvme0n1p1/MyexceRptDatabase:/exceRpt_DB \
                    -t rkitchen/excerpt \
                    INPUT_FILE_PATH="/exceRptInput/${sample}.gz" MAIN_ORGANISM_GENOME_ID=hg38 N_THREADS=50 JAVA_RAM='200G' MAP_EXOGENOUS=on
     done
    
     #TODO: DEBUG running exceRpt within docker container
     #docker run -it --rm \
     #  -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/trimmed:/exceRptInput \
     #  -v ~/DATA/Data_Ute/Data_Ute_smallRNA_7/results_exo6:/exceRptOutput \
     #  -v /mnt/nvme0n1p1/MyexceRptDatabase:/exceRpt_DB \
     #  --entrypoint bash \
     #  rkitchen/excerpt
     #bash /exceRpt_bin/exceRpt_smallRNA   INPUT_FILE_PATH=/exceRptInput/sample1.fastq.gz   MAIN_ORGANISM_GENOME_ID=hg38   N_THREADS=8   JAVA_RAM='16G'   MAP_EXOGENOUS=on
    
     #DEBUG the excerpt env
     docker inspect rkitchen/excerpt:latest
     # Without /bin/bash → May run and exit immediately
     #docker run -it rkitchen/excerpt
     # With /bin/bash → Stays open for interaction
     docker run -it --entrypoint /bin/bash rkitchen/excerpt
    
     #TODO: Read 2 contains a different adapter (adapter2); test whether the pipeline can identify and remove it!
  5. Processing exceRpt output from multiple samples

     mkdir summaries_exo6
     cd ~/DATA/Data_Ute/Data_Ute_smallRNA_7/exceRpt-master
     (r_env) jhuang@WS-2290C:~/DATA/Data_Ute/Data_Ute_smallRNA_7/exceRpt-master$ R
     #WARNING: need to reload the R-script after each change of the script.
     source("mergePipelineRuns_functions.R")
    
     getwd()
     #[1] "/media/jhuang/Elements/Data_Ute/Data_Ute_smallRNA_7/exceRpt-master"
     processSamplesInDir("../results_exo6/", "../summaries_exo6")
    
     #~/Tools/csv2xls-0.4/csv_to_xls.py exceRpt_miRNA_ReadsPerMillion.txt exceRpt_tRNA_ReadsPerMillion.txt exceRpt_piRNA_ReadsPerMillion.txt -d$'\t' -o exceRpt_results_detailed.xls
  6. mv results_exo6 results_exo7; mkdir results_exo6; sudo mv _R2 ../results_exo6; sudo mv _merged ../results_exo6

     mkdir summaries_exo7
     processSamplesInDir("../results_exo7/", "../summaries_exo7")
  7. Re-draw the heatmap plots

     # -- R-code --
    
         # Load required library
         library(dplyr)
    
         # Original vectors
         samples_orig <- c("untreated_2", "parental_cells_1", "parental_cells_2", "parental_cells_3", "scr_control_3",
                         "DMSO_control_3", "scr_DMSO_control_3", "sT_knockdown_3", "untreated_1", "DMSO_control_1",
                         "scr_control_1", "scr_DMSO_control_1", "DMSO_control_2", "sT_knockdown_2", "scr_control_2",
                         "scr_DMSO_control_2", "sT_knockdown_1")
    
         categories_orig <- c("reads_used_for_alignment", "genome", "miRNA_sense", "miRNA_antisense",
                             "miRNAprecursor_sense", "miRNAprecursor_antisense", "tRNA_sense", "tRNA_antisense",
                             "piRNA_sense", "piRNA_antisense", "gencode_sense", "gencode_antisense",
                             "circularRNA_sense", "circularRNA_antisense", "not_mapped_to_genome_or_libs",
                             "repetitiveElements", "endogenous_gapped", "exogenous_miRNA", "exogenous_rRNA",
                             "exogenous_genomes")
    
         # Provided samples and categories (desired order and format)
         samples <- c("parental_cells_1","parental_cells_2","parental_cells_3",
                     "untreated_1","untreated_2",
                     "scr_control_1","scr_control_2","scr_control_3",
                     "DMSO_control_1","DMSO_control_2","DMSO_control_3",
                     "scr_DMSO_control_1","scr_DMSO_control_2","scr_DMSO_control_3",
                     "sT_knockdown_1","sT_knockdown_2","sT_knockdown_3")
    
         categories <- c("reads_used_for_alignment", "genome", "miRNA", "miRNAprecursor", "tRNA", "piRNA",
                         "gencode", "circularRNA", "not_mapped_to_genome_or_libs", "repetitiveElements",
                         "endogenous_gapped", "exogenous_miRNA", "exogenous_rRNA", "exogenous_genomes")
    
         # Original data matrix
         data_orig <- matrix(c(
                         100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0,
                         21.3, 97.4, 99.0, 99.0, 89.2, 91.9, 90.6, 91.0, 44.9, 65.6, 69.2, 73.3, 71.9, 81.4, 78.3, 79.3, 78.5,
                         3.5, 3.7, 88.7, 86.6, 70.9, 81.1, 77.9, 79.3, 7.1, 12.9, 7.0, 7.5, 14.6, 16.2, 14.7, 15.3, 15.8,
                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                         0.0, 0.2, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.1, 0.0,
                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                         8.4, 0.5, 2.9, 3.0, 1.7, 1.3, 1.2, 1.4, 25.3, 41.2, 49.0, 52.1, 33.9, 45.3, 41.4, 47.3, 48.8,
                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                         0.1, 0.0, 0.4, 0.5, 0.9, 1.6, 1.1, 1.4, 0.4, 0.4, 0.5, 0.4, 0.6, 0.3, 0.4, 0.4, 0.5,
                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                         6.7, 86.0, 5.3, 6.9, 7.9, 4.6, 5.5, 4.9, 8.6, 8.5, 10.8, 11.2, 18.3, 15.7, 16.6, 12.9, 10.8,
                         0.7, 0.1, 0.2, 0.2, 0.5, 0.2, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2, 0.3, 0.2, 0.3, 0.2, 0.2,
                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                         78.7, 2.6, 1.0, 1.0, 10.8, 8.1, 9.4, 9.0, 55.1, 34.4, 30.8, 26.7, 28.1, 18.6, 21.7, 20.7, 21.5,
                         0.1, 0.0, 0.0, 0.0, 0.2, 0.1, 0.1, 0.2, 0.3, 0.3, 0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1,
                         0.3, 0.0, 0.1, 0.1, 0.7, 0.5, 0.6, 0.5, 1.3, 0.9, 0.8, 0.7, 0.6, 0.3, 0.3, 0.3, 0.5,
                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                         0.2, 0.0, 0.0, 0.0, 0.3, 0.2, 0.2, 0.2, 1.5, 0.8, 0.8, 0.8, 0.7, 0.3, 0.3, 0.2, 0.5,
                         3.5, 0.0, 0.0, 0.0, 2.7, 1.6, 3.2, 2.2, 17.7, 9.3, 9.4, 6.9, 5.6, 2.4, 3.4, 3.3, 4.4), nrow = 20, byrow = TRUE)
    
         rownames(data_orig) <- categories_orig
         colnames(data_orig) <- samples_orig
    
         # Collapse the "<prefix>_sense" and "<prefix>_antisense" rows of data_orig
         # into one per-sample vector.
         #   prefix: category name without the strand suffix, e.g. "miRNA".
         # Returns the column-wise sum of whichever of the two rows are present
         # (both, sense only, or antisense only); a vector of zeros, one per
         # column of data_orig, when neither row exists.
         # data_orig is read from the enclosing environment.
         merge_rows <- function(prefix) {
             strand_rows <- paste0(prefix, c("_sense", "_antisense"))
             present <- strand_rows[strand_rows %in% rownames(data_orig)]
             if (length(present) == 0) {
                 return(rep(0, ncol(data_orig)))
             }
             # drop = FALSE keeps a single selected row as a matrix, so colSums()
             # handles the one-strand and the two-strand case alike.
             colSums(data_orig[present, , drop = FALSE])
         }
    
         # Construct merged data
         # One row per final category: strand-independent categories are copied
         # straight from data_orig, strand-specific ones go through merge_rows().
         # The argument names given to rbind() become the row names of data_merged.
         data_merged <- rbind(
         reads_used_for_alignment = data_orig["reads_used_for_alignment", ],
         genome = data_orig["genome", ],
         miRNA = merge_rows("miRNA"),
         miRNAprecursor = merge_rows("miRNAprecursor"),
         tRNA = merge_rows("tRNA"),
         piRNA = merge_rows("piRNA"),
         gencode = merge_rows("gencode"),
         circularRNA = merge_rows("circularRNA"),
         not_mapped_to_genome_or_libs = data_orig["not_mapped_to_genome_or_libs", ],
         repetitiveElements = data_orig["repetitiveElements", ],
         endogenous_gapped = data_orig["endogenous_gapped", ],
         exogenous_miRNA = data_orig["exogenous_miRNA", ],
         exogenous_rRNA = data_orig["exogenous_rRNA", ],
         exogenous_genomes = data_orig["exogenous_genomes", ]
         )
    
         # Keep the requested samples that exist in data_merged, in the requested order
         sample_is_present <- samples %in% colnames(data_merged)
         ordered_samples <- samples[sample_is_present]
         data_final <- data_merged[, ordered_samples]

         # Relabel two categories for the report:
         # genome --> human_genome, not_mapped_to_genome_or_libs --> not_mapped_to_human_genome
         row_labels <- rownames(data_final)
         row_labels[row_labels == "genome"] <- "human_genome"
         row_labels[row_labels == "not_mapped_to_genome_or_libs"] <- "not_mapped_to_human_genome"
         rownames(data_final) <- row_labels

         # Export the table (row names included) for the Python plotting step
         write.xlsx(data_final, file = "distribution_heatmap.xlsx", rowNames = TRUE)
    
     # -- Python-code --
    
         python ~/Scripts/plot_distribution_heatmap.py distribution_heatmap.xlsx distribution_heatmap.png
    
                 import sys

                 import pandas as pd
                 import numpy as np
                 import seaborn as sns
                 import matplotlib.pyplot as plt

                 # Usage: python plot_distribution_heatmap.py [input.xlsx] [output.png]
                 # Both arguments are optional; they default to the file names used in this workflow.
                 input_path = sys.argv[1] if len(sys.argv) > 1 else "distribution_heatmap.xlsx"
                 output_path = sys.argv[2] if len(sys.argv) > 2 else "distribution_heatmap.png"

                 # Read the Excel table; the first column holds the row labels (read categories),
                 # the remaining columns are the samples.
                 df = pd.read_excel(input_path, index_col=0)

                 # Convert percentages to decimals
                 df = df / 100.0

                 # Plot heatmap
                 plt.figure(figsize=(14, 6))
                 sns.heatmap(df, annot=True, cmap="coolwarm", fmt=".3f", linewidths=0.5, cbar_kws={'label': 'Fraction Aligned Reads'})

                 # Improve layout
                 plt.title("Heatmap of Read Alignments by Category and Sample", fontsize=14)
                 plt.xlabel("Sample", fontsize=12)
                 plt.ylabel("Read Category", fontsize=12)
                 plt.xticks(rotation=15, ha="right", fontsize=10)
                 plt.yticks(rotation=0, fontsize=10)
                 plt.tight_layout()

                 # Save as PNG
                 plt.savefig(output_path, dpi=300, bbox_inches="tight")

                 # Show plot
                 plt.show()
  8. Key steps of log: This log details the execution of a small RNA sequencing data analysis pipeline using the exceRpt tool (version 4.6.3) in a Docker container. The pipeline processes a human small RNA-seq dataset (testData_human.fastq.gz) with the following key steps:

    • Initial Setup

      • Docker container launched with mounted volumes for input/output and reference databases.
      • Parameters: hg38 genome, 50 threads, 200GB Java memory, exogenous mapping enabled.
      • Docker container launched with input/output volume mounts
      • 50 threads allocated with 200GB Java memory
      • hg38 reference genome specified
    • Preprocessing

      • Adapter detection and trimming using known adapter sequences.
      • Quality filtering (Phred score ≥20, length ≥18nt).
      • Removal of homopolymer-rich reads and low-quality sequences.
      • Input FASTQ file decompressed (testData_human.fastq.gz)
      • Adapter sequences identified using adapters.fa
      • Quality encoding determined (Phred+33/64)
      • Adapter clipping performed (TCGTATGCCGTCTTCTGCTTG)
      • Quality filtering (reads kept only if at least 80% of their bases have Phred quality ≥ 20)
      • Homopolymer repeats filtered (max 66% single nt)
    • Contaminant Filtering

      • Alignment against UniVec contaminants and ribosomal RNA (rRNA) databases.
      • 322 reads processed, with statistics tracked at each step.
    • Endogenous RNA Analysis

      • Alignment to human genome (hg38) and transcriptome.
      • Quantification of small RNA types:
        • miRNA (mature/precursor): Sense strands detected (antisense absent).
        • tRNA, piRNA, gencode transcripts: Only sense strands reported.
        • circRNA: Not detected in this dataset.
      • Coverage and complexity metrics calculated.
    • Exogenous RNA Analysis

      • Screened for microbial/viral RNAs:
        • miRNA databases (miRBase).
        • Ribosomal RNA databases.
        • Comprehensive genomic databases (bacteria, plants, metazoa, fungi, viruses).
      • Taxonomic classification of exogenous hits performed.
    • QC & Results

      • QC Result: PASS (based on transcriptome/genome ratio >0.5 and >100k transcriptome reads).
      • Key Metrics:
        • Input Reads: ~1.5 million (exact count not shown in log).
        • Genome Mapped: Majority of reads.
        • Transcriptome Complexity: Calculated ratio.
      • Core results compressed into testData_human.fastq_CORE_RESULTS_v4.6.3.tgz.
    • Notable Observations:

      • Antisense Reads: Absent for miRNA, tRNA, and piRNA (common in small RNA-seq).
      • Potential Issues: Some files (e.g., antisense counts) were missing but did not disrupt pipeline.
      • Resource Usage: High RAM (200GB) and multi-threading (50 cores) employed for efficiency.
    • Output Files:

      • Quantified counts for endogenous RNAs (miRNA, tRNA, etc.).
      • Exogenous RNA alignments with taxonomic annotations.
      • QC report, adapter sequences, and alignment statistics.
  9. Downstream analysis using R for miRNAs

     # see http://xgenes.com/article/article-content/288/draw-plots-for-mirnas-generated-by-compsra/
     # see http://xgenes.com/article/article-content/289/draw-plots-for-pirna-generated-by-compsra/
     # see http://xgenes.com/article/article-content/290/draw-plots-for-snrna-generated-by-compsra/
    
     #Input file
     #exceRpt_miRNA_ReadCounts.txt
     #exceRpt_piRNA_ReadCounts.txt
    
     cd ~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7
     mamba activate r_env
     R
     #> .libPaths()
     #[1] "/home/jhuang/mambaforge/envs/r_env/lib/R/library"
    
     #BiocManager::install("AnnotationDbi")
     #BiocManager::install("clusterProfiler")
     #BiocManager::install(c("ReactomePA","org.Hs.eg.db"))
     #BiocManager::install("limma")
     #BiocManager::install("sva")
     #install.packages("writexl")
     #install.packages("openxlsx")
     library("AnnotationDbi")
     library("clusterProfiler")
     library("ReactomePA")
     library("org.Hs.eg.db")
     library(DESeq2)
     library(gplots)
     library(limma)
     library(sva)
     #library(writexl)  #d.raw_with_rownames <- cbind(RowNames = rownames(d.raw), d.raw); write_xlsx(d.raw, path = "d_raw.xlsx");
     library(openxlsx)
    
     setwd("../summaries_exo7/")
     # read.delim2() parses with dec=",", so columns holding "."-decimals do not
     # arrive as numeric; they are converted explicitly further down.
     d.raw<- read.delim2("exceRpt_miRNA_ReadCounts.txt",sep="\t", header=TRUE, row.names=1)

     # Desired column order
     desired_order <- c(
         "parental_cells_1", "parental_cells_2", "parental_cells_3",
         "untreated_1", "untreated_2",
         "scr_control_1", "scr_control_2", "scr_control_3",
         "DMSO_control_1", "DMSO_control_2", "DMSO_control_3",
         "scr_DMSO_control_1", "scr_DMSO_control_2", "scr_DMSO_control_3",
         "sT_knockdown_1", "sT_knockdown_2", "sT_knockdown_3"
     )
     # Check for missing or misnamed columns BEFORE subsetting: indexing with an
     # unknown name stops with "undefined columns selected" and does not say which.
     missing_cols <- setdiff(desired_order, colnames(d.raw))
     if (length(missing_cols) > 0) {
         stop("Missing or misnamed sample columns in exceRpt_miRNA_ReadCounts.txt: ",
              paste(missing_cols, collapse=", "))
     }
     # Reorder columns
     d.raw <- d.raw[, desired_order]
     #sapply(d.raw, is.numeric)
     # Go through as.character() so that a column read as a factor is converted
     # by its printed value and not by its internal level code.
     d.raw[] <- lapply(d.raw, function(x) as.numeric(as.character(x)))
     # DESeq2 needs integer counts
     d.raw <- round(d.raw)
     write.csv(d.raw, file ="d_raw.csv")
     write.xlsx(d.raw, file = "d_raw.xlsx", rowNames = TRUE)
    
     # ------ Code sent to Ute ------
     #d.raw <- read.delim2("d_raw.csv",sep=",", header=TRUE, row.names=1)
     # Sample annotation: one entry per column of d.raw, in column order.
     # Group sizes drive the repeated labels, so the vectors cannot drift apart.
     n_per_group <- c(parental_cells=3, untreated=2, scr_control=3, DMSO_control=3, scr_DMSO_control=3, sT_knockdown=3)
     group_labels <- rep(names(n_per_group), times=n_per_group)

     parental_or_EV = as.factor(ifelse(group_labels == "parental_cells", "parental", "EV"))
     #donor = as.factor(c("0505","1905", "0505","1905", "0505","1905", "0505","1905", "0505","1905", "0505","1905"))
     # Sequencing batch of each sample (irregular, therefore spelled out)
     batch = as.factor(c(
         "Aug22","March25","March25",
         "Sep23","Sep23",
         "Sep23","Sep23","March25",
         "Sep23","Sep23","March25",
         "Sep23","Sep23","March25",
         "Sep23","Sep23","March25"))

     replicates = as.factor(group_labels)
     # "<group>_<running number within the group>", e.g. "untreated_2"
     ids = as.factor(paste(group_labels, sequence(n_per_group), sep="_"))

     cData = data.frame(row.names=colnames(d.raw), replicates=replicates, ids=ids, batch=batch, parental_or_EV=parental_or_EV)
     dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~replicates+batch)
    
     # Filter low-count miRNAs
     # Keeps only rows whose counts summed over all samples exceed 10.
     dds <- dds[ rowSums(counts(dds)) > 10, ]  #1322-->903
     # rlog-transformed values; used for the PCA plots and the heatmap below.
     rld <- rlogTransformation(dds)
    
     # -- before pca --
     # pca.png: samples coloured by the "replicates" column of colData.
     png("pca.png", 1200, 800)
     plotPCA(rld, intgroup=c("replicates"))
     #plotPCA(rld, intgroup = c("replicates", "batch"))
     #plotPCA(rld, intgroup = c("replicates", "ids"))
     #plotPCA(rld, "batch")
     dev.off()
     # pca2.png: same data, samples coloured by the "batch" column.
     png("pca2.png", 1200, 800)
     #plotPCA(rld, intgroup=c("replicates"))
     #plotPCA(rld, intgroup = c("replicates", "batch"))
     #plotPCA(rld, intgroup = c("replicates", "ids"))
     plotPCA(rld, "batch")
     dev.off()
    
     # Batch Effect Removal Methods:
    
     #Applying batch effect correction techniques such as ComBat or SVA (Surrogate Variable Analysis).
    
     #- Using ComBat (from the sva package):
    
         # Assume `rld` is the rlog-transformed counts from DESeq2
         # ComBat removes the batch term while keeping the replicates effect (mod).
         rld_corrected <- ComBat(dat = assay(rld), batch = cData$batch, mod = model.matrix(~ replicates, data = cData))

         # Visualize corrected PCA
         pca_corrected <- prcomp(t(rld_corrected))
         # Dedicated file name: "pca_after_batch_correction.png" is written again by
         # the removeBatchEffect section further down, which would overwrite this plot.
         png("pca_after_batch_correction_ComBat.png", 1200, 800)
         plot(pca_corrected$x[, 1:2], col = cData$replicates)
         dev.off()
    
     #- Using SVA (Surrogate Variable Analysis):
    
         #If batch effects are strong and you want to remove hidden batch effects, SVA can help identify latent factors. After identifying these latent factors, you can add them to the DESeq2 design.
    
         # Assume that rld contains the rlog-transformed data
         mod <- model.matrix(~ replicates, data = cData)  # This should include your main experimental variables
         sva_results <- sva(assay(rld), mod)
    
         #You would then adjust the design formula to include these latent variables.
    
     #- Using removeBatchEffect (CHOSEN!)
    
         #http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#how-do-i-use-vst-or-rlog-data-for-differential-testing
         # Take the rlog matrix out of rld, remove the batch term with limma while
         # passing the replicates design (mm) so that effect is preserved, and
         # write the corrected matrix back into rld for the plots that follow.
         mat <- assay(rld)
         mm <- model.matrix(~replicates, colData(rld))
         mat <- limma::removeBatchEffect(mat, batch=rld$batch, design=mm)
         assay(rld) <- mat
    
     #- After batch effect removal, you should see a shift in the PCA plot — ideally, the samples should now cluster based on replicates or biological conditions rather than the batch.
     #If the batch effect has been successfully removed:
     #    * Before correction: You will likely see samples grouped by batch.
     #    * After correction: You should see the samples grouped by biological condition (e.g., parental, EV, scr_control, etc.).
    
         # -- after pca --
         png("pca_after_batch_correction.png", 1200, 800)
         #plotPCA(rld, intgroup = c("replicates", "batch"))
         #plotPCA(rld, intgroup = c("replicates", "ids"))
         plotPCA(rld, intgroup=c("replicates"))
         dev.off()
         png("pca_after_batch_correction2.png", 1200, 800)
         plotPCA(rld, "batch")
         dev.off()
    
         # -- after heatmap --
         ## generate the pairwise comparison between samples
         # Euclidean distances between samples on the (batch-corrected) rlog values
         distsRL <- dist(t(assay(rld)))
         mat <- as.matrix(distsRL)
         # Label rows/columns as "<replicates>:<batch>"
         rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,batch, sep=":"))
         #rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,ids, sep=":"))
         hc <- hclust(distsRL)
         # brewer.pal() lives in RColorBrewer, which is not attached by the library()
         # calls of this workflow; call it by namespace so the palette is always found.
         hmcol <- colorRampPalette(RColorBrewer::brewer.pal(9,"GnBu"))(100)
         # Open the device only after everything that can fail has been computed,
         # so an error above does not leave a PNG device open.
         png("heatmap_after_batch_correction.png", 1200, 800)
         heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
         dev.off()
    
     #### STEP2: DEGs ####
     #- Heatmap untreated/wt vs parental; 1x for WaGa cell line
     #- Volcano plot untreated/wt vs parental; 1x for WaGa cell line
     #- Manhattan plot miRNAs; 1x for WaGa cell line
     #- Distribution of different small RNA species untreated/wt and parental; 1x for WaGa cell line
     #- Motif analysis: identify RNA-binding proteins that may regulate small RNA loading; 1x for WaGa cell line
    
     #convert bam to bigwig using deepTools by feeding inverse of DESeq’s size Factor
     # Size factors are not set yet at this point (prints NULL) ...
     sizeFactors(dds)
     #NULL
     # ... so estimate them, print them, and export the size-factor-normalized
     # counts both as a tab-separated text file and as an Excel sheet.
     dds <- estimateSizeFactors(dds)
     sizeFactors(dds)
     normalized_counts <- counts(dds, normalized=TRUE)
     write.table(normalized_counts, file="normalized_counts.txt", sep="\t", quote=F, col.names=NA)
     write.xlsx(normalized_counts, file = "normalized_counts.xlsx", rowNames = TRUE)
    
     #---- untreated, scr_control, DMSO_control, scr_DMSO_control, sT_knockdown to parental_cells ----
     # Rebuild the dataset from the full count table (this replaces the dds that
     # was filtered for low counts earlier).
     dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~replicates+batch)
    
     # Each stanza below sets a different reference level, refits the model and
     # then overwrites clist with the comparisons available against that reference.
     # The export loop that follows reads the current dds and clist only.
     # NOTE(review): presumably the loop is run by hand once after each stanza;
     # run top to bottom, only the last stanza would be exported - confirm.
    
     # Reference: parental_cells
     dds$replicates <- relevel(dds$replicates, "parental_cells")
     dds = DESeq(dds, betaPrior=FALSE)  #default betaPrior is FALSE
     resultsNames(dds)
     clist <- c("untreated_vs_parental_cells")
    
     # Reference: untreated
     dds$replicates <- relevel(dds$replicates, "untreated")
     dds = DESeq(dds, betaPrior=FALSE)
     resultsNames(dds)
     clist <- c("DMSO_control_vs_untreated", "scr_control_vs_untreated", "scr_DMSO_control_vs_untreated", "sT_knockdown_vs_untreated")
    
     # Reference: DMSO_control
     dds$replicates <- relevel(dds$replicates, "DMSO_control")
     dds = DESeq(dds, betaPrior=FALSE)
     resultsNames(dds)
     clist <- c("sT_knockdown_vs_DMSO_control")
    
     # Reference: scr_control
     dds$replicates <- relevel(dds$replicates, "scr_control")
     dds = DESeq(dds, betaPrior=FALSE)
     resultsNames(dds)
     clist <- c("sT_knockdown_vs_scr_control")
    
     # Reference: scr_DMSO_control
     dds$replicates <- relevel(dds$replicates, "scr_DMSO_control")
     dds = DESeq(dds, betaPrior=FALSE)
     resultsNames(dds)
     clist <- c("sT_knockdown_vs_scr_DMSO_control")
    
     #NOTE: the results sent to Ute were filtered with padj<=0.1.
     # For every comparison in clist write three tables:
     #   <comparison>-all.txt   all miRNAs with a fold change, sorted by p-value
     #   <comparison>-up.txt    padj<=0.05 and log2FC>=2, largest fold change first
     #   <comparison>-down.txt  padj<=0.05 and log2FC<=-2, largest |fold change| first
     for (i in clist) {
         coef_name <- paste0("replicates_", i)
         res_df <- as.data.frame(results(dds, name=coef_name))

         # Drop rows without a fold change
         res_df <- res_df[!is.na(res_df$log2FoldChange), ]
         #https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
         # An adjusted p-value of NA is treated as "not significant"
         res_df$padj[is.na(res_df$padj)] <- 1

         by_pvalue <- res_df[order(res_df$pvalue), ]
         write.csv(by_pvalue, file = paste(i, "all.txt", sep="-"))

         is_significant <- res_df$padj <= 0.05
         sig_up <- res_df[which(is_significant & res_df$log2FoldChange >= 2), ]
         sig_down <- res_df[which(is_significant & res_df$log2FoldChange <= -2), ]

         sig_up <- sig_up[order(sig_up$log2FoldChange, decreasing=TRUE), ]
         sig_down <- sig_down[order(abs(sig_down$log2FoldChange), decreasing=TRUE), ]
         write.csv(sig_up, file = paste(i, "up.txt", sep="-"))
         write.csv(sig_down, file = paste(i, "down.txt", sep="-"))
     }
    
     # Bundle the three tables (all/up/down) of each comparison into one .xls workbook
     for comparison in \
         untreated_vs_parental_cells \
         DMSO_control_vs_untreated \
         scr_control_vs_untreated \
         scr_DMSO_control_vs_untreated \
         sT_knockdown_vs_untreated \
         sT_knockdown_vs_DMSO_control \
         sT_knockdown_vs_scr_control \
         sT_knockdown_vs_scr_DMSO_control
     do
         ~/Tools/csv2xls-0.4/csv_to_xls.py \
             "${comparison}-all.txt" \
             "${comparison}-up.txt" \
             "${comparison}-down.txt" \
             -d$',' -o "${comparison}.xls";
     done
    
     # ------------------- volcano_plot -------------------
     library(ggplot2)
     library(ggrepel)
    
     geness_res <- read.csv(file = paste("untreated_vs_parental_cells", "all.txt", sep="-"), row.names=1)
    
     external_gene_name <- rownames(geness_res)
     geness_res <- cbind(geness_res, external_gene_name)
     #top_g are from ids
     top_g <- c("hsa-let-7b-5p","hsa-let-7g-5p","hsa-let-7i-5p","hsa-miR-103a-3p","hsa-miR-107","hsa-miR-1224-5p","hsa-miR-122-5p","hsa-miR-1226-5p","hsa-miR-1246","hsa-miR-127-3p","hsa-miR-1290","hsa-miR-130a-3p","hsa-miR-139-3p","hsa-miR-141-3p","hsa-miR-143-3p","hsa-miR-148b-3p","hsa-miR-155-5p","hsa-miR-15a-5p","hsa-miR-17-5p","hsa-miR-184","hsa-miR-18a-3p","hsa-miR-18a-5p","hsa-miR-190a-5p","hsa-miR-191-5p","hsa-miR-193b-5p","hsa-miR-197-5p","hsa-miR-200a-3p","hsa-miR-200b-5p","hsa-miR-206","hsa-miR-20a-5p","hsa-miR-210-3p","hsa-miR-2110","hsa-miR-21-5p","hsa-miR-218-5p","hsa-miR-219a-1-3p","hsa-miR-221-3p","hsa-miR-23b-3p","hsa-miR-27a-3p","hsa-miR-27b-3p","hsa-miR-27b-5p","hsa-miR-28-3p","hsa-miR-30a-5p","hsa-miR-30c-5p","hsa-miR-30e-5p","hsa-miR-3127-5p","hsa-miR-3131","hsa-miR-3180|hsa-miR-3180-3p","hsa-miR-320a","hsa-miR-320b","hsa-miR-320c","hsa-miR-320d","hsa-miR-330-3p","hsa-miR-335-3p","hsa-miR-33b-5p","hsa-miR-340-5p","hsa-miR-342-5p","hsa-miR-3605-5p","hsa-miR-361-3p","hsa-miR-365a-5p","hsa-miR-374b-5p","hsa-miR-378i","hsa-miR-379-5p","hsa-miR-3940-5p","hsa-miR-409-3p","hsa-miR-411-5p","hsa-miR-423-3p","hsa-miR-423-5p","hsa-miR-4286","hsa-miR-429","hsa-miR-432-5p","hsa-miR-4326","hsa-miR-451a","hsa-miR-4520-3p","hsa-miR-454-3p","hsa-miR-4646-5p","hsa-miR-4667-5p","hsa-miR-4748","hsa-miR-483-5p","hsa-miR-486-5p","hsa-miR-5010-5p","hsa-miR-504-3p","hsa-miR-5187-5p","hsa-miR-590-3p","hsa-miR-6128","hsa-miR-625-5p","hsa-miR-6726-5p","hsa-miR-6730-5p","hsa-miR-676-3p","hsa-miR-6767-5p","hsa-miR-6777-5p","hsa-miR-6780a-5p","hsa-miR-6794-5p","hsa-miR-6817-3p","hsa-miR-708-5p","hsa-miR-7-5p","hsa-miR-766-5p","hsa-miR-7854-3p","hsa-miR-873-3p","hsa-miR-885-3p","hsa-miR-92b-5p","hsa-miR-93-5p","hsa-miR-937-3p","hsa-miR-9-5p","hsa-miR-98-5p")
     subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0))
     # Colour category per miRNA, assigned in order so later rules override earlier ones:
     #   1. default for every row
     geness_res$Color <- "NS or log2FC < 2.0"
     #   2. raw p-value below 0.05
     geness_res$Color[geness_res$pvalue < 0.05] <- "P < 0.05"
     #   3. adjusted p-value below 0.05 (overrides rule 2)
     geness_res$Color[geness_res$padj < 0.05] <- "P-adj < 0.05"
     #   4. anything with |log2FC| < 2 falls back to the default, whatever its p-value
     geness_res$Color[abs(geness_res$log2FoldChange) < 2.0] <- "NS or log2FC < 2.0"
    
     # Save the table including the Color column
     write.csv(geness_res, "untreated_vs_parental_cells_with_Category.csv")
     # Signed -log10(p) is appended as the last column ...
     geness_res$invert_P <- (-log10(geness_res$pvalue)) * sign(geness_res$log2FoldChange)
    
     # ... and the last column is dropped again right here, so invert_P reaches
     # neither the CSV written above nor the plot below.
     geness_res <- geness_res[, -1*ncol(geness_res)]
     # Only top_g miRNAs passing both thresholds (p < 0.05, |log2FC| >= 2) get a text label
     volcano_labels <- subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0))

     volcano <- ggplot(geness_res,
                       aes(x = log2FoldChange, y = -log10(pvalue),
                           color = Color, label = external_gene_name)) +
         # dashed guides at the fold-change and p-value thresholds
         geom_vline(xintercept = c(2.0, -2.0), lty = "dashed") +
         geom_hline(yintercept = -log10(0.05), lty = "dashed") +
         geom_point() +
         labs(x = "log2(FC)", y = "Significance, -log10(P)", color = "Significance") +
         scale_color_manual(
             values = c("P < 0.05"="orange","P-adj < 0.05"="red","NS or log2FC < 2.0"="darkgray"),
             guide = guide_legend(override.aes = list(size = 4))) +
         scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
         geom_text_repel(data = volcano_labels,
                         size = 4, point.padding = 0.15, color = "black",
                         min.segment.length = .1, box.padding = .2, lwd = 2) +
         theme_bw(base_size = 16) +
         theme(legend.position = "bottom")

     png("volcano_plot_untreated_vs_parental_cells.png",width=1200, height=1400)
     #svg("untreated_vs_parental_cells.svg",width=12, height=14)
     print(volcano)
     dev.off()
    
     # ------------------ differentially_expressed_miRNAs_heatmap -----------------
     # prepare all_genes
     # Recompute the rlog values from the current dds, remove the batch term
     # (keeping the replicates design) and store the corrected matrix back in rld.
     # NOTE(review): dds is whatever the DE stanzas above left behind - confirm
     # that this is the intended object.
     rld <- rlogTransformation(dds)
     mat <- assay(rld)
     mm <- model.matrix(~replicates, colData(rld))
     mat <- limma::removeBatchEffect(mat, batch=rld$batch, design=mm)
     assay(rld) <- mat
     # Batch-corrected expression matrix; the heatmap rows are selected from it below.
     RNASeq.NoCellLine <- assay(rld)
    
     # reorder the columns
     #colnames(RNASeq.NoCellLine) = c("0505 WaGa sT DMSO","1905 WaGa sT DMSO","0505 WaGa sT Dox","1905 WaGa sT Dox","0505 WaGa scr DMSO","1905 WaGa scr DMSO","0505 WaGa scr Dox","1905 WaGa scr Dox","0505 WaGa wt","1905 WaGa wt","control MKL1","control WaGa")
     #col.order <-c("control MKL1",  "control WaGa","0505 WaGa wt","1905 WaGa wt","0505 WaGa sT DMSO","1905 WaGa sT DMSO","0505 WaGa sT Dox","1905 WaGa sT Dox","0505 WaGa scr DMSO","1905 WaGa scr DMSO","0505 WaGa scr Dox","1905 WaGa scr Dox")
     #RNASeq.NoCellLine <- RNASeq.NoCellLine[,col.order]
    
     #Option4: manually defining
     #for i in untreated_vs_parental_cells    sT_knockdown_vs_untreated DMSO_control_vs_untreated scr_control_vs_untreated scr_DMSO_control_vs_untreated    sT_knockdown_vs_DMSO_control sT_knockdown_vs_scr_control sT_knockdown_vs_scr_DMSO_control; do
     #  echo "cut -d',' -f1-1 ${i}-up.txt > ${i}-up.id";
     #  echo "cut -d',' -f1-1 ${i}-down.txt > ${i}-down.id";
     #done
     #cat *.id | sort -u > ids
     ##add Gene_Id in the first line, delete the ""
     GOI <- read.csv("ids")$Gene_Id
     datamat = RNASeq.NoCellLine[GOI, ]
    
     # clustering the genes and draw heatmap
     #datamat <- datamat[,-1]  #delete the sample "control MKL1"
     #datamat <- datamat[, 1:5]
    
     #parental_cells_1 parental_cells_2 parental_cells_3    untreated_1 untreated_2    scr_control_1 scr_control_2 scr_control_3     DMSO_control_1 DMSO_control_2 DMSO_control_3    scr_DMSO_control_1 scr_DMSO_control_2 scr_DMSO_control_3    sT_knockdown_1 sT_knockdown_2 sT_knockdown_3 -->
     #parental cells 1 parental cells 2 parental cells 3    untreated 1 untreated 2    scr control 1 scr control 2 scr control 3    DMSO control 1 DMSO control 2 DMSO control 3    scr DMSO control 1 scr DMSO control 2 scr DMSO control 3    sT knockdown 1 sT knockdown 2 sT knockdown 3
     # Display names for the 17 heatmap columns (positions 1-17):
     # the sample names with underscores replaced by spaces.
     colnames(datamat)[1:17] <- c(
         paste("parental cells", 1:3),
         paste("untreated", 1:2),
         paste("scr control", 1:3),
         paste("DMSO control", 1:3),
         paste("scr DMSO control", 1:3),
         paste("sT knockdown", 1:3)
     )
    
     write.csv(datamat, file ="gene_expression_keeping_replicates.txt")
     write.xlsx(datamat, file = "gene_expression_keeping_replicates.xlsx", rowNames = TRUE)
     #"ward.D"’, ‘"ward.D2"’,‘"single"’, ‘"complete"’, ‘"average"’ (= UPGMA), ‘"mcquitty"’(= WPGMA), ‘"median"’ (= WPGMC) or ‘"centroid"’ (= UPGMC)
     # Row (miRNA) tree: complete linkage on 1 - Pearson correlation between rows.
     hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
     # Column (sample) tree: complete linkage on 1 - Spearman correlation between
     # columns. The heatmap.2 call below passes Colv=NA and does not use hc.
     hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
     # Cut the row tree at max height / 1.1 to obtain row clusters.
     mycl = cutree(hr, h=max(hr$height)/1.1)
     # One side-bar colour per cluster index; the palette has 19 entries, so a
     # cluster index above 19 would yield NA.
     mycol = c("YELLOW", "BLUE", "ORANGE", "CYAN", "GREEN", "MAGENTA", "GREY", "LIGHTCYAN", "RED",     "PINK", "DARKORANGE", "MAROON",  "LIGHTGREEN", "DARKBLUE",  "DARKRED",   "LIGHTBLUE", "DARKCYAN",  "DARKGREEN", "DARKMAGENTA");
     mycol = mycol[as.vector(mycl)]
    
     # Shorten row labels: drop everything from the first "|" onwards.
     rownames(datamat) <- sub("\\|.*", "", rownames(datamat))
    
     png("DEGs_heatmap_keeping_replicates.png", width=1000, height=1400)
     #svg("DEGs_heatmap_keeping_replicates.svg", width=6, height=8)
     heatmap.2(as.matrix(datamat),
         Rowv=as.dendrogram(hr),
         Colv=NA,
         dendrogram='row',
         labRow=row.names(datamat),
         scale='row',
         trace='none',
         col=bluered(75),
         RowSideColors=mycol,
         srtCol=30,
         lhei=c(1,8),
         cexRow=1.4,   # Increase row label font size
         cexCol=1.7,    # Increase column label font size
         margin=c(8, 12)
         )
     dev.off()
    
     # ----------- manhattan_plot -------------
    
     # TODO_TOMORROW: the top miRNAs should be chosen differently: since we want to see the differentially expressed miRNAs, we should show the top DEG miRNAs - find the top 5, mark them as red points and label them!
     # TODO_piRNA
     # TODO: Both motif calling!
     # TODO: send the results to Ute!
    
     # Load the required libraries
     library(ggplot2)
     library(dplyr)
     library(tidyr)
     library(ggrepel)  # For better label positioning
    
     # Step 1: Compute RPM from raw counts (d.raw has miRNAs in rows, samples in columns)
     d.raw_5 <- d.raw[, 1:5]  # assuming 5 samples
     total_counts <- colSums(d.raw_5)
     RPM <- sweep(d.raw_5, 2, total_counts, FUN = "/") * 1e6
    
     # Step 2: Prepare long-format dataframe
     RPM$miRNA <- rownames(RPM)
     df <- pivot_longer(RPM, cols = -miRNA, names_to = "sample", values_to = "RPM")
    
     # Step 3: Log-transform RPM
     df <- df %>%
     mutate(logRPM = log10(RPM + 1))
    
     # Step 4: Add miRNA index for x-axis positioning
     df <- df %>%
     arrange(miRNA) %>%
     group_by(sample) %>%
     mutate(Position = row_number())
    
     # Step 5: Identify top miRNAs based on mean RPM
     top_mirnas <- df %>%
     group_by(miRNA) %>%
     summarise(mean_RPM = mean(RPM)) %>%
     arrange(desc(mean_RPM)) %>%
     head(5) %>%
     pull(miRNA)  # Get the names of top 5 miRNAs
    
     # Steps 6 + 7 as one helper, so both plots below share the same code.
     # Draws one panel per sample with log10(RPM) of every miRNA and writes it to a PNG.
     #   plot_df:     long-format table with columns miRNA, sample, logRPM, Position
     #   highlighted: character vector of miRNA names drawn in red and labelled
     #   out_png:     name of the PNG file to write
     # Returns the ggplot object invisibly.
     plot_manhattan <- function(plot_df, highlighted, out_png) {
         # Step 6: Assign color based on whether the miRNA is highlighted or not
         plot_df$color <- ifelse(plot_df$miRNA %in% highlighted, "red", "darkblue")

         # Rename the sample labels for display
         sample_labels <- c(
             "parental_cells_1" = "Parental cell 1",
             "parental_cells_2" = "Parental cell 2",
             "parental_cells_3" = "Parental cell 3",
             "untreated_1"      = "Untreated 1",
             "untreated_2"      = "Untreated 2"
         )

         # Step 7: Plot
         p <- ggplot(plot_df, aes(x = Position, y = logRPM, color = color)) +
             scale_color_manual(values = c("red" = "red", "darkblue" = "darkblue")) +
             geom_jitter(width = 0.4) +
             geom_text_repel(
                 data = plot_df %>% filter(miRNA %in% highlighted),
                 aes(label = miRNA),
                 box.padding = 0.5,
                 point.padding = 0.5,
                 segment.color = 'gray50',
                 size = 5,
                 max.overlaps = 8,
                 color = "black"
             ) +
             labs(x = "", y = "log10(Read Per Million) (RPM)") +
             facet_wrap(~sample, scales = "free_x", ncol = 5,
                        labeller = labeller(sample = sample_labels)) +
             theme_minimal() +
             theme(
                 axis.text.x = element_blank(),
                 axis.ticks.x = element_blank(),
                 legend.position = "none",
                 text = element_text(size = 16),
                 axis.title = element_text(size = 18),
                 strip.text = element_text(size = 16, face = "bold"),
                 panel.spacing = unit(1.5, "lines")  # <-- More space between plots
             )

         png(out_png, width = 1200, height = 1200)
         # Close the device even if drawing fails
         on.exit(dev.off())
         # ggplot objects are not auto-printed inside a function
         print(p)
         invisible(p)
     }

     # Plot 1: highlight the five miRNAs with the highest mean RPM (top_mirnas from Step 5)
     plot_manhattan(df, top_mirnas, "manhattan_plot_top_miRNAs_based_on_mean_RPM.png")

     # Plot 2: highlight an explicitly listed set of miRNAs
     top_mirnas = c("hsa-miR-20a-5p","hsa-miR-93-5p","hsa-let-7g-5p","hsa-miR-30a-5p","hsa-miR-423-5p","hsa-let-7i-5p")
     #,"hsa-miR-17-5p","hsa-miR-107","hsa-miR-483-5p","hsa-miR-9-5p","hsa-miR-103a-3p","hsa-miR-30e-5p","hsa-miR-21-5p","hsa-miR-30d-5p")
     plot_manhattan(df, top_mirnas, "manhattan_plot_most_differentially_expressed_miRNAs.png")
    
     # Collect the miRNA outputs in ./miRNAs and give the key files miRNA-specific names.
     # -p makes the step re-runnable when the directory already exists.
     mkdir -p miRNAs
     # "--" stops option parsing for odd file names; the trailing slash forces the target to
     # be a directory, so a missing miRNAs/ can never turn "mv ids miRNAs" into a plain rename.
     mv -- *.png miRNAs/
     mv -- *.svg miRNAs/
     mv -- *.csv miRNAs/
     mv -- *.xls* miRNAs/
     mv -- *.id miRNAs/
     mv -- ids miRNAs/
     mv -- normalized_counts.txt miRNAs/
     mv -- *-all.txt miRNAs/
     mv -- *-up.txt miRNAs/
     mv -- *-down.txt miRNAs/
     mv -- gene_expression_keeping_replicates.txt miRNAs/
     # Rename only once the directory change has succeeded
     cd miRNAs && {
         mv -- DEGs_heatmap_keeping_replicates.png differentially_expressed_miRNAs_heatmap.png
         mv -- volcano_plot_untreated_vs_parental_cells.png volcano_plot_miRNAs_untreated_vs_parental_cells.png
         mv -- untreated_vs_parental_cells.xls miRNA_untreated_vs_parental_cells.xls
     }
  10. Do separate shRNA and treatment analysis

     # Two-factor analysis (shRNA x treatment, with interaction) on the 12 samples in
     # columns 6-17 of d.raw; columns 1-5 are dropped.
     d.raw_12 <- d.raw[, 6:17]
     #> colnames(d.raw_12)
     #[1] "scr_control_1"      "scr_control_2"      "scr_control_3"
     #[4] "DMSO_control_1"     "DMSO_control_2"     "DMSO_control_3"
     #[7] "scr_DMSO_control_1" "scr_DMSO_control_2" "scr_DMSO_control_3"
     #[10] "sT_knockdown_1"     "sT_knockdown_2"     "sT_knockdown_3"
     #    "scr Dox" → "scr control"
     #    "sT DMSO" → "DMSO control"
     #    "scr DMSO" → "scr DMSO control"
     #    "sT Dox" → "sT knockdown"
    
     # Set the reference levels explicitly (scr and DMSO). as.factor() orders levels by
     # locale-dependent sorting, and "scr" vs "sT" sorts differently in the C locale, which
     # would silently rename the coefficients away from the names hard-coded below.
     shRNA = factor(c("scr","scr","scr","sT","sT","sT","scr","scr","scr","sT","sT","sT"), levels=c("scr","sT"))
     treatment = factor(c("Dox","Dox","Dox","DMSO","DMSO","DMSO","DMSO","DMSO","DMSO","Dox","Dox","Dox"), levels=c("DMSO","Dox"))
     cData = data.frame(row.names=colnames(d.raw_12), shRNA=shRNA, treatment=treatment)
     dds_shRNA_treatment<-DESeqDataSetFromMatrix(countData=d.raw_12, colData=cData, design=~shRNA+treatment+shRNA:treatment)
    
     dds_shRNA_treatment = DESeq(dds_shRNA_treatment, betaPrior=FALSE)
     resultsNames(dds_shRNA_treatment)
     # Main effect of shRNA, main effect of treatment, and their interaction
     contrasts <- c("shRNA_sT_vs_scr", "treatment_Dox_vs_DMSO", "shRNAsT.treatmentDox")
     # Fail early with a clear message if a hard-coded coefficient name is not in the model
     stopifnot(all(contrasts %in% resultsNames(dds_shRNA_treatment)))
    
     # For each coefficient write three tables: all tested features (sorted by p-value),
     # up-regulated (padj<=0.05 & log2FC>=2) and down-regulated (padj<=0.05 & log2FC<=-2).
     for (contrast in contrasts) {
         res = results(dds_shRNA_treatment, name=contrast)
         res <- res[!is.na(res$log2FoldChange),]
         #https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
         # padj is NA for features removed by DESeq2's filtering; treat them as not significant
         res$padj <- ifelse(is.na(res$padj), 1, res$padj)
         res_df <- as.data.frame(res)
         write.csv(as.data.frame(res_df[order(res_df$pvalue),]), file = paste(contrast, "all.txt", sep="-"))
         up <- subset(res_df, padj<=0.05 & log2FoldChange>=2)
         down <- subset(res_df, padj<=0.05 & log2FoldChange<=-2)
         write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(contrast, "up.txt", sep="-"))
         write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(contrast, "down.txt", sep="-"))
     }
    
     #~/Tools/csv2xls-0.4/csv_to_xls.py shRNA_sT_vs_scr-up.txt shRNA_sT_vs_scr-down.txt shRNA_sT_vs_scr-all.txt -d$',' -o shRNA_sT_vs_scr.xls
     #~/Tools/csv2xls-0.4/csv_to_xls.py treatment_Dox_vs_DMSO-up.txt treatment_Dox_vs_DMSO-down.txt treatment_Dox_vs_DMSO-all.txt -d$',' -o treatment_Dox_vs_DMSO.xls
     #~/Tools/csv2xls-0.4/csv_to_xls.py shRNAsT.treatmentDox-up.txt shRNAsT.treatmentDox-down.txt shRNAsT.treatmentDox-all.txt -d$',' -o shRNAsT.treatmentDox.xls
  11. Downstream analysis using R for piRNAs

     # Load the exceRpt piRNA read-count table (first column = row names, one column per sample)
     d.raw<- read.delim2("exceRpt_piRNA_ReadCounts.txt",sep="\t", header=TRUE, row.names=1)
    
     # Desired column order
     desired_order <- c(
         "parental_cells_1", "parental_cells_2", "parental_cells_3",
         "untreated_1", "untreated_2",
         "scr_control_1", "scr_control_2", "scr_control_3",
         "DMSO_control_1", "DMSO_control_2", "DMSO_control_3",
         "scr_DMSO_control_1", "scr_DMSO_control_2", "scr_DMSO_control_3",
         "sT_knockdown_1", "sT_knockdown_2", "sT_knockdown_3"
     )
     # Check for missing or misnamed columns BEFORE reordering: indexing with an unknown
     # name aborts with "undefined columns selected", so a check placed after the
     # subsetting could never report anything.
     missing_cols <- setdiff(desired_order, colnames(d.raw))
     if (length(missing_cols) > 0) {
         stop("Missing or misnamed columns: ", paste(missing_cols, collapse = ", "))
     }
     # Reorder columns
     d.raw <- d.raw[, desired_order]
     #sapply(d.raw, is.numeric)
     # Convert via as.character(): read.delim2() expects "," as decimal mark, so values
     # written with "." may arrive as character or factor columns, and as.numeric() on a
     # factor would return the level codes instead of the counts.
     d.raw[] <- lapply(d.raw, function(x) as.numeric(as.character(x)))
     # DESeq2 needs integer counts
     d.raw <- round(d.raw)
     write.csv(d.raw, file ="d_raw.csv")
     write.xlsx(d.raw, file = "d_raw.xlsx", rowNames = TRUE)
    
     #Make the piRNA names shorter, e.g. "hsa_piR_016658|gb|DQ592931|Homo_sapiens:6:80508363:80508389:Plus" --> "hsa_piR_016658"
     #paste -d',' f1_1 f2_ > d_raw_.csv
     # NOTE(review): d_raw_.csv is presumably d_raw.csv with the shortened names, built outside R
     # with the paste command above (f1_1 / f2_ are not defined in these notes) - confirm.
     d.raw <- read.delim2("d_raw_.csv",sep=",", header=TRUE, row.names=1)
     # Sample annotation. All vectors below are positional: 17 entries, one per column of
     # d.raw, in the column order of d.raw.
     parental_or_EV = as.factor(c("parental","parental","parental", "EV","EV","EV","EV","EV","EV","EV","EV","EV","EV","EV","EV","EV","EV"))
     #donor = as.factor(c("0505","1905", "0505","1905", "0505","1905", "0505","1905", "0505","1905", "0505","1905"))
     batch = as.factor(c("Aug22","March25","March25", "Sep23","Sep23", "Sep23","Sep23","March25", "Sep23","Sep23","March25", "Sep23","Sep23","March25", "Sep23","Sep23","March25"))
    
     # replicates = biological group of each sample; ids = the individual sample names
     replicates = as.factor(c("parental_cells","parental_cells","parental_cells",  "untreated","untreated",   "scr_control","scr_control","scr_control",  "DMSO_control","DMSO_control","DMSO_control",  "scr_DMSO_control", "scr_DMSO_control","scr_DMSO_control",  "sT_knockdown", "sT_knockdown", "sT_knockdown"))
     ids = as.factor(c("parental_cells_1", "parental_cells_2", "parental_cells_3",
         "untreated_1", "untreated_2",
         "scr_control_1", "scr_control_2", "scr_control_3",
         "DMSO_control_1", "DMSO_control_2", "DMSO_control_3",
         "scr_DMSO_control_1", "scr_DMSO_control_2", "scr_DMSO_control_3",
         "sT_knockdown_1", "sT_knockdown_2", "sT_knockdown_3"))
     cData = data.frame(row.names=colnames(d.raw), replicates=replicates, ids=ids, batch=batch, parental_or_EV=parental_or_EV)
     # Model the biological group while adjusting for the sequencing batch
     dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~replicates+batch)
    
     # Filter low-count piRNAs (keep rows whose counts sum to more than 10 over all samples)
     dds <- dds[ rowSums(counts(dds)) > 10, ]  #364-->124
     # rlog-transformed values, used for the PCA plots and heatmaps below
     rld <- rlogTransformation(dds)
    
     # -- before pca --
     # pca.png: samples coloured by biological group (before batch correction)
     png("pca.png", 1200, 800)
     plotPCA(rld, intgroup=c("replicates"))
     #plotPCA(rld, intgroup = c("replicates", "batch"))
     #plotPCA(rld, intgroup = c("replicates", "ids"))
     #plotPCA(rld, "batch")
     dev.off()
     # pca2.png: the same PCA coloured by batch
     png("pca2.png", 1200, 800)
     #plotPCA(rld, intgroup=c("replicates"))
     #plotPCA(rld, intgroup = c("replicates", "batch"))
     #plotPCA(rld, intgroup = c("replicates", "ids"))
     plotPCA(rld, "batch")
     dev.off()
    
     # Batch Effect Removal Methods:
    
     #Applying batch effect correction techniques such as ComBat, SVA (Surrogate Variable Analysis) or limma::removeBatchEffect.
    
         #http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#how-do-i-use-vst-or-rlog-data-for-differential-testing
         # Remove the batch term from the rlog matrix while protecting the group effect
         # (design = ~replicates); the corrected values overwrite assay(rld) and are for
         # visualisation only - the DESeq2 model handles batch through its design formula.
         mat <- assay(rld)
         mm <- model.matrix(~replicates, colData(rld))
         mat <- limma::removeBatchEffect(mat, batch=rld$batch, design=mm)
         assay(rld) <- mat
    
     #- After batch effect removal, you should see a shift in the PCA plot — ideally, the samples should now cluster based on replicates or biological conditions rather than the batch.
     #If the batch effect has been successfully removed:
     #    * Before correction: You will likely see samples grouped by batch.
     #    * After correction: You should see the samples grouped by biological condition (e.g., parental, EV, scr_control, etc.).
    
         # -- after pca --
         png("pca_after_batch_correction.png", 1200, 800)
         #plotPCA(rld, intgroup = c("replicates", "batch"))
         #plotPCA(rld, intgroup = c("replicates", "ids"))
         plotPCA(rld, intgroup=c("replicates"))
         dev.off()
         png("pca_after_batch_correction2.png", 1200, 800)
         plotPCA(rld, "batch")
         dev.off()
    
         # -- after heatmap --
         ## generate the pairwise comparison between samples
         # Euclidean sample-to-sample distances on the batch-corrected rlog values
         png("heatmap_after_batch_correction.png", 1200, 800)
         distsRL <- dist(t(assay(rld)))
         mat <- as.matrix(distsRL)
         # label rows/columns as "group:batch"; colData(dds) has the same sample order as rld
         rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,batch, sep=":"))
         #rownames(mat) <- colnames(mat) <- with(colData(dds),paste(replicates,ids, sep=":"))
         hc <- hclust(distsRL)
         # brewer.pal() comes from RColorBrewer and heatmap.2() from gplots; both must be loaded
         hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
         heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
         dev.off()
    
     #### STEP2: DEGs ####
     #- Heatmap untreated/wt vs parental; 1x for WaGa cell line
     #- Volcano plot untreated/wt vs parental; 1x for WaGa cell line
     #- Manhattan plot miRNAs; 1x for WaGa cell line
     #- Distribution of different small RNA species untreated/wt and parental; 1x for WaGa cell line
     #- Motif analysis: identify RNA-binding proteins that may regulate small RNA loading; 1x for WaGa cell line
    
     #convert bam to bigwig using deepTools by feeding inverse of DESeq’s size Factor
     # Size factors are NULL until estimateSizeFactors() has run; afterwards export the
     # size-factor-normalised counts as a tab-separated file and as a workbook.
     sizeFactors(dds)
     #NULL
     dds <- estimateSizeFactors(dds)
     sizeFactors(dds)
     normalized_counts <- counts(dds, normalized=TRUE)
     write.table(normalized_counts, file="normalized_counts.txt", sep="\t", quote=F, col.names=NA)
     write.xlsx(normalized_counts, file = "normalized_counts.xlsx", rowNames = TRUE)
    
     #---- untreated, scr_control, DMSO_control, scr_DMSO_control, sT_knockdown to parental_cells ----
     # NOTE(review): dds is rebuilt from the full d.raw here, so the rowSums > 10 filter applied
     # before the PCA does not carry over to the differential test - confirm this is intended.
     dds<-DESeqDataSetFromMatrix(countData=d.raw, colData=cData, design=~replicates+batch)
    
     # parental_cells becomes the reference level, so coefficients read "<group>_vs_parental_cells"
     dds$replicates <- relevel(dds$replicates, "parental_cells")
     dds = DESeq(dds, betaPrior=FALSE)  #default betaPrior is FALSE
     resultsNames(dds)
     # comparisons to export; only untreated vs parental cells is used here
     clist <- c("untreated_vs_parental_cells")
    
     #NOTE that the results sent to Ute is |padj|<=0.1.
     # NOTE(review): the loop below filters with padj<=0.05, not 0.1 - confirm which cutoff applies.
     # For each comparison write three tables: all tested features (sorted by p-value),
     # up-regulated (padj<=0.05 & log2FC>=2) and down-regulated (padj<=0.05 & log2FC<=-2).
     for (i in clist) {
         contrast = paste("replicates", i, sep="_")
         res = results(dds, name=contrast)
         res <- res[!is.na(res$log2FoldChange),]
         #https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#why-are-some-p-values-set-to-na
         # NA adjusted p-values are set to 1, i.e. treated as not significant
         res$padj <- ifelse(is.na(res$padj), 1, res$padj)
         res_df <- as.data.frame(res)
         write.csv(as.data.frame(res_df[order(res_df$pvalue),]), file = paste(i, "all.txt", sep="-"))
         up <- subset(res_df, padj<=0.05 & log2FoldChange>=2)
         down <- subset(res_df, padj<=0.05 & log2FoldChange<=-2)
         write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
         write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
     }
    
     # Shell step (not R): convert the three comma-separated result tables into
     # untreated_vs_parental_cells.xls. -d$',' passes "," as the field delimiter.
     # NOTE(review): presumably one worksheet per input file - confirm against csv_to_xls.py.
     ~/Tools/csv2xls-0.4/csv_to_xls.py \
     untreated_vs_parental_cells-all.txt \
     untreated_vs_parental_cells-up.txt \
     untreated_vs_parental_cells-down.txt \
     -d$',' -o untreated_vs_parental_cells.xls;
    
     # ------------------- volcano_plot -------------------
     # Volcano plot (log2 fold change vs -log10 p-value) for untreated vs parental cells,
     # labelling the piRNAs of interest that pass p < 0.05 and |log2FC| >= 2.
     library(ggplot2)
     library(ggrepel)
    
     # Results table written by the DESeq2 export loop (all tested piRNAs)
     geness_res <- read.csv(file = paste("untreated_vs_parental_cells", "all.txt", sep="-"), row.names=1)
    
     external_gene_name <- rownames(geness_res)
     geness_res <- cbind(geness_res, external_gene_name)
     #top_g are from ids
     top_g <- c("hsa_piR_000805","hsa_piR_001152","hsa_piR_001170","hsa_piR_001205","hsa_piR_009051","hsa_piR_010894","hsa_piR_012681","hsa_piR_012753","hsa_piR_016659","hsa_piR_017033","hsa_piR_017178","hsa_piR_018292","hsa_piR_018780","hsa_piR_019420","hsa_piR_020009","hsa_piR_020326","hsa_piR_020813","hsa_piR_020814","hsa_piR_020828")
     # console check: which of the piRNAs of interest will actually be labelled
     subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0))
     # Colour categories. Order matters: each later assignment overwrites the earlier ones,
     # and the final line resets everything with |log2FC| < 2 to the grey category.
     geness_res$Color <- "NS or log2FC < 2.0"
     geness_res$Color[geness_res$pvalue < 0.05] <- "P < 0.05"
     geness_res$Color[geness_res$padj < 0.05] <- "P-adj < 0.05"
     geness_res$Color[abs(geness_res$log2FoldChange) < 2.0] <- "NS or log2FC < 2.0"
    
     write.csv(geness_res, "untreated_vs_parental_cells_with_Category.csv")
     # The former invert_P column was appended and then removed again by dropping the last
     # column on the very next line, so that dead code has been taken out.
    
     # Rows that get a text label in the plot
     label_df <- subset(geness_res, external_gene_name %in% top_g & pvalue < 0.05 & (abs(geness_res$log2FoldChange) >= 2.0))
    
     volcano <- ggplot(geness_res,
                       aes(x = log2FoldChange, y = -log10(pvalue),
                           color = Color, label = external_gene_name)) +
         # dashed guides at the fold-change and p-value cutoffs
         geom_vline(xintercept = c(2.0, -2.0), lty = "dashed") +
         geom_hline(yintercept = -log10(0.05), lty = "dashed") +
         geom_point() +
         labs(x = "log2(FC)", y = "Significance, -log10(P)", color = "Significance") +
         scale_color_manual(values = c("P < 0.05"="orange","P-adj < 0.05"="red","NS or log2FC < 2.0"="darkgray"),
                            guide = guide_legend(override.aes = list(size = 4))) +
         scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
         geom_text_repel(data = label_df, size = 4, point.padding = 0.15, color = "black",
                         min.segment.length = .1, box.padding = .2, lwd = 2) +
         theme_bw(base_size = 16) +
         theme(legend.position = "bottom")
    
     png("volcano_plot_piRNAs_untreated_vs_parental_cells.png",width=1200, height=1400)
     #svg("untreated_vs_parental_cells.svg",width=12, height=14)
     # explicit print() so the figure is also written when this is run via source()
     print(volcano)
     dev.off()
    
     # ------------------ differentially_expressed_piRNAs_heatmap -----------------
     # Heatmap of the batch-corrected rlog values of the differentially expressed piRNAs
     # (row names listed in the file "ids"), restricted to the first five samples.
     # prepare all_genes
     rld <- rlogTransformation(dds)
     mat <- assay(rld)
     mm <- model.matrix(~replicates, colData(rld))
     mat <- limma::removeBatchEffect(mat, batch=rld$batch, design=mm)
     assay(rld) <- mat
     RNASeq.NoCellLine <- assay(rld)
    
     #Option4: manually defining
     # (shell commands, run outside R, that build the "ids" file from the up/down tables)
     #for i in untreated_vs_parental_cells; do
     #  echo "cut -d',' -f1-1 ${i}-up.txt > ${i}-up.id";
     #  echo "cut -d',' -f1-1 ${i}-down.txt > ${i}-down.id";
     #done
     #cat *.id | sort -u > ids
     ##add Gene_Id in the first line, delete the ""
     # "ids" must start with the header line Gene_Id; its values are used as row names
     GOI <- read.csv("ids")$Gene_Id
     datamat = RNASeq.NoCellLine[GOI, ]
    
     # clustering the genes and draw heatmap
     #datamat <- datamat[,-1]  #delete the sample "control MKL1"
     # keep only the first five columns and give them display names
     datamat <- datamat[, 1:5]
    
     colnames(datamat)[1] <- "parental cells 1"
     colnames(datamat)[2] <- "parental cells 2"
     colnames(datamat)[3] <- "parental cells 3"
     colnames(datamat)[4] <- "untreated 1"
     colnames(datamat)[5] <- "untreated 2"
    
     write.csv(datamat, file ="gene_expression_keeping_replicates.txt")
     write.xlsx(datamat, file = "gene_expression_keeping_replicates.xlsx", rowNames = TRUE)
     #"ward.D"’, ‘"ward.D2"’,‘"single"’, ‘"complete"’, ‘"average"’ (= UPGMA), ‘"mcquitty"’(= WPGMA), ‘"median"’ (= WPGMC) or ‘"centroid"’ (= UPGMC)
     # hr clusters the rows (1 - Pearson correlation), hc the columns (1 - Spearman correlation);
     # hc is computed but not used by heatmap.2 below because Colv=NA keeps the column order.
     hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
     hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
     # cut the row tree at max height / 1.1 and colour the rows by cluster;
     # mycol holds 19 colours, so any cluster number above 19 would get NA as its colour.
     mycl = cutree(hr, h=max(hr$height)/1.1)
     mycol = c("YELLOW", "BLUE", "ORANGE", "CYAN", "GREEN", "MAGENTA", "GREY", "LIGHTCYAN", "RED",     "PINK", "DARKORANGE", "MAROON",  "LIGHTGREEN", "DARKBLUE",  "DARKRED",   "LIGHTBLUE", "DARKCYAN",  "DARKGREEN", "DARKMAGENTA");
     mycol = mycol[as.vector(mycl)]
    
     # strip everything from the first "|" onwards so the row labels stay short
     rownames(datamat) <- sub("\\|.*", "", rownames(datamat))
    
     png("differentially_expressed_piRNAs_heatmap.png", width=800, height=800)
     #svg("differentially_expressed_piRNAs_heatmap.svg", width=6, height=8)
     heatmap.2(as.matrix(datamat),
         Rowv=as.dendrogram(hr),
         Colv=NA,
         dendrogram='row',
         labRow=row.names(datamat),
         scale='row',
         trace='none',
         col=bluered(75),
         RowSideColors=mycol,
         srtCol=20,
         lhei=c(1,4),
         cexRow=1.7,   # Increase row label font size
         cexCol=1.7,    # Increase column label font size
         margin=c(6, 12)
         )
     dev.off()
    
     # ----------- manhattan_plot -------------
     # Manhattan-style overview of piRNA abundance in the first five samples: one panel per
     # sample, with the five piRNAs of highest mean RPM drawn in red and labelled.
    
     # Load the required libraries
     library(ggplot2)
     library(dplyr)
     library(tidyr)
     library(ggrepel)  # For better label positioning
    
     # Step 1: Compute RPM from raw counts (d.raw has piRNAs in rows, samples in columns)
     d.raw_5 <- d.raw[, 1:5]  # assuming 5 samples
     total_counts <- colSums(d.raw_5)
     # divide every column by its own total, then scale to reads per million
     RPM <- sweep(d.raw_5, 2, total_counts, FUN = "/") * 1e6
    
     # Step 2: Prepare long-format dataframe
     RPM$piRNA <- rownames(RPM)
     df <- pivot_longer(RPM, cols = -piRNA, names_to = "sample", values_to = "RPM")
    
     # Step 3: Log-transform RPM
     # (a pseudocount of 1 keeps zero counts finite)
     df <- df %>%
     mutate(logRPM = log10(RPM + 1))
    
     # Step 4: Add piRNA index for x-axis positioning
     # (running index within each sample after sorting by piRNA name; df stays grouped by sample)
     df <- df %>%
     arrange(piRNA) %>%
     group_by(sample) %>%
     mutate(Position = row_number())
    
     # Step 5: Identify top piRNAs based on mean RPM
     top_pirnas <- df %>%
     group_by(piRNA) %>%
     summarise(mean_RPM = mean(RPM)) %>%
     arrange(desc(mean_RPM)) %>%
     head(5) %>%
     pull(piRNA)  # Get the names of top 5 piRNAs
    
     # Step 6: Assign color based on whether the piRNA is top or not
     df$color <- ifelse(df$piRNA %in% top_pirnas, "red", "darkblue")
    
     # Rename the sample labels for display
     # (names = values of df$sample, values = facet titles shown in the plot)
     sample_labels <- c(
     "parental_cells_1" = "Parental cell 1",
     "parental_cells_2" = "Parental cell 2",
     "parental_cells_3" = "Parental cell 3",
     "untreated_1"      = "Untreated 1",
     "untreated_2"      = "Untreated 2"
     )
    
     # Step 7: Plot
     # The ggplot object is rendered into the png device by top-level auto-printing;
     # wrap the expression in print() if this is run via source() or inside a function.
     png("manhattan_plot_top_piRNAs_based_on_mean_RPM.png", width = 1200, height = 1200)
     ggplot(df, aes(x = Position, y = logRPM, color = color)) +
     scale_color_manual(values = c("red" = "red", "darkblue" = "darkblue")) +
     geom_jitter(width = 0.4) +
     # label only the highlighted piRNAs, always in black
     geom_text_repel(
         data = df %>% filter(piRNA %in% top_pirnas),
         aes(label = piRNA),
         box.padding = 0.5,
         point.padding = 0.5,
         segment.color = 'gray50',
         size = 5,
         max.overlaps = 8,
         color = "black"
     ) +
     labs(x = "", y = "log10(Read Per Million) (RPM)") +
     # one panel per sample, all five panels in a single row
     facet_wrap(~sample, scales = "free_x", ncol = 5,
                 labeller = labeller(sample = sample_labels)) +
     theme_minimal() +
     # the x axis is only a running index, so its text and ticks are hidden
     theme(
         axis.text.x = element_blank(),
         axis.ticks.x = element_blank(),
         legend.position = "none",
         text = element_text(size = 16),
         axis.title = element_text(size = 18),
         strip.text = element_text(size = 16, face = "bold"),
         panel.spacing = unit(1.5, "lines")  # <-- More space between plots
     )
     dev.off()
    
     # Manhattan plot highlighting a hand-picked set of piRNAs; written to
     # manhattan_plot_most_differentially_expressed_piRNAs.png.
     top_pirnas <- c(
         "hsa_piR_012681", "hsa_piR_012753", "hsa_piR_001152",
         "hsa_piR_020813", "hsa_piR_020828"
     )
    
     # Red for the selected piRNAs, dark blue for everything else
     is_selected <- df$piRNA %in% top_pirnas
     df$color <- ifelse(is_selected, "red", "darkblue")
    
     # Facet titles shown instead of the raw sample ids
     sample_labels <- c(
         parental_cells_1 = "Parental cell 1",
         parental_cells_2 = "Parental cell 2",
         parental_cells_3 = "Parental cell 3",
         untreated_1      = "Untreated 1",
         untreated_2      = "Untreated 2"
     )
    
     # Only the selected piRNAs receive a text label
     labelled_points <- df %>% filter(piRNA %in% top_pirnas)
    
     # Theme tweaks: hide the index axis, drop the legend, enlarge the fonts
     manhattan_theme <- theme(
         axis.text.x = element_blank(),
         axis.ticks.x = element_blank(),
         legend.position = "none",
         text = element_text(size = 16),
         axis.title = element_text(size = 18),
         strip.text = element_text(size = 16, face = "bold"),
         panel.spacing = unit(1.5, "lines")  # more space between the panels
     )
    
     # Assemble the plot first, then draw it into the png device
     manhattan_selected <- ggplot(df, aes(x = Position, y = logRPM, color = color)) +
         scale_color_manual(values = c("red" = "red", "darkblue" = "darkblue")) +
         geom_jitter(width = 0.4) +
         geom_text_repel(
             data = labelled_points,
             aes(label = piRNA),
             box.padding = 0.5,
             point.padding = 0.5,
             segment.color = 'gray50',
             size = 5,
             max.overlaps = 8,
             color = "black"
         ) +
         labs(x = "", y = "log10(Read Per Million) (RPM)") +
         facet_wrap(~sample, scales = "free_x", ncol = 5,
                    labeller = labeller(sample = sample_labels)) +
         theme_minimal() +
         manhattan_theme
    
     png("manhattan_plot_most_differentially_expressed_piRNAs.png", width = 1200, height = 1200)
     print(manhattan_selected)
     dev.off()
    
     # Collect the piRNA outputs in ./piRNAs and give the result workbook a piRNA-specific name.
     # -p makes the step re-runnable when the directory already exists.
     mkdir -p piRNAs
     # "--" stops option parsing for odd file names; the trailing slash forces the target to
     # be a directory, so a missing piRNAs/ can never turn "mv ids piRNAs" into a plain rename.
     mv -- *.png piRNAs/
     mv -- *.csv piRNAs/
     mv -- *.xls* piRNAs/
     mv -- *.id piRNAs/
     mv -- ids piRNAs/
     mv -- normalized_counts.txt piRNAs/
     mv -- *-all.txt piRNAs/
     mv -- *-up.txt piRNAs/
     mv -- *-down.txt piRNAs/
     mv -- gene_expression_keeping_replicates.txt piRNAs/
     # Rename only once the directory change has succeeded
     cd piRNAs &&
         mv -- untreated_vs_parental_cells.xls piRNA_untreated_vs_parental_cells.xls
  12. Reporting

    Please find attached the analysis results for small RNAs in the WaGa cell line. miRNAs:

     * Heatmap comparing untreated/wt vs. parental (1x):
     See differentially_expressed_miRNAs_heatmap.png
    
     * Volcano plot comparing untreated/wt vs. parental (1x):
     See volcano_plot_miRNAs_untreated_vs_parental_cells.png
    
     * Manhattan plots highlighting top differentially expressed miRNAs (1x):
     See manhattan_plot_most_differentially_expressed_miRNAs.png and manhattan_plot_top_miRNAs_based_on_mean_RPM.png

    piRNAs:

     * Heatmap comparing untreated/wt vs. parental (1x):
     See differentially_expressed_piRNAs_heatmap.png
    
     * Volcano plot comparing untreated/wt vs. parental (1x):
     See volcano_plot_piRNAs_untreated_vs_parental_cells.png
    
     * Manhattan plots highlighting top differentially expressed piRNAs (1x):
     See manhattan_plot_most_differentially_expressed_piRNAs.png and manhattan_plot_top_piRNAs_based_on_mean_RPM.png

    Additional

     * Distribution of small RNA species (untreated/wt vs. parental, 1x):
     See distribution_heatmap.png
    
     * Differential expression tables:
    
         - miRNA_untreated_vs_parental_cells.xls
         - piRNA_untreated_vs_parental_cells.xls
    
         These files contain all differentially expressed miRNAs and piRNAs, respectively.

    If you’d like the R code used to generate the plots, along with the raw data and full tables, just let me know—I’ll be happy to send it over.

Processing Data_Tam_RNAseq_2024_MHB_vs_Urine_ATCC19606

  1. Preparing raw data

     They are wild-type strains grown in different media.
     Urine - human urine
     AUM - artificial urine medium
     MHB - Mueller-Hinton broth
     Urine(人类尿液):pH值、比重、温度、污染物、化学成分、微生物负荷。
     AUM(人工尿液培养基):pH值、营养成分、无菌性、渗透压、温度、污染物。
     MHB(Mueller-Hinton培养基):pH值、无菌性、营养成分、温度、渗透压、抗生素浓度。
    
     # Link the 18 raw FASTQ files (3 media x 3 replicates x 2 read directions) into raw_data/
     # under uniform names <medium>_r<replicate>_R<read>.fq.gz.
     # The && chain stops before creating any link if raw_data cannot be created or entered,
     # so the links can never end up in the wrong directory.
     mkdir -p raw_data && cd raw_data &&
     for medium in AUM MHB Urine; do
         for rep in 1 2 3; do
             for read in 1 2; do
                 ln -s "../X101SC24105589-Z01-J001/01.RawData/${medium}-${rep}/${medium}-${rep}_${read}.fq.gz" \
                     "${medium}_r${rep}_R${read}.fq.gz"
             done
         done
     done
  2. (Optional) Using Trinity to find the closest reference

     Trinity --seqType fq --max_memory 50G --left trimmed/wt_r1_R1.fastq.gz  --right trimmed/wt_r1_R2.fastq.gz --CPU 12
    
     #https://www.genome.jp/kegg/tables/br08606.html#prok
     acb     KGB     Acinetobacter baumannii ATCC 17978  2007    GenBank
     abm     KGB     Acinetobacter baumannii SDF     2008    GenBank
     aby     KGB     Acinetobacter baumannii AYE     2008    GenBank
     abc     KGB     Acinetobacter baumannii ACICU   2008    GenBank
     abn     KGB     Acinetobacter baumannii AB0057  2008    GenBank
     abb     KGB     Acinetobacter baumannii AB307-0294  2008    GenBank
     abx     KGB     Acinetobacter baumannii 1656-2  2012    GenBank
     abz     KGB     Acinetobacter baumannii MDR-ZJ06    2012    GenBank
     abr     KGB     Acinetobacter baumannii MDR-TJ  2012    GenBank
     abd     KGB     Acinetobacter baumannii TCDC-AB0715     2012    GenBank
     abh     KGB     Acinetobacter baumannii TYTH-1  2012    GenBank
     abad    KGB     Acinetobacter baumannii D1279779    2013    GenBank
     abj     KGB     Acinetobacter baumannii BJAB07104   2013    GenBank
     abab    KGB     Acinetobacter baumannii BJAB0715    2013    GenBank
     abaj    KGB     Acinetobacter baumannii BJAB0868    2013    GenBank
     abaz    KGB     Acinetobacter baumannii ZW85-1  2013    GenBank
     abk     KGB     Acinetobacter baumannii AbH12O-A2   2014    GenBank
     abau    KGB     Acinetobacter baumannii AB030   2014    GenBank
     abaa    KGB     Acinetobacter baumannii AB031   2014    GenBank
     abw     KGB     Acinetobacter baumannii AC29    2014    GenBank
     abal    KGB     Acinetobacter baumannii LAC-4   2015    GenBank
     #Note that the Acinetobacter baumannii strain ATCC 19606 chromosome, complete genome (GenBank: CP059040.1) was chosen as the reference!
  3. Downloading CP059040.fasta and CP059040.gff from GenBank

  4. (Optional) Preparing CP059040.fasta, CP059040_gene.gff3 and CP059040.bed

     #Reference genome: https://www.ncbi.nlm.nih.gov/nuccore/CP059040
     cp /media/jhuang/Elements2/Data_Tam_RNASeq3/CP059040.fasta .     # Elements (Anna C.arnes)
     cp /media/jhuang/Elements2/Data_Tam_RNASeq3/CP059040_gene.gff3 .
     cp /media/jhuang/Elements2/Data_Tam_RNASeq3/CP059040_gene.gtf .
     cp /media/jhuang/Elements2/Data_Tam_RNASeq3/CP059040.bed .
     rsync -a -P CP059040.fasta jhuang@hamm:~/DATA/Data_Tam_RNAseq_2024/
     rsync -a -P CP059040_gene.gff3 jhuang@hamm:~/DATA/Data_Tam_RNAseq_2024/
     rsync -a -P CP059040.bed jhuang@hamm:~/DATA/Data_Tam_RNAseq_2024/
     (base) jhuang@WS-2290C:/media/jhuang/Elements2/Data_Tam_RNASeq3$ find . -name "CP059040*"
     ./CP059040.fasta
     ./CP059040.bed
     ./CP059040.gb
     ./CP059040.gff3
     ./CP059040.gff3_backup
     ./CP059040_full.gb
     ./CP059040_gene.gff3
     ./CP059040_gene.gtf
     ./CP059040_gene_old.gff3
     ./CP059040_rRNA.gff3
     ./CP059040_rRNA_v.gff3
    
     # ---- REF: Acinetobacter baumannii ATCC 17978 (DEBUG, gene_name failed) ----
     #gffread -E -F -T GCA_000015425.1_ASM1542v1_genomic.gff -o GCA_000015425.1_ASM1542v1_genomic.gtf_
     #grep "CDS" GCA_000015425.1_ASM1542v1_genomic.gtf_ > GCA_000015425.1_ASM1542v1_genomic.gtf
     #sed -i -e "s/\tCDS\t/\texon\t/g" GCA_000015425.1_ASM1542v1_genomic.gtf
     #gffread -E -F --bed GCA_000015425.1_ASM1542v1_genomic.gtf -o GCA_000015425.1_ASM1542v1_genomic.bed
    
     grep "locus_tag" GCA_000015425.1_ASM1542v1_genomic.gtf_ > GCA_000015425.1_ASM1542v1_genomic.gtf
     sed -i -e "s/\ttranscript\t/\texon\t/g" GCA_000015425.1_ASM1542v1_genomic.gtf # or using fc_count_type=transcript
     sed -i -e "s/\tgene_name\t/\tName\t/g" GCA_000015425.1_ASM1542v1_genomic.gtf
     gffread -E -F --bed GCA_000015425.1_ASM1542v1_genomic.gtf -o GCA_000015425.1_ASM1542v1_genomic.bed
     #grep "gene_name" GCA_000015425.1_ASM1542v1_genomic.gtf | wc -l  #69=3887-3803
    
     cp CP059040.gff3 CP059040_backup.gff3
     sed -i -e "s/\tGenbank\tgene\t/\tGenbank_gene\t/g" CP059040.gff3
     grep "Genbank_gene" CP059040.gff3 > CP059040_gene.gff3
     sed -i -e "s/\tGenbank_gene\t/\tGenbank\tgene\t/g" CP059040_gene.gff3
    
     #3796-3754=42--> they are pseudogene since grep "pseudogene" CP059040.gff3 | wc -l = 42
     # --------------------------------------------------------------------------------------------------------------------------------------------------
     # ---------- PREPARING gff3 file including gene_biotype=protein_coding+gene_biotype=tRNA = total(3754)) and gene_biotype=pseudogene(42) ------------
     # Build CP059040_gene.gff3 = all "Genbank gene" rows plus the pseudogene rows.
     cp CP059040.gff3 CP059040_backup.gff3
     # Temporarily fuse columns 2 and 3 into the unique token "Genbank_gene" so that a plain
     # grep selects exactly the gene rows, then restore the two columns in the extracted file.
     sed -i -e "s/\tGenbank\tgene\t/\tGenbank_gene\t/g" CP059040.gff3
     grep "Genbank_gene" CP059040.gff3 > CP059040_gene.gff3
     sed -i -e "s/\tGenbank_gene\t/\tGenbank\tgene\t/g" CP059040_gene.gff3
     # Read the pseudogene rows from the backup that the cp above actually wrote
     # (the previous command referenced CP059040.gff3_backup, which this step never creates).
     # NOTE(review): CP059040.gff3 itself keeps the fused column; restore it from the backup
     # before using it elsewhere - confirm.
     grep "gene_biotype=pseudogene" CP059040_backup.gff3 >> CP059040_gene.gff3    #-->3796
    
     #The whole point of the GTF format was to standardise certain aspects that are left open in GFF. Hence, there are many different valid ways to encode the same information in a valid GFF format, and any parser or converter needs to be written specifically for the choices the author of the GFF file made. For example, a GTF file requires the gene ID attribute to be called "gene_id", while in GFF files, it may be "ID", "Gene", something different, or completely missing.
     # from gff3 to gtf
     sed -i -e "s/\tID=gene-/\tgene_id \"/g" CP059040_gene.gtf
     sed -i -e "s/;/\"; /g" CP059040_gene.gtf
     sed -i -e "s/=/=\"/g" CP059040_gene.gtf
    
     #sed -i -e "s/\n/\"\n/g" CP059040_gene.gtf
     #using editor instead!
    
     #The following is GTF-format.
     CP000521.1      Genbank exon    95      1492    .       +       .       transcript_id "gene0"; gene_id "gene0"; Name "A1S_0001"; gbkey "Gene"; gene_biotype "protein_coding"; locus_tag "A1S_0001";
    
     #NZ_MJHA01000001.1       RefSeq  region  1       8663    .       +       .       ID=id0;Dbxref=taxon:575584;Name=unnamed1;collected-by=IG Schaub;collection-date=1948;country=USA: Vancouver;culture-collection=ATCC:19606;gbkey=Src;genome=plasmid;isolation-source=urine;lat-lon=37.53 N 75.4 W;map=unlocalized;mol_type=genomic DNA;nat-host=Homo sapiens;plasmid-name=unnamed1;strain=ATCC 19606;type-material=type strain of Acinetobacter baumannii
     #NZ_MJHA01000001.1       RefSeq  gene    228     746     .       -       .       ID=gene0;Name=BIT33_RS00005;gbkey=Gene;gene_biotype=protein_coding;locus_tag=BIT33_RS00005;old_locus_tag=BIT33_18795
     #NZ_MJHA01000001.1       Protein Homology        CDS     228     746     .       -       0       ID=cds0;Parent=gene0;Dbxref=Genbank:WP_000839337.1;Name=WP_000839337.1;gbkey=CDS;inference=COORDINATES: similar to AA sequence:RefSeq:WP_000839337.1;product=hypothetical protein;protein_id=WP_000839337.1;transl_table=11
    
     ##gff-version 3
     ##sequence-region CP059040.1 1 3980852
     ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=470
    
     gffread -E -F --bed CP059040.gff3 -o CP059040.bed    #-->3796
     ##prepare the GTF-format (see above) --> ERROR! ----> using CP059040.gff3
     ##stringtie adeIJ.abx_r1.sorted.bam -o adeIJ.abx_r1.sorted_transcripts.gtf -v -G /media/jhuang/Elements/Data_Tam_RNASeq3/CP059040.gff3 -A adeIJ.abx_r1.sorted.gene_abund.txt -C adeIJ.abx_r1.sorted.bam.cov_refs.gtf -e -b adeIJ.abx_r1.sorted_ballgown
     #[01/21 10:57:46] Loading reference annotation (guides)..
     #GFF warning: merging adjacent/overlapping segments of gene-H0N29_00815 on CP059040.1 (179715-179786, 179788-180810)
     #[01/21 10:57:46] 3796 reference transcripts loaded.
     #Default stack size for threads: 8388608
     #WARNING: no reference transcripts found for genomic sequence "gi|1906906720|gb|CP059040.1|"! (mismatched reference names?)
     #WARNING: no reference transcripts were found for the genomic sequences where reads were mapped!
     #Please make sure the -G annotation file uses the same naming convention for the genome sequences.
     #[01/21 10:58:30] All threads finished.
    
     #  ERROR: failed to find the gene identifier attribute in the 9th column of the provided GTF file.
     #  The specified gene identifier attribute is 'Name'
     #  An example of attributes included in your GTF annotation is 'ID=exon-H0N29_00075-1;Parent=rna-H0N29_00075;gbkey=rRNA;locus_tag=H0N29_00075;product=16S ribosomal RNA'
     #  The program has to terminate.
    
     #  ERROR: failed to find the gene identifier attribute in the 9th column of the provided GTF file.
     #  The specified gene identifier attribute is 'gene_biotype'
     #  An example of attributes included in your GTF annotation is 'ID=exon-H0N29_00075-1;Parent=rna-H0N29_00075;gbkey=rRNA;locus_tag=H0N29_00075;product=16S ribosomal RNA'
     #  The program has to terminate.
    
     #grep "ID=cds-" CP059040.gff3 | wc -l
     #grep "ID=exon-" CP059040.gff3 | wc -l
     #grep "ID=gene-" CP059040.gff3 | wc -l   #the same as H0N29_18980/5=3796
     # Count matching lines; the recorded results are kept as trailing comments.
     grep -c "gbkey=" CP059040.gff3       #7695
     grep -c "ID=id-" CP059040.gff3       #5
     grep -c "locus_tag=" CP059040.gff3   #7689
     #...
     cds   3701                             locus_tag=xxxx, no gene_biotype
     exon   96                              locus_tag=xxxx, no gene_biotype
     gene   3796                            locus_tag=xxxx, gene_biotype=xxxx,
     id  (riboswitch+direct_repeat,5)       both no --> ignoring them!!  # grep "ID=id-" CP059040.gff3
     rna    96                              locus_tag=xxxx, no gene_biotype
     ------------------
         7694
    
     cp CP059040.gff3_backup CP059040.gff3
     grep "^##" CP059040.gff3 > CP059040_gene.gff3
     grep "ID=gene" CP059040.gff3 >> CP059040_gene.gff3
     #!!!!VERY_IMPORTANT!!!!: change type '\tgene\t' to '\texon\t' (the file contains only the ID=gene records)!
     sed -i -e "s/\tgene\t/\texon\t/g" CP059040_gene.gff3
  5. Preparing the directory trimmed

     mkdir trimmed trimmed_unpaired;
     # Trim all nine samples in one pass. The two stacked `for` headers formed a
     # nested loop that processed only the MHB samples (once per outer iteration)
     # and truncated the log each time; a single loop trims every sample once and
     # collects the stderr of all runs in one log file.
     for sample_id in AUM_r1 AUM_r2 AUM_r3 Urine_r1 Urine_r2 Urine_r3 MHB_r1 MHB_r2 MHB_r3; do
             java -jar /home/jhuang/Tools/Trimmomatic-0.36/trimmomatic-0.36.jar PE -threads 100 "raw_data/${sample_id}_R1.fq.gz" "raw_data/${sample_id}_R2.fq.gz" "trimmed/${sample_id}_R1.fq.gz" "trimmed_unpaired/${sample_id}_R1.fq.gz" "trimmed/${sample_id}_R2.fq.gz" "trimmed_unpaired/${sample_id}_R2.fq.gz" ILLUMINACLIP:/home/jhuang/Tools/Trimmomatic-0.36/adapters/TruSeq3-PE-2.fa:2:30:10:8:TRUE LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 AVGQUAL:20
     done 2> trimmomatic_pe.log
  6. Preparing samplesheet.csv

     sample,fastq_1,fastq_2,strandedness
     AUM_r1,AUM_r1_R1.fq.gz,AUM_r1_R2.fq.gz,auto
     AUM_r2,AUM_r2_R1.fq.gz,AUM_r2_R2.fq.gz,auto
     AUM_r3,AUM_r3_R1.fq.gz,AUM_r3_R2.fq.gz,auto
     MHB_r1,MHB_r1_R1.fq.gz,MHB_r1_R2.fq.gz,auto
     MHB_r2,MHB_r2_R1.fq.gz,MHB_r2_R2.fq.gz,auto
     MHB_r3,MHB_r3_R1.fq.gz,MHB_r3_R2.fq.gz,auto
     Urine_r1,Urine_r1_R1.fq.gz,Urine_r1_R2.fq.gz,auto
     Urine_r2,Urine_r2_R1.fq.gz,Urine_r2_R2.fq.gz,auto
     Urine_r3,Urine_r3_R1.fq.gz,Urine_r3_R2.fq.gz,auto
  7. nextflow run

     #Example1: http://xgenes.com/article/article-content/157/prepare-virus-gtf-for-nextflow-run/
    
     docker pull nfcore/rnaseq
     ln -s /home/jhuang/Tools/nf-core-rnaseq-3.12.0/ rnaseq
    
     #Default: --gtf_group_features 'gene_id'  --gtf_extra_attributes 'gene_name' --featurecounts_group_type 'gene_biotype' --featurecounts_feature_type 'exon'
     #(host_env) !NOT_WORKING! jhuang@WS-2290C:~/DATA/Data_Tam_RNAseq_2024$ /usr/local/bin/nextflow run rnaseq/main.nf --input samplesheet.csv --outdir results    --fasta "/home/jhuang/DATA/Data_Tam_RNAseq_2024/CP059040.fasta" --gff "/home/jhuang/DATA/Data_Tam_RNAseq_2024/CP059040.gff"        -profile docker -resume  --max_cpus 55 --max_memory 512.GB --max_time 2400.h    --save_align_intermeds --save_unaligned --save_reference    --aligner 'star_salmon'    --gtf_group_features 'gene_id'  --gtf_extra_attributes 'gene_name' --featurecounts_group_type 'gene_biotype' --featurecounts_feature_type 'transcript'
    
     # -- DEBUG_1 (CDS --> exon in CP059040.gff) --
     #Checking the record (see below) in results/genome/CP059040.gtf
     #In ./results/genome/CP059040.gtf e.g. "CP059040.1      Genbank transcript      1       1398    .       +       .       transcript_id "gene-H0N29_00005"; gene_id "gene-H0N29_00005"; gene_name "dnaA"; Name "dnaA"; gbkey "Gene"; gene "dnaA"; gene_biotype "protein_coding"; locus_tag "H0N29_00005";"
     #--featurecounts_feature_type 'transcript' returns only the tRNA results
     #Reason: the tRNA records have "transcript" and "exon" features, whereas the gene records have "transcript" and "CDS" features; therefore replace CDS with exon.
    
     grep -P "\texon\t" CP059040.gff | sort | wc -l    #96
     grep -P "cmsearch\texon\t" CP059040.gff | wc -l    #=10  signal recognition particle sRNA small type, transfer-messenger RNA, 5S ribosomal RNA
     grep -P "Genbank\texon\t" CP059040.gff | wc -l    #=12  16S and 23S ribosomal RNA
     grep -P "tRNAscan-SE\texon\t" CP059040.gff | wc -l    #tRNA 74
     wc -l star_salmon/AUM_r3/quant.genes.sf  #--featurecounts_feature_type 'transcript' results in 96 records!
    
     grep -P "\tCDS\t" CP059040.gff | wc -l  #3701
     sed 's/\tCDS\t/\texon\t/g' CP059040.gff > CP059040_m.gff
     grep -P "\texon\t" CP059040_m.gff | sort | wc -l  #3797
    
     # -- DEBUG_2: combination of 'CP059040_m.gff' and 'exon' results in ERROR, using 'transcript' instead!
     --gff "/home/jhuang/DATA/Data_Tam_RNAseq_2024/CP059040_m.gff" --featurecounts_feature_type 'transcript'
    
     # ---- SUCCESSFUL with directly downloaded gff3 and fasta from NCBI using docker after replacing 'CDS' with 'exon' ----
     (host_env) /usr/local/bin/nextflow run rnaseq/main.nf --input samplesheet.csv --outdir results    --fasta "/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine/CP059040.fasta" --gff "/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine/CP059040_m.gff"        -profile docker -resume  --max_cpus 55 --max_memory 512.GB --max_time 2400.h    --save_align_intermeds --save_unaligned --save_reference    --aligner 'star_salmon'    --gtf_group_features 'gene_id'  --gtf_extra_attributes 'gene_name' --featurecounts_group_type 'gene_biotype' --featurecounts_feature_type 'transcript'
    
     # -- DEBUG_3: make sure the sequence name in the FASTA header is the same as the one used in the *_m.gff file
  8. Import data and pca-plot

     #mamba activate r_env
    
     #install.packages("ggfun")
     # Import the required libraries
     library("AnnotationDbi")
     library("clusterProfiler")
     library("ReactomePA")
     library(gplots)
     library(tximport)
     library(DESeq2)
     #library("org.Hs.eg.db")
     library(dplyr)
     library(tidyverse)
     #install.packages("devtools")
     #devtools::install_version("gtable", version = "0.3.0")
     library(gplots)
     library("RColorBrewer")
     #install.packages("ggrepel")
     library("ggrepel")
     # install.packages("openxlsx")
     library(openxlsx)
     library(EnhancedVolcano)
     library(DESeq2)
     library(edgeR)
    
     setwd("~/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/results/star_salmon")
     # Define paths to your Salmon output quantification files
     files <- c("Urine_r1" = "./Urine_r1/quant.sf",
             "Urine_r2" = "./Urine_r2/quant.sf",
             "Urine_r3" = "./Urine_r3/quant.sf",
             "MHB_r1" = "./MHB_r1/quant.sf",
             "MHB_r2" = "./MHB_r2/quant.sf",
             "MHB_r3" = "./MHB_r3/quant.sf")
     # Import the transcript abundance data with tximport
     txi <- tximport(files, type = "salmon", txIn = TRUE, txOut = TRUE)
     # Define the replicates and condition of the samples
     replicate <- factor(c("r1", "r2", "r3", "r1", "r2", "r3"))
     condition <- factor(c("Urine","Urine","Urine", "MHB","MHB","MHB"))
     # Define the colData for DESeq2
     colData <- data.frame(condition=condition, replicate=replicate, row.names=names(files))
    
     # ------------------------
     # 1️⃣ Setup and input files
     # ------------------------
    
     # Read in transcript-to-gene mapping
     tx2gene <- read.table("salmon_tx2gene.tsv", header=FALSE, stringsAsFactors=FALSE)
     colnames(tx2gene) <- c("transcript_id", "gene_id", "gene_name")
    
     # Prepare tx2gene for gene-level summarization (remove gene_name if needed)
     tx2gene_geneonly <- tx2gene[, c("transcript_id", "gene_id")]
    
     # -------------------------------
     # 2️⃣ Transcript-level counts
     # -------------------------------
     # Create DESeqDataSet directly from tximport (transcript-level)
     dds_tx <- DESeqDataSetFromTximport(txi, colData=colData, design=~condition)
     write.csv(counts(dds_tx), file="transcript_counts.csv")
    
     # --------------------------------
     # 3️⃣ Gene-level summarization
     # --------------------------------
     # Re-import Salmon data summarized at gene level
     txi_gene <- tximport(files, type="salmon", tx2gene=tx2gene_geneonly, txOut=FALSE)
    
     # Create DESeqDataSet for gene-level counts
     dds <- DESeqDataSetFromTximport(txi_gene, colData=colData, design=~condition+replicate)
    
     # --------------------------------
     # 4️⃣ Raw counts table (with gene names)
     # --------------------------------
     # Extract raw gene-level counts
     counts_data <- as.data.frame(counts(dds, normalized=FALSE))
     counts_data$gene_id <- rownames(counts_data)
    
     # Add gene names
     tx2gene_unique <- unique(tx2gene[, c("gene_id", "gene_name")])
     counts_data <- merge(counts_data, tx2gene_unique, by="gene_id", all.x=TRUE)
    
     # Reorder columns: gene_id, gene_name, then counts
     count_cols <- setdiff(colnames(counts_data), c("gene_id", "gene_name"))
     counts_data <- counts_data[, c("gene_id", "gene_name", count_cols)]
    
     # --------------------------------
     # 5️⃣ Calculate CPM
     # --------------------------------
     library(edgeR)
     library(openxlsx)
    
     # Prepare count matrix for CPM calculation
     count_matrix <- as.matrix(counts_data[, !(colnames(counts_data) %in% c("gene_id", "gene_name"))])
    
     # Calculate CPM
     #cpm_matrix <- cpm(count_matrix, normalized.lib.sizes=FALSE)
     total_counts <- colSums(count_matrix)
     cpm_matrix <- t(t(count_matrix) / total_counts) * 1e6
     cpm_matrix <- as.data.frame(cpm_matrix)
    
     # Add gene_id and gene_name back to CPM table
     cpm_counts <- cbind(counts_data[, c("gene_id", "gene_name")], cpm_matrix)
    
     # --------------------------------
     # 6️⃣ Save outputs
     # --------------------------------
     # Raw counts as CSV (base R: the argument is row.names)
     write.csv(counts_data, "gene_raw_counts.csv", row.names=FALSE)
     # Raw counts and CPM as Excel files (openxlsx: the argument is rowNames)
     write.xlsx(counts_data, "gene_raw_counts.xlsx", rowNames=FALSE)
     write.xlsx(cpm_counts, "gene_cpm_counts.xlsx", rowNames=FALSE)
  9. PCA

     dim(counts(dds))
     head(counts(dds), 10)
     rld <- rlogTransformation(dds)

     # draw simple pca and heatmap
     #mat <- assay(rld)
     #mm <- model.matrix(~condition, colData(rld))
     #mat <- limma::removeBatchEffect(mat, batch=rld$batch, design=mm)
     #assay(rld) <- mat
     # -- pca --
     png("pca.png", 1200, 800)
     plotPCA(rld, intgroup=c("condition"))
     dev.off()
     # -- heatmap --
     png("heatmap.png", 1200, 800)
     distsRL <- dist(t(assay(rld)))
     mat <- as.matrix(distsRL)
     hc <- hclust(distsRL)
     hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
     heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
     dev.off()
  10. Select the differentially expressed genes

     #https://galaxyproject.eu/posts/2020/08/22/three-steps-to-galaxify-your-tool/
     #https://www.biostars.org/p/282295/
     #https://www.biostars.org/p/335751/
     #> dds$condition
     #[1] Urine Urine Urine MHB   MHB   MHB
     #Levels: MHB Urine
     #CONSOLE: mkdir star_salmon/degenes
    
     setwd("degenes")
     #---- relevel to control ----
     dds$condition <- relevel(dds$condition, "MHB")
     dds = DESeq(dds, betaPrior=FALSE)
     resultsNames(dds)
     clist <- c("Urine_vs_MHB")
    
     for (i in clist) {
       contrast = paste("condition", i, sep="_")
       res = results(dds, name=contrast)
       res <- res[!is.na(res$log2FoldChange),]
       res_df <- as.data.frame(res)
    
       write.csv(as.data.frame(res_df[order(res_df$pvalue),]), file = paste(i, "all.txt", sep="-"))
       up <- subset(res_df, padj<=0.05 & log2FoldChange>=1.35)
       down <- subset(res_df, padj<=0.05 & log2FoldChange<=-1.35)
       write.csv(as.data.frame(up[order(up$log2FoldChange,decreasing=TRUE),]), file = paste(i, "up.txt", sep="-"))
       write.csv(as.data.frame(down[order(abs(down$log2FoldChange),decreasing=TRUE),]), file = paste(i, "down.txt", sep="-"))
     }
    
     # -- Under host-env --
     grep -P "\tgene\t" CP059040.gff > CP059040_gene.gff
     python3 ~/Scripts/replace_gene_names.py /home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/CP059040_gene.gff Urine_vs_MHB-all.txt Urine_vs_MHB-all.csv
     python3 ~/Scripts/replace_gene_names.py /home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/CP059040_gene.gff Urine_vs_MHB-up.txt Urine_vs_MHB-up.csv
     python3 ~/Scripts/replace_gene_names.py /home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/CP059040_gene.gff Urine_vs_MHB-down.txt Urine_vs_MHB-down.csv
    
     res <- read.csv("Urine_vs_MHB-all.csv")
     # Replace empty GeneName with modified GeneID
     res$GeneName <- ifelse(
       res$GeneName == "" | is.na(res$GeneName),
       gsub("gene-", "", res$GeneID),
       res$GeneName
     )
     duplicated_genes <- res[duplicated(res$GeneName), "GeneName"]
    
     res <- res %>%
       group_by(GeneName) %>%
       slice_min(padj, with_ties = FALSE) %>%
       ungroup()
     res <- as.data.frame(res)
     # Sort res first by padj (ascending) and then by log2FoldChange (descending)
     res <- res[order(res$padj, -res$log2FoldChange), ]
    
     # Assuming res is your dataframe and already processed
     # which() keeps only the rows where the condition is TRUE, so genes whose padj
     # (or log2FoldChange) is NA are dropped instead of showing up as all-NA rows.
     # Filter up-regulated genes: log2FoldChange > 2 and padj < 1e-2
     up_regulated <- res[which(res$log2FoldChange > 2 & res$padj < 1e-2), ]
     # Filter down-regulated genes: log2FoldChange < -2 and padj < 1e-2
     down_regulated <- res[which(res$log2FoldChange < -2 & res$padj < 1e-2), ]
     # Create a new workbook
     wb <- createWorkbook()
     # Add the complete dataset as the first sheet
     addWorksheet(wb, "Complete_Data")
     writeData(wb, "Complete_Data", res)
     # Add the up-regulated genes as the second sheet
     addWorksheet(wb, "Up_Regulated")
     writeData(wb, "Up_Regulated", up_regulated)
     # Add the down-regulated genes as the third sheet
     addWorksheet(wb, "Down_Regulated")
     writeData(wb, "Down_Regulated", down_regulated)
     # Save the workbook to a file
     saveWorkbook(wb, "Gene_Expression_Urine_vs_MHB.xlsx", overwrite = TRUE)
    
     # Set the 'GeneName' column as row.names
     rownames(res) <- res$GeneName
     # Drop the 'GeneName' column since it's now the row names
     res$GeneName <- NULL
     head(res)
    
     ## Ensure the data frame matches the expected format
     ## For example, it should have columns: log2FoldChange, padj, etc.
     #res <- as.data.frame(res)
     ## Remove rows with NA in log2FoldChange (if needed)
     #res <- res[!is.na(res$log2FoldChange),]
    
     # Replace padj = 0 with a small value
     res$padj[res$padj == 0] <- 1e-305
    
     #library(EnhancedVolcano)
     # Assuming res is already sorted and processed
     png("Urine_vs_MHB.png", width=1200, height=2000)
     #max.overlaps = 10
     EnhancedVolcano(res,
                     lab = rownames(res),
                     x = 'log2FoldChange',
                     y = 'padj',
                     pCutoff = 1e-2,
                     FCcutoff = 2,
                     title = '',
                     subtitleLabSize = 18,
                     pointSize = 3.0,
                     labSize = 5.0,
                     colAlpha = 1,
                     legendIconSize = 4.0,
                     drawConnectors = TRUE,
                     widthConnectors = 0.5,
                     colConnectors = 'black',
                     subtitle = expression("Urine versus MHB"))
     dev.off()

KEGG and GO annotations in non-model organisms

https://www.biobam.com/functional-analysis/

  1. Assign KEGG and GO Terms (see diagram above)

    Since your organism is non-model, standard R databases (org.Hs.eg.db, etc.) won’t work. You’ll need to manually retrieve KEGG and GO annotations.

    • Preparing file 1 eggnog_out.emapper.annotations.txt for the R-code below: (KEGG Terms): EggNog based on orthology and phylogenies

      EggNOG-mapper assigns both KEGG Orthology (KO) IDs and GO terms.

      Install EggNOG-mapper:

        mamba create -n eggnog_env python=3.8 eggnog-mapper -c conda-forge -c bioconda  #eggnog-mapper_2.1.12
        mamba activate eggnog_env

      Run annotation:

        #diamond makedb --in eggnog6.prots.faa -d eggnog_proteins.dmnd
        mkdir /home/jhuang/mambaforge/envs/eggnog_env/lib/python3.8/site-packages/data/
        download_eggnog_data.py --dbname eggnog.db -y --data_dir /home/jhuang/mambaforge/envs/eggnog_env/lib/python3.8/site-packages/data/
        #NOT_WORKING: emapper.py -i CP059040_gene.fasta -o eggnog_dmnd_out --cpu 60 -m diamond[hmmer,mmseqs] --dmnd_db /home/jhuang/REFs/eggnog_data/data/eggnog_proteins.dmnd
        python ~/Scripts/update_fasta_header.py CP059040_protein_.fasta CP059040_protein.fasta
        emapper.py -i CP059040_protein.fasta -o eggnog_out --cpu 60 --resume
        #----> result annotations.tsv: Contains KEGG, GO, and other functional annotations.
        #---->  470.IX87_14445:
            * 470 likely refers to the organism or strain (e.g., Acinetobacter baumannii ATCC 19606 or another related strain).
            * IX87_14445 would refer to a specific gene or protein within that genome.

      Extract KEGG KO IDs from annotations.emapper.annotations.

    • Preparing file 2 blast2goannot.annot2 for the R-code below:

      • Basic (GO Terms from ‘Blast2GO 5 Basic’, saved in blast2go_annot.annot): Using Blast/Diamond + Blast2GO_GUI based on sequence alignment + GO mapping

        • ‘Load protein sequences’ (Tags: NONE, generated columns: Nr, SeqName) –>
        • Buttons ‘blast’ (Tags: BLASTED, generated columns: Description, Length, #Hits, e-Value, sim mean),
        • Button ‘mapping’ (Tags: MAPPED, generated columns: #GO, GO IDs, GO Names), “Mapping finished – Please proceed now to annotation.”
        • Button ‘annot’ (Tags: ANNOTATED, generated columns: Enzyme Codes, Enzyme Names), “Annotation finished.”
          • Used parameter ‘Annotation CutOff’: The Blast2GO Annotation Rule seeks to find the most specific GO annotations with a certain level of reliability. An annotation score is calculated for each candidate GO which is composed by the sequence similarity of the Blast Hit, the evidence code of the source GO and the position of the particular GO in the Gene Ontology hierarchy. This annotation score cutoff select the most specific GO term for a given GO branch which lies above this value.
          • Used parameter ‘GO Weight’ is a value which is added to Annotation Score of a more general/abstract Gene Ontology term for each of its more specific, original source GO terms. In this case, more general GO terms which summarise many original source terms (those ones directly associated to the Blast Hits) will have a higher Annotation Score.
      • Advanced (GO Terms from ‘Blast2GO 5 Basic’): Interpro based protein families / domains –> Button interpro

        • Button ‘interpro’ (Tags: INTERPRO, generated columns: InterPro IDs, InterPro GO IDs, InterPro GO Names) –> “InterProScan Finished – You can now merge the obtained GO Annotations.”
      • MERGE the results of InterPro GO IDs (advanced) to GO IDs (basic) and generate final GO IDs, saved in blast2go_annot.annot2

        • Button ‘interpro’/’Merge InterProScan GOs to Annotation’ –> “Merge (add and validate) all GO terms retrieved via InterProScan to the already existing GO annotation.” –> “Finished merging GO terms from InterPro with annotations. Maybe you want to run ANNEX (Annotation Augmentation).”
        • (NOT_USED) Button ‘annot’/’ANNEX’ –> “ANNEX finished. Maybe you want to do the next step: Enzyme Code Mapping.”
      • PREPARING go_terms and ecterms: annot* file:

        cut -f1-2 -d$'\t' blast2go_annot.annot2 > blast2goannot.annot2

  2. Perform KEGG and GO Enrichment in R

         #BiocManager::install("GO.db")
         #BiocManager::install("AnnotationDbi")
    
         # Load required libraries
         library(openxlsx)  # For Excel file handling
         library(dplyr)     # For data manipulation
         library(tidyr)
         library(stringr)
         library(clusterProfiler)  # For KEGG and GO enrichment analysis
         #library(org.Hs.eg.db)  # Replace with appropriate organism database
         library(GO.db)
         library(AnnotationDbi)
    
         setwd("~/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/results/star_salmon/degenes")
         # Step 1: Load the blast2go annotation file with a check for missing columns
         annot_df <- read.table("/home/jhuang/b2gWorkspace_Tam_RNAseq_2024/blast2go_annot.annot2_",
                             header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
    
         # The file is expected to have exactly 2 columns (gene ID, annotation term); name them:
         colnames(annot_df) <- c("GeneID", "Term")
         # Step 2: Filter and aggregate GO and EC terms as before
         go_terms <- annot_df %>%
         filter(grepl("^GO:", Term)) %>%
         group_by(GeneID) %>%
         summarize(GOs = paste(Term, collapse = ","), .groups = "drop")
         ec_terms <- annot_df %>%
         filter(grepl("^EC:", Term)) %>%
         group_by(GeneID) %>%
         summarize(EC = paste(Term, collapse = ","), .groups = "drop")
    
         # Load the results
         res <- read.csv("Urine_vs_MHB-all.csv")   #up259, down138
    
         # Replace empty GeneName with modified GeneID
         res$GeneName <- ifelse(
             res$GeneName == "" | is.na(res$GeneName),
             gsub("gene-", "", res$GeneID),
             res$GeneName
         )
    
         # Remove duplicated genes by selecting the gene with the smallest padj
         duplicated_genes <- res[duplicated(res$GeneName), "GeneName"]
    
         res <- res %>%
         group_by(GeneName) %>%
         slice_min(padj, with_ties = FALSE) %>%
         ungroup()
    
         res <- as.data.frame(res)
         # Sort res first by padj (ascending) and then by log2FoldChange (descending)
         res <- res[order(res$padj, -res$log2FoldChange), ]
         # Read eggnog annotations
         eggnog_data <- read.delim("~/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/eggnog_out.emapper.annotations.txt", header = TRUE, sep = "\t")
         # Remove the "gene-" prefix from GeneID in res to match eggnog 'query' format
         res$GeneID <- gsub("gene-", "", res$GeneID)
         # Merge eggnog data with res based on GeneID
         res <- res %>%
         left_join(eggnog_data, by = c("GeneID" = "query"))
    
         # DEBUG: NOT_NECESSARY, since res has already GeneName
         ##Convert row names to a new column 'GeneName' in res
         #res_with_geneName <- res %>%
         #mutate(GeneName = rownames(res)) %>%
         #as.data.frame()  # Ensure that it's a regular data frame without row names
         ## View the result
         #head(res_with_geneName)
    
         # Merge with the res dataframe
         # Perform the left joins and rename columns
         res_updated <- res %>%
         left_join(go_terms, by = "GeneID") %>%
         left_join(ec_terms, by = "GeneID") %>% dplyr::select(-EC.x, -GOs.x) %>% dplyr::rename(EC = EC.y, GOs = GOs.y)
    
         # DEBUG: NOT_NECESSARY, since 'GeneName' is already the first column.
         ## Reorder columns to move 'GeneName' as the first column in res_updated
         #res_updated <- res_updated %>%
         #select(GeneName, everything())
    
         ## Count the number of rows in the KEGG_ko, GOs, EC columns that have non-missing values
         #num_non_missing_KEGG_ko <- sum(res_updated$KEGG_ko != "-" & !is.na(res_updated$KEGG_ko))
         #print(num_non_missing_KEGG_ko)
         ##[1] 2030
         #num_non_missing_GOs <- sum(res_updated$GOs != "-" & !is.na(res_updated$GOs))
         #print(num_non_missing_GOs)
         ##[1] 2865 --> 2875
         #num_non_missing_EC <- sum(res_updated$EC != "-" & !is.na(res_updated$EC))
         #print(num_non_missing_EC)
         ##[1] 1701
    
         # which() keeps only the rows where the condition is TRUE, so genes whose padj
         # is NA are dropped instead of showing up as all-NA rows.
         # Filter up-regulated genes: log2FoldChange > 2 and padj < 0.01
         up_regulated <- res_updated[which(res_updated$log2FoldChange > 2 & res_updated$padj < 0.01), ]
         # Filter down-regulated genes: log2FoldChange < -2 and padj < 0.01
         down_regulated <- res_updated[which(res_updated$log2FoldChange < -2 & res_updated$padj < 0.01), ]
    
         # Create a new workbook
         wb <- createWorkbook()
         # Add the complete dataset as the first sheet (with annotations)
         addWorksheet(wb, "Complete_Data")
         writeData(wb, "Complete_Data", res_updated)
         # Add the up-regulated genes as the second sheet (with annotations)
         addWorksheet(wb, "Up_Regulated")
         writeData(wb, "Up_Regulated", up_regulated)
         # Add the down-regulated genes as the third sheet (with annotations)
         addWorksheet(wb, "Down_Regulated")
         writeData(wb, "Down_Regulated", down_regulated)
         # Save the workbook to a file
         saveWorkbook(wb, "Gene_Expression_with_Annotations_Urine_vs_MHB.xlsx", overwrite = TRUE)
    
         # Set GeneName as row names after the join
         rownames(res_updated) <- res_updated$GeneName
         res_updated <- res_updated %>% dplyr::select(-GeneName)
         ## Set the 'GeneName' column as row.names
         #rownames(res_updated) <- res_updated$GeneName
         ## Drop the 'GeneName' column since it's now the row names
         #res_updated$GeneName <- NULL
    
         # ---- Perform KEGG enrichment analysis (up_regulated) ----
         # Build the KO list for enrichKEGG: strip the "ko:" prefix, split entries that
         # carry several comma-separated KOs into individual IDs, and drop NA values
         # and "-" placeholders so that only real K numbers are tested.
         gene_list_kegg_up <- gsub("ko:", "", up_regulated$KEGG_ko)
         gene_list_kegg_up <- unlist(strsplit(gene_list_kegg_up, ",", fixed = TRUE))
         gene_list_kegg_up <- unique(gene_list_kegg_up[!is.na(gene_list_kegg_up) & gene_list_kegg_up != "-"])
         kegg_enrichment_up <- enrichKEGG(gene = gene_list_kegg_up, organism = 'ko')
         # -- convert the GeneID (Kxxxxxx) to the true GeneID --
         # Step 0: Create KEGG to GeneID mapping
         kegg_to_geneid_up <- up_regulated %>%
         dplyr::select(KEGG_ko, GeneID) %>%
         filter(!is.na(KEGG_ko)) %>%  # Remove missing KEGG KO entries
         mutate(KEGG_ko = str_remove(KEGG_ko, "ko:"))  # Remove 'ko:' prefix if present
         # Step 1: Clean KEGG_ko values (separate multiple KEGG IDs)
         kegg_to_geneid_clean <- kegg_to_geneid_up %>%
         mutate(KEGG_ko = str_remove_all(KEGG_ko, "ko:")) %>%  # Remove 'ko:' prefixes
         separate_rows(KEGG_ko, sep = ",") %>%  # Ensure each KEGG ID is on its own row
         filter(KEGG_ko != "-") %>%  # Remove invalid KEGG IDs ("-")
         distinct()  # Remove any duplicate mappings
         # Step 2.1: Expand geneID column in kegg_enrichment_up
         expanded_kegg <- kegg_enrichment_up %>%
         as.data.frame() %>%
         separate_rows(geneID, sep = "/") %>%  # Split multiple KEGG IDs (Kxxxxx)
         left_join(kegg_to_geneid_clean, by = c("geneID" = "KEGG_ko"), relationship = "many-to-many") %>%  # Explicitly handle many-to-many
         distinct() %>%  # Remove duplicate matches
         group_by(ID) %>%
         summarise(across(everything(), ~ paste(unique(na.omit(.)), collapse = "/")), .groups = "drop")  # Re-collapse results
         #dplyr::glimpse(expanded_kegg)
         # Step 3.1: Replace geneID column in the original dataframe
         kegg_enrichment_up_df <- as.data.frame(kegg_enrichment_up)
         # Remove old geneID column and merge new one
         kegg_enrichment_up_df <- kegg_enrichment_up_df %>%
         dplyr::select(-geneID) %>%  # Remove old geneID column
         left_join(expanded_kegg %>% dplyr::select(ID, GeneID), by = "ID") %>%  # Merge new GeneID column
         dplyr::rename(geneID = GeneID)  # Rename column back to geneID
    
         # ---- Perform KEGG enrichment analysis (down_regulated) ----
         # Step 1: Extract KEGG KO terms from down-regulated genes.
         # Strip the "ko:" prefix, split entries that carry several comma-separated KOs
         # into individual IDs, and drop NA values and "-" placeholders.
         gene_list_kegg_down <- gsub("ko:", "", down_regulated$KEGG_ko)
         gene_list_kegg_down <- unlist(strsplit(gene_list_kegg_down, ",", fixed = TRUE))
         gene_list_kegg_down <- unique(gene_list_kegg_down[!is.na(gene_list_kegg_down) & gene_list_kegg_down != "-"])
         # Step 2: Perform KEGG enrichment analysis
         kegg_enrichment_down <- enrichKEGG(gene = gene_list_kegg_down, organism = 'ko')
         # --- Convert KEGG gene IDs (Kxxxxxx) to actual GeneIDs ---
         # Step 3: Create KEGG to GeneID mapping from down_regulated dataset
         kegg_to_geneid_down <- down_regulated %>%
         dplyr::select(KEGG_ko, GeneID) %>%
         filter(!is.na(KEGG_ko)) %>%  # Remove missing KEGG KO entries
         mutate(KEGG_ko = str_remove(KEGG_ko, "ko:"))  # Remove 'ko:' prefix if present
         # Step 4: Clean KEGG_ko values (handle multiple KEGG IDs)
         kegg_to_geneid_down_clean <- kegg_to_geneid_down %>%
         mutate(KEGG_ko = str_remove_all(KEGG_ko, "ko:")) %>%  # Remove 'ko:' prefixes
         separate_rows(KEGG_ko, sep = ",") %>%  # Ensure each KEGG ID is on its own row
         filter(KEGG_ko != "-") %>%  # Remove invalid KEGG IDs ("-")
         distinct()  # Remove duplicate mappings
         # Step 5: Expand geneID column in kegg_enrichment_down
         expanded_kegg_down <- kegg_enrichment_down %>%
         as.data.frame() %>%
         separate_rows(geneID, sep = "/") %>%  # Split multiple KEGG IDs (Kxxxxx)
         left_join(kegg_to_geneid_down_clean, by = c("geneID" = "KEGG_ko"), relationship = "many-to-many") %>%  # Handle many-to-many mappings
         distinct() %>%  # Remove duplicate matches
         group_by(ID) %>%
         summarise(across(everything(), ~ paste(unique(na.omit(.)), collapse = "/")), .groups = "drop")  # Re-collapse results
         # Step 6: Replace geneID column in the original kegg_enrichment_down dataframe
         kegg_enrichment_down_df <- as.data.frame(kegg_enrichment_down) %>%
         dplyr::select(-geneID) %>%  # Remove old geneID column
         left_join(expanded_kegg_down %>% dplyr::select(ID, GeneID), by = "ID") %>%  # Merge new GeneID column
         dplyr::rename(geneID = GeneID)  # Rename column back to geneID
         # View the updated dataframe
         head(kegg_enrichment_down_df)
    
         # Start a fresh workbook and write one sheet per KEGG result table;
         # the list names double as the worksheet names.
         wb <- createWorkbook()
         kegg_tables <- list(
             KEGG_Enrichment_Up   = kegg_enrichment_up_df,
             KEGG_Enrichment_Down = kegg_enrichment_down_df
         )
         for (sheet in names(kegg_tables)) {
             addWorksheet(wb, sheet)
             writeData(wb, sheet, as.data.frame(kegg_tables[[sheet]]))
         }
         # Write the KEGG-only workbook, replacing any earlier copy
         saveWorkbook(wb, "KEGG_Enrichment.xlsx", overwrite = TRUE)
    
         # ---- Perform GO enrichment analysis (TODO: extract the merged GO IDs from 'Blast2GO 5 Basic' and adapt the code below!)----

         # Gene lists to test: one over-representation analysis per direction
         gene_list_go_up <- up_regulated$GeneID  # Extract the 149 up-regulated genes
         gene_list_go_down <- down_regulated$GeneID  # Extract the 65 down-regulated genes

         # Define background gene set (all genes in res)
         background_genes <- res_updated$GeneID  # Extract the 3646 background genes

         # Build the TERM2GENE table expected by enricher(): column 1 = GO term, column 2 = gene.
         # Rows without a real GO ID are dropped so that unannotated genes do not end up
         # as members of a pseudo-term; "-" is the placeholder already filtered out of
         # KEGG_ko above (NOTE(review): assumed to be used for GOs as well - confirm).
         go_annotation <- res_updated[, c("GOs","GeneID")]  # Extract relevant columns
         go_annotation <- go_annotation %>%
             tidyr::separate_rows(GOs, sep = ",") %>%  # Split multiple GO terms into separate rows
             dplyr::filter(!is.na(GOs), GOs != "", GOs != "-") %>%
             dplyr::distinct()

         # Perform GO enrichment analysis, where pAdjustMethod is one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"
         # Both cutoffs are set to 1.0 so that *every* tested term is reported; the default
         # qvalueCutoff (0.2) would otherwise still discard terms even though pvalueCutoff
         # is 1.0. Filtering on p.adjust is done afterwards on the exported table.
         go_enrichment_up <- enricher(
             gene = gene_list_go_up,          # Up-regulated genes
             TERM2GENE = go_annotation,       # Custom GO annotation
             pvalueCutoff = 1.0,              # No filtering on (adjusted) p-value
             qvalueCutoff = 1.0,              # No filtering on q-value
             pAdjustMethod = "BH",
             universe = background_genes      # Define the background gene set
         )
         go_enrichment_up <- as.data.frame(go_enrichment_up)

         go_enrichment_down <- enricher(
             gene = gene_list_go_down,        # Down-regulated genes
             TERM2GENE = go_annotation,       # Custom GO annotation
             pvalueCutoff = 1.0,              # No filtering on (adjusted) p-value
             qvalueCutoff = 1.0,              # No filtering on q-value
             pAdjustMethod = "BH",
             universe = background_genes      # Define the background gene set
         )
         go_enrichment_down <- as.data.frame(go_enrichment_down)
    
         ## NOTE: the 'p.adjust' column is kept in both tables (pAdjustMethod = "BH" is applied above).

         # Look up the GO.db term name for one GO identifier.
         #   go_id : a single GO ID string (e.g. "GO:0008150")
         # Returns the TERM string, or NA_character_ when GO.db has no entry for the ID
         # (the failing ID is reported via message() so it can be fixed by hand).
         go_term_description <- function(go_id) {
             term <- tryCatch(
                 AnnotationDbi::select(GO.db, keys = go_id, columns = "TERM", keytype = "GOID"),
                 error = function(e) {
                     message(paste("Error for GO term:", go_id))  # Print which GO ID caused the error
                     data.frame(TERM = NA_character_)             # In case of error, fall back to NA
                 }
             )
             if (nrow(term) > 0) {
                 as.character(term$TERM[1])
             } else {
                 NA_character_  # If no description found, return NA
             }
         }

         # Update the Description column with the term descriptions.
         # vapply() always yields a character vector, also for a result table with zero rows
         # (sapply() would return an empty list there).
         go_enrichment_up$Description <- vapply(
             as.character(go_enrichment_up$ID), go_term_description,
             character(1), USE.NAMES = FALSE
         )
         ## Print the updated data frame
         #print(go_enrichment_up)

         go_enrichment_down$Description <- vapply(
             as.character(go_enrichment_down$ID), go_term_description,
             character(1), USE.NAMES = FALSE
         )
    
         # Append one worksheet per GO result table to the existing workbook `wb`;
         # the list names double as the worksheet names.
         go_tables <- list(
             GO_Enrichment_Up   = go_enrichment_up,
             GO_Enrichment_Down = go_enrichment_down
         )
         for (sheet in names(go_tables)) {
             addWorksheet(wb, sheet)
             writeData(wb, sheet, as.data.frame(go_tables[[sheet]]))
         }

         # Write the combined KEGG + GO workbook, replacing any earlier copy
         saveWorkbook(wb, "KEGG_and_GO_Enrichments_Urine_vs_MHB.xlsx", overwrite = TRUE)
    
         #Error for GO term: GO:0006807 -> manually set its Description to "obsolete nitrogen compound metabolic process"
         #TODO: highlight the rows in yellow where p.adjust <= 0.05 in the GO_Enrichment sheets!
  3. Finalizing the KEGG and GO Enrichment table

         1. NOTE: the geneIDs in KEGG_Enrichment have already been translated from KO identifiers to GeneIDs in the H0N29_* format;
         2. NEED_MANUAL_DELETION: p.adjust values have been calculated; keep only the records in the GO_Enrichment results with p.adjust <= 0.05 and delete the rest.

Processing RNAseq_2025_WT_vs_ΔIJ_on_ATCC19606

  1. Vorgabe

     #perform PCA analysis, Venn diagram analysis, as well as KEGG and GO annotations. We would also appreciate it if you could include CPM calculations for this dataset (gene_cpm_counts.xlsx). For comparative analysis, we are particularly interested in identifying DEGs between WT and ΔIJ across the different treatments and time points.
  2. Preparing raw data

     mkdir raw_data; cd raw_data
     # Symlink every delivered FASTQ under a normalised name:
     #   <vendor>-<timepoint>-<n>_<mate>.fq.gz  ->  <link>-<timepoint>-r<n>_R<mate>.fq.gz
     # Each entry below is "<vendor prefix>:<link prefix>"; note that the vendor folders
     # are named WT0_5-* for wild type but 0_5ΔIJ-* for the mutant.
     src_root=../RSMR00204/X101SC25062155-Z01/X101SC25062155-Z01-J001/01.RawData
     for pair in "WT:WT" "ΔIJ:deltaIJ" "preWT:pre_WT" "preΔIJ:pre_deltaIJ" "WT0_5:0_5_WT" "0_5ΔIJ:0_5_deltaIJ"; do
       vendor=${pair%%:*}
       link=${pair#*:}
       for timepoint in 17 24; do
         for rep in 1 2 3; do
           for mate in 1 2; do
             ln -s "${src_root}/${vendor}-${timepoint}-${rep}/${vendor}-${timepoint}-${rep}_${mate}.fq.gz" "${link}-${timepoint}-r${rep}_R${mate}.fq.gz"
           done
         done
       done
     done
  3. (Done) Downloading CP059040.fasta and CP059040.gff from GenBank

  4. Preparing the directory trimmed

     mkdir trimmed trimmed_unpaired;
     # Run Trimmomatic in paired-end mode on every sample in raw_data/.
     # Paired output goes to trimmed/, unpaired output to trimmed_unpaired/;
     # stderr of all runs is collected in trimmomatic_pe.log.
     # The loop is closed exactly once (a second 'done' is a shell syntax error).
     for sample_id in WT-17-r1 WT-17-r2 WT-17-r3 WT-24-r1 WT-24-r2 WT-24-r3 deltaIJ-17-r1 deltaIJ-17-r2 deltaIJ-17-r3 deltaIJ-24-r1 deltaIJ-24-r2 deltaIJ-24-r3  pre_WT-17-r1 pre_WT-17-r2 pre_WT-17-r3 pre_WT-24-r1 pre_WT-24-r2 pre_WT-24-r3 pre_deltaIJ-17-r1 pre_deltaIJ-17-r2 pre_deltaIJ-17-r3 pre_deltaIJ-24-r1 pre_deltaIJ-24-r2 pre_deltaIJ-24-r3  0_5_WT-17-r1 0_5_WT-17-r2 0_5_WT-17-r3 0_5_WT-24-r1 0_5_WT-24-r2 0_5_WT-24-r3 0_5_deltaIJ-17-r1 0_5_deltaIJ-17-r2 0_5_deltaIJ-17-r3 0_5_deltaIJ-24-r1 0_5_deltaIJ-24-r2 0_5_deltaIJ-24-r3; do
             java -jar /home/jhuang/Tools/Trimmomatic-0.36/trimmomatic-0.36.jar PE -threads 100 "raw_data/${sample_id}_R1.fq.gz" "raw_data/${sample_id}_R2.fq.gz" "trimmed/${sample_id}_R1.fq.gz" "trimmed_unpaired/${sample_id}_R1.fq.gz" "trimmed/${sample_id}_R2.fq.gz" "trimmed_unpaired/${sample_id}_R2.fq.gz" ILLUMINACLIP:/home/jhuang/Tools/Trimmomatic-0.36/adapters/TruSeq3-PE-2.fa:2:30:10:8:TRUE LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 AVGQUAL:20
     done 2> trimmomatic_pe.log;
  5. Preparing samplesheet.csv

     sample,fastq_1,fastq_2,strandedness
     WT_17_r1,WT-17-r1_R1.fq.gz,WT-17-r1_R2.fq.gz,auto
     WT_17_r2,WT-17-r2_R1.fq.gz,WT-17-r2_R2.fq.gz,auto
     WT_17_r3,WT-17-r3_R1.fq.gz,WT-17-r3_R2.fq.gz,auto
     WT_24_r1,WT-24-r1_R1.fq.gz,WT-24-r1_R2.fq.gz,auto
     WT_24_r2,WT-24-r2_R1.fq.gz,WT-24-r2_R2.fq.gz,auto
     WT_24_r3,WT-24-r3_R1.fq.gz,WT-24-r3_R2.fq.gz,auto
     deltaIJ_17_r1,deltaIJ-17-r1_R1.fq.gz,deltaIJ-17-r1_R2.fq.gz,auto
     deltaIJ_17_r2,deltaIJ-17-r2_R1.fq.gz,deltaIJ-17-r2_R2.fq.gz,auto
     deltaIJ_17_r3,deltaIJ-17-r3_R1.fq.gz,deltaIJ-17-r3_R2.fq.gz,auto
     deltaIJ_24_r1,deltaIJ-24-r1_R1.fq.gz,deltaIJ-24-r1_R2.fq.gz,auto
     deltaIJ_24_r2,deltaIJ-24-r2_R1.fq.gz,deltaIJ-24-r2_R2.fq.gz,auto
     deltaIJ_24_r3,deltaIJ-24-r3_R1.fq.gz,deltaIJ-24-r3_R2.fq.gz,auto
     pre_WT_17_r1,pre_WT-17-r1_R1.fq.gz,pre_WT-17-r1_R2.fq.gz,auto
     pre_WT_17_r2,pre_WT-17-r2_R1.fq.gz,pre_WT-17-r2_R2.fq.gz,auto
     pre_WT_17_r3,pre_WT-17-r3_R1.fq.gz,pre_WT-17-r3_R2.fq.gz,auto
     pre_WT_24_r1,pre_WT-24-r1_R1.fq.gz,pre_WT-24-r1_R2.fq.gz,auto
     pre_WT_24_r2,pre_WT-24-r2_R1.fq.gz,pre_WT-24-r2_R2.fq.gz,auto
     pre_WT_24_r3,pre_WT-24-r3_R1.fq.gz,pre_WT-24-r3_R2.fq.gz,auto
     pre_deltaIJ_17_r1,pre_deltaIJ-17-r1_R1.fq.gz,pre_deltaIJ-17-r1_R2.fq.gz,auto
     pre_deltaIJ_17_r2,pre_deltaIJ-17-r2_R1.fq.gz,pre_deltaIJ-17-r2_R2.fq.gz,auto
     pre_deltaIJ_17_r3,pre_deltaIJ-17-r3_R1.fq.gz,pre_deltaIJ-17-r3_R2.fq.gz,auto
     pre_deltaIJ_24_r1,pre_deltaIJ-24-r1_R1.fq.gz,pre_deltaIJ-24-r1_R2.fq.gz,auto
     pre_deltaIJ_24_r2,pre_deltaIJ-24-r2_R1.fq.gz,pre_deltaIJ-24-r2_R2.fq.gz,auto
     pre_deltaIJ_24_r3,pre_deltaIJ-24-r3_R1.fq.gz,pre_deltaIJ-24-r3_R2.fq.gz,auto
     0_5_WT_17_r1,0_5_WT-17-r1_R1.fq.gz,0_5_WT-17-r1_R2.fq.gz,auto
     0_5_WT_17_r2,0_5_WT-17-r2_R1.fq.gz,0_5_WT-17-r2_R2.fq.gz,auto
     0_5_WT_17_r3,0_5_WT-17-r3_R1.fq.gz,0_5_WT-17-r3_R2.fq.gz,auto
     0_5_WT_24_r1,0_5_WT-24-r1_R1.fq.gz,0_5_WT-24-r1_R2.fq.gz,auto
     0_5_WT_24_r2,0_5_WT-24-r2_R1.fq.gz,0_5_WT-24-r2_R2.fq.gz,auto
     0_5_WT_24_r3,0_5_WT-24-r3_R1.fq.gz,0_5_WT-24-r3_R2.fq.gz,auto
     0_5_deltaIJ_17_r1,0_5_deltaIJ-17-r1_R1.fq.gz,0_5_deltaIJ-17-r1_R2.fq.gz,auto
     0_5_deltaIJ_17_r2,0_5_deltaIJ-17-r2_R1.fq.gz,0_5_deltaIJ-17-r2_R2.fq.gz,auto
     0_5_deltaIJ_17_r3,0_5_deltaIJ-17-r3_R1.fq.gz,0_5_deltaIJ-17-r3_R2.fq.gz,auto
     0_5_deltaIJ_24_r1,0_5_deltaIJ-24-r1_R1.fq.gz,0_5_deltaIJ-24-r1_R2.fq.gz,auto
     0_5_deltaIJ_24_r2,0_5_deltaIJ-24-r2_R1.fq.gz,0_5_deltaIJ-24-r2_R2.fq.gz,auto
     0_5_deltaIJ_24_r3,0_5_deltaIJ-24-r3_R1.fq.gz,0_5_deltaIJ-24-r3_R2.fq.gz,auto
  6. nextflow run

     #Example1: http://xgenes.com/article/article-content/157/prepare-virus-gtf-for-nextflow-run/
    
     docker pull nfcore/rnaseq
     ln -s /home/jhuang/Tools/nf-core-rnaseq-3.12.0/ rnaseq
    
     #Default: --gtf_group_features 'gene_id'  --gtf_extra_attributes 'gene_name' --featurecounts_group_type 'gene_biotype' --featurecounts_feature_type 'exon'
     #(host_env) !NOT_WORKING! jhuang@WS-2290C:~/DATA/Data_Tam_RNAseq_2024$ /usr/local/bin/nextflow run rnaseq/main.nf --input samplesheet.csv --outdir results    --fasta "/home/jhuang/DATA/Data_Tam_RNAseq_2024/CP059040.fasta" --gff "/home/jhuang/DATA/Data_Tam_RNAseq_2024/CP059040.gff"        -profile docker -resume  --max_cpus 55 --max_memory 512.GB --max_time 2400.h    --save_align_intermeds --save_unaligned --save_reference    --aligner 'star_salmon'    --gtf_group_features 'gene_id'  --gtf_extra_attributes 'gene_name' --featurecounts_group_type 'gene_biotype' --featurecounts_feature_type 'transcript'
    
     # -- DEBUG_1 (CDS --> exon in CP059040.gff) --
     #Checking the record (see below) in results/genome/CP059040.gtf
     #In ./results/genome/CP059040.gtf e.g. "CP059040.1      Genbank transcript      1       1398    .       +       .       transcript_id "gene-H0N29_00005"; gene_id "gene-H0N29_00005"; gene_name "dnaA"; Name "dnaA"; gbkey "Gene"; gene "dnaA"; gene_biotype "protein_coding"; locus_tag "H0N29_00005";"
     #--featurecounts_feature_type 'transcript' returns only the tRNA results
     #Reason: the tRNA records consist of "transcript + exon" features, whereas the protein-coding gene records consist of "transcript + CDS" features; therefore replace CDS with exon in the GFF.
    
     # Count the 'exon' features already present in the downloaded annotation, per source column
     # (grep -c counts matching lines directly; no sort/wc needed)
     grep -cP "\texon\t" CP059040.gff    #96
     grep -cP "cmsearch\texon\t" CP059040.gff    #=10  signal recognition particle sRNA small type, transfer-messenger RNA, 5S ribosomal RNA
     grep -cP "Genbank\texon\t" CP059040.gff    #=12  16S and 23S ribosomal RNA
     grep -cP "tRNAscan-SE\texon\t" CP059040.gff    #tRNA 74
     wc -l star_salmon/AUM_r3/quant.genes.sf  #--featurecounts_feature_type 'transcript' results in 96 records!

     # Relabel every CDS feature as exon and write the modified annotation to CP059040_m.gff
     grep -cP "\tCDS\t" CP059040.gff  #3701
     sed 's/\tCDS\t/\texon\t/g' CP059040.gff > CP059040_m.gff
     grep -cP "\texon\t" CP059040_m.gff  #3797
    
     # -- DEBUG_2: combination of 'CP059040_m.gff' and 'exon' results in ERROR, using 'transcript' instead!
     --gff "/home/jhuang/DATA/Data_Tam_RNAseq_2024/CP059040_m.gff" --featurecounts_feature_type 'transcript'
    
     # ---- SUCCESSFUL with directly downloaded gff3 and fasta from NCBI using docker after replacing 'CDS' with 'exon' ----
     mv trimmed/*.fq.gz .; rmdir trimmed
     (host_env) /usr/local/bin/nextflow run rnaseq/main.nf --input samplesheet.csv --outdir results    --fasta "/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/CP059040.fasta" --gff "/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/CP059040_m.gff"        -profile docker -resume  --max_cpus 90 --max_memory 900.GB --max_time 2400.h    --save_align_intermeds --save_unaligned --save_reference    --aligner 'star_salmon'    --gtf_group_features 'gene_id'  --gtf_extra_attributes 'gene_name' --featurecounts_group_type 'gene_biotype' --featurecounts_feature_type 'transcript'
    
     # -- DEBUG_3: make sure the FASTA header is the same as the sequence ID used in the *_m.gff file
  7. Import data and pca-plot

     #mamba activate r_env
    
     #install.packages("ggfun")
     # Import the required libraries
     library("AnnotationDbi")
     library("clusterProfiler")
     library("ReactomePA")
     library(gplots)
     library(tximport)
     library(DESeq2)
     #library("org.Hs.eg.db")
     library(dplyr)
     library(tidyverse)
     #install.packages("devtools")
     #devtools::install_version("gtable", version = "0.3.0")
     library(gplots)
     library("RColorBrewer")
     #install.packages("ggrepel")
     library("ggrepel")
     # install.packages("openxlsx")
     library(openxlsx)
     library(EnhancedVolcano)
     library(DESeq2)
     library(edgeR)
    
     setwd("~/DATA/Data_Tam_RNAseq_2025_WT_deltaIJ_ATCC19606/results/star_salmon")
     # Experimental design: 12 groups x 3 replicates, listed in the order used throughout this analysis
     group_ids <- c("WT_17", "WT_24", "deltaIJ_17", "deltaIJ_24",
                    "pre_WT_17", "pre_WT_24", "pre_deltaIJ_17", "pre_deltaIJ_24",
                    "0_5_WT_17", "0_5_WT_24", "0_5_deltaIJ_17", "0_5_deltaIJ_24")
     replicate_ids <- c("r1", "r2", "r3")
     condition_labels <- rep(group_ids, each = length(replicate_ids))
     replicate_labels <- rep(replicate_ids, times = length(group_ids))
     sample_ids <- paste(condition_labels, replicate_labels, sep = "_")

     # Named vector of Salmon quantification files: "<sample>" = "./<sample>/quant.sf"
     files <- setNames(paste0("./", sample_ids, "/quant.sf"), sample_ids)
     # Import the transcript abundance data with tximport (kept at transcript level)
     txi <- tximport(files, type = "salmon", txIn = TRUE, txOut = TRUE)
     # Replicate and condition factors, one entry per sample in the same order as `files`
     replicate <- factor(replicate_labels)
     condition <- factor(condition_labels)
     # Sample table for DESeq2, with the sample names as row names
     colData <- data.frame(condition=condition, replicate=replicate, row.names=names(files))
    
     # ------------------------
     # 1️⃣ Setup and input files
     # ------------------------

     # Transcript-to-gene lookup table (the file has no header row)
     tx2gene <- read.table("salmon_tx2gene.tsv", header=FALSE, stringsAsFactors=FALSE)
     names(tx2gene) <- c("transcript_id", "gene_id", "gene_name")

     # Two-column version (transcript, gene) handed to tximport for gene-level summarization
     tx2gene_geneonly <- tx2gene[c("transcript_id", "gene_id")]

     # -------------------------------
     # 2️⃣ Transcript-level counts
     # -------------------------------
     # DESeqDataSet built from the transcript-level import; its counts are exported as CSV
     dds_tx <- DESeqDataSetFromTximport(txi, colData=colData, design=~condition)
     write.csv(counts(dds_tx), file="transcript_counts.csv")

     # --------------------------------
     # 3️⃣ Gene-level summarization
     # --------------------------------
     # Second tximport pass, this time collapsing transcripts to genes
     txi_gene <- tximport(files, type="salmon", tx2gene=tx2gene_geneonly, txOut=FALSE)

     # Gene-level DESeqDataSet used by all downstream steps
     dds <- DESeqDataSetFromTximport(txi_gene, colData=colData, design=~condition+replicate)

     # --------------------------------
     # 4️⃣ Raw counts table (with gene names)
     # --------------------------------
     # Un-normalised gene counts, with the gene ID moved from the row names into a column
     counts_data <- as.data.frame(counts(dds, normalized=FALSE))
     counts_data$gene_id <- rownames(counts_data)

     # Attach gene names (left join on gene_id)
     tx2gene_unique <- unique(tx2gene[, c("gene_id", "gene_name")])
     counts_data <- merge(counts_data, tx2gene_unique, by="gene_id", all.x=TRUE)

     # Column order: gene_id, gene_name, then one count column per sample
     id_cols <- c("gene_id", "gene_name")
     count_cols <- setdiff(colnames(counts_data), id_cols)
     counts_data <- counts_data[, c(id_cols, count_cols)]

     # --------------------------------
     # 5️⃣ Calculate CPM
     # --------------------------------
     library(edgeR)
     library(openxlsx)

     # Numeric matrix holding only the per-sample count columns
     count_matrix <- as.matrix(counts_data[, count_cols])

     # CPM = count / column total * 1e6, computed by hand
     #cpm_matrix <- cpm(count_matrix, normalized.lib.sizes=FALSE)
     total_counts <- colSums(count_matrix)
     cpm_matrix <- sweep(count_matrix, 2, total_counts, "/") * 1e6
     cpm_matrix <- as.data.frame(cpm_matrix)

     # Put the identifier columns back in front of the CPM values
     cpm_counts <- cbind(counts_data[, id_cols], cpm_matrix)

     # --------------------------------
     # 6️⃣ Save outputs (CPM calculations required to send!)
     # --------------------------------
     write.csv(counts_data, "gene_raw_counts.csv", row.names=FALSE)
     write.xlsx(counts_data, "gene_raw_counts.xlsx", row.names=FALSE)
     write.xlsx(cpm_counts, "gene_cpm_counts.xlsx", row.names=FALSE)
  8. PCA

     # Quick sanity check of the gene-level count matrix
     dim(counts(dds))
     head(counts(dds), 10)

     library(DESeq2)
     library(RColorBrewer)
     library(gplots)
     library(ggplot2)

     # Load or generate DESeqDataSet object: dds
     # dds <- DESeqDataSetFromMatrix(...)  # <- already assumed

     # Apply rlog transformation
     rld <- rlogTransformation(dds)

     # Condition of every sample, in sample (column) order: 12 groups x 3 replicates
     condition <- factor(rep(c(
       "WT_17", "WT_24", "deltaIJ_17", "deltaIJ_24",
       "pre_WT_17", "pre_WT_24", "pre_deltaIJ_17", "pre_deltaIJ_24",
       "0_5_WT_17", "0_5_WT_24", "0_5_deltaIJ_17", "0_5_deltaIJ_24"
     ), each = 3))

     # Replace with descriptive condition names; `levels` fixes the legend order
     # (WT next to its ΔIJ counterpart) and `labels` gives the display names
     condition <- factor(condition,
       levels = c(
         "WT_17", "deltaIJ_17", "WT_24", "deltaIJ_24",
         "pre_WT_17", "pre_deltaIJ_17", "pre_WT_24", "pre_deltaIJ_24",
         "0_5_WT_17", "0_5_deltaIJ_17", "0_5_WT_24", "0_5_deltaIJ_24"
       ),
       labels = c(
         "WT-17", "ΔIJ-17",
         "WT-24", "ΔIJ-24",
         "preWT-17", "preΔIJ-17",
         "preWT-24", "preΔIJ-24",
         "0_5WT-17", "0_5ΔIJ-17",
         "0_5WT-24", "0_5ΔIJ-24"
       )
     )

     # Assign to rld
     colData(rld)$condition <- condition

     # Define colors (12 distinct ones), named by condition label
     condition_colors <- c(
       "#1f78b4", "#33a02c", "#a6cee3", "#b2df8a",
       "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00",
       "#cab2d6", "#6a3d9a", "#ffff99", "#b15928"
     )

     names(condition_colors) <- levels(condition)

     # Plot PCA
     pcaData <- plotPCA(rld, intgroup="condition", returnData=TRUE)
     percentVar <- round(100 * attr(pcaData, "percentVar"))

     pca_plot <- ggplot(pcaData, aes(PC1, PC2, color=condition)) +
       geom_point(size=4) +
       scale_color_manual(values=condition_colors) +
       xlab(paste0("PC1: ", percentVar[1], "% variance")) +
       ylab(paste0("PC2: ", percentVar[2], "% variance")) +
       theme_bw() +
       theme(axis.text = element_text(size=12), legend.text = element_text(size=10))

     # A ggplot object is only drawn when it is printed. At the interactive prompt that
     # happens implicitly, but not when this code is source()d or run inside a function,
     # so print() it explicitly while the PNG device is open.
     png("pca_colored.png", width=1200, height=800)
     print(pca_plot)
     dev.off()

     # Heatmap of sample distances (Euclidean distance between samples on the rlog values)
     png("heatmap.png", width=1200, height=800)
     distsRL <- dist(t(assay(rld)))
     mat <- as.matrix(distsRL)
     hc <- hclust(distsRL)
     hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
     heatmap.2(mat, Rowv=as.dendrogram(hc), Colv=as.dendrogram(hc),
               trace="none", symm=TRUE, col=rev(hmcol),
               margin=c(13, 13), labRow=condition, labCol=condition)
     dev.off()
  9. Select the differentially expressed genes

     #https://galaxyproject.eu/posts/2020/08/22/three-steps-to-galaxify-your-tool/
     #https://www.biostars.org/p/282295/
     #https://www.biostars.org/p/335751/
     #> dds$condition
     #CONSOLE: mkdir star_salmon/degenes
    
     setwd("degenes")

     # All pairwise comparisons to export, named "<treated>_vs_<reference>".
     # Previously `clist` was reassigned after every relevel and the export loop
     # ran only once at the end, so a straight run exported only the last
     # comparison. Now each comparison is releveled, refitted and exported in turn.
     clist <- c(
       "deltaIJ_17_vs_WT_17",
       "deltaIJ_24_vs_WT_24",
       "pre_deltaIJ_17_vs_pre_WT_17",
       "pre_deltaIJ_24_vs_pre_WT_24",
       "0_5_deltaIJ_17_vs_0_5_WT_17",
       "0_5_deltaIJ_24_vs_0_5_WT_24"
     )

     for (i in clist) {
       # Everything after "_vs_" is the reference (control) level of this comparison
       reference_level <- sub("^.*_vs_", "", i)

       #---- relevel to control ----
       # results(name=...) only knows coefficients that are relative to the current
       # reference level, so the model is refitted after every relevel.
       dds$condition <- relevel(dds$condition, reference_level)
       dds = DESeq(dds, betaPrior=FALSE)
       print(resultsNames(dds))

       contrast = paste("condition", i, sep="_")
       if (!(contrast %in% resultsNames(dds))) {
         stop("Coefficient '", contrast, "' not found in resultsNames(dds)")
       }

       res = results(dds, name=contrast)
       res <- res[!is.na(res$log2FoldChange),]
       res_df <- as.data.frame(res)

       # Full table sorted by raw p-value
       write.csv(res_df[order(res_df$pvalue),], file = paste(i, "all.txt", sep="-"))

       # DEG cut-offs: padj <= 0.05 and |log2FC| >= 2 (subset() drops rows with NA padj)
       up <- subset(res_df, padj<=0.05 & log2FoldChange>=2)
       down <- subset(res_df, padj<=0.05 & log2FoldChange<=-2)
       write.csv(up[order(up$log2FoldChange,decreasing=TRUE),], file = paste(i, "up.txt", sep="-"))
       write.csv(down[order(abs(down$log2FoldChange),decreasing=TRUE),], file = paste(i, "down.txt", sep="-"))
     }
    
     # -- Under host-env --
     grep -P "\tgene\t" CP059040.gff > CP059040_gene.gff

     # Run replace_gene_names.py on the all/up/down table of every comparison,
     # turning <comparison>-<subset>.txt into <comparison>-<subset>.csv.
     gene_gff=/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/CP059040_gene.gff
     for comparison in \
         deltaIJ_17_vs_WT_17 deltaIJ_24_vs_WT_24 \
         pre_deltaIJ_17_vs_pre_WT_17 pre_deltaIJ_24_vs_pre_WT_24 \
         0_5_deltaIJ_17_vs_0_5_WT_17 0_5_deltaIJ_24_vs_0_5_WT_24; do
       for subset in all up down; do
         python3 ~/Scripts/replace_gene_names.py "$gene_gff" "${comparison}-${subset}.txt" "${comparison}-${subset}.csv"
       done
     done
    
     # Comparison file prefix -> c(treated label, reference label). The labels are
     # used in the output file names and in the volcano subtitle.
     # Previously the six read.csv()/saveWorkbook()/png() alternatives were listed
     # one after another: a straight run processed only the last comparison, saved
     # that same workbook under all six names and left five png devices open.
     comparison_labels <- list(
       "deltaIJ_17_vs_WT_17"         = c("ΔIJ-17", "WT-17"),
       "deltaIJ_24_vs_WT_24"         = c("ΔIJ-24", "WT-24"),
       "pre_deltaIJ_17_vs_pre_WT_17" = c("preΔIJ-17", "preWT-17"),
       "pre_deltaIJ_24_vs_pre_WT_24" = c("preΔIJ-24", "preWT-24"),
       "0_5_deltaIJ_17_vs_0_5_WT_17" = c("0_5ΔIJ-17", "0_5WT-17"),
       "0_5_deltaIJ_24_vs_0_5_WT_24" = c("0_5ΔIJ-24", "0_5WT-24")
     )

     for (comparison in names(comparison_labels)) {
       treated_label <- comparison_labels[[comparison]][1]
       reference_label <- comparison_labels[[comparison]][2]
       out_label <- paste0(treated_label, "_vs_", reference_label)

       res <- read.csv(paste0(comparison, "-all.csv"))

       # Replace empty GeneName with modified GeneID
       res$GeneName <- ifelse(
         res$GeneName == "" | is.na(res$GeneName),
         gsub("gene-", "", res$GeneID),
         res$GeneName
       )
       # Names occurring more than once (kept for inspection only)
       duplicated_genes <- res[duplicated(res$GeneName), "GeneName"]

       # Keep one row per GeneName: the one with the smallest padj
       res <- res %>%
         group_by(GeneName) %>%
         slice_min(padj, with_ties = FALSE) %>%
         ungroup()
       res <- as.data.frame(res)
       # Sort res first by padj (ascending) and then by log2FoldChange (descending)
       res <- res[order(res$padj, -res$log2FoldChange), ]

       # which() drops comparisons that evaluate to NA (padj is NA for some genes);
       # plain logical indexing would add all-NA rows to the sheets.
       up_regulated <- res[which(res$log2FoldChange >= 2 & res$padj <= 5e-2), ]
       down_regulated <- res[which(res$log2FoldChange <= -2 & res$padj <= 5e-2), ]

       wb <- createWorkbook()
       addWorksheet(wb, "Complete_Data")
       writeData(wb, "Complete_Data", res)
       addWorksheet(wb, "Up_Regulated")
       writeData(wb, "Up_Regulated", up_regulated)
       addWorksheet(wb, "Down_Regulated")
       writeData(wb, "Down_Regulated", down_regulated)
       # Save the workbook to a file named after this comparison
       saveWorkbook(wb, paste0("Gene_Expression_", out_label, ".xlsx"), overwrite = TRUE)

       # Set the 'GeneName' column as row.names
       rownames(res) <- res$GeneName
       # Drop the 'GeneName' column since it's now the row names
       res$GeneName <- NULL
       print(head(res))

       # Replace padj = 0 with a small value
       #res$padj[res$padj == 0] <- 1e-305

       #library(EnhancedVolcano)
       # One png device per comparison, closed again before the next iteration
       png(paste0(out_label, ".png"), width=1000, height=1200)
       #max.overlaps = 10
       # Inside a loop the returned plot object is not auto-printed, so print() it
       print(EnhancedVolcano(res,
                       lab = rownames(res),
                       x = 'log2FoldChange',
                       y = 'padj',
                       pCutoff = 5e-2,
                       FCcutoff = 2,
                       title = '',
                       subtitleLabSize = 18,
                       pointSize = 3.0,
                       labSize = 5.0,
                       colAlpha = 1,
                       legendIconSize = 4.0,
                       drawConnectors = TRUE,
                       widthConnectors = 0.5,
                       colConnectors = 'black',
                       subtitle = paste(treated_label, "versus", reference_label)))
       dev.off()
     }
  10. Venn diagram

         #To visualize gene expression overlaps across your conditions, Venn diagrams are useful — but only when comparing 2–5 groups at a time. Given your conditions and comparisons, here’s the best strategy:
    
         #✅ Best Venn Diagram Setup Options
         #You’re comparing wild-type (WT) and ΔIJ mutant strains under different conditions (no treatment, treatment A, treatment B) and time points (17h, 24h). To avoid overcrowded or unreadable plots, group comparisons by specific contrasts:
    
         #Option 1: Treatment Effect at One Time Point (ΔIJ vs WT)
         #Compare ΔIJ vs WT at 17h or 24h, under all 3 treatments (None, A, B):
    
         # Venn: “Treatment-dependent differences (ΔIJ vs WT) at 17h”
         #* ΔIJ vs WT (no treatment) – ΔIJ-17 vs WT-17
         #* ΔIJ vs WT (treatment A) – preΔIJ-17 vs preWT-17
         #* ΔIJ vs WT (treatment B) – 0_5ΔIJ-17 vs 0_5WT-17
    
         #👉 3-way Venn diagram: Shows overlap in DEGs between different treatment conditions for the ΔIJ effect at a single time point.
    
         # Install (when missing) and attach the Venn and Excel packages
         if (!require("VennDiagram")) install.packages("VennDiagram")
         if (!require("openxlsx")) install.packages("openxlsx")
         library(VennDiagram)
         library(openxlsx)

         # All input and output files live in the degenes directory
         setwd("/mnt/md1/DATA/Data_Tam_RNAseq_2025_WT_deltaIJ_ATCC19606/results/star_salmon/degenes")

         # Up-regulated gene tables at 17h, one per treatment
         df_no_treatment <- read.csv("deltaIJ_17_vs_WT_17-up.txt", header = TRUE)
         df_treatA <- read.csv("pre_deltaIJ_17_vs_pre_WT_17-up.txt", header = TRUE)
         df_treatB <- read.csv("0_5_deltaIJ_17_vs_0_5_WT_17-up.txt", header = TRUE)

         # Gene identifiers are taken from the first column; strip a leading or
         # trailing double quote in case any survived the CSV round trip
         strip_quotes <- function(ids) gsub("^\"|\"$", "", ids)
         genes_no_treatment <- strip_quotes(df_no_treatment[[1]])
         genes_treatA <- strip_quotes(df_treatA[[1]])
         genes_treatB <- strip_quotes(df_treatB[[1]])

         # Named gene sets; the names become the category labels in the diagram
         venn_list <- list(
           "No_Treatment" = genes_no_treatment,
           "Treatment_A" = genes_treatA,
           "Treatment_B" = genes_treatB
         )

         # Write the Venn diagram straight to a PNG file
         venn.diagram(
           x = venn_list,
           filename = "venn_17h_upregulated_treatments.png",
           imagetype = "png",
           output = TRUE,
           col = "transparent",
           fill = c("#66c2a5", "#fc8d62", "#8da0cb"),
           alpha = 0.5,
           cex = 1.5,
           cat.cex = 1.4,
           cat.pos = 0,
           cat.dist = 0.05,
           main = "Upregulated Genes (ΔIJ vs WT, 17h)",
           main.cex = 1.5
         )

         # Genes shared by all three sets
         no_A_B <- Reduce(intersect, list(genes_no_treatment, genes_treatA, genes_treatB))

         # Genes found in exactly one set
         only_no <- setdiff(genes_no_treatment, union(genes_treatA, genes_treatB))
         only_A <- setdiff(genes_treatA, union(genes_no_treatment, genes_treatB))
         only_B <- setdiff(genes_treatB, union(genes_no_treatment, genes_treatA))

         # Genes found in exactly two sets (pairwise overlap minus the 3-way core)
         no_A <- setdiff(intersect(genes_no_treatment, genes_treatA), no_A_B)
         no_B <- setdiff(intersect(genes_no_treatment, genes_treatB), no_A_B)
         A_B <- setdiff(intersect(genes_treatA, genes_treatB), no_A_B)

         # One worksheet per Venn region, in the order listed here
         region_sheets <- list(
           "Only_No_Treatment" = only_no,
           "Only_Treatment_A" = only_A,
           "Only_Treatment_B" = only_B,
           "No_Treatment_AND_Treatment_A" = no_A,
           "No_Treatment_AND_Treatment_B" = no_B,
           "Treatment_A_AND_Treatment_B" = A_B,
           "All_Three" = no_A_B
         )

         wb <- createWorkbook()
         for (sheet_name in names(region_sheets)) {
           addWorksheet(wb, sheet_name)
         }
         for (sheet_name in names(region_sheets)) {
           writeData(wb, sheet_name, region_sheets[[sheet_name]])
         }

         saveWorkbook(wb, "upregulated_17h_intersections.xlsx", overwrite = TRUE)
    
         #--
    
         if (!require("VennDiagram")) install.packages("VennDiagram")
         if (!require("openxlsx")) install.packages("openxlsx")
         library(VennDiagram)
         library(openxlsx)
    
         setwd("/mnt/md1/DATA/Data_Tam_RNAseq_2025_WT_deltaIJ_ATCC19606/results/star_salmon/degenes")
    
         # Helper function
         # Draw a three-set Venn diagram and export the genes of every region.
         #
         # Arguments:
         #   label          - main title of the diagram
         #   files          - named list (or named character vector) of exactly three
         #                    CSV paths; the first column of each file is used as the
         #                    gene identifiers, and the names label the circles
         #   outfile_prefix - output path without extension; writes
         #                    <outfile_prefix>.png and <outfile_prefix>.xlsx
         #
         # The worksheets are named by position: A, B and C are the first, second
         # and third entry of `files`.
         process_and_save_venn <- function(label, files, outfile_prefix) {
           # The fill colours, label positions and the A/B/C region logic below are
           # written for exactly three sets, so reject anything else up front
           # instead of failing later with an opaque subscript or plotting error.
           if (length(files) != 3) {
             stop("process_and_save_venn() needs exactly three input files, got ",
                  length(files))
           }
           if (is.null(names(files)) || any(names(files) == "")) {
             stop("process_and_save_venn(): every entry of 'files' must be named")
           }

           # Read gene lists (first column), stripping stray surrounding quotes
           gene_lists <- list()
           for (name in names(files)) {
             df <- read.csv(files[[name]], header = TRUE)
             gene_lists[[name]] <- gsub("^\"|\"$", "", df[[1]])
           }

           # Plot Venn
           venn.diagram(
             x = gene_lists,
             filename = paste0(outfile_prefix, ".png"),
             imagetype = "png",
             output = TRUE,
             col = "transparent",
             fill = c("#66c2a5", "#fc8d62", "#8da0cb"),
             alpha = 0.5,
             cex = 1.5,
             cat.cex = 1.4,
             cat.pos = c(-30, 30, 135),   # Move labels around the circles
             cat.dist = c(0.04, 0.04, 0.04),  # Push labels further outside
             main = label,
             main.cex = 1.5
           )

           # Intersections
           A <- gene_lists[[1]]
           B <- gene_lists[[2]]
           C <- gene_lists[[3]]

           # Core shared by all three sets; computed once and reused below
           ABC <- Reduce(intersect, list(A, B, C))

           # Genes found in exactly one set
           only_A <- setdiff(A, union(B, C))
           only_B <- setdiff(B, union(A, C))
           only_C <- setdiff(C, union(A, B))

           # Genes found in exactly two sets (pairwise overlap minus the core)
           AB <- setdiff(intersect(A, B), ABC)
           AC <- setdiff(intersect(A, C), ABC)
           BC <- setdiff(intersect(B, C), ABC)

           # Save Excel: one worksheet per region
           regions <- list(
             "Only_A" = only_A,
             "Only_B" = only_B,
             "Only_C" = only_C,
             "A_and_B" = AB,
             "A_and_C" = AC,
             "B_and_C" = BC,
             "All_Three" = ABC
           )
           wb <- createWorkbook()
           for (sheet_name in names(regions)) {
             addWorksheet(wb, sheet_name)
           }
           for (sheet_name in names(regions)) {
             writeData(wb, sheet_name, regions[[sheet_name]])
           }

           saveWorkbook(wb, paste0(outfile_prefix, ".xlsx"), overwrite = TRUE)
         }
    
         ### === UPREGULATED GENES, THEN DOWNREGULATED GENES (17h and 24h each) === ###
         # File-name suffix of each direction -> word used in the title and output name
         direction_titles <- c(up = "Upregulated", down = "Downregulated")

         for (direction in names(direction_titles)) {
           for (hours in c("17", "24")) {
             process_and_save_venn(
               label = paste0(direction_titles[[direction]], " Genes (ΔIJ vs WT, ", hours, "h)"),
               files = list(
                 "No_Treatment" = paste0("deltaIJ_", hours, "_vs_WT_", hours, "-", direction, ".txt"),
                 "Treatment_A" = paste0("pre_deltaIJ_", hours, "_vs_pre_WT_", hours, "-", direction, ".txt"),
                 "Treatment_B" = paste0("0_5_deltaIJ_", hours, "_vs_0_5_WT_", hours, "-", direction, ".txt")
               ),
               outfile_prefix = paste0("venn_", tolower(direction_titles[[direction]]), "_", hours, "h")
             )
           }
         }

KEGG and GO annotations in non-model organisms

https://www.biobam.com/functional-analysis/

  1. Assign KEGG and GO Terms (see diagram above)

    Since your organism is non-model, standard R databases (org.Hs.eg.db, etc.) won’t work. You’ll need to manually retrieve KEGG and GO annotations.

    • Preparing file 1 eggnog_out.emapper.annotations.txt for the R-code below: (KEGG Terms): EggNog based on orthology and phylogenies

      EggNOG-mapper assigns both KEGG Orthology (KO) IDs and GO terms.

      Install EggNOG-mapper:

        mamba create -n eggnog_env python=3.8 eggnog-mapper -c conda-forge -c bioconda  #eggnog-mapper_2.1.12
        mamba activate eggnog_env

      Run annotation:

        #diamond makedb --in eggnog6.prots.faa -d eggnog_proteins.dmnd
        # Data directory used for the download; defined once so both commands agree
        eggnog_data_dir=/home/jhuang/mambaforge/envs/eggnog_env/lib/python3.8/site-packages/data/
        # -p: do not fail when the directory already exists (e.g. on a re-run)
        mkdir -p "$eggnog_data_dir"
        download_eggnog_data.py --dbname eggnog.db -y --data_dir "$eggnog_data_dir"
        #NOT_WORKING: emapper.py -i CP059040_gene.fasta -o eggnog_dmnd_out --cpu 60 -m diamond[hmmer,mmseqs] --dmnd_db /home/jhuang/REFs/eggnog_data/data/eggnog_proteins.dmnd
        python ~/Scripts/update_fasta_header.py CP059040_protein_.fasta CP059040_protein.fasta
        emapper.py -i CP059040_protein.fasta -o eggnog_out --cpu 60 --resume
        #----> result annotations.tsv: Contains KEGG, GO, and other functional annotations.
        #---->  470.IX87_14445:
            * 470 likely refers to the organism or strain (e.g., Acinetobacter baumannii ATCC 19606 or another related strain).
            * IX87_14445 would refer to a specific gene or protein within that genome.

      Extract KEGG KO IDs (column KEGG_ko) from eggnog_out.emapper.annotations.

    • Preparing file 2 blast2goannot.annot2 for the R-code below:

      • Basic (GO Terms from ‘Blast2GO 5 Basic’, saved in blast2go_annot.annot): Using Blast/Diamond + Blast2GO_GUI based on sequence alignment + GO mapping

        • ‘Load protein sequences’ (Tags: NONE, generated columns: Nr, SeqName) –>
        • Buttons ‘blast’ (Tags: BLASTED, generated columns: Description, Length, #Hits, e-Value, sim mean),
        • Button ‘mapping’ (Tags: MAPPED, generated columns: #GO, GO IDs, GO Names), “Mapping finished – Please proceed now to annotation.”
        • Button ‘annot’ (Tags: ANNOTATED, generated columns: Enzyme Codes, Enzyme Names), “Annotation finished.”
          • Used parameter ‘Annotation CutOff’: The Blast2GO Annotation Rule seeks to find the most specific GO annotations with a certain level of reliability. An annotation score is calculated for each candidate GO which is composed of the sequence similarity of the Blast Hit, the evidence code of the source GO and the position of the particular GO in the Gene Ontology hierarchy. This annotation score cutoff selects the most specific GO term for a given GO branch which lies above this value.
          • Used parameter ‘GO Weight’ is a value which is added to Annotation Score of a more general/abstract Gene Ontology term for each of its more specific, original source GO terms. In this case, more general GO terms which summarise many original source terms (those ones directly associated to the Blast Hits) will have a higher Annotation Score.
      • Advanced (GO Terms from ‘Blast2GO 5 Basic’): Interpro based protein families / domains –> Button interpro

        • Button ‘interpro’ (Tags: INTERPRO, generated columns: InterPro IDs, InterPro GO IDs, InterPro GO Names) –> “InterProScan Finished – You can now merge the obtained GO Annotations.”
      • MERGE the results of InterPro GO IDs (advanced) to GO IDs (basic) and generate final GO IDs, saved in blast2go_annot.annot2

        • Button ‘interpro’/’Merge InterProScan GOs to Annotation’ –> “Merge (add and validate) all GO terms retrieved via InterProScan to the already existing GO annotation.” –> “Finished merging GO terms from InterPro with annotations. Maybe you want to run ANNEX (Annotation Augmentation).”
        • (NOT_USED) Button ‘annot’/’ANNEX’ –> “ANNEX finished. Maybe you want to do the next step: Enzyme Code Mapping.”
      • PREPARING go_terms and ecterms: annot* file:

        # Keep only the first two tab-separated columns (sequence ID and GO/EC term).
        # The delimiter must be written with plain ASCII quotes ($'\t'); typographic
        # quotes are not shell quoting and break the command.
        cut -f1-2 -d$'\t' blast2go_annot.annot2 > blast2goannot.annot2

  2. Perform KEGG and GO Enrichment in R

         #BiocManager::install("GO.db")
         #BiocManager::install("AnnotationDbi")
    
         # Load required libraries
         library(openxlsx)  # For Excel file handling
         library(dplyr)     # For data manipulation
         library(tidyr)
         library(stringr)
         library(clusterProfiler)  # For KEGG and GO enrichment analysis
         #library(org.Hs.eg.db)  # Replace with appropriate organism database
         library(GO.db)
         library(AnnotationDbi)
    
         setwd("~/DATA/Data_Tam_RNAseq_2025_WT_deltaIJ_ATCC19606//results/star_salmon/degenes")
         # Step 1: Load the blast2go annotation file (tab-separated, no header)
         annot_df <- read.table("/home/jhuang/b2gWorkspace_Tam_RNAseq_2024/blast2go_annot.annot2_",
                             header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)

         # fill = TRUE pads ragged lines, so the table can come back with more than
         # two columns. Keep exactly the first two (gene ID and term) so that the
         # column names below always line up.
         if (ncol(annot_df) < 2) {
           stop("blast2go annotation file must have at least two tab-separated columns")
         }
         annot_df <- annot_df[, 1:2]
         colnames(annot_df) <- c("GeneID", "Term")

         # Step 2: Collapse the terms of each gene into one comma-separated string,
         # separately for GO terms ("GO:" prefix) and enzyme codes ("EC:" prefix)
         go_terms <- annot_df %>%
           filter(grepl("^GO:", Term)) %>%
           group_by(GeneID) %>%
           summarize(GOs = paste(Term, collapse = ","), .groups = "drop")
         ec_terms <- annot_df %>%
           filter(grepl("^EC:", Term)) %>%
           group_by(GeneID) %>%
           summarize(EC = paste(Term, collapse = ","), .groups = "drop")
    
         # Comparison file prefix -> label used in the output workbook name.
         # The trailing comments are the up/down DEG counts noted for each comparison.
         comparison_labels <- c(
           "deltaIJ_17_vs_WT_17"         = "ΔIJ-17_vs_WT-17",        #up11, down3
           "deltaIJ_24_vs_WT_24"         = "ΔIJ-24_vs_WT-24",        #up0, down2
           "pre_deltaIJ_17_vs_pre_WT_17" = "preΔIJ-17_vs_preWT-17",  #up238, down90
           "pre_deltaIJ_24_vs_pre_WT_24" = "preΔIJ-24_vs_preWT-24",  #up83, down64
           "0_5_deltaIJ_17_vs_0_5_WT_17" = "0_5ΔIJ-17_vs_0_5WT-17",  #up74, down14
           "0_5_deltaIJ_24_vs_0_5_WT_24" = "0_5ΔIJ-24_vs_0_5WT-24"   #up1, down3
         )
         # Choose ONE comparison here. The input CSV and the output workbook name
         # are both derived from it, so they cannot get out of sync.
         comparison <- "0_5_deltaIJ_24_vs_0_5_WT_24"
         comparison_label <- comparison_labels[[comparison]]

         # Load the results
         res <- read.csv(paste0(comparison, "-all.csv"))

         # Replace empty GeneName with modified GeneID
         res$GeneName <- ifelse(
             res$GeneName == "" | is.na(res$GeneName),
             gsub("gene-", "", res$GeneID),
             res$GeneName
         )

         # Remove duplicated genes by selecting the gene with the smallest padj
         duplicated_genes <- res[duplicated(res$GeneName), "GeneName"]

         res <- res %>%
           group_by(GeneName) %>%
           slice_min(padj, with_ties = FALSE) %>%
           ungroup()

         res <- as.data.frame(res)
         # Sort res first by padj (ascending) and then by log2FoldChange (descending)
         res <- res[order(res$padj, -res$log2FoldChange), ]
         # Read eggnog annotations
         eggnog_data <- read.delim("~/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/eggnog_out.emapper.annotations.txt", header = TRUE, sep = "\t")
         # Remove the "gene-" prefix from GeneID in res to match eggnog 'query' format
         res$GeneID <- gsub("gene-", "", res$GeneID)
         # Merge eggnog data with res based on GeneID
         res <- res %>%
           left_join(eggnog_data, by = c("GeneID" = "query"))

         # DEBUG: NOT_NECESSARY, since res has already GeneName
         ##Convert row names to a new column 'GeneName' in res
         #res_with_geneName <- res %>%
         #mutate(GeneName = rownames(res)) %>%
         #as.data.frame()  # Ensure that it's a regular data frame without row names
         ## View the result
         #head(res_with_geneName)

         # Add the Blast2GO terms. The eggnog table already has GOs/EC columns, so
         # the joins create .x (eggnog) and .y (Blast2GO) variants; keep the
         # Blast2GO ones under the plain names.
         res_updated <- res %>%
           left_join(go_terms, by = "GeneID") %>%
           left_join(ec_terms, by = "GeneID") %>%
           dplyr::select(-EC.x, -GOs.x) %>%
           dplyr::rename(EC = EC.y, GOs = GOs.y)

         # DEBUG: NOT_NECESSARY, since 'GeneName' is already the first column.
         ## Reorder columns to move 'GeneName' as the first column in res_updated
         #res_updated <- res_updated %>%
         #select(GeneName, everything())

         ## Count the number of rows in the KEGG_ko, GOs, EC columns that have non-missing values
         #num_non_missing_KEGG_ko <- sum(res_updated$KEGG_ko != "-" & !is.na(res_updated$KEGG_ko))
         #print(num_non_missing_KEGG_ko)
         ##[1] 2030
         #num_non_missing_GOs <- sum(res_updated$GOs != "-" & !is.na(res_updated$GOs))
         #print(num_non_missing_GOs)
         ##[1] 2865 --> 2875
         #num_non_missing_EC <- sum(res_updated$EC != "-" & !is.na(res_updated$EC))
         #print(num_non_missing_EC)
         ##[1] 1701

         # which() drops comparisons that evaluate to NA (padj is NA for some genes);
         # plain logical indexing would add all-NA rows to both tables.
         # NOTE(review): these cut-offs are strict (> 2, < 0.05) while the DEG export
         # step uses >= 2 and <= 0.05 - confirm which is intended.
         # Filter up-regulated genes
         up_regulated <- res_updated[which(res_updated$log2FoldChange > 2 & res_updated$padj < 0.05), ]
         # Filter down-regulated genes
         down_regulated <- res_updated[which(res_updated$log2FoldChange < -2 & res_updated$padj < 0.05), ]

         # Create a new workbook
         wb <- createWorkbook()
         # Add the complete dataset as the first sheet (with annotations)
         addWorksheet(wb, "Complete_Data")
         writeData(wb, "Complete_Data", res_updated)
         # Add the up-regulated genes as the second sheet (with annotations)
         addWorksheet(wb, "Up_Regulated")
         writeData(wb, "Up_Regulated", up_regulated)
         # Add the down-regulated genes as the third sheet (with annotations)
         addWorksheet(wb, "Down_Regulated")
         writeData(wb, "Down_Regulated", down_regulated)
         # Save the workbook to a file
         saveWorkbook(wb, paste0("Gene_Expression_with_Annotations_", comparison_label, ".xlsx"), overwrite = TRUE)

         # Set GeneName as row names after the join
         rownames(res_updated) <- res_updated$GeneName
         res_updated <- res_updated %>% dplyr::select(-GeneName)
         ## Set the 'GeneName' column as row.names
         #rownames(res_updated) <- res_updated$GeneName
         ## Drop the 'GeneName' column since it's now the row names
         #res_updated$GeneName <- NULL
    
         # ---- Perform KEGG enrichment analysis (up_regulated) ----
         gene_list_kegg_up <- up_regulated$KEGG_ko
         gene_list_kegg_up <- gsub("ko:", "", gene_list_kegg_up)
         kegg_enrichment_up <- enrichKEGG(gene = gene_list_kegg_up, organism = 'ko')
         # -- convert the GeneID (Kxxxxxx) to the true GeneID --
         # Step 0: Create KEGG to GeneID mapping
         kegg_to_geneid_up <- up_regulated %>%
         dplyr::select(KEGG_ko, GeneID) %>%
         filter(!is.na(KEGG_ko)) %>%  # Remove missing KEGG KO entries
         mutate(KEGG_ko = str_remove(KEGG_ko, "ko:"))  # Remove 'ko:' prefix if present
         # Step 1: Clean KEGG_ko values (separate multiple KEGG IDs)
         kegg_to_geneid_clean <- kegg_to_geneid_up %>%
         mutate(KEGG_ko = str_remove_all(KEGG_ko, "ko:")) %>%  # Remove 'ko:' prefixes
         separate_rows(KEGG_ko, sep = ",") %>%  # Ensure each KEGG ID is on its own row
         filter(KEGG_ko != "-") %>%  # Remove invalid KEGG IDs ("-")
         distinct()  # Remove any duplicate mappings
         # Step 2.1: Expand geneID column in kegg_enrichment_up
         expanded_kegg <- kegg_enrichment_up %>%
         as.data.frame() %>%
         separate_rows(geneID, sep = "/") %>%  # Split multiple KEGG IDs (Kxxxxx)
         left_join(kegg_to_geneid_clean, by = c("geneID" = "KEGG_ko"), relationship = "many-to-many") %>%  # Explicitly handle many-to-many
         distinct() %>%  # Remove duplicate matches
         group_by(ID) %>%
         summarise(across(everything(), ~ paste(unique(na.omit(.)), collapse = "/")), .groups = "drop")  # Re-collapse results
         #dplyr::glimpse(expanded_kegg)
         # Step 3.1: Replace geneID column in the original dataframe
         kegg_enrichment_up_df <- as.data.frame(kegg_enrichment_up)
         # Remove old geneID column and merge new one
         kegg_enrichment_up_df <- kegg_enrichment_up_df %>%
         dplyr::select(-geneID) %>%  # Remove old geneID column
         left_join(expanded_kegg %>% dplyr::select(ID, GeneID), by = "ID") %>%  # Merge new GeneID column
         dplyr::rename(geneID = GeneID)  # Rename column back to geneID
    
         # ---- Perform KEGG enrichment analysis (down_regulated) ----
         # Step 1: Extract KEGG KO terms from down-regulated genes.
         # A KEGG_ko cell may list several KOs separated by commas or hold the
         # placeholder "-" (Step 4 below handles both for the mapping table), so the
         # cells are split here too. Without the split, a value such as
         # "K00001,K00002" reaches enrichKEGG() as a single ID that matches no KO.
         gene_list_kegg_down <- gsub("ko:", "", down_regulated$KEGG_ko)
         gene_list_kegg_down <- trimws(unlist(strsplit(gene_list_kegg_down, ",", fixed = TRUE)))
         gene_list_kegg_down <- unique(
             gene_list_kegg_down[!is.na(gene_list_kegg_down) & !(gene_list_kegg_down %in% c("", "-"))]
         )
         # NOTE(review): the up-regulated KO list should be prepared the same way - confirm.

         # Step 2: Perform KEGG enrichment analysis
         kegg_enrichment_down <- enrichKEGG(gene = gene_list_kegg_down, organism = 'ko')
         if (is.null(kegg_enrichment_down)) {
             # Fail with a readable message instead of a column-not-found error further down
             stop("enrichKEGG() returned NULL: none of the down-regulated KOs could be mapped")
         }

         # --- Convert KEGG gene IDs (Kxxxxxx) to actual GeneIDs ---
         # Step 3: Create KEGG to GeneID mapping from down_regulated dataset
         kegg_to_geneid_down <- down_regulated %>%
             dplyr::select(KEGG_ko, GeneID) %>%
             filter(!is.na(KEGG_ko)) %>%                    # Remove missing KEGG KO entries
             mutate(KEGG_ko = str_remove(KEGG_ko, "ko:"))   # Remove the first 'ko:' prefix

         # Step 4: Clean KEGG_ko values (handle multiple KEGG IDs)
         kegg_to_geneid_down_clean <- kegg_to_geneid_down %>%
             mutate(KEGG_ko = str_remove_all(KEGG_ko, "ko:")) %>%  # Remove the remaining 'ko:' prefixes
             separate_rows(KEGG_ko, sep = ",") %>%                 # One KEGG ID per row
             filter(KEGG_ko != "-") %>%                            # Remove invalid KEGG IDs ("-")
             distinct()                                            # Remove duplicate mappings

         # Step 5: Expand geneID column in kegg_enrichment_down
         # Every column is collapsed to a "/"-joined string per pathway ID; only
         # ID and GeneID are used afterwards.
         expanded_kegg_down <- kegg_enrichment_down %>%
             as.data.frame() %>%
             separate_rows(geneID, sep = "/") %>%  # Split multiple KEGG IDs (Kxxxxx)
             left_join(kegg_to_geneid_down_clean, by = c("geneID" = "KEGG_ko"), relationship = "many-to-many") %>%
             distinct() %>%
             group_by(ID) %>%
             summarise(across(everything(), ~ paste(unique(na.omit(.)), collapse = "/")), .groups = "drop")

         # Step 6: Replace geneID column in the original kegg_enrichment_down dataframe
         kegg_enrichment_down_df <- as.data.frame(kegg_enrichment_down) %>%
             dplyr::select(-geneID) %>%                                                       # Remove old geneID column
             left_join(expanded_kegg_down %>% dplyr::select(ID, GeneID), by = "ID") %>%       # Merge new GeneID column
             dplyr::rename(geneID = GeneID)                                                   # Rename column back to geneID
         # View the updated dataframe
         head(kegg_enrichment_down_df)
    
         # Start a fresh workbook and add one sheet per KEGG result table
         wb <- createWorkbook()
         kegg_tables <- list(
             KEGG_Enrichment_Up = kegg_enrichment_up_df,
             KEGG_Enrichment_Down = kegg_enrichment_down_df
         )
         for (kegg_sheet in names(kegg_tables)) {
             addWorksheet(wb, kegg_sheet)
             writeData(wb, kegg_sheet, as.data.frame(kegg_tables[[kegg_sheet]]))
         }
         # Saving a KEGG-only file is left disabled
         #saveWorkbook(wb, "KEGG_Enrichment.xlsx", overwrite = TRUE)
    
         # ---- Perform GO enrichment analysis (TODO: extract the merged GO IDs from 'Blast2GO 5 Basic' and adapt the code below!)----

         # Define gene lists (up- and down-regulated genes)
         gene_list_go_up <- up_regulated$GeneID  # Extract the 149 up-regulated genes
         gene_list_go_down <- down_regulated$GeneID  # Extract the 65 down-regulated genes

         # Define background gene set (all genes in res)
         background_genes <- res_updated$GeneID  # Extract the 3646 background genes

         # Prepare GO annotation data from res: one (GO term, GeneID) pair per row.
         # Rows without a real GO ID are dropped; otherwise the placeholder "-"
         # (the same placeholder the KEGG section filters from KEGG_ko) would be
         # tested as if it were a GO term and would count unannotated genes as annotated.
         go_annotation <- res_updated[, c("GOs","GeneID")]  # Extract relevant columns
         go_annotation <- go_annotation %>%
             tidyr::separate_rows(GOs, sep = ",") %>%  # Split multiple GO terms into separate rows
             dplyr::mutate(GOs = trimws(GOs)) %>%
             dplyr::filter(!is.na(GOs), GOs != "", GOs != "-")

         # Perform GO enrichment analysis, where pAdjustMethod is one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"
         go_enrichment_up <- enricher(
             gene = gene_list_go_up,          # Up-regulated genes
             TERM2GENE = go_annotation,       # Custom GO annotation
             pvalueCutoff = 1.0,              # 1.0: no p-value cutoff is applied at this stage
             pAdjustMethod = "BH",
             universe = background_genes      # Define the background gene set
         )
         go_enrichment_up <- as.data.frame(go_enrichment_up)

         go_enrichment_down <- enricher(
             gene = gene_list_go_down,        # Down-regulated genes
             TERM2GENE = go_annotation,       # Custom GO annotation
             pvalueCutoff = 1.0,              # 1.0: no p-value cutoff is applied at this stage
             pAdjustMethod = "BH",
             universe = background_genes      # Define the background gene set
         )
         go_enrichment_down <- as.data.frame(go_enrichment_down)
    
         ## The 'p.adjust' column is kept: enricher() is called with pAdjustMethod = "BH".
         #go_enrichment_up <- go_enrichment_up[, !names(go_enrichment_up) %in% "p.adjust"]
         #go_enrichment_down <- go_enrichment_down[, !names(go_enrichment_down) %in% "p.adjust"]

         # Look up the GO.db term name for one GO ID.
         # Returns NA_character_ when the lookup raises an error (the offending ID is
         # reported via message()) or when it yields no row.
         lookup_go_term <- function(go_id) {
             term <- tryCatch({
                 AnnotationDbi::select(GO.db, keys = go_id, columns = "TERM", keytype = "GOID")
             }, error = function(e) {
                 message(paste("Error for GO term:", go_id))  # Print which GO ID caused the error
                 data.frame(TERM = NA_character_)             # In case of error, fall back to NA
             })
             if (nrow(term) > 0) as.character(term$TERM[1]) else NA_character_
         }

         # Term descriptions for a vector of GO IDs. vapply() guarantees a character
         # vector, also for an empty result table (sapply() would return list()).
         describe_go_terms <- function(go_ids) {
             vapply(as.character(go_ids), lookup_go_term, character(1), USE.NAMES = FALSE)
         }

         # Update the Description column with the term descriptions
         go_enrichment_up$Description <- describe_go_terms(go_enrichment_up$ID)
         ## Print the updated data frame
         #print(go_enrichment_up)
         go_enrichment_down$Description <- describe_go_terms(go_enrichment_down$ID)
    
         # One sheet per GO result table, then write the whole workbook to disk
         go_tables <- list(
             GO_Enrichment_Up = go_enrichment_up,
             GO_Enrichment_Down = go_enrichment_down
         )
         for (go_sheet in names(go_tables)) {
             addWorksheet(wb, go_sheet)
             writeData(wb, go_sheet, as.data.frame(go_tables[[go_sheet]]))
         }

         # Save the workbook with enrichment results
         saveWorkbook(wb, "KEGG_and_GO_Enrichments_0_5ΔIJ-24_vs_0_5WT-24.xlsx", overwrite = TRUE)
    
         #NOTE: the lookup reports "Error for GO term: GO:0006807"; fill in its Description manually as "obsolete nitrogen compound metabolic process".
         #TODO: highlight the rows in yellow if p.adjust <= 0.05 in the GO_Enrichment sheets!
  3. Finalizing the KEGG and GO Enrichment table

         1. NOTE: the gene IDs in the KEGG_Enrichment sheets have already been translated from KO identifiers to gene IDs in H0N29_* format.
         2. NEED_MANUAL_DELETION: p.adjust values have been calculated; keep only the records in the GO_Enrichment results with p.adjust <= 0.05 and delete the rest.
         3. DELETE_ALL_q-values, since q-values are sometimes missing: if a result contains only one record, no q-value can be calculated, so the missing q-value is expected. The p.adjust value (e.g. Benjamini-Hochberg FDR) is still valid because the adjustment technically works even with a single p-value, whereas the q-value estimation requires more data.

MicrobiotaProcess_PCA_Group3-4.R processing Data_Karoline_16S_2025

# https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html

# -----------------------------------
# ---- prepare the R environment ----
#Rscript MicrobiotaProcess.R
#NOTE: exit R script, then login again R-environment; rm -rf Phyloseq*_cache
#mkdir figures
#rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
#source("MicrobiotaProcess_Group3_vs_Group4.R")

# with #alpha = 2.0, running the following script further!

# -----------------------------
# ---- 3.1. bridges other tools
##https://github.com/YuLab-SMU/MicrobiotaProcess
##https://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess.html
##https://chiliubio.github.io/microeco_tutorial/intro.html#framework
##https://yiluheihei.github.io/microbiomeMarker/reference/plot_cladogram.html
#BiocManager::install("MicrobiotaProcess")
#install.packages("microeco")
#install.packages("ggalluvial")
#install.packages("ggh4x")

library(MicrobiotaProcess)
library(microeco)
library(ggalluvial)
library(ggh4x)
library(gghalves)

## Convert the phyloseq object to a MicrobiotaProcess object
#mp <- as.MicrobiotaProcess(ps.ng.tax)

#mt <- phyloseq2microeco(ps.ng.tax) #--> ERROR
#abundance_table <- mt$abun_table
#taxonomy_table <- mt$tax_table

#ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
#ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)

##OPTION1 (NOT_USED): take all samples, prepare ps.ng.tax_abund --> mpse_abund
##mpse <- ps.ng.tax %>% as.MPSE()
#mpse_abund <- ps.ng.tax_abund %>% as.MPSE()

##OPTION2 (USED!): take partial samples, prepare ps.ng.tax or ps.ng.tax_abund (2 replacements!)--> ps.ng.tax_sel --> mpse_abund
# Samples kept for this run (presumably the Group3 and Group4 samples - confirm against the metadata)
selected_samples <- c(
  "sample-C3", "sample-C4", "sample-C5", "sample-C6", "sample-C7",
  "sample-E4", "sample-E5", "sample-E6", "sample-E7", "sample-E8"
)
ps.ng.tax_sel <- ps.ng.tax_abund

# Notes kept from an earlier selection (numeric sample IDs):
##otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("1","2","5","6","7",  "15","16","17","18","19","20",  "29","30","31","32",  "40","41","42","43","44","46")]
##NOTE: Only choose Group2, Group4, Group6, Group8
#> ps.ng.tax_sel
#otu_table()   OTU Table:         [ 37465 taxa and 29 samples ]
#sample_data() Sample Data:       [ 29 samples by 10 sample variables ]
#tax_table()   Taxonomy Table:    [ 37465 taxa by 7 taxonomic ranks ]
#phy_tree()    Phylogenetic Tree: [ 37465 tips and 37461 internal nodes ]
#-Group4: "21","22","23","24","25","26","27","28",
#-Group8: ,  "47","48","49","50","52","53","55"

# Restrict the OTU table to the selected samples, then convert to an MPSE object
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[, selected_samples]
mpse_abund <- as.MPSE(ps.ng.tax_sel)
# A MPSE-tibble (MPSE object) abstraction: 2,352 × 20
# NOTE mpse_abund contains 20 variables: OTU, Sample, Abundance, BarcodeSequence, LinkerPrimerSequence, FileInput, Group,
#   Sex_age, pre_post_stroke, Conc, Vol_50ng, Vol_PCR, Description,
#   Domain, Phylum, Class, Order, Family, Genus, Species

# -----------------------------------
# ---- 3.2. alpha diversity analysis
# Rarefied species richness + RareAbundance
mpse_abund %<>% mp_rrarefy()

# 'chunks' represent the split number of each sample to calculate alpha
# diversity, default is 400. e.g. If a sample has total 40000
# reads, if chunks is 400, it will be split to 100 sub-samples
# (100, 200, 300,..., 40000), then alpha diversity index was
# calculated based on the sub-samples.
# '.abundance' the column name of abundance, if the '.abundance' is not be
# rarefied calculate rarecurve, user can specific 'force=TRUE'.
mpse_abund %<>%
    mp_cal_rarecurve(
        .abundance = RareAbundance,
        chunks = 400
    )
# The RareAbundanceRarecurve column will be added the colData slot
# automatically (default action="add")
#NOTE mpse_abund contains 22 varibles = 20 varibles + RareAbundance + RareAbundanceRarecurve

# default will display the confidence interval around smooth.
# se=TRUE
# NOTE that two colors #c("#00A087FF", "#3C5488FF") for .group = pre_post_stroke; four colors c("#1f78b4", "#33a02c", "#e31a1c", "#6a3d9a") for .group = Group;
p1 <- mpse_abund %>%
    mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = Observe
    )

p2 <- mpse_abund %>%
    mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = Observe,
        .group = Group
    ) +
    scale_color_manual(values=c("#1f78b4", "#e31a1c")) +
    scale_fill_manual(values=c("#1f78b4", "#e31a1c"), guide="none")

# combine the samples belong to the same groups if plot.group=TRUE
p3 <- mpse_abund %>%
    mp_plot_rarecurve(
        .rare = RareAbundanceRarecurve,
        .alpha = "Observe",
        .group = Group,
        plot.group = TRUE
    ) +
    scale_color_manual(values=c("#1f78b4", "#e31a1c")) +
    scale_fill_manual(values=c("#1f78b4", "#e31a1c"), guide="none")

# print() is used for every figure so that it is also drawn when this script is
# run through source(), which does not auto-print top-level values.
png("rarefaction_of_samples_or_groups.png", width=1080, height=600)
print(p1 + p2 + p3)
dev.off()

# ------------------------------------------
# 3.3. calculate alpha index and visualization
library(ggplot2)
library(MicrobiotaProcess)
mpse_abund %<>%
    mp_cal_alpha(.abundance=RareAbundance)
mpse_abund
#NOTE mpse_abund contains 28 varibles = 22 varibles + Observe, Chao1, ACE, Shannon, Simpson, Pielou

f1 <- mpse_abund %>%
    mp_plot_alpha(
        .group=Group,
        .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
    ) +
    scale_fill_manual(values=c("#1f78b4", "#e31a1c"), guide="none") +
    scale_color_manual(values=c("#1f78b4", "#e31a1c"), guide="none")

f2 <- mpse_abund %>%
    mp_plot_alpha(
        .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
    )
#ps.ng.tax_sel contais only pre samples --> f1 cannot be generated!
png("alpha_diversity_comparison.png", width=1400, height=600)
print(f1 / f2)
dev.off()

# -------------------------------------------
# 3.4. The visualization of taxonomy abundance (Class)
mpse_abund %<>%
    mp_cal_abundance( # for each samples
        .abundance = RareAbundance
    ) %>%
    mp_cal_abundance( # for each groups
        .abundance=RareAbundance,
        .group=Group
    )
mpse_abund
#NOTE mpse_abund contains 29 varibles = 28 varibles + RelRareAbundanceBySample

# visualize the relative abundance of the top 20 classes for each sample.
# .group=time,
p1 <- mpse_abund %>%
    mp_plot_abundance(
        .abundance=RareAbundance,
        taxa.class = Class,
        topn = 20,
        relative = TRUE
    )
# visualize the abundance (rarefied) of the top 20 classes for each sample.
# .group=time,
p2 <- mpse_abund %>%
    mp_plot_abundance(
        .abundance=RareAbundance,
        taxa.class = Class,
        topn = 20,
        relative = FALSE
    )
png("relative_abundance_and_abundance.png", width= 1200, height=600) #NOT PRODUCED!
print(p1 / p2)
dev.off()

#----
h1 <- mpse_abund %>%
    mp_plot_abundance(
        .abundance = RareAbundance,
        .group = Group,
        taxa.class = Class,
        relative = TRUE,
        topn = 20,
        geom = 'heatmap',
        features.dist = 'euclidean',
        features.hclust = 'average',
        sample.dist = 'bray',
        sample.hclust = 'average'
    )

h2 <- mpse_abund %>%
    mp_plot_abundance(
        .abundance = RareAbundance,
        .group = Group,
        taxa.class = Class,
        relative = FALSE,
        topn = 20,
        geom = 'heatmap',
        features.dist = 'euclidean',
        features.hclust = 'average',
        sample.dist = 'bray',
        sample.hclust = 'average'
    )
# the character (scale or theme) of figure can be adjusted by set_scale_theme
# refer to the mp_plot_dist
png("relative_abundance_and_abundance_heatmap.png", width= 1200, height=600)
print(aplot::plot_list(gglist=list(h1, h2), tag_levels="A"))
dev.off()

# visualize the relative abundance of top 20 class for each .group (Group)
p3 <- mpse_abund %>%
    mp_plot_abundance(
        .abundance=RareAbundance,
        .group=Group,
        taxa.class = Class,
        topn = 20,
        plot.group = TRUE
    )
# visualize the abundance of top 20 class for each .group (Group)
p4 <- mpse_abund %>%
    mp_plot_abundance(
        .abundance=RareAbundance,
        .group= Group,
        taxa.class = Class,
        topn = 20,
        relative = FALSE,
        plot.group = TRUE
    )
png("relative_abundance_and_abundance_groups.png", width= 1000, height=1000)
print(p3 / p4)
dev.off()

# ---------------------------
# 3.5. Beta diversity analysis
# ---------------------------------------------
# 3.5.1 The distance between samples or groups
# standardization
# mp_decostand wraps the decostand of vegan, which provides
# many standardization methods for community ecology.
# default is hellinger, then the abundance processed will
# be stored to the assays slot.
mpse_abund %<>%
    mp_decostand(.abundance=Abundance)
mpse_abund
#NOTE mpse_abund contains 30 varibles = 29 varibles + hellinger

# calculate the distance between the samples.
# the distance will be generated a nested tibble and added to the
# colData slot.
mpse_abund %<>% mp_cal_dist(.abundance=hellinger, distmethod="bray")
mpse_abund
#NOTE mpse_abund contains 31 varibles = 30 varibles + bray

# mp_plot_dist provides there methods to visualize the distance between the samples or groups
# when .group is not provided, the dot heatmap plot will be return
p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray)
# print() so the figure is also drawn when the script is run through source()
png("distance_between_samples.png", width= 1000, height=1000)
print(p1)
dev.off()

# when .group is provided, the dot heatmap plot with group information will be return.
p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = Group)
# The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function.
# The themed plot is assigned back to p2; previously the result was discarded,
# so the PNG below was drawn without these scales.
p2 <- p2 %>%
    set_scale_theme(
        x = scale_fill_manual(
            values=c("#1f78b4", "#e31a1c"), #c("orange", "deepskyblue"),
            guide = guide_legend(
                keywidth = 1,
                keyheight = 0.5,
                title.theme = element_text(size=8),
                label.theme = element_text(size=6)
            )
        ),
        aes_var = Group # specific the name of variable
    ) %>%
    set_scale_theme(
        x = scale_color_gradient(
            guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
        ),
        aes_var = bray
    ) %>%
    set_scale_theme(
        x = scale_size_continuous(
            range = c(0.1, 3),
            guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
        ),
        aes_var = bray
    )
png("distance_between_samples_with_group_info.png", width= 1000, height=1000)
print(p2)
dev.off()

# when .group is provided and group.test is TRUE, the comparison of different groups will be returned
# Assuming p3 is a ggplot object after mp_plot_dist call
p3 <- mpse_abund %>%
    mp_plot_dist(.distmethod = bray, .group = Group, group.test = TRUE, textsize = 6) +
    theme(
        axis.title.x = element_text(size = 14),  # Customize x-axis label face = "bold"
        axis.title.y = element_text(size = 14),  # Customize y-axis label
        axis.text.x = element_text(size = 14),   # Customize x-axis ticks
        axis.text.y = element_text(size = 14)    # Customize y-axis ticks
    )
# Save the plot with the new theme settings
png("Comparison_of_Bray_Distances_Group3-4.png", width = 1000, height = 1000)
print(p3)  # Ensure that p3 is explicitly printed in the device
dev.off()

# Extract Bray-Curtis Distance Values and save them in a Excel-table.
library(dplyr)
library(tidyr)
library(openxlsx)
# Define the sample numbers vector
sample_numbers <- c("sample-C3","sample-C4","sample-C5","sample-C6","sample-C7", "sample-E4","sample-E5","sample-E6","sample-E7","sample-E8")
# Consolidate the list of tibbles using the actual sample numbers
# NOTE(review): Sample1 is taken by position, which assumes mpse_abund$bray is
# ordered like sample_numbers - confirm.
bray_data <- bind_rows(
    lapply(seq_along(mpse_abund$bray), function(i) {
        tibble(
            Sample1 = sample_numbers[i],  # Use actual sample number
            Sample2 = mpse_abund$bray[[i]]$braySampley,
            BrayDistance = mpse_abund$bray[[i]]$bray
        )
    }),
    .id = "PairID"
)
# Print the data frame to check the output
print(bray_data)
# Write the data frame to an Excel file
write.xlsx(bray_data, file = "Bray_Curtis_Distances.xlsx")
#DELETE the column "PairID" in Excel file

# -----------------------
# 3.5.2 The PCoA analysis
#install.packages("corrr")
library(corrr)
#install.packages("ggside")
library(ggside)
mpse_abund %<>% mp_cal_pcoa(.abundance=hellinger, distmethod="bray")
# The dimensions of ordination analysis will be added the colData slot (default).
mpse_abund
mpse_abund %>% print(width=380, n=2)
#NOTE mpse_abund contains 34 varibles = 31 varibles + `PCo1 (30.16%)`, `PCo2 (15.75%)`, `PCo3 (10.53%)`
#BUG why 36 variables in mpse_abund %>% print(width=380, n=1) [RareAbundanceBySample, RareAbundanceByGroup]

#> methods(class=class(mpse_abund))
# [1] [                        [[<-                     [<-
# [4] $                        $<-                      arrange
# [7] as_tibble                as.data.frame            as.phyloseq
#[10] coerce                   coerce<-                 colData<-
#[13] distinct                 filter                   group_by
#[16] left_join                mp_adonis                mp_aggregate_clade
#[19] mp_aggregate             mp_anosim                mp_balance_clade
#[22] mp_cal_abundance         mp_cal_alpha             mp_cal_cca
#[25] mp_cal_clust             mp_cal_dca               mp_cal_dist
#[28] mp_cal_nmds              mp_cal_pca               mp_cal_pcoa
#[31] mp_cal_pd_metric         mp_cal_rarecurve         mp_cal_rda
#[34] mp_cal_upset             mp_cal_venn              mp_decostand
#[37] mp_diff_analysis         mp_diff_clade            mp_envfit
#[40] mp_extract_abundance     mp_extract_assays        mp_extract_dist
#[43] mp_extract_feature       mp_extract_internal_attr mp_extract_rarecurve
#[46] mp_extract_refseq        mp_extract_sample        mp_extract_taxonomy
#[49] mp_extract_tree          mp_filter_taxa           mp_mantel
#[52] mp_mrpp                  mp_plot_abundance        mp_plot_alpha
#[55] mp_plot_diff_boxplot     mp_plot_diff_res         mp_plot_dist
#[58] mp_plot_ord              mp_plot_rarecurve        mp_plot_upset
#[61] mp_plot_venn             mp_rrarefy               mp_select_as_tip
#[64] mp_stat_taxa             mutate                   otutree
#[67] otutree<-                print                    pull
#[70] refsequence              refsequence<-            rename
#[73] rownames<-               select                   show
# [ reached getOption("max.print") -- omitted 6 entries ]
#see '?methods' for accessing help and source code

# We also can perform adonis or anosim to check whether it is significant to the dissimilarities of groups.
mpse_abund %<>%
    mp_adonis(.abundance=hellinger, .formula=~Group, distmethod="bray", permutations=9999, action="add")
mpse_abund %>% mp_extract_internal_attr(name=adonis)
#NOTE mpse_abund contains 34 varibles, no new variable, it has been saved in mpse_abund and can be extracted with "mpse_abund %>% mp_extract_internal_attr(name='adonis')"
#The result of adonis has been saved to the internal attribute !
#It can be extracted using this-object %>% mp_extract_internal_attr(name='adonis')
#The object contained internal attribute: PCoA ADONIS
#Permutation test for adonis under reduced model
#Terms added sequentially (first to last)
#Permutation: free
#Number of permutations: 9999
#
#vegan::adonis2(formula = .formula, data = sampleda, permutations = permutations, method = distmethod)
#         Df SumOfSqs      R2      F Pr(>F)
#Group     1  0.23448 0.22659 3.5158  5e-04 ***
#Residual 12  0.80032 0.77341
#Total    13  1.03480 1.00000
#---
#Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): a total of 13 degrees of freedom corresponds to 14 samples, whereas
# 10 samples are selected in this script - the pasted output is presumably from an
# earlier run; re-run and update.

# ("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")
#div.df2[div.df2 == "Group1"] <- "aged.post"
#div.df2[div.df2 == "Group3"] <- "young.post"
#div.df2[div.df2 == "Group5"] <- "aged.post"
#div.df2[div.df2 == "Group7"] <- "young.post"
# ("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")
#div.df2[div.df2 == "Group2"] <- "aged.pre"
#div.df2[div.df2 == "Group4"] <- "young.pre"
#div.df2[div.df2 == "Group6"] <- "aged.pre"
#div.df2[div.df2 == "Group8"] <- "young.pre"

#Group1: f.aged and post
#Group2: f.aged and pre
#Group3: f.young and post
#Group4: f.young and pre
#Group5: m.aged and post
#Group6: m.aged and pre
#Group7: m.young and post
#Group8: m.young and pre

#[,c("1","2","5","6","7", "8","9","10","12","13","14")]
#[,c("15","16","17","18","19","20", "21","22","23","24","25","26","27","28")]
#[,c("29","30","31","32", "33","34","35","36","37","38","39","51")]
#[,c("40","41","42","43","44","46", "47","48","49","50","52","53","55")]

#For the first set:
#a6cee3: This is a light blue color, somewhat pastel and soft.
#b2df8a: A soft, pale green, similar to a light lime.
#fb9a99: A soft pink, slightly peachy or salmon-like.
#cab2d6: A pale purple, reminiscent of lavender or a light mauve.
#For the second set:
#1f78b4: This is a strong, vivid blue, close to cobalt or a medium-dark blue.
#33a02c: A medium forest green, vibrant and leafy.
#e31a1c: A bright red, very vivid, similar to fire engine red.
#6a3d9a: This would be described as a deep purple, akin to a dark lavender or plum.

# ---- Styling shared by the three PCoA figures ----
group_colours <- c("#1f78b4", "#e31a1c")

# Legend guide with square keys of the given size and 16 pt labels
pcoa_legend <- function(key_size) {
    guide_legend(
        keywidth = key_size,
        keyheight = key_size,
        label.theme = element_text(size = 16)
    )
}

pcoa_theme <- theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 22),
    plot.title = element_text(size = 24, face = "bold"),
    plot.subtitle = element_text(size = 20)
)

# Write one plot to <file_stem>.png (1200 x 1000) and <file_stem>.pdf.
# print() is explicit so the figure is drawn inside the function.
save_pcoa_plot <- function(plot, file_stem) {
    png(paste0(file_stem, ".png"), width = 1200, height = 1000)
    print(plot)
    dev.off()
    pdf(paste0(file_stem, ".pdf"))
    print(plot)
    invisible(dev.off())
}

# PCoA with fixed point size
p1 <- mpse_abund %>%
    mp_plot_ord(
        .ord = pcoa,
        .group = Group,
        .color = Group,
        .size = 4,  # increase point size!
        .alpha = 1,
        ellipse = TRUE,
        show.legend = FALSE
    ) +
    scale_fill_manual(values = group_colours, guide = pcoa_legend(1.6)) +
    scale_color_manual(values = group_colours, guide = pcoa_legend(1.6)) +
    pcoa_theme
save_pcoa_plot(p1, "PCoA_Group3-4")

# PCoA with point size mapped to Shannon and transparency mapped to Observe
p2 <- mpse_abund %>%
    mp_plot_ord(
        .ord = pcoa,
        .group = Group,
        .color = Group,
        .size = Shannon,
        .alpha = Observe,
        ellipse = TRUE,
        show.legend = FALSE
    ) +
    scale_fill_manual(values = group_colours, guide = pcoa_legend(0.6)) +
    scale_color_manual(values = group_colours, guide = pcoa_legend(0.6)) +
    scale_size_continuous(
        range = c(2, 6),  # increase size range!
        guide = pcoa_legend(0.6)
    ) +
    pcoa_theme
save_pcoa_plot(p2, "PCoA2_Group3-4")

# Extract sample names and add ShortLabel to colData
colData(mpse_abund)$ShortLabel <- gsub("sample-", "", mpse_abund@colData@rownames)

# Same as p2, with every point labelled by its short sample name.
# geom_text_repel is namespace-qualified because ggrepel is not attached by this script.
p3 <- mpse_abund %>%
    mp_plot_ord(
        .ord = pcoa,
        .group = Group,
        .color = Group,
        .size = Shannon,
        .alpha = Observe,
        ellipse = TRUE,
        show.legend = FALSE
    ) +
    ggrepel::geom_text_repel(aes(label = ShortLabel), size = 5, max.overlaps = 100) +
    scale_fill_manual(values = group_colours, guide = pcoa_legend(0.6)) +
    scale_color_manual(values = group_colours, guide = pcoa_legend(0.6)) +
    scale_size_continuous(
        range = c(2, 6),  # increase size range!
        guide = pcoa_legend(0.6)
    ) +
    pcoa_theme
save_pcoa_plot(p3, "PCoA3_Group3-4")

Phyloseq.Rmd processing Data_Karoline_16S_2025

author: ""
date: '`r format(Sys.time(), "%d %m %Y")`'
header-includes:
   - \usepackage{color, fancyvrb}
output:
  rmdformats::readthedown:
    highlight: kate
    number_sections : yes    
  pdf_document: 
    toc: yes
    toc_depth: 2
    number_sections : yes
---

```{r, echo=FALSE, warning=FALSE}
## Global options
# TODO: reproduce the html with the additional figure/SVN-files for editing.
# IMPORTANT NOTE: needs before "mkdir figures"
#NEEDs to be often close R and start R, then new rendering --> working!
#rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
#install.packages("heatmaply")
#install.packages("gplots")
#BiocManager::install("phyloseq")
#library(phyloseq)
#DEBUG a package conflict: using phyloseq::tax_table() or detach(package:MicrobiotaProcess, unload=TRUE)
```

```{r load-packages, include=FALSE}

#install.packages(c("picante", "rmdformats"))
#mamba install -c conda-forge freetype libpng harfbuzz fribidi
#mamba install -c conda-forge r-systemfonts r-svglite r-kableExtra freetype fontconfig harfbuzz fribidi libpng
library(knitr)
library(rmdformats)
library(readxl)
library(dplyr)
library(kableExtra)
library(openxlsx)
library(DESeq2)

# Console print limit and the knitr defaults applied to every chunk
options(max.print = "75")
knitr::opts_chunk$set(
  fig.width  = 8,
  fig.height = 6,
  eval       = TRUE,
  cache      = TRUE,
  echo       = TRUE,
  prompt     = FALSE,
  tidy       = FALSE,
  comment    = NA,
  message    = FALSE,
  warning    = FALSE
)
opts_knit$set(width = 85)
# Phyloseq R library
#* Phyloseq web site : https://joey711.github.io/phyloseq/index.html
#* See in particular tutorials for
#    - importing data: https://joey711.github.io/phyloseq/import-data.html
#    - heat maps: https://joey711.github.io/phyloseq/plot_heatmap-examples.html
```

# Data  

Import raw data and assign sample key:

```{r, echo=FALSE, warning=FALSE}
#extend qiime2_metadata_for_qza_to_phyloseq.tsv with Diet and Flora
#setwd("~/DATA/Data_Laura_16S_2/core_diversity_e4753")
#map_corrected <- read.csv("qiime2_metadata_for_qza_to_phyloseq.tsv", sep="\t", row.names=1)
#knitr::kable(map_corrected) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

# Prerequisites to be installed

* R : https://pbil.univ-lyon1.fr/CRAN/
* R studio : https://www.rstudio.com/products/rstudio/download/#download

```R
install.packages("dplyr")     # To manipulate dataframes
install.packages("readxl")    # To read Excel files into R
install.packages("ggplot2")   # for high quality graphics
install.packages("heatmaply")
# Bioconductor packages are installed through BiocManager; the former
# biocLite.R installer script is no longer supported.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("phyloseq")
```

```{r libraries, echo=TRUE, message=FALSE}
#mamba install -c conda-forge r-ggplot2 r-vegan r-data.table
#BiocManager::install("microbiome")
#install.packages("ggpubr")
#install.packages("heatmaply")
library("readxl") # necessary to import the data from Excel file
library("ggplot2") # graphics
library("picante")
library("microbiome") # data analysis and visualisation
library("phyloseq") # also the basis of data object. Data analysis and visualisation
library("ggpubr") # publication quality figures, based on ggplot2
library("dplyr") # data handling, filter and reformat data frames
library("RColorBrewer") # nice color options
library("heatmaply")
library(vegan)
library(gplots)
```

# Read the data and create phyloseq objects

Three tables are needed

* OTU
* Taxonomy
* Samples

```{r, echo=FALSE, warning=FALSE}

    library(tidyr)

    # For QIIME1
    #ps.ng.tax <- import_biom("./exported_table/feature-table.biom", "./exported-tree/tree.nwk")

    # For QIIME2
    #install.packages("remotes")
    #remotes::install_github("jbisanz/qiime2R")
    #"core_metrics_results/rarefied_table.qza", rarefying performed in the code, therefore import the raw table.
    library(qiime2R)
    # Build the phyloseq object from the feature table, the rooted tree and the metadata
    ps.ng.tax <- qza_to_phyloseq(
      features = "dada2_tests2/test_7_f240_r240/table.qza",
      tree     = "rooted-tree.qza",
      metadata = "qiime2_metadata_for_qza_to_phyloseq.tsv"
    )
    # or
    #biom convert \
    #      -i ./exported_table/feature-table.biom \
    #      -o ./exported_table/feature-table-v1.biom \
    #      --to-json
    #ps.ng.tax <- import_biom("./exported_table/feature-table-v1.biom", treefilename="./exported-tree/tree.nwk")

    # Read the metadata table again (first column as row names) and merge it in as sample data
    sample <- read.csv(
      "./qiime2_metadata_for_qza_to_phyloseq.tsv",
      sep = "\t",
      row.names = 1
    )
    SAM <- sample_data(sample, errorIfNULL = TRUE)
    #rownames(SAM) <- c("1","2","3","5","6","7","8","9","10","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","46","47","48","49","50","51","52","53","55")

    #> setdiff(rownames(SAM), sample_names(ps.ng.tax))
    #[1] "sample-L9" should be removed since the low reads

    ps.ng.tax <- merge_phyloseq(ps.ng.tax, SAM)
    print(ps.ng.tax)

    # Taxonomy: split the Taxon string into seven rank columns
    # NOTE(review): if the ranks in Taxon are separated by "; ", every rank after the
    # first keeps a leading space - confirm what downstream code expects.
    taxonomy <- read.delim("exported-taxonomy/taxonomy.tsv", sep = "\t", header = TRUE)
    #head(taxonomy)
    taxonomy_df <- tidyr::separate(
      taxonomy,
      Taxon,
      into  = c("Domain","Phylum","Class","Order","Family","Genus","Species"),
      sep   = ";",
      fill  = "right",
      extra = "drop"
    )
    # Feature.ID becomes the row names; the first and last columns are then dropped
    rownames(taxonomy_df) <- taxonomy_df$Feature.ID
    taxonomy_df <- taxonomy_df[, -c(1, ncol(taxonomy_df))]  # Drop Feature.ID and Confidence
    # Attach the rank matrix as the tax_table of the existing phyloseq object
    tax_table_final <- phyloseq::tax_table(as.matrix(taxonomy_df))
    ps.ng.tax <- merge_phyloseq(ps.ng.tax, tax_table_final)
    # Check
    ps.ng.tax

    #colnames(phyloseq::tax_table(ps.ng.tax)) <- c("Domain","Phylum","Class","Order","Family","Genus","Species")
    saveRDS(ps.ng.tax, "./ps.ng.tax.rds")
```

Visualize data
```{r, echo=TRUE, warning=FALSE}
  # Inspect the object before filtering: sample names, taxonomic ranks, metadata variables
  sample_names(ps.ng.tax)
  rank_names(ps.ng.tax)
  sample_variables(ps.ng.tax)

  # Define sample names once
  # Samples to keep; the IDs after each "#RESIZED:" marker are commented out and
  # therefore not part of the vector.
  samples <- c(
    "sample-A1","sample-A2","sample-A3","sample-A4","sample-A5","sample-A6","sample-A7","sample-A8","sample-A9","sample-A10","sample-A11",
    "sample-B1","sample-B2","sample-B3","sample-B4","sample-B5","sample-B6","sample-B7","sample-B8","sample-B9","sample-B10","sample-B11","sample-B12","sample-B13","sample-B14","sample-B15","sample-B16",
    "sample-C1","sample-C2","sample-C3","sample-C4","sample-C5","sample-C6","sample-C7","sample-C8","sample-C9","sample-C10",
    "sample-E1","sample-E2","sample-E3","sample-E4","sample-E5","sample-E6","sample-E7","sample-E8","sample-E9","sample-E10",
    "sample-F1","sample-F2","sample-F3","sample-F4","sample-F5",
    "sample-G1","sample-G2","sample-G3","sample-G4","sample-G5","sample-G6",
    "sample-H1","sample-H2","sample-H3","sample-H4","sample-H5","sample-H6",
    "sample-I1","sample-I2","sample-I3","sample-I4","sample-I5","sample-I6",
    "sample-J1","sample-J2","sample-J3","sample-J4","sample-J10","sample-J11",  #RESIZED: "sample-J5","sample-J6","sample-J7","sample-J8","sample-J9",
    "sample-K7","sample-K8","sample-K9","sample-K10",  #RESIZED: "sample-K1","sample-K2","sample-K3","sample-K4","sample-K5","sample-K6",  "sample-K11","sample-K12","sample-K13","sample-K14","sample-K15",
    "sample-L1","sample-L7","sample-L8","sample-L10",  #RESIZED: "sample-L2","sample-L3","sample-L4","sample-L5","sample-L6",  "sample-L11","sample-L12","sample-L13","sample-L14","sample-L15",
    "sample-M1","sample-M2","sample-M3","sample-M4","sample-M5","sample-M6","sample-M7","sample-M8",
    "sample-N1","sample-N2","sample-N3","sample-N4","sample-N5","sample-N6","sample-N7","sample-N8","sample-N9","sample-N10",
    "sample-O1","sample-O2","sample-O3","sample-O4","sample-O5","sample-O6","sample-O7","sample-O8"
  )
  # Restrict the phyloseq object to the samples listed above
  ps.ng.tax <- prune_samples(samples, ps.ng.tax)

  # Inspect the object again after filtering
  sample_names(ps.ng.tax)
  rank_names(ps.ng.tax)
  sample_variables(ps.ng.tax)
```

Normalize number of reads in each sample using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
# RAREFACTION
# The depth is defined once; it was previously hard-coded in two places.
rarefaction_depth <- 6389
set.seed(9242)  # This will help in reproducing the filtering and nomalisation.
ps.ng.tax <- rarefy_even_depth(ps.ng.tax, sample.size = rarefaction_depth)

# NORMALIZE number of reads in each sample using median sequencing depth.
# 'total' is computed from the rarefied object, so it is expected to equal
# rarefaction_depth (the former note "#[1] 42369" predates the rarefaction step).
total <- median(sample_sums(ps.ng.tax))
# Scale the counts of one sample to t reads in total, rounded to whole reads
standf <- function(x, t = total) round(t * (x / sum(x)))
ps.ng.tax <- transform_sample_counts(ps.ng.tax, standf)
ps.ng.tax_rel <- microbiome::transform(ps.ng.tax, "compositional")

# Overwrites the file written right after import with the rarefied, normalised object
saveRDS(ps.ng.tax, "./ps.ng.tax.rds")
# Sample metadata as a data frame, with the sample name as an extra column
hmp.meta <- meta(ps.ng.tax)
hmp.meta$sam_name <- rownames(hmp.meta)
```

# Prepare ps.ng.tax_rel, ps.ng.tax_abund, ps.ng.tax_abund_rel from ps.ng.tax

```{r, echo=FALSE, warning=FALSE}
#MOVE_FROM_ABOVE: The number of reads used for normalization is **`r sprintf("%.0f", total)`**. 
#A basic heatmap using the default parameters.
#  plot_heatmap(ps.ng.tax, method = "NMDS", distance = "bray")
#NOTE: remember to give the correct OTU numbers in the text for the chosen threshold (1%, 0.5%, ...)!!!
```

For the heatmaps, we focus on the most abundant OTUs by first converting counts to relative abundances within each sample. We then filter to retain only OTUs whose mean relative abundance across all samples exceeds 0.1% (0.001). We are left with 199 OTUs, which makes the heatmap much easier to read.

```{r, echo=FALSE, warning=FALSE}

# Custom function to plot a heatmap with the specified sample order
#plot_heatmap_custom <- function(ps, sample_order, method = "NMDS", distance = "bray") {

# --Filtering strategy 1: This filters taxa based on raw counts (ps.ng.tax). For each taxon (across samples), it checks whether its count exceeds 1% of the total in at least one sample.     Description: We consider the most abundant OTUs for heatmaps. For example, one can take only OTUs that represent at least 1% of reads in at least one sample. Remember that we normalized all the samples to the median number of reads (total).  We are left with only 382 OTUs, which makes the heatmap much easier to read.
#ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)

# --Filtering strategy 2: select taxa on relative abundances (ps.ng.tax_rel), keeping only those whose mean relative abundance across samples is above 0.1%.
# Step 1: turn every sample's counts into proportions.
ps.ng.tax_rel <- transform_sample_counts(
  physeq = ps.ng.tax,
  fun = function(counts) counts / sum(counts)
)

# Step 2: one TRUE/FALSE flag per OTU - is its mean proportion above 0.001?
# (prune = FALSE asks for the flags rather than an already pruned object.)
keep_vector <- phyloseq::filter_taxa(
  physeq = ps.ng.tax_rel,
  flist = function(proportions) mean(proportions) > 0.001,
  prune = FALSE
)

# Step 3: apply the flags to the absolute-count object.
ps.ng.tax_abund <- prune_taxa(
  taxa = names(keep_vector)[keep_vector],
  x = ps.ng.tax
)

# Step 4: re-express the retained subset as per-sample proportions.
ps.ng.tax_abund_rel <- transform_sample_counts(
  physeq = ps.ng.tax_abund,
  fun = function(counts) counts / sum(counts)
)
```

# Heatmaps

```{r, echo=FALSE, warning=FALSE}
# OTU table of the abundance-filtered object as a plain data frame.
datamat_ <- as.data.frame(otu_table(ps.ng.tax_abund))

#datamat <- datamat_[c("1","2","5","6","7",  "8","9","10","12","13","14",    "15","16","17","18","19","20",  "21","22","23","24","25","26","27","28",    "29","30","31","32",  "33","34","35","36","37","38","39","51",    "40","41","42","43","44","46",  "47","48","49","50","52","53","55")]
# Select the sample columns in plotting order. The IDs are generated per group
# letter ("sample-<letter><number>") instead of being spelled out one by one.
datamat <- datamat_[c(
    paste0("sample-A", 1:11),
    paste0("sample-B", 1:16),
    paste0("sample-C", 1:10),
    paste0("sample-E", 1:10),
    paste0("sample-F", 1:5),
    paste0("sample-G", 1:6),
    paste0("sample-H", 1:6),
    paste0("sample-I", 1:6),
    paste0("sample-J", c(1:4, 10, 11)),   #RESIZED (not selected): J5-J9
    paste0("sample-K", 7:10),             #RESIZED (not selected): K1-K6, K11-K15
    paste0("sample-L", c(1, 7, 8, 10)),   #RESIZED (not selected): L2-L6, L11-L15
    paste0("sample-M", 1:8),
    paste0("sample-N", 1:10),
    paste0("sample-O", 1:8)
  )]

# Row (OTU) tree: complete-linkage clustering on 1 - Pearson correlation.
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
# Column (sample) tree on 1 - Spearman correlation. The heatmap call in this
# chunk uses Colv = NA, so this tree is computed but not drawn there.
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
# Cut the row tree at (maximum merge height / 1.08) to obtain row clusters.
mycl <- cutree(hr, h=max(hr$height)/1.08)
mycol <- c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN")

# One colour per row, chosen by cluster id. The index wraps around the palette
# so that a cut producing more clusters than there are colours re-uses colours
# instead of yielding NA entries (identical result while clusters <= palette size).
mycol <- mycol[(as.vector(mycl) - 1) %% length(mycol) + 1]
# Neutral default for the column side colours.
sampleCols <- rep('GREY',ncol(datamat))
#names(sampleCols) <- c("Group1", "Group1", "Group1", "Group1", "Group1",   "Group2", "Group2",   "Group3", "Group3", "Group3",  ...)

# Define 14 colors (used as the per-group palette for the column side colours)
my_colors <- c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c",
                "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00",
                "#cab2d6", "#6a3d9a", "#ffff99", "#b15928",
                "#8dd3c7", "#fb8072")
# The former `colnames(datamat) <- c(...)` re-assignment was removed: datamat is
# created by selecting exactly those columns by name, so it already carries
# them, and a blind positional overwrite would silently mislabel samples if the
# column selection were ever changed.

# Drop the leading "sample-" so that every ID starts with its group letter.
# NOTE(review): this variable shares its name with the sample_names() function
# called earlier in this document; function calls still resolve, but a distinct
# variable name would be clearer.
sample_names <- sub(pattern = "^sample-", replacement = "", x = colnames(datamat))

# Map one sample ID to its group index.
#
# sample_id: a single ID whose first character is the group letter (e.g. "A1").
# Returns the 1-based position of that letter in the group order
# A, B, C, E, F, G, H, I, J, K, L, M, N, O as a number, or NA when the
# first character is none of these letters.
assign_group <- function(sample_id) {
  group_letters <- c("A", "B", "C", "E", "F", "G", "H",
                     "I", "J", "K", "L", "M", "N", "O")
  position <- match(substr(sample_id, 1, 1), group_letters)
  if (is.na(position)) NA else as.numeric(position)
}
# Look up the group index of every (prefix-stripped) sample ID.
group_numbers <- sapply(X = sample_names, FUN = assign_group)
# Pick each sample's colour from the group palette by that index.
sampleCols <- my_colors[group_numbers]
# Print the resulting colour vector as a quick sanity check.
print(sampleCols)
#'#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c', '#cab2d6', '#6a3d9a'

#bluered(75)
#color_pattern <- colorRampPalette(c("blue", "white", "red"))(100)
library(RColorBrewer)
# 100-step gradient interpolated from the 9-class ColorBrewer "Blues" palette.
custom_palette <- colorRampPalette(brewer.pal(9, "Blues"))
heatmap_colors <- custom_palette(100)
#colors <- heatmap_color_default(100)

# png() does not create missing directories, so make sure the target exists.
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
png("figures/heatmap.png", width=1200, height=2400)
#par(mar=c(2, 2, 2, 2))  , lwid=1    lhei=c(0.7, 10)) # Adjust height of color keys   keysize=0.3,
# Rows are ordered by the `hr` tree and scaled per row; columns keep their
# given order (Colv = NA). `finally` closes the png device even when
# heatmap.2() fails, so a failed plot cannot leave the device open.
# invisible() keeps the return values of heatmap.2()/dev.off() out of the report.
invisible(tryCatch(
  heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
            scale='row',trace='none',col=heatmap_colors, cexRow=1.2, cexCol=1.5,
            RowSideColors = mycol, ColSideColors = sampleCols, srtCol=15, labRow=row.names(datamat), key=TRUE, margins=c(10, 15), lhei=c(0.7, 15), lwid=c(1,8)),
  finally = dev.off()
))
```
```{r, echo=TRUE, warning=FALSE, fig.cap="Heatmap", out.width = '100%', fig.align= "center"}
# Embed the heatmap PNG in the report.
knitr::include_graphics(path = "./figures/heatmap.png")
```

\pagebreak

```{r, echo=FALSE, warning=FALSE}
  library(stringr)
#FITTING1: 
#for id in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100  101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199; do
#for id in 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300; do
#for id in 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382; do
#  echo "phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Domain\"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Domain\"], \"__\")[[1]][2]"
#  echo "phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Phylum\"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Phylum\"], \"__\")[[1]][2]"
#  echo "phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Class\"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Class\"], \"__\")[[1]][2]"
#  echo "phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Order\"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Order\"], \"__\")[[1]][2]"
#  echo "phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Family\"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Family\"], \"__\")[[1]][2]"
#  echo "phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Genus\"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Genus\"], \"__\")[[1]][2]"
#  echo "phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Species\"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[${id},\"Species\"], \"__\")[[1]][2]"
#done

# Strip the rank prefix from the taxonomy labels of OTU rows 1-24: every cell
# is split at "__" and replaced by the second piece (NA if the cell contains
# no "__"). This loop replaces 168 generated one-cell statements and performs
# exactly the same assignments.
# NOTE(review): the loop is deliberately limited to rows 1-24; the generated
# statements that follow continue from row 25. Do not widen the range without
# removing them - splitting an already stripped value a second time yields NA.
for (row_idx in 1:24) {
  for (tax_rank in c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
    phyloseq::tax_table(ps.ng.tax_abund_rel)[row_idx, tax_rank] <-
      str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[row_idx, tax_rank], "__")[[1]][2]
  }
}
phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[25,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[26,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[27,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[28,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[29,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[30,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[31,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[32,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[33,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[34,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[35,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[36,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[37,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[38,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[39,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[40,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[41,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[42,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[43,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[44,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[45,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[46,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[47,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[48,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[49,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[50,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[51,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[52,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[53,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[54,"Species"], "__")[[1]][2]
# Rows 55-91 of the taxonomy table of ps.ng.tax_abund_rel, all seven ranks:
# replace each cell with the second piece obtained by splitting its current
# value on "__" (presumably dropping a rank prefix -- verify against the
# imported taxonomy strings). A value without "__" has no second piece and
# therefore becomes NA.
#
# Cells are updated one at a time, rows in ascending order and ranks from
# Domain to Species within a row, so the sequence of assignments matches the
# unrolled form this loop stands for.
for (taxon_row in 55:91) {
  for (rank_name in c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
    phyloseq::tax_table(ps.ng.tax_abund_rel)[taxon_row, rank_name] <-
      str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[taxon_row, rank_name], "__")[[1]][2]
  }
}
phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[92,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[93,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[94,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[95,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[96,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[97,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[98,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[99,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[100,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[101,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[102,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[103,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[104,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[105,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[106,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[107,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[108,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[109,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[110,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[111,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[112,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[113,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[114,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[115,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[116,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[117,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[118,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[119,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[120,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[121,"Species"], "__")[[1]][2]
# Strip the rank prefix from taxonomy labels in ps.ng.tax_abund_rel.
# Every addressed cell is split on "__" and overwritten with the SECOND piece
# of that split (e.g. "g__Foo" -> "Foo"). A cell that contains no "__" has no
# second piece and therefore ends up as NA -- this is intentionally the same
# result the one-statement-per-cell form produced.
#
# Coverage of this span: rows 122-158 for all seven ranks, followed by row 159
# for Domain..Order only (this span stops after "Order" for row 159).
# Cells are processed in the same order as before: row by row, and within a
# row from Domain to Species.
strip_all_ranks <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")

# Rows handled completely (all seven ranks).
for (strip_row in 122:158) {
  for (strip_rank in strip_all_ranks) {
    phyloseq::tax_table(ps.ng.tax_abund_rel)[strip_row, strip_rank] <-
      str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[strip_row, strip_rank], "__")[[1]][2]
  }
}

# Row 159: only the first four ranks (Domain, Phylum, Class, Order) belong here.
for (strip_rank in strip_all_ranks[1:4]) {
  phyloseq::tax_table(ps.ng.tax_abund_rel)[159, strip_rank] <-
    str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[159, strip_rank], "__")[[1]][2]
}

# Drop the loop helpers so the workspace holds no extra variables afterwards.
rm(strip_all_ranks, strip_row, strip_rank)
phyloseq::tax_table(ps.ng.tax_abund_rel)[159,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[159,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[159,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[159,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[159,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[159,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[160,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[161,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[162,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[163,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[164,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[165,"Species"], "__")[[1]][2]

# Row 166: replace every rank label by the piece following "__".
for (strip_rank in c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
  phyloseq::tax_table(ps.ng.tax_abund_rel)[166, strip_rank] <-
    str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[166, strip_rank], "__")[[1]][2]
}
# Remove the loop helper so the workspace is left as before.
rm(strip_rank)

# Row 167: replace every rank label by the piece following "__".
#
# This must run exactly ONCE per row. The expression keeps the second piece
# of the "__" split; once a label has been stripped it no longer contains
# "__", so the split yields a single piece and `[2]` evaluates to NA.
# A repeated pass over the same row (as in the earlier duplicated copy of
# this block) therefore overwrites the cleaned taxonomy with NA.
for (strip_rank in c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
  phyloseq::tax_table(ps.ng.tax_abund_rel)[167, strip_rank] <-
    str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[167, strip_rank], "__")[[1]][2]
}
# Remove the loop helper so the workspace is left as before.
rm(strip_rank)
# Strip the rank prefix (everything up to and including "__") from the
# taxonomy labels of rows 168-191: each cell is split on "__" and replaced
# by the second piece.

# Rows 168-190: all seven ranks, row by row.
for (strip_row in 168:190) {
  for (strip_rank in c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
    phyloseq::tax_table(ps.ng.tax_abund_rel)[strip_row, strip_rank] <-
      str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[strip_row, strip_rank], "__")[[1]][2]
  }
}

# Row 191: only Domain..Genus are processed here (Species is presumably
# handled by the statement following this section).
for (strip_rank in c("Domain", "Phylum", "Class", "Order", "Family", "Genus")) {
  phyloseq::tax_table(ps.ng.tax_abund_rel)[191, strip_rank] <-
    str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[191, strip_rank], "__")[[1]][2]
}

# Remove the loop helpers so the workspace is left as before.
rm(strip_row, strip_rank)
phyloseq::tax_table(ps.ng.tax_abund_rel)[191,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[191,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[192,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[193,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[194,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[195,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[196,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[197,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[198,"Species"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Domain"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Domain"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Phylum"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Phylum"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Class"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Class"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Order"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Order"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Family"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Family"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Genus"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Genus"], "__")[[1]][2]
phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Species"] <- str_split(phyloseq::tax_table(ps.ng.tax_abund_rel)[199,"Species"], "__")[[1]][2]

```

# Taxonomic summary

## Bar plots at the phylum level

```{r, fig.width=16, fig.height=8, echo=TRUE, warning=FALSE}
  # Stacked bar chart of ps.ng.tax_abund_rel, filled by Phylum.
  # History: aes(color="Phylum", fill="Phylum") was replaced by an empty aes();
  # an earlier variant was ggplot(data=data, aes(x=Sample, y=Abundance, fill=Phylum)).
  # Original note on the text size: "6 instead of theme.size".
  # NOTE: the palette repeats several colours (e.g. "darkblue" appears twice),
  # so two different taxa can end up with the same fill.
  my_colors <- c(
    "darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1",
    "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick",
    "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon",
    "darkblue", "royalblue4", "dodgerblue3", "steelblue1", "lightskyblue",
    "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1",
    "brown1", "darkorange1", "cyan1", "darkgrey"
  )
  plot_bar(ps.ng.tax_abund_rel, fill = "Phylum") +
    geom_bar(aes(), position = "stack", stat = "identity") +
    scale_fill_manual(values = my_colors) +
    theme(
      axis.text = element_text(colour = "black", size = 7),
      legend.position = "bottom"
    ) +
    guides(fill = guide_legend(nrow = 2))
```
```{r, echo=FALSE, warning=FALSE}
  # Inactive chunk: every statement below is commented out, so nothing is
  # executed or rendered here. Kept for reference only.
  #png("abc.png")
  #knitr::include_graphics("./Phyloseq_files/figure-html/unnamed-chunk-7-1.png")
  #dev.off()
```

\pagebreak
Group the samples by pre- vs. post-stroke status and normalize the merged counts within each group to relative abundances (each value divided by the group total).

```{r, echo=TRUE, warning=FALSE}
  # Merge the samples by their pre_post_stroke value, then rescale every merged
  # sample so that its abundances sum to 1.
  ps.ng.tax_abund_rel_pre_post_stroke <- merge_samples(ps.ng.tax_abund_rel, "pre_post_stroke")
  # PENDING: is normalizing by sum(x) twice (per sample, then per merged group)
  # equivalent to normalizing once directly from the absolute abundances?
  ps.ng.tax_abund_rel_pre_post_stroke_ <- transform_sample_counts(
    ps.ng.tax_abund_rel_pre_post_stroke,
    function(counts) counts / sum(counts)
  )
  # Earlier variant, kept for reference:
  #plot_bar(ps.ng.tax_abund_relSampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack")
  plot_bar(ps.ng.tax_abund_rel_pre_post_stroke_, fill = "Phylum") +
    geom_bar(aes(), position = "stack", stat = "identity") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(colour = "black", size = 7))
```

```{r, echo=FALSE, warning=FALSE}

  # FITTING6: regulate the bar height when a group has replicates by dividing
  # every listed sample column by a per-group divisor.
  # Original group sizes: 11+16+10+10+5+6+6+6+11+15+14+8+10+8=136
  # (groups J, K and L were later RESIZED, see the entries below).

  ps.ng.tax_abund_rel_weighted <- data.table::copy(ps.ng.tax_abund_rel)

  # One entry per sample group: the sample numbers that are rescaled and the
  # divisor applied to them. Samples not listed here keep their copied values.
  replicate_groups <- list(
    A = list(idx = 1:11, divisor = 11),
    B = list(idx = 1:16, divisor = 16),
    C = list(idx = 1:10, divisor = 10),
    E = list(idx = 1:10, divisor = 10),
    F = list(idx = 1:5, divisor = 5),
    G = list(idx = 1:6, divisor = 6),
    H = list(idx = 1:6, divisor = 6),
    I = list(idx = 1:6, divisor = 6),
    # RESIZED: J5-J9 are disabled (they were previously divided by 11).
    J = list(idx = c(1:4, 10, 11), divisor = 6),
    # RESIZED: K1-K6 and K11-K15 are disabled (they were previously divided by 15).
    K = list(idx = 7:10, divisor = 4),
    # RESIZED: L2-L6 and L11-L15 are disabled (previously divided by 14);
    # L9 was never part of the list.
    L = list(idx = c(1, 7, 8, 10), divisor = 4),
    M = list(idx = 1:8, divisor = 8),
    N = list(idx = 1:10, divisor = 10),
    O = list(idx = 1:8, divisor = 8)
  )

  for (group_id in names(replicate_groups)) {
    group_divisor <- replicate_groups[[group_id]][["divisor"]]
    group_samples <- paste0("sample-", group_id, replicate_groups[[group_id]][["idx"]])
    for (sample_id in group_samples) {
      # Always read from the unweighted object so no column is scaled twice.
      otu_table(ps.ng.tax_abund_rel_weighted)[, sample_id] <-
        otu_table(ps.ng.tax_abund_rel)[, sample_id] / group_divisor
    }
  }

  # Sanity check on one sample: weighted column sum vs. original column sum.
  sum(otu_table(ps.ng.tax_abund_rel_weighted)[, "sample-O1"])
  #[1] 0.125
  sum(otu_table(ps.ng.tax_abund_rel)[, "sample-O1"])
  #[1] 1
```

\pagebreak
Color the bars by phylum, and use separate panels for stroke status (pre_post_stroke) and Sex_age.

```{r, echo=FALSE, warning=FALSE}
  # Faceted bar chart of the replicate-weighted abundances: one bar per Phylum,
  # rows = pre_post_stroke, columns = Sex_age. The x-axis labels and all axis
  # ticks are hidden; the legend identifies the phyla.
  # Earlier variant, kept for reference:
  #plot_bar(ps.ng.tax_abund_relswab_, x="Phylum", fill = "Phylum", facet_grid = Patient~RoundDay) + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack") + theme(axis.text = element_text(size = theme.size, colour="black"))
  plot_bar(
    ps.ng.tax_abund_rel_weighted,
    x = "Phylum",
    fill = "Phylum",
    facet_grid = pre_post_stroke ~ Sex_age
  ) +
    geom_bar(aes(), position = "stack", stat = "identity") +
    scale_fill_manual(values = my_colors) +
    theme(
      axis.text = element_text(colour = "black", size = 7),
      axis.text.x = element_blank(),
      axis.ticks = element_blank(),
      legend.position = "bottom"
    ) +
    guides(fill = guide_legend(nrow = 2))
```

## Bar plots at the class level

```{r, fig.width=16, fig.height=8, echo=TRUE, warning=FALSE}
  # Stacked bar chart of ps.ng.tax_abund_rel, filled by Class.
  # The palette is re-declared here with the same values as in the phylum
  # section; it repeats several colours, so different classes can share a fill.
  my_colors <- c(
    "darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1",
    "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick",
    "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon",
    "darkblue", "royalblue4", "dodgerblue3", "steelblue1", "lightskyblue",
    "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1",
    "brown1", "darkorange1", "cyan1", "darkgrey"
  )
  plot_bar(ps.ng.tax_abund_rel, fill = "Class") +
    geom_bar(aes(), position = "stack", stat = "identity") +
    scale_fill_manual(values = my_colors) +
    theme(
      axis.text = element_text(colour = "black", size = 7),
      legend.position = "bottom"
    ) +
    guides(fill = guide_legend(nrow = 3))
```

Group the samples by pre- vs. post-stroke status and normalize the merged counts within each group to relative abundances (each value divided by the group total).
```{r, echo=TRUE, warning=FALSE}
  # Same merged, renormalized object as in the phylum section, now filled by Class.
  plot_bar(ps.ng.tax_abund_rel_pre_post_stroke_, fill = "Class") +
    geom_bar(aes(), position = "stack", stat = "identity") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(colour = "black", size = 7))
```
\pagebreak

Color the bars by class, and use separate panels for stroke status (pre_post_stroke) and Sex_age.
```{r, echo=TRUE, warning=FALSE}
  # NOTE: run the code of the 6 blocks manually by copying it into the R console,
  # then show the resulting figures with knitr::include_graphics.
  # Sanity check: column sum of one replicate-weighted sample.
  sum(otu_table(ps.ng.tax_abund_rel_weighted)[, "sample-O1"])
  # Faceted bar chart: one bar per Class, rows = pre_post_stroke, columns = Sex_age.
  plot_bar(
    ps.ng.tax_abund_rel_weighted,
    x = "Class",
    fill = "Class",
    facet_grid = pre_post_stroke ~ Sex_age
  ) +
    geom_bar(aes(), position = "stack", stat = "identity") +
    scale_fill_manual(values = my_colors) +
    theme(
      axis.text = element_text(colour = "black", size = 7),
      axis.text.x = element_blank(),
      axis.ticks = element_blank(),
      legend.position = "bottom"
    ) +
    guides(fill = guide_legend(nrow = 3))
```

```{r, echo=FALSE, warning=FALSE}
        # Summarise the replicate-weighted abundances per Sex_age group, condition
        # and Class, draw them as stacked bars (one facet row per condition), save
        # the figure as a PNG and embed that PNG in the report.
        ps_df <- phyloseq::psmelt(ps.ng.tax_abund_rel_weighted)

        # Prepare the data
        ps_summary <- ps_df %>%
          # 1. Keep only these three conditions
          filter(pre_post_stroke %in% c("pre.antibiotics", "baseline", "pre.stroke")) %>%

          # 2. Aggregate
          group_by(Sex_age, pre_post_stroke, Class) %>%
          summarise(Abundance = sum(Abundance), .groups = "drop") %>%

          # 3. Rename the labels and fix the factor order
          mutate(
            # Display names for Sex_age
            Sex_age = recode(Sex_age,
                            "female.aged" = "Female (Aged)",
                            "male.aged"   = "Male (Aged)",
                            "male.young"  = "Male (Young)"),
            Sex_age = factor(Sex_age, levels = c("Male (Aged)", "Female (Aged)", "Male (Young)")),

            # Display names for the condition
            pre_post_stroke = recode(pre_post_stroke,
                                    "pre.antibiotics" = "Before Antibiotics",
                                    "baseline"        = "Baseline",
                                    "pre.stroke"      = "Before Stroke"),
            pre_post_stroke = factor(pre_post_stroke,
                                    levels = c("Before Antibiotics", "Baseline", "Before Stroke")),

            Class = factor(Class)
          )

        # Map the first length(class_levels) palette colours onto the Class levels
        class_levels <- levels(ps_summary$Class)
        color_map <- setNames(my_colors[seq_along(class_levels)], class_levels)

        # Plot
        p <- ggplot(ps_summary, aes(x = Sex_age, y = Abundance, fill = Class)) +
          geom_bar(stat = "identity", position = "stack", width = 0.55) +  # narrower bars
          facet_grid(pre_post_stroke ~ ., scales = "free_x", drop = TRUE) +
          scale_fill_manual(values = color_map, drop = FALSE) +
          theme_minimal(base_size = 11) +
          theme(
            axis.text.x = element_text(angle = 45, hjust = 1, size = 9, colour = "black"),
            axis.title = element_text(size = 11),
            strip.text = element_text(size = 10, face = "bold"),
            legend.position = "right",               # legend on the right
            legend.title = element_blank(),
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank()
          ) +
          guides(fill = guide_legend(ncol = 1)) +     # single-column (vertical) legend
          labs(
            x = "Sex and Age Group",
            y = "Relative Abundance",
            title = "Taxonomic Class Composition by Group and Condition"
          )

        # Save as a PNG file. ggsave() does not create a missing output directory
        # by default, so make sure it exists first (no-op when already present).
        fig_path <- "./figures/Separate_Stroke_and_SexAge_on_Class.png"
        dir.create(dirname(fig_path), showWarnings = FALSE, recursive = TRUE)
        ggsave(
          filename = fig_path,
          plot = p,
          width = 8,
          height = 6,
          dpi = 200
        )

        knitr::include_graphics(fig_path)
```

```{r, echo=FALSE, warning=FALSE}
        # Convert ps.ng.tax_abund_rel_weighted to a data frame via psmelt().
        ps_df <- phyloseq::psmelt(ps.ng.tax_abund_rel_weighted)

        # Keep only the "pre.stroke" condition, sum abundance per Sex_age x Class,
        # then relabel Sex_age and fix its display order.
        ps_summary <- ps_df %>%
          filter(pre_post_stroke == "pre.stroke") %>%
          group_by(Sex_age, Class) %>%
          summarise(Abundance = sum(Abundance), .groups = "drop") %>%
          mutate(
            Sex_age = factor(
              recode(Sex_age,
                     "female.aged" = "Female (Aged)",
                     "male.aged" = "Male (Aged)",
                     "male.young" = "Male (Young)"),
              levels = c("Male (Aged)", "Female (Aged)", "Male (Young)")
            ),
            Class = factor(Class)
          )

        # Assign one colour per Class level, taking colours from my_colors in order.
        class_levels <- levels(ps_summary$Class)
        color_map <- my_colors[seq_along(class_levels)]
        names(color_map) <- class_levels

        # Stacked bars with a two-column legend on the right.
        p <- ggplot(ps_summary, aes(x = Sex_age, y = Abundance, fill = Class)) +
          geom_col(position = "stack", width = 0.55) +
          scale_fill_manual(values = color_map, drop = FALSE) +
          labs(
            title = "Class Composition - Before Stroke",
            x = "Sex and Age Group",
            y = "Relative Abundance"
          ) +
          theme_minimal(base_size = 11) +
          theme(
            legend.position = "right",
            legend.title = element_blank(),
            axis.title = element_text(size = 11),
            axis.text.x = element_text(angle = 45, hjust = 1, size = 9, colour = "black"),
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank()
          ) +
          guides(fill = guide_legend(ncol = 2))

        # Save the figure.
        ggsave("./figures/Before_Stroke_Class_Composition.png",
               plot = p, width = 8, height = 5, dpi = 200)

        # Embed the saved figure in the report.
        knitr::include_graphics("./figures/Before_Stroke_Class_Composition.png")
```

## Bar plots in order level

```{r, fig.width=16, fig.height=8, echo=TRUE, warning=FALSE}
  # Fill palette used by the order-level plots (29 entries; some colours repeat).
  my_colors <- c(
    "darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1",
    "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick",
    "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon",
    "darkblue", "royalblue4", "dodgerblue3", "steelblue1", "lightskyblue",
    "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1",
    "brown1", "darkorange1", "cyan1", "darkgrey"
  )
  # Stacked bar plot filled by Order, legend at the bottom in 4 rows.
  plot_bar(ps.ng.tax_abund_rel, fill="Order") +
    geom_bar(aes(), stat="identity", position="stack") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(size = 7, colour="black"),
          legend.position="bottom") +
    guides(fill=guide_legend(nrow=4))
```

Regroup pre- vs. post-stroke samples together and normalize the number of reads in each group using the median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
  # Stacked bar plot of the pre/post-stroke regrouped object, filled by Order.
  plot_bar(ps.ng.tax_abund_rel_pre_post_stroke_, fill="Order") +
    geom_bar(aes(), stat="identity", position="stack") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(size = 7, colour="black"))
```
\pagebreak

Use color according to order. Do separate panels Stroke and Sex_age.
```{r, echo=FALSE, warning=FALSE}

  #FITTING7: adjust the bar height if the group has replicates.
  # Print the column total of sample-O1 as a check.
  sum(otu_table(ps.ng.tax_abund_rel_weighted)[, "sample-O1"])
  # One bar per Order, faceted by pre_post_stroke (rows) x Sex_age (columns);
  # x-axis labels and ticks are hidden, legend at the bottom in 4 rows.
  plot_bar(ps.ng.tax_abund_rel_weighted, x="Order", fill="Order", facet_grid = pre_post_stroke~Sex_age) +
    geom_bar(aes(), stat="identity", position="stack") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(size = 7, colour="black"),
          axis.text.x=element_blank(),
          axis.ticks=element_blank(),
          legend.position="bottom") +
    guides(fill=guide_legend(nrow=4))
```

```{r, echo=FALSE, warning=FALSE}
        # Convert ps.ng.tax_abund_rel_weighted to a data frame via psmelt().
        ps_df <- phyloseq::psmelt(ps.ng.tax_abund_rel_weighted)

        # Summarise abundance per Sex_age x condition x Order:
        #   1. keep only the three conditions of interest,
        #   2. sum abundances within each group,
        #   3. relabel both grouping columns and fix their display order.
        ps_summary <- ps_df %>%
          filter(pre_post_stroke %in% c("pre.antibiotics", "baseline", "pre.stroke")) %>%
          group_by(Sex_age, pre_post_stroke, Order) %>%
          summarise(Abundance = sum(Abundance), .groups = "drop") %>%
          mutate(
            Sex_age = factor(
              recode(Sex_age,
                     "female.aged" = "Female (Aged)",
                     "male.aged"   = "Male (Aged)",
                     "male.young"  = "Male (Young)"),
              levels = c("Male (Aged)", "Female (Aged)", "Male (Young)")
            ),
            pre_post_stroke = factor(
              recode(pre_post_stroke,
                     "pre.antibiotics" = "Before Antibiotics",
                     "baseline"        = "Baseline",
                     "pre.stroke"      = "Before Stroke"),
              levels = c("Before Antibiotics", "Baseline", "Before Stroke")
            ),
            Order = factor(Order)
          )

        # Assign one colour per Order level, taking colours from my_colors in order.
        class_levels <- levels(ps_summary$Order)
        color_map <- my_colors[seq_along(class_levels)]
        names(color_map) <- class_levels

        # Stacked bars (narrower than default), one facet row per condition,
        # with a single-column legend on the right.
        p <- ggplot(ps_summary, aes(x = Sex_age, y = Abundance, fill = Order)) +
          geom_col(position = "stack", width = 0.55) +
          facet_grid(pre_post_stroke ~ ., scales = "free_x", drop = TRUE) +
          scale_fill_manual(values = color_map, drop = FALSE) +
          labs(
            title = "Taxonomic Order Composition by Group and Condition",
            x = "Sex and Age Group",
            y = "Relative Abundance"
          ) +
          theme_minimal(base_size = 11) +
          theme(
            legend.position = "right",
            legend.title = element_blank(),
            strip.text = element_text(size = 10, face = "bold"),
            axis.title = element_text(size = 11),
            axis.text.x = element_text(angle = 45, hjust = 1, size = 9, colour = "black"),
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank()
          ) +
          guides(fill = guide_legend(ncol = 1))

        # Write the figure to disk as PNG, then embed that file in the report.
        ggsave("./figures/Separate_Stroke_and_SexAge_on_Order.png",
               plot = p, width = 8, height = 6, dpi = 200)

        knitr::include_graphics("./figures/Separate_Stroke_and_SexAge_on_Order.png")
```

```{r, echo=FALSE, warning=FALSE}
        # Convert ps.ng.tax_abund_rel_weighted to a data frame via psmelt().
        ps_df <- phyloseq::psmelt(ps.ng.tax_abund_rel_weighted)

        # Keep only the "pre.stroke" condition, sum abundance per Sex_age x Order,
        # then relabel Sex_age and fix its display order.
        ps_summary <- ps_df %>%
          filter(pre_post_stroke == "pre.stroke") %>%
          group_by(Sex_age, Order) %>%
          summarise(Abundance = sum(Abundance), .groups = "drop") %>%
          mutate(
            Sex_age = factor(
              recode(Sex_age,
                     "female.aged" = "Female (Aged)",
                     "male.aged" = "Male (Aged)",
                     "male.young" = "Male (Young)"),
              levels = c("Male (Aged)", "Female (Aged)", "Male (Young)")
            ),
            Order = factor(Order)
          )

        # Assign one colour per Order level, taking colours from my_colors in order.
        class_levels <- levels(ps_summary$Order)
        color_map <- my_colors[seq_along(class_levels)]
        names(color_map) <- class_levels

        # Stacked bars with a two-column legend on the right.
        p <- ggplot(ps_summary, aes(x = Sex_age, y = Abundance, fill = Order)) +
          geom_col(position = "stack", width = 0.55) +
          scale_fill_manual(values = color_map, drop = FALSE) +
          labs(
            title = "Order Composition - Before Stroke",
            x = "Sex and Age Group",
            y = "Relative Abundance"
          ) +
          theme_minimal(base_size = 11) +
          theme(
            legend.position = "right",
            legend.title = element_blank(),
            axis.title = element_text(size = 11),
            axis.text.x = element_text(angle = 45, hjust = 1, size = 9, colour = "black"),
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank()
          ) +
          guides(fill = guide_legend(ncol = 2))

        # Save the figure.
        ggsave("./figures/Before_Stroke_Order_Composition.png",
               plot = p, width = 8, height = 5, dpi = 200)

        # Embed the saved figure in the report.
        knitr::include_graphics("./figures/Before_Stroke_Order_Composition.png")
```

## Bar plots in family level

```{r, fig.width=16, fig.height=8, echo=TRUE, warning=FALSE}
  # Fill palette used by the family-level plots (54 hex colours).
  my_colors <- c(
    "#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00",
    "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080",
    "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6",
    "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF",
    "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7",
    "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4",
    "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF",
    "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7",
    "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7"
  )
  # Stacked bar plot filled by Family, legend at the bottom in 8 rows.
  plot_bar(ps.ng.tax_abund_rel, fill="Family") +
    geom_bar(aes(), stat="identity", position="stack") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(size = 7, colour="black"),
          legend.position="bottom") +
    guides(fill=guide_legend(nrow=8))
```

Regroup pre- vs. post-stroke samples together and normalize the number of reads in each group using the median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
  # Stacked bar plot of the pre/post-stroke regrouped object, filled by Family.
  plot_bar(ps.ng.tax_abund_rel_pre_post_stroke_, fill="Family") +
    geom_bar(aes(), stat="identity", position="stack") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(size = 7, colour="black"))
```
\pagebreak

Use color according to family. Do separate panels Stroke and Sex_age.
```{r, echo=TRUE, warning=FALSE}
  # Print the column total of sample-O1 as a check.
  sum(otu_table(ps.ng.tax_abund_rel_weighted)[, "sample-O1"])
  # One bar per Family, faceted by pre_post_stroke (rows) x Sex_age (columns);
  # x-axis labels and ticks are hidden, legend at the bottom in 8 rows.
  plot_bar(ps.ng.tax_abund_rel_weighted, x="Family", fill="Family", facet_grid = pre_post_stroke~Sex_age) +
    geom_bar(aes(), stat="identity", position="stack") +
    scale_fill_manual(values = my_colors) +
    theme(axis.text = element_text(size = 7, colour="black"),
          axis.text.x=element_blank(),
          axis.ticks=element_blank(),
          legend.position="bottom") +
    guides(fill=guide_legend(nrow=8))
```

```{r, echo=FALSE, warning=FALSE}
        # Convert ps.ng.tax_abund_rel_weighted to a data frame via psmelt().
        ps_df <- phyloseq::psmelt(ps.ng.tax_abund_rel_weighted)

        # Summarise abundance per Sex_age x condition x Family:
        #   1. keep only the three conditions of interest,
        #   2. sum abundances within each group,
        #   3. relabel both grouping columns and fix their display order.
        ps_summary <- ps_df %>%
          filter(pre_post_stroke %in% c("pre.antibiotics", "baseline", "pre.stroke")) %>%
          group_by(Sex_age, pre_post_stroke, Family) %>%
          summarise(Abundance = sum(Abundance), .groups = "drop") %>%
          mutate(
            Sex_age = factor(
              recode(Sex_age,
                     "female.aged" = "Female (Aged)",
                     "male.aged"   = "Male (Aged)",
                     "male.young"  = "Male (Young)"),
              levels = c("Male (Aged)", "Female (Aged)", "Male (Young)")
            ),
            pre_post_stroke = factor(
              recode(pre_post_stroke,
                     "pre.antibiotics" = "Before Antibiotics",
                     "baseline"        = "Baseline",
                     "pre.stroke"      = "Before Stroke"),
              levels = c("Before Antibiotics", "Baseline", "Before Stroke")
            ),
            Family = factor(Family)
          )

        # Assign one colour per Family level, taking colours from my_colors in order.
        class_levels <- levels(ps_summary$Family)
        color_map <- my_colors[seq_along(class_levels)]
        names(color_map) <- class_levels

        # Stacked bars (narrower than default), one facet row per condition,
        # with a two-column legend on the right.
        p <- ggplot(ps_summary, aes(x = Sex_age, y = Abundance, fill = Family)) +
          geom_col(position = "stack", width = 0.55) +
          facet_grid(pre_post_stroke ~ ., scales = "free_x", drop = TRUE) +
          scale_fill_manual(values = color_map, drop = FALSE) +
          labs(
            title = "Taxonomic Family Composition by Group and Condition",
            x = "Sex and Age Group",
            y = "Relative Abundance"
          ) +
          theme_minimal(base_size = 11) +
          theme(
            legend.position = "right",
            legend.title = element_blank(),
            strip.text = element_text(size = 10, face = "bold"),
            axis.title = element_text(size = 11),
            axis.text.x = element_text(angle = 45, hjust = 1, size = 9, colour = "black"),
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank()
          ) +
          guides(fill = guide_legend(ncol = 2))

        # Write the figure to disk as PNG, then embed that file in the report.
        ggsave("./figures/Separate_Stroke_and_SexAge_on_Family.png",
               plot = p, width = 9, height = 6, dpi = 200)

        knitr::include_graphics("./figures/Separate_Stroke_and_SexAge_on_Family.png")
```

```{r, echo=FALSE, warning=FALSE}
        # Convert ps.ng.tax_abund_rel_weighted to a data frame via psmelt().
        ps_df <- phyloseq::psmelt(ps.ng.tax_abund_rel_weighted)

        # Keep only the "pre.stroke" condition, sum abundance per Sex_age x Family,
        # then relabel Sex_age and fix its display order.
        ps_summary <- ps_df %>%
          filter(pre_post_stroke == "pre.stroke") %>%
          group_by(Sex_age, Family) %>%
          summarise(Abundance = sum(Abundance), .groups = "drop") %>%
          mutate(
            Sex_age = factor(
              recode(Sex_age,
                     "female.aged" = "Female (Aged)",
                     "male.aged" = "Male (Aged)",
                     "male.young" = "Male (Young)"),
              levels = c("Male (Aged)", "Female (Aged)", "Male (Young)")
            ),
            Family = factor(Family)
          )

        # Assign one colour per Family level, taking colours from my_colors in order.
        class_levels <- levels(ps_summary$Family)
        color_map <- my_colors[seq_along(class_levels)]
        names(color_map) <- class_levels

        # Stacked bars with a two-column legend on the right.
        p <- ggplot(ps_summary, aes(x = Sex_age, y = Abundance, fill = Family)) +
          geom_col(position = "stack", width = 0.55) +
          scale_fill_manual(values = color_map, drop = FALSE) +
          labs(
            title = "Family Composition - Before Stroke",
            x = "Sex and Age Group",
            y = "Relative Abundance"
          ) +
          theme_minimal(base_size = 11) +
          theme(
            legend.position = "right",
            legend.title = element_blank(),
            axis.title = element_text(size = 11),
            axis.text.x = element_text(angle = 45, hjust = 1, size = 9, colour = "black"),
            panel.grid.major.x = element_blank(),
            panel.grid.minor = element_blank()
          ) +
          guides(fill = guide_legend(ncol = 2))

        # Save the figure.
        ggsave("./figures/Before_Stroke_Family_Composition.png",
               plot = p, width = 8, height = 5, dpi = 200)

        # Embed the saved figure in the report.
        knitr::include_graphics("./figures/Before_Stroke_Family_Composition.png")
```

\pagebreak

# Alpha diversity
Plot Chao1 richness estimator, Observed OTUs, Shannon index, and Phylogenetic diversity. 
Regroup together samples from the same group.
```{r, echo=FALSE, warning=FALSE}
# using rarefied data
#FITTING2: CONSOLE: 
#gunzip table_even4753.biom.gz
#alpha_diversity.py -i table_even42369.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering/rep_set.tre
#gunzip table_even4753.biom.gz
#alpha_diversity.py -i table_even4753.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering_stool/rep_set.tre
#gunzip table_even4753.biom.gz
#alpha_diversity.py -i table_even4753.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering_swab/rep_set.tre
```

```{r, echo=TRUE, warning=FALSE}
#fig.width=9, fig.height=6,
#QIIME1 hmp.div_qiime <- read.csv("adiv_even.txt", sep="\t")
#QIIME1 colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree")
#QIIME1 row.names(hmp.div_qiime) <- hmp.div_qiime$sam_name
#QIIME1 div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name")
#QIIME1 div.df2 <- div.df[, c("Group", "chao1", "shannon", "observed_otus", "PD_whole_tree")]
#QIIME1 colnames(div.df2) <- c("Group", "Chao-1", "Shannon", "OTU", "Phylogenetic Diversity")
#QIIME1 options(max.print=999999)
#QIIME1 #27     H47 830.5000 5.008482 319               10.60177
#QIIME1 #FITTING4: if occuring "Computation failed in `stat_signif()`:not enough 'y' observations"
#QIIME1 #means: the patient H47 contains only one sample, it should be removed for the statistical p-values calculations.
#QIIME1 #delete H47(1)
#QIIME1 #div.df2 <- div.df2[-c(3), ]
#QIIME1 #div.df2 <- div.df2[-c(55,54, 45,40,39,27,26,25,1), ]

# QIIME2: read the exported alpha-diversity metrics and rename the two columns
# of each table to "sam_name" + metric name so the tables can be joined.
shannon <- setNames(
  read.table("exported_alpha/shannon/alpha-diversity.tsv", header=TRUE, sep="\t"),
  c("sam_name", "shannon")
)
faith_pd <- setNames(
  read.table("exported_alpha/faith_pd/alpha-diversity.tsv", header=TRUE, sep="\t"),
  c("sam_name", "PD_whole_tree")
)
observed <- setNames(
  read.table("exported_alpha/observed_features/alpha-diversity.tsv", header=TRUE, sep="\t"),
  c("sam_name", "observed_otus")
)
#TODO: chao1 is not loaded; check the correctness of the chao1 calculation first.
#chao1 <- setNames(read.table("exported_alpha/chao1/alpha-diversity.tsv", header=TRUE, sep="\t"), c("sam_name", "chao1"))

# Join the three metrics on sam_name, then attach the sample metadata.
div.df <- merge(merge(shannon, faith_pd, by="sam_name"), observed, by="sam_name")
div.df <- merge(div.df, hmp.meta, by="sam_name")

# Keep the columns shown in the report, give them display names,
# write the table to disk and render it.
div.df2 <- setNames(
  div.df[, c("sam_name", "Group", "shannon", "observed_otus", "PD_whole_tree")],
  c("Sample name", "Group", "Shannon", "OTU", "Phylogenetic Diversity")
)
write.csv(div.df2, file="alpha_diversities.txt")
knitr::kable(div.df2) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

# Pairwise comparison of Shannon between groups using t-tests
# (see https://uc-r.github.io/t_test; wilcox.test is the nonparametric alternative).
stat.test.Shannon <- compare_means(Shannon ~ Group, data = div.df2, method = "t.test")
knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

# Long format for the faceted box plot.
div_df_melt <- reshape2::melt(div.df2)

# One box-plot panel per metric, each with free scales.
# References:
#   http://www.sthda.com/english/wiki/print.php?id=177
#   https://rpkgs.datanovia.com/ggpubr/reference/as_ggplot.html
#   http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/82-ggplot2-easy-way-to-change-graphical-parameters/
#   https://plot.ly/r/box-plots/#horizontal-boxplot
p <- ggboxplot(
  div_df_melt,
  x = "Group", y = "value",
  facet.by = "variable",
  scales = "free",
  width = 0.5,
  fill = "gray",
  legend = "right"
)

# Group levels present in the data.
lev <- levels(factor(div_df_melt$Group))
#FITTING4: if a group has only one sample (e.g. H47), drop it from lev:
#lev <- lev[-c(3)]

# Every pair of group levels, as a list of length-2 character vectors.
# https://stackoverflow.com/questions/47839988/indicating-significance-with-ggplot2-in-a-boxplot-with-multiple-groups
L.pairs <- combn(lev, 2, simplify = FALSE)
# Local variant of ggpubr::stat_compare_means().
#
# With `comparisons` supplied, returns a ggsignif::geom_signif() layer that
# tests each listed pair of groups; otherwise returns a StatCompareMeans layer.
#
# Arguments (same names and defaults as the ggpubr function it mirrors):
#   method, method.args, paired : test name and extra arguments for the test
#                                 (exact = FALSE is forced for "wilcox.test").
#   comparisons  : list of length-2 vectors naming the groups to compare.
#   label        : "p.format" by default; "p.signif" shows significance symbols.
#   symnum.args  : optional list(cutpoints = , symbols = ) for custom symbols.
#   hide.ns      : if TRUE, blank out the label of the last (non-significant) level.
#   ...          : `size` and `color` are picked up for the bracket styling.
my_stat_compare_means  <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE, 
    method.args = list(), ref.group = NULL, comparisons = NULL, 
    hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left", 
    label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03, 
    symnum.args = list(), geom = "text", position = "identity", 
    na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) 
{
    if (!is.null(comparisons)) {
        method.info <- ggpubr:::.method_info(method)
        method <- method.info$method
        method.args <- ggpubr:::.add_item(method.args, paired = paired)
        if (method == "wilcox.test") 
            method.args$exact <- FALSE
        pms <- list(...)
        size <- ifelse(is.null(pms$size), 0.3, pms$size)
        color <- ifelse(is.null(pms$color), "black", pms$color)
        # FALSE makes geom_signif print the p-value itself.
        map_signif_level <- FALSE
        if (is.null(label)) 
            label <- "p.format"
        if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) {
            if (ggpubr:::.is_empty(symnum.args)) {
                map_signif_level <- c(`****` = 1e-04, `***` = 0.001, 
                  `**` = 0.01, `*` = 0.05, ns = 1)
            } else {
                # geom_signif wants a named numeric vector (symbol = upper bound),
                # not the list(cutpoints, symbols) form. Drop the leading 0
                # cutpoint and name the remaining bounds with the symbols.
                map_signif_level <- symnum.args$cutpoints[-1]
                names(map_signif_level) <- symnum.args$symbols
            } 
            if (hide.ns) 
                # Blank the last level rather than a fixed index, so custom
                # symbol sets of any length are handled.
                names(map_signif_level)[length(map_signif_level)] <- " "
        }
        # Stack brackets automatically only when no explicit y positions are given.
        step_increase <- ifelse(is.null(label.y), 0.12, 0)
        ggsignif::geom_signif(comparisons = comparisons, y_position = label.y, 
            test = method, test.args = method.args, step_increase = step_increase, 
            size = size, color = color, map_signif_level = map_signif_level, 
            tip_length = tip.length, data = data)
    } else {
        mapping <- ggpubr:::.update_mapping(mapping, label)
        # Qualified like the other ggpubr internals used above, so the lookup
        # does not depend on the object being exported/attached.
        layer(stat = ggpubr:::StatCompareMeans, data = data, mapping = mapping, 
            geom = geom, position = position, show.legend = show.legend, 
            inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc, 
                label.y.npc = label.y.npc, label.x = label.x, 
                label.y = label.y, label.sep = label.sep, method = method, 
                method.args = method.args, paired = paired, ref.group = ref.group, 
                symnum.args = symnum.args, hide.ns = hide.ns, 
                na.rm = na.rm, ...))
    }
}

# Rotate the x-axis labels to 45 degrees and adjust their position.
p <- p + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust=1, size=8))

# Add t-test significance annotations shown as symbols.
# `comparisons` is currently an empty list; to annotate groups use e.g.
#   comparisons = L.pairs
#   comparisons = list(c("Group1", "Group2"), c("Group3", "Group4"))
p2 <- p +
  stat_compare_means(
    method = "t.test",
    comparisons = list(),
    label = "p.signif",
    # Pass by name with `=`. Writing `symnum.args <- list(...)` here would
    # create a global variable and hand the list over positionally, so it
    # would never reach the symnum.args parameter.
    symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))
  )
print(p2)

# https://stackoverflow.com/questions/20500706/saving-multiple-ggplots-from-ls-into-one-and-separate-files-in-r
#FITTING3: mkdir figures (the output directory must exist).
# Name the plot explicitly instead of relying on the last plot displayed.
ggsave("./figures/alpha_diversity_Group.png", plot = p2, device="png", height = 10, width = 15)
ggsave("./figures/alpha_diversity_Group.svg", plot = p2, device="svg", height = 10, width = 15)

#NOTE: Run this Phyloseq.Rmd, then run the code of MicrobiotaProcess.R to manually generate PCoA.png, then run this Phyloseq.Rmd!
#NOTE: AT_FIRST_DEACTIVATE_THIS_LINE: knitr::include_graphics("./figures/PCoA.png")

```

```{r, echo=FALSE, warning=FALSE, fig.cap="Alpha diversity", out.width = '100%', fig.align= "center"}
## MANUALLY selected alpha diversities unter host-env after 'cp alpha_diversities.txt selected_alpha_diversities.txt'
#knitr::include_graphics("./figures/alpha_diversity_Group.png")
#selected_alpha_diversities<-read.csv("selected_alpha_diversities.txt",sep="\t")
#knitr::kable(selected_alpha_diversities) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

# Beta diversity (Bray-Curtis distance)

## Group1 vs Group2
```{r, echo=FALSE, warning=FALSE, out.width = '100%', fig.align= "center"}
#fig.cap="Beta diversity",

#for QIIME1: file:///home/jhuang/DATA/Data_Marius_16S/core_diversity_e42369/bdiv_even42369_Group/weighted_unifrac_boxplots/Group_Stats.txt

# -- for QIIME2: MANUALLY filter permanova-pairwise.csv and save as permanova-pairwise_.csv
# #grep "Permutations" exported_beta_group/permanova-pairwise.csv > permanova-pairwise_.csv
# #grep "Group1,Group2" exported_beta_group/permanova-pairwise.csv >> permanova-pairwise_.csv
# #grep "Group3,Group4" exported_beta_group/permanova-pairwise.csv >> permanova-pairwise_.csv
# beta_diversity_group_stats<-read.csv("permanova-pairwise_.csv",sep=",")
# #beta_diversity_group_stats <- beta_diversity_group_stats[beta_diversity_group_stats$Group.1 == "Group1" & beta_diversity_group_stats$Group.2 == "Group2", ]
# #beta_diversity_group_stats <- beta_diversity_group_stats[beta_diversity_group_stats$Group.1 == "Group3" & beta_diversity_group_stats$Group.2 == "Group4", ]
# knitr::kable(beta_diversity_group_stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

#NOTE: Run this Phyloseq.Rmd, then run the code of MicrobiotaProcess.R to manually generate Comparison_of_Bray_Distances_Group1_vs_Group2.png and Comparison_of_Bray_Distances_Group3_vs_Group4.png, then run this Phyloseq.Rmd!

#knitr::include_graphics("./figures/Comparison_of_Bray_Distances_Group1_vs_Group2.png")

```

## Group3 vs Group4
```{r, echo=FALSE, warning=FALSE, out.width = '100%', fig.align= "center"}
#knitr::include_graphics("./figures/Comparison_of_Bray_Distances_Group3_vs_Group4.png")
```

# The PCoA analysis

## Group1 vs Group2
```{r, echo=FALSE, warning=FALSE, out.width = '100%', fig.align= "center"}
#knitr::include_graphics("./figures/PCoA2_Group1_vs_Group2.png")
```

## Group3 vs Group4
```{r, echo=FALSE, warning=FALSE, out.width = '100%', fig.align= "center"}
#knitr::include_graphics("./figures/PCoA2_Group3_vs_Group4.png")
```

## Groups 1, 2, 3 and 4
```{r, echo=FALSE, warning=FALSE, out.width = '100%', fig.align= "center"}
#knitr::include_graphics("./figures/PCoA2_Group1-Group4.png")
```

## Groups 9, 10, 11, 12, 13, and 14
```{r, echo=FALSE, warning=FALSE, out.width = '100%', fig.align= "center"}
#knitr::include_graphics("./figures/PCoA2_Group9-Group14.png")
```

# Differential abundance analysis

Differential abundance analysis aims to find differences in the abundance of each taxon between two groups of samples, assigning a significance value to each comparison.

## Group1 vs Group2

```{r, echo=TRUE, warning=FALSE}
#ps.ng.tax [ 2633 taxa and 136 samples] and ps.ng.tax_abund (absolute abundance)  [382 taxa and 136 samples],  ps.ng.tax_abund_rel (relative abundance)  [382 taxa and 136 samples], either ps.ng.tax and ps.ng.tax_abund can be used here!
# Original note: ps.ng.tax [2633 taxa, 136 samples], ps.ng.tax_abund (absolute
# abundance) and ps.ng.tax_abund_rel (relative abundance) [382 taxa, 136 samples
# each]; either ps.ng.tax or ps.ng.tax_abund can be used here.
# Restrict the count table to samples sample-A1..A11 and sample-B1..B16.
ps.ng.tax_abund_sel1 <- data.table::copy(ps.ng.tax_abund)
otu_table(ps.ng.tax_abund_sel1) <- otu_table(ps.ng.tax_abund)[
  , c(paste0("sample-A", 1:11), paste0("sample-B", 1:16))
]

# Fit DESeq2 with Group as the design and "Group2" as the reference level.
diagdds <- phyloseq_to_deseq2(ps.ng.tax_abund_sel1, ~Group)
diagdds$Group <- relevel(diagdds$Group, "Group2")
diagdds <- DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)

# Keep taxa with adjusted p-value below alpha and append their taxonomy columns.
res <- results(diagdds, cooksCutoff = FALSE)
alpha <- 0.05
sigtab <- res[which(res$padj < alpha), ]
sigtab <- cbind(
  as(sigtab, "data.frame"),
  as(phyloseq::tax_table(ps.ng.tax_abund_sel1)[rownames(sigtab), ], "matrix")
)
kable(sigtab) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

library("ggplot2")
theme_set(theme_bw())
# Override the default discrete fill scale with a ColorBrewer palette.
scale_fill_discrete <- function(palname = "Set1", ...) {
    scale_fill_brewer(palette = palname, ...)
}

# Order the Order and Family factor levels by their maximum log2 fold change.
x <- sort(tapply(sigtab$log2FoldChange, sigtab$Order, max))
sigtab$Order <- factor(as.character(sigtab$Order), levels=names(x))
x <- sort(tapply(sigtab$log2FoldChange, sigtab$Family, max))
sigtab$Family <- factor(as.character(sigtab$Family), levels=names(x))

# Dot plot: log2 fold change per Family, coloured by Order; the size range is
# reversed (8 to 4) so smaller padj values get larger points.
ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) +
  geom_point(aes(size=padj)) +
  scale_size_continuous(name="padj",range=c(8,4)) +
  theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
```

```{r, echo=FALSE, warning=FALSE, out.width = '100%', fig.align= "center"}
#knitr::include_graphics("./figures/diff_analysis_Group1_vs_Group2.png")
```

## Group3 vs Group4

```{r, echo=TRUE, warning=FALSE}
# Restrict the count table to samples sample-C1..C10 and sample-E1..E10.
ps.ng.tax_abund_sel2 <- data.table::copy(ps.ng.tax_abund)
otu_table(ps.ng.tax_abund_sel2) <- otu_table(ps.ng.tax_abund)[
  , c(paste0("sample-C", 1:10), paste0("sample-E", 1:10))
]

# Fit DESeq2 with Group as the design and "Group4" as the reference level.
diagdds <- phyloseq_to_deseq2(ps.ng.tax_abund_sel2, ~Group)
diagdds$Group <- relevel(diagdds$Group, "Group4")
diagdds <- DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)

# Keep taxa with adjusted p-value below alpha and append their taxonomy columns.
res <- results(diagdds, cooksCutoff = FALSE)
alpha <- 0.05
sigtab <- res[which(res$padj < alpha), ]
sigtab <- cbind(
  as(sigtab, "data.frame"),
  as(phyloseq::tax_table(ps.ng.tax_abund_sel2)[rownames(sigtab), ], "matrix")
)

kable(sigtab) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

library("ggplot2")
theme_set(theme_bw())
# Override the default discrete fill scale with a ColorBrewer palette.
scale_fill_discrete <- function(palname = "Set1", ...) {
    scale_fill_brewer(palette = palname, ...)
}

# Order the Order and Family factor levels by their maximum log2 fold change.
x <- sort(tapply(sigtab$log2FoldChange, sigtab$Order, max))
sigtab$Order <- factor(as.character(sigtab$Order), levels=names(x))
x <- sort(tapply(sigtab$log2FoldChange, sigtab$Family, max))
sigtab$Family <- factor(as.character(sigtab$Family), levels=names(x))

# Dot plot: log2 fold change per Family, coloured by Order; the size range is
# reversed (8 to 4) so smaller padj values get larger points.
ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) +
  geom_point(aes(size=padj)) +
  scale_size_continuous(name="padj",range=c(8,4)) +
  theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
```

```{r, echo=FALSE, warning=FALSE, out.width = '200%', fig.align= "center"}
#knitr::include_graphics("./figures/diff_analysis_Group3_vs_Group4.png")
```

```{r, echo=FALSE, warning=FALSE}
## The table below shows the raw counts of the 199 OTUs across all samples, with OTUs as rows and samples as columns.
#kable(otu_table(ps.ng.tax)) %>%
#kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

```{r, echo=FALSE, warning=FALSE}
## The table below shows the taxonomic assignments of the 199 OTUs, with OTUs as rows and their corresponding taxonomic ranks as columns.
# ~/Tools/csv2xls-0.4/csv_to_xls.py otu_table.csv tax_table.csv -d',' -o otu_tax.xls;
#kable(taxonomy_df) %>%
#  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

```{r, echo=FALSE, warning=FALSE}
## Sample L9 retained only 413 sequences after the complete preprocessing workflow (filtering, denoising, merging, and chimera removal) and was therefore excluded from downstream analyses.
# # Read the TSV file
# ~/Tools/csv2xls-0.4/csv_to_xls.py denoising-stats.csv -d$'\t' -o preprocessing_stats.xls;
# denoising_stats <- read.csv("denoising-stats.csv", sep="\t")
# # Display the table
# kable(denoising_stats, caption = "Preprocessing statistics for each sample") %>%
#   kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Somatic Variation Detection

🔬 癌症驱动基因与突变分析流程总览

一、常见癌症的驱动基因(Driver Genes)

癌症类型 常见驱动基因 说明
肺癌(NSCLC) EGFR, KRAS, ALK, TP53, BRAF EGFR 和 KRAS 突变常用于靶向治疗判断
肝癌(HCC) TP53, CTNNB1, AXIN1, TERT TERT 启动子突变常见
胃癌 TP53, ARID1A, PIK3CA, CDH1 与 WNT/PI3K 通路相关
大肠癌(CRC) APC, KRAS, TP53, PIK3CA Wnt 通路异常(APC 突变)
乳腺癌 BRCA1/2, PIK3CA, TP53, ERBB2(HER2) BRCA 突变与遗传性乳腺癌相关
黑色素瘤 BRAF, NRAS, NF1 BRAF-V600E 为靶向治疗重要突变
白血病(AML) FLT3, NPM1, DNMT3A, IDH1/2 多个基因可能共突变

🧪 二、常用的癌症突变检测工具和数据库

🛠️ 工具类(基因组分析)

工具名 用途/特点
Mutect2 (GATK) 检测肿瘤样本中的体细胞突变(与正常对照)
Strelka2 高灵敏度检测 SNV 和 InDel
VarScan2 检测突变和 CNV,适合低深度数据
SomaticSniper 老牌体细胞突变检测工具
FACETS 拷贝数变异与纯度/杂合度估计

🧬 数据库类(已知突变注释)

数据库/平台 功能
COSMIC 全球最大癌症突变数据库
TCGA (via GDC) 大规模癌症全基因组数据平台(含表达、突变等)
cBioPortal 可视化 TCGA、ICGC 数据;浏览癌症基因突变
OncoKB 癌症突变功能注释库,适合靶向药物关联
ClinVar 提供临床意义注释,如是否为致病突变

📘 三、推荐突变分析流程

样本准备 
    ↓
比对 (BWA) 
    ↓
去除重复 (Picard) 
    ↓
突变检测 (Mutect2, Strelka2, VarScan2) 
    ↓
注释 (ANNOVAR / VEP) 
    ↓
功能解释 (OncoKB, COSMIC, cBioPortal)

🧬 四、突变检测与注释:实际操作(从 BAM 到注释 VCF)

✅ 输入需求:

  • 肿瘤 BAM 文件(推荐同时有正常对照 BAM)
  • 参考基因组(例如 hg38.fasta)
  • 索引文件(.bai, .fai)
  • 工具安装:GATK, VEP, 或 ANNOVAR

🛠️ 常见工具推荐

工具 特点 输入要求
Mutect2 (GATK) 最佳的肿瘤–正常突变检测 肿瘤 + 正常 BAM
Strelka2 快速、准确检测 SNV/InDel 肿瘤 + 正常 BAM
VarScan2 可支持 tumor-only 模式 mpileup / VCF
LoFreq 高分辨率 SNV 检测 肿瘤 BAM(可选)

🧪 示例:使用 GATK Mutect2 检测体细胞突变

🔹 Tumor–Normal 模式(推荐)

# Step 1: call candidate somatic variants from the tumor/normal BAM pair.
# NOTE(review): TumorSample / NormalSample are placeholders; they presumably
# have to match the sample names recorded in the BAM read groups — confirm.
gatk Mutect2 \
  --reference hg38.fasta \
  --input tumor.bam \
  --input normal.bam \
  --tumor-sample TumorSample \
  --normal-sample NormalSample \
  --germline-resource af-only-gnomad.vcf.gz \
  --panel-of-normals pon.vcf.gz \
  --output somatic_raw.vcf

# Step 2: filter the raw calls; the result is the input for annotation.
gatk FilterMutectCalls \
  --reference hg38.fasta \
  --variant somatic_raw.vcf \
  --output somatic_filtered.vcf

🔹 Tumor-Only 模式(无对照)

# Tumor-only calling: no matched normal BAM is supplied, so only the germline
# resource is passed alongside the tumor BAM.
gatk Mutect2 \
  --reference hg38.fasta \
  --input tumor.bam \
  --tumor-sample TumorSample \
  --germline-resource af-only-gnomad.vcf.gz \
  --output somatic_raw.vcf

🧠 突变注释工具

1. Ensembl VEP

# Annotate the filtered somatic calls with Ensembl VEP, writing VCF output.
# Runs against a local cache (--cache --offline) located under --dir_cache.
vep --input_file somatic_filtered.vcf \
    --output_file annotated.vep.vcf \
    --vcf \
    --assembly GRCh38 \
    --cache \
    --offline \
    --dir_cache /path/to/.vep \
    --everything

✅ 适用于通用功能预测、插件丰富(如 COSMIC、gnomAD)


2. ANNOVAR

# Annotate the filtered somatic calls with ANNOVAR.
#
# -vcfinput tells table_annovar.pl that the input file is a VCF (it converts
# it internally and writes annotated_annovar.hg38_multianno.vcf and .txt), so
# the filtered VCF is passed directly instead of a pre-converted .avinput file.
# -operation must list one entry per -protocol database (g = gene-based,
# f = filter-based).
table_annovar.pl somatic_filtered.vcf humandb/ \
   -buildver hg38 \
   -out annotated_annovar \
   -remove \
   -protocol refGene,clinvar_20240129,cosmic70,gnomad_genome \
   -operation g,f,f,f \
   -nastring . \
   -vcfinput

# Alternative (tab-delimited output only): convert the VCF first, then
# annotate the .avinput file WITHOUT -vcfinput.
# convert2annovar.pl -format vcf4 somatic_filtered.vcf > input.avinput
# table_annovar.pl input.avinput humandb/ \
#    -buildver hg38 \
#    -out annotated_annovar \
#    -remove \
#    -protocol refGene,clinvar_20240129,cosmic70,gnomad_genome \
#    -operation g,f,f,f \
#    -nastring .

✅ 适合癌症、临床相关变异注释(COSMIC、ClinVar、gnomAD)


🚦 工具选择建议

使用场景 推荐工具
全面功能预测 VEP
癌症为重点 ANNOVAR
兼顾两者 同时使用更好