---
title: "Phyloseq microbiome"
author: ""
date: '`r format(Sys.time(), "%d %m %Y")`'
header-includes:
   - \usepackage{color, fancyvrb}
output:
  rmdformats::readthedown:
    highlight: kate
    number_sections : yes
  pdf_document:
    toc: yes
    toc_depth: 2
    number_sections : yes
---

```{r, echo=FALSE, warning=FALSE}
## Global options
# TODO: reproduce the html with the additional figure/SVN-files for editing.
# IMPORTANT NOTE: needs before "mkdir figures"
#gunzip table_even9893.biom.gz
#alpha_diversity.py -i table_even9893.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering/rep_set.tre
#cp count_table_seq31_seq37.txt count_table_seq31_seq37_ST.txt by adding ST in first column and changing '-' to Novel
#Note count_table.txt is the file with higher unclassified rates!
#python3 ~/Scripts/calculate_alpha_diversities.py ../count_table_seq31_seq37_ST.txt alpha_diversity_metrics_samples_ST.csv
#rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
```

```{r load-packages, include=FALSE}
#install.packages(c("dplyr", "tidyr", "rstatix", "ggplot2", "RVAideMemoire"))
library(knitr)
library(rmdformats)
library(readxl)
library(dplyr)
library(kableExtra)
library(tibble)
#library(wilcox.test)
library(dplyr)
library(tidyr)
library(rstatix)          # For convenient paired statistical tests and p-value adjustment
library(RVAideMemoire)    # For Cochran's Q test
library(ggplot2)

options(max.print="75")
knitr::opts_chunk$set(fig.width=8,
                      fig.height=6,
                      eval=TRUE,
                      cache=TRUE,
                      echo=TRUE,
                      prompt=FALSE,
                      tidy=FALSE,
                      comment=NA,
                      message=FALSE,
                      warning=FALSE)
opts_knit$set(width=85)
# Phyloseq R library
#* Phyloseq web site : https://joey711.github.io/phyloseq/index.html
#* See in particular tutorials for
#    - importing data: https://joey711.github.io/phyloseq/import-data.html
#    - heat maps: https://joey711.github.io/phyloseq/plot_heatmap-examples.html
```

# Sample Collection and Study Design

Nasal swab samples were collected from two patient cohorts: patients undergoing aneurysm surgery (cohort A, n = 15) and patients undergoing hypophysis surgery (cohort H, n = 20). For each patient, samples were collected at three timepoints: admission (A1/H1), surgery (A2/H2), and discharge (A3/H3), resulting in a longitudinal study design to assess temporal changes in the nasal microbiome across surgical interventions.

# Data

Import raw data and assign sample key:

```{r, echo=TRUE, warning=FALSE}
#extend map_corrected.txt with Diet and Flora
#setwd("~/DATA/Data_Laura_16S_2/core_diversity_e4753")
#MODIFIED: Using map3_corrected.txt rather than map3_corrected.txt
#TODO_MONDAY: the name has not been changed!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
map_corrected <- read.csv("../map3_corrected.txt", sep="\t", row.names=1)
knitr::kable(map_corrected) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

# Prerequisites to be installed

* R : https://pbil.univ-lyon1.fr/CRAN/
* R studio : https://www.rstudio.com/products/rstudio/download/#download

```R
install.packages("dplyr")     # To manipulate dataframes
install.packages("readxl")    # To read Excel files into R
install.packages("ggplot2")   # for high quality graphics
install.packages("heatmaply")
source("https://bioconductor.org/biocLite.R")
biocLite("phyloseq")
```

```{r libraries, echo=TRUE, message=FALSE}
library("readxl") # necessary to import the data from Excel file
library("ggplot2") # graphics
library("picante")
library("microbiome") # data analysis and visualisation
library("phyloseq") # also the basis of data object. Data analysis and visualisation
library("DESeq2")
library("ggpubr") # publication quality figures, based on ggplot2
library("dplyr") # data handling, filter and reformat data frames
library("RColorBrewer") # nice color options
library("heatmaply")
library("vegan")
library("gplots")
library("openxlsx")
```

# Read the data and create phyloseq objects (16S)

Three tables are needed

* OTU
* Taxonomy
* Samples

```{r, echo=TRUE, warning=FALSE}
    # Change your working directory to where the files are located
    ps_raw <- import_biom("./table_even9893.biom", "../clustering/rep_set.tre")
    sample <- read.csv("../map3_corrected.txt", sep = "\t", row.names = 1)
    SAM = sample_data(sample, errorIfNULL = T)
    # rownames(SAM) <-c('P7.Nose','P8.Nose','P9.Nose','P10.Nose','P11.Nose','P12.Nose','P13.Nose','P14.Nose','P15.Nose','P16.Foot','P16.Nose','P17.Foot','P17.Nose','P18.Foot','P18.Nose','P19.Foot','P19.Nose','P20.Foot','P20.Nose','E.LS','E.NLS','E.Nose','G.LS','G.NLS','G.Nose','K.LS','K.NLS','K.Nose','H.LS','H.NLS','H.Nose','D.LS','D.NLS','D.Nose','C.LS','C.NLS','C.Nose','J.LS','J.NLS','J.Nose','F.LS','F.NLS','F.Nose','A.LS','A.NLS','A.Nose','I.LS','I.NLS','I.Nose','B.LS','B.NLS','B.Nose')
    ps_base <- merge_phyloseq(ps_raw, SAM)
    print(ps_base)
    colnames(tax_table(ps_base)) <- c("Domain","Phylum","Class","Order","Family","Genus","Species")
    saveRDS(ps_base, "./ps_base.rds")
```


Visualize data
```{r, echo=TRUE, warning=FALSE}
  sample_names(ps_base)
  rank_names(ps_base)
  sample_variables(ps_base)

  # Define sample names once
  samples <- c(
    "A1.1","A1.2","A1.3",
    "A2.1","A2.2","A2.3",
    "A3.1","A3.2","A3.3",
    "A4.1","A4.2","A4.3",
    "A5.1","A5.2","A5.3",
    "A10.1","A10.2","A10.3",
    "A12.1","A12.2","A12.3",
    "A17.1","A17.2","A17.3",
    "A20.1","A20.2","A20.3",
    "A21.1","A21.2","A21.3",
    "A22.1","A22.2","A22.3",
    "A24.1","A24.2","A24.3",
    "A25.1","A25.2","A25.3",
    "A27.1","A27.2","A27.3",
    "A28.1","A28.2","A28.3",
    "H11.1","H11.2","H11.3",
    "H13.1","H13.2","H13.3",
    "H14.1","H14.2","H14.3",
    "H16.1","H16.2","H16.3",
    "H19.1","H19.2","H19.3",
    "H24.1","H24.2","H24.3",
    "H53.1","H53.2","H53.3",
    "H56.1","H56.2","H56.3",
    "H57.1","H57.2","H57.3",
    "H59.1","H59.2","H59.3",
    "H60.1","H60.2","H60.3",
    "H64.1","H64.2","H64.3",
    "H65.1","H65.2","H65.3",
    "H67.1","H67.2","H67.3",
    "H72.1","H72.2","H72.3",
    "H73.1","H73.2","H73.3",
    "H76.1","H76.2","H76.3",
    "H78.1","H78.2","H78.3",
    "H91.1","H91.2","H91.3",
    "H94.1","H94.2","H94.3",
    "H99.1","H99.2","H99.3")
  ps_pruned <- prune_samples(samples, ps_base)

  sample_names(ps_pruned)
  rank_names(ps_pruned)
  sample_variables(ps_pruned)
```


No samples were excluded as low-depth outliers (library sizes below the minimum depth threshold of 1,000 reads), and the remaining dataset (ps_filt) contains only samples meeting this depth cutoff with taxa retained only if they have nonzero total counts.

```{r, echo=TRUE, warning=FALSE}
# ------------------------------------------------------------
#   Filter low-depth samples (recommended for all analyses)
# ------------------------------------------------------------
min_depth <- 1000  # <-- adjust to your data / study design, keeps all!
ps_filt <- prune_samples(sample_sums(ps_pruned) >= min_depth, ps_pruned)
ps_filt <- prune_taxa(taxa_sums(ps_filt) > 0, ps_filt)

# Keep a depth summary for reporting / QC
depth_summary <- summary(sample_sums(ps_filt))
depth_summary
```


**Differential abundance (DESeq2)** → **`ps_deseq`**: **non-rarefied integer counts** derived from `ps_filt`, with optional **count-based** taxon prefilter
  *(default: taxa total counts ≥ 10 across all samples)*

From `ps_filt` (e.g. 5669 taxa and 239 samples), we branch into analysis-ready objects in two directions:

* Direction 1 for diversity analyses

  - **Alpha diversity**: `ps_rarefied` ✅ (common)
  - **Beta diversity**:
    - **Unweighted UniFrac / Jaccard**: `ps_rarefied` ✅ (often recommended)
    - **Bray–Curtis / ordination on abundances**: `ps_rel` or Hellinger ✅ (rarefaction optional)
    - **Aitchison (CLR)**: CLR-transformed (non-rarefied) ✅ (no rarefaction)


Normalize number of reads in each sample using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}

# RAREFACTION
set.seed(9242)  # This will help in reproducing the filtering and nomalisation.
ps_rarefied <- rarefy_even_depth(ps_filt, sample.size = 9893)
total <- 9893

# # NORMALIZE number of reads in each sample using median sequencing depth.
# total = median(sample_sums(ps.ng.tax))
# #> total
# #[1] 9893
# standf = function(x, t=total) round(t * (x / sum(x)))
# ps.ng.tax = transform_sample_counts(ps.ng.tax, standf)
# ps_rel <- microbiome::transform(ps.ng.tax, "compositional")
#
# saveRDS(ps.ng.tax, "./ps.ng.tax.rds")
```


* Direction 2 for taxonomic composition plots

  - **Taxonomic composition** → **`ps_rel`**: **relative abundance** (compositional) computed **after sample filtering** (e.g. 5669 taxa and 239 samples)
  - **Optional cleaner composition plots** → **`ps_abund` / `ps_abund_rel`**: taxa filtered for plotting (e.g., keep taxa with **mean relative abundance > 0.1%**); (e.g. 95 taxa and 239 samples)
  `ps_abund` = **counts**, `ps_abund_rel` = **relative abundance** *(use for visualization, not DESeq2)*

For the heatmaps, we focus on the most abundant OTUs by first converting counts to relative abundances within each sample. We then filter to retain only OTUs whose mean relative abundance across all samples exceeds 0.1% (0.001). We are left with 199 OTUs which makes the reading much more easy.

```{r, echo=FALSE, warning=FALSE}
# 1) Convert to relative abundances
ps_rel <- transform_sample_counts(ps_filt, function(x) x / sum(x))

# 2) Get the logical vector of which OTUs to keep (based on relative abundance)
keep_vector <- phyloseq::filter_taxa(
  ps_rel,
  function(x) mean(x) > 0.001,
  prune = FALSE
)

# 3) Use the TRUE/FALSE vector to subset absolute abundance data
ps_abund <- prune_taxa(names(keep_vector)[keep_vector], ps_filt)

# 4) Normalize the final subset to relative abundances per sample
ps_abund_rel <- transform_sample_counts(
  ps_abund,
  function(x) x / sum(x)
)
```


# Heatmaps (16S)

We consider the most abundant OTUs for heatmaps. For example one can only take OTUs that represent at least 1% of reads in at least one sample. Remember we normalized all the sampples to median number of reads (total).  We are left with only 119 OTUS which makes the reading much more easy.

```{r, echo=FALSE, warning=FALSE}
datamat_ = as.data.frame(otu_table(ps_abund))
datamat <- datamat_[c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3","H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")]
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
mycl = cutree(hr, h=max(hr$height)/1.08)
mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");

mycol = mycol[as.vector(mycl)]
sampleCols <- rep('GREY',ncol(datamat))


sampleCols[colnames(datamat)=='A1.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A1.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A1.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A2.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A2.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A2.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A3.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A3.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A3.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A4.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A4.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A4.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A5.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A5.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A5.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A10.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A10.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A10.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A12.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A12.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A12.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A17.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A17.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A17.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A20.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A20.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A20.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A21.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A21.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A21.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A22.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A22.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A22.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A24.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A24.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A24.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A25.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A25.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A25.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A27.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A27.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A27.3'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A28.1'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A28.2'] <- '#1f78b4'
sampleCols[colnames(datamat)=='A28.3'] <- '#1f78b4'

sampleCols[colnames(datamat)=='H11.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H11.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H11.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H13.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H13.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H13.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H14.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H14.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H14.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H16.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H16.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H16.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H19.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H19.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H19.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H24.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H24.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H24.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H53.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H53.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H53.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H56.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H56.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H56.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H57.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H57.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H57.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H59.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H59.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H59.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H60.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H60.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H60.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H64.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H64.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H64.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H65.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H65.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H65.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H67.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H67.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H67.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H72.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H72.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H72.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H73.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H73.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H73.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H76.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H76.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H76.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H78.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H78.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H78.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H91.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H91.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H91.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H94.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H94.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H94.3'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H99.1'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H99.2'] <- '#b2df8a'
sampleCols[colnames(datamat)=='H99.3'] <- '#b2df8a'

#bluered(75)
#color_pattern <- colorRampPalette(c("blue", "white", "red"))(100)
library(RColorBrewer)
custom_palette <- colorRampPalette(brewer.pal(9, "Blues"))
heatmap_colors <- custom_palette(100)
#colors <- heatmap_color_default(100)
png("figures/heatmap.png", width=1200, height=2400)
#par(mar=c(2, 2, 2, 2))  , lwid=1    lhei=c(0.7, 10)) # Adjust height of color keys   keysize=0.3,
heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
            scale='row',trace='none',col=heatmap_colors, cexRow=1.2, cexCol=1.5,
            RowSideColors = mycol, ColSideColors = sampleCols, srtCol=60, labRow=row.names(datamat), key=TRUE, margins=c(10, 15), lhei=c(0.7, 15), lwid=c(1,8))
dev.off()
```
```{r, echo=TRUE, warning=FALSE, fig.cap="Heatmap", out.width = '100%', fig.align= "center"}
knitr::include_graphics("./figures/heatmap.png")
```

```{r, echo=FALSE, warning=FALSE}
  #It is possible to use different distances and different multivaraite methods. Many different built-in distances can be used.
  #dist_methods <- unlist(distanceMethodList)
  #print(dist_methods)
```

\pagebreak


```{r, echo=FALSE, warning=FALSE}
  library(stringr)
#for id in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66; do
#echo "tax_table(ps_abund_rel)[${id},\"Domain\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Domain\"], \"__\")[[1]][2]"
#echo "tax_table(ps_abund_rel)[${id},\"Phylum\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Phylum\"], \"__\")[[1]][2]"
#echo "tax_table(ps_abund_rel)[${id},\"Class\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Class\"], \"__\")[[1]][2]"
#echo "tax_table(ps_abund_rel)[${id},\"Order\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Order\"], \"__\")[[1]][2]"
#echo "tax_table(ps_abund_rel)[${id},\"Family\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Family\"], \"__\")[[1]][2]"
#echo "tax_table(ps_abund_rel)[${id},\"Genus\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Genus\"], \"__\")[[1]][2]"
#echo "tax_table(ps_abund_rel)[${id},\"Species\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Species\"], \"__\")[[1]][2]"
#done

tax_table(ps_abund_rel)[1,"Domain"] <- str_split(tax_table(ps_abund_rel)[1,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[1,"Phylum"] <- str_split(tax_table(ps_abund_rel)[1,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[1,"Class"] <- str_split(tax_table(ps_abund_rel)[1,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[1,"Order"] <- str_split(tax_table(ps_abund_rel)[1,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[1,"Family"] <- str_split(tax_table(ps_abund_rel)[1,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[1,"Genus"] <- str_split(tax_table(ps_abund_rel)[1,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[1,"Species"] <- str_split(tax_table(ps_abund_rel)[1,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[2,"Domain"] <- str_split(tax_table(ps_abund_rel)[2,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[2,"Phylum"] <- str_split(tax_table(ps_abund_rel)[2,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[2,"Class"] <- str_split(tax_table(ps_abund_rel)[2,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[2,"Order"] <- str_split(tax_table(ps_abund_rel)[2,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[2,"Family"] <- str_split(tax_table(ps_abund_rel)[2,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[2,"Genus"] <- str_split(tax_table(ps_abund_rel)[2,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[2,"Species"] <- str_split(tax_table(ps_abund_rel)[2,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[3,"Domain"] <- str_split(tax_table(ps_abund_rel)[3,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[3,"Phylum"] <- str_split(tax_table(ps_abund_rel)[3,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[3,"Class"] <- str_split(tax_table(ps_abund_rel)[3,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[3,"Order"] <- str_split(tax_table(ps_abund_rel)[3,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[3,"Family"] <- str_split(tax_table(ps_abund_rel)[3,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[3,"Genus"] <- str_split(tax_table(ps_abund_rel)[3,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[3,"Species"] <- str_split(tax_table(ps_abund_rel)[3,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[4,"Domain"] <- str_split(tax_table(ps_abund_rel)[4,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[4,"Phylum"] <- str_split(tax_table(ps_abund_rel)[4,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[4,"Class"] <- str_split(tax_table(ps_abund_rel)[4,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[4,"Order"] <- str_split(tax_table(ps_abund_rel)[4,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[4,"Family"] <- str_split(tax_table(ps_abund_rel)[4,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[4,"Genus"] <- str_split(tax_table(ps_abund_rel)[4,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[4,"Species"] <- str_split(tax_table(ps_abund_rel)[4,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[5,"Domain"] <- str_split(tax_table(ps_abund_rel)[5,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[5,"Phylum"] <- str_split(tax_table(ps_abund_rel)[5,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[5,"Class"] <- str_split(tax_table(ps_abund_rel)[5,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[5,"Order"] <- str_split(tax_table(ps_abund_rel)[5,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[5,"Family"] <- str_split(tax_table(ps_abund_rel)[5,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[5,"Genus"] <- str_split(tax_table(ps_abund_rel)[5,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[5,"Species"] <- str_split(tax_table(ps_abund_rel)[5,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[6,"Domain"] <- str_split(tax_table(ps_abund_rel)[6,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[6,"Phylum"] <- str_split(tax_table(ps_abund_rel)[6,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[6,"Class"] <- str_split(tax_table(ps_abund_rel)[6,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[6,"Order"] <- str_split(tax_table(ps_abund_rel)[6,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[6,"Family"] <- str_split(tax_table(ps_abund_rel)[6,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[6,"Genus"] <- str_split(tax_table(ps_abund_rel)[6,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[6,"Species"] <- str_split(tax_table(ps_abund_rel)[6,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[7,"Domain"] <- str_split(tax_table(ps_abund_rel)[7,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[7,"Phylum"] <- str_split(tax_table(ps_abund_rel)[7,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[7,"Class"] <- str_split(tax_table(ps_abund_rel)[7,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[7,"Order"] <- str_split(tax_table(ps_abund_rel)[7,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[7,"Family"] <- str_split(tax_table(ps_abund_rel)[7,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[7,"Genus"] <- str_split(tax_table(ps_abund_rel)[7,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[7,"Species"] <- str_split(tax_table(ps_abund_rel)[7,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[8,"Domain"] <- str_split(tax_table(ps_abund_rel)[8,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[8,"Phylum"] <- str_split(tax_table(ps_abund_rel)[8,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[8,"Class"] <- str_split(tax_table(ps_abund_rel)[8,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[8,"Order"] <- str_split(tax_table(ps_abund_rel)[8,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[8,"Family"] <- str_split(tax_table(ps_abund_rel)[8,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[8,"Genus"] <- str_split(tax_table(ps_abund_rel)[8,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[8,"Species"] <- str_split(tax_table(ps_abund_rel)[8,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[9,"Domain"] <- str_split(tax_table(ps_abund_rel)[9,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[9,"Phylum"] <- str_split(tax_table(ps_abund_rel)[9,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[9,"Class"] <- str_split(tax_table(ps_abund_rel)[9,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[9,"Order"] <- str_split(tax_table(ps_abund_rel)[9,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[9,"Family"] <- str_split(tax_table(ps_abund_rel)[9,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[9,"Genus"] <- str_split(tax_table(ps_abund_rel)[9,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[9,"Species"] <- str_split(tax_table(ps_abund_rel)[9,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[10,"Domain"] <- str_split(tax_table(ps_abund_rel)[10,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[10,"Phylum"] <- str_split(tax_table(ps_abund_rel)[10,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[10,"Class"] <- str_split(tax_table(ps_abund_rel)[10,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[10,"Order"] <- str_split(tax_table(ps_abund_rel)[10,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[10,"Family"] <- str_split(tax_table(ps_abund_rel)[10,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[10,"Genus"] <- str_split(tax_table(ps_abund_rel)[10,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[10,"Species"] <- str_split(tax_table(ps_abund_rel)[10,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[11,"Domain"] <- str_split(tax_table(ps_abund_rel)[11,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[11,"Phylum"] <- str_split(tax_table(ps_abund_rel)[11,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[11,"Class"] <- str_split(tax_table(ps_abund_rel)[11,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[11,"Order"] <- str_split(tax_table(ps_abund_rel)[11,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[11,"Family"] <- str_split(tax_table(ps_abund_rel)[11,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[11,"Genus"] <- str_split(tax_table(ps_abund_rel)[11,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[11,"Species"] <- str_split(tax_table(ps_abund_rel)[11,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[12,"Domain"] <- str_split(tax_table(ps_abund_rel)[12,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[12,"Phylum"] <- str_split(tax_table(ps_abund_rel)[12,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[12,"Class"] <- str_split(tax_table(ps_abund_rel)[12,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[12,"Order"] <- str_split(tax_table(ps_abund_rel)[12,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[12,"Family"] <- str_split(tax_table(ps_abund_rel)[12,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[12,"Genus"] <- str_split(tax_table(ps_abund_rel)[12,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[12,"Species"] <- str_split(tax_table(ps_abund_rel)[12,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[13,"Domain"] <- str_split(tax_table(ps_abund_rel)[13,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[13,"Phylum"] <- str_split(tax_table(ps_abund_rel)[13,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[13,"Class"] <- str_split(tax_table(ps_abund_rel)[13,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[13,"Order"] <- str_split(tax_table(ps_abund_rel)[13,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[13,"Family"] <- str_split(tax_table(ps_abund_rel)[13,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[13,"Genus"] <- str_split(tax_table(ps_abund_rel)[13,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[13,"Species"] <- str_split(tax_table(ps_abund_rel)[13,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[14,"Domain"] <- str_split(tax_table(ps_abund_rel)[14,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[14,"Phylum"] <- str_split(tax_table(ps_abund_rel)[14,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[14,"Class"] <- str_split(tax_table(ps_abund_rel)[14,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[14,"Order"] <- str_split(tax_table(ps_abund_rel)[14,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[14,"Family"] <- str_split(tax_table(ps_abund_rel)[14,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[14,"Genus"] <- str_split(tax_table(ps_abund_rel)[14,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[14,"Species"] <- str_split(tax_table(ps_abund_rel)[14,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[15,"Domain"] <- str_split(tax_table(ps_abund_rel)[15,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[15,"Phylum"] <- str_split(tax_table(ps_abund_rel)[15,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[15,"Class"] <- str_split(tax_table(ps_abund_rel)[15,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[15,"Order"] <- str_split(tax_table(ps_abund_rel)[15,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[15,"Family"] <- str_split(tax_table(ps_abund_rel)[15,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[15,"Genus"] <- str_split(tax_table(ps_abund_rel)[15,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[15,"Species"] <- str_split(tax_table(ps_abund_rel)[15,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[16,"Domain"] <- str_split(tax_table(ps_abund_rel)[16,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[16,"Phylum"] <- str_split(tax_table(ps_abund_rel)[16,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[16,"Class"] <- str_split(tax_table(ps_abund_rel)[16,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[16,"Order"] <- str_split(tax_table(ps_abund_rel)[16,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[16,"Family"] <- str_split(tax_table(ps_abund_rel)[16,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[16,"Genus"] <- str_split(tax_table(ps_abund_rel)[16,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[16,"Species"] <- str_split(tax_table(ps_abund_rel)[16,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[17,"Domain"] <- str_split(tax_table(ps_abund_rel)[17,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[17,"Phylum"] <- str_split(tax_table(ps_abund_rel)[17,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[17,"Class"] <- str_split(tax_table(ps_abund_rel)[17,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[17,"Order"] <- str_split(tax_table(ps_abund_rel)[17,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[17,"Family"] <- str_split(tax_table(ps_abund_rel)[17,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[17,"Genus"] <- str_split(tax_table(ps_abund_rel)[17,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[17,"Species"] <- str_split(tax_table(ps_abund_rel)[17,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[18,"Domain"] <- str_split(tax_table(ps_abund_rel)[18,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[18,"Phylum"] <- str_split(tax_table(ps_abund_rel)[18,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[18,"Class"] <- str_split(tax_table(ps_abund_rel)[18,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[18,"Order"] <- str_split(tax_table(ps_abund_rel)[18,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[18,"Family"] <- str_split(tax_table(ps_abund_rel)[18,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[18,"Genus"] <- str_split(tax_table(ps_abund_rel)[18,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[18,"Species"] <- str_split(tax_table(ps_abund_rel)[18,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[19,"Domain"] <- str_split(tax_table(ps_abund_rel)[19,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[19,"Phylum"] <- str_split(tax_table(ps_abund_rel)[19,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[19,"Class"] <- str_split(tax_table(ps_abund_rel)[19,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[19,"Order"] <- str_split(tax_table(ps_abund_rel)[19,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[19,"Family"] <- str_split(tax_table(ps_abund_rel)[19,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[19,"Genus"] <- str_split(tax_table(ps_abund_rel)[19,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[19,"Species"] <- str_split(tax_table(ps_abund_rel)[19,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[20,"Domain"] <- str_split(tax_table(ps_abund_rel)[20,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[20,"Phylum"] <- str_split(tax_table(ps_abund_rel)[20,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[20,"Class"] <- str_split(tax_table(ps_abund_rel)[20,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[20,"Order"] <- str_split(tax_table(ps_abund_rel)[20,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[20,"Family"] <- str_split(tax_table(ps_abund_rel)[20,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[20,"Genus"] <- str_split(tax_table(ps_abund_rel)[20,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[20,"Species"] <- str_split(tax_table(ps_abund_rel)[20,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[21,"Domain"] <- str_split(tax_table(ps_abund_rel)[21,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[21,"Phylum"] <- str_split(tax_table(ps_abund_rel)[21,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[21,"Class"] <- str_split(tax_table(ps_abund_rel)[21,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[21,"Order"] <- str_split(tax_table(ps_abund_rel)[21,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[21,"Family"] <- str_split(tax_table(ps_abund_rel)[21,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[21,"Genus"] <- str_split(tax_table(ps_abund_rel)[21,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[21,"Species"] <- str_split(tax_table(ps_abund_rel)[21,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[22,"Domain"] <- str_split(tax_table(ps_abund_rel)[22,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[22,"Phylum"] <- str_split(tax_table(ps_abund_rel)[22,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[22,"Class"] <- str_split(tax_table(ps_abund_rel)[22,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[22,"Order"] <- str_split(tax_table(ps_abund_rel)[22,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[22,"Family"] <- str_split(tax_table(ps_abund_rel)[22,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[22,"Genus"] <- str_split(tax_table(ps_abund_rel)[22,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[22,"Species"] <- str_split(tax_table(ps_abund_rel)[22,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[23,"Domain"] <- str_split(tax_table(ps_abund_rel)[23,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[23,"Phylum"] <- str_split(tax_table(ps_abund_rel)[23,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[23,"Class"] <- str_split(tax_table(ps_abund_rel)[23,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[23,"Order"] <- str_split(tax_table(ps_abund_rel)[23,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[23,"Family"] <- str_split(tax_table(ps_abund_rel)[23,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[23,"Genus"] <- str_split(tax_table(ps_abund_rel)[23,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[23,"Species"] <- str_split(tax_table(ps_abund_rel)[23,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[24,"Domain"] <- str_split(tax_table(ps_abund_rel)[24,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[24,"Phylum"] <- str_split(tax_table(ps_abund_rel)[24,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[24,"Class"] <- str_split(tax_table(ps_abund_rel)[24,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[24,"Order"] <- str_split(tax_table(ps_abund_rel)[24,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[24,"Family"] <- str_split(tax_table(ps_abund_rel)[24,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[24,"Genus"] <- str_split(tax_table(ps_abund_rel)[24,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[24,"Species"] <- str_split(tax_table(ps_abund_rel)[24,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[25,"Domain"] <- str_split(tax_table(ps_abund_rel)[25,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[25,"Phylum"] <- str_split(tax_table(ps_abund_rel)[25,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[25,"Class"] <- str_split(tax_table(ps_abund_rel)[25,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[25,"Order"] <- str_split(tax_table(ps_abund_rel)[25,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[25,"Family"] <- str_split(tax_table(ps_abund_rel)[25,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[25,"Genus"] <- str_split(tax_table(ps_abund_rel)[25,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[25,"Species"] <- str_split(tax_table(ps_abund_rel)[25,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[26,"Domain"] <- str_split(tax_table(ps_abund_rel)[26,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[26,"Phylum"] <- str_split(tax_table(ps_abund_rel)[26,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[26,"Class"] <- str_split(tax_table(ps_abund_rel)[26,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[26,"Order"] <- str_split(tax_table(ps_abund_rel)[26,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[26,"Family"] <- str_split(tax_table(ps_abund_rel)[26,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[26,"Genus"] <- str_split(tax_table(ps_abund_rel)[26,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[26,"Species"] <- str_split(tax_table(ps_abund_rel)[26,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[27,"Domain"] <- str_split(tax_table(ps_abund_rel)[27,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[27,"Phylum"] <- str_split(tax_table(ps_abund_rel)[27,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[27,"Class"] <- str_split(tax_table(ps_abund_rel)[27,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[27,"Order"] <- str_split(tax_table(ps_abund_rel)[27,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[27,"Family"] <- str_split(tax_table(ps_abund_rel)[27,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[27,"Genus"] <- str_split(tax_table(ps_abund_rel)[27,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[27,"Species"] <- str_split(tax_table(ps_abund_rel)[27,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[28,"Domain"] <- str_split(tax_table(ps_abund_rel)[28,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[28,"Phylum"] <- str_split(tax_table(ps_abund_rel)[28,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[28,"Class"] <- str_split(tax_table(ps_abund_rel)[28,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[28,"Order"] <- str_split(tax_table(ps_abund_rel)[28,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[28,"Family"] <- str_split(tax_table(ps_abund_rel)[28,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[28,"Genus"] <- str_split(tax_table(ps_abund_rel)[28,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[28,"Species"] <- str_split(tax_table(ps_abund_rel)[28,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[29,"Domain"] <- str_split(tax_table(ps_abund_rel)[29,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[29,"Phylum"] <- str_split(tax_table(ps_abund_rel)[29,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[29,"Class"] <- str_split(tax_table(ps_abund_rel)[29,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[29,"Order"] <- str_split(tax_table(ps_abund_rel)[29,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[29,"Family"] <- str_split(tax_table(ps_abund_rel)[29,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[29,"Genus"] <- str_split(tax_table(ps_abund_rel)[29,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[29,"Species"] <- str_split(tax_table(ps_abund_rel)[29,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[30,"Domain"] <- str_split(tax_table(ps_abund_rel)[30,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[30,"Phylum"] <- str_split(tax_table(ps_abund_rel)[30,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[30,"Class"] <- str_split(tax_table(ps_abund_rel)[30,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[30,"Order"] <- str_split(tax_table(ps_abund_rel)[30,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[30,"Family"] <- str_split(tax_table(ps_abund_rel)[30,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[30,"Genus"] <- str_split(tax_table(ps_abund_rel)[30,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[30,"Species"] <- str_split(tax_table(ps_abund_rel)[30,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[31,"Domain"] <- str_split(tax_table(ps_abund_rel)[31,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[31,"Phylum"] <- str_split(tax_table(ps_abund_rel)[31,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[31,"Class"] <- str_split(tax_table(ps_abund_rel)[31,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[31,"Order"] <- str_split(tax_table(ps_abund_rel)[31,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[31,"Family"] <- str_split(tax_table(ps_abund_rel)[31,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[31,"Genus"] <- str_split(tax_table(ps_abund_rel)[31,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[31,"Species"] <- str_split(tax_table(ps_abund_rel)[31,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[32,"Domain"] <- str_split(tax_table(ps_abund_rel)[32,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[32,"Phylum"] <- str_split(tax_table(ps_abund_rel)[32,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[32,"Class"] <- str_split(tax_table(ps_abund_rel)[32,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[32,"Order"] <- str_split(tax_table(ps_abund_rel)[32,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[32,"Family"] <- str_split(tax_table(ps_abund_rel)[32,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[32,"Genus"] <- str_split(tax_table(ps_abund_rel)[32,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[32,"Species"] <- str_split(tax_table(ps_abund_rel)[32,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[33,"Domain"] <- str_split(tax_table(ps_abund_rel)[33,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[33,"Phylum"] <- str_split(tax_table(ps_abund_rel)[33,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[33,"Class"] <- str_split(tax_table(ps_abund_rel)[33,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[33,"Order"] <- str_split(tax_table(ps_abund_rel)[33,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[33,"Family"] <- str_split(tax_table(ps_abund_rel)[33,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[33,"Genus"] <- str_split(tax_table(ps_abund_rel)[33,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[33,"Species"] <- str_split(tax_table(ps_abund_rel)[33,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[34,"Domain"] <- str_split(tax_table(ps_abund_rel)[34,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[34,"Phylum"] <- str_split(tax_table(ps_abund_rel)[34,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[34,"Class"] <- str_split(tax_table(ps_abund_rel)[34,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[34,"Order"] <- str_split(tax_table(ps_abund_rel)[34,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[34,"Family"] <- str_split(tax_table(ps_abund_rel)[34,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[34,"Genus"] <- str_split(tax_table(ps_abund_rel)[34,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[34,"Species"] <- str_split(tax_table(ps_abund_rel)[34,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[35,"Domain"] <- str_split(tax_table(ps_abund_rel)[35,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[35,"Phylum"] <- str_split(tax_table(ps_abund_rel)[35,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[35,"Class"] <- str_split(tax_table(ps_abund_rel)[35,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[35,"Order"] <- str_split(tax_table(ps_abund_rel)[35,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[35,"Family"] <- str_split(tax_table(ps_abund_rel)[35,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[35,"Genus"] <- str_split(tax_table(ps_abund_rel)[35,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[35,"Species"] <- str_split(tax_table(ps_abund_rel)[35,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[36,"Domain"] <- str_split(tax_table(ps_abund_rel)[36,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[36,"Phylum"] <- str_split(tax_table(ps_abund_rel)[36,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[36,"Class"] <- str_split(tax_table(ps_abund_rel)[36,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[36,"Order"] <- str_split(tax_table(ps_abund_rel)[36,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[36,"Family"] <- str_split(tax_table(ps_abund_rel)[36,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[36,"Genus"] <- str_split(tax_table(ps_abund_rel)[36,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[36,"Species"] <- str_split(tax_table(ps_abund_rel)[36,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[37,"Domain"] <- str_split(tax_table(ps_abund_rel)[37,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[37,"Phylum"] <- str_split(tax_table(ps_abund_rel)[37,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[37,"Class"] <- str_split(tax_table(ps_abund_rel)[37,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[37,"Order"] <- str_split(tax_table(ps_abund_rel)[37,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[37,"Family"] <- str_split(tax_table(ps_abund_rel)[37,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[37,"Genus"] <- str_split(tax_table(ps_abund_rel)[37,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[37,"Species"] <- str_split(tax_table(ps_abund_rel)[37,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[38,"Domain"] <- str_split(tax_table(ps_abund_rel)[38,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[38,"Phylum"] <- str_split(tax_table(ps_abund_rel)[38,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[38,"Class"] <- str_split(tax_table(ps_abund_rel)[38,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[38,"Order"] <- str_split(tax_table(ps_abund_rel)[38,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[38,"Family"] <- str_split(tax_table(ps_abund_rel)[38,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[38,"Genus"] <- str_split(tax_table(ps_abund_rel)[38,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[38,"Species"] <- str_split(tax_table(ps_abund_rel)[38,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[39,"Domain"] <- str_split(tax_table(ps_abund_rel)[39,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[39,"Phylum"] <- str_split(tax_table(ps_abund_rel)[39,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[39,"Class"] <- str_split(tax_table(ps_abund_rel)[39,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[39,"Order"] <- str_split(tax_table(ps_abund_rel)[39,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[39,"Family"] <- str_split(tax_table(ps_abund_rel)[39,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[39,"Genus"] <- str_split(tax_table(ps_abund_rel)[39,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[39,"Species"] <- str_split(tax_table(ps_abund_rel)[39,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[40,"Domain"] <- str_split(tax_table(ps_abund_rel)[40,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[40,"Phylum"] <- str_split(tax_table(ps_abund_rel)[40,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[40,"Class"] <- str_split(tax_table(ps_abund_rel)[40,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[40,"Order"] <- str_split(tax_table(ps_abund_rel)[40,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[40,"Family"] <- str_split(tax_table(ps_abund_rel)[40,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[40,"Genus"] <- str_split(tax_table(ps_abund_rel)[40,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[40,"Species"] <- str_split(tax_table(ps_abund_rel)[40,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[41,"Domain"] <- str_split(tax_table(ps_abund_rel)[41,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[41,"Phylum"] <- str_split(tax_table(ps_abund_rel)[41,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[41,"Class"] <- str_split(tax_table(ps_abund_rel)[41,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[41,"Order"] <- str_split(tax_table(ps_abund_rel)[41,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[41,"Family"] <- str_split(tax_table(ps_abund_rel)[41,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[41,"Genus"] <- str_split(tax_table(ps_abund_rel)[41,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[41,"Species"] <- str_split(tax_table(ps_abund_rel)[41,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[42,"Domain"] <- str_split(tax_table(ps_abund_rel)[42,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[42,"Phylum"] <- str_split(tax_table(ps_abund_rel)[42,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[42,"Class"] <- str_split(tax_table(ps_abund_rel)[42,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[42,"Order"] <- str_split(tax_table(ps_abund_rel)[42,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[42,"Family"] <- str_split(tax_table(ps_abund_rel)[42,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[42,"Genus"] <- str_split(tax_table(ps_abund_rel)[42,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[42,"Species"] <- str_split(tax_table(ps_abund_rel)[42,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[43,"Domain"] <- str_split(tax_table(ps_abund_rel)[43,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[43,"Phylum"] <- str_split(tax_table(ps_abund_rel)[43,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[43,"Class"] <- str_split(tax_table(ps_abund_rel)[43,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[43,"Order"] <- str_split(tax_table(ps_abund_rel)[43,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[43,"Family"] <- str_split(tax_table(ps_abund_rel)[43,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[43,"Genus"] <- str_split(tax_table(ps_abund_rel)[43,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[43,"Species"] <- str_split(tax_table(ps_abund_rel)[43,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[44,"Domain"] <- str_split(tax_table(ps_abund_rel)[44,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[44,"Phylum"] <- str_split(tax_table(ps_abund_rel)[44,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[44,"Class"] <- str_split(tax_table(ps_abund_rel)[44,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[44,"Order"] <- str_split(tax_table(ps_abund_rel)[44,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[44,"Family"] <- str_split(tax_table(ps_abund_rel)[44,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[44,"Genus"] <- str_split(tax_table(ps_abund_rel)[44,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[44,"Species"] <- str_split(tax_table(ps_abund_rel)[44,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[45,"Domain"] <- str_split(tax_table(ps_abund_rel)[45,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[45,"Phylum"] <- str_split(tax_table(ps_abund_rel)[45,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[45,"Class"] <- str_split(tax_table(ps_abund_rel)[45,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[45,"Order"] <- str_split(tax_table(ps_abund_rel)[45,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[45,"Family"] <- str_split(tax_table(ps_abund_rel)[45,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[45,"Genus"] <- str_split(tax_table(ps_abund_rel)[45,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[45,"Species"] <- str_split(tax_table(ps_abund_rel)[45,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[46,"Domain"] <- str_split(tax_table(ps_abund_rel)[46,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[46,"Phylum"] <- str_split(tax_table(ps_abund_rel)[46,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[46,"Class"] <- str_split(tax_table(ps_abund_rel)[46,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[46,"Order"] <- str_split(tax_table(ps_abund_rel)[46,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[46,"Family"] <- str_split(tax_table(ps_abund_rel)[46,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[46,"Genus"] <- str_split(tax_table(ps_abund_rel)[46,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[46,"Species"] <- str_split(tax_table(ps_abund_rel)[46,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[47,"Domain"] <- str_split(tax_table(ps_abund_rel)[47,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[47,"Phylum"] <- str_split(tax_table(ps_abund_rel)[47,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[47,"Class"] <- str_split(tax_table(ps_abund_rel)[47,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[47,"Order"] <- str_split(tax_table(ps_abund_rel)[47,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[47,"Family"] <- str_split(tax_table(ps_abund_rel)[47,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[47,"Genus"] <- str_split(tax_table(ps_abund_rel)[47,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[47,"Species"] <- str_split(tax_table(ps_abund_rel)[47,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[48,"Domain"] <- str_split(tax_table(ps_abund_rel)[48,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[48,"Phylum"] <- str_split(tax_table(ps_abund_rel)[48,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[48,"Class"] <- str_split(tax_table(ps_abund_rel)[48,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[48,"Order"] <- str_split(tax_table(ps_abund_rel)[48,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[48,"Family"] <- str_split(tax_table(ps_abund_rel)[48,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[48,"Genus"] <- str_split(tax_table(ps_abund_rel)[48,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[48,"Species"] <- str_split(tax_table(ps_abund_rel)[48,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[49,"Domain"] <- str_split(tax_table(ps_abund_rel)[49,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[49,"Phylum"] <- str_split(tax_table(ps_abund_rel)[49,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[49,"Class"] <- str_split(tax_table(ps_abund_rel)[49,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[49,"Order"] <- str_split(tax_table(ps_abund_rel)[49,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[49,"Family"] <- str_split(tax_table(ps_abund_rel)[49,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[49,"Genus"] <- str_split(tax_table(ps_abund_rel)[49,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[49,"Species"] <- str_split(tax_table(ps_abund_rel)[49,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[50,"Domain"] <- str_split(tax_table(ps_abund_rel)[50,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[50,"Phylum"] <- str_split(tax_table(ps_abund_rel)[50,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[50,"Class"] <- str_split(tax_table(ps_abund_rel)[50,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[50,"Order"] <- str_split(tax_table(ps_abund_rel)[50,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[50,"Family"] <- str_split(tax_table(ps_abund_rel)[50,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[50,"Genus"] <- str_split(tax_table(ps_abund_rel)[50,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[50,"Species"] <- str_split(tax_table(ps_abund_rel)[50,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[51,"Domain"] <- str_split(tax_table(ps_abund_rel)[51,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[51,"Phylum"] <- str_split(tax_table(ps_abund_rel)[51,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[51,"Class"] <- str_split(tax_table(ps_abund_rel)[51,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[51,"Order"] <- str_split(tax_table(ps_abund_rel)[51,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[51,"Family"] <- str_split(tax_table(ps_abund_rel)[51,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[51,"Genus"] <- str_split(tax_table(ps_abund_rel)[51,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[51,"Species"] <- str_split(tax_table(ps_abund_rel)[51,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[52,"Domain"] <- str_split(tax_table(ps_abund_rel)[52,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[52,"Phylum"] <- str_split(tax_table(ps_abund_rel)[52,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[52,"Class"] <- str_split(tax_table(ps_abund_rel)[52,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[52,"Order"] <- str_split(tax_table(ps_abund_rel)[52,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[52,"Family"] <- str_split(tax_table(ps_abund_rel)[52,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[52,"Genus"] <- str_split(tax_table(ps_abund_rel)[52,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[52,"Species"] <- str_split(tax_table(ps_abund_rel)[52,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[53,"Domain"] <- str_split(tax_table(ps_abund_rel)[53,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[53,"Phylum"] <- str_split(tax_table(ps_abund_rel)[53,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[53,"Class"] <- str_split(tax_table(ps_abund_rel)[53,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[53,"Order"] <- str_split(tax_table(ps_abund_rel)[53,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[53,"Family"] <- str_split(tax_table(ps_abund_rel)[53,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[53,"Genus"] <- str_split(tax_table(ps_abund_rel)[53,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[53,"Species"] <- str_split(tax_table(ps_abund_rel)[53,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[54,"Domain"] <- str_split(tax_table(ps_abund_rel)[54,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[54,"Phylum"] <- str_split(tax_table(ps_abund_rel)[54,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[54,"Class"] <- str_split(tax_table(ps_abund_rel)[54,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[54,"Order"] <- str_split(tax_table(ps_abund_rel)[54,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[54,"Family"] <- str_split(tax_table(ps_abund_rel)[54,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[54,"Genus"] <- str_split(tax_table(ps_abund_rel)[54,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[54,"Species"] <- str_split(tax_table(ps_abund_rel)[54,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[55,"Domain"] <- str_split(tax_table(ps_abund_rel)[55,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[55,"Phylum"] <- str_split(tax_table(ps_abund_rel)[55,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[55,"Class"] <- str_split(tax_table(ps_abund_rel)[55,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[55,"Order"] <- str_split(tax_table(ps_abund_rel)[55,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[55,"Family"] <- str_split(tax_table(ps_abund_rel)[55,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[55,"Genus"] <- str_split(tax_table(ps_abund_rel)[55,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[55,"Species"] <- str_split(tax_table(ps_abund_rel)[55,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[56,"Domain"] <- str_split(tax_table(ps_abund_rel)[56,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[56,"Phylum"] <- str_split(tax_table(ps_abund_rel)[56,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[56,"Class"] <- str_split(tax_table(ps_abund_rel)[56,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[56,"Order"] <- str_split(tax_table(ps_abund_rel)[56,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[56,"Family"] <- str_split(tax_table(ps_abund_rel)[56,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[56,"Genus"] <- str_split(tax_table(ps_abund_rel)[56,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[56,"Species"] <- str_split(tax_table(ps_abund_rel)[56,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[57,"Domain"] <- str_split(tax_table(ps_abund_rel)[57,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[57,"Phylum"] <- str_split(tax_table(ps_abund_rel)[57,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[57,"Class"] <- str_split(tax_table(ps_abund_rel)[57,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[57,"Order"] <- str_split(tax_table(ps_abund_rel)[57,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[57,"Family"] <- str_split(tax_table(ps_abund_rel)[57,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[57,"Genus"] <- str_split(tax_table(ps_abund_rel)[57,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[57,"Species"] <- str_split(tax_table(ps_abund_rel)[57,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[58,"Domain"] <- str_split(tax_table(ps_abund_rel)[58,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[58,"Phylum"] <- str_split(tax_table(ps_abund_rel)[58,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[58,"Class"] <- str_split(tax_table(ps_abund_rel)[58,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[58,"Order"] <- str_split(tax_table(ps_abund_rel)[58,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[58,"Family"] <- str_split(tax_table(ps_abund_rel)[58,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[58,"Genus"] <- str_split(tax_table(ps_abund_rel)[58,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[58,"Species"] <- str_split(tax_table(ps_abund_rel)[58,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[59,"Domain"] <- str_split(tax_table(ps_abund_rel)[59,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[59,"Phylum"] <- str_split(tax_table(ps_abund_rel)[59,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[59,"Class"] <- str_split(tax_table(ps_abund_rel)[59,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[59,"Order"] <- str_split(tax_table(ps_abund_rel)[59,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[59,"Family"] <- str_split(tax_table(ps_abund_rel)[59,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[59,"Genus"] <- str_split(tax_table(ps_abund_rel)[59,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[59,"Species"] <- str_split(tax_table(ps_abund_rel)[59,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[60,"Domain"] <- str_split(tax_table(ps_abund_rel)[60,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[60,"Phylum"] <- str_split(tax_table(ps_abund_rel)[60,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[60,"Class"] <- str_split(tax_table(ps_abund_rel)[60,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[60,"Order"] <- str_split(tax_table(ps_abund_rel)[60,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[60,"Family"] <- str_split(tax_table(ps_abund_rel)[60,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[60,"Genus"] <- str_split(tax_table(ps_abund_rel)[60,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[60,"Species"] <- str_split(tax_table(ps_abund_rel)[60,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[61,"Domain"] <- str_split(tax_table(ps_abund_rel)[61,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[61,"Phylum"] <- str_split(tax_table(ps_abund_rel)[61,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[61,"Class"] <- str_split(tax_table(ps_abund_rel)[61,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[61,"Order"] <- str_split(tax_table(ps_abund_rel)[61,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[61,"Family"] <- str_split(tax_table(ps_abund_rel)[61,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[61,"Genus"] <- str_split(tax_table(ps_abund_rel)[61,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[61,"Species"] <- str_split(tax_table(ps_abund_rel)[61,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[62,"Domain"] <- str_split(tax_table(ps_abund_rel)[62,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[62,"Phylum"] <- str_split(tax_table(ps_abund_rel)[62,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[62,"Class"] <- str_split(tax_table(ps_abund_rel)[62,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[62,"Order"] <- str_split(tax_table(ps_abund_rel)[62,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[62,"Family"] <- str_split(tax_table(ps_abund_rel)[62,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[62,"Genus"] <- str_split(tax_table(ps_abund_rel)[62,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[62,"Species"] <- str_split(tax_table(ps_abund_rel)[62,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[63,"Domain"] <- str_split(tax_table(ps_abund_rel)[63,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[63,"Phylum"] <- str_split(tax_table(ps_abund_rel)[63,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[63,"Class"] <- str_split(tax_table(ps_abund_rel)[63,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[63,"Order"] <- str_split(tax_table(ps_abund_rel)[63,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[63,"Family"] <- str_split(tax_table(ps_abund_rel)[63,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[63,"Genus"] <- str_split(tax_table(ps_abund_rel)[63,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[63,"Species"] <- str_split(tax_table(ps_abund_rel)[63,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[64,"Domain"] <- str_split(tax_table(ps_abund_rel)[64,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[64,"Phylum"] <- str_split(tax_table(ps_abund_rel)[64,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[64,"Class"] <- str_split(tax_table(ps_abund_rel)[64,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[64,"Order"] <- str_split(tax_table(ps_abund_rel)[64,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[64,"Family"] <- str_split(tax_table(ps_abund_rel)[64,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[64,"Genus"] <- str_split(tax_table(ps_abund_rel)[64,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[64,"Species"] <- str_split(tax_table(ps_abund_rel)[64,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[65,"Domain"] <- str_split(tax_table(ps_abund_rel)[65,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[65,"Phylum"] <- str_split(tax_table(ps_abund_rel)[65,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[65,"Class"] <- str_split(tax_table(ps_abund_rel)[65,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[65,"Order"] <- str_split(tax_table(ps_abund_rel)[65,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[65,"Family"] <- str_split(tax_table(ps_abund_rel)[65,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[65,"Genus"] <- str_split(tax_table(ps_abund_rel)[65,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[65,"Species"] <- str_split(tax_table(ps_abund_rel)[65,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[66,"Domain"] <- str_split(tax_table(ps_abund_rel)[66,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[66,"Phylum"] <- str_split(tax_table(ps_abund_rel)[66,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[66,"Class"] <- str_split(tax_table(ps_abund_rel)[66,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[66,"Order"] <- str_split(tax_table(ps_abund_rel)[66,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[66,"Family"] <- str_split(tax_table(ps_abund_rel)[66,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[66,"Genus"] <- str_split(tax_table(ps_abund_rel)[66,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[66,"Species"] <- str_split(tax_table(ps_abund_rel)[66,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[67,"Domain"] <- str_split(tax_table(ps_abund_rel)[67,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[67,"Phylum"] <- str_split(tax_table(ps_abund_rel)[67,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[67,"Class"] <- str_split(tax_table(ps_abund_rel)[67,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[67,"Order"] <- str_split(tax_table(ps_abund_rel)[67,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[67,"Family"] <- str_split(tax_table(ps_abund_rel)[67,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[67,"Genus"] <- str_split(tax_table(ps_abund_rel)[67,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[67,"Species"] <- str_split(tax_table(ps_abund_rel)[67,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[68,"Domain"] <- str_split(tax_table(ps_abund_rel)[68,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[68,"Phylum"] <- str_split(tax_table(ps_abund_rel)[68,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[68,"Class"] <- str_split(tax_table(ps_abund_rel)[68,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[68,"Order"] <- str_split(tax_table(ps_abund_rel)[68,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[68,"Family"] <- str_split(tax_table(ps_abund_rel)[68,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[68,"Genus"] <- str_split(tax_table(ps_abund_rel)[68,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[68,"Species"] <- str_split(tax_table(ps_abund_rel)[68,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[69,"Domain"] <- str_split(tax_table(ps_abund_rel)[69,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[69,"Phylum"] <- str_split(tax_table(ps_abund_rel)[69,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[69,"Class"] <- str_split(tax_table(ps_abund_rel)[69,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[69,"Order"] <- str_split(tax_table(ps_abund_rel)[69,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[69,"Family"] <- str_split(tax_table(ps_abund_rel)[69,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[69,"Genus"] <- str_split(tax_table(ps_abund_rel)[69,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[69,"Species"] <- str_split(tax_table(ps_abund_rel)[69,"Species"], "__")[[1]][2]
tax_table(ps_abund_rel)[70,"Domain"] <- str_split(tax_table(ps_abund_rel)[70,"Domain"], "__")[[1]][2]
tax_table(ps_abund_rel)[70,"Phylum"] <- str_split(tax_table(ps_abund_rel)[70,"Phylum"], "__")[[1]][2]
tax_table(ps_abund_rel)[70,"Class"] <- str_split(tax_table(ps_abund_rel)[70,"Class"], "__")[[1]][2]
tax_table(ps_abund_rel)[70,"Order"] <- str_split(tax_table(ps_abund_rel)[70,"Order"], "__")[[1]][2]
tax_table(ps_abund_rel)[70,"Family"] <- str_split(tax_table(ps_abund_rel)[70,"Family"], "__")[[1]][2]
tax_table(ps_abund_rel)[70,"Genus"] <- str_split(tax_table(ps_abund_rel)[70,"Genus"], "__")[[1]][2]
tax_table(ps_abund_rel)[70,"Species"] <- str_split(tax_table(ps_abund_rel)[70,"Species"], "__")[[1]][2]
```


# Taxonomic summary at the Phylum Level (16S)

## Bar plots in A* and H* samples

```{r, echo=TRUE, warning=FALSE}
  # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! TODO_TOMORROW_FROM_HERE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  ## ---- Phylum ----
  #aes(color="Phylum", fill="Phylum") --> aes()
  #ggplot(data=data, aes(x=Sample, y=Abundance, fill=Phylum))

  #DEBUG: ps_subset_2_type <- merge_samples(ps_subset_2, "Type")
  #DEBUG: ps_subset_2_type_ = transform_sample_counts(ps_subset_2_type, function(x) x / sum(x))
  #DEBUG: ps_subset_2_copied <- data.table::copy(ps_subset_2)
  #plot_bar(ps_abund_relSampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack")
  #plot_bar(ps_subset_2_type_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") +
  #scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black"))

# Assuming you have a vector of sample IDs that you are interested in.
samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3")

# Subsetting the phyloseq object.
ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep)
# Convert to a data frame for manipulation with dplyr and ggplot
ps_data <- psmelt(ps_subset)
# Assuming "SampleID" is the column with your sample names
ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep)

# Now plot using ggplot, with fill determined by "Phylum"
ggplot(ps_data, aes(x = Sample, y = Abundance)) +
  geom_bar(aes(fill = Phylum), stat = "identity") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +
  theme(axis.text.x = element_text(size = 10, angle = 60, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=2))

samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")

# Subsetting the phyloseq object.
ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep)
# Convert to a data frame for manipulation with dplyr and ggplot
ps_data <- psmelt(ps_subset)
# Assuming "SampleID" is the column with your sample names
ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep)

# Now plot using ggplot, with fill determined by "Phylum"
ggplot(ps_data, aes(x = Sample, y = Abundance)) +
  geom_bar(aes(fill = Phylum), stat = "identity") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +
  theme(axis.text.x = element_text(size = 10, angle = 75, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=2))
```

Regroup together A* vs H* samples and normalize number of reads in each group using median sequencing depth.

```{r, echo=TRUE, warning=FALSE}
  samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3", "H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")
    ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep)
  ps_subset_type <- merge_samples(ps_subset, "PatientType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  #plot_bar(ps_abund_relSampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack")
  plot_bar(ps_subset_type_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +   theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=2))

  ps_subset_type <- merge_samples(ps_subset, "SampleType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  #plot_bar(ps_abund_relSampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack")
  plot_bar(ps_subset_type_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +   theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=2)) +
  scale_x_discrete(limits = c("admission", "surgery", "discharge"))
```


# Taxonomic summary at the Class Level (16S)

## Bar plots in A* and H* samples

```{r, echo=TRUE, warning=FALSE}
  #aes(color="Class", fill="Class") --> aes()
  #ggplot(data=data, aes(x=Sample, y=Abundance, fill=Class))

  #DEBUG: ps_subset_2_type <- merge_samples(ps_subset_2, "Type")
  #DEBUG: ps_subset_2_type_ = transform_sample_counts(ps_subset_2_type, function(x) x / sum(x))
  #DEBUG: ps_subset_2_copied <- data.table::copy(ps_subset_2)
  #plot_bar(ps_abund_relSampleType_, fill = "Class") + geom_bar(aes(color=Class, fill=Class), stat="identity", position="stack")
  #plot_bar(ps_subset_2_type_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") +
  #scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black"))


# Assuming you have a vector of sample IDs that you are interested in.
samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3")

# Subsetting the phyloseq object.
ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep)
# Convert to a data frame for manipulation with dplyr and ggplot
ps_data <- psmelt(ps_subset)
# Assuming "SampleID" is the column with your sample names
ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep)

# Now plot using ggplot, with fill determined by "Class"
ggplot(ps_data, aes(x = Sample, y = Abundance)) +
  geom_bar(aes(fill = Class), stat = "identity") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +
  theme(axis.text.x = element_text(size = 10, angle = 60, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=3))

samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")

# Subsetting the phyloseq object.
ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep)
# Convert to a data frame for manipulation with dplyr and ggplot
ps_data <- psmelt(ps_subset)
# Assuming "SampleID" is the column with your sample names
ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep)

# Now plot using ggplot, with fill determined by "Class"
ggplot(ps_data, aes(x = Sample, y = Abundance)) +
  geom_bar(aes(fill = Class), stat = "identity") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +
  theme(axis.text.x = element_text(size = 10, angle = 75, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=3))
```

Regroup together A* vs H* samples and normalize number of reads in each group using median sequencing depth.

```{r, echo=TRUE, warning=FALSE}
  samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3","H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")
  ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep)
  ps_subset_type <- merge_samples(ps_subset, "PatientType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  #plot_bar(ps_abund_relSampleType_, fill = "Class") + geom_bar(aes(color=Class, fill=Class), stat="identity", position="stack")
  plot_bar(ps_subset_type_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +   theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=3))

  # In the followin code, I generated a plot, in which x axis is "admission", "discharge", "surgery". I want to change the order to "admission", "surgery" and "discharge".
  ps_subset_type <- merge_samples(ps_subset, "SampleType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  #plot_bar(ps_abund_relSampleType_, fill = "Class") + geom_bar(aes(color=Class, fill=Class), stat="identity", position="stack")
  plot_bar(ps_subset_type_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) +   theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black")) +
  theme(legend.position="bottom") +
  guides(fill=guide_legend(nrow=3)) +
  scale_x_discrete(limits = c("admission", "surgery", "discharge"))
```


\pagebreak
# Alpha diversity (16S)

```{r, echo=FALSE, warning=FALSE}
#TEXT_FROM_MAINPAGE# Plot Chao1 richness estimator, Observed OTUs, Shannon index, and Phylogenetic diversity.
#Regroup together samples from the same group.
# using rarefied data
#FITTING2: CONSOLE:
#gunzip table_even4753.biom.gz
#alpha_diversity.py -i table_even9893.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering/rep_set.tre

#In this specific block of code (8.3 and 8.4), we are only comparing two groups (Aneurysm vs. Hypophysis).
#    Because there is only one single comparison, there is no "multiple testing" problem to correct for.
#    Mathematically, the raw p-value (p) and the adjusted p-value (p.adj) are exactly the same.
#    Therefore, the ggpubr::compare_means() function is smart enough to realize this and omits the p.adj column entirely to keep the output clean. It only shows the raw p value.
#    p.format is only rounded values of p.adj!
```


## Alpha diversity among timepoints in aneurysm samples (Paired analysis)

```{r, echo=FALSE, warning=FALSE}
#TEXT_FROM_MAINPAGE# Friedman test for overall longitudinal comparison, and paired t-test / paired Wilcoxon for post-hoc.
hmp.div_qiime <- read.csv("adiv_even_A.txt", sep="\t")
colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree")
div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name")

# Keep PatientID for paired statistical tests
div.df2 <- div.df[, c("sam_name", "PatientID", "SampleType", "shannon")]
colnames(div.df2) <- c("Sample", "PatientID", "Group", "Shannon")

# Set the chronological order of the timepoints
div.df2$Group <- factor(div.df2$Group, levels = c("admission", "surgery", "discharge"))
```

To evaluate temporal changes in microbial alpha diversity (Shannon index) within the **aneurysm cohort** across three clinical timepoints (admission, surgery, and discharge), a Friedman test was employed. The Friedman test is a non-parametric alternative to one-way repeated-measures ANOVA, specifically designed for comparing three or more paired or matched groups.

```{r, echo=FALSE, warning=FALSE}
# Note: Using rstatix::friedman_test to explicitly account for PatientID pairing
stat.test.Shannon <- div.df2 %>% friedman_test(Shannon ~ Group | PatientID)
knitr::kable(stat.test.Shannon, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

To identify specific temporal shifts between individual timepoints, post-hoc pairwise comparisons were conducted using paired t-tests.

```{r, echo=FALSE, warning=FALSE}
pw_t_paired <- div.df2 %>%
  pairwise_t_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>%
  # Overwrite the p.adj.signif column with your custom rules
  mutate(
    p.adj.signif = case_when(
      p.adj < 0.001 ~ "***",
      p.adj < 0.01  ~ "**",
      p.adj < 0.05  ~ "*",
      TRUE          ~ "ns"
    )
  )
knitr::kable(pw_t_paired, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Robustness check: Paired Wilcoxon signed-rank test (non-parametric alternative)

```{r, echo=FALSE, warning=FALSE}
pw_wilcox_paired <- div.df2 %>% pairwise_wilcox_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>%
  # Overwrite the p.adj.signif column with your custom rules
  mutate(
    p.adj.signif = case_when(
      p.adj < 0.001 ~ "***",
      p.adj < 0.01  ~ "**",
      p.adj < 0.05  ~ "*",
      TRUE          ~ "ns"
    )
  )
knitr::kable(pw_wilcox_paired, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Plotting (displaying paired p-values from paired t-tests)

```{r, echo=FALSE, warning=FALSE}
#Why p.adjust.method = "BH" does not work!
#1. At first AI suggests me use p.adjust.method = "BH", because if not setting, it default use unadjusted values:
#Unadjusted vs. Adjusted P-values (The main culprit):
#Your table displays the Benjamini-Hochberg (BH) adjusted p-values (p.adj), where the smallest value is 0.054 (which correctly maps to ns since it is > 0.05). However, stat_compare_means() in ggpubr defaults to plotting the unadjusted raw p-values. For the Wilcoxon test, your raw p-value is 0.018. Because 0.018 < 0.05, the plot automatically draws a * (significant), completely ignoring the multiple-testing correction that was applied in your table.
#2. But I use p.adjust.method = "BH", it still does not work!
#The reason this is happening is due to a known limitation in ggpubr: stat_compare_means() completely ignores the p.adjust.method argument when you provide a list of comparisons. When you supply comparisons, ggpubr falls back to the ggsignif package under the hood, which calculates each bracket independently using raw, unadjusted p-values. Because your raw p-value is 0.018 (which is < 0.05), it will always draw a *, regardless of what you put in p.adjust.method.
#3. AI suggested solution: Use geom_bracket() --> But it has also failed!
p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group",
               palette = c("admission" = "#D55E00", "surgery" = "#E69F00", "discharge" = "#F0E442"),
               width = 0.5, legend = "none") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Shannon Diversity: Aneurysm Samples (Paired timepoints)") +
  stat_compare_means(comparisons = list(c("admission", "surgery"),
                                        c("surgery", "discharge"),
                                        c("admission", "discharge")),
                     method = "t.test",
                     paired = TRUE,
                     p.adjust.method = "BH",  #It does not work in the plot.
                     label = "p.signif",
                     size = 4)
print(p)
ggsave("./figures/alpha_diversity_Shannon_SampleType_A_paired_ttest.png", device="png", height = 6, width = 7)
ggsave("./figures/alpha_diversity_Shannon_SampleType_A_paired_ttest.svg", device="svg", height = 6, width = 7)
```


## Alpha diversity among timepoints in hypophysis samples (Paired analysis)

```{r, echo=FALSE, warning=FALSE}
#TEXT_FROM_MAINPAGE# Friedman test for overall longitudinal comparison, and paired t-test / paired Wilcoxon for post-hoc.
hmp.div_qiime <- read.csv("adiv_even_H.txt", sep="\t")
colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree")
div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name")

# Keep PatientID for paired statistical tests
div.df2 <- div.df[, c("sam_name", "PatientID", "SampleType", "shannon")]
colnames(div.df2) <- c("Sample", "PatientID", "Group", "Shannon")

# Set the chronological order of the timepoints
div.df2$Group <- factor(div.df2$Group, levels = c("admission", "surgery", "discharge"))
```

To evaluate temporal changes in microbial alpha diversity (Shannon index) within the **hypophysis cohort** across three clinical timepoints (admission, surgery, and discharge), a Friedman test was employed. The Friedman test is a non-parametric alternative to one-way repeated-measures ANOVA, specifically designed for comparing three or more paired or matched groups.

```{r, echo=FALSE, warning=FALSE}
# Note: Using rstatix::friedman_test to explicitly account for PatientID pairing
stat.test.Shannon <- div.df2 %>% friedman_test(Shannon ~ Group | PatientID)
knitr::kable(stat.test.Shannon, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

To identify specific temporal shifts between individual timepoints, post-hoc pairwise comparisons were conducted using paired t-tests.

```{r, echo=FALSE, warning=FALSE}
pw_t_paired <- div.df2 %>%
  pairwise_t_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>%
  # Overwrite the p.adj.signif column with your custom rules
  mutate(
    p.adj.signif = case_when(
      p.adj < 0.001 ~ "***",
      p.adj < 0.01  ~ "**",
      p.adj < 0.05  ~ "*",
      TRUE          ~ "ns"
    )
  )
knitr::kable(pw_t_paired, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Robustness check: Paired Wilcoxon signed-rank test (non-parametric alternative)

```{r, echo=FALSE, warning=FALSE}
pw_wilcox_paired <- div.df2 %>% pairwise_wilcox_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>%
  # Overwrite the p.adj.signif column with your custom rules
  mutate(
    p.adj.signif = case_when(
      p.adj < 0.001 ~ "***",
      p.adj < 0.01  ~ "**",
      p.adj < 0.05  ~ "*",
      TRUE          ~ "ns"
    )
  )
knitr::kable(pw_wilcox_paired, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Plotting (displaying paired p-values from paired t-tests)

```{r, echo=FALSE, warning=FALSE}
p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group",
               palette = c("admission" = "#0072B2", "surgery" = "#56B4E9", "discharge" = "#999999"),
               width = 0.5, legend = "none") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Shannon Diversity: Hypophysis Samples (Paired timepoints)") +
  stat_compare_means(comparisons = list(c("admission", "surgery"),
                                        c("surgery", "discharge"),
                                        c("admission", "discharge")),
                     method = "t.test",
                     paired = TRUE,
                     p.adjust.method = "BH",  #It does not work in the plot.
                     label = "p.signif",
                     size = 4)
print(p)
ggsave("./figures/alpha_diversity_Shannon_SampleType_H_paired_ttest.png", device="png", height = 6, width = 7)
ggsave("./figures/alpha_diversity_Shannon_SampleType_H_paired_ttest.svg", device="svg", height = 6, width = 7)
```


## Alpha diversity between aneurysm and hypophysis (All timepoints)

```{r, echo=FALSE, warning=FALSE}
library(dplyr)
library(ggpubr)
library(knitr)
library(kableExtra)

hmp.div_qiime <- read.csv("adiv_even.txt", sep="\t")
colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree")
div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name")

# Extract only Shannon values and grouping variable
div.df2 <- div.df[, c("sam_name", "PatientType", "shannon")]
colnames(div.df2) <- c("Sample", "Group", "Shannon")
```

Two-group comparisons: Wilcoxon rank-sum test (Non-parametric) & Independent t-test (Parametric)

```{r, echo=FALSE, warning=FALSE}
# 1. Non-parametric test (Wilcoxon rank-sum / Mann-Whitney U)
stat.test.wilcox <- compare_means(Shannon ~ Group, data = div.df2, method = "wilcox.test")
cat("--- Wilcoxon Rank-Sum Test (All timepoints) ---\n")
knitr::kable(stat.test.wilcox, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

# 2. Parametric test (Independent two-sample t-test)
# Note: We use independent t-test because Aneurysm and Hypophysis are different patients.
stat.test.ttest <- compare_means(Shannon ~ Group, data = div.df2, method = "t.test")
cat("\n--- Independent t-test (All timepoints) ---\n")
knitr::kable(stat.test.ttest, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Plotting (Displaying Wilcoxon rank-sum test)

```{r, echo=FALSE, warning=FALSE}
p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group",
               palette = c("aneurysm" = "#E69F00", "hypophysis" = "#56B4E9"),
               width = 0.5, legend = "none") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Shannon Diversity: Aneurysm vs Hypophysis (All timepoints)") +
  stat_compare_means(comparisons = list(c("aneurysm", "hypophysis")),
                     method = "wilcox.test",
                     label = "p.signif", size = 5)

print(p)
ggsave("./figures/alpha_diversity_Shannon_PatientType.png", device="png", height = 6, width = 6)
ggsave("./figures/alpha_diversity_Shannon_PatientType.svg", device="svg", height = 6, width = 6)
```

## Aneurysm vs. Hypophysis AT SURGERY ONLY

*New block: Specifically filters for the surgery timepoint to compare the two cohorts without dilution.*

```{r, echo=TRUE, warning=FALSE}
hmp.div_qiime <- read.csv("adiv_even.txt", sep="\t")
colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree")
div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name")

# Filter for the surgery timepoint samples. This line ensures that ONLY the "surgery" timepoint is kept for the analysis.
div.df_surgery <- div.df %>% filter(SampleType == "surgery")

div.df2 <- div.df_surgery[, c("sam_name", "PatientType", "shannon")]
colnames(div.df2) <- c("Sample", "Group", "Shannon")
```

Two-group comparisons: Wilcoxon rank-sum test & Independent t-test

```{r, echo=FALSE, warning=FALSE}
# 1. Non-parametric test (Wilcoxon)
stat.test.wilcox <- compare_means(Shannon ~ Group, data = div.df2, method = "wilcox.test")
cat("--- Wilcoxon Rank-Sum Test (Surgery ONLY) ---\n")
knitr::kable(stat.test.wilcox, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

# 2. Parametric test (Independent two-sample t-test)
stat.test.ttest <- compare_means(Shannon ~ Group, data = div.df2, method = "t.test")
cat("\n--- Independent t-test (Surgery ONLY) ---\n")
knitr::kable(stat.test.ttest, digits = 3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Plotting (Displaying Wilcoxon rank-sum test)

```{r, echo=FALSE, warning=FALSE}
p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group",
               palette = c("aneurysm" = "#E69F00", "hypophysis" = "#56B4E9"),
               width = 0.5, legend = "none") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Shannon Diversity: Aneurysm vs Hypophysis (Surgery timepoint ONLY)") +
  stat_compare_means(comparisons = list(c("aneurysm", "hypophysis")),
                     method = "t.test", # Using independent t-test
                     label = "p.signif", size = 5)

print(p)
ggsave("./figures/alpha_diversity_Shannon_Surgery_ONLY.png", device="png", height = 6, width = 6)
ggsave("./figures/alpha_diversity_Shannon_Surgery_ONLY.svg", device="svg", height = 6, width = 6)
```


# Beta diversity (16S)

Group differences in microbial community composition (beta-diversity) were assessed using Bray-Curtis dissimilarity matrices. Statistical significance was evaluated using permutational multivariate analysis of variance (PERMANOVA; `adonis2` function in the vegan R package v2.7.2) with 999 permutations. Pairwise comparisons were conducted between all group combinations, and *P*-values were adjusted for multiple testing using the Benjamini-Hochberg false discovery rate (FDR) procedure. Principal coordinates analysis (PCoA) was performed on the Bray-Curtis distance matrix to visualize sample clustering patterns. The first two principal coordinates explained 10.84% (PCo1) and 9.41% (PCo2) of the total variation in community composition. Notably, significant temporal shifts in community structure were observed between the surgery and discharge timepoints in both cohorts: **aneurysm patients (A2 vs. A3: R² = 0.068, *P*~adj~ = 0.007)** and **hypophysis patients (H2 vs. H3: R² = 0.055, *P*~adj~ = 0.002)**, indicating measurable changes in the nasal microbiome during the post-surgical recovery period. Additionally, significant differences were detected between the two patient cohorts at corresponding timepoints (**A1 vs. H1: *P*~adj~ = 0.007; A2 vs. H2: *P*~adj~ = 0.001; A3 vs. H3: *P*~adj~ = 0.049**), suggesting distinct baseline microbiome profiles between aneurysm and hypophysis surgery patients.

```{r, echo=FALSE, warning=FALSE, fig.cap="Beta diversity", out.width = '100%', fig.align= "center"}
#file:///home/jhuang/DATA_B/Data_Luise_Epidome_longitudinal_nose/core_diversity_e9893/bdiv_even9893/bdiv_even9893_Group/unweighted_unifrac_boxplots/Group_Stats.txt
#beta_diversity_group_stats<-read.csv("unweighted_unifrac_boxplots_Group_Stats.txt",sep="\t")
#knitr::kable(beta_diversity_group_stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
library(readxl)
bray_pairwise_PERMANOVA <- read_excel("./figures_All_Combined/Bray_pairwise_PERMANOVA.xlsx")
#library(openxlsx)
#bray_pairwise_PERMANOVA <- read.xlsx("./figures_All_Combined/Bray_pairwise_PERMANOVA.xlsx")
#bray_pairwise_PERMANOVA<-read.csv("./figures_All_Combined/Bray_pairwise_PERMANOVA.csv",sep=",")
#knitr::kable(bray_pairwise_PERMANOVA) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
# Get total number of rows
n_rows <- nrow(bray_pairwise_PERMANOVA)
# Identify row indices where adjusted p-value <= 0.05 (excluding NA rows)
sig_rows <- which(!is.na(bray_pairwise_PERMANOVA$p_adj_BH) &
                  bray_pairwise_PERMANOVA$p_adj_BH <= 0.05)
# Render the table with conditional row coloring
knitr::kable(bray_pairwise_PERMANOVA, escape = FALSE, digits=3) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) %>%
  # Apply yellow background to significant rows (overwrites grey where they overlap)
  kableExtra::row_spec(sig_rows, background = "yellow", bold = TRUE)

knitr::include_graphics("./figures_All_Combined/PCoA.png")
knitr::include_graphics("./figures_All_Combined/PCoA3.png")
```


# Differential abundance analysis (16S)

Differential abundance analysis was performed to identify operational taxonomic units (OTUs) with significant abundance alterations across three clinical stages (admission, surgery, and discharge). Six independent pairwise comparisons were conducted within each patient cohort (aneurysm and hypophysis). Statistical testing was performed using the DESeq2 R package (v1.46.0) based on a negative binomial generalized linear model (Wald test). *P*-values were adjusted for multiple testing using the Benjamini-Hochberg false discovery rate (FDR) procedure, and OTUs with an adjusted *P*-value < 0.05 were considered statistically significant.

## admission vs. surgery in the aneurysm samples

```{r, echo=TRUE, warning=FALSE}
# ------------------------------------------------------------
# 3) DESeq2: non-rarefied integer counts + optional taxon prefilter
# ------------------------------------------------------------
ps_deseq <- ps_filt

#NOTE: for PICRUSt2 analysis, we used table_even42369.biom as input.
#NOTE: ps_deseq for beta-diversity in MicrobiotatProcess
ps_deseq_sel2 <- data.table::copy(ps_deseq)
otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("A1.1","A2.1","A3.1","A4.1","A5.1","A10.1","A12.1","A17.1", "A20.1","A21.1","A22.1","A24.1","A25.1","A27.1","A28.1",  "A1.2","A2.2","A3.2","A4.2","A5.2","A10.2","A12.2","A17.2", "A20.2","A21.2","A22.2","A24.2","A25.2","A27.2","A28.2")]
diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType)
diagdds$SampleType <- relevel(diagdds$SampleType, "admission")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)

res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]

# --- Check if there are any significant results before proceeding ---
if (nrow(sigtab) == 0) {
  message(">>> No significant features found with padj < ", alpha, ".")

  # 1. Print a clear markdown hint in the HTML
  cat(sprintf("\n> ⚠️ **Note:** No significant differentially abundant features were found for this comparison (padj < %s).\n\n", alpha))

  # 2. Generate a clean placeholder table in the HTML safely
  #empty_msg_df <- data.frame(
  #  Status = "No significant records",
  #  Details = sprintf("No taxa met the significance threshold (padj < %s) after multiple testing correction.", alpha)
  #)
  #knitr::kable(empty_msg_df, col.names = c("Status", "Details"), align = "c") %>%
  #  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE)

  # 3. Save an empty Excel file with the correct headers (so the file structure remains consistent)
  fname <- "DEGs_admission_vs_surgery_in_aneurysm"
  openxlsx::write.xlsx(as.data.frame(sigtab), file = paste0(fname, ".xlsx"), rowNames = TRUE)

} else {
  sigtab <- cbind(
    as(sigtab, "data.frame"),
    as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix")
  )

  # file base name
  fname <- "DEGs_admission_vs_surgery_in_aneurysm"
  openxlsx::write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE)

  # 1. Safely render the table in HTML without triggering the 'viewer' error
  knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

  # 2. Generate the plot
  theme_set(theme_bw())

  # Order factors by max log2FoldChange for cleaner plotting
  x_class <- tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x))
  sigtab$Class <- factor(as.character(sigtab$Class), levels = names(sort(x_class)))

  x_family <- tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
  sigtab$Family <- factor(as.character(sigtab$Family), levels = names(sort(x_family)))

  # Build the plot
  p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) +
    geom_point(aes(size = padj)) +
    scale_size_continuous(name = "padj", range = c(8, 4)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5))

  print(p)

  # SVG (svglite gives crisp text)
  if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite")
  ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite,
                  width = 8, height = 6, units = "in", dpi = 300)

  # PNG
  ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png",
                  width = 8, height = 6, units = "in", dpi = 300)
}
```


## admission vs. discharge in the aneurysm samples

```{r, echo=TRUE, warning=FALSE}
# ------------------------------------------------------------
# 3) DESeq2: non-rarefied integer counts + optional taxon prefilter
# ------------------------------------------------------------
ps_deseq <- ps_filt

ps_deseq_sel2 <- data.table::copy(ps_deseq)
otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("A1.1","A2.1","A3.1","A4.1","A5.1","A10.1","A12.1","A17.1", "A20.1","A21.1","A22.1","A24.1","A25.1","A27.1","A28.1",  "A1.3","A2.3","A3.3","A4.3","A5.3","A10.3","A12.3","A17.3", "A20.3","A21.3","A22.3","A24.3","A25.3","A27.3","A28.3")]
diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType)
diagdds$SampleType <- relevel(diagdds$SampleType, "admission")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)

res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]

# --- Check if there are any significant results before proceeding ---
if (nrow(sigtab) == 0) {
  message(">>> No significant features found with padj < ", alpha, ".")

  # 1. Print a clear markdown hint in the HTML
  cat(sprintf("\n> ⚠️ **Note:** No significant differentially abundant features were found for this comparison (padj < %s).\n\n", alpha))

  # 3. Save an empty Excel file with the correct headers (so the file structure remains consistent)
  fname <- "DEGs_admission_vs_discharge_in_aneurysm"
  openxlsx::write.xlsx(as.data.frame(sigtab), file = paste0(fname, ".xlsx"), rowNames = TRUE)

} else {
  sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix"))
  # file base name
  fname <- "DEGs_admission_vs_discharge_in_aneurysm"
  write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE)
  knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

  theme_set(theme_bw())
  scale_fill_discrete <- function(palname = "Set1", ...) {
      scale_fill_brewer(palette = palname, ...)
  }
  x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x))
  x = sort(x)
  sigtab$Class = factor(as.character(sigtab$Class), levels=names(x))
  x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
  x = sort(x)
  sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))

  # build the plot
  p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) +
    geom_point(aes(size = padj)) +
    scale_size_continuous(name = "padj", range = c(8, 4)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5))
  print(p)
  # SVG (svglite gives crisp text)
  if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite")
  ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite,
                  width = 8, height = 6, units = "in", dpi = 300)
  # PNG
  ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png",
                  width = 8, height = 6, units = "in", dpi = 300)
}
```


## surgery vs. discharge in the aneurysm samples

```{r, echo=TRUE, warning=FALSE}
  # ------------------------------------------------------------
  # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter
  # ------------------------------------------------------------
  ps_deseq <- ps_filt

  ps_deseq_sel2 <- data.table::copy(ps_deseq)
  otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("A1.2","A2.2","A3.2","A4.2","A5.2","A10.2","A12.2","A17.2", "A20.2","A21.2","A22.2","A24.2","A25.2","A27.2","A28.2",  "A1.3","A2.3","A3.3","A4.3","A5.3","A10.3","A12.3","A17.3", "A20.3","A21.3","A22.3","A24.3","A25.3","A27.3","A28.3")]
  diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType)
  diagdds$SampleType <- relevel(diagdds$SampleType, "surgery")
  diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
  resultsNames(diagdds)

  res = results(diagdds, cooksCutoff = FALSE)
  alpha = 0.05
  sigtab = res[which(res$padj < alpha), ]

  sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix"))
  # file base name
  fname <- "DEGs_surgery_vs_discharge_in_aneurysm"
  write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE)
  knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

  theme_set(theme_bw())
  scale_fill_discrete <- function(palname = "Set1", ...) {
      scale_fill_brewer(palette = palname, ...)
  }
  x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x))
  x = sort(x)
  sigtab$Class = factor(as.character(sigtab$Class), levels=names(x))
  x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
  x = sort(x)
  sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))

  # build the plot
  p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) +
    geom_point(aes(size = padj)) +
    scale_size_continuous(name = "padj", range = c(8, 4)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5))
  print(p)
  # SVG (svglite gives crisp text)
  if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite")
  ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite,
                  width = 8, height = 6, units = "in", dpi = 300)
  # PNG
  ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png",
                  width = 8, height = 6, units = "in", dpi = 300)
```


## admission vs. surgery in the hypophysis samples

```{r, echo=TRUE, warning=FALSE}
  # ------------------------------------------------------------
  # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter
  # ------------------------------------------------------------
  ps_deseq <- ps_filt

  ps_deseq_sel2 <- data.table::copy(ps_deseq)
  otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("H11.1","H13.1","H14.1","H16.1","H19.1","H24.1","H53.1","H56.1","H57.1","H59.1","H60.1","H64.1","H65.1","H67.1","H72.1","H73.1","H76.1","H78.1","H91.1","H94.1","H99.1",  "H11.2","H13.2","H14.2","H16.2","H19.2","H24.2","H53.2","H56.2","H57.2","H59.2","H60.2","H64.2","H65.2","H67.2","H72.2","H73.2","H76.2","H78.2","H91.2","H94.2","H99.2")]
  diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType)
  diagdds$SampleType <- relevel(diagdds$SampleType, "admission")
  diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
  resultsNames(diagdds)

  res = results(diagdds, cooksCutoff = FALSE)
  alpha = 0.05
  sigtab = res[which(res$padj < alpha), ]

  sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix"))
  # file base name
  fname <- "DEGs_admission_vs_surgery_in_hypophysis"
  write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE)
  knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

  theme_set(theme_bw())
  scale_fill_discrete <- function(palname = "Set1", ...) {
      scale_fill_brewer(palette = palname, ...)
  }
  x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x))
  x = sort(x)
  sigtab$Class = factor(as.character(sigtab$Class), levels=names(x))
  x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
  x = sort(x)
  sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))

  # build the plot
  p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) +
    geom_point(aes(size = padj)) +
    scale_size_continuous(name = "padj", range = c(8, 4)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5))
  print(p)
  # SVG (svglite gives crisp text)
  if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite")
  ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite,
                  width = 8, height = 6, units = "in", dpi = 300)
  # PNG
  ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png",
                  width = 8, height = 6, units = "in", dpi = 300)
```


## admission vs. discharge in the hypophysis samples

```{r, echo=TRUE, warning=FALSE}
# ------------------------------------------------------------
# 3) DESeq2: non-rarefied integer counts + optional taxon prefilter
# ------------------------------------------------------------
ps_deseq <- ps_filt

ps_deseq_sel2 <- data.table::copy(ps_deseq)
otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("H11.1","H13.1","H14.1","H16.1","H19.1","H24.1","H53.1","H56.1","H57.1","H59.1","H60.1","H64.1","H65.1","H67.1","H72.1","H73.1","H76.1","H78.1","H91.1","H94.1","H99.1",  "H11.3","H13.3","H14.3","H16.3","H19.3","H24.3","H53.3","H56.3","H57.3","H59.3","H60.3","H64.3","H65.3","H67.3","H72.3","H73.3","H76.3","H78.3","H91.3","H94.3","H99.3")]
diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType)
diagdds$SampleType <- relevel(diagdds$SampleType, "admission")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)

res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]

# --- Check if there are any significant results before proceeding ---
if (nrow(sigtab) == 0) {
  message(">>> No significant features found with padj < ", alpha, ".")

  # 1. Print a clear markdown hint in the HTML
  cat(sprintf("\n> ⚠️ **Note:** No significant differentially abundant features were found for this comparison (padj < %s).\n\n", alpha))

  # 3. Save an empty Excel file with the correct headers (so the file structure remains consistent)
  fname <- "DEGs_admission_vs_discharge_in_hypophysis"
  openxlsx::write.xlsx(as.data.frame(sigtab), file = paste0(fname, ".xlsx"), rowNames = TRUE)

} else {
  sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix"))
  # file base name
  fname <- "DEGs_admission_vs_discharge_in_hypophysis"
  write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE)
  knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

  theme_set(theme_bw())
  scale_fill_discrete <- function(palname = "Set1", ...) {
      scale_fill_brewer(palette = palname, ...)
  }
  x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x))
  x = sort(x)
  sigtab$Class = factor(as.character(sigtab$Class), levels=names(x))
  x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
  x = sort(x)
  sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))

  # build the plot
  p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) +
    geom_point(aes(size = padj)) +
    scale_size_continuous(name = "padj", range = c(8, 4)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5))
  print(p)
  # SVG (svglite gives crisp text)
  if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite")
  ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite,
                  width = 8, height = 6, units = "in", dpi = 300)
  # PNG
  ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png",
                  width = 8, height = 6, units = "in", dpi = 300)
}
```


## surgery vs. discharge in the hypophysis samples

```{r, echo=TRUE, warning=FALSE}
  # ------------------------------------------------------------
  # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter
  # ------------------------------------------------------------
  ps_deseq <- ps_filt

  ps_deseq_sel2 <- data.table::copy(ps_deseq)
  otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("H11.2","H13.2","H14.2","H16.2","H19.2","H24.2","H53.2","H56.2","H57.2","H59.2","H60.2","H64.2","H65.2","H67.2","H72.2","H73.2","H76.2","H78.2","H91.2","H94.2","H99.2",  "H11.3","H13.3","H14.3","H16.3","H19.3","H24.3","H53.3","H56.3","H57.3","H59.3","H60.3","H64.3","H65.3","H67.3","H72.3","H73.3","H76.3","H78.3","H91.3","H94.3","H99.3")]
  diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType)
  diagdds$SampleType <- relevel(diagdds$SampleType, "surgery")
  diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
  resultsNames(diagdds)

  res = results(diagdds, cooksCutoff = FALSE)
  alpha = 0.05
  sigtab = res[which(res$padj < alpha), ]

  sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix"))
  # file base name
  fname <- "DEGs_surgery_vs_discharge_in_hypophysis"
  write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE)
  knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

  theme_set(theme_bw())
  scale_fill_discrete <- function(palname = "Set1", ...) {
      scale_fill_brewer(palette = palname, ...)
  }
  x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x))
  x = sort(x)
  sigtab$Class = factor(as.character(sigtab$Class), levels=names(x))
  x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
  x = sort(x)
  sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))

  # build the plot
  p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) +
    geom_point(aes(size = padj)) +
    scale_size_continuous(name = "padj", range = c(8, 4)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5))
  print(p)
  # SVG (svglite gives crisp text)
  if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite")
  ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite,
                  width = 8, height = 6, units = "in", dpi = 300)
  # PNG
  ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png",
                  width = 8, height = 6, units = "in", dpi = 300)
```


# ST summary (g216 & yycH)

```{r, echo=TRUE, warning=FALSE}
    ##Change your working directory to where the files are located
    count_data <- read.table("../count_table_seq31_seq37_ST.txt", header = TRUE, row.names = 1, sep=",")
    OTU = otu_table(as.matrix(count_data), taxa_are_rows = TRUE)

    sample_data <- read.csv("../map3_corrected.txt", sep="\t", row.names = 1)
    SAM = sample_data(sample_data, errorIfNULL = TRUE)
    #rownames(SAM) <- c("P7.Nose", "P8.Nose", "P9.Nose", "P10.Nose", "P11.Nose", "P12.Nose", "P13.Nose", "P14.Nose", "P15.Nose", "P16.Foot", "P16.Nose", "P17.Foot", "P17.Nose", "P18.Foot", "P18.Nose", "P19.Foot", "P19.Nose", "P20.Foot", "P20.Nose", "AH.LH", "AH.NLH", "AH.Nose", "AL.LH", "AL.NLH", "AL.Nose", "CB.LH", "CB.NLH", "CB.Nose", "HR.LH", "HR.NLH", "HR.Nose", "KK.LH", "KK.NLH", "KK.Nose", "MC.LH", "MC.NLH", "MC.Nose", "MR.LH", "MR.NLH", "MR.Nose", "PT2.LH", "PT2.NLH", "PT2.Nose", "RP.LH", "RP.NLH", "RP.Nose", "SA.LH", "SA.NLH", "SA.Nose", "XN.LH", "XN.NLH", "XN.Nose")

    #tree_file <- read_tree("../clustering/rep_set.tre")
    ps <- phyloseq(OTU, SAM)   #, tree_file)
    saveRDS(ps, "./ST_ps.rds")

```

Visualize data
```{r, echo=TRUE, warning=FALSE}
  sample_names(ps)
  sample_variables(ps)
```


Normalize number of reads in each sample using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
#> sample_sums(ps)
#  A1.1   A1.2   A1.3   A2.1   A2.2   A2.3   A3.1   A3.2   A3.3   A4.1   A4.2
# 58236  32274  32575  76814  83305  81171 103250  52566  95302 100983  85068
#  A4.3   A5.1   A5.2   A5.3  A10.1  A10.2  A10.3  A12.1  A12.2  A12.3  A17.1
# 68205  93481  95239  69527  23772 163696  97159  37027  53896  31686  90402
# A17.2  A17.3  A20.1  A20.2  A20.3  A21.1  A21.2  A21.3  A22.1  A22.2  A22.3
#106581 120234  55312  49696  61397 126445 108008  87120 125673  77986  51353
# A24.1  A24.2  A24.3  A25.1  A25.2  A25.3  A27.1  A27.2  A27.3  A28.1  A28.2
# 67387  49384 100837  48487  80262  60919  36941  80448  77111  31220  29387
# A28.3  H11.1  H11.2  H11.3  H13.1  H13.2  H13.3  H14.1  H14.2  H14.3  H16.1
# 43240  52289  44011  56319  44710  26599  42339  33493  60110  22282 421887
# H16.2  H16.3  H19.1  H19.2  H19.3  H24.1  H24.2  H24.3  H53.1  H53.2  H53.3
# 38799  79011  36510  57909  72987  34716  47999  39182  32926  54544  42584
# H56.1  H56.2  H56.3  H57.1  H57.2  H57.3  H59.1  H59.2  H59.3
# 38215  74656  55938  37925  43340  76032  33029  32992  57646
#total = median(sample_sums(ps))
#> total
#[1] 56191
# NORMALIZE number of reads in each sample using median sequencing depth. In other words, now the sum of each sample is 56191 --> Similar methods with the cutoff.
standf = function(x, t=total) round(t * (x / sum(x)))
ps = transform_sample_counts(ps, standf)
# Transform absolute abundance to relative abundance.
ps_rel <- microbiome::transform(ps, "compositional")

saveRDS(ps_rel, "./ps.rds")
hmp.meta <- meta(ps)
hmp.meta$sam_name <- rownames(hmp.meta)

knitr::kable(otu_table(ps_rel)) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```


# Bar plots for ST (g216 & yycH)

## Bar plots of A* samples

```{r, echo=TRUE, warning=FALSE}
# Assuming you have a vector of sample IDs that you are interested in.
samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3")

# Subsetting the phyloseq object.
ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep)
# Convert to a data frame for manipulation with dplyr and ggplot
ps_data <- psmelt(ps_subset)
# Assuming "SampleID" is the column with your sample names
ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep)

# Now plot using ggplot, with fill determined by "Class"
ggplot(ps_data, aes(x = Sample, y = Abundance)) +
  geom_bar(aes(fill = OTU), stat = "identity") +
  scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red",  "navyblue", "gold",     "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen",     "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod",  "tomato","mediumpurple4","indianred",   "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) +
  labs(fill = "") +
  theme(axis.text.x = element_text(size = 9, angle = 60, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black"),
        legend.position="bottom",
        legend.text = element_text(size = 8)) +
  guides(fill=guide_legend(nrow=4))
```

## Bar plots of H* samples

```{r, echo=TRUE, warning=FALSE}
# Assuming you have a vector of sample IDs that you are interested in.
samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")

# Subsetting the phyloseq object.
ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep)
# Convert to a data frame for manipulation with dplyr and ggplot
ps_data <- psmelt(ps_subset)
# Assuming "SampleID" is the column with your sample names
ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep)

# Now plot using ggplot, with fill determined by "Class"
ggplot(ps_data, aes(x = Sample, y = Abundance)) +
  geom_bar(aes(fill = OTU), stat = "identity") +
  scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red",  "navyblue", "gold",     "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen",     "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod",  "tomato","mediumpurple4","indianred",   "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) +
  labs(fill = "") +
  theme(axis.text.x = element_text(size = 9, angle = 75, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black"),
        legend.position="bottom",
        legend.text = element_text(size = 8)) +
  guides(fill=guide_legend(nrow=4))
```

## Comparing the aneurysm and hypophyis groups

```{r, echo=TRUE, warning=FALSE}
  #Regroup together A* vs H* samples and normalize number of reads in each group using median sequencing depth.
  samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3","H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")
  ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep)
  ps_subset_type <- merge_samples(ps_subset, "PatientType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red",  "navyblue", "gold",     "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen",     "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod",  "tomato","mediumpurple4","indianred",   "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) +
  labs(fill = "") +
  theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black"),
        legend.position="bottom",
        legend.text = element_text(size = 8)) +
  guides(fill=guide_legend(nrow=4))
```

## Comparing the admission, surgery, and discharge groups

```{r, echo=TRUE, warning=FALSE}
  ps_subset_type <- merge_samples(ps_subset, "SampleType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red",  "navyblue", "gold",     "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen",     "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod",  "tomato","mediumpurple4","indianred",   "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) +
  labs(fill = "") +
  theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black"),
        legend.position="bottom",
        legend.text = element_text(size = 8)) +
  guides(fill=guide_legend(nrow=4)) +
  scale_x_discrete(limits = c("admission", "surgery", "discharge"))
```

## Comparing the admission, surgery, and discharge groups in A* samples

```{r, echo=TRUE, warning=FALSE}
  samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3")

  ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep)
  ps_subset_type <- merge_samples(ps_subset, "SampleType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red",  "navyblue", "gold",     "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen",     "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod",  "tomato","mediumpurple4","indianred",   "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) +
  labs(fill = "") +
  theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black"),
        legend.position="bottom",
        legend.text = element_text(size = 8)) +
  guides(fill=guide_legend(nrow=4)) +
  scale_x_discrete(limits = c("admission", "surgery", "discharge"))
```

## Comparing the admission, surgery, and discharge groups in H* samples

```{r, echo=TRUE, warning=FALSE}
  samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")

  ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep)
  ps_subset_type <- merge_samples(ps_subset, "SampleType")
  ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x))
  plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red",  "navyblue", "gold",     "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen",     "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod",  "tomato","mediumpurple4","indianred",   "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) +
  labs(fill = "") +
  theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1),
        axis.text.y = element_text(size = 9, colour="black"),
        legend.position="bottom",
        legend.text = element_text(size = 8)) +
  guides(fill=guide_legend(nrow=4)) +
  scale_x_discrete(limits = c("admission", "surgery", "discharge"))
```


# Binary Prevalence Analysis of Target *S. epidermidis* STs (g216 & yycH)

To evaluate the temporal dynamics of specific healthcare-associated *Staphylococcus epidermidis* clones (ST2, ST5, and ST23), we analyzed their colonization status as binary presence/absence data across three clinical timepoints (admission, surgery, discharge). This approach isolates the dynamics of strain acquisition and clearance from continuous abundance fluctuations. Overall temporal changes were assessed using Cochran’s Q test, followed by robust pairwise McNemar tests with Benjamini-Hochberg (BH) correction to identify statistically significant transitions between specific timepoints.


```{r, echo=TRUE, warning=FALSE}

# Data Preparation: Extract target STs, convert to binary (0/1) matrix, and save to Excel
# Assuming 'df' is the ST abundance data frame from your previous code (rownames = STs, colnames = samples like "A1.1")
target_sts <- c("ST2", "ST5", "ST23")

# Check if these STs exist in your data to prevent errors
available_sts <- intersect(target_sts, rownames(df))

if (length(available_sts) == 0) {
  stop("Error: Target STs not found in the data. Please check the row names.")
} else if (length(available_sts) < length(target_sts)) {
  message(sprintf("Note: The following STs were not found and will be skipped: %s",
                  paste(setdiff(target_sts, available_sts), collapse = ", ")))
}

# Convert abundance to binary: >0 becomes 1 (Present), ==0 becomes 0 (Absent)
df_binary <- df[available_sts, ] > 0
df_binary <- df_binary * 1  # Convert TRUE/FALSE to 1/0

# >>> Export the binary matrix to an Excel file <<<
# 1. Convert the matrix to a data.frame, then move rownames to a column
df_binary_export <- as.data.frame(df_binary) %>%
  tibble::rownames_to_column(var = "ST")
knitr::kable(df_binary_export) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

# 2. Save to Excel
if (!dir.exists("./figures")) dir.create("./figures")
output_excel_path <- "./figures/ST_binary_matrix.xlsx"
openxlsx::write.xlsx(df_binary_export, file = output_excel_path, rowNames = FALSE)
message(sprintf("✅ Successfully saved binary ST matrix to: %s", output_excel_path))


# Data Reshaping: Extract Patient ID and Timepoint to build long-format data
# Fixed: Convert the matrix to a data.frame first before using tibble functions
df_long <- as.data.frame(df_binary) %>%
  tibble::rownames_to_column(var = "ST") %>%
  tidyr::pivot_longer(cols = -ST, names_to = "Sample", values_to = "Presence") %>%
  mutate(
    # Extract Patient ID (e.g., "A1") from sample name (e.g., "A1.1")
    PatientID = sub("\\.[1-3]$", "", Sample),
    # Define timepoints based on the sample suffix
    Timepoint = case_when(
      grepl("\\.1$", Sample) ~ "admission",
      grepl("\\.2$", Sample) ~ "surgery",
      grepl("\\.3$", Sample) ~ "discharge"
    )
  ) %>%
  # Set Timepoint as an ordered factor for correct plotting/statistical order
  mutate(Timepoint = factor(Timepoint, levels = c("admission", "surgery", "discharge")))


# Statistical Analysis: Cochran's Q test + Robust Paired McNemar test
results_list <- list()

# Helper function for robust pairwise McNemar test (avoids silent rstatix failures)
robust_pairwise_mcnemar <- function(st_data, st_name) {
  # Pivot to wide format to easily compare timepoints per patient
  data_wide <- st_data %>%
    dplyr::select(PatientID, Timepoint, Presence) %>%
    tidyr::pivot_wider(names_from = Timepoint, values_from = Presence, values_fill = 0)

  # Check if all timepoints exist for this ST
  if (!all(c("admission", "surgery", "discharge") %in% colnames(data_wide))) {
    message(sprintf("⚠️ Warning: Missing timepoints for %s. Skipping pairwise tests.", st_name))
    return(data.frame(ST = st_name, `Timepoint 1` = NA, `Timepoint 2` = NA,
                      `Raw p-value` = NA, `Adjusted p-value (BH)` = NA, Significance = NA, check.names = FALSE))
  }

  # Helper to calculate McNemar p-value safely
  get_mcnemar_p <- function(x, y) {
    # Create a strict 2x2 contingency table
    tbl <- table(factor(x, levels = c(0, 1)), factor(y, levels = c(0, 1)))

    # If there are no discordant pairs (off-diagonals are 0), there is no change. Return p = 1.0
    if (tbl[1, 2] == 0 && tbl[2, 1] == 0) {
      return(1.0)
    }

    # Try standard McNemar test with continuity correction
    p_val <- tryCatch({
      mcnemar.test(tbl, correct = TRUE)$p.value
    }, error = function(e) {
      # Fallback: if counts are too low for Chi-square, return NA
      NA
    })
    return(p_val)
  }

  # Calculate p-values for the 3 pairwise comparisons
  p1 <- get_mcnemar_p(data_wide$admission, data_wide$surgery)
  p2 <- get_mcnemar_p(data_wide$surgery, data_wide$discharge)
  p3 <- get_mcnemar_p(data_wide$admission, data_wide$discharge)

  p_vals <- c(p1, p2, p3)

  # Adjust for multiple testing (Benjamini-Hochberg FDR)
  p_adj <- p.adjust(p_vals, method = "BH")

  # Determine significance stars
  get_sig <- function(p) {
    if (is.na(p)) return("NA")
    if (p < 0.001) return("***")
    if (p < 0.01) return("**")
    if (p < 0.05) return("*")
    return("ns")
  }

  data.frame(
    ST = st_name,
    `Timepoint 1` = c("admission", "surgery", "admission"),
    `Timepoint 2` = c("surgery", "discharge", "discharge"),
    `Raw p-value` = p_vals,
    `Adjusted p-value (BH)` = p_adj,
    Significance = sapply(p_adj, get_sig),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
}

# Run tests for each available ST
for (st in available_sts) {
  st_data <- df_long %>% filter(ST == st)

  # 3.1 Overall test: Cochran's Q test
  cochran_res <- tryCatch({
    RVAideMemoire::cochran.qtest(Presence ~ Timepoint | PatientID, data = st_data)
  }, error = function(e) {
    message(sprintf("⚠️ Cochran's Q failed for %s: %s", st, e$message))
    return(data.frame(p.value = NA, note = "Incomplete data or zero variance"))
  })

  # 3.2 Pairwise comparisons using the robust function
  pairwise_res <- robust_pairwise_mcnemar(st_data, st)

  results_list[[st]] <- list(
    Cochran_Q = cochran_res,
    Pairwise_McNemar = pairwise_res
  )
}

# Combine pairwise comparison results for all STs
final_pairwise_results <- bind_rows(lapply(results_list, function(x) x$Pairwise_McNemar))
```

Pairwise McNemar Tests with Benjamini-Hochberg (BH) Correction

```{r, echo=FALSE, warning=FALSE, message=FALSE}
knitr::kable(final_pairwise_results, digits = 4,
             col.names = c("ST", "Timepoint 1", "Timepoint 2", "Raw p-value", "Adjusted p-value (BH)", "Significance")) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Overall Temporal Changes (Cochran's Q Test)

```{r, echo=FALSE, warning=FALSE}
cochran_results_df <- do.call(rbind, lapply(available_sts, function(st) {
  res <- results_list[[st]]$Cochran_Q

  if (!is.na(res$p.value)) {
    data.frame(
      ST = st,
      `Q statistic` = round(res$statistic, 3),
      df = res$parameter,
      `p-value` = round(res$p.value, 4),
      Note = "",
      check.names = FALSE,
      stringsAsFactors = FALSE
    )
  } else {
    data.frame(
      ST = st,
      `Q statistic` = NA,
      df = NA,
      `p-value` = NA,
      Note = "Insufficient variation or missing data",
      check.names = FALSE,
      stringsAsFactors = FALSE
    )
  }
}))

# Render the Cochran's Q results as a formatted table
knitr::kable(cochran_results_df, digits = 4,
             col.names = c("ST", "Q statistic", "df", "p-value", "Note")) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

Data Visualization: Plot prevalence (%) trends across timepoints

```{r, echo=TRUE, warning=FALSE}
prevalence_data <- df_long %>%
  group_by(ST, Timepoint) %>%
  summarise(
    Prevalence = mean(Presence) * 100, # Calculate prevalence percentage
    n_present = sum(Presence),
    n_total = n(),
    .groups = 'drop'
  )

# Create line chart + scatter points
p_prevalence <- ggplot(prevalence_data, aes(x = Timepoint, y = Prevalence, group = ST, color = ST)) +
  geom_line(linewidth = 1, alpha = 0.7) +
  geom_point(size = 4) +
  geom_text(aes(label = sprintf("%.1f%%\n(n=%d/%d)", Prevalence, n_present, n_total)),
            vjust = -1.2, size = 3.5) +
  scale_y_continuous(limits = c(0, 105), breaks = seq(0, 100, 20)) +
  labs(
    title = "Prevalence of Healthcare-Associated STs Across Clinical Timepoints",
    subtitle = "Based on Presence/Absence (Binary) Data",
    x = "Clinical Timepoint",
    y = "Prevalence (%)",
    color = "Sequence Type"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    legend.position = "bottom"
  )

print(p_prevalence)

# Save the plot
ggsave("./figures/ST_prevalence_timepoints.png", plot = p_prevalence, width = 8, height = 6, dpi = 300)
```


# Alpha diversity for ST (g216 & yycH)

Plot Shannon index and observed STs.

## Alpha diversity between aneurysm and hypophysis

```{r, echo=TRUE, warning=FALSE}
hmp.div_st <- read.csv("alpha_diversity_metrics_samples_ST.csv", sep=",")
colnames(hmp.div_st) <- c("sample", "chao1", "observed_sts", "shannon")
row.names(hmp.div_st) <- hmp.div_st$sample
div.df <- merge(hmp.div_st, hmp.meta, by.x="sample", by.y = "sam_name")
div.df2 <- div.df[, c("PatientType", "shannon", "observed_sts")]
colnames(div.df2) <- c("PatientType", "Shannon", "ST")
#colnames(div.df2)
options(max.print=999999)

stat.test.Shannon <- compare_means(
 Shannon ~ PatientType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

stat.test.ST <- compare_means(
 ST ~ PatientType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

div_df_melt <- reshape2::melt(div.df2)

p <- ggboxplot(div_df_melt, x = "PatientType", y = "value",
              facet.by = "variable",
              scales = "free",
              width = 0.5,
              fill = "gray", legend= "right")
#ggpar(p, xlab = FALSE, ylab = FALSE)
lev <- levels(factor(div_df_melt$PatientType)) # get the variables
L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns")
my_stat_compare_means  <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE,
    method.args = list(), ref.group = NULL, comparisons = NULL,
    hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left",
    label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03,
    symnum.args = list(), geom = "text", position = "identity",
    na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...)
{
    if (!is.null(comparisons)) {
        method.info <- ggpubr:::.method_info(method)
        method <- method.info$method
        method.args <- ggpubr:::.add_item(method.args, paired = paired)
        if (method == "wilcox.test")
            method.args$exact <- FALSE
        pms <- list(...)
        size <- ifelse(is.null(pms$size), 0.3, pms$size)
        color <- ifelse(is.null(pms$color), "black", pms$color)
        map_signif_level <- FALSE
        if (is.null(label))
            label <- "p.format"
        if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) {
            if (ggpubr:::.is_empty(symnum.args)) {
                map_signif_level <- c(`****` = 1e-04, `***` = 0.001,
                  `**` = 0.01, `*` = 0.05, ns = 1)
            } else {
               map_signif_level <- symnum.args
            }
            if (hide.ns)
                names(map_signif_level)[5] <- " "
        }
        step_increase <- ifelse(is.null(label.y), 0.12, 0)
        ggsignif::geom_signif(comparisons = comparisons, y_position = label.y,
            test = method, test.args = method.args, step_increase = step_increase,
            size = size, color = color, map_signif_level = map_signif_level,
            tip_length = tip.length, data = data)
    } else {
        mapping <- ggpubr:::.update_mapping(mapping, label)
        layer(stat = StatCompareMeans, data = data, mapping = mapping,
            geom = geom, position = position, show.legend = show.legend,
            inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc,
                label.y.npc = label.y.npc, label.x = label.x,
                label.y = label.y, label.sep = label.sep, method = method,
                method.args = method.args, paired = paired, ref.group = ref.group,
                symnum.args = symnum.args, hide.ns = hide.ns,
                na.rm = na.rm, ...))
    }
}

p3 <- p +
  stat_compare_means(
    method="t.test",
    comparisons = list(c("aneurysm", "hypophysis")),
    label = "p.signif",
    symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")),
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))  # add this line to rotate x-axis text
#print(p3)

ggsave("./figures/alpha_diversity_ST_PatientType.png", device="png", height = 5, width = 9)
ggsave("./figures/alpha_diversity_ST_PatientType.svg", device="svg", height = 4, width = 8)
knitr::include_graphics("figures/alpha_diversity_ST_PatientType.png")
```

## Alpha diversity among admission, surgery, and discharge

```{r, echo=TRUE, warning=FALSE}
div.df2 <- div.df[, c("SampleType", "shannon", "observed_sts")]
colnames(div.df2) <- c("SampleType", "Shannon", "ST")
#colnames(div.df2)
options(max.print=999999)

stat.test.Shannon <- compare_means(
 Shannon ~ SampleType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

stat.test.ST <- compare_means(
 ST ~ SampleType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

div_df_melt <- reshape2::melt(div.df2)

p <- ggboxplot(div_df_melt, x = "SampleType", y = "value",
              facet.by = "variable",
              scales = "free",
              width = 0.5,
              fill = "gray", legend= "right")
#ggpar(p, xlab = FALSE, ylab = FALSE)
lev <- levels(factor(div_df_melt$SampleType)) # get the variables
L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns")
my_stat_compare_means  <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE,
    method.args = list(), ref.group = NULL, comparisons = NULL,
    hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left",
    label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03,
    symnum.args = list(), geom = "text", position = "identity",
    na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...)
{
    if (!is.null(comparisons)) {
        method.info <- ggpubr:::.method_info(method)
        method <- method.info$method
        method.args <- ggpubr:::.add_item(method.args, paired = paired)
        if (method == "wilcox.test")
            method.args$exact <- FALSE
        pms <- list(...)
        size <- ifelse(is.null(pms$size), 0.3, pms$size)
        color <- ifelse(is.null(pms$color), "black", pms$color)
        map_signif_level <- FALSE
        if (is.null(label))
            label <- "p.format"
        if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) {
            if (ggpubr:::.is_empty(symnum.args)) {
                map_signif_level <- c(`****` = 1e-04, `***` = 0.001,
                  `**` = 0.01, `*` = 0.05, ns = 1)
            } else {
               map_signif_level <- symnum.args
            }
            if (hide.ns)
                names(map_signif_level)[5] <- " "
        }
        step_increase <- ifelse(is.null(label.y), 0.12, 0)
        ggsignif::geom_signif(comparisons = comparisons, y_position = label.y,
            test = method, test.args = method.args, step_increase = step_increase,
            size = size, color = color, map_signif_level = map_signif_level,
            tip_length = tip.length, data = data)
    } else {
        mapping <- ggpubr:::.update_mapping(mapping, label)
        layer(stat = StatCompareMeans, data = data, mapping = mapping,
            geom = geom, position = position, show.legend = show.legend,
            inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc,
                label.y.npc = label.y.npc, label.x = label.x,
                label.y = label.y, label.sep = label.sep, method = method,
                method.args = method.args, paired = paired, ref.group = ref.group,
                symnum.args = symnum.args, hide.ns = hide.ns,
                na.rm = na.rm, ...))
    }
}

p3 <- p +
  stat_compare_means(
    method="t.test",
    comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")),
    label = "p.signif",
    symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")),
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))  # add this line to rotate x-axis text
#print(p3)

ggsave("./figures/alpha_diversity_ST_SampleType.png", device="png", height = 5, width = 9)
ggsave("./figures/alpha_diversity_ST_SampleType.svg", device="svg", height = 4, width = 8)
knitr::include_graphics("figures/alpha_diversity_ST_SampleType.png")
```


## Alpha diversity among admission, surgery, and discharge in A* samples

```{r, echo=TRUE, warning=FALSE}
hmp.div_st <- read.csv("alpha_diversity_metrics_samples_ST_A.csv", sep=",")
colnames(hmp.div_st) <- c("sample", "chao1", "observed_sts", "shannon")
row.names(hmp.div_st) <- hmp.div_st$sample
div.df <- merge(hmp.div_st, hmp.meta, by.x="sample", by.y = "sam_name")
div.df2 <- div.df[, c("SampleType", "shannon", "observed_sts")]
colnames(div.df2) <- c("SampleType", "Shannon", "ST")
#colnames(div.df2)
options(max.print=999999)

stat.test.Shannon <- compare_means(
 Shannon ~ SampleType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

stat.test.ST <- compare_means(
 ST ~ SampleType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

div_df_melt <- reshape2::melt(div.df2)

p <- ggboxplot(div_df_melt, x = "SampleType", y = "value",
              facet.by = "variable",
              scales = "free",
              width = 0.5,
              fill = "gray", legend= "right")
#ggpar(p, xlab = FALSE, ylab = FALSE)
lev <- levels(factor(div_df_melt$SampleType)) # get the variables
L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns")
my_stat_compare_means  <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE,
    method.args = list(), ref.group = NULL, comparisons = NULL,
    hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left",
    label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03,
    symnum.args = list(), geom = "text", position = "identity",
    na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...)
{
    if (!is.null(comparisons)) {
        method.info <- ggpubr:::.method_info(method)
        method <- method.info$method
        method.args <- ggpubr:::.add_item(method.args, paired = paired)
        if (method == "wilcox.test")
            method.args$exact <- FALSE
        pms <- list(...)
        size <- ifelse(is.null(pms$size), 0.3, pms$size)
        color <- ifelse(is.null(pms$color), "black", pms$color)
        map_signif_level <- FALSE
        if (is.null(label))
            label <- "p.format"
        if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) {
            if (ggpubr:::.is_empty(symnum.args)) {
                map_signif_level <- c(`****` = 1e-04, `***` = 0.001,
                  `**` = 0.01, `*` = 0.05, ns = 1)
            } else {
               map_signif_level <- symnum.args
            }
            if (hide.ns)
                names(map_signif_level)[5] <- " "
        }
        step_increase <- ifelse(is.null(label.y), 0.12, 0)
        ggsignif::geom_signif(comparisons = comparisons, y_position = label.y,
            test = method, test.args = method.args, step_increase = step_increase,
            size = size, color = color, map_signif_level = map_signif_level,
            tip_length = tip.length, data = data)
    } else {
        mapping <- ggpubr:::.update_mapping(mapping, label)
        layer(stat = StatCompareMeans, data = data, mapping = mapping,
            geom = geom, position = position, show.legend = show.legend,
            inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc,
                label.y.npc = label.y.npc, label.x = label.x,
                label.y = label.y, label.sep = label.sep, method = method,
                method.args = method.args, paired = paired, ref.group = ref.group,
                symnum.args = symnum.args, hide.ns = hide.ns,
                na.rm = na.rm, ...))
    }
}

p3 <- p +
  stat_compare_means(
    method="t.test",
    comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")),
    label = "p.signif",
    symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")),
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))  # add this line to rotate x-axis text
#print(p3)

ggsave("./figures/alpha_diversity_ST_SampleType_A.png", device="png", height = 5, width = 9)
ggsave("./figures/alpha_diversity_ST_SampleType_A.svg", device="svg", height = 4, width = 8)
knitr::include_graphics("figures/alpha_diversity_ST_SampleType_A.png")
```


## Alpha diversity among admission, surgery, and discharge in H* samples

```{r, echo=TRUE, warning=FALSE}
hmp.div_st <- read.csv("alpha_diversity_metrics_samples_ST_H.csv", sep=",")
colnames(hmp.div_st) <- c("sample", "chao1", "observed_sts", "shannon")
row.names(hmp.div_st) <- hmp.div_st$sample
div.df <- merge(hmp.div_st, hmp.meta, by.x="sample", by.y = "sam_name")
div.df2 <- div.df[, c("SampleType", "shannon", "observed_sts")]
colnames(div.df2) <- c("SampleType", "Shannon", "ST")
#colnames(div.df2)
options(max.print=999999)

stat.test.Shannon <- compare_means(
 Shannon ~ SampleType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

stat.test.ST <- compare_means(
 ST ~ SampleType, data = div.df2,
 method = "t.test"
)
knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))

div_df_melt <- reshape2::melt(div.df2)

p <- ggboxplot(div_df_melt, x = "SampleType", y = "value",
              facet.by = "variable",
              scales = "free",
              width = 0.5,
              fill = "gray", legend= "right")
#ggpar(p, xlab = FALSE, ylab = FALSE)
lev <- levels(factor(div_df_melt$SampleType)) # get the variables
L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns")
my_stat_compare_means  <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE,
    method.args = list(), ref.group = NULL, comparisons = NULL,
    hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left",
    label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03,
    symnum.args = list(), geom = "text", position = "identity",
    na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...)
{
    if (!is.null(comparisons)) {
        method.info <- ggpubr:::.method_info(method)
        method <- method.info$method
        method.args <- ggpubr:::.add_item(method.args, paired = paired)
        if (method == "wilcox.test")
            method.args$exact <- FALSE
        pms <- list(...)
        size <- ifelse(is.null(pms$size), 0.3, pms$size)
        color <- ifelse(is.null(pms$color), "black", pms$color)
        map_signif_level <- FALSE
        if (is.null(label))
            label <- "p.format"
        if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) {
            if (ggpubr:::.is_empty(symnum.args)) {
                map_signif_level <- c(`****` = 1e-04, `***` = 0.001,
                  `**` = 0.01, `*` = 0.05, ns = 1)
            } else {
               map_signif_level <- symnum.args
            }
            if (hide.ns)
                names(map_signif_level)[5] <- " "
        }
        step_increase <- ifelse(is.null(label.y), 0.12, 0)
        ggsignif::geom_signif(comparisons = comparisons, y_position = label.y,
            test = method, test.args = method.args, step_increase = step_increase,
            size = size, color = color, map_signif_level = map_signif_level,
            tip_length = tip.length, data = data)
    } else {
        mapping <- ggpubr:::.update_mapping(mapping, label)
        layer(stat = StatCompareMeans, data = data, mapping = mapping,
            geom = geom, position = position, show.legend = show.legend,
            inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc,
                label.y.npc = label.y.npc, label.x = label.x,
                label.y = label.y, label.sep = label.sep, method = method,
                method.args = method.args, paired = paired, ref.group = ref.group,
                symnum.args = symnum.args, hide.ns = hide.ns,
                na.rm = na.rm, ...))
    }
}

p3 <- p +
  stat_compare_means(
    method="t.test",
    comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")),
    label = "p.signif",
    symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")),
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))  # add this line to rotate x-axis text
#print(p3)

ggsave("./figures/alpha_diversity_ST_SampleType_H.png", device="png", height = 5, width = 9)
ggsave("./figures/alpha_diversity_ST_SampleType_H.svg", device="svg", height = 4, width = 8)
knitr::include_graphics("figures/alpha_diversity_ST_SampleType_H.png")
```


# Statistical tests for the abundance of a specific subtype between admission and discharge (g216 & yycH)

```{r, echo=FALSE, warning=FALSE}
otu_data <- otu_table(ps_rel)

# Convert OTU table to a data frame for easier manipulation
# This conversion keeps taxa as rows and samples as columns
df <- as.data.frame(otu_data)

# Assuming ST8 is one of your taxa, extract its data
ST8_data <- df["ST8", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))]
discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))]

# Extract relative abundances for ST8 at admission and discharge
before_treatment <- ST8_data[admission_samples]
after_treatment <- ST8_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

#> colnames(ST8_admission)
# [1] "A1.1"  "A2.1"  "A3.1"  "A4.1"  "A5.1"  "A10.1" "A12.1" "A17.1" "A20.1"
#[10] "A21.1" "A22.1" "A24.1" "A25.1" "A27.1" "A28.1" "H11.1" "H13.1" "H14.1"
#[19] "H16.1" "H19.1" "H24.1" "H53.1" "H56.1" "H57.1" "H59.1" "H60.1" "H64.1"
#[28] "H65.1" "H67.1" "H72.1" "H73.1" "H76.1" "H78.1" "H91.1" "H94.1" "H99.1"
#> colnames(ST8_discharge)
# [1] "A1.3"  "A2.3"  "A3.3"  "A4.3"  "A5.3"  "A10.3" "A12.3" "A17.3" "A20.3"
#[10] "A21.3" "A22.3" "A24.3" "A25.3" "A27.3" "A28.3" "H11.3" "H13.3" "H14.3"
#[19] "H16.3" "H19.3" "H24.3" "H53.3" "H56.3" "H57.3" "H59.3" "H60.3" "H64.3"
#[28] "H65.3" "H67.3" "H72.3" "H73.3" "H76.3" "H78.3" "H91.3" "H94.3" "H99.3"

# # Performing the Wilcoxon signed-rank test
# result <- wilcoxsign_test(before_treatment ~ after_treatment, distribution = "exact")
# # Printing the result
# print(result)
# #	Exact Wilcoxon-Pratt Signed-Rank Test
# #data:  y by x (pos, neg)
# #	 stratified by block
# #Z = 1.6933, p-value = 0.09375
# #alternative hypothesis: true mu is not equal to 0
#
# # Alternatively, using the stats package
# result_alt <- wilcox.test(before_treatment, after_treatment, paired = TRUE)
# # Printing the alternative result
# print(result_alt)

#--1-- Accepting the Approximate p-value (the default behavior in R):
#If you are okay with the normal approximation that R uses when ties are present, you can simply run:

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)
#Despite the warning, this will still give you an approximate p-value.
print(result1)
#	Wilcoxon signed rank exact test
#data:  before_treatment and after_treatment
#V = 358, p-value = 0.7037
#alternative hypothesis: true location shift is not equal to 0

#--2-- Using the Coin Package for Exact Calculations:
#Install and use the coin package which can handle ties more gracefully:

# Install if not already installed: install.packages("coin")
library(coin)

# Using the wilcoxsign_test function from the coin package
result2 <- wilcoxsign_test(before_treatment ~ after_treatment, distribution = "exact")
print(result2)
#This method calculates an exact p-value even in the presence of ties.

#	Exact Wilcoxon-Pratt Signed-Rank Test
#
#data:  y by x (pos, neg)
#	 stratified by block
#Z = 0.39276, p-value = 0.7037
#alternative hypothesis: true mu is not equal to 0

#--3-- Adding a Small Random Noise to Data (Not recommended for final analysis):
#If you want to break the ties by adding a small amount of noise, you can do so, but be aware that this alters your data:

set.seed(123)  # Setting a seed for reproducibility
before_treatment_jittered <- before_treatment + runif(length(before_treatment), -0.01, 0.01)
after_treatment_jittered <- after_treatment + runif(length(after_treatment), -0.01, 0.01)
wilcox.test(before_treatment_jittered, after_treatment_jittered, paired = TRUE)
#This method should be used with caution as it modifies the original data.
#	Wilcoxon signed rank exact test
#data:  before_treatment_jittered and after_treatment_jittered
#V = 377, p-value = 0.4989
#alternative hypothesis: true location shift is not equal to 0

#--4-- Using another statistical test, such as the Sign Test:
#If you want to avoid the issue of ties altogether, you can use a different test, like the sign test, which is less sensitive to ties:
# Assuming 'BSDA' package is installed; if not, install via install.packages("BSDA")
#install.packages("devtools", dependencies = TRUE)
#devtools::has_devel()
#devtools::install_github('alanarnholt/BSDA')
library(BSDA)
SIGN.test(before_treatment, after_treatment, paired = TRUE)
#The sign test is a simpler non-parametric test that might be less powerful than the Wilcoxon test but does not require dealing with ties.

#	Dependent-samples Sign-Test
#data:  before_treatment and after_treatment
#S = 20, p-value = 0.6177
#alternative hypothesis: true median difference is not equal to 0
#95 percent confidence interval:
# -0.003397394  0.011402066
#sample estimates:
#median of x-y
#  0.002972116
#Achieved and Interpolated Confidence Intervals:
#                  Conf.Level  L.E.pt U.E.pt
#Lower Achieved CI     0.9348 -0.0025 0.0102
#Interpolated CI       0.9500 -0.0034 0.0114
#Upper Achieved CI     0.9712 -0.0047 0.0130
```

## ST8

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST8_data <- df["ST8", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))]
discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))]

# Extract relative abundances for ST8 at admission and discharge
before_treatment <- ST8_data[admission_samples]
after_treatment <- ST8_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)
#print(result1)

# Creating a new tibble for the Wilcoxon test result
  #p.adj = result1$p.value,  # Adjusted p-value, same as raw p-value in this context
  #p.format = sprintf("%.3f", result1$p.value),  # Formatted p-value
result1_formatted <- tibble(
  .y. = "ST8 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
#print(result1_formatted)
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST59

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST59_data <- df["ST59", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST59_data)[grepl("\\.1$", names(ST59_data))]
discharge_samples <- names(ST59_data)[grepl("\\.3$", names(ST59_data))]

# Extract relative abundances for ST59 at admission and discharge
before_treatment <- ST59_data[admission_samples]
after_treatment <- ST59_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST59 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST60

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST60_data <- df["ST60", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST60_data)[grepl("\\.1$", names(ST60_data))]
discharge_samples <- names(ST60_data)[grepl("\\.3$", names(ST60_data))]

# Extract relative abundances for ST60 at admission and discharge
before_treatment <- ST60_data[admission_samples]
after_treatment <- ST60_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST60 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST73

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST73_data <- df["ST73", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST73_data)[grepl("\\.1$", names(ST73_data))]
discharge_samples <- names(ST73_data)[grepl("\\.3$", names(ST73_data))]

# Extract relative abundances for ST73 at admission and discharge
before_treatment <- ST73_data[admission_samples]
after_treatment <- ST73_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST73 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST215

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST215_data <- df["ST215", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST215_data)[grepl("\\.1$", names(ST215_data))]
discharge_samples <- names(ST215_data)[grepl("\\.3$", names(ST215_data))]

# Extract relative abundances for ST215 at admission and discharge
before_treatment <- ST215_data[admission_samples]
after_treatment <- ST215_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST215 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5+ST87+ST130+ST210+ST384

```{r, echo=FALSE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST5+ST87+ST130+ST210+ST384 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST5_data <- df["ST5", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST5_data)[grepl("\\.1$", names(ST5_data))]
discharge_samples <- names(ST5_data)[grepl("\\.3$", names(ST5_data))]

# Extract relative abundances for ST5 at admission and discharge
before_treatment <- ST5_data[admission_samples]
after_treatment <- ST5_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST5 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST130

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST130_data <- df["ST130", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST130_data)[grepl("\\.1$", names(ST130_data))]
discharge_samples <- names(ST130_data)[grepl("\\.3$", names(ST130_data))]

# Extract relative abundances for ST130 at admission and discharge
before_treatment <- ST130_data[admission_samples]
after_treatment <- ST130_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST130 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST2+ST19+ST100

```{r, echo=TRUE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST2", "ST19", "ST100"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST2+ST19+ST100 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST2

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST2_data <- df["ST2", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST2_data)[grepl("\\.1$", names(ST2_data))]
discharge_samples <- names(ST2_data)[grepl("\\.3$", names(ST2_data))]

# Extract relative abundances for ST2 at admission and discharge
before_treatment <- ST2_data[admission_samples]
after_treatment <- ST2_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST2 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST19

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST19_data <- df["ST19", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST19_data)[grepl("\\.1$", names(ST19_data))]
discharge_samples <- names(ST19_data)[grepl("\\.3$", names(ST19_data))]

# Extract relative abundances for ST19 at admission and discharge
before_treatment <- ST19_data[admission_samples]
after_treatment <- ST19_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST19 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST100

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST100_data <- df["ST100", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST100_data)[grepl("\\.1$", names(ST100_data))]
discharge_samples <- names(ST100_data)[grepl("\\.3$", names(ST100_data))]

# Extract relative abundances for ST100 at admission and discharge
before_treatment <- ST100_data[admission_samples]
after_treatment <- ST100_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST100 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100

```{r, echo=FALSE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384", "ST2", "ST19", "ST100"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p.format = sprintf("%.3f", result1$p.value),
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```


# Statistical tests for the abundance of a specific subtype between admission and discharge in A* samples (g216 & yycH)

```{r, echo=FALSE, warning=FALSE}
otu_data <- otu_table(ps_rel)
df_ <- as.data.frame(otu_data)
df <- df_[, grep("^A", names(df_))]
```

## ST8

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST8_data <- df["ST8", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))]
discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))]

# Extract relative abundances for ST8 at admission and discharge
before_treatment <- ST8_data[admission_samples]
after_treatment <- ST8_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)
#print(result1)

# Creating a new tibble for the Wilcoxon test result
  #p.adj = result1$p.value,  # Adjusted p-value, same as raw p-value in this context
  #p.format = sprintf("%.3f", result1$p.value),  # Formatted p-value
result1_formatted <- tibble(
  .y. = "ST8 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
#print(result1_formatted)
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST59

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST59_data <- df["ST59", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST59_data)[grepl("\\.1$", names(ST59_data))]
discharge_samples <- names(ST59_data)[grepl("\\.3$", names(ST59_data))]

# Extract relative abundances for ST59 at admission and discharge
before_treatment <- ST59_data[admission_samples]
after_treatment <- ST59_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST59 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST60

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST60_data <- df["ST60", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST60_data)[grepl("\\.1$", names(ST60_data))]
discharge_samples <- names(ST60_data)[grepl("\\.3$", names(ST60_data))]

# Extract relative abundances for ST60 at admission and discharge
before_treatment <- ST60_data[admission_samples]
after_treatment <- ST60_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST60 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST73

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST73_data <- df["ST73", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST73_data)[grepl("\\.1$", names(ST73_data))]
discharge_samples <- names(ST73_data)[grepl("\\.3$", names(ST73_data))]

# Extract relative abundances for ST73 at admission and discharge
before_treatment <- ST73_data[admission_samples]
after_treatment <- ST73_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST73 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST215

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST215_data <- df["ST215", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST215_data)[grepl("\\.1$", names(ST215_data))]
discharge_samples <- names(ST215_data)[grepl("\\.3$", names(ST215_data))]

# Extract relative abundances for ST215 at admission and discharge
before_treatment <- ST215_data[admission_samples]
after_treatment <- ST215_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST215 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5+ST87+ST130+ST210+ST384

```{r, echo=FALSE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST5+ST87+ST130+ST210+ST384 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST5_data <- df["ST5", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST5_data)[grepl("\\.1$", names(ST5_data))]
discharge_samples <- names(ST5_data)[grepl("\\.3$", names(ST5_data))]

# Extract relative abundances for ST5 at admission and discharge
before_treatment <- ST5_data[admission_samples]
after_treatment <- ST5_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST5 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST130

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST130_data <- df["ST130", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST130_data)[grepl("\\.1$", names(ST130_data))]
discharge_samples <- names(ST130_data)[grepl("\\.3$", names(ST130_data))]

# Extract relative abundances for ST130 at admission and discharge
before_treatment <- ST130_data[admission_samples]
after_treatment <- ST130_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST130 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```


## ST2+ST19+ST100

```{r, echo=TRUE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST2", "ST19", "ST100"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST2+ST19+ST100 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST2

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST2_data <- df["ST2", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST2_data)[grepl("\\.1$", names(ST2_data))]
discharge_samples <- names(ST2_data)[grepl("\\.3$", names(ST2_data))]

# Extract relative abundances for ST2 at admission and discharge
before_treatment <- ST2_data[admission_samples]
after_treatment <- ST2_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST2 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST19

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST19_data <- df["ST19", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST19_data)[grepl("\\.1$", names(ST19_data))]
discharge_samples <- names(ST19_data)[grepl("\\.3$", names(ST19_data))]

# Extract relative abundances for ST19 at admission and discharge
before_treatment <- ST19_data[admission_samples]
after_treatment <- ST19_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST19 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST100

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST100_data <- df["ST100", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST100_data)[grepl("\\.1$", names(ST100_data))]
discharge_samples <- names(ST100_data)[grepl("\\.3$", names(ST100_data))]

# Extract relative abundances for ST100 at admission and discharge
before_treatment <- ST100_data[admission_samples]
after_treatment <- ST100_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST100 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100

```{r, echo=FALSE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384", "ST2", "ST19", "ST100"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p.format = sprintf("%.3f", result1$p.value),
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```


# Statistical tests for the abundance of a specific subtype between admission and discharge in H* samples (g216 & yycH)

```{r, echo=FALSE, warning=FALSE}
otu_data <- otu_table(ps_rel)
df_ <- as.data.frame(otu_data)
df <- df_[, grep("^H", names(df_))]
```

## ST8

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST8_data <- df["ST8", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))]
discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))]

# Extract relative abundances for ST8 at admission and discharge
before_treatment <- ST8_data[admission_samples]
after_treatment <- ST8_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)
#print(result1)

# Creating a new tibble for the Wilcoxon test result
  #p.adj = result1$p.value,  # Adjusted p-value, same as raw p-value in this context
  #p.format = sprintf("%.3f", result1$p.value),  # Formatted p-value
result1_formatted <- tibble(
  .y. = "ST8 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
#print(result1_formatted)
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST59

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST59_data <- df["ST59", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST59_data)[grepl("\\.1$", names(ST59_data))]
discharge_samples <- names(ST59_data)[grepl("\\.3$", names(ST59_data))]

# Extract relative abundances for ST59 at admission and discharge
before_treatment <- ST59_data[admission_samples]
after_treatment <- ST59_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST59 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST60

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST60_data <- df["ST60", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST60_data)[grepl("\\.1$", names(ST60_data))]
discharge_samples <- names(ST60_data)[grepl("\\.3$", names(ST60_data))]

# Extract relative abundances for ST60 at admission and discharge
before_treatment <- ST60_data[admission_samples]
after_treatment <- ST60_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST60 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST73

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST73_data <- df["ST73", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST73_data)[grepl("\\.1$", names(ST73_data))]
discharge_samples <- names(ST73_data)[grepl("\\.3$", names(ST73_data))]

# Extract relative abundances for ST73 at admission and discharge
before_treatment <- ST73_data[admission_samples]
after_treatment <- ST73_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST73 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST215

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST215_data <- df["ST215", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST215_data)[grepl("\\.1$", names(ST215_data))]
discharge_samples <- names(ST215_data)[grepl("\\.3$", names(ST215_data))]

# Extract relative abundances for ST215 at admission and discharge
before_treatment <- ST215_data[admission_samples]
after_treatment <- ST215_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST215 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5+ST87+ST130+ST210+ST384

```{r, echo=FALSE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST5+ST87+ST130+ST210+ST384 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST5_data <- df["ST5", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST5_data)[grepl("\\.1$", names(ST5_data))]
discharge_samples <- names(ST5_data)[grepl("\\.3$", names(ST5_data))]

# Extract relative abundances for ST5 at admission and discharge
before_treatment <- ST5_data[admission_samples]
after_treatment <- ST5_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST5 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST130

```{r, echo=FALSE, warning=FALSE}
# Extract data
ST130_data <- df["ST130", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST130_data)[grepl("\\.1$", names(ST130_data))]
discharge_samples <- names(ST130_data)[grepl("\\.3$", names(ST130_data))]

# Extract relative abundances for ST130 at admission and discharge
before_treatment <- ST130_data[admission_samples]
after_treatment <- ST130_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST130 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST2+ST19+ST100

```{r, echo=TRUE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST2", "ST19", "ST100"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST2+ST19+ST100 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST2

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST2_data <- df["ST2", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST2_data)[grepl("\\.1$", names(ST2_data))]
discharge_samples <- names(ST2_data)[grepl("\\.3$", names(ST2_data))]

# Extract relative abundances for ST2 at admission and discharge
before_treatment <- ST2_data[admission_samples]
after_treatment <- ST2_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST2 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST19

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST19_data <- df["ST19", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST19_data)[grepl("\\.1$", names(ST19_data))]
discharge_samples <- names(ST19_data)[grepl("\\.3$", names(ST19_data))]

# Extract relative abundances for ST19 at admission and discharge
before_treatment <- ST19_data[admission_samples]
after_treatment <- ST19_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST19 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST100

```{r, echo=TRUE, warning=FALSE}
# Extract data
ST100_data <- df["ST100", ]

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(ST100_data)[grepl("\\.1$", names(ST100_data))]
discharge_samples <- names(ST100_data)[grepl("\\.3$", names(ST100_data))]

# Extract relative abundances for ST100 at admission and discharge
before_treatment <- ST100_data[admission_samples]
after_treatment <- ST100_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)
print(before_treatment)
print(mean(before_treatment))
print(after_treatment)
print(mean(after_treatment))

result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "ST100 Relative Abundance",  # Change this to the appropriate response variable name
  group1 = "admission",
  group2 = "discharge",
  p = result1$p.value,
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result
knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```

## ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100

```{r, echo=FALSE, warning=FALSE}
# Extract data for the specified subtypes
selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384", "ST2", "ST19", "ST100"), ]

# Calculate the combined relative abundances for the selected taxa
combined_data <- colSums(selected_taxa, na.rm = TRUE)

# Define sample groups based on naming convention (admission (.1) and discharge (.3))
admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))]
discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))]

# Extract combined relative abundances for the selected taxa at admission and discharge
before_treatment <- combined_data[admission_samples]
after_treatment <- combined_data[discharge_samples]

# Ensure the treatment data is numeric
before_treatment <- as.numeric(before_treatment)
after_treatment <- as.numeric(after_treatment)

# Perform the Wilcoxon signed-rank test
result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE)

# Creating a new tibble for the Wilcoxon test result
result1_formatted <- tibble(
  .y. = "Combined ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 Relative Abundance",
  group1 = "admission",
  group2 = "discharge",
  p.format = sprintf("%.3f", result1$p.value),
  p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")),
  method = "Wilcoxon signed-rank test"
)

# Print the formatted result using knitr and kableExtra for a nice table format in R Markdown
knitr::kable(result1_formatted) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```


# goeBURST DLV-Analysis (g216 & yycH)

```{r echo=FALSE, out.width='100%'}
knitr::include_graphics("figures/goeBURST.png")
```