--- title: "Phyloseq microbiome" author: "" date: '`r format(Sys.time(), "%d %m %Y")`' header-includes: - \usepackage{color, fancyvrb} output: rmdformats::readthedown: highlight: kate number_sections : yes pdf_document: toc: yes toc_depth: 2 number_sections : yes --- ```{r, echo=FALSE, warning=FALSE} ## Global options # TODO: reproduce the html with the additional figure/SVN-files for editing. # IMPORTANT NOTE: needs before "mkdir figures" #gunzip table_even9893.biom.gz #alpha_diversity.py -i table_even9893.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering/rep_set.tre #cp count_table_seq31_seq37.txt count_table_seq31_seq37_ST.txt by adding ST in first column and changing '-' to Novel #Note count_table.txt is the file with higher unclassified rates! #python3 ~/Scripts/calculate_alpha_diversities.py ../count_table_seq31_seq37_ST.txt alpha_diversity_metrics_samples_ST.csv #rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html') ``` ```{r load-packages, include=FALSE} #install.packages(c("dplyr", "tidyr", "rstatix", "ggplot2", "RVAideMemoire")) library(knitr) library(rmdformats) library(readxl) library(dplyr) library(kableExtra) library(tibble) #library(wilcox.test) library(dplyr) library(tidyr) library(rstatix) # For convenient paired statistical tests and p-value adjustment library(RVAideMemoire) # For Cochran's Q test library(ggplot2) options(max.print="75") knitr::opts_chunk$set(fig.width=8, fig.height=6, eval=TRUE, cache=TRUE, echo=TRUE, prompt=FALSE, tidy=FALSE, comment=NA, message=FALSE, warning=FALSE) opts_knit$set(width=85) # Phyloseq R library #* Phyloseq web site : https://joey711.github.io/phyloseq/index.html #* See in particular tutorials for # - importing data: https://joey711.github.io/phyloseq/import-data.html # - heat maps: https://joey711.github.io/phyloseq/plot_heatmap-examples.html ``` # Sample Collection and Study Design Nasal swab samples were collected from two patient cohorts: patients undergoing aneurysm surgery (cohort A, n = 15) and patients undergoing hypophysis surgery (cohort H, n = 20). For each patient, samples were collected at three timepoints: admission (A1/H1), surgery (A2/H2), and discharge (A3/H3), resulting in a longitudinal study design to assess temporal changes in the nasal microbiome across surgical interventions. # Data Import raw data and assign sample key: ```{r, echo=TRUE, warning=FALSE} #extend map_corrected.txt with Diet and Flora #setwd("~/DATA/Data_Laura_16S_2/core_diversity_e4753") #MODIFIED: Using map3_corrected.txt rather than map3_corrected.txt #TODO_MONDAY: the name has not been changed!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! map_corrected <- read.csv("../map3_corrected.txt", sep="\t", row.names=1) knitr::kable(map_corrected) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` # Prerequisites to be installed * R : https://pbil.univ-lyon1.fr/CRAN/ * R studio : https://www.rstudio.com/products/rstudio/download/#download ```R install.packages("dplyr") # To manipulate dataframes install.packages("readxl") # To read Excel files into R install.packages("ggplot2") # for high quality graphics install.packages("heatmaply") source("https://bioconductor.org/biocLite.R") biocLite("phyloseq") ``` ```{r libraries, echo=TRUE, message=FALSE} library("readxl") # necessary to import the data from Excel file library("ggplot2") # graphics library("picante") library("microbiome") # data analysis and visualisation library("phyloseq") # also the basis of data object. Data analysis and visualisation library("DESeq2") library("ggpubr") # publication quality figures, based on ggplot2 library("dplyr") # data handling, filter and reformat data frames library("RColorBrewer") # nice color options library("heatmaply") library("vegan") library("gplots") library("openxlsx") ``` # Read the data and create phyloseq objects (16S) Three tables are needed * OTU * Taxonomy * Samples ```{r, echo=TRUE, warning=FALSE} # Change your working directory to where the files are located ps_raw <- import_biom("./table_even9893.biom", "../clustering/rep_set.tre") sample <- read.csv("../map3_corrected.txt", sep = "\t", row.names = 1) SAM = sample_data(sample, errorIfNULL = T) # rownames(SAM) <-c('P7.Nose','P8.Nose','P9.Nose','P10.Nose','P11.Nose','P12.Nose','P13.Nose','P14.Nose','P15.Nose','P16.Foot','P16.Nose','P17.Foot','P17.Nose','P18.Foot','P18.Nose','P19.Foot','P19.Nose','P20.Foot','P20.Nose','E.LS','E.NLS','E.Nose','G.LS','G.NLS','G.Nose','K.LS','K.NLS','K.Nose','H.LS','H.NLS','H.Nose','D.LS','D.NLS','D.Nose','C.LS','C.NLS','C.Nose','J.LS','J.NLS','J.Nose','F.LS','F.NLS','F.Nose','A.LS','A.NLS','A.Nose','I.LS','I.NLS','I.Nose','B.LS','B.NLS','B.Nose') ps_base <- merge_phyloseq(ps_raw, SAM) print(ps_base) colnames(tax_table(ps_base)) <- c("Domain","Phylum","Class","Order","Family","Genus","Species") saveRDS(ps_base, "./ps_base.rds") ``` Visualize data ```{r, echo=TRUE, warning=FALSE} sample_names(ps_base) rank_names(ps_base) sample_variables(ps_base) # Define sample names once samples <- c( "A1.1","A1.2","A1.3", "A2.1","A2.2","A2.3", "A3.1","A3.2","A3.3", "A4.1","A4.2","A4.3", "A5.1","A5.2","A5.3", "A10.1","A10.2","A10.3", "A12.1","A12.2","A12.3", "A17.1","A17.2","A17.3", "A20.1","A20.2","A20.3", "A21.1","A21.2","A21.3", "A22.1","A22.2","A22.3", "A24.1","A24.2","A24.3", "A25.1","A25.2","A25.3", "A27.1","A27.2","A27.3", "A28.1","A28.2","A28.3", "H11.1","H11.2","H11.3", "H13.1","H13.2","H13.3", "H14.1","H14.2","H14.3", "H16.1","H16.2","H16.3", "H19.1","H19.2","H19.3", "H24.1","H24.2","H24.3", "H53.1","H53.2","H53.3", "H56.1","H56.2","H56.3", "H57.1","H57.2","H57.3", "H59.1","H59.2","H59.3", "H60.1","H60.2","H60.3", "H64.1","H64.2","H64.3", "H65.1","H65.2","H65.3", "H67.1","H67.2","H67.3", "H72.1","H72.2","H72.3", "H73.1","H73.2","H73.3", "H76.1","H76.2","H76.3", "H78.1","H78.2","H78.3", "H91.1","H91.2","H91.3", "H94.1","H94.2","H94.3", "H99.1","H99.2","H99.3") ps_pruned <- prune_samples(samples, ps_base) sample_names(ps_pruned) rank_names(ps_pruned) sample_variables(ps_pruned) ``` No samples were excluded as low-depth outliers (library sizes below the minimum depth threshold of 1,000 reads), and the remaining dataset (ps_filt) contains only samples meeting this depth cutoff with taxa retained only if they have nonzero total counts. ```{r, echo=TRUE, warning=FALSE} # ------------------------------------------------------------ # Filter low-depth samples (recommended for all analyses) # ------------------------------------------------------------ min_depth <- 1000 # <-- adjust to your data / study design, keeps all! ps_filt <- prune_samples(sample_sums(ps_pruned) >= min_depth, ps_pruned) ps_filt <- prune_taxa(taxa_sums(ps_filt) > 0, ps_filt) # Keep a depth summary for reporting / QC depth_summary <- summary(sample_sums(ps_filt)) depth_summary ``` **Differential abundance (DESeq2)** → **`ps_deseq`**: **non-rarefied integer counts** derived from `ps_filt`, with optional **count-based** taxon prefilter *(default: taxa total counts ≥ 10 across all samples)* From `ps_filt` (e.g. 5669 taxa and 239 samples), we branch into analysis-ready objects in two directions: * Direction 1 for diversity analyses - **Alpha diversity**: `ps_rarefied` ✅ (common) - **Beta diversity**: - **Unweighted UniFrac / Jaccard**: `ps_rarefied` ✅ (often recommended) - **Bray–Curtis / ordination on abundances**: `ps_rel` or Hellinger ✅ (rarefaction optional) - **Aitchison (CLR)**: CLR-transformed (non-rarefied) ✅ (no rarefaction) Normalize number of reads in each sample using median sequencing depth. ```{r, echo=TRUE, warning=FALSE} # RAREFACTION set.seed(9242) # This will help in reproducing the filtering and nomalisation. ps_rarefied <- rarefy_even_depth(ps_filt, sample.size = 9893) total <- 9893 # # NORMALIZE number of reads in each sample using median sequencing depth. # total = median(sample_sums(ps.ng.tax)) # #> total # #[1] 9893 # standf = function(x, t=total) round(t * (x / sum(x))) # ps.ng.tax = transform_sample_counts(ps.ng.tax, standf) # ps_rel <- microbiome::transform(ps.ng.tax, "compositional") # # saveRDS(ps.ng.tax, "./ps.ng.tax.rds") ``` * Direction 2 for taxonomic composition plots - **Taxonomic composition** → **`ps_rel`**: **relative abundance** (compositional) computed **after sample filtering** (e.g. 5669 taxa and 239 samples) - **Optional cleaner composition plots** → **`ps_abund` / `ps_abund_rel`**: taxa filtered for plotting (e.g., keep taxa with **mean relative abundance > 0.1%**); (e.g. 95 taxa and 239 samples) `ps_abund` = **counts**, `ps_abund_rel` = **relative abundance** *(use for visualization, not DESeq2)* For the heatmaps, we focus on the most abundant OTUs by first converting counts to relative abundances within each sample. We then filter to retain only OTUs whose mean relative abundance across all samples exceeds 0.1% (0.001). We are left with 199 OTUs which makes the reading much more easy. ```{r, echo=FALSE, warning=FALSE} # 1) Convert to relative abundances ps_rel <- transform_sample_counts(ps_filt, function(x) x / sum(x)) # 2) Get the logical vector of which OTUs to keep (based on relative abundance) keep_vector <- phyloseq::filter_taxa( ps_rel, function(x) mean(x) > 0.001, prune = FALSE ) # 3) Use the TRUE/FALSE vector to subset absolute abundance data ps_abund <- prune_taxa(names(keep_vector)[keep_vector], ps_filt) # 4) Normalize the final subset to relative abundances per sample ps_abund_rel <- transform_sample_counts( ps_abund, function(x) x / sum(x) ) ``` # Heatmaps (16S) We consider the most abundant OTUs for heatmaps. For example one can only take OTUs that represent at least 1% of reads in at least one sample. Remember we normalized all the sampples to median number of reads (total). We are left with only 119 OTUS which makes the reading much more easy. ```{r, echo=FALSE, warning=FALSE} datamat_ = as.data.frame(otu_table(ps_abund)) datamat <- datamat_[c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3","H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3")] hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete") hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete") mycl = cutree(hr, h=max(hr$height)/1.08) mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED", "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN"); mycol = mycol[as.vector(mycl)] sampleCols <- rep('GREY',ncol(datamat)) sampleCols[colnames(datamat)=='A1.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A1.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A1.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A2.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A2.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A2.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A3.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A3.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A3.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A4.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A4.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A4.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A5.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A5.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A5.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A10.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A10.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A10.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A12.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A12.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A12.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A17.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A17.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A17.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A20.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A20.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A20.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A21.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A21.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A21.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A22.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A22.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A22.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A24.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A24.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A24.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A25.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A25.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A25.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A27.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A27.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A27.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='A28.1'] <- '#1f78b4' sampleCols[colnames(datamat)=='A28.2'] <- '#1f78b4' sampleCols[colnames(datamat)=='A28.3'] <- '#1f78b4' sampleCols[colnames(datamat)=='H11.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H11.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H11.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H13.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H13.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H13.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H14.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H14.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H14.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H16.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H16.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H16.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H19.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H19.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H19.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H24.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H24.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H24.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H53.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H53.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H53.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H56.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H56.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H56.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H57.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H57.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H57.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H59.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H59.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H59.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H60.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H60.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H60.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H64.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H64.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H64.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H65.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H65.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H65.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H67.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H67.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H67.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H72.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H72.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H72.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H73.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H73.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H73.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H76.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H76.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H76.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H78.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H78.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H78.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H91.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H91.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H91.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H94.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H94.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H94.3'] <- '#b2df8a' sampleCols[colnames(datamat)=='H99.1'] <- '#b2df8a' sampleCols[colnames(datamat)=='H99.2'] <- '#b2df8a' sampleCols[colnames(datamat)=='H99.3'] <- '#b2df8a' #bluered(75) #color_pattern <- colorRampPalette(c("blue", "white", "red"))(100) library(RColorBrewer) custom_palette <- colorRampPalette(brewer.pal(9, "Blues")) heatmap_colors <- custom_palette(100) #colors <- heatmap_color_default(100) png("figures/heatmap.png", width=1200, height=2400) #par(mar=c(2, 2, 2, 2)) , lwid=1 lhei=c(0.7, 10)) # Adjust height of color keys keysize=0.3, heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row', scale='row',trace='none',col=heatmap_colors, cexRow=1.2, cexCol=1.5, RowSideColors = mycol, ColSideColors = sampleCols, srtCol=60, labRow=row.names(datamat), key=TRUE, margins=c(10, 15), lhei=c(0.7, 15), lwid=c(1,8)) dev.off() ``` ```{r, echo=TRUE, warning=FALSE, fig.cap="Heatmap", out.width = '100%', fig.align= "center"} knitr::include_graphics("./figures/heatmap.png") ``` ```{r, echo=FALSE, warning=FALSE} #It is possible to use different distances and different multivaraite methods. Many different built-in distances can be used. #dist_methods <- unlist(distanceMethodList) #print(dist_methods) ``` \pagebreak ```{r, echo=FALSE, warning=FALSE} library(stringr) #for id in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66; do #echo "tax_table(ps_abund_rel)[${id},\"Domain\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Domain\"], \"__\")[[1]][2]" #echo "tax_table(ps_abund_rel)[${id},\"Phylum\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Phylum\"], \"__\")[[1]][2]" #echo "tax_table(ps_abund_rel)[${id},\"Class\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Class\"], \"__\")[[1]][2]" #echo "tax_table(ps_abund_rel)[${id},\"Order\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Order\"], \"__\")[[1]][2]" #echo "tax_table(ps_abund_rel)[${id},\"Family\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Family\"], \"__\")[[1]][2]" #echo "tax_table(ps_abund_rel)[${id},\"Genus\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Genus\"], \"__\")[[1]][2]" #echo "tax_table(ps_abund_rel)[${id},\"Species\"] <- str_split(tax_table(ps_abund_rel)[${id},\"Species\"], \"__\")[[1]][2]" #done tax_table(ps_abund_rel)[1,"Domain"] <- str_split(tax_table(ps_abund_rel)[1,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[1,"Phylum"] <- str_split(tax_table(ps_abund_rel)[1,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[1,"Class"] <- str_split(tax_table(ps_abund_rel)[1,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[1,"Order"] <- str_split(tax_table(ps_abund_rel)[1,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[1,"Family"] <- str_split(tax_table(ps_abund_rel)[1,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[1,"Genus"] <- str_split(tax_table(ps_abund_rel)[1,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[1,"Species"] <- str_split(tax_table(ps_abund_rel)[1,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[2,"Domain"] <- str_split(tax_table(ps_abund_rel)[2,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[2,"Phylum"] <- str_split(tax_table(ps_abund_rel)[2,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[2,"Class"] <- str_split(tax_table(ps_abund_rel)[2,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[2,"Order"] <- str_split(tax_table(ps_abund_rel)[2,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[2,"Family"] <- str_split(tax_table(ps_abund_rel)[2,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[2,"Genus"] <- str_split(tax_table(ps_abund_rel)[2,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[2,"Species"] <- str_split(tax_table(ps_abund_rel)[2,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[3,"Domain"] <- str_split(tax_table(ps_abund_rel)[3,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[3,"Phylum"] <- str_split(tax_table(ps_abund_rel)[3,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[3,"Class"] <- str_split(tax_table(ps_abund_rel)[3,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[3,"Order"] <- str_split(tax_table(ps_abund_rel)[3,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[3,"Family"] <- str_split(tax_table(ps_abund_rel)[3,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[3,"Genus"] <- str_split(tax_table(ps_abund_rel)[3,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[3,"Species"] <- str_split(tax_table(ps_abund_rel)[3,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[4,"Domain"] <- str_split(tax_table(ps_abund_rel)[4,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[4,"Phylum"] <- str_split(tax_table(ps_abund_rel)[4,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[4,"Class"] <- str_split(tax_table(ps_abund_rel)[4,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[4,"Order"] <- str_split(tax_table(ps_abund_rel)[4,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[4,"Family"] <- str_split(tax_table(ps_abund_rel)[4,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[4,"Genus"] <- str_split(tax_table(ps_abund_rel)[4,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[4,"Species"] <- str_split(tax_table(ps_abund_rel)[4,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[5,"Domain"] <- str_split(tax_table(ps_abund_rel)[5,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[5,"Phylum"] <- str_split(tax_table(ps_abund_rel)[5,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[5,"Class"] <- str_split(tax_table(ps_abund_rel)[5,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[5,"Order"] <- str_split(tax_table(ps_abund_rel)[5,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[5,"Family"] <- str_split(tax_table(ps_abund_rel)[5,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[5,"Genus"] <- str_split(tax_table(ps_abund_rel)[5,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[5,"Species"] <- str_split(tax_table(ps_abund_rel)[5,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[6,"Domain"] <- str_split(tax_table(ps_abund_rel)[6,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[6,"Phylum"] <- str_split(tax_table(ps_abund_rel)[6,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[6,"Class"] <- str_split(tax_table(ps_abund_rel)[6,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[6,"Order"] <- str_split(tax_table(ps_abund_rel)[6,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[6,"Family"] <- str_split(tax_table(ps_abund_rel)[6,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[6,"Genus"] <- str_split(tax_table(ps_abund_rel)[6,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[6,"Species"] <- str_split(tax_table(ps_abund_rel)[6,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[7,"Domain"] <- str_split(tax_table(ps_abund_rel)[7,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[7,"Phylum"] <- str_split(tax_table(ps_abund_rel)[7,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[7,"Class"] <- str_split(tax_table(ps_abund_rel)[7,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[7,"Order"] <- str_split(tax_table(ps_abund_rel)[7,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[7,"Family"] <- str_split(tax_table(ps_abund_rel)[7,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[7,"Genus"] <- str_split(tax_table(ps_abund_rel)[7,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[7,"Species"] <- str_split(tax_table(ps_abund_rel)[7,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[8,"Domain"] <- str_split(tax_table(ps_abund_rel)[8,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[8,"Phylum"] <- str_split(tax_table(ps_abund_rel)[8,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[8,"Class"] <- str_split(tax_table(ps_abund_rel)[8,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[8,"Order"] <- str_split(tax_table(ps_abund_rel)[8,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[8,"Family"] <- str_split(tax_table(ps_abund_rel)[8,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[8,"Genus"] <- str_split(tax_table(ps_abund_rel)[8,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[8,"Species"] <- str_split(tax_table(ps_abund_rel)[8,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[9,"Domain"] <- str_split(tax_table(ps_abund_rel)[9,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[9,"Phylum"] <- str_split(tax_table(ps_abund_rel)[9,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[9,"Class"] <- str_split(tax_table(ps_abund_rel)[9,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[9,"Order"] <- str_split(tax_table(ps_abund_rel)[9,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[9,"Family"] <- str_split(tax_table(ps_abund_rel)[9,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[9,"Genus"] <- str_split(tax_table(ps_abund_rel)[9,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[9,"Species"] <- str_split(tax_table(ps_abund_rel)[9,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[10,"Domain"] <- str_split(tax_table(ps_abund_rel)[10,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[10,"Phylum"] <- str_split(tax_table(ps_abund_rel)[10,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[10,"Class"] <- str_split(tax_table(ps_abund_rel)[10,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[10,"Order"] <- str_split(tax_table(ps_abund_rel)[10,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[10,"Family"] <- str_split(tax_table(ps_abund_rel)[10,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[10,"Genus"] <- str_split(tax_table(ps_abund_rel)[10,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[10,"Species"] <- str_split(tax_table(ps_abund_rel)[10,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[11,"Domain"] <- str_split(tax_table(ps_abund_rel)[11,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[11,"Phylum"] <- str_split(tax_table(ps_abund_rel)[11,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[11,"Class"] <- str_split(tax_table(ps_abund_rel)[11,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[11,"Order"] <- str_split(tax_table(ps_abund_rel)[11,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[11,"Family"] <- str_split(tax_table(ps_abund_rel)[11,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[11,"Genus"] <- str_split(tax_table(ps_abund_rel)[11,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[11,"Species"] <- str_split(tax_table(ps_abund_rel)[11,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[12,"Domain"] <- str_split(tax_table(ps_abund_rel)[12,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[12,"Phylum"] <- str_split(tax_table(ps_abund_rel)[12,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[12,"Class"] <- str_split(tax_table(ps_abund_rel)[12,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[12,"Order"] <- str_split(tax_table(ps_abund_rel)[12,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[12,"Family"] <- str_split(tax_table(ps_abund_rel)[12,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[12,"Genus"] <- str_split(tax_table(ps_abund_rel)[12,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[12,"Species"] <- str_split(tax_table(ps_abund_rel)[12,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[13,"Domain"] <- str_split(tax_table(ps_abund_rel)[13,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[13,"Phylum"] <- str_split(tax_table(ps_abund_rel)[13,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[13,"Class"] <- str_split(tax_table(ps_abund_rel)[13,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[13,"Order"] <- str_split(tax_table(ps_abund_rel)[13,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[13,"Family"] <- str_split(tax_table(ps_abund_rel)[13,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[13,"Genus"] <- str_split(tax_table(ps_abund_rel)[13,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[13,"Species"] <- str_split(tax_table(ps_abund_rel)[13,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[14,"Domain"] <- str_split(tax_table(ps_abund_rel)[14,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[14,"Phylum"] <- str_split(tax_table(ps_abund_rel)[14,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[14,"Class"] <- str_split(tax_table(ps_abund_rel)[14,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[14,"Order"] <- str_split(tax_table(ps_abund_rel)[14,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[14,"Family"] <- str_split(tax_table(ps_abund_rel)[14,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[14,"Genus"] <- str_split(tax_table(ps_abund_rel)[14,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[14,"Species"] <- str_split(tax_table(ps_abund_rel)[14,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[15,"Domain"] <- str_split(tax_table(ps_abund_rel)[15,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[15,"Phylum"] <- str_split(tax_table(ps_abund_rel)[15,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[15,"Class"] <- str_split(tax_table(ps_abund_rel)[15,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[15,"Order"] <- str_split(tax_table(ps_abund_rel)[15,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[15,"Family"] <- str_split(tax_table(ps_abund_rel)[15,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[15,"Genus"] <- str_split(tax_table(ps_abund_rel)[15,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[15,"Species"] <- str_split(tax_table(ps_abund_rel)[15,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[16,"Domain"] <- str_split(tax_table(ps_abund_rel)[16,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[16,"Phylum"] <- str_split(tax_table(ps_abund_rel)[16,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[16,"Class"] <- str_split(tax_table(ps_abund_rel)[16,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[16,"Order"] <- str_split(tax_table(ps_abund_rel)[16,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[16,"Family"] <- str_split(tax_table(ps_abund_rel)[16,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[16,"Genus"] <- str_split(tax_table(ps_abund_rel)[16,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[16,"Species"] <- str_split(tax_table(ps_abund_rel)[16,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[17,"Domain"] <- str_split(tax_table(ps_abund_rel)[17,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[17,"Phylum"] <- str_split(tax_table(ps_abund_rel)[17,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[17,"Class"] <- str_split(tax_table(ps_abund_rel)[17,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[17,"Order"] <- str_split(tax_table(ps_abund_rel)[17,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[17,"Family"] <- str_split(tax_table(ps_abund_rel)[17,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[17,"Genus"] <- str_split(tax_table(ps_abund_rel)[17,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[17,"Species"] <- str_split(tax_table(ps_abund_rel)[17,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[18,"Domain"] <- str_split(tax_table(ps_abund_rel)[18,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[18,"Phylum"] <- str_split(tax_table(ps_abund_rel)[18,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[18,"Class"] <- str_split(tax_table(ps_abund_rel)[18,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[18,"Order"] <- str_split(tax_table(ps_abund_rel)[18,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[18,"Family"] <- str_split(tax_table(ps_abund_rel)[18,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[18,"Genus"] <- str_split(tax_table(ps_abund_rel)[18,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[18,"Species"] <- str_split(tax_table(ps_abund_rel)[18,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[19,"Domain"] <- str_split(tax_table(ps_abund_rel)[19,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[19,"Phylum"] <- str_split(tax_table(ps_abund_rel)[19,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[19,"Class"] <- str_split(tax_table(ps_abund_rel)[19,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[19,"Order"] <- str_split(tax_table(ps_abund_rel)[19,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[19,"Family"] <- str_split(tax_table(ps_abund_rel)[19,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[19,"Genus"] <- str_split(tax_table(ps_abund_rel)[19,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[19,"Species"] <- str_split(tax_table(ps_abund_rel)[19,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[20,"Domain"] <- str_split(tax_table(ps_abund_rel)[20,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[20,"Phylum"] <- str_split(tax_table(ps_abund_rel)[20,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[20,"Class"] <- str_split(tax_table(ps_abund_rel)[20,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[20,"Order"] <- str_split(tax_table(ps_abund_rel)[20,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[20,"Family"] <- str_split(tax_table(ps_abund_rel)[20,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[20,"Genus"] <- str_split(tax_table(ps_abund_rel)[20,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[20,"Species"] <- str_split(tax_table(ps_abund_rel)[20,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[21,"Domain"] <- str_split(tax_table(ps_abund_rel)[21,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[21,"Phylum"] <- str_split(tax_table(ps_abund_rel)[21,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[21,"Class"] <- str_split(tax_table(ps_abund_rel)[21,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[21,"Order"] <- str_split(tax_table(ps_abund_rel)[21,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[21,"Family"] <- str_split(tax_table(ps_abund_rel)[21,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[21,"Genus"] <- str_split(tax_table(ps_abund_rel)[21,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[21,"Species"] <- str_split(tax_table(ps_abund_rel)[21,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[22,"Domain"] <- str_split(tax_table(ps_abund_rel)[22,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[22,"Phylum"] <- str_split(tax_table(ps_abund_rel)[22,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[22,"Class"] <- str_split(tax_table(ps_abund_rel)[22,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[22,"Order"] <- str_split(tax_table(ps_abund_rel)[22,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[22,"Family"] <- str_split(tax_table(ps_abund_rel)[22,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[22,"Genus"] <- str_split(tax_table(ps_abund_rel)[22,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[22,"Species"] <- str_split(tax_table(ps_abund_rel)[22,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[23,"Domain"] <- str_split(tax_table(ps_abund_rel)[23,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[23,"Phylum"] <- str_split(tax_table(ps_abund_rel)[23,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[23,"Class"] <- str_split(tax_table(ps_abund_rel)[23,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[23,"Order"] <- str_split(tax_table(ps_abund_rel)[23,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[23,"Family"] <- str_split(tax_table(ps_abund_rel)[23,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[23,"Genus"] <- str_split(tax_table(ps_abund_rel)[23,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[23,"Species"] <- str_split(tax_table(ps_abund_rel)[23,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[24,"Domain"] <- str_split(tax_table(ps_abund_rel)[24,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[24,"Phylum"] <- str_split(tax_table(ps_abund_rel)[24,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[24,"Class"] <- str_split(tax_table(ps_abund_rel)[24,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[24,"Order"] <- str_split(tax_table(ps_abund_rel)[24,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[24,"Family"] <- str_split(tax_table(ps_abund_rel)[24,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[24,"Genus"] <- str_split(tax_table(ps_abund_rel)[24,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[24,"Species"] <- str_split(tax_table(ps_abund_rel)[24,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[25,"Domain"] <- str_split(tax_table(ps_abund_rel)[25,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[25,"Phylum"] <- str_split(tax_table(ps_abund_rel)[25,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[25,"Class"] <- str_split(tax_table(ps_abund_rel)[25,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[25,"Order"] <- str_split(tax_table(ps_abund_rel)[25,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[25,"Family"] <- str_split(tax_table(ps_abund_rel)[25,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[25,"Genus"] <- str_split(tax_table(ps_abund_rel)[25,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[25,"Species"] <- str_split(tax_table(ps_abund_rel)[25,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[26,"Domain"] <- str_split(tax_table(ps_abund_rel)[26,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[26,"Phylum"] <- str_split(tax_table(ps_abund_rel)[26,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[26,"Class"] <- str_split(tax_table(ps_abund_rel)[26,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[26,"Order"] <- str_split(tax_table(ps_abund_rel)[26,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[26,"Family"] <- str_split(tax_table(ps_abund_rel)[26,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[26,"Genus"] <- str_split(tax_table(ps_abund_rel)[26,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[26,"Species"] <- str_split(tax_table(ps_abund_rel)[26,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[27,"Domain"] <- str_split(tax_table(ps_abund_rel)[27,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[27,"Phylum"] <- str_split(tax_table(ps_abund_rel)[27,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[27,"Class"] <- str_split(tax_table(ps_abund_rel)[27,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[27,"Order"] <- str_split(tax_table(ps_abund_rel)[27,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[27,"Family"] <- str_split(tax_table(ps_abund_rel)[27,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[27,"Genus"] <- str_split(tax_table(ps_abund_rel)[27,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[27,"Species"] <- str_split(tax_table(ps_abund_rel)[27,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[28,"Domain"] <- str_split(tax_table(ps_abund_rel)[28,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[28,"Phylum"] <- str_split(tax_table(ps_abund_rel)[28,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[28,"Class"] <- str_split(tax_table(ps_abund_rel)[28,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[28,"Order"] <- str_split(tax_table(ps_abund_rel)[28,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[28,"Family"] <- str_split(tax_table(ps_abund_rel)[28,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[28,"Genus"] <- str_split(tax_table(ps_abund_rel)[28,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[28,"Species"] <- str_split(tax_table(ps_abund_rel)[28,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[29,"Domain"] <- str_split(tax_table(ps_abund_rel)[29,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[29,"Phylum"] <- str_split(tax_table(ps_abund_rel)[29,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[29,"Class"] <- str_split(tax_table(ps_abund_rel)[29,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[29,"Order"] <- str_split(tax_table(ps_abund_rel)[29,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[29,"Family"] <- str_split(tax_table(ps_abund_rel)[29,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[29,"Genus"] <- str_split(tax_table(ps_abund_rel)[29,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[29,"Species"] <- str_split(tax_table(ps_abund_rel)[29,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[30,"Domain"] <- str_split(tax_table(ps_abund_rel)[30,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[30,"Phylum"] <- str_split(tax_table(ps_abund_rel)[30,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[30,"Class"] <- str_split(tax_table(ps_abund_rel)[30,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[30,"Order"] <- str_split(tax_table(ps_abund_rel)[30,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[30,"Family"] <- str_split(tax_table(ps_abund_rel)[30,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[30,"Genus"] <- str_split(tax_table(ps_abund_rel)[30,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[30,"Species"] <- str_split(tax_table(ps_abund_rel)[30,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[31,"Domain"] <- str_split(tax_table(ps_abund_rel)[31,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[31,"Phylum"] <- str_split(tax_table(ps_abund_rel)[31,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[31,"Class"] <- str_split(tax_table(ps_abund_rel)[31,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[31,"Order"] <- str_split(tax_table(ps_abund_rel)[31,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[31,"Family"] <- str_split(tax_table(ps_abund_rel)[31,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[31,"Genus"] <- str_split(tax_table(ps_abund_rel)[31,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[31,"Species"] <- str_split(tax_table(ps_abund_rel)[31,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[32,"Domain"] <- str_split(tax_table(ps_abund_rel)[32,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[32,"Phylum"] <- str_split(tax_table(ps_abund_rel)[32,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[32,"Class"] <- str_split(tax_table(ps_abund_rel)[32,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[32,"Order"] <- str_split(tax_table(ps_abund_rel)[32,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[32,"Family"] <- str_split(tax_table(ps_abund_rel)[32,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[32,"Genus"] <- str_split(tax_table(ps_abund_rel)[32,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[32,"Species"] <- str_split(tax_table(ps_abund_rel)[32,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[33,"Domain"] <- str_split(tax_table(ps_abund_rel)[33,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[33,"Phylum"] <- str_split(tax_table(ps_abund_rel)[33,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[33,"Class"] <- str_split(tax_table(ps_abund_rel)[33,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[33,"Order"] <- str_split(tax_table(ps_abund_rel)[33,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[33,"Family"] <- str_split(tax_table(ps_abund_rel)[33,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[33,"Genus"] <- str_split(tax_table(ps_abund_rel)[33,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[33,"Species"] <- str_split(tax_table(ps_abund_rel)[33,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[34,"Domain"] <- str_split(tax_table(ps_abund_rel)[34,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[34,"Phylum"] <- str_split(tax_table(ps_abund_rel)[34,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[34,"Class"] <- str_split(tax_table(ps_abund_rel)[34,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[34,"Order"] <- str_split(tax_table(ps_abund_rel)[34,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[34,"Family"] <- str_split(tax_table(ps_abund_rel)[34,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[34,"Genus"] <- str_split(tax_table(ps_abund_rel)[34,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[34,"Species"] <- str_split(tax_table(ps_abund_rel)[34,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[35,"Domain"] <- str_split(tax_table(ps_abund_rel)[35,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[35,"Phylum"] <- str_split(tax_table(ps_abund_rel)[35,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[35,"Class"] <- str_split(tax_table(ps_abund_rel)[35,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[35,"Order"] <- str_split(tax_table(ps_abund_rel)[35,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[35,"Family"] <- str_split(tax_table(ps_abund_rel)[35,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[35,"Genus"] <- str_split(tax_table(ps_abund_rel)[35,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[35,"Species"] <- str_split(tax_table(ps_abund_rel)[35,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[36,"Domain"] <- str_split(tax_table(ps_abund_rel)[36,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[36,"Phylum"] <- str_split(tax_table(ps_abund_rel)[36,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[36,"Class"] <- str_split(tax_table(ps_abund_rel)[36,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[36,"Order"] <- str_split(tax_table(ps_abund_rel)[36,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[36,"Family"] <- str_split(tax_table(ps_abund_rel)[36,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[36,"Genus"] <- str_split(tax_table(ps_abund_rel)[36,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[36,"Species"] <- str_split(tax_table(ps_abund_rel)[36,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[37,"Domain"] <- str_split(tax_table(ps_abund_rel)[37,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[37,"Phylum"] <- str_split(tax_table(ps_abund_rel)[37,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[37,"Class"] <- str_split(tax_table(ps_abund_rel)[37,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[37,"Order"] <- str_split(tax_table(ps_abund_rel)[37,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[37,"Family"] <- str_split(tax_table(ps_abund_rel)[37,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[37,"Genus"] <- str_split(tax_table(ps_abund_rel)[37,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[37,"Species"] <- str_split(tax_table(ps_abund_rel)[37,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[38,"Domain"] <- str_split(tax_table(ps_abund_rel)[38,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[38,"Phylum"] <- str_split(tax_table(ps_abund_rel)[38,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[38,"Class"] <- str_split(tax_table(ps_abund_rel)[38,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[38,"Order"] <- str_split(tax_table(ps_abund_rel)[38,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[38,"Family"] <- str_split(tax_table(ps_abund_rel)[38,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[38,"Genus"] <- str_split(tax_table(ps_abund_rel)[38,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[38,"Species"] <- str_split(tax_table(ps_abund_rel)[38,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[39,"Domain"] <- str_split(tax_table(ps_abund_rel)[39,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[39,"Phylum"] <- str_split(tax_table(ps_abund_rel)[39,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[39,"Class"] <- str_split(tax_table(ps_abund_rel)[39,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[39,"Order"] <- str_split(tax_table(ps_abund_rel)[39,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[39,"Family"] <- str_split(tax_table(ps_abund_rel)[39,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[39,"Genus"] <- str_split(tax_table(ps_abund_rel)[39,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[39,"Species"] <- str_split(tax_table(ps_abund_rel)[39,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[40,"Domain"] <- str_split(tax_table(ps_abund_rel)[40,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[40,"Phylum"] <- str_split(tax_table(ps_abund_rel)[40,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[40,"Class"] <- str_split(tax_table(ps_abund_rel)[40,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[40,"Order"] <- str_split(tax_table(ps_abund_rel)[40,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[40,"Family"] <- str_split(tax_table(ps_abund_rel)[40,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[40,"Genus"] <- str_split(tax_table(ps_abund_rel)[40,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[40,"Species"] <- str_split(tax_table(ps_abund_rel)[40,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[41,"Domain"] <- str_split(tax_table(ps_abund_rel)[41,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[41,"Phylum"] <- str_split(tax_table(ps_abund_rel)[41,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[41,"Class"] <- str_split(tax_table(ps_abund_rel)[41,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[41,"Order"] <- str_split(tax_table(ps_abund_rel)[41,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[41,"Family"] <- str_split(tax_table(ps_abund_rel)[41,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[41,"Genus"] <- str_split(tax_table(ps_abund_rel)[41,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[41,"Species"] <- str_split(tax_table(ps_abund_rel)[41,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[42,"Domain"] <- str_split(tax_table(ps_abund_rel)[42,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[42,"Phylum"] <- str_split(tax_table(ps_abund_rel)[42,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[42,"Class"] <- str_split(tax_table(ps_abund_rel)[42,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[42,"Order"] <- str_split(tax_table(ps_abund_rel)[42,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[42,"Family"] <- str_split(tax_table(ps_abund_rel)[42,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[42,"Genus"] <- str_split(tax_table(ps_abund_rel)[42,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[42,"Species"] <- str_split(tax_table(ps_abund_rel)[42,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[43,"Domain"] <- str_split(tax_table(ps_abund_rel)[43,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[43,"Phylum"] <- str_split(tax_table(ps_abund_rel)[43,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[43,"Class"] <- str_split(tax_table(ps_abund_rel)[43,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[43,"Order"] <- str_split(tax_table(ps_abund_rel)[43,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[43,"Family"] <- str_split(tax_table(ps_abund_rel)[43,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[43,"Genus"] <- str_split(tax_table(ps_abund_rel)[43,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[43,"Species"] <- str_split(tax_table(ps_abund_rel)[43,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[44,"Domain"] <- str_split(tax_table(ps_abund_rel)[44,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[44,"Phylum"] <- str_split(tax_table(ps_abund_rel)[44,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[44,"Class"] <- str_split(tax_table(ps_abund_rel)[44,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[44,"Order"] <- str_split(tax_table(ps_abund_rel)[44,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[44,"Family"] <- str_split(tax_table(ps_abund_rel)[44,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[44,"Genus"] <- str_split(tax_table(ps_abund_rel)[44,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[44,"Species"] <- str_split(tax_table(ps_abund_rel)[44,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[45,"Domain"] <- str_split(tax_table(ps_abund_rel)[45,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[45,"Phylum"] <- str_split(tax_table(ps_abund_rel)[45,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[45,"Class"] <- str_split(tax_table(ps_abund_rel)[45,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[45,"Order"] <- str_split(tax_table(ps_abund_rel)[45,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[45,"Family"] <- str_split(tax_table(ps_abund_rel)[45,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[45,"Genus"] <- str_split(tax_table(ps_abund_rel)[45,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[45,"Species"] <- str_split(tax_table(ps_abund_rel)[45,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[46,"Domain"] <- str_split(tax_table(ps_abund_rel)[46,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[46,"Phylum"] <- str_split(tax_table(ps_abund_rel)[46,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[46,"Class"] <- str_split(tax_table(ps_abund_rel)[46,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[46,"Order"] <- str_split(tax_table(ps_abund_rel)[46,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[46,"Family"] <- str_split(tax_table(ps_abund_rel)[46,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[46,"Genus"] <- str_split(tax_table(ps_abund_rel)[46,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[46,"Species"] <- str_split(tax_table(ps_abund_rel)[46,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[47,"Domain"] <- str_split(tax_table(ps_abund_rel)[47,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[47,"Phylum"] <- str_split(tax_table(ps_abund_rel)[47,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[47,"Class"] <- str_split(tax_table(ps_abund_rel)[47,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[47,"Order"] <- str_split(tax_table(ps_abund_rel)[47,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[47,"Family"] <- str_split(tax_table(ps_abund_rel)[47,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[47,"Genus"] <- str_split(tax_table(ps_abund_rel)[47,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[47,"Species"] <- str_split(tax_table(ps_abund_rel)[47,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[48,"Domain"] <- str_split(tax_table(ps_abund_rel)[48,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[48,"Phylum"] <- str_split(tax_table(ps_abund_rel)[48,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[48,"Class"] <- str_split(tax_table(ps_abund_rel)[48,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[48,"Order"] <- str_split(tax_table(ps_abund_rel)[48,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[48,"Family"] <- str_split(tax_table(ps_abund_rel)[48,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[48,"Genus"] <- str_split(tax_table(ps_abund_rel)[48,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[48,"Species"] <- str_split(tax_table(ps_abund_rel)[48,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[49,"Domain"] <- str_split(tax_table(ps_abund_rel)[49,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[49,"Phylum"] <- str_split(tax_table(ps_abund_rel)[49,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[49,"Class"] <- str_split(tax_table(ps_abund_rel)[49,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[49,"Order"] <- str_split(tax_table(ps_abund_rel)[49,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[49,"Family"] <- str_split(tax_table(ps_abund_rel)[49,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[49,"Genus"] <- str_split(tax_table(ps_abund_rel)[49,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[49,"Species"] <- str_split(tax_table(ps_abund_rel)[49,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[50,"Domain"] <- str_split(tax_table(ps_abund_rel)[50,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[50,"Phylum"] <- str_split(tax_table(ps_abund_rel)[50,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[50,"Class"] <- str_split(tax_table(ps_abund_rel)[50,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[50,"Order"] <- str_split(tax_table(ps_abund_rel)[50,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[50,"Family"] <- str_split(tax_table(ps_abund_rel)[50,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[50,"Genus"] <- str_split(tax_table(ps_abund_rel)[50,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[50,"Species"] <- str_split(tax_table(ps_abund_rel)[50,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[51,"Domain"] <- str_split(tax_table(ps_abund_rel)[51,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[51,"Phylum"] <- str_split(tax_table(ps_abund_rel)[51,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[51,"Class"] <- str_split(tax_table(ps_abund_rel)[51,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[51,"Order"] <- str_split(tax_table(ps_abund_rel)[51,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[51,"Family"] <- str_split(tax_table(ps_abund_rel)[51,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[51,"Genus"] <- str_split(tax_table(ps_abund_rel)[51,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[51,"Species"] <- str_split(tax_table(ps_abund_rel)[51,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[52,"Domain"] <- str_split(tax_table(ps_abund_rel)[52,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[52,"Phylum"] <- str_split(tax_table(ps_abund_rel)[52,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[52,"Class"] <- str_split(tax_table(ps_abund_rel)[52,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[52,"Order"] <- str_split(tax_table(ps_abund_rel)[52,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[52,"Family"] <- str_split(tax_table(ps_abund_rel)[52,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[52,"Genus"] <- str_split(tax_table(ps_abund_rel)[52,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[52,"Species"] <- str_split(tax_table(ps_abund_rel)[52,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[53,"Domain"] <- str_split(tax_table(ps_abund_rel)[53,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[53,"Phylum"] <- str_split(tax_table(ps_abund_rel)[53,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[53,"Class"] <- str_split(tax_table(ps_abund_rel)[53,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[53,"Order"] <- str_split(tax_table(ps_abund_rel)[53,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[53,"Family"] <- str_split(tax_table(ps_abund_rel)[53,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[53,"Genus"] <- str_split(tax_table(ps_abund_rel)[53,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[53,"Species"] <- str_split(tax_table(ps_abund_rel)[53,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[54,"Domain"] <- str_split(tax_table(ps_abund_rel)[54,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[54,"Phylum"] <- str_split(tax_table(ps_abund_rel)[54,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[54,"Class"] <- str_split(tax_table(ps_abund_rel)[54,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[54,"Order"] <- str_split(tax_table(ps_abund_rel)[54,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[54,"Family"] <- str_split(tax_table(ps_abund_rel)[54,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[54,"Genus"] <- str_split(tax_table(ps_abund_rel)[54,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[54,"Species"] <- str_split(tax_table(ps_abund_rel)[54,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[55,"Domain"] <- str_split(tax_table(ps_abund_rel)[55,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[55,"Phylum"] <- str_split(tax_table(ps_abund_rel)[55,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[55,"Class"] <- str_split(tax_table(ps_abund_rel)[55,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[55,"Order"] <- str_split(tax_table(ps_abund_rel)[55,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[55,"Family"] <- str_split(tax_table(ps_abund_rel)[55,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[55,"Genus"] <- str_split(tax_table(ps_abund_rel)[55,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[55,"Species"] <- str_split(tax_table(ps_abund_rel)[55,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[56,"Domain"] <- str_split(tax_table(ps_abund_rel)[56,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[56,"Phylum"] <- str_split(tax_table(ps_abund_rel)[56,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[56,"Class"] <- str_split(tax_table(ps_abund_rel)[56,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[56,"Order"] <- str_split(tax_table(ps_abund_rel)[56,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[56,"Family"] <- str_split(tax_table(ps_abund_rel)[56,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[56,"Genus"] <- str_split(tax_table(ps_abund_rel)[56,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[56,"Species"] <- str_split(tax_table(ps_abund_rel)[56,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[57,"Domain"] <- str_split(tax_table(ps_abund_rel)[57,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[57,"Phylum"] <- str_split(tax_table(ps_abund_rel)[57,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[57,"Class"] <- str_split(tax_table(ps_abund_rel)[57,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[57,"Order"] <- str_split(tax_table(ps_abund_rel)[57,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[57,"Family"] <- str_split(tax_table(ps_abund_rel)[57,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[57,"Genus"] <- str_split(tax_table(ps_abund_rel)[57,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[57,"Species"] <- str_split(tax_table(ps_abund_rel)[57,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[58,"Domain"] <- str_split(tax_table(ps_abund_rel)[58,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[58,"Phylum"] <- str_split(tax_table(ps_abund_rel)[58,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[58,"Class"] <- str_split(tax_table(ps_abund_rel)[58,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[58,"Order"] <- str_split(tax_table(ps_abund_rel)[58,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[58,"Family"] <- str_split(tax_table(ps_abund_rel)[58,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[58,"Genus"] <- str_split(tax_table(ps_abund_rel)[58,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[58,"Species"] <- str_split(tax_table(ps_abund_rel)[58,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[59,"Domain"] <- str_split(tax_table(ps_abund_rel)[59,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[59,"Phylum"] <- str_split(tax_table(ps_abund_rel)[59,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[59,"Class"] <- str_split(tax_table(ps_abund_rel)[59,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[59,"Order"] <- str_split(tax_table(ps_abund_rel)[59,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[59,"Family"] <- str_split(tax_table(ps_abund_rel)[59,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[59,"Genus"] <- str_split(tax_table(ps_abund_rel)[59,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[59,"Species"] <- str_split(tax_table(ps_abund_rel)[59,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[60,"Domain"] <- str_split(tax_table(ps_abund_rel)[60,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[60,"Phylum"] <- str_split(tax_table(ps_abund_rel)[60,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[60,"Class"] <- str_split(tax_table(ps_abund_rel)[60,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[60,"Order"] <- str_split(tax_table(ps_abund_rel)[60,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[60,"Family"] <- str_split(tax_table(ps_abund_rel)[60,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[60,"Genus"] <- str_split(tax_table(ps_abund_rel)[60,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[60,"Species"] <- str_split(tax_table(ps_abund_rel)[60,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[61,"Domain"] <- str_split(tax_table(ps_abund_rel)[61,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[61,"Phylum"] <- str_split(tax_table(ps_abund_rel)[61,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[61,"Class"] <- str_split(tax_table(ps_abund_rel)[61,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[61,"Order"] <- str_split(tax_table(ps_abund_rel)[61,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[61,"Family"] <- str_split(tax_table(ps_abund_rel)[61,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[61,"Genus"] <- str_split(tax_table(ps_abund_rel)[61,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[61,"Species"] <- str_split(tax_table(ps_abund_rel)[61,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[62,"Domain"] <- str_split(tax_table(ps_abund_rel)[62,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[62,"Phylum"] <- str_split(tax_table(ps_abund_rel)[62,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[62,"Class"] <- str_split(tax_table(ps_abund_rel)[62,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[62,"Order"] <- str_split(tax_table(ps_abund_rel)[62,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[62,"Family"] <- str_split(tax_table(ps_abund_rel)[62,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[62,"Genus"] <- str_split(tax_table(ps_abund_rel)[62,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[62,"Species"] <- str_split(tax_table(ps_abund_rel)[62,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[63,"Domain"] <- str_split(tax_table(ps_abund_rel)[63,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[63,"Phylum"] <- str_split(tax_table(ps_abund_rel)[63,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[63,"Class"] <- str_split(tax_table(ps_abund_rel)[63,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[63,"Order"] <- str_split(tax_table(ps_abund_rel)[63,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[63,"Family"] <- str_split(tax_table(ps_abund_rel)[63,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[63,"Genus"] <- str_split(tax_table(ps_abund_rel)[63,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[63,"Species"] <- str_split(tax_table(ps_abund_rel)[63,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[64,"Domain"] <- str_split(tax_table(ps_abund_rel)[64,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[64,"Phylum"] <- str_split(tax_table(ps_abund_rel)[64,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[64,"Class"] <- str_split(tax_table(ps_abund_rel)[64,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[64,"Order"] <- str_split(tax_table(ps_abund_rel)[64,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[64,"Family"] <- str_split(tax_table(ps_abund_rel)[64,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[64,"Genus"] <- str_split(tax_table(ps_abund_rel)[64,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[64,"Species"] <- str_split(tax_table(ps_abund_rel)[64,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[65,"Domain"] <- str_split(tax_table(ps_abund_rel)[65,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[65,"Phylum"] <- str_split(tax_table(ps_abund_rel)[65,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[65,"Class"] <- str_split(tax_table(ps_abund_rel)[65,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[65,"Order"] <- str_split(tax_table(ps_abund_rel)[65,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[65,"Family"] <- str_split(tax_table(ps_abund_rel)[65,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[65,"Genus"] <- str_split(tax_table(ps_abund_rel)[65,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[65,"Species"] <- str_split(tax_table(ps_abund_rel)[65,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[66,"Domain"] <- str_split(tax_table(ps_abund_rel)[66,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[66,"Phylum"] <- str_split(tax_table(ps_abund_rel)[66,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[66,"Class"] <- str_split(tax_table(ps_abund_rel)[66,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[66,"Order"] <- str_split(tax_table(ps_abund_rel)[66,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[66,"Family"] <- str_split(tax_table(ps_abund_rel)[66,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[66,"Genus"] <- str_split(tax_table(ps_abund_rel)[66,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[66,"Species"] <- str_split(tax_table(ps_abund_rel)[66,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[67,"Domain"] <- str_split(tax_table(ps_abund_rel)[67,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[67,"Phylum"] <- str_split(tax_table(ps_abund_rel)[67,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[67,"Class"] <- str_split(tax_table(ps_abund_rel)[67,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[67,"Order"] <- str_split(tax_table(ps_abund_rel)[67,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[67,"Family"] <- str_split(tax_table(ps_abund_rel)[67,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[67,"Genus"] <- str_split(tax_table(ps_abund_rel)[67,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[67,"Species"] <- str_split(tax_table(ps_abund_rel)[67,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[68,"Domain"] <- str_split(tax_table(ps_abund_rel)[68,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[68,"Phylum"] <- str_split(tax_table(ps_abund_rel)[68,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[68,"Class"] <- str_split(tax_table(ps_abund_rel)[68,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[68,"Order"] <- str_split(tax_table(ps_abund_rel)[68,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[68,"Family"] <- str_split(tax_table(ps_abund_rel)[68,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[68,"Genus"] <- str_split(tax_table(ps_abund_rel)[68,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[68,"Species"] <- str_split(tax_table(ps_abund_rel)[68,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[69,"Domain"] <- str_split(tax_table(ps_abund_rel)[69,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[69,"Phylum"] <- str_split(tax_table(ps_abund_rel)[69,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[69,"Class"] <- str_split(tax_table(ps_abund_rel)[69,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[69,"Order"] <- str_split(tax_table(ps_abund_rel)[69,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[69,"Family"] <- str_split(tax_table(ps_abund_rel)[69,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[69,"Genus"] <- str_split(tax_table(ps_abund_rel)[69,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[69,"Species"] <- str_split(tax_table(ps_abund_rel)[69,"Species"], "__")[[1]][2] tax_table(ps_abund_rel)[70,"Domain"] <- str_split(tax_table(ps_abund_rel)[70,"Domain"], "__")[[1]][2] tax_table(ps_abund_rel)[70,"Phylum"] <- str_split(tax_table(ps_abund_rel)[70,"Phylum"], "__")[[1]][2] tax_table(ps_abund_rel)[70,"Class"] <- str_split(tax_table(ps_abund_rel)[70,"Class"], "__")[[1]][2] tax_table(ps_abund_rel)[70,"Order"] <- str_split(tax_table(ps_abund_rel)[70,"Order"], "__")[[1]][2] tax_table(ps_abund_rel)[70,"Family"] <- str_split(tax_table(ps_abund_rel)[70,"Family"], "__")[[1]][2] tax_table(ps_abund_rel)[70,"Genus"] <- str_split(tax_table(ps_abund_rel)[70,"Genus"], "__")[[1]][2] tax_table(ps_abund_rel)[70,"Species"] <- str_split(tax_table(ps_abund_rel)[70,"Species"], "__")[[1]][2] ``` # Taxonomic summary at the Phylum Level (16S) ## Bar plots in A* and H* samples ```{r, echo=TRUE, warning=FALSE} # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! TODO_TOMORROW_FROM_HERE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ## ---- Phylum ---- #aes(color="Phylum", fill="Phylum") --> aes() #ggplot(data=data, aes(x=Sample, y=Abundance, fill=Phylum)) #DEBUG: ps_subset_2_type <- merge_samples(ps_subset_2, "Type") #DEBUG: ps_subset_2_type_ = transform_sample_counts(ps_subset_2_type, function(x) x / sum(x)) #DEBUG: ps_subset_2_copied <- data.table::copy(ps_subset_2) #plot_bar(ps_abund_relSampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack") #plot_bar(ps_subset_2_type_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") + #scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black")) # Assuming you have a vector of sample IDs that you are interested in. samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3") # Subsetting the phyloseq object. ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep) # Convert to a data frame for manipulation with dplyr and ggplot ps_data <- psmelt(ps_subset) # Assuming "SampleID" is the column with your sample names ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep) # Now plot using ggplot, with fill determined by "Phylum" ggplot(ps_data, aes(x = Sample, y = Abundance)) + geom_bar(aes(fill = Phylum), stat = "identity") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 60, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2)) samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3") # Subsetting the phyloseq object. ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep) # Convert to a data frame for manipulation with dplyr and ggplot ps_data <- psmelt(ps_subset) # Assuming "SampleID" is the column with your sample names ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep) # Now plot using ggplot, with fill determined by "Phylum" ggplot(ps_data, aes(x = Sample, y = Abundance)) + geom_bar(aes(fill = Phylum), stat = "identity") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 75, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2)) ``` Regroup together A* vs H* samples and normalize number of reads in each group using median sequencing depth. ```{r, echo=TRUE, warning=FALSE} samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3", "H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3") ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep) ps_subset_type <- merge_samples(ps_subset, "PatientType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) #plot_bar(ps_abund_relSampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack") plot_bar(ps_subset_type_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2)) ps_subset_type <- merge_samples(ps_subset, "SampleType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) #plot_bar(ps_abund_relSampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack") plot_bar(ps_subset_type_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2)) + scale_x_discrete(limits = c("admission", "surgery", "discharge")) ``` # Taxonomic summary at the Class Level (16S) ## Bar plots in A* and H* samples ```{r, echo=TRUE, warning=FALSE} #aes(color="Class", fill="Class") --> aes() #ggplot(data=data, aes(x=Sample, y=Abundance, fill=Class)) #DEBUG: ps_subset_2_type <- merge_samples(ps_subset_2, "Type") #DEBUG: ps_subset_2_type_ = transform_sample_counts(ps_subset_2_type, function(x) x / sum(x)) #DEBUG: ps_subset_2_copied <- data.table::copy(ps_subset_2) #plot_bar(ps_abund_relSampleType_, fill = "Class") + geom_bar(aes(color=Class, fill=Class), stat="identity", position="stack") #plot_bar(ps_subset_2_type_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") + #scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black")) # Assuming you have a vector of sample IDs that you are interested in. samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3") # Subsetting the phyloseq object. ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep) # Convert to a data frame for manipulation with dplyr and ggplot ps_data <- psmelt(ps_subset) # Assuming "SampleID" is the column with your sample names ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep) # Now plot using ggplot, with fill determined by "Class" ggplot(ps_data, aes(x = Sample, y = Abundance)) + geom_bar(aes(fill = Class), stat = "identity") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 60, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3)) samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3") # Subsetting the phyloseq object. ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep) # Convert to a data frame for manipulation with dplyr and ggplot ps_data <- psmelt(ps_subset) # Assuming "SampleID" is the column with your sample names ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep) # Now plot using ggplot, with fill determined by "Class" ggplot(ps_data, aes(x = Sample, y = Abundance)) + geom_bar(aes(fill = Class), stat = "identity") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 75, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3)) ``` Regroup together A* vs H* samples and normalize number of reads in each group using median sequencing depth. ```{r, echo=TRUE, warning=FALSE} samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3","H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3") ps_subset <- subset_samples(ps_abund_rel, rownames(sample_data(ps_abund_rel)) %in% samples_to_keep) ps_subset_type <- merge_samples(ps_subset, "PatientType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) #plot_bar(ps_abund_relSampleType_, fill = "Class") + geom_bar(aes(color=Class, fill=Class), stat="identity", position="stack") plot_bar(ps_subset_type_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3)) # In the followin code, I generated a plot, in which x axis is "admission", "discharge", "surgery". I want to change the order to "admission", "surgery" and "discharge". ps_subset_type <- merge_samples(ps_subset, "SampleType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) #plot_bar(ps_abund_relSampleType_, fill = "Class") + geom_bar(aes(color=Class, fill=Class), stat="identity", position="stack") plot_bar(ps_subset_type_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3)) + scale_x_discrete(limits = c("admission", "surgery", "discharge")) ``` \pagebreak # Alpha diversity (16S) ```{r, echo=FALSE, warning=FALSE} #TEXT_FROM_MAINPAGE# Plot Chao1 richness estimator, Observed OTUs, Shannon index, and Phylogenetic diversity. #Regroup together samples from the same group. # using rarefied data #FITTING2: CONSOLE: #gunzip table_even4753.biom.gz #alpha_diversity.py -i table_even9893.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering/rep_set.tre #In this specific block of code (8.3 and 8.4), we are only comparing two groups (Aneurysm vs. Hypophysis). # Because there is only one single comparison, there is no "multiple testing" problem to correct for. # Mathematically, the raw p-value (p) and the adjusted p-value (p.adj) are exactly the same. # Therefore, the ggpubr::compare_means() function is smart enough to realize this and omits the p.adj column entirely to keep the output clean. It only shows the raw p value. # p.format is only rounded values of p.adj! ``` ## Alpha diversity among timepoints in aneurysm samples (Paired analysis) ```{r, echo=FALSE, warning=FALSE} #TEXT_FROM_MAINPAGE# Friedman test for overall longitudinal comparison, and paired t-test / paired Wilcoxon for post-hoc. hmp.div_qiime <- read.csv("adiv_even_A.txt", sep="\t") colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree") div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name") # Keep PatientID for paired statistical tests div.df2 <- div.df[, c("sam_name", "PatientID", "SampleType", "shannon")] colnames(div.df2) <- c("Sample", "PatientID", "Group", "Shannon") # Set the chronological order of the timepoints div.df2$Group <- factor(div.df2$Group, levels = c("admission", "surgery", "discharge")) ``` To evaluate temporal changes in microbial alpha diversity (Shannon index) within the **aneurysm cohort** across three clinical timepoints (admission, surgery, and discharge), a Friedman test was employed. The Friedman test is a non-parametric alternative to one-way repeated-measures ANOVA, specifically designed for comparing three or more paired or matched groups. ```{r, echo=FALSE, warning=FALSE} # Note: Using rstatix::friedman_test to explicitly account for PatientID pairing stat.test.Shannon <- div.df2 %>% friedman_test(Shannon ~ Group | PatientID) knitr::kable(stat.test.Shannon, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` To identify specific temporal shifts between individual timepoints, post-hoc pairwise comparisons were conducted using paired t-tests. ```{r, echo=FALSE, warning=FALSE} pw_t_paired <- div.df2 %>% pairwise_t_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>% # Overwrite the p.adj.signif column with your custom rules mutate( p.adj.signif = case_when( p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns" ) ) knitr::kable(pw_t_paired, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Robustness check: Paired Wilcoxon signed-rank test (non-parametric alternative) ```{r, echo=FALSE, warning=FALSE} pw_wilcox_paired <- div.df2 %>% pairwise_wilcox_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>% # Overwrite the p.adj.signif column with your custom rules mutate( p.adj.signif = case_when( p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns" ) ) knitr::kable(pw_wilcox_paired, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Plotting (displaying paired p-values from paired t-tests) ```{r, echo=FALSE, warning=FALSE} #Why p.adjust.method = "BH" does not work! #1. At first AI suggests me use p.adjust.method = "BH", because if not setting, it default use unadjusted values: #Unadjusted vs. Adjusted P-values (The main culprit): #Your table displays the Benjamini-Hochberg (BH) adjusted p-values (p.adj), where the smallest value is 0.054 (which correctly maps to ns since it is > 0.05). However, stat_compare_means() in ggpubr defaults to plotting the unadjusted raw p-values. For the Wilcoxon test, your raw p-value is 0.018. Because 0.018 < 0.05, the plot automatically draws a * (significant), completely ignoring the multiple-testing correction that was applied in your table. #2. But I use p.adjust.method = "BH", it still does not work! #The reason this is happening is due to a known limitation in ggpubr: stat_compare_means() completely ignores the p.adjust.method argument when you provide a list of comparisons. When you supply comparisons, ggpubr falls back to the ggsignif package under the hood, which calculates each bracket independently using raw, unadjusted p-values. Because your raw p-value is 0.018 (which is < 0.05), it will always draw a *, regardless of what you put in p.adjust.method. #3. AI suggested solution: Use geom_bracket() --> But it has also failed! p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group", palette = c("admission" = "#D55E00", "surgery" = "#E69F00", "discharge" = "#F0E442"), width = 0.5, legend = "none") + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + labs(title = "Shannon Diversity: Aneurysm Samples (Paired timepoints)") + stat_compare_means(comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")), method = "t.test", paired = TRUE, p.adjust.method = "BH", #It does not work in the plot. label = "p.signif", size = 4) print(p) ggsave("./figures/alpha_diversity_Shannon_SampleType_A_paired_ttest.png", device="png", height = 6, width = 7) ggsave("./figures/alpha_diversity_Shannon_SampleType_A_paired_ttest.svg", device="svg", height = 6, width = 7) ``` ## Alpha diversity among timepoints in hypophysis samples (Paired analysis) ```{r, echo=FALSE, warning=FALSE} #TEXT_FROM_MAINPAGE# Friedman test for overall longitudinal comparison, and paired t-test / paired Wilcoxon for post-hoc. hmp.div_qiime <- read.csv("adiv_even_H.txt", sep="\t") colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree") div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name") # Keep PatientID for paired statistical tests div.df2 <- div.df[, c("sam_name", "PatientID", "SampleType", "shannon")] colnames(div.df2) <- c("Sample", "PatientID", "Group", "Shannon") # Set the chronological order of the timepoints div.df2$Group <- factor(div.df2$Group, levels = c("admission", "surgery", "discharge")) ``` To evaluate temporal changes in microbial alpha diversity (Shannon index) within the **hypophysis cohort** across three clinical timepoints (admission, surgery, and discharge), a Friedman test was employed. The Friedman test is a non-parametric alternative to one-way repeated-measures ANOVA, specifically designed for comparing three or more paired or matched groups. ```{r, echo=FALSE, warning=FALSE} # Note: Using rstatix::friedman_test to explicitly account for PatientID pairing stat.test.Shannon <- div.df2 %>% friedman_test(Shannon ~ Group | PatientID) knitr::kable(stat.test.Shannon, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` To identify specific temporal shifts between individual timepoints, post-hoc pairwise comparisons were conducted using paired t-tests. ```{r, echo=FALSE, warning=FALSE} pw_t_paired <- div.df2 %>% pairwise_t_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>% # Overwrite the p.adj.signif column with your custom rules mutate( p.adj.signif = case_when( p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns" ) ) knitr::kable(pw_t_paired, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Robustness check: Paired Wilcoxon signed-rank test (non-parametric alternative) ```{r, echo=FALSE, warning=FALSE} pw_wilcox_paired <- div.df2 %>% pairwise_wilcox_test(Shannon ~ Group, paired = TRUE, p.adjust.method = "BH") %>% # Overwrite the p.adj.signif column with your custom rules mutate( p.adj.signif = case_when( p.adj < 0.001 ~ "***", p.adj < 0.01 ~ "**", p.adj < 0.05 ~ "*", TRUE ~ "ns" ) ) knitr::kable(pw_wilcox_paired, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Plotting (displaying paired p-values from paired t-tests) ```{r, echo=FALSE, warning=FALSE} p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group", palette = c("admission" = "#0072B2", "surgery" = "#56B4E9", "discharge" = "#999999"), width = 0.5, legend = "none") + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + labs(title = "Shannon Diversity: Hypophysis Samples (Paired timepoints)") + stat_compare_means(comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")), method = "t.test", paired = TRUE, p.adjust.method = "BH", #It does not work in the plot. label = "p.signif", size = 4) print(p) ggsave("./figures/alpha_diversity_Shannon_SampleType_H_paired_ttest.png", device="png", height = 6, width = 7) ggsave("./figures/alpha_diversity_Shannon_SampleType_H_paired_ttest.svg", device="svg", height = 6, width = 7) ``` ## Alpha diversity between aneurysm and hypophysis (All timepoints) ```{r, echo=FALSE, warning=FALSE} library(dplyr) library(ggpubr) library(knitr) library(kableExtra) hmp.div_qiime <- read.csv("adiv_even.txt", sep="\t") colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree") div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name") # Extract only Shannon values and grouping variable div.df2 <- div.df[, c("sam_name", "PatientType", "shannon")] colnames(div.df2) <- c("Sample", "Group", "Shannon") ``` Two-group comparisons: Wilcoxon rank-sum test (Non-parametric) & Independent t-test (Parametric) ```{r, echo=FALSE, warning=FALSE} # 1. Non-parametric test (Wilcoxon rank-sum / Mann-Whitney U) stat.test.wilcox <- compare_means(Shannon ~ Group, data = div.df2, method = "wilcox.test") cat("--- Wilcoxon Rank-Sum Test (All timepoints) ---\n") knitr::kable(stat.test.wilcox, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) # 2. Parametric test (Independent two-sample t-test) # Note: We use independent t-test because Aneurysm and Hypophysis are different patients. stat.test.ttest <- compare_means(Shannon ~ Group, data = div.df2, method = "t.test") cat("\n--- Independent t-test (All timepoints) ---\n") knitr::kable(stat.test.ttest, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Plotting (Displaying Wilcoxon rank-sum test) ```{r, echo=FALSE, warning=FALSE} p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group", palette = c("aneurysm" = "#E69F00", "hypophysis" = "#56B4E9"), width = 0.5, legend = "none") + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + labs(title = "Shannon Diversity: Aneurysm vs Hypophysis (All timepoints)") + stat_compare_means(comparisons = list(c("aneurysm", "hypophysis")), method = "wilcox.test", label = "p.signif", size = 5) print(p) ggsave("./figures/alpha_diversity_Shannon_PatientType.png", device="png", height = 6, width = 6) ggsave("./figures/alpha_diversity_Shannon_PatientType.svg", device="svg", height = 6, width = 6) ``` ## Aneurysm vs. Hypophysis AT SURGERY ONLY *New block: Specifically filters for the surgery timepoint to compare the two cohorts without dilution.* ```{r, echo=TRUE, warning=FALSE} hmp.div_qiime <- read.csv("adiv_even.txt", sep="\t") colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree") div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name") # Filter for the surgery timepoint samples. This line ensures that ONLY the "surgery" timepoint is kept for the analysis. div.df_surgery <- div.df %>% filter(SampleType == "surgery") div.df2 <- div.df_surgery[, c("sam_name", "PatientType", "shannon")] colnames(div.df2) <- c("Sample", "Group", "Shannon") ``` Two-group comparisons: Wilcoxon rank-sum test & Independent t-test ```{r, echo=FALSE, warning=FALSE} # 1. Non-parametric test (Wilcoxon) stat.test.wilcox <- compare_means(Shannon ~ Group, data = div.df2, method = "wilcox.test") cat("--- Wilcoxon Rank-Sum Test (Surgery ONLY) ---\n") knitr::kable(stat.test.wilcox, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) # 2. Parametric test (Independent two-sample t-test) stat.test.ttest <- compare_means(Shannon ~ Group, data = div.df2, method = "t.test") cat("\n--- Independent t-test (Surgery ONLY) ---\n") knitr::kable(stat.test.ttest, digits = 3) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Plotting (Displaying Wilcoxon rank-sum test) ```{r, echo=FALSE, warning=FALSE} p <- ggboxplot(div.df2, x = "Group", y = "Shannon", fill = "Group", palette = c("aneurysm" = "#E69F00", "hypophysis" = "#56B4E9"), width = 0.5, legend = "none") + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + labs(title = "Shannon Diversity: Aneurysm vs Hypophysis (Surgery timepoint ONLY)") + stat_compare_means(comparisons = list(c("aneurysm", "hypophysis")), method = "t.test", # Using independent t-test label = "p.signif", size = 5) print(p) ggsave("./figures/alpha_diversity_Shannon_Surgery_ONLY.png", device="png", height = 6, width = 6) ggsave("./figures/alpha_diversity_Shannon_Surgery_ONLY.svg", device="svg", height = 6, width = 6) ``` # Beta diversity (16S) Group differences in microbial community composition (beta-diversity) were assessed using Bray-Curtis dissimilarity matrices. Statistical significance was evaluated using permutational multivariate analysis of variance (PERMANOVA; `adonis2` function in the vegan R package v2.7.2) with 999 permutations. Pairwise comparisons were conducted between all group combinations, and *P*-values were adjusted for multiple testing using the Benjamini-Hochberg false discovery rate (FDR) procedure. Principal coordinates analysis (PCoA) was performed on the Bray-Curtis distance matrix to visualize sample clustering patterns. The first two principal coordinates explained 10.84% (PCo1) and 9.41% (PCo2) of the total variation in community composition. Notably, significant temporal shifts in community structure were observed between the surgery and discharge timepoints in both cohorts: **aneurysm patients (A2 vs. A3: R² = 0.068, *P*~adj~ = 0.007)** and **hypophysis patients (H2 vs. H3: R² = 0.055, *P*~adj~ = 0.002)**, indicating measurable changes in the nasal microbiome during the post-surgical recovery period. Additionally, significant differences were detected between the two patient cohorts at corresponding timepoints (**A1 vs. H1: *P*~adj~ = 0.007; A2 vs. H2: *P*~adj~ = 0.001; A3 vs. H3: *P*~adj~ = 0.049**), suggesting distinct baseline microbiome profiles between aneurysm and hypophysis surgery patients. ```{r, echo=FALSE, warning=FALSE, fig.cap="Beta diversity", out.width = '100%', fig.align= "center"} #file:///home/jhuang/DATA_B/Data_Luise_Epidome_longitudinal_nose/core_diversity_e9893/bdiv_even9893/bdiv_even9893_Group/unweighted_unifrac_boxplots/Group_Stats.txt #beta_diversity_group_stats<-read.csv("unweighted_unifrac_boxplots_Group_Stats.txt",sep="\t") #knitr::kable(beta_diversity_group_stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) library(readxl) bray_pairwise_PERMANOVA <- read_excel("./figures_All_Combined/Bray_pairwise_PERMANOVA.xlsx") #library(openxlsx) #bray_pairwise_PERMANOVA <- read.xlsx("./figures_All_Combined/Bray_pairwise_PERMANOVA.xlsx") #bray_pairwise_PERMANOVA<-read.csv("./figures_All_Combined/Bray_pairwise_PERMANOVA.csv",sep=",") #knitr::kable(bray_pairwise_PERMANOVA) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) # Get total number of rows n_rows <- nrow(bray_pairwise_PERMANOVA) # Identify row indices where adjusted p-value <= 0.05 (excluding NA rows) sig_rows <- which(!is.na(bray_pairwise_PERMANOVA$p_adj_BH) & bray_pairwise_PERMANOVA$p_adj_BH <= 0.05) # Render the table with conditional row coloring knitr::kable(bray_pairwise_PERMANOVA, escape = FALSE, digits=3) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) %>% # Apply yellow background to significant rows (overwrites grey where they overlap) kableExtra::row_spec(sig_rows, background = "yellow", bold = TRUE) knitr::include_graphics("./figures_All_Combined/PCoA.png") knitr::include_graphics("./figures_All_Combined/PCoA3.png") ``` # Differential abundance analysis (16S) Differential abundance analysis was performed to identify operational taxonomic units (OTUs) with significant abundance alterations across three clinical stages (admission, surgery, and discharge). Six independent pairwise comparisons were conducted within each patient cohort (aneurysm and hypophysis). Statistical testing was performed using the DESeq2 R package (v1.46.0) based on a negative binomial generalized linear model (Wald test). *P*-values were adjusted for multiple testing using the Benjamini-Hochberg false discovery rate (FDR) procedure, and OTUs with an adjusted *P*-value < 0.05 were considered statistically significant. ## admission vs. surgery in the aneurysm samples ```{r, echo=TRUE, warning=FALSE} # ------------------------------------------------------------ # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter # ------------------------------------------------------------ ps_deseq <- ps_filt #NOTE: for PICRUSt2 analysis, we used table_even42369.biom as input. #NOTE: ps_deseq for beta-diversity in MicrobiotatProcess ps_deseq_sel2 <- data.table::copy(ps_deseq) otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("A1.1","A2.1","A3.1","A4.1","A5.1","A10.1","A12.1","A17.1", "A20.1","A21.1","A22.1","A24.1","A25.1","A27.1","A28.1", "A1.2","A2.2","A3.2","A4.2","A5.2","A10.2","A12.2","A17.2", "A20.2","A21.2","A22.2","A24.2","A25.2","A27.2","A28.2")] diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType) diagdds$SampleType <- relevel(diagdds$SampleType, "admission") diagdds = DESeq(diagdds, test="Wald", fitType="parametric") resultsNames(diagdds) res = results(diagdds, cooksCutoff = FALSE) alpha = 0.05 sigtab = res[which(res$padj < alpha), ] # --- Check if there are any significant results before proceeding --- if (nrow(sigtab) == 0) { message(">>> No significant features found with padj < ", alpha, ".") # 1. Print a clear markdown hint in the HTML cat(sprintf("\n> ⚠️ **Note:** No significant differentially abundant features were found for this comparison (padj < %s).\n\n", alpha)) # 2. Generate a clean placeholder table in the HTML safely #empty_msg_df <- data.frame( # Status = "No significant records", # Details = sprintf("No taxa met the significance threshold (padj < %s) after multiple testing correction.", alpha) #) #knitr::kable(empty_msg_df, col.names = c("Status", "Details"), align = "c") %>% # kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) # 3. Save an empty Excel file with the correct headers (so the file structure remains consistent) fname <- "DEGs_admission_vs_surgery_in_aneurysm" openxlsx::write.xlsx(as.data.frame(sigtab), file = paste0(fname, ".xlsx"), rowNames = TRUE) } else { sigtab <- cbind( as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix") ) # file base name fname <- "DEGs_admission_vs_surgery_in_aneurysm" openxlsx::write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE) # 1. Safely render the table in HTML without triggering the 'viewer' error knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) # 2. Generate the plot theme_set(theme_bw()) # Order factors by max log2FoldChange for cleaner plotting x_class <- tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x)) sigtab$Class <- factor(as.character(sigtab$Class), levels = names(sort(x_class))) x_family <- tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x)) sigtab$Family <- factor(as.character(sigtab$Family), levels = names(sort(x_family))) # Build the plot p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) + geom_point(aes(size = padj)) + scale_size_continuous(name = "padj", range = c(8, 4)) + theme_bw() + theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5)) print(p) # SVG (svglite gives crisp text) if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite") ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite, width = 8, height = 6, units = "in", dpi = 300) # PNG ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png", width = 8, height = 6, units = "in", dpi = 300) } ``` ## admission vs. discharge in the aneurysm samples ```{r, echo=TRUE, warning=FALSE} # ------------------------------------------------------------ # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter # ------------------------------------------------------------ ps_deseq <- ps_filt ps_deseq_sel2 <- data.table::copy(ps_deseq) otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("A1.1","A2.1","A3.1","A4.1","A5.1","A10.1","A12.1","A17.1", "A20.1","A21.1","A22.1","A24.1","A25.1","A27.1","A28.1", "A1.3","A2.3","A3.3","A4.3","A5.3","A10.3","A12.3","A17.3", "A20.3","A21.3","A22.3","A24.3","A25.3","A27.3","A28.3")] diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType) diagdds$SampleType <- relevel(diagdds$SampleType, "admission") diagdds = DESeq(diagdds, test="Wald", fitType="parametric") resultsNames(diagdds) res = results(diagdds, cooksCutoff = FALSE) alpha = 0.05 sigtab = res[which(res$padj < alpha), ] # --- Check if there are any significant results before proceeding --- if (nrow(sigtab) == 0) { message(">>> No significant features found with padj < ", alpha, ".") # 1. Print a clear markdown hint in the HTML cat(sprintf("\n> ⚠️ **Note:** No significant differentially abundant features were found for this comparison (padj < %s).\n\n", alpha)) # 3. Save an empty Excel file with the correct headers (so the file structure remains consistent) fname <- "DEGs_admission_vs_discharge_in_aneurysm" openxlsx::write.xlsx(as.data.frame(sigtab), file = paste0(fname, ".xlsx"), rowNames = TRUE) } else { sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix")) # file base name fname <- "DEGs_admission_vs_discharge_in_aneurysm" write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE) knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) theme_set(theme_bw()) scale_fill_discrete <- function(palname = "Set1", ...) { scale_fill_brewer(palette = palname, ...) } x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x)) x = sort(x) sigtab$Class = factor(as.character(sigtab$Class), levels=names(x)) x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x)) x = sort(x) sigtab$Family = factor(as.character(sigtab$Family), levels=names(x)) # build the plot p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) + geom_point(aes(size = padj)) + scale_size_continuous(name = "padj", range = c(8, 4)) + theme_bw() + theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5)) print(p) # SVG (svglite gives crisp text) if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite") ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite, width = 8, height = 6, units = "in", dpi = 300) # PNG ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png", width = 8, height = 6, units = "in", dpi = 300) } ``` ## surgery vs. discharge in the aneurysm samples ```{r, echo=TRUE, warning=FALSE} # ------------------------------------------------------------ # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter # ------------------------------------------------------------ ps_deseq <- ps_filt ps_deseq_sel2 <- data.table::copy(ps_deseq) otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("A1.2","A2.2","A3.2","A4.2","A5.2","A10.2","A12.2","A17.2", "A20.2","A21.2","A22.2","A24.2","A25.2","A27.2","A28.2", "A1.3","A2.3","A3.3","A4.3","A5.3","A10.3","A12.3","A17.3", "A20.3","A21.3","A22.3","A24.3","A25.3","A27.3","A28.3")] diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType) diagdds$SampleType <- relevel(diagdds$SampleType, "surgery") diagdds = DESeq(diagdds, test="Wald", fitType="parametric") resultsNames(diagdds) res = results(diagdds, cooksCutoff = FALSE) alpha = 0.05 sigtab = res[which(res$padj < alpha), ] sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix")) # file base name fname <- "DEGs_surgery_vs_discharge_in_aneurysm" write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE) knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) theme_set(theme_bw()) scale_fill_discrete <- function(palname = "Set1", ...) { scale_fill_brewer(palette = palname, ...) } x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x)) x = sort(x) sigtab$Class = factor(as.character(sigtab$Class), levels=names(x)) x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x)) x = sort(x) sigtab$Family = factor(as.character(sigtab$Family), levels=names(x)) # build the plot p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) + geom_point(aes(size = padj)) + scale_size_continuous(name = "padj", range = c(8, 4)) + theme_bw() + theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5)) print(p) # SVG (svglite gives crisp text) if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite") ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite, width = 8, height = 6, units = "in", dpi = 300) # PNG ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png", width = 8, height = 6, units = "in", dpi = 300) ``` ## admission vs. surgery in the hypophysis samples ```{r, echo=TRUE, warning=FALSE} # ------------------------------------------------------------ # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter # ------------------------------------------------------------ ps_deseq <- ps_filt ps_deseq_sel2 <- data.table::copy(ps_deseq) otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("H11.1","H13.1","H14.1","H16.1","H19.1","H24.1","H53.1","H56.1","H57.1","H59.1","H60.1","H64.1","H65.1","H67.1","H72.1","H73.1","H76.1","H78.1","H91.1","H94.1","H99.1", "H11.2","H13.2","H14.2","H16.2","H19.2","H24.2","H53.2","H56.2","H57.2","H59.2","H60.2","H64.2","H65.2","H67.2","H72.2","H73.2","H76.2","H78.2","H91.2","H94.2","H99.2")] diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType) diagdds$SampleType <- relevel(diagdds$SampleType, "admission") diagdds = DESeq(diagdds, test="Wald", fitType="parametric") resultsNames(diagdds) res = results(diagdds, cooksCutoff = FALSE) alpha = 0.05 sigtab = res[which(res$padj < alpha), ] sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix")) # file base name fname <- "DEGs_admission_vs_surgery_in_hypophysis" write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE) knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) theme_set(theme_bw()) scale_fill_discrete <- function(palname = "Set1", ...) { scale_fill_brewer(palette = palname, ...) } x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x)) x = sort(x) sigtab$Class = factor(as.character(sigtab$Class), levels=names(x)) x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x)) x = sort(x) sigtab$Family = factor(as.character(sigtab$Family), levels=names(x)) # build the plot p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) + geom_point(aes(size = padj)) + scale_size_continuous(name = "padj", range = c(8, 4)) + theme_bw() + theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5)) print(p) # SVG (svglite gives crisp text) if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite") ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite, width = 8, height = 6, units = "in", dpi = 300) # PNG ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png", width = 8, height = 6, units = "in", dpi = 300) ``` ## admission vs. discharge in the hypophysis samples ```{r, echo=TRUE, warning=FALSE} # ------------------------------------------------------------ # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter # ------------------------------------------------------------ ps_deseq <- ps_filt ps_deseq_sel2 <- data.table::copy(ps_deseq) otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("H11.1","H13.1","H14.1","H16.1","H19.1","H24.1","H53.1","H56.1","H57.1","H59.1","H60.1","H64.1","H65.1","H67.1","H72.1","H73.1","H76.1","H78.1","H91.1","H94.1","H99.1", "H11.3","H13.3","H14.3","H16.3","H19.3","H24.3","H53.3","H56.3","H57.3","H59.3","H60.3","H64.3","H65.3","H67.3","H72.3","H73.3","H76.3","H78.3","H91.3","H94.3","H99.3")] diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType) diagdds$SampleType <- relevel(diagdds$SampleType, "admission") diagdds = DESeq(diagdds, test="Wald", fitType="parametric") resultsNames(diagdds) res = results(diagdds, cooksCutoff = FALSE) alpha = 0.05 sigtab = res[which(res$padj < alpha), ] # --- Check if there are any significant results before proceeding --- if (nrow(sigtab) == 0) { message(">>> No significant features found with padj < ", alpha, ".") # 1. Print a clear markdown hint in the HTML cat(sprintf("\n> ⚠️ **Note:** No significant differentially abundant features were found for this comparison (padj < %s).\n\n", alpha)) # 3. Save an empty Excel file with the correct headers (so the file structure remains consistent) fname <- "DEGs_admission_vs_discharge_in_hypophysis" openxlsx::write.xlsx(as.data.frame(sigtab), file = paste0(fname, ".xlsx"), rowNames = TRUE) } else { sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix")) # file base name fname <- "DEGs_admission_vs_discharge_in_hypophysis" write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE) knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) theme_set(theme_bw()) scale_fill_discrete <- function(palname = "Set1", ...) { scale_fill_brewer(palette = palname, ...) } x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x)) x = sort(x) sigtab$Class = factor(as.character(sigtab$Class), levels=names(x)) x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x)) x = sort(x) sigtab$Family = factor(as.character(sigtab$Family), levels=names(x)) # build the plot p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) + geom_point(aes(size = padj)) + scale_size_continuous(name = "padj", range = c(8, 4)) + theme_bw() + theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5)) print(p) # SVG (svglite gives crisp text) if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite") ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite, width = 8, height = 6, units = "in", dpi = 300) # PNG ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png", width = 8, height = 6, units = "in", dpi = 300) } ``` ## surgery vs. discharge in the hypophysis samples ```{r, echo=TRUE, warning=FALSE} # ------------------------------------------------------------ # 3) DESeq2: non-rarefied integer counts + optional taxon prefilter # ------------------------------------------------------------ ps_deseq <- ps_filt ps_deseq_sel2 <- data.table::copy(ps_deseq) otu_table(ps_deseq_sel2) <- otu_table(ps_deseq)[,c("H11.2","H13.2","H14.2","H16.2","H19.2","H24.2","H53.2","H56.2","H57.2","H59.2","H60.2","H64.2","H65.2","H67.2","H72.2","H73.2","H76.2","H78.2","H91.2","H94.2","H99.2", "H11.3","H13.3","H14.3","H16.3","H19.3","H24.3","H53.3","H56.3","H57.3","H59.3","H60.3","H64.3","H65.3","H67.3","H72.3","H73.3","H76.3","H78.3","H91.3","H94.3","H99.3")] diagdds = phyloseq_to_deseq2(ps_deseq_sel2, ~SampleType) diagdds$SampleType <- relevel(diagdds$SampleType, "surgery") diagdds = DESeq(diagdds, test="Wald", fitType="parametric") resultsNames(diagdds) res = results(diagdds, cooksCutoff = FALSE) alpha = 0.05 sigtab = res[which(res$padj < alpha), ] sigtab = cbind(as(sigtab, "data.frame"), as(phyloseq::tax_table(ps_deseq_sel2)[rownames(sigtab), ], "matrix")) # file base name fname <- "DEGs_surgery_vs_discharge_in_hypophysis" write.xlsx(sigtab, file = paste0(fname, ".xlsx"), rowNames = TRUE) knitr::kable(sigtab) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) theme_set(theme_bw()) scale_fill_discrete <- function(palname = "Set1", ...) { scale_fill_brewer(palette = palname, ...) } x = tapply(sigtab$log2FoldChange, sigtab$Class, function(x) max(x)) x = sort(x) sigtab$Class = factor(as.character(sigtab$Class), levels=names(x)) x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x)) x = sort(x) sigtab$Family = factor(as.character(sigtab$Family), levels=names(x)) # build the plot p <- ggplot(sigtab, aes(x = log2FoldChange, y = Family, color = Class)) + geom_point(aes(size = padj)) + scale_size_continuous(name = "padj", range = c(8, 4)) + theme_bw() + theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust = 0.5)) print(p) # SVG (svglite gives crisp text) if (!requireNamespace("svglite", quietly = TRUE)) install.packages("svglite") ggplot2::ggsave(paste0(fname, ".svg"), plot = p, device = svglite::svglite, width = 8, height = 6, units = "in", dpi = 300) # PNG ggplot2::ggsave(paste0(fname, ".png"), plot = p, device = "png", width = 8, height = 6, units = "in", dpi = 300) ``` # ST summary (g216 & yycH) ```{r, echo=TRUE, warning=FALSE} ##Change your working directory to where the files are located count_data <- read.table("../count_table_seq31_seq37_ST.txt", header = TRUE, row.names = 1, sep=",") OTU = otu_table(as.matrix(count_data), taxa_are_rows = TRUE) sample_data <- read.csv("../map3_corrected.txt", sep="\t", row.names = 1) SAM = sample_data(sample_data, errorIfNULL = TRUE) #rownames(SAM) <- c("P7.Nose", "P8.Nose", "P9.Nose", "P10.Nose", "P11.Nose", "P12.Nose", "P13.Nose", "P14.Nose", "P15.Nose", "P16.Foot", "P16.Nose", "P17.Foot", "P17.Nose", "P18.Foot", "P18.Nose", "P19.Foot", "P19.Nose", "P20.Foot", "P20.Nose", "AH.LH", "AH.NLH", "AH.Nose", "AL.LH", "AL.NLH", "AL.Nose", "CB.LH", "CB.NLH", "CB.Nose", "HR.LH", "HR.NLH", "HR.Nose", "KK.LH", "KK.NLH", "KK.Nose", "MC.LH", "MC.NLH", "MC.Nose", "MR.LH", "MR.NLH", "MR.Nose", "PT2.LH", "PT2.NLH", "PT2.Nose", "RP.LH", "RP.NLH", "RP.Nose", "SA.LH", "SA.NLH", "SA.Nose", "XN.LH", "XN.NLH", "XN.Nose") #tree_file <- read_tree("../clustering/rep_set.tre") ps <- phyloseq(OTU, SAM) #, tree_file) saveRDS(ps, "./ST_ps.rds") ``` Visualize data ```{r, echo=TRUE, warning=FALSE} sample_names(ps) sample_variables(ps) ``` Normalize number of reads in each sample using median sequencing depth. ```{r, echo=TRUE, warning=FALSE} #> sample_sums(ps) # A1.1 A1.2 A1.3 A2.1 A2.2 A2.3 A3.1 A3.2 A3.3 A4.1 A4.2 # 58236 32274 32575 76814 83305 81171 103250 52566 95302 100983 85068 # A4.3 A5.1 A5.2 A5.3 A10.1 A10.2 A10.3 A12.1 A12.2 A12.3 A17.1 # 68205 93481 95239 69527 23772 163696 97159 37027 53896 31686 90402 # A17.2 A17.3 A20.1 A20.2 A20.3 A21.1 A21.2 A21.3 A22.1 A22.2 A22.3 #106581 120234 55312 49696 61397 126445 108008 87120 125673 77986 51353 # A24.1 A24.2 A24.3 A25.1 A25.2 A25.3 A27.1 A27.2 A27.3 A28.1 A28.2 # 67387 49384 100837 48487 80262 60919 36941 80448 77111 31220 29387 # A28.3 H11.1 H11.2 H11.3 H13.1 H13.2 H13.3 H14.1 H14.2 H14.3 H16.1 # 43240 52289 44011 56319 44710 26599 42339 33493 60110 22282 421887 # H16.2 H16.3 H19.1 H19.2 H19.3 H24.1 H24.2 H24.3 H53.1 H53.2 H53.3 # 38799 79011 36510 57909 72987 34716 47999 39182 32926 54544 42584 # H56.1 H56.2 H56.3 H57.1 H57.2 H57.3 H59.1 H59.2 H59.3 # 38215 74656 55938 37925 43340 76032 33029 32992 57646 #total = median(sample_sums(ps)) #> total #[1] 56191 # NORMALIZE number of reads in each sample using median sequencing depth. In other words, now the sum of each sample is 56191 --> Similar methods with the cutoff. standf = function(x, t=total) round(t * (x / sum(x))) ps = transform_sample_counts(ps, standf) # Transform absolute abundance to relative abundance. ps_rel <- microbiome::transform(ps, "compositional") saveRDS(ps_rel, "./ps.rds") hmp.meta <- meta(ps) hmp.meta$sam_name <- rownames(hmp.meta) knitr::kable(otu_table(ps_rel)) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` # Bar plots for ST (g216 & yycH) ## Bar plots of A* samples ```{r, echo=TRUE, warning=FALSE} # Assuming you have a vector of sample IDs that you are interested in. samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3") # Subsetting the phyloseq object. ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep) # Convert to a data frame for manipulation with dplyr and ggplot ps_data <- psmelt(ps_subset) # Assuming "SampleID" is the column with your sample names ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep) # Now plot using ggplot, with fill determined by "Class" ggplot(ps_data, aes(x = Sample, y = Abundance)) + geom_bar(aes(fill = OTU), stat = "identity") + scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen", "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod", "tomato","mediumpurple4","indianred", "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) + labs(fill = "") + theme(axis.text.x = element_text(size = 9, angle = 60, hjust = 1), axis.text.y = element_text(size = 9, colour="black"), legend.position="bottom", legend.text = element_text(size = 8)) + guides(fill=guide_legend(nrow=4)) ``` ## Bar plots of H* samples ```{r, echo=TRUE, warning=FALSE} # Assuming you have a vector of sample IDs that you are interested in. samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3") # Subsetting the phyloseq object. ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep) # Convert to a data frame for manipulation with dplyr and ggplot ps_data <- psmelt(ps_subset) # Assuming "SampleID" is the column with your sample names ps_data$Sample <- factor(ps_data$Sample, levels = samples_to_keep) # Now plot using ggplot, with fill determined by "Class" ggplot(ps_data, aes(x = Sample, y = Abundance)) + geom_bar(aes(fill = OTU), stat = "identity") + scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen", "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod", "tomato","mediumpurple4","indianred", "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) + labs(fill = "") + theme(axis.text.x = element_text(size = 9, angle = 75, hjust = 1), axis.text.y = element_text(size = 9, colour="black"), legend.position="bottom", legend.text = element_text(size = 8)) + guides(fill=guide_legend(nrow=4)) ``` ## Comparing the aneurysm and hypophyis groups ```{r, echo=TRUE, warning=FALSE} #Regroup together A* vs H* samples and normalize number of reads in each group using median sequencing depth. samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3","H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3") ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep) ps_subset_type <- merge_samples(ps_subset, "PatientType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen", "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod", "tomato","mediumpurple4","indianred", "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) + labs(fill = "") + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black"), legend.position="bottom", legend.text = element_text(size = 8)) + guides(fill=guide_legend(nrow=4)) ``` ## Comparing the admission, surgery, and discharge groups ```{r, echo=TRUE, warning=FALSE} ps_subset_type <- merge_samples(ps_subset, "SampleType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen", "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod", "tomato","mediumpurple4","indianred", "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) + labs(fill = "") + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black"), legend.position="bottom", legend.text = element_text(size = 8)) + guides(fill=guide_legend(nrow=4)) + scale_x_discrete(limits = c("admission", "surgery", "discharge")) ``` ## Comparing the admission, surgery, and discharge groups in A* samples ```{r, echo=TRUE, warning=FALSE} samples_to_keep <- c("A1.1","A1.2","A1.3","A2.1","A2.2","A2.3","A3.1","A3.2","A3.3","A4.1","A4.2","A4.3","A5.1","A5.2","A5.3","A10.1","A10.2","A10.3","A12.1","A12.2","A12.3","A17.1","A17.2","A17.3","A20.1","A20.2","A20.3","A21.1","A21.2","A21.3","A22.1","A22.2","A22.3","A24.1","A24.2","A24.3","A25.1","A25.2","A25.3","A27.1","A27.2","A27.3","A28.1","A28.2","A28.3") ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep) ps_subset_type <- merge_samples(ps_subset, "SampleType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen", "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod", "tomato","mediumpurple4","indianred", "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) + labs(fill = "") + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black"), legend.position="bottom", legend.text = element_text(size = 8)) + guides(fill=guide_legend(nrow=4)) + scale_x_discrete(limits = c("admission", "surgery", "discharge")) ``` ## Comparing the admission, surgery, and discharge groups in H* samples ```{r, echo=TRUE, warning=FALSE} samples_to_keep <- c("H11.1","H11.2","H11.3","H13.1","H13.2","H13.3","H14.1","H14.2","H14.3","H16.1","H16.2","H16.3","H19.1","H19.2","H19.3","H24.1","H24.2","H24.3","H53.1","H53.2","H53.3","H56.1","H56.2","H56.3","H57.1","H57.2","H57.3","H59.1","H59.2","H59.3","H60.1","H60.2","H60.3","H64.1","H64.2","H64.3","H65.1","H65.2","H65.3","H67.1","H67.2","H67.3","H72.1","H72.2","H72.3","H73.1","H73.2","H73.3","H76.1","H76.2","H76.3","H78.1","H78.2","H78.3","H91.1","H91.2","H91.3","H94.1","H94.2","H94.3","H99.1","H99.2","H99.3") ps_subset <- subset_samples(ps_rel, rownames(sample_data(ps_rel)) %in% samples_to_keep) ps_subset_type <- merge_samples(ps_subset, "SampleType") ps_subset_type_ = transform_sample_counts(ps_subset_type, function(x) x / sum(x)) plot_bar(ps_subset_type_, fill="OTU") + geom_bar(aes(), stat="identity", position="stack") + scale_fill_manual(values = c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen", "blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod", "tomato","mediumpurple4","indianred", "orange", "LimeGreen","DeepSkyBlue","Yellow","HotPink", "darkgrey")) + labs(fill = "") + theme(axis.text.x = element_text(size = 10, angle = 15, hjust = 1), axis.text.y = element_text(size = 9, colour="black"), legend.position="bottom", legend.text = element_text(size = 8)) + guides(fill=guide_legend(nrow=4)) + scale_x_discrete(limits = c("admission", "surgery", "discharge")) ``` # Binary Prevalence Analysis of Target *S. epidermidis* STs (g216 & yycH) To evaluate the temporal dynamics of specific healthcare-associated *Staphylococcus epidermidis* clones (ST2, ST5, and ST23), we analyzed their colonization status as binary presence/absence data across three clinical timepoints (admission, surgery, discharge). This approach isolates the dynamics of strain acquisition and clearance from continuous abundance fluctuations. Overall temporal changes were assessed using Cochran’s Q test, followed by robust pairwise McNemar tests with Benjamini-Hochberg (BH) correction to identify statistically significant transitions between specific timepoints. ```{r, echo=TRUE, warning=FALSE} # Data Preparation: Extract target STs, convert to binary (0/1) matrix, and save to Excel # Assuming 'df' is the ST abundance data frame from your previous code (rownames = STs, colnames = samples like "A1.1") target_sts <- c("ST2", "ST5", "ST23") # Check if these STs exist in your data to prevent errors available_sts <- intersect(target_sts, rownames(df)) if (length(available_sts) == 0) { stop("Error: Target STs not found in the data. Please check the row names.") } else if (length(available_sts) < length(target_sts)) { message(sprintf("Note: The following STs were not found and will be skipped: %s", paste(setdiff(target_sts, available_sts), collapse = ", "))) } # Convert abundance to binary: >0 becomes 1 (Present), ==0 becomes 0 (Absent) df_binary <- df[available_sts, ] > 0 df_binary <- df_binary * 1 # Convert TRUE/FALSE to 1/0 # >>> Export the binary matrix to an Excel file <<< # 1. Convert the matrix to a data.frame, then move rownames to a column df_binary_export <- as.data.frame(df_binary) %>% tibble::rownames_to_column(var = "ST") knitr::kable(df_binary_export) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) # 2. Save to Excel if (!dir.exists("./figures")) dir.create("./figures") output_excel_path <- "./figures/ST_binary_matrix.xlsx" openxlsx::write.xlsx(df_binary_export, file = output_excel_path, rowNames = FALSE) message(sprintf("✅ Successfully saved binary ST matrix to: %s", output_excel_path)) # Data Reshaping: Extract Patient ID and Timepoint to build long-format data # Fixed: Convert the matrix to a data.frame first before using tibble functions df_long <- as.data.frame(df_binary) %>% tibble::rownames_to_column(var = "ST") %>% tidyr::pivot_longer(cols = -ST, names_to = "Sample", values_to = "Presence") %>% mutate( # Extract Patient ID (e.g., "A1") from sample name (e.g., "A1.1") PatientID = sub("\\.[1-3]$", "", Sample), # Define timepoints based on the sample suffix Timepoint = case_when( grepl("\\.1$", Sample) ~ "admission", grepl("\\.2$", Sample) ~ "surgery", grepl("\\.3$", Sample) ~ "discharge" ) ) %>% # Set Timepoint as an ordered factor for correct plotting/statistical order mutate(Timepoint = factor(Timepoint, levels = c("admission", "surgery", "discharge"))) # Statistical Analysis: Cochran's Q test + Robust Paired McNemar test results_list <- list() # Helper function for robust pairwise McNemar test (avoids silent rstatix failures) robust_pairwise_mcnemar <- function(st_data, st_name) { # Pivot to wide format to easily compare timepoints per patient data_wide <- st_data %>% dplyr::select(PatientID, Timepoint, Presence) %>% tidyr::pivot_wider(names_from = Timepoint, values_from = Presence, values_fill = 0) # Check if all timepoints exist for this ST if (!all(c("admission", "surgery", "discharge") %in% colnames(data_wide))) { message(sprintf("⚠️ Warning: Missing timepoints for %s. Skipping pairwise tests.", st_name)) return(data.frame(ST = st_name, `Timepoint 1` = NA, `Timepoint 2` = NA, `Raw p-value` = NA, `Adjusted p-value (BH)` = NA, Significance = NA, check.names = FALSE)) } # Helper to calculate McNemar p-value safely get_mcnemar_p <- function(x, y) { # Create a strict 2x2 contingency table tbl <- table(factor(x, levels = c(0, 1)), factor(y, levels = c(0, 1))) # If there are no discordant pairs (off-diagonals are 0), there is no change. Return p = 1.0 if (tbl[1, 2] == 0 && tbl[2, 1] == 0) { return(1.0) } # Try standard McNemar test with continuity correction p_val <- tryCatch({ mcnemar.test(tbl, correct = TRUE)$p.value }, error = function(e) { # Fallback: if counts are too low for Chi-square, return NA NA }) return(p_val) } # Calculate p-values for the 3 pairwise comparisons p1 <- get_mcnemar_p(data_wide$admission, data_wide$surgery) p2 <- get_mcnemar_p(data_wide$surgery, data_wide$discharge) p3 <- get_mcnemar_p(data_wide$admission, data_wide$discharge) p_vals <- c(p1, p2, p3) # Adjust for multiple testing (Benjamini-Hochberg FDR) p_adj <- p.adjust(p_vals, method = "BH") # Determine significance stars get_sig <- function(p) { if (is.na(p)) return("NA") if (p < 0.001) return("***") if (p < 0.01) return("**") if (p < 0.05) return("*") return("ns") } data.frame( ST = st_name, `Timepoint 1` = c("admission", "surgery", "admission"), `Timepoint 2` = c("surgery", "discharge", "discharge"), `Raw p-value` = p_vals, `Adjusted p-value (BH)` = p_adj, Significance = sapply(p_adj, get_sig), check.names = FALSE, stringsAsFactors = FALSE ) } # Run tests for each available ST for (st in available_sts) { st_data <- df_long %>% filter(ST == st) # 3.1 Overall test: Cochran's Q test cochran_res <- tryCatch({ RVAideMemoire::cochran.qtest(Presence ~ Timepoint | PatientID, data = st_data) }, error = function(e) { message(sprintf("⚠️ Cochran's Q failed for %s: %s", st, e$message)) return(data.frame(p.value = NA, note = "Incomplete data or zero variance")) }) # 3.2 Pairwise comparisons using the robust function pairwise_res <- robust_pairwise_mcnemar(st_data, st) results_list[[st]] <- list( Cochran_Q = cochran_res, Pairwise_McNemar = pairwise_res ) } # Combine pairwise comparison results for all STs final_pairwise_results <- bind_rows(lapply(results_list, function(x) x$Pairwise_McNemar)) ``` Pairwise McNemar Tests with Benjamini-Hochberg (BH) Correction ```{r, echo=FALSE, warning=FALSE, message=FALSE} knitr::kable(final_pairwise_results, digits = 4, col.names = c("ST", "Timepoint 1", "Timepoint 2", "Raw p-value", "Adjusted p-value (BH)", "Significance")) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Overall Temporal Changes (Cochran's Q Test) ```{r, echo=FALSE, warning=FALSE} cochran_results_df <- do.call(rbind, lapply(available_sts, function(st) { res <- results_list[[st]]$Cochran_Q if (!is.na(res$p.value)) { data.frame( ST = st, `Q statistic` = round(res$statistic, 3), df = res$parameter, `p-value` = round(res$p.value, 4), Note = "", check.names = FALSE, stringsAsFactors = FALSE ) } else { data.frame( ST = st, `Q statistic` = NA, df = NA, `p-value` = NA, Note = "Insufficient variation or missing data", check.names = FALSE, stringsAsFactors = FALSE ) } })) # Render the Cochran's Q results as a formatted table knitr::kable(cochran_results_df, digits = 4, col.names = c("ST", "Q statistic", "df", "p-value", "Note")) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` Data Visualization: Plot prevalence (%) trends across timepoints ```{r, echo=TRUE, warning=FALSE} prevalence_data <- df_long %>% group_by(ST, Timepoint) %>% summarise( Prevalence = mean(Presence) * 100, # Calculate prevalence percentage n_present = sum(Presence), n_total = n(), .groups = 'drop' ) # Create line chart + scatter points p_prevalence <- ggplot(prevalence_data, aes(x = Timepoint, y = Prevalence, group = ST, color = ST)) + geom_line(linewidth = 1, alpha = 0.7) + geom_point(size = 4) + geom_text(aes(label = sprintf("%.1f%%\n(n=%d/%d)", Prevalence, n_present, n_total)), vjust = -1.2, size = 3.5) + scale_y_continuous(limits = c(0, 105), breaks = seq(0, 100, 20)) + labs( title = "Prevalence of Healthcare-Associated STs Across Clinical Timepoints", subtitle = "Based on Presence/Absence (Binary) Data", x = "Clinical Timepoint", y = "Prevalence (%)", color = "Sequence Type" ) + theme_minimal(base_size = 12) + theme( axis.text.x = element_text(angle = 30, hjust = 1), legend.position = "bottom" ) print(p_prevalence) # Save the plot ggsave("./figures/ST_prevalence_timepoints.png", plot = p_prevalence, width = 8, height = 6, dpi = 300) ``` # Alpha diversity for ST (g216 & yycH) Plot Shannon index and observed STs. ## Alpha diversity between aneurysm and hypophysis ```{r, echo=TRUE, warning=FALSE} hmp.div_st <- read.csv("alpha_diversity_metrics_samples_ST.csv", sep=",") colnames(hmp.div_st) <- c("sample", "chao1", "observed_sts", "shannon") row.names(hmp.div_st) <- hmp.div_st$sample div.df <- merge(hmp.div_st, hmp.meta, by.x="sample", by.y = "sam_name") div.df2 <- div.df[, c("PatientType", "shannon", "observed_sts")] colnames(div.df2) <- c("PatientType", "Shannon", "ST") #colnames(div.df2) options(max.print=999999) stat.test.Shannon <- compare_means( Shannon ~ PatientType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) stat.test.ST <- compare_means( ST ~ PatientType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) div_df_melt <- reshape2::melt(div.df2) p <- ggboxplot(div_df_melt, x = "PatientType", y = "value", facet.by = "variable", scales = "free", width = 0.5, fill = "gray", legend= "right") #ggpar(p, xlab = FALSE, ylab = FALSE) lev <- levels(factor(div_df_melt$PatientType)) # get the variables L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns") my_stat_compare_means <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE, method.args = list(), ref.group = NULL, comparisons = NULL, hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left", label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03, symnum.args = list(), geom = "text", position = "identity", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) { if (!is.null(comparisons)) { method.info <- ggpubr:::.method_info(method) method <- method.info$method method.args <- ggpubr:::.add_item(method.args, paired = paired) if (method == "wilcox.test") method.args$exact <- FALSE pms <- list(...) size <- ifelse(is.null(pms$size), 0.3, pms$size) color <- ifelse(is.null(pms$color), "black", pms$color) map_signif_level <- FALSE if (is.null(label)) label <- "p.format" if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) { if (ggpubr:::.is_empty(symnum.args)) { map_signif_level <- c(`****` = 1e-04, `***` = 0.001, `**` = 0.01, `*` = 0.05, ns = 1) } else { map_signif_level <- symnum.args } if (hide.ns) names(map_signif_level)[5] <- " " } step_increase <- ifelse(is.null(label.y), 0.12, 0) ggsignif::geom_signif(comparisons = comparisons, y_position = label.y, test = method, test.args = method.args, step_increase = step_increase, size = size, color = color, map_signif_level = map_signif_level, tip_length = tip.length, data = data) } else { mapping <- ggpubr:::.update_mapping(mapping, label) layer(stat = StatCompareMeans, data = data, mapping = mapping, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc, label.y.npc = label.y.npc, label.x = label.x, label.y = label.y, label.sep = label.sep, method = method, method.args = method.args, paired = paired, ref.group = ref.group, symnum.args = symnum.args, hide.ns = hide.ns, na.rm = na.rm, ...)) } } p3 <- p + stat_compare_means( method="t.test", comparisons = list(c("aneurysm", "hypophysis")), label = "p.signif", symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")), ) + theme(axis.text.x = element_text(angle = 30, hjust = 1)) # add this line to rotate x-axis text #print(p3) ggsave("./figures/alpha_diversity_ST_PatientType.png", device="png", height = 5, width = 9) ggsave("./figures/alpha_diversity_ST_PatientType.svg", device="svg", height = 4, width = 8) knitr::include_graphics("figures/alpha_diversity_ST_PatientType.png") ``` ## Alpha diversity among admission, surgery, and discharge ```{r, echo=TRUE, warning=FALSE} div.df2 <- div.df[, c("SampleType", "shannon", "observed_sts")] colnames(div.df2) <- c("SampleType", "Shannon", "ST") #colnames(div.df2) options(max.print=999999) stat.test.Shannon <- compare_means( Shannon ~ SampleType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) stat.test.ST <- compare_means( ST ~ SampleType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) div_df_melt <- reshape2::melt(div.df2) p <- ggboxplot(div_df_melt, x = "SampleType", y = "value", facet.by = "variable", scales = "free", width = 0.5, fill = "gray", legend= "right") #ggpar(p, xlab = FALSE, ylab = FALSE) lev <- levels(factor(div_df_melt$SampleType)) # get the variables L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns") my_stat_compare_means <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE, method.args = list(), ref.group = NULL, comparisons = NULL, hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left", label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03, symnum.args = list(), geom = "text", position = "identity", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) { if (!is.null(comparisons)) { method.info <- ggpubr:::.method_info(method) method <- method.info$method method.args <- ggpubr:::.add_item(method.args, paired = paired) if (method == "wilcox.test") method.args$exact <- FALSE pms <- list(...) size <- ifelse(is.null(pms$size), 0.3, pms$size) color <- ifelse(is.null(pms$color), "black", pms$color) map_signif_level <- FALSE if (is.null(label)) label <- "p.format" if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) { if (ggpubr:::.is_empty(symnum.args)) { map_signif_level <- c(`****` = 1e-04, `***` = 0.001, `**` = 0.01, `*` = 0.05, ns = 1) } else { map_signif_level <- symnum.args } if (hide.ns) names(map_signif_level)[5] <- " " } step_increase <- ifelse(is.null(label.y), 0.12, 0) ggsignif::geom_signif(comparisons = comparisons, y_position = label.y, test = method, test.args = method.args, step_increase = step_increase, size = size, color = color, map_signif_level = map_signif_level, tip_length = tip.length, data = data) } else { mapping <- ggpubr:::.update_mapping(mapping, label) layer(stat = StatCompareMeans, data = data, mapping = mapping, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc, label.y.npc = label.y.npc, label.x = label.x, label.y = label.y, label.sep = label.sep, method = method, method.args = method.args, paired = paired, ref.group = ref.group, symnum.args = symnum.args, hide.ns = hide.ns, na.rm = na.rm, ...)) } } p3 <- p + stat_compare_means( method="t.test", comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")), label = "p.signif", symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")), ) + theme(axis.text.x = element_text(angle = 30, hjust = 1)) # add this line to rotate x-axis text #print(p3) ggsave("./figures/alpha_diversity_ST_SampleType.png", device="png", height = 5, width = 9) ggsave("./figures/alpha_diversity_ST_SampleType.svg", device="svg", height = 4, width = 8) knitr::include_graphics("figures/alpha_diversity_ST_SampleType.png") ``` ## Alpha diversity among admission, surgery, and discharge in A* samples ```{r, echo=TRUE, warning=FALSE} hmp.div_st <- read.csv("alpha_diversity_metrics_samples_ST_A.csv", sep=",") colnames(hmp.div_st) <- c("sample", "chao1", "observed_sts", "shannon") row.names(hmp.div_st) <- hmp.div_st$sample div.df <- merge(hmp.div_st, hmp.meta, by.x="sample", by.y = "sam_name") div.df2 <- div.df[, c("SampleType", "shannon", "observed_sts")] colnames(div.df2) <- c("SampleType", "Shannon", "ST") #colnames(div.df2) options(max.print=999999) stat.test.Shannon <- compare_means( Shannon ~ SampleType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) stat.test.ST <- compare_means( ST ~ SampleType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) div_df_melt <- reshape2::melt(div.df2) p <- ggboxplot(div_df_melt, x = "SampleType", y = "value", facet.by = "variable", scales = "free", width = 0.5, fill = "gray", legend= "right") #ggpar(p, xlab = FALSE, ylab = FALSE) lev <- levels(factor(div_df_melt$SampleType)) # get the variables L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns") my_stat_compare_means <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE, method.args = list(), ref.group = NULL, comparisons = NULL, hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left", label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03, symnum.args = list(), geom = "text", position = "identity", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) { if (!is.null(comparisons)) { method.info <- ggpubr:::.method_info(method) method <- method.info$method method.args <- ggpubr:::.add_item(method.args, paired = paired) if (method == "wilcox.test") method.args$exact <- FALSE pms <- list(...) size <- ifelse(is.null(pms$size), 0.3, pms$size) color <- ifelse(is.null(pms$color), "black", pms$color) map_signif_level <- FALSE if (is.null(label)) label <- "p.format" if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) { if (ggpubr:::.is_empty(symnum.args)) { map_signif_level <- c(`****` = 1e-04, `***` = 0.001, `**` = 0.01, `*` = 0.05, ns = 1) } else { map_signif_level <- symnum.args } if (hide.ns) names(map_signif_level)[5] <- " " } step_increase <- ifelse(is.null(label.y), 0.12, 0) ggsignif::geom_signif(comparisons = comparisons, y_position = label.y, test = method, test.args = method.args, step_increase = step_increase, size = size, color = color, map_signif_level = map_signif_level, tip_length = tip.length, data = data) } else { mapping <- ggpubr:::.update_mapping(mapping, label) layer(stat = StatCompareMeans, data = data, mapping = mapping, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc, label.y.npc = label.y.npc, label.x = label.x, label.y = label.y, label.sep = label.sep, method = method, method.args = method.args, paired = paired, ref.group = ref.group, symnum.args = symnum.args, hide.ns = hide.ns, na.rm = na.rm, ...)) } } p3 <- p + stat_compare_means( method="t.test", comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")), label = "p.signif", symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")), ) + theme(axis.text.x = element_text(angle = 30, hjust = 1)) # add this line to rotate x-axis text #print(p3) ggsave("./figures/alpha_diversity_ST_SampleType_A.png", device="png", height = 5, width = 9) ggsave("./figures/alpha_diversity_ST_SampleType_A.svg", device="svg", height = 4, width = 8) knitr::include_graphics("figures/alpha_diversity_ST_SampleType_A.png") ``` ## Alpha diversity among admission, surgery, and discharge in H* samples ```{r, echo=TRUE, warning=FALSE} hmp.div_st <- read.csv("alpha_diversity_metrics_samples_ST_H.csv", sep=",") colnames(hmp.div_st) <- c("sample", "chao1", "observed_sts", "shannon") row.names(hmp.div_st) <- hmp.div_st$sample div.df <- merge(hmp.div_st, hmp.meta, by.x="sample", by.y = "sam_name") div.df2 <- div.df[, c("SampleType", "shannon", "observed_sts")] colnames(div.df2) <- c("SampleType", "Shannon", "ST") #colnames(div.df2) options(max.print=999999) stat.test.Shannon <- compare_means( Shannon ~ SampleType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) stat.test.ST <- compare_means( ST ~ SampleType, data = div.df2, method = "t.test" ) knitr::kable(stat.test.ST) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) div_df_melt <- reshape2::melt(div.df2) p <- ggboxplot(div_df_melt, x = "SampleType", y = "value", facet.by = "variable", scales = "free", width = 0.5, fill = "gray", legend= "right") #ggpar(p, xlab = FALSE, ylab = FALSE) lev <- levels(factor(div_df_melt$SampleType)) # get the variables L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns") my_stat_compare_means <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE, method.args = list(), ref.group = NULL, comparisons = NULL, hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left", label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03, symnum.args = list(), geom = "text", position = "identity", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) { if (!is.null(comparisons)) { method.info <- ggpubr:::.method_info(method) method <- method.info$method method.args <- ggpubr:::.add_item(method.args, paired = paired) if (method == "wilcox.test") method.args$exact <- FALSE pms <- list(...) size <- ifelse(is.null(pms$size), 0.3, pms$size) color <- ifelse(is.null(pms$color), "black", pms$color) map_signif_level <- FALSE if (is.null(label)) label <- "p.format" if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) { if (ggpubr:::.is_empty(symnum.args)) { map_signif_level <- c(`****` = 1e-04, `***` = 0.001, `**` = 0.01, `*` = 0.05, ns = 1) } else { map_signif_level <- symnum.args } if (hide.ns) names(map_signif_level)[5] <- " " } step_increase <- ifelse(is.null(label.y), 0.12, 0) ggsignif::geom_signif(comparisons = comparisons, y_position = label.y, test = method, test.args = method.args, step_increase = step_increase, size = size, color = color, map_signif_level = map_signif_level, tip_length = tip.length, data = data) } else { mapping <- ggpubr:::.update_mapping(mapping, label) layer(stat = StatCompareMeans, data = data, mapping = mapping, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc, label.y.npc = label.y.npc, label.x = label.x, label.y = label.y, label.sep = label.sep, method = method, method.args = method.args, paired = paired, ref.group = ref.group, symnum.args = symnum.args, hide.ns = hide.ns, na.rm = na.rm, ...)) } } p3 <- p + stat_compare_means( method="t.test", comparisons = list(c("admission", "surgery"), c("surgery", "discharge"), c("admission", "discharge")), label = "p.signif", symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")), ) + theme(axis.text.x = element_text(angle = 30, hjust = 1)) # add this line to rotate x-axis text #print(p3) ggsave("./figures/alpha_diversity_ST_SampleType_H.png", device="png", height = 5, width = 9) ggsave("./figures/alpha_diversity_ST_SampleType_H.svg", device="svg", height = 4, width = 8) knitr::include_graphics("figures/alpha_diversity_ST_SampleType_H.png") ``` # Statistical tests for the abundance of a specific subtype between admission and discharge (g216 & yycH) ```{r, echo=FALSE, warning=FALSE} otu_data <- otu_table(ps_rel) # Convert OTU table to a data frame for easier manipulation # This conversion keeps taxa as rows and samples as columns df <- as.data.frame(otu_data) # Assuming ST8 is one of your taxa, extract its data ST8_data <- df["ST8", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))] discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))] # Extract relative abundances for ST8 at admission and discharge before_treatment <- ST8_data[admission_samples] after_treatment <- ST8_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) #> colnames(ST8_admission) # [1] "A1.1" "A2.1" "A3.1" "A4.1" "A5.1" "A10.1" "A12.1" "A17.1" "A20.1" #[10] "A21.1" "A22.1" "A24.1" "A25.1" "A27.1" "A28.1" "H11.1" "H13.1" "H14.1" #[19] "H16.1" "H19.1" "H24.1" "H53.1" "H56.1" "H57.1" "H59.1" "H60.1" "H64.1" #[28] "H65.1" "H67.1" "H72.1" "H73.1" "H76.1" "H78.1" "H91.1" "H94.1" "H99.1" #> colnames(ST8_discharge) # [1] "A1.3" "A2.3" "A3.3" "A4.3" "A5.3" "A10.3" "A12.3" "A17.3" "A20.3" #[10] "A21.3" "A22.3" "A24.3" "A25.3" "A27.3" "A28.3" "H11.3" "H13.3" "H14.3" #[19] "H16.3" "H19.3" "H24.3" "H53.3" "H56.3" "H57.3" "H59.3" "H60.3" "H64.3" #[28] "H65.3" "H67.3" "H72.3" "H73.3" "H76.3" "H78.3" "H91.3" "H94.3" "H99.3" # # Performing the Wilcoxon signed-rank test # result <- wilcoxsign_test(before_treatment ~ after_treatment, distribution = "exact") # # Printing the result # print(result) # # Exact Wilcoxon-Pratt Signed-Rank Test # #data: y by x (pos, neg) # # stratified by block # #Z = 1.6933, p-value = 0.09375 # #alternative hypothesis: true mu is not equal to 0 # # # Alternatively, using the stats package # result_alt <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # # Printing the alternative result # print(result_alt) #--1-- Accepting the Approximate p-value (the default behavior in R): #If you are okay with the normal approximation that R uses when ties are present, you can simply run: result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) #Despite the warning, this will still give you an approximate p-value. print(result1) # Wilcoxon signed rank exact test #data: before_treatment and after_treatment #V = 358, p-value = 0.7037 #alternative hypothesis: true location shift is not equal to 0 #--2-- Using the Coin Package for Exact Calculations: #Install and use the coin package which can handle ties more gracefully: # Install if not already installed: install.packages("coin") library(coin) # Using the wilcoxsign_test function from the coin package result2 <- wilcoxsign_test(before_treatment ~ after_treatment, distribution = "exact") print(result2) #This method calculates an exact p-value even in the presence of ties. # Exact Wilcoxon-Pratt Signed-Rank Test # #data: y by x (pos, neg) # stratified by block #Z = 0.39276, p-value = 0.7037 #alternative hypothesis: true mu is not equal to 0 #--3-- Adding a Small Random Noise to Data (Not recommended for final analysis): #If you want to break the ties by adding a small amount of noise, you can do so, but be aware that this alters your data: set.seed(123) # Setting a seed for reproducibility before_treatment_jittered <- before_treatment + runif(length(before_treatment), -0.01, 0.01) after_treatment_jittered <- after_treatment + runif(length(after_treatment), -0.01, 0.01) wilcox.test(before_treatment_jittered, after_treatment_jittered, paired = TRUE) #This method should be used with caution as it modifies the original data. # Wilcoxon signed rank exact test #data: before_treatment_jittered and after_treatment_jittered #V = 377, p-value = 0.4989 #alternative hypothesis: true location shift is not equal to 0 #--4-- Using another statistical test, such as the Sign Test: #If you want to avoid the issue of ties altogether, you can use a different test, like the sign test, which is less sensitive to ties: # Assuming 'BSDA' package is installed; if not, install via install.packages("BSDA") #install.packages("devtools", dependencies = TRUE) #devtools::has_devel() #devtools::install_github('alanarnholt/BSDA') library(BSDA) SIGN.test(before_treatment, after_treatment, paired = TRUE) #The sign test is a simpler non-parametric test that might be less powerful than the Wilcoxon test but does not require dealing with ties. # Dependent-samples Sign-Test #data: before_treatment and after_treatment #S = 20, p-value = 0.6177 #alternative hypothesis: true median difference is not equal to 0 #95 percent confidence interval: # -0.003397394 0.011402066 #sample estimates: #median of x-y # 0.002972116 #Achieved and Interpolated Confidence Intervals: # Conf.Level L.E.pt U.E.pt #Lower Achieved CI 0.9348 -0.0025 0.0102 #Interpolated CI 0.9500 -0.0034 0.0114 #Upper Achieved CI 0.9712 -0.0047 0.0130 ``` ## ST8 ```{r, echo=FALSE, warning=FALSE} # Extract data ST8_data <- df["ST8", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))] discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))] # Extract relative abundances for ST8 at admission and discharge before_treatment <- ST8_data[admission_samples] after_treatment <- ST8_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) #print(result1) # Creating a new tibble for the Wilcoxon test result #p.adj = result1$p.value, # Adjusted p-value, same as raw p-value in this context #p.format = sprintf("%.3f", result1$p.value), # Formatted p-value result1_formatted <- tibble( .y. = "ST8 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result #print(result1_formatted) knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST59 ```{r, echo=FALSE, warning=FALSE} # Extract data ST59_data <- df["ST59", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST59_data)[grepl("\\.1$", names(ST59_data))] discharge_samples <- names(ST59_data)[grepl("\\.3$", names(ST59_data))] # Extract relative abundances for ST59 at admission and discharge before_treatment <- ST59_data[admission_samples] after_treatment <- ST59_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST59 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST60 ```{r, echo=FALSE, warning=FALSE} # Extract data ST60_data <- df["ST60", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST60_data)[grepl("\\.1$", names(ST60_data))] discharge_samples <- names(ST60_data)[grepl("\\.3$", names(ST60_data))] # Extract relative abundances for ST60 at admission and discharge before_treatment <- ST60_data[admission_samples] after_treatment <- ST60_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST60 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST73 ```{r, echo=FALSE, warning=FALSE} # Extract data ST73_data <- df["ST73", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST73_data)[grepl("\\.1$", names(ST73_data))] discharge_samples <- names(ST73_data)[grepl("\\.3$", names(ST73_data))] # Extract relative abundances for ST73 at admission and discharge before_treatment <- ST73_data[admission_samples] after_treatment <- ST73_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST73 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST215 ```{r, echo=FALSE, warning=FALSE} # Extract data ST215_data <- df["ST215", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST215_data)[grepl("\\.1$", names(ST215_data))] discharge_samples <- names(ST215_data)[grepl("\\.3$", names(ST215_data))] # Extract relative abundances for ST215 at admission and discharge before_treatment <- ST215_data[admission_samples] after_treatment <- ST215_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST215 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5+ST87+ST130+ST210+ST384 ```{r, echo=FALSE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST5+ST87+ST130+ST210+ST384 Relative Abundance", group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5 ```{r, echo=FALSE, warning=FALSE} # Extract data ST5_data <- df["ST5", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST5_data)[grepl("\\.1$", names(ST5_data))] discharge_samples <- names(ST5_data)[grepl("\\.3$", names(ST5_data))] # Extract relative abundances for ST5 at admission and discharge before_treatment <- ST5_data[admission_samples] after_treatment <- ST5_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST5 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST130 ```{r, echo=FALSE, warning=FALSE} # Extract data ST130_data <- df["ST130", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST130_data)[grepl("\\.1$", names(ST130_data))] discharge_samples <- names(ST130_data)[grepl("\\.3$", names(ST130_data))] # Extract relative abundances for ST130 at admission and discharge before_treatment <- ST130_data[admission_samples] after_treatment <- ST130_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST130 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST2+ST19+ST100 ```{r, echo=TRUE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST2", "ST19", "ST100"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST2+ST19+ST100 Relative Abundance", group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST2 ```{r, echo=TRUE, warning=FALSE} # Extract data ST2_data <- df["ST2", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST2_data)[grepl("\\.1$", names(ST2_data))] discharge_samples <- names(ST2_data)[grepl("\\.3$", names(ST2_data))] # Extract relative abundances for ST2 at admission and discharge before_treatment <- ST2_data[admission_samples] after_treatment <- ST2_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST2 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST19 ```{r, echo=TRUE, warning=FALSE} # Extract data ST19_data <- df["ST19", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST19_data)[grepl("\\.1$", names(ST19_data))] discharge_samples <- names(ST19_data)[grepl("\\.3$", names(ST19_data))] # Extract relative abundances for ST19 at admission and discharge before_treatment <- ST19_data[admission_samples] after_treatment <- ST19_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST19 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST100 ```{r, echo=TRUE, warning=FALSE} # Extract data ST100_data <- df["ST100", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST100_data)[grepl("\\.1$", names(ST100_data))] discharge_samples <- names(ST100_data)[grepl("\\.3$", names(ST100_data))] # Extract relative abundances for ST100 at admission and discharge before_treatment <- ST100_data[admission_samples] after_treatment <- ST100_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST100 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 ```{r, echo=FALSE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384", "ST2", "ST19", "ST100"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 Relative Abundance", group1 = "admission", group2 = "discharge", p.format = sprintf("%.3f", result1$p.value), p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` # Statistical tests for the abundance of a specific subtype between admission and discharge in A* samples (g216 & yycH) ```{r, echo=FALSE, warning=FALSE} otu_data <- otu_table(ps_rel) df_ <- as.data.frame(otu_data) df <- df_[, grep("^A", names(df_))] ``` ## ST8 ```{r, echo=FALSE, warning=FALSE} # Extract data ST8_data <- df["ST8", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))] discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))] # Extract relative abundances for ST8 at admission and discharge before_treatment <- ST8_data[admission_samples] after_treatment <- ST8_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) #print(result1) # Creating a new tibble for the Wilcoxon test result #p.adj = result1$p.value, # Adjusted p-value, same as raw p-value in this context #p.format = sprintf("%.3f", result1$p.value), # Formatted p-value result1_formatted <- tibble( .y. = "ST8 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result #print(result1_formatted) knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST59 ```{r, echo=FALSE, warning=FALSE} # Extract data ST59_data <- df["ST59", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST59_data)[grepl("\\.1$", names(ST59_data))] discharge_samples <- names(ST59_data)[grepl("\\.3$", names(ST59_data))] # Extract relative abundances for ST59 at admission and discharge before_treatment <- ST59_data[admission_samples] after_treatment <- ST59_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST59 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST60 ```{r, echo=FALSE, warning=FALSE} # Extract data ST60_data <- df["ST60", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST60_data)[grepl("\\.1$", names(ST60_data))] discharge_samples <- names(ST60_data)[grepl("\\.3$", names(ST60_data))] # Extract relative abundances for ST60 at admission and discharge before_treatment <- ST60_data[admission_samples] after_treatment <- ST60_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST60 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST73 ```{r, echo=FALSE, warning=FALSE} # Extract data ST73_data <- df["ST73", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST73_data)[grepl("\\.1$", names(ST73_data))] discharge_samples <- names(ST73_data)[grepl("\\.3$", names(ST73_data))] # Extract relative abundances for ST73 at admission and discharge before_treatment <- ST73_data[admission_samples] after_treatment <- ST73_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST73 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST215 ```{r, echo=FALSE, warning=FALSE} # Extract data ST215_data <- df["ST215", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST215_data)[grepl("\\.1$", names(ST215_data))] discharge_samples <- names(ST215_data)[grepl("\\.3$", names(ST215_data))] # Extract relative abundances for ST215 at admission and discharge before_treatment <- ST215_data[admission_samples] after_treatment <- ST215_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST215 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5+ST87+ST130+ST210+ST384 ```{r, echo=FALSE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST5+ST87+ST130+ST210+ST384 Relative Abundance", group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5 ```{r, echo=FALSE, warning=FALSE} # Extract data ST5_data <- df["ST5", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST5_data)[grepl("\\.1$", names(ST5_data))] discharge_samples <- names(ST5_data)[grepl("\\.3$", names(ST5_data))] # Extract relative abundances for ST5 at admission and discharge before_treatment <- ST5_data[admission_samples] after_treatment <- ST5_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST5 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST130 ```{r, echo=FALSE, warning=FALSE} # Extract data ST130_data <- df["ST130", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST130_data)[grepl("\\.1$", names(ST130_data))] discharge_samples <- names(ST130_data)[grepl("\\.3$", names(ST130_data))] # Extract relative abundances for ST130 at admission and discharge before_treatment <- ST130_data[admission_samples] after_treatment <- ST130_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST130 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST2+ST19+ST100 ```{r, echo=TRUE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST2", "ST19", "ST100"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST2+ST19+ST100 Relative Abundance", group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST2 ```{r, echo=TRUE, warning=FALSE} # Extract data ST2_data <- df["ST2", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST2_data)[grepl("\\.1$", names(ST2_data))] discharge_samples <- names(ST2_data)[grepl("\\.3$", names(ST2_data))] # Extract relative abundances for ST2 at admission and discharge before_treatment <- ST2_data[admission_samples] after_treatment <- ST2_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST2 Relative Abundance", group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST19 ```{r, echo=TRUE, warning=FALSE} # Extract data ST19_data <- df["ST19", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST19_data)[grepl("\\.1$", names(ST19_data))] discharge_samples <- names(ST19_data)[grepl("\\.3$", names(ST19_data))] # Extract relative abundances for ST19 at admission and discharge before_treatment <- ST19_data[admission_samples] after_treatment <- ST19_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST19 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST100 ```{r, echo=TRUE, warning=FALSE} # Extract data ST100_data <- df["ST100", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST100_data)[grepl("\\.1$", names(ST100_data))] discharge_samples <- names(ST100_data)[grepl("\\.3$", names(ST100_data))] # Extract relative abundances for ST100 at admission and discharge before_treatment <- ST100_data[admission_samples] after_treatment <- ST100_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST100 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 ```{r, echo=FALSE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384", "ST2", "ST19", "ST100"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 Relative Abundance", group1 = "admission", group2 = "discharge", p.format = sprintf("%.3f", result1$p.value), p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` # Statistical tests for the abundance of a specific subtype between admission and discharge in H* samples (g216 & yycH) ```{r, echo=FALSE, warning=FALSE} otu_data <- otu_table(ps_rel) df_ <- as.data.frame(otu_data) df <- df_[, grep("^H", names(df_))] ``` ## ST8 ```{r, echo=FALSE, warning=FALSE} # Extract data ST8_data <- df["ST8", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST8_data)[grepl("\\.1$", names(ST8_data))] discharge_samples <- names(ST8_data)[grepl("\\.3$", names(ST8_data))] # Extract relative abundances for ST8 at admission and discharge before_treatment <- ST8_data[admission_samples] after_treatment <- ST8_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) #print(result1) # Creating a new tibble for the Wilcoxon test result #p.adj = result1$p.value, # Adjusted p-value, same as raw p-value in this context #p.format = sprintf("%.3f", result1$p.value), # Formatted p-value result1_formatted <- tibble( .y. = "ST8 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result #print(result1_formatted) knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST59 ```{r, echo=FALSE, warning=FALSE} # Extract data ST59_data <- df["ST59", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST59_data)[grepl("\\.1$", names(ST59_data))] discharge_samples <- names(ST59_data)[grepl("\\.3$", names(ST59_data))] # Extract relative abundances for ST59 at admission and discharge before_treatment <- ST59_data[admission_samples] after_treatment <- ST59_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST59 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST60 ```{r, echo=FALSE, warning=FALSE} # Extract data ST60_data <- df["ST60", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST60_data)[grepl("\\.1$", names(ST60_data))] discharge_samples <- names(ST60_data)[grepl("\\.3$", names(ST60_data))] # Extract relative abundances for ST60 at admission and discharge before_treatment <- ST60_data[admission_samples] after_treatment <- ST60_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST60 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST73 ```{r, echo=FALSE, warning=FALSE} # Extract data ST73_data <- df["ST73", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST73_data)[grepl("\\.1$", names(ST73_data))] discharge_samples <- names(ST73_data)[grepl("\\.3$", names(ST73_data))] # Extract relative abundances for ST73 at admission and discharge before_treatment <- ST73_data[admission_samples] after_treatment <- ST73_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST73 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST215 ```{r, echo=FALSE, warning=FALSE} # Extract data ST215_data <- df["ST215", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST215_data)[grepl("\\.1$", names(ST215_data))] discharge_samples <- names(ST215_data)[grepl("\\.3$", names(ST215_data))] # Extract relative abundances for ST215 at admission and discharge before_treatment <- ST215_data[admission_samples] after_treatment <- ST215_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST215 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5+ST87+ST130+ST210+ST384 ```{r, echo=FALSE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST5+ST87+ST130+ST210+ST384 Relative Abundance", group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5 ```{r, echo=FALSE, warning=FALSE} # Extract data ST5_data <- df["ST5", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST5_data)[grepl("\\.1$", names(ST5_data))] discharge_samples <- names(ST5_data)[grepl("\\.3$", names(ST5_data))] # Extract relative abundances for ST5 at admission and discharge before_treatment <- ST5_data[admission_samples] after_treatment <- ST5_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST5 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST130 ```{r, echo=FALSE, warning=FALSE} # Extract data ST130_data <- df["ST130", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST130_data)[grepl("\\.1$", names(ST130_data))] discharge_samples <- names(ST130_data)[grepl("\\.3$", names(ST130_data))] # Extract relative abundances for ST130 at admission and discharge before_treatment <- ST130_data[admission_samples] after_treatment <- ST130_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST130 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST2+ST19+ST100 ```{r, echo=TRUE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST2", "ST19", "ST100"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST2+ST19+ST100 Relative Abundance", group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST2 ```{r, echo=TRUE, warning=FALSE} # Extract data ST2_data <- df["ST2", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST2_data)[grepl("\\.1$", names(ST2_data))] discharge_samples <- names(ST2_data)[grepl("\\.3$", names(ST2_data))] # Extract relative abundances for ST2 at admission and discharge before_treatment <- ST2_data[admission_samples] after_treatment <- ST2_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST2 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST19 ```{r, echo=TRUE, warning=FALSE} # Extract data ST19_data <- df["ST19", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST19_data)[grepl("\\.1$", names(ST19_data))] discharge_samples <- names(ST19_data)[grepl("\\.3$", names(ST19_data))] # Extract relative abundances for ST19 at admission and discharge before_treatment <- ST19_data[admission_samples] after_treatment <- ST19_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST19 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST100 ```{r, echo=TRUE, warning=FALSE} # Extract data ST100_data <- df["ST100", ] # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(ST100_data)[grepl("\\.1$", names(ST100_data))] discharge_samples <- names(ST100_data)[grepl("\\.3$", names(ST100_data))] # Extract relative abundances for ST100 at admission and discharge before_treatment <- ST100_data[admission_samples] after_treatment <- ST100_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) print(before_treatment) print(mean(before_treatment)) print(after_treatment) print(mean(after_treatment)) result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "ST100 Relative Abundance", # Change this to the appropriate response variable name group1 = "admission", group2 = "discharge", p = result1$p.value, p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result knitr::kable(result1_formatted) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` ## ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 ```{r, echo=FALSE, warning=FALSE} # Extract data for the specified subtypes selected_taxa <- df[c("ST5", "ST87", "ST130", "ST210", "ST384", "ST2", "ST19", "ST100"), ] # Calculate the combined relative abundances for the selected taxa combined_data <- colSums(selected_taxa, na.rm = TRUE) # Define sample groups based on naming convention (admission (.1) and discharge (.3)) admission_samples <- names(combined_data)[grepl("\\.1$", names(combined_data))] discharge_samples <- names(combined_data)[grepl("\\.3$", names(combined_data))] # Extract combined relative abundances for the selected taxa at admission and discharge before_treatment <- combined_data[admission_samples] after_treatment <- combined_data[discharge_samples] # Ensure the treatment data is numeric before_treatment <- as.numeric(before_treatment) after_treatment <- as.numeric(after_treatment) # Perform the Wilcoxon signed-rank test result1 <- wilcox.test(before_treatment, after_treatment, paired = TRUE) # Creating a new tibble for the Wilcoxon test result result1_formatted <- tibble( .y. = "Combined ST5+ST87+ST130+ST210+ST384+ST2+ST19+ST100 Relative Abundance", group1 = "admission", group2 = "discharge", p.format = sprintf("%.3f", result1$p.value), p.signif = symnum(result1$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", "ns")), method = "Wilcoxon signed-rank test" ) # Print the formatted result using knitr and kableExtra for a nice table format in R Markdown knitr::kable(result1_formatted) %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) ``` # goeBURST DLV-Analysis (g216 & yycH) ```{r echo=FALSE, out.width='100%'} knitr::include_graphics("figures/goeBURST.png") ```