MicrobiotaProcess_PCA_Group3-4.R processing Data_Karoline_16S_2025

# https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html

# -----------------------------------
# ---- prepare the R environment ----
#Rscript MicrobiotaProcess.R
#NOTE: exit R script, then login again R-environment; rm -rf Phyloseq*_cache
#mkdir figures
#rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
#source("MicrobiotaProcess_Group3_vs_Group4.R")

# with #alpha = 2.0, running the following script further!

# -----------------------------
# ---- 3.1. bridges other tools
##https://github.com/YuLab-SMU/MicrobiotaProcess
##https://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess.html
##https://chiliubio.github.io/microeco_tutorial/intro.html#framework
##https://yiluheihei.github.io/microbiomeMarker/reference/plot_cladogram.html
#BiocManager::install("MicrobiotaProcess")
#install.packages("microeco")
#install.packages("ggalluvial")
#install.packages("ggh4x")

library(MicrobiotaProcess)
library(microeco)
library(ggalluvial)
library(ggh4x)
library(gghalves)

## Convert the phyloseq object to a MicrobiotaProcess object
#mp <- as.MicrobiotaProcess(ps.ng.tax)

#mt <- phyloseq2microeco(ps.ng.tax) #--> ERROR
#abundance_table <- mt$abun_table
#taxonomy_table <- mt$tax_table

#ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
#ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)

##OPTION1 (NOT_USED): take all samples, prepare ps.ng.tax_abund --> mpse_abund
##mpse <- ps.ng.tax %>% as.MPSE()
#mpse_abund <- ps.ng.tax_abund %>% as.MPSE()

##OPTION2 (USED!): take partial samples, prepare ps.ng.tax or ps.ng.tax_abund (2 replacements!)--> ps.ng.tax_sel --> mpse_abund
ps.ng.tax_sel <- ps.ng.tax_abund

##otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("1","2","5","6","7",  "15","16","17","18","19","20",  "29","30","31","32",  "40","41","42","43","44","46")]
##NOTE: Only choose Group2, Group4, Group6, Group8
#> ps.ng.tax_sel
#otu_table()   OTU Table:         [ 37465 taxa and 29 samples ]
#sample_data() Sample Data:       [ 29 samples by 10 sample variables ]
#tax_table()   Taxonomy Table:    [ 37465 taxa by 7 taxonomic ranks ]
#phy_tree()    Phylogenetic Tree: [ 37465 tips and 37461 internal nodes ]
#-Group4: "21","22","23","24","25","26","27","28",
#-Group8: ,  "47","48","49","50","52","53","55"
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[,c("sample-C3","sample-C4","sample-C5","sample-C6","sample-C7",  "sample-E4","sample-E5","sample-E6","sample-E7","sample-E8")]
mpse_abund <- ps.ng.tax_sel %>% as.MPSE()
# A MPSE-tibble (MPSE object) abstraction: 2,352 × 20
# NOTE mpse_abund contains 20 variables: OTU, Sample, Abundance, BarcodeSequence, LinkerPrimerSequence, FileInput, Group,
#   Sex_age

, pre_post_stroke , Conc , Vol_50ng , Vol_PCR , Description , # Domain , Phylum , Class , Order , Family , Genus , Species # ———————————– # —- 3.2. alpha diversity analysis # Rarefied species richness + RareAbundance mpse_abund %% mp_rrarefy() # ‘chunks’ represent the split number of each sample to calculate alpha # diversity, default is 400. e.g. If a sample has total 40000 # reads, if chunks is 400, it will be split to 100 sub-samples # (100, 200, 300,…, 40000), then alpha diversity index was # calculated based on the sub-samples. # ‘.abundance’ the column name of abundance, if the ‘.abundance’ is not be # rarefied calculate rarecurve, user can specific ‘force=TRUE’. mpse_abund %% mp_cal_rarecurve( .abundance = RareAbundance, chunks = 400 ) # The RareAbundanceRarecurve column will be added the colData slot # automatically (default action=”add”) #NOTE mpse_abund contains 22 varibles = 20 varibles + RareAbundance + RareAbundanceRarecurve # default will display the confidence interval around smooth. # se=TRUE # NOTE that two colors #c(“#00A087FF”, “#3C5488FF”) for .group = pre_post_stroke; four colors c(“#1f78b4”, “#33a02c”, “#e31a1c”, “#6a3d9a”) for .group = Group; p1 % mp_plot_rarecurve( .rare = RareAbundanceRarecurve, .alpha = Observe, ) p2 % mp_plot_rarecurve( .rare = RareAbundanceRarecurve, .alpha = Observe, .group = Group ) + scale_color_manual(values=c(“#1f78b4”, “#e31a1c”)) + scale_fill_manual(values=c(“#1f78b4”, “#e31a1c”), guide=”none”) # combine the samples belong to the same groups if plot.group=TRUE p3 % mp_plot_rarecurve( .rare = RareAbundanceRarecurve, .alpha = “Observe”, .group = Group, plot.group = TRUE ) + scale_color_manual(values=c(“#1f78b4”, “#e31a1c”)) + scale_fill_manual(values=c(“#1f78b4”, “#e31a1c”),guide=”none”) png(“rarefaction_of_samples_or_groups.png”, width=1080, height=600) p1 + p2 + p3 dev.off() # —————————————— # 3.3. calculate alpha index and visualization library(ggplot2) library(MicrobiotaProcess) mpse_abund %% mp_cal_alpha(.abundance=RareAbundance) mpse_abund #NOTE mpse_abund contains 28 varibles = 22 varibles + Observe , Chao1 , ACE , Shannon , Simpson , Pielou f1 % mp_plot_alpha( .group=Group, .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou) ) + scale_fill_manual(values=c(“#1f78b4”, “#e31a1c”), guide=”none”) + scale_color_manual(values=c(“#1f78b4”, “#e31a1c”), guide=”none”) f2 % mp_plot_alpha( .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou) ) #ps.ng.tax_sel contais only pre samples –> f1 cannot be generated! png(“alpha_diversity_comparison.png”, width=1400, height=600) f1 / f2 dev.off() # ——————————————- # 3.4. The visualization of taxonomy abundance (Class) mpse_abund %% mp_cal_abundance( # for each samples .abundance = RareAbundance ) %>% mp_cal_abundance( # for each groups .abundance=RareAbundance, .group=Group ) mpse_abund #NOTE mpse_abund contains 29 varibles = 28 varibles + RelRareAbundanceBySample # visualize the relative abundance of top 20 phyla for each sample. # .group=time, p1 % mp_plot_abundance( .abundance=RareAbundance, taxa.class = Class, topn = 20, relative = TRUE ) # visualize the abundance (rarefied) of top 20 phyla for each sample. # .group=time, p2 % mp_plot_abundance( .abundance=RareAbundance, taxa.class = Class, topn = 20, relative = FALSE ) png(“relative_abundance_and_abundance.png”, width= 1200, height=600) #NOT PRODUCED! p1 / p2 dev.off() #—- h1 % mp_plot_abundance( .abundance = RareAbundance, .group = Group, taxa.class = Class, relative = TRUE, topn = 20, geom = ‘heatmap’, features.dist = ‘euclidean’, features.hclust = ‘average’, sample.dist = ‘bray’, sample.hclust = ‘average’ ) h2 % mp_plot_abundance( .abundance = RareAbundance, .group = Group, taxa.class = Class, relative = FALSE, topn = 20, geom = ‘heatmap’, features.dist = ‘euclidean’, features.hclust = ‘average’, sample.dist = ‘bray’, sample.hclust = ‘average’ ) # the character (scale or theme) of figure can be adjusted by set_scale_theme # refer to the mp_plot_dist png(“relative_abundance_and_abundance_heatmap.png”, width= 1200, height=600) aplot::plot_list(gglist=list(h1, h2), tag_levels=”A”) dev.off() # visualize the relative abundance of top 20 class for each .group (Group) p3 % mp_plot_abundance( .abundance=RareAbundance, .group=Group, taxa.class = Class, topn = 20, plot.group = TRUE ) # visualize the abundance of top 20 phyla for each .group (time) p4 % mp_plot_abundance( .abundance=RareAbundance, .group= Group, taxa.class = Class, topn = 20, relative = FALSE, plot.group = TRUE ) png(“relative_abundance_and_abundance_groups.png”, width= 1000, height=1000) p3 / p4 dev.off() # ————————— # 3.5. Beta diversity analysis # ——————————————— # 3.5.1 The distance between samples or groups # standardization # mp_decostand wraps the decostand of vegan, which provides # many standardization methods for community ecology. # default is hellinger, then the abundance processed will # be stored to the assays slot. mpse_abund %% mp_decostand(.abundance=Abundance) mpse_abund #NOTE mpse_abund contains 30 varibles = 29 varibles + hellinger # calculate the distance between the samples. # the distance will be generated a nested tibble and added to the # colData slot. mpse_abund %% mp_cal_dist(.abundance=hellinger, distmethod=”bray”) mpse_abund #NOTE mpse_abund contains 31 varibles = 30 varibles + bray # mp_plot_dist provides there methods to visualize the distance between the samples or groups # when .group is not provided, the dot heatmap plot will be return p1 % mp_plot_dist(.distmethod = bray) png(“distance_between_samples.png”, width= 1000, height=1000) p1 dev.off() # when .group is provided, the dot heatmap plot with group information will be return. p2 % mp_plot_dist(.distmethod = bray, .group = Group) # The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function. p2 %>% set_scale_theme( x = scale_fill_manual( values=c(“#1f78b4”, “#e31a1c”), #c(“orange”, “deepskyblue”), guide = guide_legend( keywidth = 1, keyheight = 0.5, title.theme = element_text(size=8), label.theme = element_text(size=6) ) ), aes_var = Group # specific the name of variable ) %>% set_scale_theme( x = scale_color_gradient( guide = guide_legend(keywidth = 0.5, keyheight = 0.5) ), aes_var = bray ) %>% set_scale_theme( x = scale_size_continuous( range = c(0.1, 3), guide = guide_legend(keywidth = 0.5, keyheight = 0.5) ), aes_var = bray ) png(“distance_between_samples_with_group_info.png”, width= 1000, height=1000) p2 dev.off() # when .group is provided and group.test is TRUE, the comparison of different groups will be returned # Assuming p3 is a ggplot object after mp_plot_dist call p3 % mp_plot_dist(.distmethod = bray, .group = Group, group.test = TRUE, textsize = 6) + theme( axis.title.x = element_text(size = 14), # Customize x-axis label face = “bold” axis.title.y = element_text(size = 14), # Customize y-axis label axis.text.x = element_text(size = 14), # Customize x-axis ticks axis.text.y = element_text(size = 14) # Customize y-axis ticks ) # Save the plot with the new theme settings png(“Comparison_of_Bray_Distances_Group3-4.png”, width = 1000, height = 1000) print(p3) # Ensure that p3 is explicitly printed in the device dev.off() # Extract Bray-Curtis Distance Values and save them in a Excel-table. library(dplyr) library(tidyr) library(openxlsx) # Define the sample numbers vector sample_numbers <- c("sample-C3","sample-C4","sample-C5","sample-C6","sample-C7", "sample-E4","sample-E5","sample-E6","sample-E7","sample-E8") # Consolidate the list of tibbles using the actual sample numbers bray_data <- bind_rows( lapply(seq_along(mpse_abund$bray), function(i) { tibble( Sample1 = sample_numbers[i], # Use actual sample number Sample2 = mpse_abund$bray[[i]]$braySampley, BrayDistance = mpse_abund$bray[[i]]$bray ) }), .id = "PairID" ) # Print the data frame to check the output print(bray_data) # Write the data frame to an Excel file write.xlsx(bray_data, file = "Bray_Curtis_Distances.xlsx") #DELETE the column "PairID" in Excel file # ———————– # 3.5.2 The PCoA analysis #install.packages("corrr") library(corrr) #install.packages("ggside") library(ggside) mpse_abund %% mp_cal_pcoa(.abundance=hellinger, distmethod=”bray”) # The dimensions of ordination analysis will be added the colData slot (default). mpse_abund mpse_abund %>% print(width=380, n=2) #NOTE mpse_abund contains 34 varibles = 31 varibles + `PCo1 (30.16%)` , `PCo2 (15.75%)` , `PCo3 (10.53%)` #BUG why 36 variables in mpse_abund %>% print(width=380, n=1) [RareAbundanceBySample , RareAbundanceByGroup ] #> methods(class=class(mpse_abund)) # [1] [ [[<- [<- # [4] $ $<- arrange # [7] as_tibble as.data.frame as.phyloseq #[10] coerce coerce<- colData<- #[13] distinct filter group_by #[16] left_join mp_adonis mp_aggregate_clade #[19] mp_aggregate mp_anosim mp_balance_clade #[22] mp_cal_abundance mp_cal_alpha mp_cal_cca #[25] mp_cal_clust mp_cal_dca mp_cal_dist #[28] mp_cal_nmds mp_cal_pca mp_cal_pcoa #[31] mp_cal_pd_metric mp_cal_rarecurve mp_cal_rda #[34] mp_cal_upset mp_cal_venn mp_decostand #[37] mp_diff_analysis mp_diff_clade mp_envfit #[40] mp_extract_abundance mp_extract_assays mp_extract_dist #[43] mp_extract_feature mp_extract_internal_attr mp_extract_rarecurve #[46] mp_extract_refseq mp_extract_sample mp_extract_taxonomy #[49] mp_extract_tree mp_filter_taxa mp_mantel #[52] mp_mrpp mp_plot_abundance mp_plot_alpha #[55] mp_plot_diff_boxplot mp_plot_diff_res mp_plot_dist #[58] mp_plot_ord mp_plot_rarecurve mp_plot_upset #[61] mp_plot_venn mp_rrarefy mp_select_as_tip #[64] mp_stat_taxa mutate otutree #[67] otutree<- print pull #[70] refsequence refsequence<- rename #[73] rownames<- select show # [ reached getOption("max.print") — omitted 6 entries ] #see '?methods' for accessing help and source code # We also can perform adonis or anosim to check whether it is significant to the dissimilarities of groups. mpse_abund %% mp_adonis(.abundance=hellinger, .formula=~Group, distmethod=”bray”, permutations=9999, action=”add”) mpse_abund %>% mp_extract_internal_attr(name=adonis) #NOTE mpse_abund contains 34 varibles, no new variable, it has been saved in mpse_abund and can be extracted with “mpse_abund %>% mp_extract_internal_attr(name=’adonis’)” #The result of adonis has been saved to the internal attribute ! #It can be extracted using this-object %>% mp_extract_internal_attr(name=’adonis’) #The object contained internal attribute: PCoA ADONIS #Permutation test for adonis under reduced model #Terms added sequentially (first to last) #Permutation: free #Number of permutations: 9999 # #vegan::adonis2(formula = .formula, data = sampleda, permutations = permutations, method = distmethod) # Df SumOfSqs R2 F Pr(>F) #Group 1 0.23448 0.22659 3.5158 5e-04 *** #Residual 12 0.80032 0.77341 #Total 13 1.03480 1.00000 #— #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # (“1″,”2″,”5″,”6″,”7”, “15”,”16″,”17″,”18″,”19″,”20″, “29”,”30″,”31″,”32″, “40”,”41″,”42″,”43″,”44″,”46″) #div.df2[div.df2 == “Group1”] <- "aged.post" #div.df2[div.df2 == "Group3"] <- "young.post" #div.df2[div.df2 == "Group5"] <- "aged.post" #div.df2[div.df2 == "Group7"] <- "young.post" # ("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55") #div.df2[div.df2 == "Group2"] <- "aged.pre" #div.df2[div.df2 == "Group4"] <- "young.pre" #div.df2[div.df2 == "Group6"] <- "aged.pre" #div.df2[div.df2 == "Group8"] <- "young.pre" #Group1: f.aged and post #Group2: f.aged and pre #Group3: f.young and post #Group4: f.young and pre #Group5: m.aged and post #Group6: m.aged and pre #Group7: m.young and post #Group8: m.young and pre #[,c("1","2","5","6","7", "8","9","10","12","13","14")] #[,c("15","16","17","18","19","20", "21","22","23","24","25","26","27","28")] #[,c("29","30","31","32", "33","34","35","36","37","38","39","51")] #[,c("40","41","42","43","44","46", "47","48","49","50","52","53","55")] #For the first set: #a6cee3: This is a light blue color, somewhat pastel and soft. #b2df8a: A soft, pale green, similar to a light lime. #fb9a99: A soft pink, slightly peachy or salmon-like. #cab2d6: A pale purple, reminiscent of lavender or a light mauve. #For the second set: #1f78b4: This is a strong, vivid blue, close to cobalt or a medium-dark blue. #33a02c: A medium forest green, vibrant and leafy. #e31a1c: A bright red, very vivid, similar to fire engine red. #6a3d9a: This would be described as a deep purple, akin to a dark lavender or plum. p1 % mp_plot_ord( .ord = pcoa, .group = Group, .color = Group, .size = 4, # increase point size! .alpha = 1, ellipse = TRUE, show.legend = FALSE ) + scale_fill_manual( values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend( keywidth = 1.6, keyheight = 1.6, label.theme = element_text(size = 16) ) ) + scale_color_manual( values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend( keywidth = 1.6, keyheight = 1.6, label.theme = element_text(size = 16) ) ) + theme( axis.text = element_text(size = 20), axis.title = element_text(size = 22), legend.text = element_text(size = 20), legend.title = element_text(size = 22), plot.title = element_text(size = 24, face = “bold”), plot.subtitle = element_text(size = 20) ) png(“PCoA_Group3-4.png”, width = 1200, height = 1000) p1 dev.off() pdf(“PCoA_Group3-4.pdf”) p1 dev.off() p2 % mp_plot_ord( .ord = pcoa, .group = Group, .color = Group, .size = Shannon, .alpha = Observe, ellipse = TRUE, show.legend = FALSE ) + scale_fill_manual( values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend( keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 16) ) ) + scale_color_manual( values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend( keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 16) ) ) + scale_size_continuous( range = c(2, 6), # increase size range! guide = guide_legend( keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 16) ) ) + theme( axis.text = element_text(size = 20), axis.title = element_text(size = 22), legend.text = element_text(size = 20), legend.title = element_text(size = 22), plot.title = element_text(size = 24, face = “bold”), plot.subtitle = element_text(size = 20) ) png(“PCoA2_Group3-4.png”, width = 1200, height = 1000) p2 dev.off() pdf(“PCoA2_Group3-4.pdf”) p2 dev.off() # Extract sample names and add ShortLabel to colData colData(mpse_abund)$ShortLabel <- gsub("sample-", "", mpse_abund@colData@rownames) p3 % mp_plot_ord( .ord = pcoa, .group = Group, .color = Group, .size = Shannon, .alpha = Observe, ellipse = TRUE, show.legend = FALSE ) + geom_text_repel(aes(label = ShortLabel), size = 5, max.overlaps = 100) + scale_fill_manual( values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend( keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 16) ) ) + scale_color_manual( values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend( keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 16) ) ) + scale_size_continuous( range = c(2, 6), # increase size range! guide = guide_legend( keywidth = 0.6, keyheight = 0.6, label.theme = element_text(size = 16) ) ) + theme( axis.text = element_text(size = 20), axis.title = element_text(size = 22), legend.text = element_text(size = 20), legend.title = element_text(size = 22), plot.title = element_text(size = 24, face = “bold”), plot.subtitle = element_text(size = 20) ) png(“PCoA3_Group3-4.png”, width = 1200, height = 1000) p3 dev.off() pdf(“PCoA3_Group3-4.pdf”) p3 dev.off()

Microbial bioinformatics

Microbial bioinformatics uses computational tools to analyze genomes, track evolution, and study functions in microorganisms, including bacteria and viruses.

MicrobiotaProcess_PCA_Group3-4.R processing Data_Karoline_16S_2025

Leave a Reply Cancel reply