## =========================================================
## MicrobiotaProcess_UPDATED.R
## Uses your objects:
## - ps_filt : for alpha + beta diversity (NOT taxa-filtered)
## - ps_abund_rel : for cleaner composition plots (taxa filtered for plotting)
##
## Output:
## - Rarefaction curves + alpha diversity plots
## - Bray (Hellinger) distance + PCoA + PERMANOVA
## - Composition plots (Class) + heatmaps (from ps_abund_rel)
## =========================================================
## -----------------------------
## 0) Define groups (sample IDs) ---- EXACTLY as provided
Group1<-c("U24080201","U25020701","O23092004","U24101801","U25022101","O23102703","A24062801","O23112205","U23071901","A23112002","U24111801","O23110101","U24121801","O23120101","O24011202","O23090803","A23060602","A24030402","U25011701","O24011901","U23090801","O24011201","O24011003","O23092202","O23082301","O23091403","O23112901","O23092201","O24013103","O24021403","O24010402","O23092005","O23092203","O24010302","O23090701","O23091501","O23092701","O24022202","O23092802","O23090601","O23100401","O24022102","O23081801","O23092006","O23100503","O23090602","O24013104","O24020803","O24010301","O24010404","O23090802","O23092801","O24022801","O23100706","O23102602","O24021601","O24012401","O24021603","O24022901","O24021501","O23110902","O23102601","O23102704","O23100803","O23102701","O24021401","O24022101","O24030603","O23110901","O23110903","O23110301","O24022301","O23102502","O23111501","O23111602","O24020705","O24021502","O24022201","O23110202","O23090801")
Group2<-c("U23071701","U23052401","U23052201","U24070401","O24011801","O23092003","A24071901","A24072901","O24011102","O23121501","O23092104","O23092001","O23121301","O24020701","O23112201","O23100701","O23100801","O24020903","O24020901","O24020703","O23112204")
Group3<-c("O23100802","O24011205","O23092002","O24011207","O23092103","O23102501","O24011005","A24030401","O24011004","A23051102","U25011702","O24011204","O23121502","O23120702","O24011206","O24021404","O23092101","O24010403","O23112303","O23083001","O23082302","O24010401","O24022302","O24010501","O23112902","O23082303","O23083102","O24013101","O23100402","O24020801","O23120701","O23121304","O24021602","O24011802","O23121306","O23120103","O24020905","O24012403","O24013102","O24021503","O24020904","O23102504","O24013105","O24030601","O23100705","O24030604","O23111601","O24020103","O24030602","O23110302","O23102603","O24031304","O24021402","O24020101","O24012501","O24020804","O23100804","O23102503","O24022902","O24020704","O23110904","O24020102","O24012402","O23102702","O23102604","O23110204","O23110203","O23083101","O23092702")
Group4<-c("O23112304","A23051103","A24071701","A23080101","A24031201","A24080201","O24011105","O23091305","O23121302","O23092803")
Group5<-c("O23091303","O23112301","O24011203","A23112001","O24011001","O24011002","O23091302","O24020902","O23092102","O23091401","O23121503","O23091402","O24020702","O23091301","O23112206","O24011103","O23121305","O24011101","O23121303","O24011104","O23120104","O23100704","O23112302","O23112203","O23100703","O24020805","O24020802","O23112202","O24031302","O23111502","O23100702","O24031301","O24031305","O23082402")
NTC<-c("NTC_2","NTC_3","NTC_16","NTC_4","NTC_6","NTC_7","NTC_8","NTC_9","NTC_11","NTC_12","NTC_13","NTC_14","NTC_15","NTC_10")
PC<-c("PC_1","PC_2","PC_8","PC_4","PC_3","PC_5","PC_6","PC_7","UR009768","UR009909","PC01")
## Ensure unique IDs within each vector
Group1 <- unique(Group1); Group2 <- unique(Group2); Group3 <- unique(Group3)
Group4 <- unique(Group4); Group5 <- unique(Group5); NTC <- unique(NTC); PC <- unique(PC)
## The group order you want everywhere (plots + stats)
group_levels <- c("Group1","Group2","Group3","Group4","Group5","NTC","PC")
## -----------------------------
## 0.1) Sample subset (union of all groups)
keep_samples <- unique(c(Group1, Group2, Group3, Group4, Group5, NTC, PC))
## -----------------------------
## 0.2) Helper: assign Group as an ordered factor (membership-based)
add_group <- function(mpse_obj) {
sn <- rownames(colData(mpse_obj))
grp <- rep(NA_character_, length(sn))
grp[sn %in% Group1] <- "Group1"
grp[sn %in% Group2] <- "Group2"
grp[sn %in% Group3] <- "Group3"
grp[sn %in% Group4] <- "Group4"
grp[sn %in% Group5] <- "Group5"
grp[sn %in% NTC] <- "NTC"
grp[sn %in% PC] <- "PC"
colData(mpse_obj)$Group <- factor(grp, levels = group_levels)
# warn if any samples weren't assigned (helps catch typos / missing IDs)
if (any(is.na(colData(mpse_obj)$Group))) {
warning(
"Unassigned samples (not found in any group list): ",
paste(sn[is.na(colData(mpse_obj)$Group)], collapse = ", ")
)
}
mpse_obj
}
## -----------------------------
## 0.3) Colors (edit to taste)
group_colors <- c(
"Group1" = "#1f77b4",
"Group2" = "#ff7f0e",
"Group3" = "#2ca02c",
"Group4" = "#d62728",
"Group5" = "#9467bd",
"NTC" = "#7f7f7f",
"PC" = "#8c564b"
)
## =========================================================
## 1) Diversity analysis object (alpha + beta)
## IMPORTANT: start from ps_filt (all taxa retained)
## =========================================================
ps_div <- prune_samples(keep_samples, ps_filt)
ps_div <- prune_taxa(taxa_sums(ps_div) > 0, ps_div)
mpse_div <- as.MPSE(ps_div)
mpse_div <- add_group(mpse_div)
cat("\n[mpse_div] Group counts:\n")
print(table(colData(mpse_div)$Group, useNA = "ifany"))
## =========================================================
## 2) Alpha diversity (rarefaction-based)
## =========================================================
set.seed(9242)
mpse_div %<>% mp_rrarefy() # creates RareAbundance
mpse_div %<>% mp_cal_rarecurve(.abundance = RareAbundance, chunks = 400)
# Rarefaction curves: sample + grouped
p_rare_1 <- mpse_div %>% mp_plot_rarecurve(.rare = RareAbundanceRarecurve, .alpha = Observe)
p_rare_2 <- mpse_div %>%
mp_plot_rarecurve(.rare = RareAbundanceRarecurve, .alpha = Observe, .group = Group) +
scale_color_manual(values = group_colors, guide = "none") +
scale_fill_manual(values = group_colors, guide = "none")
p_rare_3 <- mpse_div %>%
mp_plot_rarecurve(.rare = RareAbundanceRarecurve, .alpha = "Observe",
.group = Group, plot.group = TRUE) +
scale_color_manual(values = group_colors, guide = "none") +
scale_fill_manual(values = group_colors, guide = "none")
png("figures/rarefaction_of_samples_or_groups.png", width = 1080, height = 600)
print(p_rare_1 + p_rare_2 + p_rare_3)
dev.off()
# Alpha indices from RareAbundance
mpse_div %<>% mp_cal_alpha(.abundance = RareAbundance)
f_alpha_1 <- mpse_div %>%
mp_plot_alpha(.group = Group, .alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)) +
scale_color_manual(values = group_colors, guide = "none") +
scale_fill_manual(values = group_colors, guide = "none")
f_alpha_2 <- mpse_div %>%
mp_plot_alpha(.alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou))
png("figures/alpha_diversity_comparison.png", width = 1400, height = 600)
print(f_alpha_1 / f_alpha_2)
dev.off()
## =========================================================
## 3) Beta diversity (Bray–Curtis on Hellinger)
## IMPORTANT: use non-rarefied Abundance (not taxa-filtered)
## =========================================================
mpse_div %<>% mp_decostand(.abundance = Abundance) # creates 'hellinger'
mpse_div %<>% mp_cal_dist(.abundance = hellinger, distmethod = "bray")
# Distance between samples
p_dist_1 <- mpse_div %>% mp_plot_dist(.distmethod = bray)
png("figures/distance_between_samples.png", width = 1000, height = 1000)
print(p_dist_1)
dev.off()
# Distance with group info
p_dist_2 <- mpse_div %>% mp_plot_dist(.distmethod = bray, .group = Group) +
scale_fill_manual(values = group_colors) +
scale_color_gradient()
png("figures/distance_between_samples_with_group_info.png", width = 1000, height = 1000)
print(p_dist_2)
dev.off()
# Compare distances between groups (Bray)
p_dist_3 <- mpse_div %>%
mp_plot_dist(.distmethod = bray, .group = Group, group.test = TRUE, textsize = 6) +
theme(
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14)
)
png("figures/Comparison_of_Bray_Distances.png", width = 1000, height = 1000)
print(p_dist_3)
dev.off()
## PCoA + PERMANOVA (adonis2)
mpse_div %<>% mp_cal_pcoa(.abundance = hellinger, distmethod = "bray")
mpse_div %<>% mp_adonis(
.abundance = hellinger,
.formula = ~ Group,
distmethod = "bray",
permutations = 9999,
action = "add"
)
cat("\n[PERMANOVA result]\n")
print(mpse_div %>% mp_extract_internal_attr(name = "adonis"))
## PCoA plot
p_pcoa_1 <- mpse_div %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = 4,
.alpha = 1,
ellipse = TRUE,
show.legend = TRUE
) +
scale_fill_manual(values = group_colors) +
scale_color_manual(values = group_colors) +
theme(
axis.text = element_text(size = 16),
axis.title = element_text(size = 18),
legend.text = element_text(size = 14),
legend.title= element_text(size = 16)
)
png("figures/PCoA.png", width = 1200, height = 1000)
print(p_pcoa_1)
dev.off()
pdf("figures/PCoA.pdf", width = 10, height = 8)
print(p_pcoa_1)
dev.off()
## Optional: label points
colData(mpse_div)$ShortLabel <- gsub("sample-", "", rownames(colData(mpse_div)))
p_pcoa_2 <- p_pcoa_1 +
geom_text_repel(aes(label = ShortLabel), size = 4, max.overlaps = 100)
png("figures/PCoA_labeled.png", width = 1200, height = 1000)
print(p_pcoa_2)
dev.off()
#[PERMANOVA result]
#The object contained internal attribute: PCoA ADONIS
#Permutation test for adonis under reduced model
#Permutation: free
#Number of permutations: 9999
#
#vegan::adonis2(formula = .formula, data = sampleda, permutations = permutations, method = distmethod)
# Df SumOfSqs R2 F Pr(>F)
#Model 6 11.446 0.16058 7.3971 1e-04 ***
#Residual 232 59.829 0.83942
#Total 238 71.274 1.00000
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## =========================================================
## 4) Composition plots object (clean taxa set for plotting)
## IMPORTANT: start from ps_abund_rel (plotting-filtered taxa)
## =========================================================
ps_plot <- prune_samples(keep_samples, ps_abund_rel)
ps_plot <- prune_taxa(taxa_sums(ps_plot) > 0, ps_plot)
mpse_plot <- as.MPSE(ps_plot)
mpse_plot <- add_group(mpse_plot)
## Summaries for plotting (Class)
mpse_plot %<>%
mp_cal_abundance(.abundance = Abundance) %>% # per sample
mp_cal_abundance(.abundance = Abundance, .group = Group) # per group
## Class abundance barplots (top 20)
p_class_rel <- mpse_plot %>%
mp_plot_abundance(
.abundance = Abundance,
taxa.class = Class,
topn = 20,
relative = TRUE
)
p_class_abs <- mpse_plot %>%
mp_plot_abundance(
.abundance = Abundance,
taxa.class = Class,
topn = 20,
relative = FALSE
)
png("figures/relative_abundance_and_abundance_samples.png", width = 1200, height = 600)
print(p_class_rel / p_class_abs)
dev.off()
## Heatmaps (grouped)
h_rel <- mpse_plot %>%
mp_plot_abundance(
.abundance = Abundance,
.group = Group,
taxa.class = Class,
relative = TRUE,
topn = 20,
geom = "heatmap",
features.dist = "euclidean",
features.hclust = "average",
sample.dist = "bray",
sample.hclust = "average"
)
h_abs <- mpse_plot %>%
mp_plot_abundance(
.abundance = Abundance,
.group = Group,
taxa.class = Class,
relative = FALSE,
topn = 20,
geom = "heatmap",
features.dist = "euclidean",
features.hclust = "average",
sample.dist = "bray",
sample.hclust = "average"
)
png("figures/relative_abundance_and_abundance_heatmap.png", width = 1200, height = 600)
print(aplot::plot_list(gglist = list(h_rel, h_abs), tag_levels = "A"))
dev.off()
## Group-level barplots
p_group_rel <- mpse_plot %>%
mp_plot_abundance(
.abundance = Abundance,
.group = Group,
taxa.class = Class,
topn = 20,
plot.group = TRUE,
relative = TRUE
) +
scale_fill_manual(values = group_colors)
p_group_abs <- mpse_plot %>%
mp_plot_abundance(
.abundance = Abundance,
.group = Group,
taxa.class = Class,
topn = 20,
plot.group = TRUE,
relative = FALSE
) +
scale_fill_manual(values = group_colors)
png("figures/relative_abundance_and_abundance_groups.png", width = 1000, height = 1000)
print(p_group_rel / p_group_abs)
dev.off()
cat("\nDONE. Outputs written to ./figures/\n")
## =========================================================
## CONTINUE: Export Bray distances + pairwise PERMANOVA
## (Use mpse_div from the updated script above)
## =========================================================
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(openxlsx)
library(vegan)
})
## -----------------------------
## Helper: get assay matrix with rows = samples, cols = features
get_sample_by_feature <- function(mpse_obj, assay_name) {
mat <- assay(mpse_obj, assay_name)
# sample IDs in MPSE
samp_ids <- rownames(colData(mpse_obj))
# If samples are columns, transpose
if (!is.null(colnames(mat)) && all(samp_ids %in% colnames(mat))) {
mat <- t(mat)
}
# Now enforce row order to match colData
mat <- mat[samp_ids, , drop = FALSE]
mat
}
## -----------------------------
## 1) Recompute Bray–Curtis distance (robust extraction)
hell_mat_sf <- get_sample_by_feature(mpse_div, "hellinger") # rows=samples, cols=features
bray_dist <- vegdist(hell_mat_sf, method = "bray")
## sanity checks
stopifnot(!any(is.na(as.matrix(bray_dist))))
stopifnot(!any(as.matrix(bray_dist) < 0, na.rm = TRUE))
## -----------------------------
## 2) Export all pairwise Bray distances to Excel
bray_mat <- as.matrix(bray_dist)
samples <- rownames(bray_mat)
bray_df <- as.data.frame(as.table(bray_mat)) %>%
rename(Sample1 = Var1, Sample2 = Var2, BrayDistance = Freq) %>%
filter(Sample1 < Sample2) %>%
arrange(Sample1, Sample2)
write.xlsx(bray_df, file = "figures/Bray_Curtis_Distances.xlsx")
## -----------------------------
## 3) Pairwise PERMANOVA (post-hoc) using vegan::adonis2
meta2 <- as.data.frame(colData(mpse_div))
meta2 <- meta2[rownames(hell_mat_sf), , drop = FALSE]
meta2$Group <- droplevels(meta2$Group)
groups <- levels(meta2$Group)
res_list <- list()
k <- 1
for (i in 1:(length(groups) - 1)) {
for (j in (i + 1):length(groups)) {
g1 <- groups[i]
g2 <- groups[j]
idx <- meta2$Group %in% c(g1, g2)
sub_meta <- meta2[idx, , drop = FALSE]
sub_dist <- as.dist(as.matrix(bray_dist)[idx, idx])
ad <- adonis2(sub_dist ~ Group, data = sub_meta, permutations = 9999)
res_list[[k]] <- data.frame(
group1 = g1,
group2 = g2,
F = ad$F[1],
R2 = ad$R2[1],
p = ad$`Pr(>F)`[1]
)
k <- k + 1
}
}
pair_res <- do.call(rbind, res_list)
pair_res$p_adj_BH <- p.adjust(pair_res$p, method = "BH")
pair_res$p_adj_Bonferroni <- p.adjust(pair_res$p, method = "bonferroni")
write.csv(pair_res, "figures/Bray_pairwise_PERMANOVA.csv", row.names = FALSE)
cat("\nPairwise PERMANOVA written to: figures/Bray_pairwise_PERMANOVA.csv\n")
cat("Bray distance table written to: figures/Bray_Curtis_Distances.xlsx\n")
## =========================================================
## OPTIONAL: PCoA plot where point size = Shannon and alpha = Observe
## (requires mpse_div already has Shannon/Observe from mp_cal_alpha)
## =========================================================
p_pcoa_sizealpha <- mpse_div %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = Shannon,
.alpha = Observe,
ellipse = TRUE,
show.legend = TRUE
) +
scale_fill_manual(values = group_colors) +
scale_color_manual(values = group_colors) +
scale_size_continuous(range = c(2, 6)) +
theme(
axis.text = element_text(size = 16),
axis.title = element_text(size = 18),
legend.text = element_text(size = 14),
legend.title= element_text(size = 16)
)
png("figures/PCoA_sizealpha.png", width = 1200, height = 1000)
print(p_pcoa_sizealpha)
dev.off()
pdf("figures/PCoA_sizealpha.pdf", width = 10, height = 8)
print(p_pcoa_sizealpha)
dev.off()
## =========================================================
## Ensure all three ordination outputs exist:
## - PCoA : basic (color/group)
## - PCoA2 : size = Shannon, alpha = Observe
## - PCoA3 : same as PCoA2 + sample labels
##
## Assumes you already have:
## - mpse_div with: pcoa, Group, Shannon, Observe
## - group_colors defined
## =========================================================
p1 <- mpse_div %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = 4,
.alpha = 1,
ellipse = TRUE,
show.legend = FALSE
) +
scale_fill_manual(
values = group_colors,
guide = guide_legend(
keywidth = 1.6,
keyheight = 1.6,
label.theme = element_text(size = 16)
)
) +
scale_color_manual(
values = group_colors,
guide = guide_legend(
keywidth = 1.6,
keyheight = 1.6,
label.theme = element_text(size = 16)
)
) +
theme(
axis.text = element_text(size = 20),
axis.title = element_text(size = 22),
legend.text = element_text(size = 20),
legend.title= element_text(size = 22),
plot.title = element_text(size = 24, face = "bold"),
plot.subtitle = element_text(size = 20)
)
png("PCoA.png", width = 1200, height = 1000)
p1
dev.off()
pdf("PCoA.pdf")
p1
dev.off()
p2 <- mpse_div %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = Shannon,
.alpha = Observe,
ellipse = TRUE,
show.legend = FALSE
) +
scale_fill_manual(
values = group_colors,
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_color_manual(
values = group_colors,
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_size_continuous(
range = c(2, 6),
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
theme(
axis.text = element_text(size = 20),
axis.title = element_text(size = 22),
legend.text = element_text(size = 20),
legend.title= element_text(size = 22),
plot.title = element_text(size = 24, face = "bold"),
plot.subtitle = element_text(size = 20)
)
png("PCoA2.png", width = 1200, height = 1000)
p2
dev.off()
pdf("PCoA2.pdf")
p2
dev.off()
library(ggrepel)
colData(mpse_div)$ShortLabel <- gsub("sample-", "", mpse_div@colData@rownames)
p3 <- mpse_div %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = Shannon,
.alpha = Observe,
ellipse = TRUE,
show.legend = FALSE
) +
geom_text_repel(aes(label = ShortLabel), size = 5, max.overlaps = 100) +
scale_fill_manual(
values = group_colors,
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_color_manual(
values = group_colors,
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_size_continuous(
range = c(2, 6),
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
theme(
axis.text = element_text(size = 20),
axis.title = element_text(size = 22),
legend.text = element_text(size = 20),
legend.title= element_text(size = 22),
plot.title = element_text(size = 24, face = "bold"),
plot.subtitle = element_text(size = 20)
)
png("PCoA3.png", width = 1200, height = 1000)
p3
dev.off()
svg("PCoA3.svg", width = 12, height = 10)
p3
dev.off()
pdf("PCoA3.pdf")
p3
dev.off() MicrobiotaProcess.R for Data_Childrensclinic_16S_2025
Leave a reply