MicrobiotaProcess.R for Data_Childrensclinic_16S_2025

## =========================================================
## MicrobiotaProcess_UPDATED.R
## Uses your objects:
##   - ps_filt        : for alpha + beta diversity (NOT taxa-filtered)
##   - ps_abund_rel   : for cleaner composition plots (taxa filtered for plotting)
##
## Output:
##   - Rarefaction curves + alpha diversity plots
##   - Bray (Hellinger) distance + PCoA + PERMANOVA
##   - Composition plots (Class) + heatmaps (from ps_abund_rel)
## =========================================================

## -----------------------------
## 0) Define groups (sample IDs)  ---- EXACTLY as provided
Group1<-c("U24080201","U25020701","O23092004","U24101801","U25022101","O23102703","A24062801","O23112205","U23071901","A23112002","U24111801","O23110101","U24121801","O23120101","O24011202","O23090803","A23060602","A24030402","U25011701","O24011901","U23090801","O24011201","O24011003","O23092202","O23082301","O23091403","O23112901","O23092201","O24013103","O24021403","O24010402","O23092005","O23092203","O24010302","O23090701","O23091501","O23092701","O24022202","O23092802","O23090601","O23100401","O24022102","O23081801","O23092006","O23100503","O23090602","O24013104","O24020803","O24010301","O24010404","O23090802","O23092801","O24022801","O23100706","O23102602","O24021601","O24012401","O24021603","O24022901","O24021501","O23110902","O23102601","O23102704","O23100803","O23102701","O24021401","O24022101","O24030603","O23110901","O23110903","O23110301","O24022301","O23102502","O23111501","O23111602","O24020705","O24021502","O24022201","O23110202","O23090801")

Group2<-c("U23071701","U23052401","U23052201","U24070401","O24011801","O23092003","A24071901","A24072901","O24011102","O23121501","O23092104","O23092001","O23121301","O24020701","O23112201","O23100701","O23100801","O24020903","O24020901","O24020703","O23112204")

Group3<-c("O23100802","O24011205","O23092002","O24011207","O23092103","O23102501","O24011005","A24030401","O24011004","A23051102","U25011702","O24011204","O23121502","O23120702","O24011206","O24021404","O23092101","O24010403","O23112303","O23083001","O23082302","O24010401","O24022302","O24010501","O23112902","O23082303","O23083102","O24013101","O23100402","O24020801","O23120701","O23121304","O24021602","O24011802","O23121306","O23120103","O24020905","O24012403","O24013102","O24021503","O24020904","O23102504","O24013105","O24030601","O23100705","O24030604","O23111601","O24020103","O24030602","O23110302","O23102603","O24031304","O24021402","O24020101","O24012501","O24020804","O23100804","O23102503","O24022902","O24020704","O23110904","O24020102","O24012402","O23102702","O23102604","O23110204","O23110203","O23083101","O23092702")

Group4<-c("O23112304","A23051103","A24071701","A23080101","A24031201","A24080201","O24011105","O23091305","O23121302","O23092803")

Group5<-c("O23091303","O23112301","O24011203","A23112001","O24011001","O24011002","O23091302","O24020902","O23092102","O23091401","O23121503","O23091402","O24020702","O23091301","O23112206","O24011103","O23121305","O24011101","O23121303","O24011104","O23120104","O23100704","O23112302","O23112203","O23100703","O24020805","O24020802","O23112202","O24031302","O23111502","O23100702","O24031301","O24031305","O23082402")

NTC<-c("NTC_2","NTC_3","NTC_16","NTC_4","NTC_6","NTC_7","NTC_8","NTC_9","NTC_11","NTC_12","NTC_13","NTC_14","NTC_15","NTC_10")

PC<-c("PC_1","PC_2","PC_8","PC_4","PC_3","PC_5","PC_6","PC_7","UR009768","UR009909","PC01")

## Ensure unique IDs within each vector
Group1 <- unique(Group1); Group2 <- unique(Group2); Group3 <- unique(Group3)
Group4 <- unique(Group4); Group5 <- unique(Group5); NTC <- unique(NTC); PC <- unique(PC)

## The group order you want everywhere (plots + stats)
group_levels <- c("Group1","Group2","Group3","Group4","Group5","NTC","PC")

## -----------------------------
## 0.1) Sample subset (union of all groups)
keep_samples <- unique(c(Group1, Group2, Group3, Group4, Group5, NTC, PC))

## -----------------------------
## 0.2) Helper: assign Group as an ordered factor (membership-based)
add_group <- function(mpse_obj) {
    sn <- rownames(colData(mpse_obj))
    grp <- rep(NA_character_, length(sn))

    grp[sn %in% Group1] <- "Group1"
    grp[sn %in% Group2] <- "Group2"
    grp[sn %in% Group3] <- "Group3"
    grp[sn %in% Group4] <- "Group4"
    grp[sn %in% Group5] <- "Group5"
    grp[sn %in% NTC]    <- "NTC"
    grp[sn %in% PC]     <- "PC"

    colData(mpse_obj)$Group <- factor(grp, levels = group_levels)

    # warn if any samples weren't assigned (helps catch typos / missing IDs)
    if (any(is.na(colData(mpse_obj)$Group))) {
        warning(
            "Unassigned samples (not found in any group list): ",
            paste(sn[is.na(colData(mpse_obj)$Group)], collapse = ", ")
        )
    }
    mpse_obj
}

## -----------------------------
## 0.3) Colors (edit to taste)
group_colors <- c(
    "Group1" = "#1f77b4",
    "Group2" = "#ff7f0e",
    "Group3" = "#2ca02c",
    "Group4" = "#d62728",
    "Group5" = "#9467bd",
    "NTC"    = "#7f7f7f",
    "PC"     = "#8c564b"
)

## =========================================================
## 1) Diversity analysis object (alpha + beta)
##    IMPORTANT: start from ps_filt (all taxa retained)
## =========================================================
ps_div <- prune_samples(keep_samples, ps_filt)
ps_div <- prune_taxa(taxa_sums(ps_div) > 0, ps_div)

mpse_div <- as.MPSE(ps_div)
mpse_div <- add_group(mpse_div)

cat("\n[mpse_div] Group counts:\n")
print(table(colData(mpse_div)$Group, useNA = "ifany"))

## =========================================================
## 2) Alpha diversity (rarefaction-based)
## =========================================================
set.seed(9242)
mpse_div %<>% mp_rrarefy()  # creates RareAbundance
mpse_div %<>% mp_cal_rarecurve(.abundance = RareAbundance, chunks = 400)

# Rarefaction curves: sample + grouped
p_rare_1 <- mpse_div %>% mp_plot_rarecurve(.rare = RareAbundanceRarecurve, .alpha = Observe)

p_rare_2 <- mpse_div %>%
    mp_plot_rarecurve(.rare = RareAbundanceRarecurve, .alpha = Observe, .group = Group) +
    scale_color_manual(values = group_colors, guide = "none") +
    scale_fill_manual(values = group_colors, guide = "none")

p_rare_3 <- mpse_div %>%
    mp_plot_rarecurve(.rare = RareAbundanceRarecurve, .alpha = "Observe",
                                        .group = Group, plot.group = TRUE) +
    scale_color_manual(values = group_colors, guide = "none") +
    scale_fill_manual(values = group_colors, guide = "none")

png("figures/rarefaction_of_samples_or_groups.png", width = 1080, height = 600)
print(p_rare_1 + p_rare_2 + p_rare_3)
dev.off()

# Alpha indices from RareAbundance
mpse_div %<>% mp_cal_alpha(.abundance = RareAbundance)

f_alpha_1 <- mpse_div %>%
    mp_plot_alpha(.group = Group, .alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)) +
    scale_color_manual(values = group_colors, guide = "none") +
    scale_fill_manual(values = group_colors, guide = "none")

f_alpha_2 <- mpse_div %>%
    mp_plot_alpha(.alpha = c(Observe, Chao1, ACE, Shannon, Simpson, Pielou))

png("figures/alpha_diversity_comparison.png", width = 1400, height = 600)
print(f_alpha_1 / f_alpha_2)
dev.off()

## =========================================================
## 3) Beta diversity (Bray–Curtis on Hellinger)
##    IMPORTANT: use non-rarefied Abundance (not taxa-filtered)
## =========================================================
mpse_div %<>% mp_decostand(.abundance = Abundance)             # creates 'hellinger'
mpse_div %<>% mp_cal_dist(.abundance = hellinger, distmethod = "bray")

# Distance between samples
p_dist_1 <- mpse_div %>% mp_plot_dist(.distmethod = bray)
png("figures/distance_between_samples.png", width = 1000, height = 1000)
print(p_dist_1)
dev.off()

# Distance with group info
p_dist_2 <- mpse_div %>% mp_plot_dist(.distmethod = bray, .group = Group) +
    scale_fill_manual(values = group_colors) +
    scale_color_gradient()

png("figures/distance_between_samples_with_group_info.png", width = 1000, height = 1000)
print(p_dist_2)
dev.off()

# Compare distances between groups (Bray)
p_dist_3 <- mpse_div %>%
    mp_plot_dist(.distmethod = bray, .group = Group, group.test = TRUE, textsize = 6) +
    theme(
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14),
        axis.text.x  = element_text(size = 14),
        axis.text.y  = element_text(size = 14)
    )

png("figures/Comparison_of_Bray_Distances.png", width = 1000, height = 1000)
print(p_dist_3)
dev.off()

## PCoA + PERMANOVA (adonis2)
mpse_div %<>% mp_cal_pcoa(.abundance = hellinger, distmethod = "bray")

mpse_div %<>% mp_adonis(
    .abundance   = hellinger,
    .formula     = ~ Group,
    distmethod   = "bray",
    permutations = 9999,
    action       = "add"
)

cat("\n[PERMANOVA result]\n")
print(mpse_div %>% mp_extract_internal_attr(name = "adonis"))

## PCoA plot
p_pcoa_1 <- mpse_div %>%
    mp_plot_ord(
        .ord   = pcoa,
        .group = Group,
        .color = Group,
        .size  = 4,
        .alpha = 1,
        ellipse = TRUE,
        show.legend = TRUE
    ) +
    scale_fill_manual(values = group_colors) +
    scale_color_manual(values = group_colors) +
    theme(
        axis.text   = element_text(size = 16),
        axis.title  = element_text(size = 18),
        legend.text = element_text(size = 14),
        legend.title= element_text(size = 16)
    )

png("figures/PCoA.png", width = 1200, height = 1000)
print(p_pcoa_1)
dev.off()

pdf("figures/PCoA.pdf", width = 10, height = 8)
print(p_pcoa_1)
dev.off()

## Optional: label points
colData(mpse_div)$ShortLabel <- gsub("sample-", "", rownames(colData(mpse_div)))

p_pcoa_2 <- p_pcoa_1 +
    geom_text_repel(aes(label = ShortLabel), size = 4, max.overlaps = 100)

png("figures/PCoA_labeled.png", width = 1200, height = 1000)
print(p_pcoa_2)
dev.off()
#[PERMANOVA result]
#The object contained internal attribute: PCoA ADONIS
#Permutation test for adonis under reduced model
#Permutation: free
#Number of permutations: 9999
#
#vegan::adonis2(formula = .formula, data = sampleda, permutations = permutations, method = distmethod)
#          Df SumOfSqs      R2      F Pr(>F)
#Model      6   11.446 0.16058 7.3971  1e-04 ***
#Residual 232   59.829 0.83942
#Total    238   71.274 1.00000
#---
#Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## =========================================================
## 4) Composition plots object (clean taxa set for plotting)
##    IMPORTANT: start from ps_abund_rel (plotting-filtered taxa)
## =========================================================
ps_plot <- prune_samples(keep_samples, ps_abund_rel)
ps_plot <- prune_taxa(taxa_sums(ps_plot) > 0, ps_plot)

mpse_plot <- as.MPSE(ps_plot)
mpse_plot <- add_group(mpse_plot)

## Summaries for plotting (Class)
mpse_plot %<>%
    mp_cal_abundance(.abundance = Abundance) %>%               # per sample
    mp_cal_abundance(.abundance = Abundance, .group = Group)   # per group

## Class abundance barplots (top 20)
p_class_rel <- mpse_plot %>%
    mp_plot_abundance(
        .abundance = Abundance,
        taxa.class = Class,
        topn = 20,
        relative = TRUE
    )

p_class_abs <- mpse_plot %>%
    mp_plot_abundance(
        .abundance = Abundance,
        taxa.class = Class,
        topn = 20,
        relative = FALSE
    )

png("figures/relative_abundance_and_abundance_samples.png", width = 1200, height = 600)
print(p_class_rel / p_class_abs)
dev.off()

## Heatmaps (grouped)
h_rel <- mpse_plot %>%
    mp_plot_abundance(
        .abundance = Abundance,
        .group = Group,
        taxa.class = Class,
        relative = TRUE,
        topn = 20,
        geom = "heatmap",
        features.dist = "euclidean",
        features.hclust = "average",
        sample.dist = "bray",
        sample.hclust = "average"
    )

h_abs <- mpse_plot %>%
    mp_plot_abundance(
        .abundance = Abundance,
        .group = Group,
        taxa.class = Class,
        relative = FALSE,
        topn = 20,
        geom = "heatmap",
        features.dist = "euclidean",
        features.hclust = "average",
        sample.dist = "bray",
        sample.hclust = "average"
    )

png("figures/relative_abundance_and_abundance_heatmap.png", width = 1200, height = 600)
print(aplot::plot_list(gglist = list(h_rel, h_abs), tag_levels = "A"))
dev.off()

## Group-level barplots
p_group_rel <- mpse_plot %>%
    mp_plot_abundance(
        .abundance = Abundance,
        .group = Group,
        taxa.class = Class,
        topn = 20,
        plot.group = TRUE,
        relative = TRUE
    ) +
    scale_fill_manual(values = group_colors)

p_group_abs <- mpse_plot %>%
    mp_plot_abundance(
        .abundance = Abundance,
        .group = Group,
        taxa.class = Class,
        topn = 20,
        plot.group = TRUE,
        relative = FALSE
    ) +
    scale_fill_manual(values = group_colors)

png("figures/relative_abundance_and_abundance_groups.png", width = 1000, height = 1000)
print(p_group_rel / p_group_abs)
dev.off()

cat("\nDONE. Outputs written to ./figures/\n")

## =========================================================
## CONTINUE: Export Bray distances + pairwise PERMANOVA
## (Use mpse_div from the updated script above)
## =========================================================

suppressPackageStartupMessages({
    library(dplyr)
    library(tidyr)
    library(openxlsx)
    library(vegan)
})

## -----------------------------
## Helper: get assay matrix with rows = samples, cols = features
get_sample_by_feature <- function(mpse_obj, assay_name) {
    mat <- assay(mpse_obj, assay_name)

    # sample IDs in MPSE
    samp_ids <- rownames(colData(mpse_obj))

    # If samples are columns, transpose
    if (!is.null(colnames(mat)) && all(samp_ids %in% colnames(mat))) {
        mat <- t(mat)
    }

    # Now enforce row order to match colData
    mat <- mat[samp_ids, , drop = FALSE]
    mat
}

## -----------------------------
## 1) Recompute Bray–Curtis distance (robust extraction)
hell_mat_sf <- get_sample_by_feature(mpse_div, "hellinger")  # rows=samples, cols=features
bray_dist   <- vegdist(hell_mat_sf, method = "bray")

## sanity checks
stopifnot(!any(is.na(as.matrix(bray_dist))))
stopifnot(!any(as.matrix(bray_dist) < 0, na.rm = TRUE))

## -----------------------------
## 2) Export all pairwise Bray distances to Excel
bray_mat <- as.matrix(bray_dist)
samples  <- rownames(bray_mat)

bray_df <- as.data.frame(as.table(bray_mat)) %>%
    rename(Sample1 = Var1, Sample2 = Var2, BrayDistance = Freq) %>%
    filter(Sample1 < Sample2) %>%
    arrange(Sample1, Sample2)

write.xlsx(bray_df, file = "figures/Bray_Curtis_Distances.xlsx")

## -----------------------------
## 3) Pairwise PERMANOVA (post-hoc) using vegan::adonis2
meta2 <- as.data.frame(colData(mpse_div))
meta2 <- meta2[rownames(hell_mat_sf), , drop = FALSE]
meta2$Group <- droplevels(meta2$Group)

groups <- levels(meta2$Group)

res_list <- list()
k <- 1

for (i in 1:(length(groups) - 1)) {
    for (j in (i + 1):length(groups)) {

        g1 <- groups[i]
        g2 <- groups[j]

        idx <- meta2$Group %in% c(g1, g2)
        sub_meta <- meta2[idx, , drop = FALSE]

        sub_dist <- as.dist(as.matrix(bray_dist)[idx, idx])

        ad <- adonis2(sub_dist ~ Group, data = sub_meta, permutations = 9999)

        res_list[[k]] <- data.frame(
            group1 = g1,
            group2 = g2,
            F      = ad$F[1],
            R2     = ad$R2[1],
            p      = ad$`Pr(>F)`[1]
        )
        k <- k + 1
    }
}

pair_res <- do.call(rbind, res_list)
pair_res$p_adj_BH         <- p.adjust(pair_res$p, method = "BH")
pair_res$p_adj_Bonferroni <- p.adjust(pair_res$p, method = "bonferroni")

write.csv(pair_res, "figures/Bray_pairwise_PERMANOVA.csv", row.names = FALSE)

cat("\nPairwise PERMANOVA written to: figures/Bray_pairwise_PERMANOVA.csv\n")
cat("Bray distance table written to: figures/Bray_Curtis_Distances.xlsx\n")

## =========================================================
## OPTIONAL: PCoA plot where point size = Shannon and alpha = Observe
## (requires mpse_div already has Shannon/Observe from mp_cal_alpha)
## =========================================================

p_pcoa_sizealpha <- mpse_div %>%
    mp_plot_ord(
        .ord   = pcoa,
        .group = Group,
        .color = Group,
        .size  = Shannon,
        .alpha = Observe,
        ellipse = TRUE,
        show.legend = TRUE
    ) +
    scale_fill_manual(values = group_colors) +
    scale_color_manual(values = group_colors) +
    scale_size_continuous(range = c(2, 6)) +
    theme(
        axis.text   = element_text(size = 16),
        axis.title  = element_text(size = 18),
        legend.text = element_text(size = 14),
        legend.title= element_text(size = 16)
    )

png("figures/PCoA_sizealpha.png", width = 1200, height = 1000)
print(p_pcoa_sizealpha)
dev.off()

pdf("figures/PCoA_sizealpha.pdf", width = 10, height = 8)
print(p_pcoa_sizealpha)
dev.off()

## =========================================================
## Ensure all three ordination outputs exist:
##   - PCoA  : basic (color/group)
##   - PCoA2 : size = Shannon, alpha = Observe
##   - PCoA3 : same as PCoA2 + sample labels
##
## Assumes you already have:
##   - mpse_div with: pcoa, Group, Shannon, Observe
##   - group_colors defined
## =========================================================

p1 <- mpse_div %>%
    mp_plot_ord(
        .ord   = pcoa,
        .group = Group,
        .color = Group,
        .size  = 4,
        .alpha = 1,
        ellipse = TRUE,
        show.legend = FALSE
    ) +
    scale_fill_manual(
        values = group_colors,
        guide  = guide_legend(
            keywidth = 1.6,
            keyheight = 1.6,
            label.theme = element_text(size = 16)
        )
    ) +
    scale_color_manual(
        values = group_colors,
        guide  = guide_legend(
            keywidth = 1.6,
            keyheight = 1.6,
            label.theme = element_text(size = 16)
        )
    ) +
    theme(
        axis.text   = element_text(size = 20),
        axis.title  = element_text(size = 22),
        legend.text = element_text(size = 20),
        legend.title= element_text(size = 22),
        plot.title  = element_text(size = 24, face = "bold"),
        plot.subtitle = element_text(size = 20)
    )

png("PCoA.png", width = 1200, height = 1000)
p1
dev.off()
pdf("PCoA.pdf")
p1
dev.off()

p2 <- mpse_div %>%
    mp_plot_ord(
        .ord   = pcoa,
        .group = Group,
        .color = Group,
        .size  = Shannon,
        .alpha = Observe,
        ellipse = TRUE,
        show.legend = FALSE
    ) +
    scale_fill_manual(
        values = group_colors,
        guide  = guide_legend(
            keywidth = 0.6,
            keyheight = 0.6,
            label.theme = element_text(size = 16)
        )
    ) +
    scale_color_manual(
        values = group_colors,
        guide  = guide_legend(
            keywidth = 0.6,
            keyheight = 0.6,
            label.theme = element_text(size = 16)
        )
    ) +
    scale_size_continuous(
        range = c(2, 6),
        guide = guide_legend(
            keywidth = 0.6,
            keyheight = 0.6,
            label.theme = element_text(size = 16)
        )
    ) +
    theme(
        axis.text   = element_text(size = 20),
        axis.title  = element_text(size = 22),
        legend.text = element_text(size = 20),
        legend.title= element_text(size = 22),
        plot.title  = element_text(size = 24, face = "bold"),
        plot.subtitle = element_text(size = 20)
    )

png("PCoA2.png", width = 1200, height = 1000)
p2
dev.off()
pdf("PCoA2.pdf")
p2
dev.off()

library(ggrepel)
colData(mpse_div)$ShortLabel <- gsub("sample-", "", mpse_div@colData@rownames)

p3 <- mpse_div %>%
    mp_plot_ord(
        .ord   = pcoa,
        .group = Group,
        .color = Group,
        .size  = Shannon,
        .alpha = Observe,
        ellipse = TRUE,
        show.legend = FALSE
    ) +
    geom_text_repel(aes(label = ShortLabel), size = 5, max.overlaps = 100) +
    scale_fill_manual(
        values = group_colors,
        guide  = guide_legend(
            keywidth = 0.6,
            keyheight = 0.6,
            label.theme = element_text(size = 16)
        )
    ) +
    scale_color_manual(
        values = group_colors,
        guide  = guide_legend(
            keywidth = 0.6,
            keyheight = 0.6,
            label.theme = element_text(size = 16)
        )
    ) +
    scale_size_continuous(
        range = c(2, 6),
        guide = guide_legend(
            keywidth = 0.6,
            keyheight = 0.6,
            label.theme = element_text(size = 16)
        )
    ) +
    theme(
        axis.text   = element_text(size = 20),
        axis.title  = element_text(size = 22),
        legend.text = element_text(size = 20),
        legend.title= element_text(size = 22),
        plot.title  = element_text(size = 24, face = "bold"),
        plot.subtitle = element_text(size = 20)
    )

png("PCoA3.png", width = 1200, height = 1000)
p3
dev.off()
svg("PCoA3.svg", width = 12, height = 10)
p3
dev.off()
pdf("PCoA3.pdf")
p3
dev.off()
Microbial bioinformatics

Microbial bioinformatics uses computational tools to analyze genomes, track evolution, and study functions in microorganisms, including bacteria and viruses.

MicrobiotaProcess.R for Data_Childrensclinic_16S_2025

Leave a Reply Cancel reply