gene_x 0 like s 294 view s
Tags: R, processing, metagenomics, 16S, pipeline
# https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html
# -----------------------------------
# ---- prepare the R environment ----
#Rscript MicrobiotaProcess.R
#NOTE: exit R script, then login again R-environment; rm -rf Phyloseq*_cache
#mkdir figures
rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
# with #alpha = 2.0, running the following script further!
# -----------------------------
# ---- 3.1. bridges other tools
##https://github.com/YuLab-SMU/MicrobiotaProcess
##https://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess.html
##https://chiliubio.github.io/microeco_tutorial/intro.html#framework
##https://yiluheihei.github.io/microbiomeMarker/reference/plot_cladogram.html
#BiocManager::install("MicrobiotaProcess")
#install.packages("microeco")
#install.packages("ggalluvial")
#install.packages("ggh4x")
library(MicrobiotaProcess)
library(microeco)
library(ggalluvial)
library(ggh4x)
library(gghalves)
## Convert the phyloseq object to a MicrobiotaProcess object
#mp <- as.MicrobiotaProcess(ps.ng.tax)
#mt <- phyloseq2microeco(ps.ng.tax) #--> ERROR
#abundance_table <- mt$abun_table
#taxonomy_table <- mt$tax_table
#ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
#ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)
##OPTION1 (NOT_USED): take all samples, prepare ps.ng.tax_abund --> mpse_abund
##mpse <- ps.ng.tax %>% as.MPSE()
#mpse_abund <- ps.ng.tax_abund %>% as.MPSE()
##OPTION2 (USED!): take partial samples, prepare ps.ng.tax or ps.ng.tax_abund (2 replacements!)--> ps.ng.tax_sel --> mpse_abund
ps.ng.tax_sel <- ps.ng.tax_abund
##otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")]
##NOTE: Only choose Group2, Group4, Group6, Group8
#> ps.ng.tax_sel
#otu_table() OTU Table: [ 37465 taxa and 29 samples ]
#sample_data() Sample Data: [ 29 samples by 10 sample variables ]
#tax_table() Taxonomy Table: [ 37465 taxa by 7 taxonomic ranks ]
#phy_tree() Phylogenetic Tree: [ 37465 tips and 37461 internal nodes ]
#-Group4: "21","22","23","24","25","26","27","28",
#-Group8: , "47","48","49","50","52","53","55"
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[,c("8","9","10","12","13","14", "33","34","35","36","37","38","39","51")]
mpse_abund <- ps.ng.tax_sel %>% as.MPSE()
# A MPSE-tibble (MPSE object) abstraction: 2,352 × 20
# NOTE mpse_abund contains 20 variables: OTU, Sample, Abundance, BarcodeSequence, LinkerPrimerSequence, FileInput, Group,
# Sex_age <chr>, pre_post_stroke <chr>, Conc <dbl>, Vol_50ng <dbl>, Vol_PCR <dbl>, Description <chr>,
# Domain <chr>, Phylum <chr>, Class <chr>, Order <chr>, Family <chr>, Genus <chr>, Species <chr>
# -----------------------------------
# ---- 3.2. alpha diversity analysis
# Rarefied species richness + RareAbundance
mpse_abund %<>% mp_rrarefy()
# 'chunks' represent the split number of each sample to calculate alpha
# diversity, default is 400. e.g. If a sample has total 40000
# reads, if chunks is 400, it will be split to 100 sub-samples
# (100, 200, 300,..., 40000), then alpha diversity index was
# calculated based on the sub-samples.
# '.abundance' the column name of abundance, if the '.abundance' is not be
# rarefied calculate rarecurve, user can specific 'force=TRUE'.
mpse_abund %<>%
mp_cal_rarecurve(
.abundance = RareAbundance,
chunks = 400
)
# The RareAbundanceRarecurve column will be added the colData slot
# automatically (default action="add")
#NOTE mpse_abund contains 22 varibles = 20 varibles + RareAbundance <dbl> + RareAbundanceRarecurve <list>
# default will display the confidence interval around smooth.
# se=TRUE
# NOTE that two colors #c("#00A087FF", "#3C5488FF") for .group = pre_post_stroke; four colors c("#1f78b4", "#33a02c", "#e31a1c", "#6a3d9a") for .group = Group;
p1 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = Observe,
)
p2 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = Observe,
.group = Group
) +
scale_color_manual(values=c("#1f78b4", "#e31a1c")) +
scale_fill_manual(values=c("#1f78b4", "#e31a1c"), guide="none")
# combine the samples belong to the same groups if plot.group=TRUE
p3 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = "Observe",
.group = Group,
plot.group = TRUE
) +
scale_color_manual(values=c("#1f78b4", "#e31a1c")) +
scale_fill_manual(values=c("#1f78b4", "#e31a1c"),guide="none")
png("rarefaction_of_samples_or_groups.png", width=1080, height=600)
p1 + p2 + p3
dev.off()
# ------------------------------------------
# 3.3. calculate alpha index and visualization
library(ggplot2)
library(MicrobiotaProcess)
mpse_abund %<>%
mp_cal_alpha(.abundance=RareAbundance)
mpse_abund
#NOTE mpse_abund contains 28 varibles = 22 varibles + Observe <dbl>, Chao1 <dbl>, ACE <dbl>, Shannon <dbl>, Simpson <dbl>, Pielou <dbl>
f1 <- mpse_abund %>%
mp_plot_alpha(
.group=Group,
.alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
) +
scale_fill_manual(values=c("#1f78b4", "#e31a1c"), guide="none") +
scale_color_manual(values=c("#1f78b4", "#e31a1c"), guide="none")
f2 <- mpse_abund %>%
mp_plot_alpha(
.alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
)
#ps.ng.tax_sel contais only pre samples --> f1 cannot be generated!
png("alpha_diversity_comparison.png", width=1400, height=600)
f1 / f2
dev.off()
# -------------------------------------------
# 3.4. The visualization of taxonomy abundance (Class)
mpse_abund %<>%
mp_cal_abundance( # for each samples
.abundance = RareAbundance
) %>%
mp_cal_abundance( # for each groups
.abundance=RareAbundance,
.group=Group
)
mpse_abund
#NOTE mpse_abund contains 29 varibles = 28 varibles + RelRareAbundanceBySample <dbl>
# visualize the relative abundance of top 20 phyla for each sample.
# .group=time,
p1 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
taxa.class = Class,
topn = 20,
relative = TRUE
)
# visualize the abundance (rarefied) of top 20 phyla for each sample.
# .group=time,
p2 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
taxa.class = Class,
topn = 20,
relative = FALSE
)
png("relative_abundance_and_abundance.png", width= 1200, height=600) #NOT PRODUCED!
p1 / p2
dev.off()
#----
h1 <- mpse_abund %>%
mp_plot_abundance(
.abundance = RareAbundance,
.group = Group,
taxa.class = Class,
relative = TRUE,
topn = 20,
geom = 'heatmap',
features.dist = 'euclidean',
features.hclust = 'average',
sample.dist = 'bray',
sample.hclust = 'average'
)
h2 <- mpse_abund %>%
mp_plot_abundance(
.abundance = RareAbundance,
.group = Group,
taxa.class = Class,
relative = FALSE,
topn = 20,
geom = 'heatmap',
features.dist = 'euclidean',
features.hclust = 'average',
sample.dist = 'bray',
sample.hclust = 'average'
)
# the character (scale or theme) of figure can be adjusted by set_scale_theme
# refer to the mp_plot_dist
png("relative_abundance_and_abundance_heatmap.png", width= 1200, height=600)
aplot::plot_list(gglist=list(h1, h2), tag_levels="A")
dev.off()
# visualize the relative abundance of top 20 class for each .group (Group)
p3 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group=Group,
taxa.class = Class,
topn = 20,
plot.group = TRUE
)
# visualize the abundance of top 20 phyla for each .group (time)
p4 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group= Group,
taxa.class = Class,
topn = 20,
relative = FALSE,
plot.group = TRUE
)
png("relative_abundance_and_abundance_groups.png", width= 1000, height=1000)
p3 / p4
dev.off()
# ---------------------------
# 3.5. Beta diversity analysis
# ---------------------------------------------
# 3.5.1 The distance between samples or groups
# standardization
# mp_decostand wraps the decostand of vegan, which provides
# many standardization methods for community ecology.
# default is hellinger, then the abundance processed will
# be stored to the assays slot.
mpse_abund %<>%
mp_decostand(.abundance=Abundance)
mpse_abund
#NOTE mpse_abund contains 30 varibles = 29 varibles + hellinger <dbl>
# calculate the distance between the samples.
# the distance will be generated a nested tibble and added to the
# colData slot.
mpse_abund %<>% mp_cal_dist(.abundance=hellinger, distmethod="bray")
mpse_abund
#NOTE mpse_abund contains 31 varibles = 30 varibles + bray <list>
# mp_plot_dist provides there methods to visualize the distance between the samples or groups
# when .group is not provided, the dot heatmap plot will be return
p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray)
png("distance_between_samples.png", width= 1000, height=1000)
p1
dev.off()
# when .group is provided, the dot heatmap plot with group information will be return.
p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = Group)
# The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function.
p2 %>% set_scale_theme(
x = scale_fill_manual(
values=c("#1f78b4", "#e31a1c"), #c("orange", "deepskyblue"),
guide = guide_legend(
keywidth = 1,
keyheight = 0.5,
title.theme = element_text(size=8),
label.theme = element_text(size=6)
)
),
aes_var = Group # specific the name of variable
) %>%
set_scale_theme(
x = scale_color_gradient(
guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
),
aes_var = bray
) %>%
set_scale_theme(
x = scale_size_continuous(
range = c(0.1, 3),
guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
),
aes_var = bray
)
png("distance_between_samples_with_group_info.png", width= 1000, height=1000)
p2
dev.off()
# when .group is provided and group.test is TRUE, the comparison of different groups will be returned
# Assuming p3 is a ggplot object after mp_plot_dist call
p3 <- mpse_abund %>%
mp_plot_dist(.distmethod = bray, .group = Group, group.test = TRUE, textsize = 6) +
theme(
axis.title.x = element_text(size = 14), # Customize x-axis label face = "bold"
axis.title.y = element_text(size = 14), # Customize y-axis label
axis.text.x = element_text(size = 14), # Customize x-axis ticks
axis.text.y = element_text(size = 14) # Customize y-axis ticks
)
# Save the plot with the new theme settings
png("Comparison_of_Bray_Distances.png", width = 1000, height = 1000)
print(p3) # Ensure that p3 is explicitly printed in the device
dev.off()
# Extract Bray-Curtis Distance Values and save them in a Excel-table.
library(dplyr)
library(tidyr)
library(openxlsx)
# Define the sample numbers vector
sample_numbers <- c("8", "9", "10", "12", "13", "14", "33", "34", "35", "36", "37", "38", "39", "51")
# Consolidate the list of tibbles using the actual sample numbers
bray_data <- bind_rows(
lapply(seq_along(mpse_abund$bray), function(i) {
tibble(
Sample1 = sample_numbers[i], # Use actual sample number
Sample2 = mpse_abund$bray[[i]]$braySampley,
BrayDistance = mpse_abund$bray[[i]]$bray
)
}),
.id = "PairID"
)
# Print the data frame to check the output
print(bray_data)
# Write the data frame to an Excel file
write.xlsx(bray_data, file = "Bray_Curtis_Distances.xlsx")
#DELETE the column "PairID" in Excel file
# -----------------------
# 3.5.2 The PCoA analysis
#install.packages("corrr")
library(corrr)
#install.packages("ggside")
library(ggside)
mpse_abund %<>%
mp_cal_pcoa(.abundance=hellinger, distmethod="bray")
# The dimensions of ordination analysis will be added the colData slot (default).
mpse_abund
mpse_abund %>% print(width=380, n=2)
#NOTE mpse_abund contains 34 varibles = 31 varibles + `PCo1 (30.16%)` <dbl>, `PCo2 (15.75%)` <dbl>, `PCo3 (10.53%)` <dbl>
#BUG why 36 variables in mpse_abund %>% print(width=380, n=1) [RareAbundanceBySample <list>, RareAbundanceByGroup <list>]
#> methods(class=class(mpse_abund))
# [1] [ [[<- [<-
# [4] $ $<- arrange
# [7] as_tibble as.data.frame as.phyloseq
#[10] coerce coerce<- colData<-
#[13] distinct filter group_by
#[16] left_join mp_adonis mp_aggregate_clade
#[19] mp_aggregate mp_anosim mp_balance_clade
#[22] mp_cal_abundance mp_cal_alpha mp_cal_cca
#[25] mp_cal_clust mp_cal_dca mp_cal_dist
#[28] mp_cal_nmds mp_cal_pca mp_cal_pcoa
#[31] mp_cal_pd_metric mp_cal_rarecurve mp_cal_rda
#[34] mp_cal_upset mp_cal_venn mp_decostand
#[37] mp_diff_analysis mp_diff_clade mp_envfit
#[40] mp_extract_abundance mp_extract_assays mp_extract_dist
#[43] mp_extract_feature mp_extract_internal_attr mp_extract_rarecurve
#[46] mp_extract_refseq mp_extract_sample mp_extract_taxonomy
#[49] mp_extract_tree mp_filter_taxa mp_mantel
#[52] mp_mrpp mp_plot_abundance mp_plot_alpha
#[55] mp_plot_diff_boxplot mp_plot_diff_res mp_plot_dist
#[58] mp_plot_ord mp_plot_rarecurve mp_plot_upset
#[61] mp_plot_venn mp_rrarefy mp_select_as_tip
#[64] mp_stat_taxa mutate otutree
#[67] otutree<- print pull
#[70] refsequence refsequence<- rename
#[73] rownames<- select show
# [ reached getOption("max.print") -- omitted 6 entries ]
#see '?methods' for accessing help and source code
# We also can perform adonis or anosim to check whether it is significant to the dissimilarities of groups.
mpse_abund %<>%
mp_adonis(.abundance=hellinger, .formula=~Group, distmethod="bray", permutations=9999, action="add")
mpse_abund %>% mp_extract_internal_attr(name=adonis)
#NOTE mpse_abund contains 34 varibles, no new variable, it has been saved in mpse_abund and can be extracted with "mpse_abund %>% mp_extract_internal_attr(name='adonis')"
#The result of adonis has been saved to the internal attribute !
#It can be extracted using this-object %>% mp_extract_internal_attr(name='adonis')
#The object contained internal attribute: PCoA ADONIS
#Permutation test for adonis under reduced model
#Terms added sequentially (first to last)
#Permutation: free
#Number of permutations: 9999
#
#vegan::adonis2(formula = .formula, data = sampleda, permutations = permutations, method = distmethod)
# Df SumOfSqs R2 F Pr(>F)
#Group 1 0.23448 0.22659 3.5158 5e-04 ***
#Residual 12 0.80032 0.77341
#Total 13 1.03480 1.00000
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# ("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")
#div.df2[div.df2 == "Group1"] <- "aged.post"
#div.df2[div.df2 == "Group3"] <- "young.post"
#div.df2[div.df2 == "Group5"] <- "aged.post"
#div.df2[div.df2 == "Group7"] <- "young.post"
# ("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")
#div.df2[div.df2 == "Group2"] <- "aged.pre"
#div.df2[div.df2 == "Group4"] <- "young.pre"
#div.df2[div.df2 == "Group6"] <- "aged.pre"
#div.df2[div.df2 == "Group8"] <- "young.pre"
#Group1: f.aged and post
#Group2: f.aged and pre
#Group3: f.young and post
#Group4: f.young and pre
#Group5: m.aged and post
#Group6: m.aged and pre
#Group7: m.young and post
#Group8: m.young and pre
#[,c("1","2","5","6","7", "8","9","10","12","13","14")]
#[,c("15","16","17","18","19","20", "21","22","23","24","25","26","27","28")]
#[,c("29","30","31","32", "33","34","35","36","37","38","39","51")]
#[,c("40","41","42","43","44","46", "47","48","49","50","52","53","55")]
#For the first set:
#a6cee3: This is a light blue color, somewhat pastel and soft.
#b2df8a: A soft, pale green, similar to a light lime.
#fb9a99: A soft pink, slightly peachy or salmon-like.
#cab2d6: A pale purple, reminiscent of lavender or a light mauve.
#For the second set:
#1f78b4: This is a strong, vivid blue, close to cobalt or a medium-dark blue.
#33a02c: A medium forest green, vibrant and leafy.
#e31a1c: A bright red, very vivid, similar to fire engine red.
#6a3d9a: This would be described as a deep purple, akin to a dark lavender or plum.
p1 <- mpse_abund %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = 2.4,
.alpha = 1,
ellipse = TRUE,
show.legend = FALSE # don't display the legend of stat_ellipse
) +
scale_fill_manual(
#values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
#values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12))
) +
scale_color_manual(
#values=c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
#values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12))
)
#scale_fill_manual(values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500")) +
#scale_color_manual(values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"))
#scale_fill_manual(values=c("#00A087FF", "#3C5488FF")) +
#scale_color_manual(values=c("#00A087FF", "#3C5488FF"))
#png("PCoA.png", width= 1000, height=1000)
#svg("PCoA.svg", width= 11, height=10)
#svg("PCoA_.svg", width=10, height=10)
#svg("PCoA.svg")
pdf("PCoA_Group2_vs_Group6.pdf")
p1
dev.off()
#FF0000: Red
#000000: Black
#0000FF: Blue
#C0C0C0: Silver
#00FF00: Lime (often referred to simply as Green in web colors)
#FFFF00: Yellow
#00FFFF: Aqua (also known as Cyan)
#FFA500: Orange
# The size of point also can be mapped to other variables such as Observe, or Shannon
# Then the alpha diversity and beta diversity will be displayed simultaneously.
p2 <- mpse_abund %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = Shannon,
.alpha = Observe,
ellipse = TRUE,
show.legend = FALSE # don't display the legend of stat_ellipse
) +
scale_fill_manual(
values = c("#1f78b4", "#e31a1c"), #only needs four colors.
#values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
) +
scale_color_manual(
values = c("#1f78b4", "#e31a1c"), #only needs four colors.
#values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
) +
scale_size_continuous(
range=c(0.5, 3),
guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
)
pdf("PCoA2_Group2_vs_Group6.pdf")
p2
dev.off()
# ------------------------------------------
# 3.5.3 Hierarchical cluster (tree) analysis
#input should contain hellinger!
mpse_abund %<>%
mp_cal_clust(
.abundance = hellinger,
distmethod = "bray",
hclustmethod = "average", # (UPGAE)
action = "add" # action is used to control which result will be returned
)
mpse_abund
mpse_abund %>% print(width=380, n=2)
#NOTE mpse_abund contains 34 varibles, no new variable, the column bray has been new calculated!
# if action = 'add', the result of hierarchical cluster will be added to the MPSE object
# mp_extract_internal_attr can extract it. It is a treedata object, so it can be visualized
# by ggtree.
sample.clust <- mpse_abund %>% mp_extract_internal_attr(name='SampleClust')
#The object contained internal attribute: PCoA ADONIS SampleClust
sample.clust
#--> The associated data tibble abstraction: 27 × 30
library(ggtree)
p <- ggtree(sample.clust) +
geom_tippoint(aes(color=Group)) +
geom_tiplab(as_ylab = TRUE) +
ggplot2::scale_x_continuous(expand=c(0, 0.01))
png("hierarchical_cluster1.png", width= 1000, height=800)
p
dev.off()
#https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html
# mapping = aes(x = RelRareAbundanceBySample-->Group,
# y = Sample-->Group,
# fill = Phyla
# ),
library(ggtreeExtra)
library(ggplot2)
# Extract relative abundance of phyla
phyla.tb <- mpse_abund %>%
mp_extract_abundance(taxa.class=Phylum, topn=30)
# The abundance of each samples is nested, it can be flatted using the unnest of tidyr.
phyla.tb %<>% tidyr::unnest(cols=RareAbundanceBySample) %>% dplyr::rename(Phyla="label")
phyla.tb
phyla.tb %>% print(width=380, n=10)
p1 <- p +
geom_fruit(
data=phyla.tb,
geom=geom_col,
mapping = aes(x = RelRareAbundanceBySample,
y = Sample,
fill = Phyla
),
orientation = "y",
#offset = 0.4,
pwidth = 3,
axis.params = list(axis = "x",
title = "The relative abundance of phyla (%)",
title.size = 4,
text.size = 2,
vjust = 1),
grid.params = list()
)
png("hierarchical_cluster2_Phyla.png", width = 1000, height = 800)
p1
dev.off()
# Extract relative abundance of classes
class.tb <- mpse_abund %>%
mp_extract_abundance(taxa.class = Class, topn = 30)
# Flatten and rename the columns
class.tb %<>% tidyr::unnest(cols = RareAbundanceBySample) %>% dplyr::rename(Class = "label")
# View the data frame
class.tb
# Create the plot
p1 <- p +
geom_fruit(
data = class.tb,
geom = geom_col,
mapping = aes(x = RelRareAbundanceBySample,
y = Sample,
fill = Class
),
orientation = "y",
pwidth = 3,
axis.params = list(axis = "x",
title = "The relative abundance of classes (%)",
title.size = 4,
text.size = 2,
vjust = 1),
grid.params = list()
)
# Save the plot to a file #ERROR-->NEED to be DEBUGGED!
png("hierarchical_cluster2_Class.png", width = 1000, height = 800)
print(p1)
dev.off()
# -----------------------
# 3.6 Biomarker discovery
library(ggtree)
library(ggtreeExtra)
library(ggplot2)
library(MicrobiotaProcess)
library(tidytree)
library(ggstar)
library(forcats)
#----BUG: why resulting in 26 taxa != 16 in the end ----
mpse_abund %>% print(width=150)
#mpse_abund %<>%
# mp_cal_abundance( # for each samples
# .abundance = RareAbundance
# ) %>%
# mp_cal_abundance( # for each groups
# .abundance=RareAbundance,
# .group=Group
# )
#mpse_abund
mpse_abund %<>%
mp_diff_analysis(
.abundance = RelRareAbundanceBySample,
.group = Group,
first.test.alpha = 0.01, filter.p="pvalue"
)
# The result is stored to the taxatree or otutree slot, you can use mp_extract_tree to extract the specific slot.
taxa.tree <- mpse_abund %>%
mp_extract_tree(type="taxatree")
taxa.tree
## And the result tibble of different analysis can also be extracted with tidytree (>=0.3.5)
#LDAupper, LDAmean, LDAlower,
taxa.tree %>% select(label, nodeClass, Sign_Group, fdr) #%>% dplyr::filter(!is.na(fdr))
taxa.tree %>% print(width=150, n=200)
# -- replace the pvalue and fdr with pvalue and p-adjusted from DESeq enrichment results --
#TODO: replace the values of pvalue and fdr in taxa.tree, with the values of pvalue and padj from sigtab, if the the tips in taxa.tree could be found in colnames(sigtab).
#tree_data <- get.data(taxa.tree)
#as.treedata(taxa.tree)
#d <- tibble(label = paste0('t', 1:4), trait = rnorm(4))
tree_data <- as_tibble(taxa.tree)
#full_join(x, d, by = 'label') %>% as.treedata
# Modify tree_data by joining with sigtab and updating Sign_Group
sigtab$label <- rownames(sigtab)
#write.xlsx(sigtab, file = "sigtab.xlsx")
sum(sigtab$padj<0.05))
#taxa.tree <- left_join(tree_data, sigtab[, c("label", "log2FoldChange", "pvalue", "padj")], by = 'label') %>% as.treedata
taxa.tree2 <- tree_data %>%
left_join(sigtab[, c("label", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")], by = "label") %>%
mutate(Sign_Group = case_when(
log2FoldChange > 0 & padj <= 0.05 ~ "Group2",
log2FoldChange < 0 & padj <= 0.05 ~ "Group6",
TRUE ~ NA_character_ # Sets Sign_Group to NA otherwise
)) %>%
as.treedata() # Convert the dataframe to a treedata object
taxa.tree2 %>% print(width=380, n=20)
# ---- print taxa_data2 to Excel, why resulting in 26 records? ----
taxa_data2 <- as_tibble(taxa.tree2)
sum(!is.na(taxa_data2$Sign_Group))
sapply(taxa_data2, class)
# Remove or transform list columns if not needed
taxa_data2_simplified <- taxa_data2 %>%
select(-RareAbundanceBySample, -RareAbundanceByGroup) %>%
mutate(across(where(is.list), ~toString(.))) # Convert lists to character strings if needed
# Replace NA with a placeholder, such as "NA" or another suitable representation
taxa_data2_simplified <- taxa_data2_simplified %>%
mutate(across(everything(), ~ifelse(is.na(.), "NA", .)))
taxonomy_data <- as.data.frame(mp_extract_taxonomy(mpse_abund))
colnames(taxa_data2_simplified)[colnames(taxa_data2_simplified) == "label"] <- "OTU"
combined_data <- left_join(taxa_data2_simplified, taxonomy_data, by = "OTU")
library(writexl)
write_xlsx(combined_data, "taxa_data2.xlsx")
# Since taxa.tree is treedata object, it can be visualized by ggtree and ggtreeExtra
p1 <- ggtree(
taxa.tree2,
layout="radial",
size = 0.3
) +
geom_point(
data = td_filter(!isTip),
fill="white",
size=1,
shape=21
)
# display the high light of phylum clade.
p2 <- p1 +
geom_hilight(
data = td_filter(nodeClass == "Phylum"),
mapping = aes(node = node, fill = label)
)
# display the relative abundance of features(OTU)
p3 <- p2 +
ggnewscale::new_scale("fill") +
geom_fruit(
data = td_unnest(RareAbundanceBySample),
geom = geom_star,
mapping = aes(
x = fct_reorder(Sample, Group, .fun=min),
size = RelRareAbundanceBySample,
fill = Group,
subset = RelRareAbundanceBySample > 0
),
starshape = 13,
starstroke = 0.25,
offset = 0.03,
pwidth = 0.4,
grid.params = list(linetype=2)
) +
scale_size_continuous(
name="Relative Abundance (%)",
range = c(.5, 3)
) +
scale_fill_manual(values=c("#1B9E77", "#D95F02"))
# display the tip labels of taxa tree
p4 <- p3 + geom_tiplab(size=6, offset=4.0)
# display the LDA of significant OTU.
#p5 <- p4 +
# ggnewscale::new_scale("fill") +
# geom_fruit(
# geom = geom_col,
# mapping = aes(
# x = LDAmean,
# fill = Sign_Group,
# subset = !is.na(LDAmean)
# ),
# orientation = "y",
# offset = 0.3,
# pwidth = 0.5,
# axis.params = list(axis = "x",
# title = "Log10(LDA)",
# title.height = 0.01,
# title.size = 2,
# text.size = 1.8,
# vjust = 1),
# grid.params = list(linetype = 2)
# )
# display the significant (FDR-->pvalue-->padj) taxonomy after kruskal.test (default)
#shape = 21,
#scale_size_continuous(range=c(1, 3)) +
p6 <- p4 +
ggnewscale::new_scale("size") +
geom_point(
data=td_filter(!is.na(Sign_Group)),
mapping = aes(size = -log10(padj),
fill = Sign_Group,
),
shape = 21,
) +
scale_size_continuous(range=c(1, 4)) +
scale_fill_manual(values=c("#1B9E77", "#D95F02"))
svg("diff_analysis_Group2_vs_Group6.svg",width=22, height=22)
#png("differently_expressed_otu.png", width=2000, height=2000)
p6 + theme(
legend.key.height = unit(1.0, "cm"),
legend.key.width = unit(1.0, "cm"),
legend.spacing.y = unit(0.01, "cm"),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20)
#legend.position = c(0.99, 0.01)
)
dev.off()
点赞本文的读者
还没有人对此文章表态
没有评论
Phyloseq + MicrobiotaProcess + PICRUSt2
MicrobiotaProcess Group2 vs Group6 (v1)
© 2023 XGenes.com Impressum