QIIME + Phyloseq + MicrobiotaProcess (v1)

# https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html

# -----------------------------------
# ---- prepare the R environment ----
#Rscript MicrobiotaProcess.R
#NOTE: exit R script, then login again R-environment; rm -rf Phyloseq*_cache
#mkdir figures
rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')

# with #alpha = 2.0, running the following script further!

# -----------------------------
# ---- 3.1. bridges other tools
##https://github.com/YuLab-SMU/MicrobiotaProcess
##https://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess.html
##https://chiliubio.github.io/microeco_tutorial/intro.html#framework
##https://yiluheihei.github.io/microbiomeMarker/reference/plot_cladogram.html
#BiocManager::install("MicrobiotaProcess")
#install.packages("microeco")
#install.packages("ggalluvial")
#install.packages("ggh4x")

library(MicrobiotaProcess)
library(microeco)
library(ggalluvial)
library(ggh4x)
library(gghalves)

## Convert the phyloseq object to a MicrobiotaProcess object
#mp <- as.MicrobiotaProcess(ps.ng.tax)

#mt <- phyloseq2microeco(ps.ng.tax) #--> ERROR
#abundance_table <- mt$abun_table
#taxonomy_table <- mt$tax_table

#ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
#ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)

##OPTION1 (NOT_USED): take all samples, prepare ps.ng.tax_abund --> mpse_abund
##mpse <- ps.ng.tax %>% as.MPSE()
#mpse_abund <- ps.ng.tax_abund %>% as.MPSE()

##OPTION2 (USED!): take partial samples, prepare ps.ng.tax or ps.ng.tax_abund (2 replacements!)--> ps.ng.tax_sel --> mpse_abund
ps.ng.tax_sel <- ps.ng.tax_abund

##otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("1","2","5","6","7",  "15","16","17","18","19","20",  "29","30","31","32",  "40","41","42","43","44","46")]
##NOTE: Only choose Group2, Group4, Group6, Group8
#> ps.ng.tax_sel
#otu_table()   OTU Table:         [ 37465 taxa and 29 samples ]
#sample_data() Sample Data:       [ 29 samples by 10 sample variables ]
#tax_table()   Taxonomy Table:    [ 37465 taxa by 7 taxonomic ranks ]
#phy_tree()    Phylogenetic Tree: [ 37465 tips and 37461 internal nodes ]
#-Group4: "21","22","23","24","25","26","27","28",
#-Group8: ,  "47","48","49","50","52","53","55"
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[,c("8","9","10","12","13","14",  "33","34","35","36","37","38","39","51")]
mpse_abund <- ps.ng.tax_sel %>% as.MPSE()
# A MPSE-tibble (MPSE object) abstraction: 2,352 × 20
# NOTE mpse_abund contains 20 variables: OTU, Sample, Abundance, BarcodeSequence, LinkerPrimerSequence, FileInput, Group,
#   Sex_age

, pre_post_stroke , Conc , Vol_50ng , Vol_PCR , Description , # Domain , Phylum , Class , Order , Family , Genus , Species # ———————————– # —- 3.2. alpha diversity analysis # Rarefied species richness + RareAbundance mpse_abund %<>% mp_rrarefy() # ‘chunks’ represent the split number of each sample to calculate alpha # diversity, default is 400. e.g. If a sample has total 40000 # reads, if chunks is 400, it will be split to 100 sub-samples # (100, 200, 300,…, 40000), then alpha diversity index was # calculated based on the sub-samples. # ‘.abundance’ the column name of abundance, if the ‘.abundance’ is not be # rarefied calculate rarecurve, user can specific ‘force=TRUE’. mpse_abund %<>% mp_cal_rarecurve( .abundance = RareAbundance, chunks = 400 ) # The RareAbundanceRarecurve column will be added the colData slot # automatically (default action=”add”) #NOTE mpse_abund contains 22 varibles = 20 varibles + RareAbundance + RareAbundanceRarecurve # default will display the confidence interval around smooth. # se=TRUE # NOTE that two colors #c(“#00A087FF”, “#3C5488FF”) for .group = pre_post_stroke; four colors c(“#1f78b4”, “#33a02c”, “#e31a1c”, “#6a3d9a”) for .group = Group; p1 <- mpse_abund %>% mp_plot_rarecurve( .rare = RareAbundanceRarecurve, .alpha = Observe, ) p2 <- mpse_abund %>% mp_plot_rarecurve( .rare = RareAbundanceRarecurve, .alpha = Observe, .group = Group ) + scale_color_manual(values=c(“#1f78b4”, “#e31a1c”)) + scale_fill_manual(values=c(“#1f78b4”, “#e31a1c”), guide=”none”) # combine the samples belong to the same groups if plot.group=TRUE p3 <- mpse_abund %>% mp_plot_rarecurve( .rare = RareAbundanceRarecurve, .alpha = “Observe”, .group = Group, plot.group = TRUE ) + scale_color_manual(values=c(“#1f78b4”, “#e31a1c”)) + scale_fill_manual(values=c(“#1f78b4”, “#e31a1c”),guide=”none”) png(“rarefaction_of_samples_or_groups.png”, width=1080, height=600) p1 + p2 + p3 dev.off() # —————————————— # 3.3. calculate alpha index and visualization library(ggplot2) library(MicrobiotaProcess) mpse_abund %<>% mp_cal_alpha(.abundance=RareAbundance) mpse_abund #NOTE mpse_abund contains 28 varibles = 22 varibles + Observe , Chao1 , ACE , Shannon , Simpson , Pielou f1 <- mpse_abund %>% mp_plot_alpha( .group=Group, .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou) ) + scale_fill_manual(values=c(“#1f78b4”, “#e31a1c”), guide=”none”) + scale_color_manual(values=c(“#1f78b4”, “#e31a1c”), guide=”none”) f2 <- mpse_abund %>% mp_plot_alpha( .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou) ) #ps.ng.tax_sel contais only pre samples –> f1 cannot be generated! png(“alpha_diversity_comparison.png”, width=1400, height=600) f1 / f2 dev.off() # ——————————————- # 3.4. The visualization of taxonomy abundance (Class) mpse_abund %<>% mp_cal_abundance( # for each samples .abundance = RareAbundance ) %>% mp_cal_abundance( # for each groups .abundance=RareAbundance, .group=Group ) mpse_abund #NOTE mpse_abund contains 29 varibles = 28 varibles + RelRareAbundanceBySample # visualize the relative abundance of top 20 phyla for each sample. # .group=time, p1 <- mpse_abund %>% mp_plot_abundance( .abundance=RareAbundance, taxa.class = Class, topn = 20, relative = TRUE ) # visualize the abundance (rarefied) of top 20 phyla for each sample. # .group=time, p2 <- mpse_abund %>% mp_plot_abundance( .abundance=RareAbundance, taxa.class = Class, topn = 20, relative = FALSE ) png(“relative_abundance_and_abundance.png”, width= 1200, height=600) #NOT PRODUCED! p1 / p2 dev.off() #—- h1 <- mpse_abund %>% mp_plot_abundance( .abundance = RareAbundance, .group = Group, taxa.class = Class, relative = TRUE, topn = 20, geom = ‘heatmap’, features.dist = ‘euclidean’, features.hclust = ‘average’, sample.dist = ‘bray’, sample.hclust = ‘average’ ) h2 <- mpse_abund %>% mp_plot_abundance( .abundance = RareAbundance, .group = Group, taxa.class = Class, relative = FALSE, topn = 20, geom = ‘heatmap’, features.dist = ‘euclidean’, features.hclust = ‘average’, sample.dist = ‘bray’, sample.hclust = ‘average’ ) # the character (scale or theme) of figure can be adjusted by set_scale_theme # refer to the mp_plot_dist png(“relative_abundance_and_abundance_heatmap.png”, width= 1200, height=600) aplot::plot_list(gglist=list(h1, h2), tag_levels=”A”) dev.off() # visualize the relative abundance of top 20 class for each .group (Group) p3 <- mpse_abund %>% mp_plot_abundance( .abundance=RareAbundance, .group=Group, taxa.class = Class, topn = 20, plot.group = TRUE ) # visualize the abundance of top 20 phyla for each .group (time) p4 <- mpse_abund %>% mp_plot_abundance( .abundance=RareAbundance, .group= Group, taxa.class = Class, topn = 20, relative = FALSE, plot.group = TRUE ) png(“relative_abundance_and_abundance_groups.png”, width= 1000, height=1000) p3 / p4 dev.off() # ————————— # 3.5. Beta diversity analysis # ——————————————— # 3.5.1 The distance between samples or groups # standardization # mp_decostand wraps the decostand of vegan, which provides # many standardization methods for community ecology. # default is hellinger, then the abundance processed will # be stored to the assays slot. mpse_abund %<>% mp_decostand(.abundance=Abundance) mpse_abund #NOTE mpse_abund contains 30 varibles = 29 varibles + hellinger # calculate the distance between the samples. # the distance will be generated a nested tibble and added to the # colData slot. mpse_abund %<>% mp_cal_dist(.abundance=hellinger, distmethod=”bray”) mpse_abund #NOTE mpse_abund contains 31 varibles = 30 varibles + bray # mp_plot_dist provides there methods to visualize the distance between the samples or groups # when .group is not provided, the dot heatmap plot will be return p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray) png(“distance_between_samples.png”, width= 1000, height=1000) p1 dev.off() # when .group is provided, the dot heatmap plot with group information will be return. p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = Group) # The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function. p2 %>% set_scale_theme( x = scale_fill_manual( values=c(“#1f78b4”, “#e31a1c”), #c(“orange”, “deepskyblue”), guide = guide_legend( keywidth = 1, keyheight = 0.5, title.theme = element_text(size=8), label.theme = element_text(size=6) ) ), aes_var = Group # specific the name of variable ) %>% set_scale_theme( x = scale_color_gradient( guide = guide_legend(keywidth = 0.5, keyheight = 0.5) ), aes_var = bray ) %>% set_scale_theme( x = scale_size_continuous( range = c(0.1, 3), guide = guide_legend(keywidth = 0.5, keyheight = 0.5) ), aes_var = bray ) png(“distance_between_samples_with_group_info.png”, width= 1000, height=1000) p2 dev.off() # when .group is provided and group.test is TRUE, the comparison of different groups will be returned # Assuming p3 is a ggplot object after mp_plot_dist call p3 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = Group, group.test = TRUE, textsize = 6) + theme( axis.title.x = element_text(size = 14), # Customize x-axis label face = “bold” axis.title.y = element_text(size = 14), # Customize y-axis label axis.text.x = element_text(size = 14), # Customize x-axis ticks axis.text.y = element_text(size = 14) # Customize y-axis ticks ) # Save the plot with the new theme settings png(“Comparison_of_Bray_Distances.png”, width = 1000, height = 1000) print(p3) # Ensure that p3 is explicitly printed in the device dev.off() # Extract Bray-Curtis Distance Values and save them in a Excel-table. library(dplyr) library(tidyr) library(openxlsx) # Define the sample numbers vector sample_numbers <- c("8", "9", "10", "12", "13", "14", "33", "34", "35", "36", "37", "38", "39", "51") # Consolidate the list of tibbles using the actual sample numbers bray_data <- bind_rows( lapply(seq_along(mpse_abund$bray), function(i) { tibble( Sample1 = sample_numbers[i], # Use actual sample number Sample2 = mpse_abund$bray[[i]]$braySampley, BrayDistance = mpse_abund$bray[[i]]$bray ) }), .id = "PairID" ) # Print the data frame to check the output print(bray_data) # Write the data frame to an Excel file write.xlsx(bray_data, file = "Bray_Curtis_Distances.xlsx") #DELETE the column "PairID" in Excel file # ----------------------- # 3.5.2 The PCoA analysis #install.packages("corrr") library(corrr) #install.packages("ggside") library(ggside) mpse_abund %<>% mp_cal_pcoa(.abundance=hellinger, distmethod=”bray”) # The dimensions of ordination analysis will be added the colData slot (default). mpse_abund mpse_abund %>% print(width=380, n=2) #NOTE mpse_abund contains 34 varibles = 31 varibles + `PCo1 (30.16%)` , `PCo2 (15.75%)` , `PCo3 (10.53%)` #BUG why 36 variables in mpse_abund %>% print(width=380, n=1) [RareAbundanceBySample , RareAbundanceByGroup ] #> methods(class=class(mpse_abund)) # [1] [ [[<- [<- # [4] $ $<- arrange # [7] as_tibble as.data.frame as.phyloseq #[10] coerce coerce<- colData<- #[13] distinct filter group_by #[16] left_join mp_adonis mp_aggregate_clade #[19] mp_aggregate mp_anosim mp_balance_clade #[22] mp_cal_abundance mp_cal_alpha mp_cal_cca #[25] mp_cal_clust mp_cal_dca mp_cal_dist #[28] mp_cal_nmds mp_cal_pca mp_cal_pcoa #[31] mp_cal_pd_metric mp_cal_rarecurve mp_cal_rda #[34] mp_cal_upset mp_cal_venn mp_decostand #[37] mp_diff_analysis mp_diff_clade mp_envfit #[40] mp_extract_abundance mp_extract_assays mp_extract_dist #[43] mp_extract_feature mp_extract_internal_attr mp_extract_rarecurve #[46] mp_extract_refseq mp_extract_sample mp_extract_taxonomy #[49] mp_extract_tree mp_filter_taxa mp_mantel #[52] mp_mrpp mp_plot_abundance mp_plot_alpha #[55] mp_plot_diff_boxplot mp_plot_diff_res mp_plot_dist #[58] mp_plot_ord mp_plot_rarecurve mp_plot_upset #[61] mp_plot_venn mp_rrarefy mp_select_as_tip #[64] mp_stat_taxa mutate otutree #[67] otutree<- print pull #[70] refsequence refsequence<- rename #[73] rownames<- select show # [ reached getOption("max.print") -- omitted 6 entries ] #see '?methods' for accessing help and source code # We also can perform adonis or anosim to check whether it is significant to the dissimilarities of groups. mpse_abund %<>% mp_adonis(.abundance=hellinger, .formula=~Group, distmethod=”bray”, permutations=9999, action=”add”) mpse_abund %>% mp_extract_internal_attr(name=adonis) #NOTE mpse_abund contains 34 varibles, no new variable, it has been saved in mpse_abund and can be extracted with “mpse_abund %>% mp_extract_internal_attr(name=’adonis’)” #The result of adonis has been saved to the internal attribute ! #It can be extracted using this-object %>% mp_extract_internal_attr(name=’adonis’) #The object contained internal attribute: PCoA ADONIS #Permutation test for adonis under reduced model #Terms added sequentially (first to last) #Permutation: free #Number of permutations: 9999 # #vegan::adonis2(formula = .formula, data = sampleda, permutations = permutations, method = distmethod) # Df SumOfSqs R2 F Pr(>F) #Group 1 0.23448 0.22659 3.5158 5e-04 *** #Residual 12 0.80032 0.77341 #Total 13 1.03480 1.00000 #— #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # (“1″,”2″,”5″,”6″,”7”, “15”,”16″,”17″,”18″,”19″,”20″, “29”,”30″,”31″,”32″, “40”,”41″,”42″,”43″,”44″,”46″) #div.df2[div.df2 == “Group1”] <- "aged.post" #div.df2[div.df2 == "Group3"] <- "young.post" #div.df2[div.df2 == "Group5"] <- "aged.post" #div.df2[div.df2 == "Group7"] <- "young.post" # ("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55") #div.df2[div.df2 == "Group2"] <- "aged.pre" #div.df2[div.df2 == "Group4"] <- "young.pre" #div.df2[div.df2 == "Group6"] <- "aged.pre" #div.df2[div.df2 == "Group8"] <- "young.pre" #Group1: f.aged and post #Group2: f.aged and pre #Group3: f.young and post #Group4: f.young and pre #Group5: m.aged and post #Group6: m.aged and pre #Group7: m.young and post #Group8: m.young and pre #[,c("1","2","5","6","7", "8","9","10","12","13","14")] #[,c("15","16","17","18","19","20", "21","22","23","24","25","26","27","28")] #[,c("29","30","31","32", "33","34","35","36","37","38","39","51")] #[,c("40","41","42","43","44","46", "47","48","49","50","52","53","55")] #For the first set: #a6cee3: This is a light blue color, somewhat pastel and soft. #b2df8a: A soft, pale green, similar to a light lime. #fb9a99: A soft pink, slightly peachy or salmon-like. #cab2d6: A pale purple, reminiscent of lavender or a light mauve. #For the second set: #1f78b4: This is a strong, vivid blue, close to cobalt or a medium-dark blue. #33a02c: A medium forest green, vibrant and leafy. #e31a1c: A bright red, very vivid, similar to fire engine red. #6a3d9a: This would be described as a deep purple, akin to a dark lavender or plum. p1 <- mpse_abund %>% mp_plot_ord( .ord = pcoa, .group = Group, .color = Group, .size = 2.4, .alpha = 1, ellipse = TRUE, show.legend = FALSE # don’t display the legend of stat_ellipse ) + scale_fill_manual( #values = c(“#a6cee3”, “#1f78b4”, “#b2df8a”, “#33a02c”, “#fb9a99”, “#e31a1c”, “#cab2d6”, “#6a3d9a”), #values = c(“#a6cee3”, “#b2df8a”, “#fb9a99”, “#cab2d6”), values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12)) ) + scale_color_manual( #values=c(“#a6cee3”, “#1f78b4”, “#b2df8a”, “#33a02c”, “#fb9a99”, “#e31a1c”, “#cab2d6”, “#6a3d9a”), #values = c(“#a6cee3”, “#b2df8a”, “#fb9a99”, “#cab2d6”), values = c(“#1f78b4”, “#e31a1c”), guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12)) ) #scale_fill_manual(values=c(“#FF0000”, “#000000”, “#0000FF”, “#C0C0C0”, “#00FF00”, “#FFFF00”, “#00FFFF”, “#FFA500”)) + #scale_color_manual(values=c(“#FF0000”, “#000000”, “#0000FF”, “#C0C0C0”, “#00FF00”, “#FFFF00”, “#00FFFF”, “#FFA500”)) #scale_fill_manual(values=c(“#00A087FF”, “#3C5488FF”)) + #scale_color_manual(values=c(“#00A087FF”, “#3C5488FF”)) #png(“PCoA.png”, width= 1000, height=1000) #svg(“PCoA.svg”, width= 11, height=10) #svg(“PCoA_.svg”, width=10, height=10) #svg(“PCoA.svg”) pdf(“PCoA_Group2_vs_Group6.pdf”) p1 dev.off() #FF0000: Red #000000: Black #0000FF: Blue #C0C0C0: Silver #00FF00: Lime (often referred to simply as Green in web colors) #FFFF00: Yellow #00FFFF: Aqua (also known as Cyan) #FFA500: Orange # The size of point also can be mapped to other variables such as Observe, or Shannon # Then the alpha diversity and beta diversity will be displayed simultaneously. p2 <- mpse_abund %>% mp_plot_ord( .ord = pcoa, .group = Group, .color = Group, .size = Shannon, .alpha = Observe, ellipse = TRUE, show.legend = FALSE # don’t display the legend of stat_ellipse ) + scale_fill_manual( values = c(“#1f78b4”, “#e31a1c”), #only needs four colors. #values = c(“#FF0000”, “#000000”, “#0000FF”, “#C0C0C0”, “#00FF00”, “#FFFF00”, “#00FFFF”, “#FFA500”), guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8)) ) + scale_color_manual( values = c(“#1f78b4”, “#e31a1c”), #only needs four colors. #values=c(“#FF0000”, “#000000”, “#0000FF”, “#C0C0C0”, “#00FF00”, “#FFFF00”, “#00FFFF”, “#FFA500”), guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8)) ) + scale_size_continuous( range=c(0.5, 3), guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8)) ) pdf(“PCoA2_Group2_vs_Group6.pdf”) p2 dev.off() # —————————————— # 3.5.3 Hierarchical cluster (tree) analysis #input should contain hellinger! mpse_abund %<>% mp_cal_clust( .abundance = hellinger, distmethod = “bray”, hclustmethod = “average”, # (UPGAE) action = “add” # action is used to control which result will be returned ) mpse_abund mpse_abund %>% print(width=380, n=2) #NOTE mpse_abund contains 34 varibles, no new variable, the column bray has been new calculated! # if action = ‘add’, the result of hierarchical cluster will be added to the MPSE object # mp_extract_internal_attr can extract it. It is a treedata object, so it can be visualized # by ggtree. sample.clust <- mpse_abund %>% mp_extract_internal_attr(name=’SampleClust’) #The object contained internal attribute: PCoA ADONIS SampleClust sample.clust #–> The associated data tibble abstraction: 27 × 30 library(ggtree) p <- ggtree(sample.clust) + geom_tippoint(aes(color=Group)) + geom_tiplab(as_ylab = TRUE) + ggplot2::scale_x_continuous(expand=c(0, 0.01)) png("hierarchical_cluster1.png", width= 1000, height=800) p dev.off() #https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html # mapping = aes(x = RelRareAbundanceBySample-->Group, # y = Sample–>Group, # fill = Phyla # ), library(ggtreeExtra) library(ggplot2) # Extract relative abundance of phyla phyla.tb <- mpse_abund %>% mp_extract_abundance(taxa.class=Phylum, topn=30) # The abundance of each samples is nested, it can be flatted using the unnest of tidyr. phyla.tb %<>% tidyr::unnest(cols=RareAbundanceBySample) %>% dplyr::rename(Phyla=”label”) phyla.tb phyla.tb %>% print(width=380, n=10) p1 <- p + geom_fruit( data=phyla.tb, geom=geom_col, mapping = aes(x = RelRareAbundanceBySample, y = Sample, fill = Phyla ), orientation = "y", #offset = 0.4, pwidth = 3, axis.params = list(axis = "x", title = "The relative abundance of phyla (%)", title.size = 4, text.size = 2, vjust = 1), grid.params = list() ) png("hierarchical_cluster2_Phyla.png", width = 1000, height = 800) p1 dev.off() # Extract relative abundance of classes class.tb <- mpse_abund %>% mp_extract_abundance(taxa.class = Class, topn = 30) # Flatten and rename the columns class.tb %<>% tidyr::unnest(cols = RareAbundanceBySample) %>% dplyr::rename(Class = “label”) # View the data frame class.tb # Create the plot p1 <- p + geom_fruit( data = class.tb, geom = geom_col, mapping = aes(x = RelRareAbundanceBySample, y = Sample, fill = Class ), orientation = "y", pwidth = 3, axis.params = list(axis = "x", title = "The relative abundance of classes (%)", title.size = 4, text.size = 2, vjust = 1), grid.params = list() ) # Save the plot to a file #ERROR-->NEED to be DEBUGGED! png(“hierarchical_cluster2_Class.png”, width = 1000, height = 800) print(p1) dev.off() # ———————– # 3.6 Biomarker discovery library(ggtree) library(ggtreeExtra) library(ggplot2) library(MicrobiotaProcess) library(tidytree) library(ggstar) library(forcats) #—-BUG: why resulting in 26 taxa != 16 in the end —- mpse_abund %>% print(width=150) #mpse_abund %<>% # mp_cal_abundance( # for each samples # .abundance = RareAbundance # ) %>% # mp_cal_abundance( # for each groups # .abundance=RareAbundance, # .group=Group # ) #mpse_abund mpse_abund %<>% mp_diff_analysis( .abundance = RelRareAbundanceBySample, .group = Group, first.test.alpha = 0.01, filter.p=”pvalue” ) # The result is stored to the taxatree or otutree slot, you can use mp_extract_tree to extract the specific slot. taxa.tree <- mpse_abund %>% mp_extract_tree(type=”taxatree”) taxa.tree ## And the result tibble of different analysis can also be extracted with tidytree (>=0.3.5) #LDAupper, LDAmean, LDAlower, taxa.tree %>% select(label, nodeClass, Sign_Group, fdr) #%>% dplyr::filter(!is.na(fdr)) taxa.tree %>% print(width=150, n=200) # — replace the pvalue and fdr with pvalue and p-adjusted from DESeq enrichment results — #TODO: replace the values of pvalue and fdr in taxa.tree, with the values of pvalue and padj from sigtab, if the the tips in taxa.tree could be found in colnames(sigtab). #tree_data <- get.data(taxa.tree) #as.treedata(taxa.tree) #d <- tibble(label = paste0('t', 1:4), trait = rnorm(4)) tree_data <- as_tibble(taxa.tree) #full_join(x, d, by = 'label') %>% as.treedata # Modify tree_data by joining with sigtab and updating Sign_Group sigtab$label <- rownames(sigtab) #write.xlsx(sigtab, file = "sigtab.xlsx") sum(sigtab$padj<0.05)) #taxa.tree <- left_join(tree_data, sigtab[, c("label", "log2FoldChange", "pvalue", "padj")], by = 'label') %>% as.treedata taxa.tree2 <- tree_data %>% left_join(sigtab[, c(“label”, “baseMean”, “log2FoldChange”, “lfcSE”, “stat”, “pvalue”, “padj”)], by = “label”) %>% mutate(Sign_Group = case_when( log2FoldChange > 0 & padj <= 0.05 ~ "Group2", log2FoldChange < 0 & padj <= 0.05 ~ "Group6", TRUE ~ NA_character_ # Sets Sign_Group to NA otherwise )) %>% as.treedata() # Convert the dataframe to a treedata object taxa.tree2 %>% print(width=380, n=20) # —- print taxa_data2 to Excel, why resulting in 26 records? —- taxa_data2 <- as_tibble(taxa.tree2) sum(!is.na(taxa_data2$Sign_Group)) sapply(taxa_data2, class) # Remove or transform list columns if not needed taxa_data2_simplified <- taxa_data2 %>% select(-RareAbundanceBySample, -RareAbundanceByGroup) %>% mutate(across(where(is.list), ~toString(.))) # Convert lists to character strings if needed # Replace NA with a placeholder, such as “NA” or another suitable representation taxa_data2_simplified <- taxa_data2_simplified %>% mutate(across(everything(), ~ifelse(is.na(.), “NA”, .))) taxonomy_data <- as.data.frame(mp_extract_taxonomy(mpse_abund)) colnames(taxa_data2_simplified)[colnames(taxa_data2_simplified) == "label"] <- "OTU" combined_data <- left_join(taxa_data2_simplified, taxonomy_data, by = "OTU") library(writexl) write_xlsx(combined_data, "taxa_data2.xlsx") # Since taxa.tree is treedata object, it can be visualized by ggtree and ggtreeExtra p1 <- ggtree( taxa.tree2, layout="radial", size = 0.3 ) + geom_point( data = td_filter(!isTip), fill="white", size=1, shape=21 ) # display the high light of phylum clade. p2 <- p1 + geom_hilight( data = td_filter(nodeClass == "Phylum"), mapping = aes(node = node, fill = label) ) # display the relative abundance of features(OTU) p3 <- p2 + ggnewscale::new_scale("fill") + geom_fruit( data = td_unnest(RareAbundanceBySample), geom = geom_star, mapping = aes( x = fct_reorder(Sample, Group, .fun=min), size = RelRareAbundanceBySample, fill = Group, subset = RelRareAbundanceBySample > 0 ), starshape = 13, starstroke = 0.25, offset = 0.03, pwidth = 0.4, grid.params = list(linetype=2) ) + scale_size_continuous( name=”Relative Abundance (%)”, range = c(.5, 3) ) + scale_fill_manual(values=c(“#1B9E77”, “#D95F02”)) # display the tip labels of taxa tree p4 <- p3 + geom_tiplab(size=6, offset=4.0) # display the LDA of significant OTU. #p5 <- p4 + # ggnewscale::new_scale("fill") + # geom_fruit( # geom = geom_col, # mapping = aes( # x = LDAmean, # fill = Sign_Group, # subset = !is.na(LDAmean) # ), # orientation = "y", # offset = 0.3, # pwidth = 0.5, # axis.params = list(axis = "x", # title = "Log10(LDA)", # title.height = 0.01, # title.size = 2, # text.size = 1.8, # vjust = 1), # grid.params = list(linetype = 2) # ) # display the significant (FDR-->pvalue–>padj) taxonomy after kruskal.test (default) #shape = 21, #scale_size_continuous(range=c(1, 3)) + p6 <- p4 + ggnewscale::new_scale("size") + geom_point( data=td_filter(!is.na(Sign_Group)), mapping = aes(size = -log10(padj), fill = Sign_Group, ), shape = 21, ) + scale_size_continuous(range=c(1, 4)) + scale_fill_manual(values=c("#1B9E77", "#D95F02")) svg("diff_analysis_Group2_vs_Group6.svg",width=22, height=22) #png("differently_expressed_otu.png", width=2000, height=2000) p6 + theme( legend.key.height = unit(1.0, "cm"), legend.key.width = unit(1.0, "cm"), legend.spacing.y = unit(0.01, "cm"), legend.text = element_text(size = 20), legend.title = element_text(size = 20) #legend.position = c(0.99, 0.01) ) dev.off()

Microbial bioinformatics

Microbial bioinformatics uses computational tools to analyze genomes, track evolution, and study functions in microorganisms, including bacteria and viruses.

QIIME + Phyloseq + MicrobiotaProcess (v1)

Leave a Reply Cancel reply