gene_x 0 like s 600 view s
Tags: R, metagenomics, 16S, tool, pipeline
Phyloseq.Rmd
author: ""
date: '`r format(Sys.time(), "%d %m %Y")`'
header-includes:
- \usepackage{color, fancyvrb}
output:
rmdformats::readthedown:
highlight: kate
number_sections : yes
pdf_document:
toc: yes
toc_depth: 2
number_sections : yes
---
```{r, echo=FALSE, warning=FALSE}
## Global options
# TODO: reproduce the html with the additional figure/SVN-files for editing.
# IMPORTANT NOTE: needs before "mkdir figures"
#rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
```
```{r load-packages, include=FALSE}
library(knitr)
library(rmdformats)
library(readxl)
library(dplyr)
library(kableExtra)
options(max.print="75")
knitr::opts_chunk$set(fig.width=8,
fig.height=6,
eval=TRUE,
cache=TRUE,
echo=TRUE,
prompt=FALSE,
tidy=TRUE,
comment=NA,
message=FALSE,
warning=FALSE)
opts_knit$set(width=85)
# Phyloseq R library
#* Phyloseq web site : https://joey711.github.io/phyloseq/index.html
#* See in particular tutorials for
# - importing data: https://joey711.github.io/phyloseq/import-data.html
# - heat maps: https://joey711.github.io/phyloseq/plot_heatmap-examples.html
```
# Data
Import raw data and assign sample key:
```{r, echo=TRUE, warning=FALSE}
#extend map_corrected.txt with Diet and Flora
#setwd("~/DATA/Data_Laura_16S_2/core_diversity_e4753")
map_corrected <- read.csv("map_corrected.txt", sep="\t", row.names=1)
knitr::kable(map_corrected) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```
# Prerequisites to be installed
* R : https://pbil.univ-lyon1.fr/CRAN/
* R studio : https://www.rstudio.com/products/rstudio/download/#download
```R
install.packages("dplyr") # To manipulate dataframes
install.packages("readxl") # To read Excel files into R
install.packages("ggplot2") # for high quality graphics
install.packages("heatmaply")
source("https://bioconductor.org/biocLite.R")
biocLite("phyloseq")
```
```{r libraries, echo=TRUE, message=FALSE}
library("readxl") # necessary to import the data from Excel file
library("ggplot2") # graphics
library("picante")
library("microbiome") # data analysis and visualisation
library("phyloseq") # also the basis of data object. Data analysis and visualisation
library("ggpubr") # publication quality figures, based on ggplot2
library("dplyr") # data handling, filter and reformat data frames
library("RColorBrewer") # nice color options
library("heatmaply")
library(vegan)
library(gplots)
```
# Read the data and create phyloseq objects
Three tables are needed
* OTU
* Taxonomy
* Samples
```{r, echo=TRUE, warning=FALSE}
#Change your working directory to where the files are located
ps.ng.tax <- import_biom("./table_even42369.biom", "./clustering/rep_set.tre")
sample <- read.csv("./map_corrected.txt", sep="\t", row.names=1)
SAM = sample_data(sample, errorIfNULL = T)
rownames(SAM) <-
c("1","2","3","5","6","7","8","9","10","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","46","47","48","49","50","51","52","53","55")
ps.ng.tax <- merge_phyloseq(ps.ng.tax, SAM)
print(ps.ng.tax)
colnames(tax_table(ps.ng.tax)) <- c("Domain","Phylum","Class","Order","Family","Genus","Species")
saveRDS(ps.ng.tax, "./ps.ng.tax.rds")
```
Visualize data
```{r, echo=TRUE, warning=FALSE}
sample_names(ps.ng.tax)
rank_names(ps.ng.tax)
sample_variables(ps.ng.tax)
```
Normalize number of reads in each sample using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
# RAREFACTION
#set.seed(9242) # This will help in reproducing the filtering and nomalisation.
#ps.ng.tax <- rarefy_even_depth(ps.ng.tax, sample.size = 42369)
#total <- 42369
# NORMALIZE number of reads in each sample using median sequencing depth.
total = median(sample_sums(ps.ng.tax))
#> total
#[1] 42369
standf = function(x, t=total) round(t * (x / sum(x)))
ps.ng.tax = transform_sample_counts(ps.ng.tax, standf)
ps.ng.tax_rel <- microbiome::transform(ps.ng.tax, "compositional")
saveRDS(ps.ng.tax, "./ps.ng.tax.rds")
hmp.meta <- meta(ps.ng.tax)
hmp.meta$sam_name <- rownames(hmp.meta)
```
# Heatmaps
```{r, echo=TRUE, warning=FALSE}
#MOVE_FROM_ABOVE: The number of reads used for normalization is **`r sprintf("%.0f", total)`**.
#A basic heatmap using the default parameters.
# plot_heatmap(ps.ng.tax, method = "NMDS", distance = "bray")
#NOTE that giving the correct OTU numbers in the text (1%, 0.5%, ...)!!!
```
We consider the most abundant OTUs for heatmaps. For example one can only take OTUs that represent at least 1% of reads in at least one sample. Remember we normalized all the sampples to median number of reads (total). We are left with only 168 OTUS which makes the reading much more easy.
```{r, echo=TRUE, warning=FALSE}
# Custom function to plot a heatmap with the specified sample order
#plot_heatmap_custom <- function(ps, sample_order, method = "NMDS", distance = "bray") {
ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
kable(otu_table(ps.ng.tax_abund)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
# Calculate the relative abundance for each sample
ps.ng.tax_abund_rel <- transform_sample_counts(ps.ng.tax_abund, function(x) x / sum(x))
datamat_ = as.data.frame(otu_table(ps.ng.tax_abund))
#datamat <- datamat_[c("1","2","5","6","7", "8","9","10","12","13","14", "15","16","17","18","19","20", "21","22","23","24","25","26","27","28", "29","30","31","32", "33","34","35","36","37","38","39","51", "40","41","42","43","44","46", "47","48","49","50","52","53","55")]
datamat <- datamat_[c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
mycl = cutree(hr, h=max(hr$height)/1.08)
mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED", "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
mycol = mycol[as.vector(mycl)]
sampleCols <- rep('GREY',ncol(datamat))
#names(sampleCols) <- c("Group1", "Group1", "Group1", "Group1", "Group1", "Group2", "Group2", "Group2", "Group2", "Group2","Group2", "Group3", "Group3", "Group3", "Group3", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1")
#sampleCols[colnames(datamat)=='1'] <- '#a6cee3'
#sampleCols[colnames(datamat)=='2'] <- '#a6cee3'
#sampleCols[colnames(datamat)=='5'] <- '#a6cee3'
#sampleCols[colnames(datamat)=='6'] <- '#a6cee3'
#sampleCols[colnames(datamat)=='7'] <- '#a6cee3'
sampleCols[colnames(datamat)=='8'] <- '#1f78b4'
sampleCols[colnames(datamat)=='9'] <- '#1f78b4'
sampleCols[colnames(datamat)=='10'] <- '#1f78b4'
sampleCols[colnames(datamat)=='12'] <- '#1f78b4'
sampleCols[colnames(datamat)=='13'] <- '#1f78b4'
sampleCols[colnames(datamat)=='14'] <- '#1f78b4'
#sampleCols[colnames(datamat)=='15'] <- '#b2df8a'
#sampleCols[colnames(datamat)=='16'] <- '#b2df8a'
#sampleCols[colnames(datamat)=='17'] <- '#b2df8a'
#sampleCols[colnames(datamat)=='18'] <- '#b2df8a'
#sampleCols[colnames(datamat)=='19'] <- '#b2df8a'
#sampleCols[colnames(datamat)=='20'] <- '#b2df8a'
sampleCols[colnames(datamat)=='21'] <- '#33a02c'
sampleCols[colnames(datamat)=='22'] <- '#33a02c'
sampleCols[colnames(datamat)=='23'] <- '#33a02c'
sampleCols[colnames(datamat)=='24'] <- '#33a02c'
sampleCols[colnames(datamat)=='25'] <- '#33a02c'
sampleCols[colnames(datamat)=='26'] <- '#33a02c'
sampleCols[colnames(datamat)=='27'] <- '#33a02c'
sampleCols[colnames(datamat)=='28'] <- '#33a02c'
#sampleCols[colnames(datamat)=='29'] <- '#fb9a99'
#sampleCols[colnames(datamat)=='30'] <- '#fb9a99'
#sampleCols[colnames(datamat)=='31'] <- '#fb9a99'
#sampleCols[colnames(datamat)=='32'] <- '#fb9a99'
sampleCols[colnames(datamat)=='33'] <- '#e31a1c'
sampleCols[colnames(datamat)=='34'] <- '#e31a1c'
sampleCols[colnames(datamat)=='35'] <- '#e31a1c'
sampleCols[colnames(datamat)=='36'] <- '#e31a1c'
sampleCols[colnames(datamat)=='37'] <- '#e31a1c'
sampleCols[colnames(datamat)=='38'] <- '#e31a1c'
sampleCols[colnames(datamat)=='39'] <- '#e31a1c'
sampleCols[colnames(datamat)=='51'] <- '#e31a1c'
#sampleCols[colnames(datamat)=='40'] <- '#cab2d6'
#sampleCols[colnames(datamat)=='41'] <- '#cab2d6'
#sampleCols[colnames(datamat)=='42'] <- '#cab2d6'
#sampleCols[colnames(datamat)=='43'] <- '#cab2d6'
#sampleCols[colnames(datamat)=='44'] <- '#cab2d6'
#sampleCols[colnames(datamat)=='46'] <- '#cab2d6'
sampleCols[colnames(datamat)=='47'] <- '#6a3d9a'
sampleCols[colnames(datamat)=='48'] <- '#6a3d9a'
sampleCols[colnames(datamat)=='49'] <- '#6a3d9a'
sampleCols[colnames(datamat)=='50'] <- '#6a3d9a'
sampleCols[colnames(datamat)=='52'] <- '#6a3d9a'
sampleCols[colnames(datamat)=='53'] <- '#6a3d9a'
sampleCols[colnames(datamat)=='55'] <- '#6a3d9a'
#bluered(75)
#color_pattern <- colorRampPalette(c("blue", "white", "red"))(100)
library(RColorBrewer)
custom_palette <- colorRampPalette(brewer.pal(9, "Blues"))
heatmap_colors <- custom_palette(100)
#colors <- heatmap_color_default(100)
png("figures/heatmap.png", width=1200, height=2400)
#par(mar=c(2, 2, 2, 2)) , lwid=1 lhei=c(0.7, 10)) # Adjust height of color keys keysize=0.3,
heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
scale='row',trace='none',col=heatmap_colors, cexRow=1.2, cexCol=1.5,
RowSideColors = mycol, ColSideColors = sampleCols, srtCol=15, labRow=row.names(datamat), key=TRUE, margins=c(10, 15), lhei=c(0.7, 15), lwid=c(1,8))
dev.off()
```
```{r, echo=TRUE, warning=FALSE, fig.cap="Heatmap", out.width = '100%', fig.align= "center"}
knitr::include_graphics("./figures/heatmap.png")
```
```{r, echo=FALSE, warning=FALSE}
#It is possible to use different distances and different multivaraite methods. Many different built-in distances can be used.
#dist_methods <- unlist(distanceMethodList)
#print(dist_methods)
```
\pagebreak
# Taxonomic summary
## Bar plots in phylum level
```{r, echo=FALSE, warning=FALSE}
#Make the bargraph nicer by removing OTUs boundaries. This is done by adding ggplot2 modifier.
# 1: uniform color. Color is for the border, fill is for the inside
#ggplot(mtcars, aes(x=as.factor(cyl) )) +
# geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7) )
# 2: Using Hue
#ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
# geom_bar( ) +
# scale_fill_hue(c = 40) +
# theme(legend.position="none")
# 3: Using RColorBrewer
#ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
# geom_bar( ) +
# scale_fill_brewer(palette = "Set1") +
# theme(legend.position="none")
# 4: Using greyscale:
#ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
# geom_bar( ) +
# scale_fill_grey(start = 0.25, end = 0.75) +
# theme(legend.position="none")
# 5: Set manualy
#ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
# geom_bar( ) +
# scale_fill_manual(values = c("red", "green", "blue") ) +
# theme(legend.position="none")
#NOT SUCCESSFUL!
#allGroupsColors<- c(
# "grey0", "grey50", "dodgerblu", "deepskyblue",
# "red", "darkred", "green", "green4")
# plot_bar(ps.ng.tax_rel, fill="Phylum") +
# geom_bar(stat="identity", position="stack") + scale_color_manual(values = allGroupsColors) #, fill=Phylum + scale_fill_brewer(palette = "Set1")
# ##### Keep only the most abundant phyla and
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Actinobacteria")) #1.57
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Bacteroidetes")) #27.27436
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Cyanobacteria")) #0.02244249
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Epsilonbacteraeota")) #0.01309145
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Euryarchaeota")) #0.1210024
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Firmicutes")) #32.50589
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Lentisphaerae")) #0.0001870208
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Patescibacteria")) #0.008789976
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Planctomycetes")) #0.01365252
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Proteobacteria")) #6.769216
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Synergistetes")) #0.005049561
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Tenericutes")) #0.0005610623
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Verrucomicrobia")) #2.076304
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c(NA)) #sum(otu_table(ps.ng.tax_most)) = 2.619413
```
```{r, echo=TRUE, warning=FALSE}
library(ggplot2)
geom.text.size = 6
theme.size = 8 #(14/5) * geom.text.size
#ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Actinobacteria", "D_1__Bacteroidetes", "D_1__Firmicutes", "D_1__Proteobacteria", "D_1__Verrucomicrobia", NA))
ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)
#CONSOLE(OPTIONAL): for sampleid in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73; do
#echo "otu_table(ps.ng.tax_most)[,${sampleid}]=otu_table(ps.ng.tax_most)[,${sampleid}]/sum(otu_table(ps.ng.tax_most)[,${sampleid}])" done
#OR
ps.ng.tax_most_ = transform_sample_counts(ps.ng.tax_most, function(x) x / sum(x))
```
```{r, echo=FALSE, warning=FALSE}
##--Creating 100% stacked bar plots with less abundant taxa in a sub-category #901--
##https://github.com/joey711/phyloseq/issues/901
##ps.ng.tax_most_df <- psmelt(ps.ng.tax_most_) #5986x19
#glom <- tax_glom(ps.ng.tax_most_, taxrank = 'Phylum')
#tax_table(glom) # should list # taxa as # phyla
#data <- psmelt(glom) # create dataframe from phyloseq object
#data$Phylum <- as.character(data$Phylum) #convert to character
##simple way to rename phyla with < 1% abundance
#data$Phylum[data$Abundance < 0.001] <- "< 0.1% abund."
#
#library(plyr)
#medians <- ddply(data, ~Phylum, function(x) c(median=median(x$Abundance)))
#remainder <- medians[medians$median <= 0.001,]$Phylum
#data[data$Phylum %in% remainder,]$Phylum <- "Phyla < 0.1% abund."
#data$Phylum[data$Abundance < 0.001] <- "Phyla < 0.1% abund."
##--> data are not used!
#
##in class level
#glom <- tax_glom(ps.ng.tax_most_, taxrank = 'Class')
#tax_table(glom) # should list # taxa as # phyla
#data <- psmelt(glom) # create dataframe from phyloseq object
#data$Class <- as.character(data$Class) #convert to character
#
##simple way to rename phyla with < 1% abundance
#data$Class[data$Abundance < 0.001] <- "< 0.1% abund."
#Count = length(unique(data$Class))
#
##unique(data$Class)
##data$Class <- factor(data$Class, levels = c("Bacilli", "Bacteroidia", "Verrucomicrobiae", "Clostridia", "Gammaproteobacteria", "Alphaproteobacteria", "Actinobacteria", "Negativicutes", "Erysipelotrichia", "Methanobacteria", "< 0.1% abund."))
##------- Creating 100% stacked bar plots END --------
library(stringr)
#FITTING1:
# tax_table(ps.ng.tax_most_)[1,"Domain"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Domain"], "__")[[1]][2]
# ... ...
# tax_table(ps.ng.tax_most_)[167,"Species"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Species"], "__")[[1]][2]
#ps.ng.tax_most_
#in total [ 89 taxa and 55 samples ]
#otu_table() OTU Table: [ 166 taxa and 54 samples ]
#otu_table() OTU Table: [ 168 taxa and 50 samples ]
#for id in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166; do
#echo "tax_table(ps.ng.tax_most_)[${id},\"Domain\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Domain\"], \"__\")[[1]][2]"
#echo "tax_table(ps.ng.tax_most_)[${id},\"Phylum\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Phylum\"], \"__\")[[1]][2]"
#echo "tax_table(ps.ng.tax_most_)[${id},\"Class\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Class\"], \"__\")[[1]][2]"
#echo "tax_table(ps.ng.tax_most_)[${id},\"Order\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Order\"], \"__\")[[1]][2]"
#echo "tax_table(ps.ng.tax_most_)[${id},\"Family\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Family\"], \"__\")[[1]][2]"
#echo "tax_table(ps.ng.tax_most_)[${id},\"Genus\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Genus\"], \"__\")[[1]][2]"
#echo "tax_table(ps.ng.tax_most_)[${id},\"Species\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Species\"], \"__\")[[1]][2]"
#done
tax_table(ps.ng.tax_most_)[1,"Domain"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Domain"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[1,"Phylum"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Phylum"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[1,"Class"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Class"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[1,"Order"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Order"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[1,"Family"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Family"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[1,"Genus"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Genus"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[1,"Species"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Species"], "__")[[1]][2]
#... ...
tax_table(ps.ng.tax_most_)[167,"Domain"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Domain"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[167,"Phylum"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Phylum"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[167,"Class"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Class"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[167,"Order"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Order"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[167,"Family"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Family"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[167,"Genus"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Genus"], "__")[[1]][2]
tax_table(ps.ng.tax_most_)[167,"Species"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Species"], "__")[[1]][2]
```
```{r, echo=TRUE, warning=FALSE}
#aes(color="Phylum", fill="Phylum") --> aes()
#ggplot(data=data, aes(x=Sample, y=Abundance, fill=Phylum))
plot_bar(ps.ng.tax_most_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2)) #6 instead of theme.size
```
```{r, echo=FALSE, warning=FALSE}
#png("abc.png")
#knitr::include_graphics("./Phyloseq_files/figure-html/unnamed-chunk-7-1.png")
#dev.off()
```
\pagebreak
Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
ps.ng.tax_most_pre_post_stroke <- merge_samples(ps.ng.tax_most_, "pre_post_stroke")
ps.ng.tax_most_pre_post_stroke_ = transform_sample_counts(ps.ng.tax_most_pre_post_stroke, function(x) x / sum(x))
#plot_bar(ps.ng.tax_most_SampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack")
plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black"))
```
\pagebreak
Use color according to phylum. Do separate panels Stroke and Sex_age.
```{r, echo=TRUE, warning=FALSE}
ps.ng.tax_most_copied <- data.table::copy(ps.ng.tax_most_)
#FITTING6: regulate the bar height if it has replicates: 5+6+6+8+4+8+6+7=25+25=50
otu_table(ps.ng.tax_most_)[,c("1")] <- otu_table(ps.ng.tax_most_)[,c("1")]/5
otu_table(ps.ng.tax_most_)[,c("2")] <- otu_table(ps.ng.tax_most_)[,c("2")]/5
otu_table(ps.ng.tax_most_)[,c("5")] <- otu_table(ps.ng.tax_most_)[,c("5")]/5
otu_table(ps.ng.tax_most_)[,c("6")] <- otu_table(ps.ng.tax_most_)[,c("6")]/5
otu_table(ps.ng.tax_most_)[,c("7")] <- otu_table(ps.ng.tax_most_)[,c("7")]/5
otu_table(ps.ng.tax_most_)[,c("8")] <- otu_table(ps.ng.tax_most_)[,c("8")]/6
otu_table(ps.ng.tax_most_)[,c("9")] <- otu_table(ps.ng.tax_most_)[,c("9")]/6
otu_table(ps.ng.tax_most_)[,c("10")] <- otu_table(ps.ng.tax_most_)[,c("10")]/6
otu_table(ps.ng.tax_most_)[,c("12")] <- otu_table(ps.ng.tax_most_)[,c("12")]/6
otu_table(ps.ng.tax_most_)[,c("13")] <- otu_table(ps.ng.tax_most_)[,c("13")]/6
otu_table(ps.ng.tax_most_)[,c("14")] <- otu_table(ps.ng.tax_most_)[,c("14")]/6
otu_table(ps.ng.tax_most_)[,c("15")] <- otu_table(ps.ng.tax_most_)[,c("15")]/6
otu_table(ps.ng.tax_most_)[,c("16")] <- otu_table(ps.ng.tax_most_)[,c("16")]/6
otu_table(ps.ng.tax_most_)[,c("17")] <- otu_table(ps.ng.tax_most_)[,c("17")]/6
otu_table(ps.ng.tax_most_)[,c("18")] <- otu_table(ps.ng.tax_most_)[,c("18")]/6
otu_table(ps.ng.tax_most_)[,c("19")] <- otu_table(ps.ng.tax_most_)[,c("19")]/6
otu_table(ps.ng.tax_most_)[,c("20")] <- otu_table(ps.ng.tax_most_)[,c("20")]/6
otu_table(ps.ng.tax_most_)[,c("21")] <- otu_table(ps.ng.tax_most_)[,c("21")]/8
otu_table(ps.ng.tax_most_)[,c("22")] <- otu_table(ps.ng.tax_most_)[,c("22")]/8
otu_table(ps.ng.tax_most_)[,c("23")] <- otu_table(ps.ng.tax_most_)[,c("23")]/8
otu_table(ps.ng.tax_most_)[,c("24")] <- otu_table(ps.ng.tax_most_)[,c("24")]/8
otu_table(ps.ng.tax_most_)[,c("25")] <- otu_table(ps.ng.tax_most_)[,c("25")]/8
otu_table(ps.ng.tax_most_)[,c("26")] <- otu_table(ps.ng.tax_most_)[,c("26")]/8
otu_table(ps.ng.tax_most_)[,c("27")] <- otu_table(ps.ng.tax_most_)[,c("27")]/8
otu_table(ps.ng.tax_most_)[,c("28")] <- otu_table(ps.ng.tax_most_)[,c("28")]/8
otu_table(ps.ng.tax_most_)[,c("29")] <- otu_table(ps.ng.tax_most_)[,c("29")]/4
otu_table(ps.ng.tax_most_)[,c("30")] <- otu_table(ps.ng.tax_most_)[,c("30")]/4
otu_table(ps.ng.tax_most_)[,c("31")] <- otu_table(ps.ng.tax_most_)[,c("31")]/4
otu_table(ps.ng.tax_most_)[,c("32")] <- otu_table(ps.ng.tax_most_)[,c("32")]/4
otu_table(ps.ng.tax_most_)[,c("33")] <- otu_table(ps.ng.tax_most_)[,c("33")]/8
otu_table(ps.ng.tax_most_)[,c("34")] <- otu_table(ps.ng.tax_most_)[,c("34")]/8
otu_table(ps.ng.tax_most_)[,c("35")] <- otu_table(ps.ng.tax_most_)[,c("35")]/8
otu_table(ps.ng.tax_most_)[,c("36")] <- otu_table(ps.ng.tax_most_)[,c("36")]/8
otu_table(ps.ng.tax_most_)[,c("37")] <- otu_table(ps.ng.tax_most_)[,c("37")]/8
otu_table(ps.ng.tax_most_)[,c("38")] <- otu_table(ps.ng.tax_most_)[,c("38")]/8
otu_table(ps.ng.tax_most_)[,c("39")] <- otu_table(ps.ng.tax_most_)[,c("39")]/8
otu_table(ps.ng.tax_most_)[,c("51")] <- otu_table(ps.ng.tax_most_)[,c("51")]/8
otu_table(ps.ng.tax_most_)[,c("40")] <- otu_table(ps.ng.tax_most_)[,c("40")]/6
otu_table(ps.ng.tax_most_)[,c("41")] <- otu_table(ps.ng.tax_most_)[,c("41")]/6
otu_table(ps.ng.tax_most_)[,c("42")] <- otu_table(ps.ng.tax_most_)[,c("42")]/6
otu_table(ps.ng.tax_most_)[,c("43")] <- otu_table(ps.ng.tax_most_)[,c("43")]/6
otu_table(ps.ng.tax_most_)[,c("44")] <- otu_table(ps.ng.tax_most_)[,c("44")]/6
otu_table(ps.ng.tax_most_)[,c("46")] <- otu_table(ps.ng.tax_most_)[,c("46")]/6
otu_table(ps.ng.tax_most_)[,c("47")] <- otu_table(ps.ng.tax_most_)[,c("47")]/7
otu_table(ps.ng.tax_most_)[,c("48")] <- otu_table(ps.ng.tax_most_)[,c("48")]/7
otu_table(ps.ng.tax_most_)[,c("49")] <- otu_table(ps.ng.tax_most_)[,c("49")]/7
otu_table(ps.ng.tax_most_)[,c("50")] <- otu_table(ps.ng.tax_most_)[,c("50")]/7
otu_table(ps.ng.tax_most_)[,c("52")] <- otu_table(ps.ng.tax_most_)[,c("52")]/7
otu_table(ps.ng.tax_most_)[,c("53")] <- otu_table(ps.ng.tax_most_)[,c("53")]/7
otu_table(ps.ng.tax_most_)[,c("55")] <- otu_table(ps.ng.tax_most_)[,c("55")]/7
#plot_bar(ps.ng.tax_most_swab_, x="Phylum", fill = "Phylum", facet_grid = Patient~RoundDay) + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack") + theme(axis.text = element_text(size = theme.size, colour="black"))
plot_bar(ps.ng.tax_most_, x="Phylum", fill="Phylum", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2))
```
```{r, echo=FALSE, warning=FALSE}
#knitr::include_graphics("./Phyloseq_files/figure-html/unnamed-chunk-10-1.png")
#> tax_table(carbom)
#Taxonomy Table: [205 taxa by 7 taxonomic ranks]:
# Domain Supergroup Division Class
#Otu001 "Eukaryota" "Archaeplastida" "Chlorophyta" "Mamiellophyceae"
# Order Family Genus
#Otu001 "Mamiellales" "Bathycoccaceae" "Ostreococcus"
#sample_data(ps.ng.tax)
```
## Bar plots in class level
```{r, echo=TRUE, warning=FALSE}
plot_bar(ps.ng.tax_most_copied, fill="Class") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3))
```
Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black"))
```
\pagebreak
Use color according to class. Do separate panels Stroke and Sex_age.
```{r, echo=TRUE, warning=FALSE}
#-- If existing replicates, to be processed as follows --
plot_bar(ps.ng.tax_most_, x="Class", fill="Class", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3))
```
## Bar plots in order level
```{r, echo=TRUE, warning=FALSE}
plot_bar(ps.ng.tax_most_copied, fill="Order") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=4))
```
Regroup together pre vs post stroke and normalize number of reads in each group using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Order") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=4))
```
\pagebreak
Use color according to order. Do separate panels Stroke and Sex_age.
```{r, echo=TRUE, warning=FALSE}
#FITTING7: regulate the bar height if it has replicates
plot_bar(ps.ng.tax_most_, x="Order", fill="Order", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=4))
```
## Bar plots in family level
```{r, echo=TRUE, warning=FALSE}
plot_bar(ps.ng.tax_most_copied, fill="Family") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=8))
```
Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
```{r, echo=TRUE, warning=FALSE}
plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Family") + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = theme.size, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=8))
```
\pagebreak
Use color according to family. Do separate panels Stroke and Sex_age.
```{r, echo=TRUE, warning=FALSE}
#-- If existing replicates, to be processed as follows --
plot_bar(ps.ng.tax_most_, x="Family", fill="Family", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=8))
```
```{r, echo=FALSE, warning=FALSE}
#MOVE_FROM_ABOVE: ## Bar plots in genus level
#MOVE_FROM_ABOVE: Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
#plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Genus") + geom_bar(aes(), stat="identity", position="stack") +
#scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = theme.size, colour="black")) + theme(legend.position="bottom")
```
\pagebreak
```{r, echo=FALSE, warning=FALSE}
#MOVE_FROM_ABOVE: Use color according to genus. Do separate panels Stroke and Sex_age.
##-- If existing replicates, to be processed as follows --
#plot_bar(ps.ng.tax_most_, x="Genus", fill="Genus", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
#scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = 6, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=18))
```
\pagebreak
# Alpha diversity
Plot Chao1 richness estimator, Observed OTUs, Shannon index, and Phylogenetic diversity.
Regroup together samples from the same group.
```{r, echo=FALSE, warning=FALSE}
# using rarefied data
#FITTING2: CONSOLE:
#gunzip table_even4753.biom.gz
#alpha_diversity.py -i table_even42369.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering/rep_set.tre
#gunzip table_even4753.biom.gz
#alpha_diversity.py -i table_even4753.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering_stool/rep_set.tre
#gunzip table_even4753.biom.gz
#alpha_diversity.py -i table_even4753.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering_swab/rep_set.tre
```
```{r, echo=TRUE, warning=FALSE}
hmp.div_qiime <- read.csv("adiv_even.txt", sep="\t")
colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree")
row.names(hmp.div_qiime) <- hmp.div_qiime$sam_name
div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name")
div.df2 <- div.df[, c("Group", "chao1", "shannon", "observed_otus", "PD_whole_tree")]
colnames(div.df2) <- c("Group", "Chao-1", "Shannon", "OTU", "Phylogenetic Diversity")
#colnames(div.df2)
options(max.print=999999)
#27 H47 830.5000 5.008482 319 10.60177
#FITTING4: if occuring "Computation failed in `stat_signif()`:not enough 'y' observations"
#means: the patient H47 contains only one sample, it should be removed for the statistical p-values calculations.
#delete H47(1)
#div.df2 <- div.df2[-c(3), ]
#div.df2 <- div.df2[-c(55,54, 45,40,39,27,26,25,1), ]
knitr::kable(div.df2) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
#https://uc-r.github.io/t_test
#We can perform the test with t.test and transform our data and we can also perform the nonparametric test with the wilcox.test function.
stat.test.Shannon <- compare_means(
Shannon ~ Group, data = div.df2,
method = "t.test"
)
knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
div_df_melt <- reshape2::melt(div.df2)
#head(div_df_melt)
#https://plot.ly/r/box-plots/#horizontal-boxplot
#http://www.sthda.com/english/wiki/print.php?id=177
#https://rpkgs.datanovia.com/ggpubr/reference/as_ggplot.html
#http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/82-ggplot2-easy-way-to-change-graphical-parameters/
#https://plot.ly/r/box-plots/#horizontal-boxplot
#library("gridExtra")
#par(mfrow=c(4,1))
p <- ggboxplot(div_df_melt, x = "Group", y = "value",
facet.by = "variable",
scales = "free",
width = 0.5,
fill = "gray", legend= "right")
#ggpar(p, xlab = FALSE, ylab = FALSE)
lev <- levels(factor(div_df_melt$Group)) # get the variables
#FITTING4: delete H47(1) in lev
#lev <- lev[-c(3)]
# make a pairwise list that we want to compare.
#my_stat_compare_means
#https://stackoverflow.com/questions/47839988/indicating-significance-with-ggplot2-in-a-boxplot-with-multiple-groups
L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns")
my_stat_compare_means <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE,
method.args = list(), ref.group = NULL, comparisons = NULL,
hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left",
label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03,
symnum.args = list(), geom = "text", position = "identity",
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...)
{
if (!is.null(comparisons)) {
method.info <- ggpubr:::.method_info(method)
method <- method.info$method
method.args <- ggpubr:::.add_item(method.args, paired = paired)
if (method == "wilcox.test")
method.args$exact <- FALSE
pms <- list(...)
size <- ifelse(is.null(pms$size), 0.3, pms$size)
color <- ifelse(is.null(pms$color), "black", pms$color)
map_signif_level <- FALSE
if (is.null(label))
label <- "p.format"
if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) {
if (ggpubr:::.is_empty(symnum.args)) {
map_signif_level <- c(`****` = 1e-04, `***` = 0.001,
`**` = 0.01, `*` = 0.05, ns = 1)
} else {
map_signif_level <- symnum.args
}
if (hide.ns)
names(map_signif_level)[5] <- " "
}
step_increase <- ifelse(is.null(label.y), 0.12, 0)
ggsignif::geom_signif(comparisons = comparisons, y_position = label.y,
test = method, test.args = method.args, step_increase = step_increase,
size = size, color = color, map_signif_level = map_signif_level,
tip_length = tip.length, data = data)
} else {
mapping <- ggpubr:::.update_mapping(mapping, label)
layer(stat = StatCompareMeans, data = data, mapping = mapping,
geom = geom, position = position, show.legend = show.legend,
inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc,
label.y.npc = label.y.npc, label.x = label.x,
label.y = label.y, label.sep = label.sep, method = method,
method.args = method.args, paired = paired, ref.group = ref.group,
symnum.args = symnum.args, hide.ns = hide.ns,
na.rm = na.rm, ...))
}
}
p2 <- p +
stat_compare_means(
method="t.test",
comparisons = list(c("Group1", "Group2"), c("Group1", "Group3"), c("Group1", "Group4"), c("Group1", "Group6"), c("Group1", "Group8"), c("Group2", "Group5"),c("Group4", "Group5"),c("Group4", "Group6"),c("Group4", "Group7"),c("Group6", "Group7")),
label = "p.signif",
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))
)
#comparisons = L.pairs,
#symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05), symbols = c("****", "***", "**", "*")),
#stat_pvalue_manual
#print(p2)
#https://stackoverflow.com/questions/20500706/saving-multiple-ggplots-from-ls-into-one-and-separate-files-in-r
#FITTING3: mkdir figures
ggsave("./figures/alpha_diversity_Group.png", device="png", height = 10, width = 12)
ggsave("./figures/alpha_diversity_Group.svg", device="svg", height = 10, width = 12)
p3 <- p +
stat_compare_means(
method="t.test",
comparisons = list(c("Group2", "Group4"), c("Group2", "Group6"), c("Group4", "Group8"), c("Group6", "Group8")),
label = "p.signif",
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")),
)
#symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05), symbols = c("****", "***", "**", "*")),
#stat_pvalue_manual
#print(p2)
#https://stackoverflow.com/questions/20500706/saving-multiple-ggplots-from-ls-into-one-and-separate-files-in-r
#FITTING3: mkdir figures
ggsave("./figures/alpha_diversity_Group2.png", device="png", height = 10, width = 12)
ggsave("./figures/alpha_diversity_Group2.svg", device="svg", height = 10, width = 12)
```
# Selected alpha diversity
```{r, echo=TRUE, warning=FALSE, fig.cap="Alpha diversity", out.width = '100%', fig.align= "center"}
knitr::include_graphics("./figures/alpha_diversity_Group2.png")
selected_alpha_diversities<-read.csv("selected_alpha_diversities.txt",sep="\t")
knitr::kable(selected_alpha_diversities) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
```
# Beta diversity
```{r, echo=TRUE, warning=FALSE, fig.cap="Beta diversity", out.width = '100%', fig.align= "center"}
#file:///home/jhuang/DATA/Data_Marius_16S/core_diversity_e42369/bdiv_even42369_Group/unweighted_unifrac_boxplots/Group_Stats.txt
beta_diversity_group_stats<-read.csv("unweighted_unifrac_boxplots_Group_Stats.txt",sep="\t")
knitr::kable(beta_diversity_group_stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
#NOTE: Run this Phyloseq0.Rmd, then run the code of MicrobiotaProcess.R to manually generate PCoA.png, then run this Phyloseq.Rmd!
#NOTE: AT_FIRST_DEACTIVATE_THIS_LINE: knitr::include_graphics("./figures/PCoA.png")
```
# Differential abundance analysis
Differential abundance analysis aims to find the differences in the abundance of each taxa between two groups of samples, assigning a significance value to each comparison.
## Group2 vs Group4
```{r, echo=TRUE, warning=FALSE}
library("DESeq2")
#ALTERNATIVE using ps.ng.tax_most_copied: ps.ng.tax (40594) vs. ps.ng.tax_most_copied (166)
ps.ng.tax_sel <- ps.ng.tax
#FITTING5: correct the id of the group members, see FITTING6
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28")]
diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
diagdds$Group <- relevel(diagdds$Group, "Group4")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)
res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
kable(sigtab) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
#rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied))
#subv %in% v
### returns a vector TRUE FALSE
#is.element(subv, v)
### returns a vector TRUE FALSE
library("ggplot2")
theme_set(theme_bw())
scale_fill_discrete <- function(palname = "Set1", ...) {
scale_fill_brewer(palette = palname, ...)
}
x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
x = sort(x)
sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
x = sort(x)
sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
#Error in checkForExperimentalReplicates(object, modelMatrix) :
# The design matrix has the same number of samples and coefficients to fit,
# so estimation of dispersion is not possible. Treating samples
# as replicates was deprecated in v1.20 and no longer supported since v1.22.
```
## Group2 vs Group6
```{r, echo=TRUE, warning=FALSE}
ps.ng.tax_sel <- ps.ng.tax
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("8","9","10","12","13","14", "33","34","35","36","37","38","39","51")]
diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
diagdds$Group <- relevel(diagdds$Group, "Group6")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)
res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
kable(sigtab) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
library("ggplot2")
theme_set(theme_bw())
scale_fill_discrete <- function(palname = "Set1", ...) {
scale_fill_brewer(palette = palname, ...)
}
x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
x = sort(x)
sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
x = sort(x)
sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
```
## Group4 vs Group8
```{r, echo=TRUE, warning=FALSE}
ps.ng.tax_sel <- ps.ng.tax
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("21","22","23","24","25","26","27","28", "47","48","49","50","52","53","55")]
diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
diagdds$Group <- relevel(diagdds$Group, "Group8")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)
res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
kable(sigtab) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
library("ggplot2")
theme_set(theme_bw())
scale_fill_discrete <- function(palname = "Set1", ...) {
scale_fill_brewer(palette = palname, ...)
}
x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
x = sort(x)
sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
x = sort(x)
sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
```
## Group6 vs Group8
```{r, echo=TRUE, warning=FALSE}
ps.ng.tax_sel <- ps.ng.tax
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
diagdds$Group <- relevel(diagdds$Group, "Group8")
diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
resultsNames(diagdds)
res = results(diagdds, cooksCutoff = FALSE)
alpha = 0.05
sigtab = res[which(res$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
kable(sigtab) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
library("ggplot2")
theme_set(theme_bw())
scale_fill_discrete <- function(palname = "Set1", ...) {
scale_fill_brewer(palette = palname, ...)
}
x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
x = sort(x)
sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
x = sort(x)
sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
```
MicrobiotaProcess.R
# -----------------------------------
# ---- prepare the R environment ----
#Rscript MicrobiotaProcess.R
#NOTE: exit R script, then login again R-environment; rm -rf Phyloseq*_cache
rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
# -----------------------------
# ---- 3.1. bridges other tools
##https://github.com/YuLab-SMU/MicrobiotaProcess
##https://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess.html
##https://chiliubio.github.io/microeco_tutorial/intro.html#framework
##https://yiluheihei.github.io/microbiomeMarker/reference/plot_cladogram.html
#BiocManager::install("MicrobiotaProcess")
#install.packages("microeco")
#install.packages("ggalluvial")
#install.packages("ggh4x")
library(MicrobiotaProcess)
library(microeco)
library(ggalluvial)
library(ggh4x)
library(gghalves)
## Convert the phyloseq object to a MicrobiotaProcess object
#mp <- as.MicrobiotaProcess(ps.ng.tax)
#mt <- phyloseq2microeco(ps.ng.tax) #--> ERROR
#abundance_table <- mt$abun_table
#taxonomy_table <- mt$tax_table
#ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
#ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)
##OPTION1: take all samples, prepare mpse_abund!
##mpse <- ps.ng.tax %>% as.MPSE()
#mpse_abund <- ps.ng.tax_abund %>% as.MPSE()
##OPTION2: take partial samples, prepare mpse_abund
ps.ng.tax_sel <- ps.ng.tax #IMPORTANT
##otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")]
##NOTE: Only choose Group2, Group4, Group6, Group8
#> ps.ng.tax_sel
#otu_table() OTU Table: [ 37465 taxa and 29 samples ]
#sample_data() Sample Data: [ 29 samples by 10 sample variables ]
#tax_table() Taxonomy Table: [ 37465 taxa by 7 taxonomic ranks ]
#phy_tree() Phylogenetic Tree: [ 37465 tips and 37461 internal nodes ]
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
#For quick calculation
#otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[,c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
mpse_abund <- ps.ng.tax_sel %>% as.MPSE()
# -----------------------------------
# ---- 3.2. alpha diversity analysis
# Rarefied species richness + RareAbundance
mpse_abund %<>% mp_rrarefy()
# 'chunks' represent the split number of each sample to calculate alpha
# diversity, default is 400. e.g. If a sample has total 40000
# reads, if chunks is 400, it will be split to 100 sub-samples
# (100, 200, 300,..., 40000), then alpha diversity index was
# calculated based on the sub-samples.
# '.abundance' the column name of abundance, if the '.abundance' is not be
# rarefied calculate rarecurve, user can specific 'force=TRUE'.
# + RareAbundance
mpse_abund %<>%
mp_cal_rarecurve(
.abundance = RareAbundance,
chunks = 400
)
# The RareAbundanceRarecurve column will be added the colData slot
# automatically (default action="add")
mpse_abund %>% print(width=180, n=100)
# default will display the confidence interval around smooth.
# se=TRUE
p1 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = Observe,
)
p2 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = Observe,
.group = pre_post_stroke
) +
scale_color_manual(values=c("#00A087FF", "#3C5488FF")) +
scale_fill_manual(values=c("#00A087FF", "#3C5488FF"), guide="none")
# combine the samples belong to the same groups if plot.group=TRUE
p3 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = "Observe",
.group = pre_post_stroke,
plot.group = TRUE
) +
scale_color_manual(values=c("#00A087FF", "#3C5488FF")) +
scale_fill_manual(values=c("#00A087FF", "#3C5488FF"),guide="none")
png("rarefaction_of_samples_or_groups.png", width=1080, height=600)
p1 + p2 + p3
dev.off()
# ------------------------------------------
# 3.3. calculate alpha index and visualization
library(ggplot2)
library(MicrobiotaProcess)
mpse_abund %<>%
mp_cal_alpha(.abundance=RareAbundance)
mpse_abund
f1 <- mpse_abund %>%
mp_plot_alpha(
.group=pre_post_stroke,
.alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
) +
scale_fill_manual(values=c("#00A087FF", "#3C5488FF"), guide="none") +
scale_color_manual(values=c("#00A087FF", "#3C5488FF"), guide="none")
f2 <- mpse_abund %>%
mp_plot_alpha(
.alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
)
png("alpha_diversity_comparison.png", width=1000, height=1000)
f1 / f2
dev.off()
# -------------------------------------------
# 3.4. The visualization of taxonomy abundance (Class)
mpse_abund %<>%
mp_cal_abundance( # for each samples
.abundance = RareAbundance
) %>%
mp_cal_abundance( # for each groups
.abundance=RareAbundance,
.group=pre_post_stroke
)
mpse_abund
# visualize the relative abundance of top 20 phyla for each sample.
p1 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group=time,
taxa.class = Class,
topn = 20,
relative = TRUE
)
# visualize the abundance (rarefied) of top 20 phyla for each sample.
p2 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group=time,
taxa.class = Class,
topn = 20,
relative = FALSE
)
png("relative_abundance_and_abundance.png", width= 1200, height=600) #NOT PRODUCED!
p1 / p2
dev.off()
h1 <- mpse_abund %>%
mp_plot_abundance(
.abundance = RareAbundance,
.group = pre_post_stroke,
taxa.class = Class,
relative = TRUE,
topn = 20,
geom = 'heatmap',
features.dist = 'euclidean',
features.hclust = 'average',
sample.dist = 'bray',
sample.hclust = 'average'
)
h2 <- mpse_abund %>%
mp_plot_abundance(
.abundance = RareAbundance,
.group = pre_post_stroke,
taxa.class = Class,
relative = FALSE,
topn = 20,
geom = 'heatmap',
features.dist = 'euclidean',
features.hclust = 'average',
sample.dist = 'bray',
sample.hclust = 'average'
)
# the character (scale or theme) of figure can be adjusted by set_scale_theme
# refer to the mp_plot_dist
png("relative_abundance_and_abundance_heatmap.png", width= 1200, height=600)
aplot::plot_list(gglist=list(h1, h2), tag_levels="A")
dev.off()
# visualize the relative abundance of top 20 class for each .group (pre_post_stroke)
p3 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group=pre_post_stroke,
taxa.class = Class,
topn = 20,
plot.group = TRUE
)
# visualize the abundance of top 20 phyla for each .group (time)
p4 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group= pre_post_stroke,
taxa.class = Class,
topn = 20,
relative = FALSE,
plot.group = TRUE
)
png("relative_abundance_and_abundance_groups.png", width= 1000, height=1000)
p3 / p4
dev.off()
# ---------------------------
# 3.5. Beta diversity analysis
# ---------------------------------------------
# 3.5.1 The distance between samples or groups
# standardization
# mp_decostand wraps the decostand of vegan, which provides
# many standardization methods for community ecology.
# default is hellinger, then the abundance processed will
# be stored to the assays slot.
mpse_abund %<>%
mp_decostand(.abundance=Abundance)
mpse_abund
# calculate the distance between the samples.
# the distance will be generated a nested tibble and added to the
# colData slot.
mpse_abund %<>% mp_cal_dist(.abundance=hellinger, distmethod="bray")
mpse_abund
# mp_plot_dist provides there methods to visualize the distance between the samples or groups
# when .group is not provided, the dot heatmap plot will be return
p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray)
png("distance_between_samples.png", width= 1000, height=1000)
p1
dev.off()
# when .group is provided, the dot heatmap plot with group information will be return.
p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = pre_post_stroke)
# The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function.
p2 %>% set_scale_theme(
x = scale_fill_manual(
values=c("orange", "deepskyblue"),
guide = guide_legend(
keywidth = 1,
keyheight = 0.5,
title.theme = element_text(size=8),
label.theme = element_text(size=6)
)
),
aes_var = pre_post_stroke # specific the name of variable
) %>%
set_scale_theme(
x = scale_color_gradient(
guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
),
aes_var = bray
) %>%
set_scale_theme(
x = scale_size_continuous(
range = c(0.1, 3),
guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
),
aes_var = bray
)
png("distance_between_samples_with_group_info.png", width= 1000, height=1000)
p2
dev.off()
# when .group is provided and group.test is TRUE, the comparison of different groups will be returned
p3 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = pre_post_stroke, group.test=TRUE, textsize=2)
png("comparison_of_distance.png", width= 1000, height=1000)
p3
dev.off()
# -----------------------
# 3.5.2 The PCoA analysis
#install.packages("corrr")
library(corrr)
#install.packages("ggside")
library(ggside)
mpse_abund %<>%
mp_cal_pcoa(.abundance=hellinger, distmethod="bray")
# The dimensions of ordination analysis will be added the colData slot (default).
mpse_abund
#> methods(class=class(mpse_abund))
# [1] [ [[<- [<-
# [4] $ $<- arrange
# [7] as_tibble as.data.frame as.phyloseq
#[10] coerce coerce<- colData<-
#[13] distinct filter group_by
#[16] left_join mp_adonis mp_aggregate_clade
#[19] mp_aggregate mp_anosim mp_balance_clade
#[22] mp_cal_abundance mp_cal_alpha mp_cal_cca
#[25] mp_cal_clust mp_cal_dca mp_cal_dist
#[28] mp_cal_nmds mp_cal_pca mp_cal_pcoa
#[31] mp_cal_pd_metric mp_cal_rarecurve mp_cal_rda
#[34] mp_cal_upset mp_cal_venn mp_decostand
#[37] mp_diff_analysis mp_diff_clade mp_envfit
#[40] mp_extract_abundance mp_extract_assays mp_extract_dist
#[43] mp_extract_feature mp_extract_internal_attr mp_extract_rarecurve
#[46] mp_extract_refseq mp_extract_sample mp_extract_taxonomy
#[49] mp_extract_tree mp_filter_taxa mp_mantel
#[52] mp_mrpp mp_plot_abundance mp_plot_alpha
#[55] mp_plot_diff_boxplot mp_plot_diff_res mp_plot_dist
#[58] mp_plot_ord mp_plot_rarecurve mp_plot_upset
#[61] mp_plot_venn mp_rrarefy mp_select_as_tip
#[64] mp_stat_taxa mutate otutree
#[67] otutree<- print pull
#[70] refsequence refsequence<- rename
#[73] rownames<- select show
# [ reached getOption("max.print") -- omitted 6 entries ]
#see '?methods' for accessing help and source code
# We also can perform adonis or anosim to check whether it is significant to the dissimilarities of groups.
mpse_abund %<>%
mp_adonis(.abundance=hellinger, .formula=~Group, distmethod="bray", permutations=9999, action="add")
mpse_abund %>% mp_extract_internal_attr(name=adonis)
# ("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")
#div.df2[div.df2 == "Group1"] <- "aged.post"
#div.df2[div.df2 == "Group3"] <- "young.post"
#div.df2[div.df2 == "Group5"] <- "aged.post"
#div.df2[div.df2 == "Group7"] <- "young.post"
# ("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")
#div.df2[div.df2 == "Group2"] <- "aged.pre"
#div.df2[div.df2 == "Group4"] <- "young.pre"
#div.df2[div.df2 == "Group6"] <- "aged.pre"
#div.df2[div.df2 == "Group8"] <- "young.pre"
#Group1: f.aged and post
#Group2: f.aged and pre
#Group3: f.young and post
#Group4: f.young and pre
#Group5: m.aged and post
#Group6: m.aged and pre
#Group7: m.young and post
#Group8: m.young and pre
#[,c("1","2","5","6","7", "8","9","10","12","13","14")]
#[,c("15","16","17","18","19","20", "21","22","23","24","25","26","27","28")]
#[,c("29","30","31","32", "33","34","35","36","37","38","39","51")]
#[,c("40","41","42","43","44","46", "47","48","49","50","52","53","55")]
p1 <- mpse_abund %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = 2.4,
.alpha = 1,
ellipse = TRUE,
show.legend = FALSE # don't display the legend of stat_ellipse
) +
scale_fill_manual(
#values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
#values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
values = c("#1f78b4", "#33a02c", "#e31a1c", "#6a3d9a"),
guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12))
) +
scale_color_manual(
#values=c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
#values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
values = c("#1f78b4", "#33a02c", "#e31a1c", "#6a3d9a"),
guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12))
)
#scale_fill_manual(values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500")) +
#scale_color_manual(values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"))
#scale_fill_manual(values=c("#00A087FF", "#3C5488FF")) +
#scale_color_manual(values=c("#00A087FF", "#3C5488FF"))
#png("PCoA.png", width= 1000, height=1000)
svg("PCoA.svg", width= 11, height=10)
#svg("PCoA_.svg", width=10, height=10)
#svg("PCoA.svg")
pdf("PCoA.pdf")
p1
dev.off()
# The size of point also can be mapped to other variables such as Observe, or Shannon
# Then the alpha diversity and beta diversity will be displayed simultaneously.
p2 <- mpse_abund %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = Shannon,
.alpha = Observe,
ellipse = TRUE,
show.legend = FALSE # don't display the legend of stat_ellipse
) +
scale_fill_manual(
values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
) +
scale_color_manual(
values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
) +
scale_size_continuous(
range=c(0.5, 3),
guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
)
# ------------------------------------------
# 3.5.3 Hierarchical cluster (tree) analysis
#input should contain hellinger!
mpse_abund %<>%
mp_cal_clust(
.abundance = hellinger,
distmethod = "bray",
hclustmethod = "average", # (UPGAE)
action = "add" # action is used to control which result will be returned
)
mpse_abund
# if action = 'add', the result of hierarchical cluster will be added to the MPSE object
# mp_extract_internal_attr can extract it. It is a treedata object, so it can be visualized
# by ggtree.
sample.clust <- mpse_abund %>% mp_extract_internal_attr(name='SampleClust')
sample.clust
library(ggtree)
p_cluster <- ggtree(sample.clust) +
geom_tippoint(aes(color=pre_post_stroke)) +
geom_tiplab(as_ylab = TRUE) +
ggplot2::scale_x_continuous(expand=c(0, 0.01))
png("hierarchical_cluster1.png", width= 1000, height=800)
p_cluster
dev.off()
library(ggtreeExtra)
library(ggplot2)
# # Extract relative abundance of phyla
# phyla.tb <- mouse.time.mpse %>%
# mp_extract_abundance(taxa.class=Phylum, topn=30)
# # The abundance of each samples is nested, it can be flatted using the unnest of tidyr.
# phyla.tb %<>% tidyr::unnest(cols=RareAbundanceBySample) %>% dplyr::rename(Phyla="label")
# phyla.tb
#
# p1 <- p +
# geom_fruit(
# data=phyla.tb,
# geom=geom_col,
# mapping = aes(x = RelRareAbundanceBySample,
# y = Sample,
# fill = Phyla
# ),
# orientation = "y",
# #offset = 0.4,
# pwidth = 3,
# axis.params = list(axis = "x",
# title = "The relative abundance of phyla (%)",
# title.size = 4,
# text.size = 2,
# vjust = 1),
# grid.params = list()
# )
# png("hierarchical_cluster2.png", width = 1000, height = 800)
# p1
# dev.off()
# Extract relative abundance of classes
mpse_abund %>% print(width=150)
class.tb <- mpse_abund %>%
mp_extract_abundance(taxa.class = Class, topn = 30)
# Flatten and rename the columns
class.tb %<>% tidyr::unnest(cols = RareAbundanceBySample) %>% dplyr::rename(Class = "label")
# View the data frame
class.tb
# Create the plot
p1 <- p +
geom_fruit(
data = class.tb,
geom = geom_col,
mapping = aes(x = RelRareAbundanceBySample,
y = Sample,
fill = Class
),
orientation = "y",
pwidth = 3,
axis.params = list(axis = "x",
title = "The relative abundance of classes (%)",
title.size = 4,
text.size = 2,
vjust = 1),
grid.params = list()
)
# Save the plot to a file
png("hierarchical_cluster2.png", width = 1000, height = 800)
print(p1)
dev.off()
# -----------------------
# 3.6 Biomarker discovery
library(ggtree)
library(ggtreeExtra)
library(ggplot2)
library(MicrobiotaProcess)
library(tidytree)
library(ggstar)
library(forcats)
mpse_abund %>% print(width=150)
mpse_abund %<>%
mp_cal_abundance( # for each samples
.abundance = RareAbundance
) %>%
mp_cal_abundance( # for each groups
.abundance=RareAbundance,
.group=pre_post_stroke
)
mpse_abund
mpse_abund %<>%
mp_diff_analysis(
.abundance = RelRareAbundanceBySample,
.group = pre_post_stroke,
first.test.alpha = 0.01
)
# The result is stored to the taxatree or otutree slot, you can use mp_extract_tree to extract the specific slot.
taxa.tree <- mpse_abund %>%
mp_extract_tree(type="taxatree")
taxa.tree
## And the result tibble of different analysis can also be extracted
## with tidytree (>=0.3.5)
taxa.tree %>% select(label, nodeClass, LDAupper, LDAmean, LDAlower, Sign_pre_post_stroke, pvalue, fdr) %>% dplyr::filter(!is.na(fdr))
taxa.tree %>% print(width=150, n=100)
#.data, layout, tree.type, .taxa.class, tiplab.size, offset.abun, pwidth.abun, offset.effsize, pwidth.effsize, group.abun, tiplab.linetype
p <- mpse_abund %>%
mp_plot_diff_res(
group.abun = TRUE,
pwidth.abun=0.05,
offset.abun=0.02,
pwidth.effsize=0.3,
offset.effsize=0.46,
tiplab.size = 4.9
) +
scale_fill_manual(values=c("deepskyblue", "orange")) +
scale_fill_manual(
aesthetics = "fill_new", # The fill aes was renamed to "fill_new" for the abundance dotplot layer
values = c("deepskyblue", "orange")
) +
scale_fill_manual(
aesthetics = "fill_new_new", # The fill aes for hight light layer of tree was renamed to 'fill_new_new'
values = c("#E41A1C", "#377EB8", "#4DAF4A",
"#984EA3", "#FF7F00", "#FFFF33",
"#A65628", "#F781BF", "#00FFFF", "#999999"
)
) +
theme(
axis.title = element_text(size = 28), # Font size for axis titles
axis.text = element_text(size = 28), # Font size for axis text
plot.title = element_text(size = 28), # Font size for plot title
legend.title = element_text(size = 16), # Font size for legend title
legend.text = element_text(size = 14) # Font size for legend text
)
#p$layers[[2]]$geom <- geom_tiplab(fontsize = 22) # Change 12 to the desired font size
png("differently_expressed_otu.png", width=2000, height=2000)
#svg("p7.svg",width=8, height=8)
p
dev.off()
f <- mpse_abund %>%
mp_plot_diff_cladogram(
label.size = 2.5,
hilight.alpha = .3,
bg.tree.size = .5,
bg.point.size = 2,
bg.point.stroke = .25
) +
scale_fill_diff_cladogram( # set the color of different group.
values = c('deepskyblue', 'orange')
) +
scale_size_continuous(range = c(1, 4))
#png("cladogram.png", width=1000, height=1000)
svg("cladogram.svg", width=10, height=10)
f
dev.off()
## Extract the OTU table and taxonomy table from the phyloseq object
#otu_table <- phyloseq::otu_table(ps.ng.tax_abund) %>% as.data.frame() %>% as.matrix()
#tax_table <- phyloseq::tax_table(ps.ng.tax_abund) %>% as.data.frame() %>% as.matrix()
#write.csv(otu_table, file="otu_table.csv")
#write.csv(tax_table, file="tax_table.csv")
#~/Tools/csv2xls-0.4/csv_to_xls.py otu_table.csv tax_table.csv -d',' -o otu_tax.xls
3.1. Environment Setup: It sets up a Conda environment named picrust2, using the conda create command and then activates this environment using conda activate picrust2.
#https://github.com/picrust/picrust2/wiki/PICRUSt2-Tutorial-(v2.2.0-beta)#minimum-requirements-to-run-full-tutorial
conda create -n picrust2 -c bioconda -c conda-forge picrust2=2.2.0_b
conda activate picrust2
3.2. Data Preparation: The script creates a new directory called picrust2_out, then enters it using mkdir and cd commands. It then identifies input files that are needed for the analysis: metadata.tsv, seqs.fna, table.biom. The biom commands are used to inspect and convert the BIOM format files.
mkdir picrust2_out
cd picrust2_out
# Identifying input data
# Note: Replace the paths and filenames with your actual data if different
# metadata.tsv == ../map_corrected.txt
# seqs.fna == ../clustering/seqs.fna
# table.biom == ../core_diversity_e42369/table_even42369.biom
# Inspect and convert the BIOM format files
biom head -i ../core_diversity_e42369/table_even42369.biom
biom summarize-table -i ../core_diversity_e42369/table_even42369.biom
biom convert -i ../core_diversity_e42369/table_even42369.biom -o table_even42369.tsv --to-tsv
3.3. Running PiCRUST2: The place_seqs.py command aligns the input sequences to a reference tree. The hsp.py commands generate hidden state prediction for multiple functional categories.
#insert reads into reference tree using EPA-NG
cp ../clustering/rep_set.fna ./
grep ">" rep_set.fna | wc -l #44238
vim table_even42369.tsv #40596-2
samtools faidx rep_set.fna
cut -f1-1 table_even42369.tsv > table_even42369.id
#manually modify table_even42369.id by replacing "\n" with " >> seqs.fna\nsamtools faidx rep_set.fna "
run table_even42369.id
rm -rf intermediate/
place_seqs.py -s seqs.fna -o out.tre -p 64 --intermediate intermediate/place_seqs
#castor: Efficient Phylogenetics on Large Trees
#https://github.com/picrust/picrust2/wiki/Hidden-state-prediction
hsp.py -i 16S -t out.tre -o 16S_predicted_and_nsti.tsv.gz -p 100 -n
hsp.py -i COG -t out.tre -o COG_predicted.tsv.gz -p 100
hsp.py -i PFAM -t out.tre -o PFAM_predicted.tsv.gz -p 100
hsp.py -i KO -t out.tre -o KO_predicted.tsv.gz -p 100
hsp.py -i EC -t out.tre -o EC_predicted.tsv.gz -p 100
hsp.py -i TIGRFAM -t out.tre -o TIGRFAM_predicted.tsv.gz -p 100
hsp.py -i PHENO -t out.tre -o PHENO_predicted.tsv.gz -p 100
>In this table the predicted copy number of all Enzyme Classification (EC) numbers is shown for each ASV. The NSTI values per ASV are not in this table since we did not specify the -n option. EC numbers are a type of gene family defined based on the chemical reactions they catalyze. For instance, EC:1.1.1.1 corresponds to alcohol dehydrogenase. In this tutorial we are focusing on EC numbers since they can be used to infer MetaCyc pathway levels (see below).
zless -S EC_predicted.tsv.gz
sequence EC:1.1.1.1 EC:1.1.1.10 EC:1.1.1.100 ...
20e568023c10eaac834f1c110aacea18 2 0 3 ...
23fe12a325dfefcdb23447f43b6b896e 0 0 1 ...
288c8176059111c4c7fdfb0cd5afce64 1 0 1 ...
...
##Why import the tsv file to MyData?
#MyData <- read.csv(file="./COG_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 4598 e.g. COG5665
#MyData <- read.csv(file="./PFAM_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 11089 e.g. PF17225
#MyData <- read.csv(file="./KO_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 10543 e.g. K19791
#MyData <- read.csv(file="./EC_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 2913 e.g. EC.6.6.1.2
#MyData <- read.csv(file="./16S_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 1 e.g. X16S_rRNA_Count
#MyData <- read.csv(file="./TIGRFAM_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 4287 e.g. TIGR04571
#MyData <- read.csv(file="./PHENO_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 41 e.g. Use_of_nitrate_as_electron_acceptor, Xylose_utilizing
3.4. The metagenome_pipeline.py commands perform metagenomic prediction for several functional categories. Predicted gene families weighted by the relative abundance of ASVs in their community. In other words, we are interested in inferring the metagenomes of the communities.
#Generate metagenome predictions using EC numbers https://en.wikipedia.org/wiki/List_of_enzymes#Category:EC_1.1_(act_on_the_CH-OH_group_of_donors)
metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f COG_predicted.tsv.gz -o COG_metagenome_out --strat_out
metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f EC_predicted.tsv.gz -o EC_metagenome_out --strat_out
metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f KO_predicted.tsv.gz -o KO_metagenome_out --strat_out
metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f PFAM_predicted.tsv.gz -o PFAM_metagenome_out --strat_out
metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f TIGRFAM_predicted.tsv.gz -o TIGRFAM_metagenome_out --strat_out
3.5. Pathway-level inference: By default this script infers MetaCyc pathway abundances based on EC number abundances, although different gene families and pathways can also be optionally specified. This script performs a number of steps by default, which are based on the approach implemented in HUMAnN2:
pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_contrib.tsv.gz -o pathways_out -p 15
#Note that the path of map files is under /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles
pathway_pipeline.py -i COG_metagenome_out/pred_metagenome_contrib.tsv.gz -o KEGG_pathways_out -p 15 --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
#Mapping predicted KO abundances to legacy KEGG pathways (with stratified output that represents contributions to community-wide abundances):
pathway_pipeline.py -i KO_metagenome_out/pred_metagenome_strat.tsv.gz -o KEGG_pathways_out --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
#Map EC numbers to MetaCyc pathways and get stratified output corresponding to contribution of predicted gene family abundances within each predicted genome:
pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -o pathways_out_per_seq --per_sequence_contrib --per_sequence_abun EC_metagenome_out/seqtab_norm.tsv.gz --per_sequence_function EC_predicted.tsv.gz
3.6. Add functional descriptions: Finally, it can be useful to have a description of each functional id in the output abundance tables. The below commands will add these descriptions as new column in gene family and pathway abundance tables
#--6.1. Add descriptions in gene family tables
add_descriptions.py -i COG_metagenome_out/pred_metagenome_unstrat.tsv.gz -m COG -o COG_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
add_descriptions.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -m EC -o EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
add_descriptions.py -i KO_metagenome_out/pred_metagenome_unstrat.tsv.gz -m KO -o KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz # EC and METACYC is a pair, EC for gene_annotation and METACYC for pathway_annotation
add_descriptions.py -i PFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m PFAM -o PFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
add_descriptions.py -i TIGRFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m TIGRFAM -o TIGRFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
#--6.2. Add descriptions in pathway abundance tables
add_descriptions.py -i pathways_out/path_abun_unstrat.tsv.gz -m METACYC -o pathways_out/path_abun_unstrat_descrip.tsv.gz
gunzip path_abun_unstrat_descrip.tsv.gz
#Error - no rows remain after regrouping input table. The default pathway and regroup mapfiles are meant for EC numbers. Note that KEGG pathways are not supported since KEGG is a closed-source database, but you can input custom pathway mapfiles if you have access. If you are using a custom function database did you mean to set the --no-regroup flag and/or change the default pathways mapfile used?
#If ERROR --> USE the METACYC for downstream analyses!!!
add_descriptions.py -i pathways_out/path_abun_unstrat.tsv.gz -o KEGG_pathways_out/path_abun_unstrat_descrip.tsv.gz --custom_map_table /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles/KEGG_pathways_info.tsv.gz
3.7. Visualization
#7.1 STAMP
#https://github.com/picrust/picrust2/wiki/STAMP-example
conda activate stamp; #import path_abun_unstrat_descrip.tsv to STAMP
conda deactivate
conda install -c bioconda stamp
#conda install -c bioconda stamp
#sudo pip install pyqi
#sudo apt-get install libblas-dev liblapack-dev gfortran
#sudo apt-get install freetype* python-pip python-dev python-numpy python-scipy python-matplotlib
#sudo pip install STAMP
#conda install -c bioconda stamp
conda create -n stamp -c bioconda/label/cf201901 stamp
brew install pyqt
#DEBUG the environment
conda install pyqt=4
#conda install icu=56
e.g. path_abun_unstrat_descrip.tsv.gz and metadata.tsv from the tutorial)
cut -d$'\t' -f1 map_corrected.txt > 1
cut -d$'\t' -f5 map_corrected.txt > 5
cut -d$'\t' -f6 map_corrected.txt > 6
paste -d$'\t' 1 5 > 1_5
paste -d$'\t' 1_5 6 > metadata.tsv
#SampleID --> SampleID
SampleID Facility Genotype
100CHE6KO PaloAlto KO
101CHE6WT PaloAlto WT
#7.2. ALDEx2
https://bioconductor.org/packages/release/bioc/html/ALDEx2.html
#7.3. Convert png to svg and pdf
inkscape error_bar.png --export-plain-svg=error_bar.svg (embbed)
sudo apt update
sudo apt install autotrace
sudo apt-get install -y libpng-dev libtiff-dev imagemagick
git clone https://github.com/autotrace/autotrace.git
cd autotrace
#sudo apt install intltool
#sudo apt install gettext libglib2.0-dev
#sudo apt install libtool libtool-bin
#sudo apt install automake
sudo apt-get install libxml-parser-perl
./autogen.sh
./configure
make
autotrace -output-format svg -output-file error_bar.svg error_bar.png
点赞本文的读者
还没有人对此文章表态
没有评论
QIIME + Phyloseq + MicrobiotaProcess (v1)
Plotting Alpha Diversities from 16S rRNA Sequencing Data
© 2023 XGenes.com Impressum