This step generates a circular phylogenetic tree, a heatmap, and a multiple alignment sequence in a single figure. The tree is constructed from the aligned protein sequence “yopE_alignedprotein.fasta” and the heatmap is based on the “typingyopE.csv” data. The multiple alignment sequence is added into the final figure by extracting specific sequences from the aligned protein sequence file.
library(ggtree)
library(ggplot2)
library(Biostrings)
library(stringr)
library(dplyr)
library(ape)
library(ggmsa)
#install.packages("cowplot")
library(cowplot)
setwd("/home/jhuang/DATA/Data_Gunnar_Yersiniomics/plotTreeHeatmap/")
# -- edit tree --
#https://icytree.org/
#0.000780
#info <- read.csv("typing_198.csv")
info <- read.csv("typing_yopE_.csv")
info$name <- info$Isolate
#rownames(info) <- info$name
info$yopE <- factor(info$yopE)
#tree <- read.tree("core_gene_alignment_fasttree_directly_from_186isoaltes.tree") --> NOT GOOD!
#tree <- read.tree("raxml.tree")
#tree <- read.tree("yopK_alignment_modified.tree")
tree <- read.tree("yopE_aligned_protein.tree")
#tree <- read.tree("core_gene_raxml.raxml.bestTree")
cols <- c(Yes='purple2', No='skyblue2')
# -- tree --
tree_plot <- ggtree(tree, layout='circular', branch.length='none') %<+% info + scale_color_manual(values=cols) + geom_tiplab2(aes(label=name), offset=1) + geom_tippoint(aes(color=yopE))
#+ scale_color_manual(values=cols) + geom_tiplab2(aes(label=name), offset=1)
#, geom='text', align=TRUE, linetype=NA, hjust=1.8,check.overlap=TRUE, size=3.3
#difference between geom_tiplab and geom_tiplab2?
#+ theme(axis.text.x = element_text(angle = 30, vjust = 0.5)) + theme(axis.text = element_text(size = 20)) + scale_size(range = c(1, 20))
#font.size=10,
png("ggtree2.png", width=1260, height=1260)
#png("ggtree.png", width=1000, height=1000)
#svg("ggtree.svg", width=1260, height=1260)
tree_plot
dev.off()
# -- heatmap --
#heatmapData2 <- info %>% select(Isolate, cgMLST, Lineage)
heatmapData2 <- info %>% select(Isolate, Species)
rn <- heatmapData2$Isolate
heatmapData2$Isolate <- NULL
heatmapData2 <- as.data.frame(sapply(heatmapData2, as.character))
rownames(heatmapData2) <- rn
#"1","2","3","4","9","12","13","14","16","18", "19","30","32","36","39","41","42","43","44","53", "64","79","83","84","92","140","148","154","171","172", "173","194","215","217","252","277","279","282","290","312", "335","NA"
#https://bookdown.org/hneth/ds4psy/D-3-apx-colors-basics.html
#"blue","cyan", "skyblue2", "azure3","blueviolet","darkgoldenrod", "tomato","mediumpurple4","indianred",
#"cornflowerblue","darkgreen","seagreen3","tan","red","green","orange","pink","brown","magenta", "cornflowerblue","darkgreen","red","tan","brown",
#heatmap.colours <- c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "lightcyan3","green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "maroon","lightgreen", "darkgreen","seagreen3","tan","red", "navyblue", "gold", "green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "lightcyan3", "maroon","lightgreen", "darkgrey")
#names(heatmap.colours) <- c("pestis","pseudotuberculosis","similis","enterocolitica","frederiksenii","kristensenii","occitanica","intermedia","hibernica","canariae","alsatica","rohdei","massiliensis","bercovieri","aleksiciae","mollaretii","aldovae","ruckeri","entomophaga", "1","1Aa","1B","2","2/3-9a","2/3-9b","4","5","6","8","10","11","12","13","14","16","17","29","NA")
heatmap.colours <- c("cornflowerblue","darkgreen","seagreen3","tan","red", "navyblue", "gold", "lightcyan3","green","orange","pink","purple","magenta","brown", "darksalmon","chocolate4","darkkhaki", "maroon","lightgreen")
names(heatmap.colours) <- c("pestis","pseudotuberculosis","similis","enterocolitica","frederiksenii","kristensenii","occitanica","intermedia","hibernica","canariae","alsatica","rohdei","massiliensis","bercovieri","aleksiciae","mollaretii","aldovae","ruckeri","entomophaga")
#"cornflowerblue","darkgreen","seagreen3","tan","red","green","orange","pink","brown","magenta","cornflowerblue",
#"2.MED","4.ANT","2.ANT","1.ORI","1.IN","1.ANT","0.ANT3","0.PE4","0.PE3","0.PE2","1a",
#mydat$Regulation <- factor(mydat$Regulation, levels=c("up","down"))
#rochesterensis-->occitanica
png("ggtree_and_gheatmap_yopE.png", width=1000, height=800)
#png("ggtree_and_gheatmap.png", width=1290, height=1000)
#png("ggtree_and_gheatmap.png", width=1690, height=1400)
#svg("ggtree_and_gheatmap.svg", width=1290, height=1000)
#svg("ggtree_and_gheatmap.svg", width=17, height=15)
tree_heatmap_plot <- gheatmap(tree_plot, heatmapData2, width=0.2,colnames_position="top", colnames_angle=90, colnames_offset_y = 0.1, hjust=0.5, font.size=4, offset = 8) + scale_fill_manual(values=heatmap.colours) + theme(legend.text = element_text(size = 14)) + theme(legend.title = element_text(size = 14)) + guides(fill=guide_legend(title=""), color = guide_legend(override.aes = list(size = 5)))
tree_heatmap_plot
dev.off()
#samtools faidx yopE_aligned_protein_.fasta "1522" > yopE_aligned_protein_sorted.fasta
#samtools faidx yopE_aligned_protein_.fasta "Pestoides_F_bis" >> yopE_aligned_protein_sorted.fasta
#...
#samtools faidx yopE_aligned_protein_.fasta "8081" >> yopE_aligned_protein_sorted.fasta
#samtools faidx yopE_aligned_protein_.fasta "WA" >> yopE_aligned_protein_sorted.fasta
#https://bioconductor.org/packages/devel/bioc/vignettes/ggtreeExtra/inst/doc/ggtreeExtra.html
#https://github.com/YuLab-SMU/supplemental-ggmsa
#https://github.com/YuLab-SMU/ggmsa
library(ggmsa)
library(ggplot2)
library(ggtree)
#library(gggenes)
library(ape)
library(Biostrings)
library(ggnewscale)
library(dplyr)
library(ggtreeExtra)
library(phangorn)
library(RColorBrewer)
library(patchwork)
library(ggplotify)
library(aplot)
library(magick)
library(treeio)
#data <- "../supplemental-ggmsa-main/data/s_RBD.fasta"
data <- "yopE_aligned_protein_.fasta"
#data <- "yopE_aligned_protein_sorted.fasta"
#dms <- read.csv("data/DMS.csv")
#del <- c("expr_lib1", "expr_lib2","expr_avg","bind_lib1","bind_lib2")
#dms <- dms[,!colnames(dms) %in% del]
tidymsa <- tidy_msa(data)
#tidymsa <- assign_dms(tidymsa, dms)
#Mapping the position to the protein-protein interaction plot
#tidymsa$position <- tidymsa$position + 330
#dms = TRUE,
png("alignment_yopE.png", width=1100, height=5400)
msa_plot <- ggplot() +
geom_msa(data = tidymsa, char_width = 0.5, seq_name = TRUE, show.legend = TRUE) + theme_msa() + facet_msa(50)
#scale_fill_gradientn(name = "ACE2 binding", colors = c(colRD(75),colBU(25)))
msa_plot
dev.off()
# #Combine the ggtree and ggmsa plots using the cowplot package:
# combined_plot <- cowplot::plot_grid(msa_plot, tree_heatmap_plot, ncol = 1, align = "v", axis = "l", rel_heights = c(3, 2))
# ggsave("combined_plot.png", combined_plot, width = 10, height = 10, dpi = 300)