Generation of Heatmap from DEGs Data and Annotation of Identified Gene Clusters

gene_x · 0 likes · 645 views

Tags: plot, R, scripts

This script is structured to process gene expression data, specifically DEGs (Differentially Expressed Genes) and create a heatmap visualizing the patterns of the data. The steps involved are as follows:

  1. Package Installation and Library Loading: The script first ensures that essential packages are installed and then loads them. Some of the key packages include "gplots" for generating heatmaps, "readxl" and "writexl" for reading and writing Excel data, and "biomaRt" for fetching gene annotation data from Ensembl.

  2. Data Input: It reads in the gene expression data from an Excel file named "DEGs_heatmap_data.xls".

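  3. Hierarchical Clustering: The script clusters the genes (rows) hierarchically, using a Pearson-correlation distance (1 - r) with complete linkage, and cuts the resulting tree to define the gene clusters. A second clustering of the samples (columns), based on Spearman correlation, is also computed, but the heatmap keeps the samples in their original column order and shows only the gene dendrogram.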

  4. Heatmap Generation: A heatmap is generated to visualize the clustered data, and this visualization is saved as an image file named "DEGs_heatmap.png".

  5. Annotation and Data Segregation: The genes are further grouped into clusters, and for each cluster, annotation details such as gene ID, gene name, chromosome name, start and end positions, and more are fetched from Ensembl. This annotated data for each cluster is stored with the expression data in distinct data frames.

  6. Output: All the processed clusters are then compiled and written to an Excel file named "gene_clusters.xlsx", with each cluster having its designated sheet.

This script aids in the identification and exploration of gene expression patterns and further provides essential annotations for identified gene clusters.

# Install only the dependencies that are missing. Calling install.packages()
# unconditionally would re-download and reinstall every package on each run.
cran_packages <- c("gplots", "readxl", "writexl", "dplyr")
missing_cran <- cran_packages[
  !vapply(cran_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_cran) > 0) {
  install.packages(missing_cran)
}
# biomaRt is distributed through Bioconductor, so it is installed via
# BiocManager rather than install.packages().
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("biomaRt", quietly = TRUE)) {
  BiocManager::install("biomaRt")
}

library(gplots)
library(readxl)
library(writexl)
library(dplyr)
library(biomaRt)
# Informational only: show the Ensembl marts that biomaRt can reach.
listEnsembl()
listMarts()

# Connect to the human gene dataset, pinned to Ensembl release 104 so that
# annotations stay reproducible between runs.
ensembl <- useEnsembl(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  version = "104"
)

# Datasets offered by the selected mart (kept for interactive inspection).
datasets <- listDatasets(ensembl)

# Load the expression table from the first sheet of the workbook; the first
# row of the sheet supplies the column names.
datamat <- as.data.frame(
  read_excel(path = "DEGs_heatmap_data.xls", sheet = 1, col_names = TRUE)
)

# Use the first column as row names, then drop it so that only the
# remaining columns are left in the table.
rownames(datamat) <- datamat[[1]]
datamat <- datamat[, -1]


# ---- Hierarchical clustering and heatmap ----

# Gene-gene (row) Pearson correlations. A gene with missing values or with
# constant expression yields NA correlations, which hclust() rejects with a
# cryptic error, so fail early with an actionable message instead.
gene_cor <- cor(t(datamat), method = "pearson")
if (anyNA(gene_cor)) {
  stop(
    "Gene-gene correlations contain NA; remove genes with missing values ",
    "or constant expression across samples before clustering.",
    call. = FALSE
  )
}

# Cluster genes on the distance 1 - r with complete linkage.
hr <- hclust(as.dist(1 - gene_cor), method = "complete")

# Sample (column) clustering on a Spearman-correlation distance. It is kept
# for reference only: the heatmap below is drawn with Colv = NA, so the
# columns stay in their input order.
hc <- hclust(as.dist(1 - cor(datamat, method = "spearman")), method = "complete")

# Cut the gene tree at (maximum height / 1.2) to define the gene clusters.
# mycl is an integer cluster id per gene, named by gene.
mycl <- cutree(hr, h = max(hr$height) / 1.2)

# Side-bar colour for each cluster id (cluster 1 = YELLOW, 2 = DARKBLUE, ...).
cluster_palette <- c(
  "YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",
  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN",
  "LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN"
)
n_clusters <- max(mycl)
if (n_clusters > length(cluster_palette)) {
  # Indexing past the end of the palette would give NA colours; recycle the
  # palette instead and tell the user that colours are no longer unique.
  warning(
    n_clusters, " clusters found but only ", length(cluster_palette),
    " colours are defined; colours are recycled.",
    call. = FALSE
  )
  cluster_palette <- rep_len(cluster_palette, n_clusters)
}

# One colour per gene, aligned with mycl.
mycol <- cluster_palette[as.vector(mycl)]

# Draw the heatmap into a PNG file. tryCatch(finally = ) guarantees that the
# graphics device is closed even when heatmap.2() fails; invisible() keeps
# heatmap.2()'s return value from being printed.
png("DEGs_heatmap.png", width = 900, height = 1010)
invisible(tryCatch(
  heatmap.2(
    as.matrix(datamat),
    Rowv = as.dendrogram(hr), Colv = NA, dendrogram = "row",
    scale = "row", trace = "none", col = bluered(75),
    RowSideColors = mycol, labRow = "", srtCol = 30, keysize = 0.72,
    cexRow = 2, cexCol = 1.4
  ),
  finally = dev.off()
))

#### cluster members #####

# Ensembl attributes fetched for every gene of a cluster.
annotation_attributes <- c(
  "ensembl_gene_id", "external_gene_name", "gene_biotype", "entrezgene_id",
  "chromosome_name", "start_position", "end_position", "strand", "description"
)

# Build the annotated expression table for one cluster.
#
# cluster_id: integer id of the cluster to extract.
# clusters:   named integer vector (gene id -> cluster id), e.g. from cutree().
# expr:       expression data frame whose row names are Ensembl gene ids.
# mart:       biomaRt Mart object used for the annotation query.
#
# Returns a data frame with one row per annotated gene: the annotation
# columns followed by the expression columns, joined on the Ensembl gene id.
annotate_cluster <- function(cluster_id, clusters, expr, mart) {
  gene_ids <- names(clusters)[clusters == cluster_id]

  annotation <- getBM(
    attributes = annotation_attributes,
    filters = "ensembl_gene_id",
    values = gene_ids,
    mart = mart
  )
  # getBM() can return several rows per gene (e.g. multiple Entrez ids);
  # keep the first row for each gene.
  annotation <- distinct(annotation, ensembl_gene_id, .keep_all = TRUE)

  # drop = FALSE keeps a data frame even when there is a single sample column.
  cluster_expr <- expr[gene_ids, , drop = FALSE]
  cluster_expr$ENSEMBL <- rownames(cluster_expr)

  annotated <- merge(
    annotation, cluster_expr,
    by.x = "ensembl_gene_id", by.y = "ENSEMBL"
  )

  # The merge is an inner join, so genes unknown to the mart are left out;
  # report that instead of losing them silently.
  n_dropped <- length(gene_ids) - nrow(annotated)
  if (n_dropped > 0) {
    warning(
      "Cluster ", cluster_id, ": ", n_dropped,
      " gene(s) without Ensembl annotation were omitted.",
      call. = FALSE
    )
  }
  annotated
}

# Process every cluster actually present in mycl rather than a fixed 1..6.
cluster_ids <- sort(unique(as.vector(mycl)))

# mycol holds one colour per gene, aligned with mycl, so the colour of a
# cluster is the colour of its first member.
cluster_colors <- mycol[match(cluster_ids, mycl)]
sheet_names <- ifelse(
  is.na(cluster_colors),
  paste("Cluster", cluster_ids),
  paste("Cluster", cluster_ids, cluster_colors)
)

# Named list of per-cluster tables, e.g. cluster_tables[["Cluster 1 YELLOW"]].
cluster_tables <- lapply(
  cluster_ids, annotate_cluster,
  clusters = mycl, expr = datamat, mart = ensembl
)
names(cluster_tables) <- sheet_names

# One worksheet per cluster, named after the cluster id and its colour.
write_xlsx(cluster_tables, "gene_clusters.xlsx")

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum