gene_x 0 like s 703 view s
Tags: plot, R, scripts
This script is structured to process gene expression data, specifically DEGs (Differentially Expressed Genes) and create a heatmap visualizing the patterns of the data. The steps involved are as follows:
Package Installation and Library Loading: The script first ensures that essential packages are installed and then loads them. Some of the key packages include "gplots" for generating heatmaps, "readxl" and "writexl" for reading and writing Excel data, and "biomaRt" for fetching gene annotation data from Ensembl.
Data Input: It reads in the gene expression data from an Excel file named "DEGs_heatmap_data.xls".
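Hierarchical Clustering: The script performs complete-linkage hierarchical clustering twice: genes (rows) are clustered using a Pearson-correlation distance, and samples (columns) using a Spearman-correlation distance. Only the gene dendrogram is used to order the heatmap rows and to define the gene clusters; the sample columns are kept in their original order.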
Heatmap Generation: A heatmap is generated to visualize the clustered data, and this visualization is saved as an image file named "DEGs_heatmap.png".
Annotation and Data Segregation: The genes are further grouped into clusters, and for each cluster, annotation details such as gene ID, gene name, chromosome name, start and end positions, and more are fetched from Ensembl. This annotated data for each cluster is stored with the expression data in distinct data frames.
Output: All the processed clusters are then compiled and written to an Excel file named "gene_clusters.xlsx", with each cluster having its designated sheet.
This script aids in the identification and exploration of gene expression patterns and further provides essential annotations for identified gene clusters.
# Make sure the required packages are available.
# Only packages that are not yet installed are fetched, so re-running the
# script does not reinstall everything (and does not need network access
# once the dependencies are in place).
cran_packages <- c("gplots", "readxl", "writexl", "dplyr")
is_installed <- vapply(
  cran_packages, requireNamespace, logical(1), quietly = TRUE
)
missing_cran <- cran_packages[!is_installed]
if (length(missing_cran) > 0) {
  install.packages(missing_cran)
}

# biomaRt is distributed through Bioconductor, which needs BiocManager.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("biomaRt", quietly = TRUE)) {
  BiocManager::install("biomaRt")
}
library(gplots)
library(readxl)
library(writexl)
library(dplyr)
library(biomaRt)
# Print the available Ensembl BioMart databases and marts. The results are
# not stored; these two calls are purely informational.
listEnsembl()
listMarts()

# Connect to the human gene dataset, pinned to Ensembl release 104.
ensembl <- useEnsembl(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  version = "104"
)

# Table of the datasets offered by the selected mart.
datasets <- listDatasets(ensembl)
# Load the expression table from the first sheet of the Excel workbook and
# convert it to a plain data.frame so that row names can be assigned.
datamat <- as.data.frame(
  read_excel(path = "DEGs_heatmap_data.xls", sheet = 1, col_names = TRUE)
)

# The first column supplies the row names; once they are set, the column
# itself is dropped so that only the remaining data columns are kept.
rownames(datamat) <- datamat[, 1]
datamat <- datamat[, -1]
# Row clustering: distance = 1 - Pearson correlation between rows,
# complete linkage.
gene_dist <- as.dist(1 - cor(t(datamat), method = "pearson"))
hr <- hclust(gene_dist, method = "complete")

# Column clustering: distance = 1 - Spearman correlation between columns,
# complete linkage.
sample_dist <- as.dist(1 - cor(datamat, method = "spearman"))
hc <- hclust(sample_dist, method = "complete")

# Cut the row tree at (maximum merge height / 1.2) to obtain cluster ids.
cut_height <- max(hr$height) / 1.2
mycl <- cutree(hr, h = cut_height)

# One colour per cluster id; `mycol` ends up holding one colour per row,
# in the same order as `mycl`.
cluster_palette <- c(
  "YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED",
  "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN",
  "LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN"
)
mycol <- cluster_palette[as.vector(mycl)]
# Render the row-clustered heatmap into DEGs_heatmap.png.
png("DEGs_heatmap.png", width = 900, height = 1010)
invisible(tryCatch(
  heatmap.2(
    as.matrix(datamat),
    Rowv = as.dendrogram(hr), # order rows by the row dendrogram
    Colv = NA,                # keep columns in their original order
    dendrogram = "row",
    scale = "row",
    trace = "none",
    col = bluered(75),
    RowSideColors = mycol,    # per-row cluster colour strip
    labRow = "",
    srtCol = 30,
    keysize = 0.72,
    cexRow = 2,
    cexCol = 1.4
  ),
  # Always close the PNG device, even when plotting fails; otherwise the
  # device stays open and later plots are silently redirected to the file.
  finally = dev.off()
))
#### cluster members #####
# Annotation columns requested from Ensembl for every gene.
annotation_attributes <- c(
  "ensembl_gene_id", "external_gene_name", "gene_biotype", "entrezgene_id",
  "chromosome_name", "start_position", "end_position", "strand", "description"
)

# Build the annotated expression table for a single cluster.
#
# cluster_id: cluster number to extract (a value occurring in `clusters`).
# clusters:   named vector of cluster ids; the names are Ensembl gene ids.
# expr:       expression data.frame whose row names are Ensembl gene ids.
# mart:       biomaRt Mart object used for the annotation query.
#
# Returns a data.frame with one row per gene that was found in Ensembl:
# the annotation columns followed by the expression columns.
annotate_cluster <- function(cluster_id, clusters, expr, mart) {
  gene_ids <- names(clusters)[clusters == cluster_id]

  annotation <- getBM(
    attributes = annotation_attributes,
    filters = "ensembl_gene_id",
    values = gene_ids,
    mart = mart
  )
  # A gene can come back on several rows (e.g. multiple Entrez ids);
  # keep only the first row per Ensembl gene id.
  annotation <- distinct(annotation, ensembl_gene_id, .keep_all = TRUE)

  # drop = FALSE keeps a data.frame even when there is one sample column.
  cluster_expr <- expr[gene_ids, , drop = FALSE]
  cluster_expr$ENSEMBL <- rownames(cluster_expr)

  # Inner join: genes without an Ensembl annotation are not reported.
  merge(annotation, cluster_expr, by.x = "ensembl_gene_id", by.y = "ENSEMBL")
}

# Process every cluster that cutree() actually produced instead of assuming
# there are exactly six.
cluster_ids <- sort(unique(mycl))

# `mycol` holds one colour per gene (same order as `mycl`), so the colour of
# a cluster is the colour of its first member.
cluster_colours <- mycol[match(cluster_ids, mycl)]
sheet_names <- ifelse(
  is.na(cluster_colours),
  paste("Cluster", cluster_ids),
  paste("Cluster", cluster_ids, cluster_colours)
)

cluster_tables <- lapply(
  cluster_ids,
  annotate_cluster,
  clusters = mycl,
  expr = datamat,
  mart = ensembl
)
names(cluster_tables) <- sheet_names

# One worksheet per cluster, e.g. "Cluster 1 YELLOW".
write_xlsx(cluster_tables, "gene_clusters.xlsx")
点赞本文的读者
还没有人对此文章表态
没有评论
Gene Set Variation Analysis (GSVA) and Visualization of Gene Sets from Excel Signatures
© 2023 XGenes.com Impressum