gene_x 0 like s 563 view s
Tags: plot, R, scripts
#install.packages("readxl")
library(readxl)
# Path to the Excel file
file_path <- "Signatures.xls"
#example of a signature:
#geneSymbol geneEntrezID ENSEMBL GeneSet
#CD160 11126 ENSG00000117281 Anergic or act. T cells
#CD244 51744 ENSG00000122223 Anergic or act. T cells
#CTLA4 1493 ENSG00000163599 Anergic or act. T cells
#HAVCR2 84868 ENSG00000135077 Anergic or act. T cells
#ICOS 29851 ENSG00000163600 Anergic or act. T cells
#KLRG1 10219 ENSG00000139187 Anergic or act. T cells
#LAG3 3902 ENSG00000089692 Anergic or act. T cells
#PDCD1 5133 ENSG00000188389 Anergic or act. T cells
#PDCD1 5133 ENSG00000276977 Anergic or act. T cells
# Get the names of the sheets
sheet_names <- excel_sheets(file_path)
# Initialize an empty list to hold gene sets
geneSets <- list()
# Loop over each sheet, extract the ENSEMBL IDs, and add to the list
for (sheet in sheet_names) {
# Read the sheet
data <- read_excel(file_path, sheet = sheet)
# Process the GeneSet names (replacing spaces with underscores, for example)
gene_set_name <- gsub(" ", "_", unique(data$GeneSet)[1])
# Add ENSEMBL IDs to the list
geneSets[[gene_set_name]] <- as.character(data$ENSEMBL)
}
# Print the result to check
print(geneSets)
# 1. Compute GSVA scores:
gsva_scores <- gsva(exprs, geneSets, method="gsva")
# 2. Convert to data.frame for ggplot:
gsva_df <- as.data.frame(t(gsva_scores))
# 3. Add conditions to gsva_df:
gsva_df$Condition <- dds$condition
# 4. Filter the gsva_df to retain only the desired conditions:
gsva_df_filtered <- gsva_df[gsva_df$Condition %in% c("Ace2_mock_2h", "Ace2_inf_24h"), ]
# 5. Define a function to plot violin plots:
# Update the condition levels in gsva_df_filtered to ensure the desired order on x-axis:
gsva_df_filtered$Condition <- gsub("Ace2_mock_2h", "mock", gsva_df_filtered$Condition)
gsva_df_filtered$Condition <- gsub("Ace2_inf_24h", "infection", gsva_df_filtered$Condition)
gsva_df_filtered$Condition <- factor(gsva_df_filtered$Condition, levels = c("mock", "infection"))
plot_violin <- function(data, gene_name) {
# Calculate the t-test p-value for the two conditions
condition1_data <- data[data$Condition == "mock", gene_name]
condition2_data <- data[data$Condition == "infection", gene_name]
p_value <- t.test(condition1_data, condition2_data)$p.value
# Convert p-value to annotation
p_annotation <- ifelse(p_value < 0.01, "**", ifelse(p_value < 0.05, "*", ""))
rounded_p_value <- paste0("p = ", round(p_value, 2))
plot_title <- gsub("_", " ", gene_name)
p <- ggplot(data, aes(x=Condition, y=!!sym(gene_name))) +
geom_violin(linewidth=1.2) +
labs(title=plot_title, y="GSVA Score") +
ylim(-1, 1) +
theme_light() +
theme(
axis.title.x = element_text(size=12),
axis.title.y = element_text(size=12),
axis.text.x = element_text(size=10),
axis.text.y = element_text(size=10),
plot.title = element_text(size=12, hjust=0.5)
)
# Add p-value annotation to the plot
p <- p + annotate("text", x=1.5, y=0.9, label=paste0(p_annotation, " ", rounded_p_value), size=5, hjust=0.5)
return(p)
}
# 6. Generate the list of plots:
#genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
#plots_list <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))
# 6. Generate the list of plots in a predefined order:
desired_order <- c("Platelets","Granulocytes","LDG","pDC","Anti-inflammation", "Pro-inflam._IL-1","Dendritic_cells","MHC_II","Alt._complement","TNF", "NLRP3_inflammasome","Unfolded_protein","B_cells","Monocyte_cell_surface","Inflammasome", "Monocyte_secreted","IL-1_cytokines","SNOR_low_UP","CD40_activated","Lectin_complement", "Classical_complement","Cell_cycle","Plasma_cells","IG_chains","Erythrocytes", "IL-6R_complex","IFN","TCRB","TCRA","Cyt._act._T_cells", "TCRG","T_cells","CD8T-NK-NKT","Anergic_or_act._T_cells","T_activated", "NK_cells","TCRD","T_regs","SNOR_low_DOWN","Monocytes", "Myeloid_cells","Neutrophils")
genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
genes <- genes[match(desired_order, genes)]
plots_list <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))
# 7. Pad the list of plots:
remaining_plots <- 6 - (length(plots_list) %% 6)
if (remaining_plots != 6) {
plots_list <- c(plots_list, rep(list(NULL), remaining_plots))
}
# 8. Create the plots and arrange them in a grid:
library(gridExtra)
plots_matrix <- matrix(plots_list, ncol=6, byrow=T)
#do.call("grid.arrange", c(plots_matrix, list(ncol=6)))
# 9. Save the plots to a PNG:
png("All_Violin_Plots.png", width=1000, height=1000)
do.call("grid.arrange", c(plots_matrix, list(ncol=6)))
dev.off()
点赞本文的读者
还没有人对此文章表态
没有评论
Generation of Heatmap from DEGs Data and Annotation of Identified Gene Clusters
© 2023 XGenes.com Impressum