gene_x 0 like s 642 view s
Tags: R, processing, protein
library("rmarkdown")
library("tidyverse")
library(rmarkdown)
setwd("/home/jhuang/DATA/Data_Susanne_Carotis_MS/LIMMA-pipeline-proteomics/Results_20231006_165122")
# -1. prepare and read signatures (namely gene sets)
#Monocytes.csv #66
#Monocytes2.csv #201
#Neutrophils.csv #40
#Neutrophils2.csv #61
#~/Tools/csv2xls-0.4/csv_to_xls.py Alt._complement.csv Anergic_or_act._T_cells.csv Anti-inflammation.csv B_cells.csv CD40_activated.csv CD8T-NK-NKT.csv Cell_cycle.csv Classical_complement.csv Cyt._act._T_cells.csv Dendritic_cells.csv Erythrocytes.csv Granulocytes.csv IFN.csv IG_chains.csv IL-1_cytokines.csv IL-6R_complex.csv Inflammasome.csv LDG.csv Lectin_complement.csv MHC_II.csv Monocytes.csv Monocyte_cell_surface.csv Monocyte_secreted.csv Myeloid_cells.csv Neutrophils.csv NK_cells.csv NLRP3_inflammasome.csv pDC.csv Plasma_cells.csv Platelets.csv Pro-inflam._IL-1.csv SNOR_low_DOWN.csv SNOR_low_UP.csv TCRA.csv TCRB.csv TCRD.csv TCRG.csv TNF.csv T_activated.csv T_cells.csv T_regs.csv Unfolded_protein.csv Inflammatory_neutrophils.csv Suppressive_neutrophils.csv Apoptosis.csv NFkB_complex.csv -d',' -o Signatures_46.xls
library(readxl)
file_path <- "../../Signatures_46.xls"
# Get the names of the sheets
sheet_names <- excel_sheets(file_path)
# Initialize an empty list to hold gene sets
geneSets <- list()
# Loop over each sheet, extract the ENSEMBL IDs, and add to the list
for (sheet in sheet_names) {
# Read the sheet
data <- read_excel(file_path, sheet = sheet)
# Process the GeneSet names (replacing spaces with underscores, for example)
gene_set_name <- gsub(" ", "_", unique(data$GeneSet)[1])
# Add ENSEMBL IDs to the list
geneSets[[gene_set_name]] <- unique(as.character(data$geneSymbol))
}
# Print the result to check
print(geneSets)
length(geneSets)
# 0. for the following calculation, the GSVA input requires a gene expression matrix 'exprs' where rows are genes and columns are samples.
library(gridExtra)
library(ggplot2)
library(GSVA)
exprs_ <- read.csv(sep="\t", file="20231006_165122_final_data.tsv")
# List of desired columns
desired_columns <- c("CA1", "CA2", "CA3", "CA10", "CA5", "CA6", "CA8", "CA14", "CA12", "CA13", "Con1", "Con2", "Con3", "Con4", "Con5", "Con6")
# # Find duplicated symbols
# duplicated_symbols <- exprs_$Symbol[duplicated(exprs_$Symbol) | duplicated(exprs_$Symbol, fromLast = TRUE)]
# print(duplicated_symbols)
# Filter out rows with empty 'Symbol' values
exprs_ <- exprs_[exprs_$Symbol != "", ]
# Subset the dataframe
exprs <- exprs_[, desired_columns]
# Set the 'Symbol' column as row names
rownames(exprs) <- exprs_$Symbol
exprs_matrix <- as.matrix(exprs)
# 1. Compute GSVA scores:
library(GSVA)
gsva_scores <- gsva(exprs_matrix, geneSets, method="gsva", useNames=TRUE)
# 2. Convert to data.frame for ggplot:
gsva_df <- as.data.frame(t(gsva_scores))
# 3. Add conditions to gsva_df:
gsva_df$Condition <- c("COVID","COVID","COVID","COVID","COVID","COVID","COVID","COVID","COVID","COVID","control","control","control","control","control","control")
# 4. Filter the gsva_df to retain only the desired conditions:
# 5. Define a function to plot violin plots:
gsva_df$Condition <- factor(gsva_df$Condition, levels = c("control", "COVID"))
plot_violin <- function(data, gene_name) {
# Calculate the t-test p-value for the two conditions
condition1_data <- data[data$Condition == "control", gene_name]
condition2_data <- data[data$Condition == "COVID", gene_name]
p_value <- t.test(condition1_data, condition2_data)$p.value
# Convert p-value to annotation
p_annotation <- ifelse(p_value < 0.01, "**", ifelse(p_value < 0.05, "*", ""))
rounded_p_value <- paste0("p = ", round(p_value, 2))
plot_title <- gsub("_", " ", gene_name)
p <- ggplot(data, aes(x=Condition, y=!!sym(gene_name))) +
geom_violin(linewidth=1.2) +
labs(title=plot_title, y="GSVA Score") +
ylim(-1, 1) +
theme_light() +
theme(
axis.title.x = element_text(size=12),
axis.title.y = element_text(size=12),
axis.text.x = element_text(size=10),
axis.text.y = element_text(size=10),
plot.title = element_text(size=12, hjust=0.5)
)
# Add p-value annotation to the plot
p <- p + annotate("text", x=1.5, y=0.9, label=paste0(p_annotation, " ", rounded_p_value), size=5, hjust=0.5)
return(p)
}
# 6. Generate the list of plots in a predefined order:
# [1] "Platelets" "Granulocytes"
# [3] "LDG" "pDC"
# [5] "Anti-inflammation" "Pro-inflam._IL-1"
# [7] "Dendritic_cells" "MHC_II"
# [9] "Alt._complement" "TNF"
#[11] "NLRP3_inflammasome" "Unfolded_protein"
#[13] "B_cells" "Monocyte_cell_surface"
#[15] "Inflammasome" "Monocyte_secreted"
#[17] "IL-1_cytokines" "SNOR_low_UP"
#[19] "CD40_activated" "Lectin_complement"
#[21] "Classical_complement" "Cell_cycle"
#[23] "Plasma_cells" "IG_chains"
#[25] "Erythrocytes" "IL-6R_complex"
#[27] "IFN" "TCRB"
#[29] "TCRA" "Cyt._act._T_cells"
#[31] "TCRG" "T_cells"
#[33] "CD8T-NK-NKT" "Anergic_or_act._T_cells"
#[35] "T_activated" "Neutrophils"
#[37] "NK_cells" "TCRD"
#[39] "T_regs" "SNOR_low_DOWN"
#[41] "Monocytes" "Myeloid_cells"
#[43] "Inflammatory_neutrophils" "Suppressive_neutrophils"
#[45] "Apoptosis" "NFkB_complex"
desired_order <- c("Platelets","Granulocytes","LDG","pDC","Anti-inflammation", "Pro-inflam._IL-1","Dendritic_cells","MHC_II","Alt._complement","TNF", "NLRP3_inflammasome","Unfolded_protein","B_cells","Monocyte_cell_surface","Inflammasome", "Monocyte_secreted","IL-1_cytokines","SNOR_low_UP","CD40_activated","Lectin_complement", "Classical_complement","Cell_cycle","Plasma_cells","IG_chains","Erythrocytes", "IL-6R_complex","IFN","TCRB","TCRA","Cyt._act._T_cells", "TCRG","T_cells","CD8T-NK-NKT","Anergic_or_act._T_cells","T_activated", "Neutrophils", "NK_cells","TCRD","T_regs","SNOR_low_DOWN","Monocytes", "Myeloid_cells", "Inflammatory_neutrophils", "Suppressive_neutrophils", "Apoptosis", "NFkB_complex")
genes <- colnames(gsva_df)[!colnames(gsva_df) %in% "Condition"]
genes <- genes[match(desired_order, genes)]
filtered_genes <- genes[!is.na(genes)]
print(filtered_genes)
plots_list <- lapply(filtered_genes, function(filtered_genes) plot_violin(gsva_df, filtered_genes))
# 7. Pad the list of plots:
remaining_plots <- 6 - (length(plots_list) %% 6)
if (remaining_plots != 6) {
plots_list <- c(plots_list, rep(list(NULL), remaining_plots))
}
# 8. Create the plots and arrange them in a grid:
library(gridExtra)
plots_matrix <- matrix(plots_list, ncol=6, byrow=T)
#do.call("grid.arrange", c(plots_matrix, list(ncol=6)))
# 9. Save the plots to a PNG:
png("Violin_Plots_GSVA_MS.png", width=1000, height=1000)
do.call("grid.arrange", c(plots_matrix, list(ncol=6)))
dev.off()
# 10. Save GSVA scores as Excel-file.
library(writexl)
write_xlsx(gsva_df, "GSVA_Scores_MS.xlsx")
点赞本文的读者
还没有人对此文章表态
没有评论
QIIME + Phyloseq + MicrobiotaProcess (v1)
MicrobiotaProcess Group2 vs Group6 (v1)
© 2023 XGenes.com Impressum