# =============================================================================
# Script: manhattan_plot_Carmen_custom_labels.R
# Description: Manhattan-style plot of miRNA abundance (log10 RPM) for WaGa
#   cells and WaGa untreated EVs. Labels a custom miRNA list plus the top 10
#   miRNAs by mean RPM, prints the labeled miRNAs for manual verification,
#   and exports the plot (PNG, SVG) and the underlying tables (XLSX).
# =============================================================================

library(ggplot2)
library(dplyr)
library(tidyr)
library(ggrepel)
library(openxlsx)

# 1. Load data ----
# All inputs and outputs live in this directory. Paths are built explicitly
# instead of calling setwd(), so the session's working directory is untouched.
data_dir <- path.expand(
  "~/DATA/Data_Ute_smallRNA_via_exceRpt_workspace/summaries_WaGa"
)

d.raw <- read.delim2(
  file.path(data_dir, "exceRpt_miRNA_ReadCounts.txt"),
  sep = "\t",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)
# read.delim2() defaults to "," as the decimal mark, so columns containing
# "."-decimals may be read as character; coerce every column to numeric.
d.raw[] <- lapply(d.raw, as.numeric)

# 2. Define sample groups ----
cell_cols <- c("nf774", "nf961", "nf962")
ev_cols <- c("nf930", "nf935")
all_cols <- c(cell_cols, ev_cols)

# Fail early with a readable message if an expected sample is absent.
missing_cols <- setdiff(all_cols, colnames(d.raw))
if (length(missing_cols) > 0) {
  stop(
    "Sample columns missing from input: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# 3. Calculate RPM & aggregate ----
# na.rm = TRUE: a single NA count must not turn a sample's library size (and
# with it every RPM value of that sample) into NA.
total_counts <- colSums(d.raw[, all_cols], na.rm = TRUE)
RPM <- sweep(d.raw[, all_cols], 2, total_counts, FUN = "/") * 1e6

df_agg <- data.frame(
  miRNA = rownames(RPM),
  WaGa_cells = rowMeans(RPM[, cell_cols], na.rm = TRUE),
  WaGa_untreated_EVs = rowMeans(RPM[, ev_cols], na.rm = TRUE)
)

# 4. Reshape & transform ----
# Position is the alphabetical rank of each miRNA within its sample; it is
# only used to spread the points along the x axis.
df_long <- df_agg %>%
  pivot_longer(cols = -miRNA, names_to = "sample", values_to = "RPM") %>%
  mutate(logRPM = log10(RPM + 1)) %>%
  arrange(miRNA) %>%
  group_by(sample) %>%
  mutate(Position = row_number()) %>%
  ungroup()

# 5. Define custom miRNAs (Carmen's request) ----
custom_mirnas <- c(
  "hsa-miR-10b-5p", "hsa-miR-21-5p", "hsa-miR-1246",
  "hsa-miR-182-5p", "hsa-miR-183-5p",
  "hsa-miR-30a-5p", "hsa-miR-30d-5p",
  "hsa-miR-200c-3p"
)

# 6. Create display labels (handle families/clusters) ----
df_long$display_label <- df_long$miRNA  # default: show original name

# # Map family/cluster members to grouped labels
# family_map <- list(
#   "miR-30-Family" = c("hsa-miR-30a-5p", "hsa-miR-30d-5p"),
#   "miR-182/183-Cluster" = c("hsa-miR-182-5p", "hsa-miR-183-5p")
# )
#
# for (label in names(family_map)) {
#   df_long$display_label[df_long$miRNA %in% family_map[[label]]] <- label
# }

# 7. Determine which miRNAs to label (custom + top 10 by mean RPM) ----
top_auto <- df_long %>%
  group_by(miRNA) %>%
  summarise(mean_RPM = mean(RPM, na.rm = TRUE)) %>%
  arrange(desc(mean_RPM)) %>%
  head(10) %>%
  pull(miRNA)

highlight_list <- unique(c(custom_mirnas, top_auto))
# Drop requested miRNAs that are not present in the data.
highlight_list <- highlight_list[highlight_list %in% df_long$miRNA]

# 8. Print labeled miRNAs for manual verification ----
cat("\n=== MIrnas TO BE LABELED IN PLOT ===\n")
labeled_info <- df_long %>%
  filter(miRNA %in% highlight_list) %>%
  select(miRNA, display_label, sample, RPM, logRPM) %>%
  arrange(display_label, miRNA, sample) %>%
  distinct()  # remove duplicate rows
print(labeled_info, n = 50)

# Save this table for Carmen's reference
write.xlsx(
  labeled_info,
  file = file.path(data_dir, "labeled_miRNAs_for_verification.xlsx"),
  rowNames = FALSE
)
cat("✅ Saved verification table: labeled_miRNAs_for_verification.xlsx\n")

# 9. Assign colors ----
df_long$color <- ifelse(df_long$miRNA %in% highlight_list, "red", "darkblue")

# 10. Generate plot ----
# Only the highlighted miRNAs receive a text label.
labeled_points <- df_long %>%
  filter(miRNA %in% highlight_list)

p <- ggplot(df_long, aes(x = Position, y = logRPM, color = color)) +
  scale_color_manual(values = c("red" = "red", "darkblue" = "darkblue")) +
  geom_jitter(width = 0.3, alpha = 0.8) +
  # label.padding is a geom_label_repel() argument and was ignored (with a
  # warning) by geom_text_repel(), so it is not passed here.
  geom_text_repel(
    data = labeled_points,
    aes(label = display_label),
    box.padding = 0.6,
    point.padding = 0.5,
    segment.color = "grey50",
    size = 4,
    max.overlaps = 25,
    color = "black",
    force = 2,
    force_pull = 1
  ) +
  labs(x = "", y = "log10(Reads Per Million) (RPM)") +
  facet_wrap(
    ~sample,
    scales = "free_x",
    ncol = 2,
    labeller = labeller(sample = c(
      "WaGa_cells" = "WaGa cells",
      "WaGa_untreated_EVs" = "WaGa untreated EVs"
    ))
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # linewidth replaces the deprecated size argument (ggplot2 >= 3.4.0).
    panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
    panel.grid.minor = element_line(color = "grey95", linewidth = 0.2),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none",
    strip.text = element_text(size = 14, face = "bold"),
    panel.background = element_rect(fill = "white", color = NA)
  )

# 11. Export ----
ggsave(
  file.path(data_dir, "manhattan_plot_Carmen_custom.png"),
  plot = p, width = 6.5, height = 10, dpi = 200
)
ggsave(
  file.path(data_dir, "manhattan_plot_Carmen_custom.svg"),
  plot = p, width = 6.5, height = 10
)
write.xlsx(
  df_long %>%
    select(miRNA, display_label, sample, Position, RPM, logRPM, color),
  file = file.path(data_dir, "manhattan_plot_Carmen_data.xlsx"),
  rowNames = FALSE
)

cat("✅ Successfully generated:\n")
cat(" - manhattan_plot_Carmen_custom.png\n")
cat(" - manhattan_plot_Carmen_custom.svg\n")
cat(" - manhattan_plot_Carmen_data.xlsx\n")
cat(" - labeled_miRNAs_for_verification.xlsx\n")