Author Archives: gene_x

Visualization and Export of miRNA Expression Profiles Using Manhattan Plots in R

This R script processes raw miRNA read count data to visualize expression profiles across different samples using Manhattan plots. The main steps include:

Normalization: Raw counts are converted to Reads Per Million (RPM) to account for sequencing depth differences across samples.
Transformation & Reshaping: The RPM table is reshaped into long format and log-transformed as log10(RPM + 1) for plotting; the +1 pseudocount keeps miRNAs with zero reads defined.
Highlighting Key miRNAs:
- The top 5 miRNAs with the highest mean RPM across the five analysed samples (the first five columns of the input table) are highlighted in Plot 1.
- A manually selected set of biologically relevant miRNAs is highlighted in Plot 2.
Plotting: Two Manhattan plots are generated using ggplot2 and ggrepel, one for each set of highlighted miRNAs.

Export: The processed data, including RPM values, log-transformed values, and highlight flags, is saved to an Excel file (manhattan_data.xlsx) for further analysis.

  # Load required libraries
  library(ggplot2)
  library(dplyr)
  library(tidyr)
  library(ggrepel)
  library(openxlsx)

  # Load data
  # The file is comma-separated with miRNA names in the first column.
  # read.csv() is used instead of read.delim2(): the latter defaults to
  # dec = ",", which collides with sep = "," and would mis-parse any
  # non-integer value.
  d.raw <- read.csv("d_raw.csv", header = TRUE, row.names = 1)

  # Step 1: Compute RPM
  # Only the first five sample columns are analysed; each column is scaled by
  # its own library size (column sum) to reads per million.
  d.raw_5 <- d.raw[, 1:5]
  stopifnot(all(vapply(d.raw_5, is.numeric, logical(1))))
  total_counts <- colSums(d.raw_5)
  # A sample with zero total reads would turn every RPM into NaN/Inf.
  stopifnot(all(total_counts > 0))
  RPM <- sweep(d.raw_5, 2, total_counts, FUN = "/") * 1e6
  RPM$miRNA <- rownames(RPM)

  # Step 2: Long format (one row per miRNA/sample pair)
  df <- pivot_longer(RPM, cols = -miRNA, names_to = "sample", values_to = "RPM")

  # Step 3: Log transform (pseudocount of 1 keeps zero counts defined)
  df <- df %>%
    mutate(logRPM = log10(RPM + 1))

  # Step 4: miRNA position
  # miRNAs are sorted by name and numbered within each sample, so a given
  # miRNA gets the same x position in every facet.
  df <- df %>%
    arrange(miRNA) %>%
    group_by(sample) %>%
    mutate(Position = row_number()) %>%
    ungroup()  # do not leak the per-sample grouping into later steps

  # Step 5: Define top miRNAs
  # Five miRNAs with the highest mean RPM over the analysed samples.
  top_mirnas_mean <- df %>%
    group_by(miRNA) %>%
    summarise(mean_RPM = mean(RPM)) %>%
    arrange(desc(mean_RPM)) %>%
    slice_head(n = 5) %>%
    pull(miRNA)

  # Manually chosen miRNAs to highlight in the second plot.
  top_mirnas_selected <- c("hsa-miR-20a-5p", "hsa-miR-93-5p", "hsa-let-7g-5p",
                          "hsa-miR-30a-5p", "hsa-miR-423-5p", "hsa-let-7i-5p")

  # Step 6: Annotate highlights (logical flag per row for each highlight set)
  df <- df %>%
    mutate(
      highlight_meanRPM = miRNA %in% top_mirnas_mean,
      highlight_selected = miRNA %in% top_mirnas_selected
    )

  # Step 7: Export data to Excel
  write.xlsx(df, "manhattan_data.xlsx", asTable = TRUE)

  # Sample labels: facet strip titles keyed by the sample column names
  sample_labels <- c(
    "parental_cells_1" = "Parental cell 1",
    "parental_cells_2" = "Parental cell 2",
    "parental_cells_3" = "Parental cell 3",
    "untreated_1"      = "Untreated 1",
    "untreated_2"      = "Untreated 2"
  )

  # Draw one faceted Manhattan plot and write it to a 1200 x 1200 px PNG.
  #   data          long-format table with Position, logRPM, miRNA and sample
  #   color_col     name of the column holding each point's colour
  #                 ("red" or "darkblue")
  #   highlight_col name of the logical column flagging the miRNAs to label
  #   file          path of the PNG to create
  # Returns the ggplot object invisibly.
  save_manhattan_plot <- function(data, color_col, highlight_col, file) {
    labelled <- data %>% filter(.data[[highlight_col]])

    p <- ggplot(data, aes(x = Position, y = logRPM,
                          color = .data[[color_col]])) +
      scale_color_manual(values = c("red" = "red", "darkblue" = "darkblue")) +
      # height = 0: jitter horizontally only, so plotted y values stay equal
      # to logRPM and line up with the label anchors.
      geom_jitter(width = 0.4, height = 0) +
      geom_text_repel(
        data = labelled,
        aes(label = miRNA),
        box.padding = 0.5,
        point.padding = 0.5,
        segment.color = "gray50",
        size = 5,
        max.overlaps = 8,
        color = "black"
      ) +
      # The plotted value is log10(RPM + 1), so the axis title says so.
      labs(x = "", y = "log10(Reads Per Million + 1)") +
      facet_wrap(~sample, scales = "free_x", ncol = 5,
                 labeller = labeller(sample = sample_labels)) +
      theme_minimal() +
      theme(
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "none",
        text = element_text(size = 16),
        axis.title = element_text(size = 18),
        strip.text = element_text(size = 16, face = "bold"),
        panel.spacing = unit(1.5, "lines")
      )

    png(file, width = 1200, height = 1200)
    # Close the device even if rendering fails, so no PNG handle is left open.
    on.exit(dev.off(), add = TRUE)
    # ggplot objects are only drawn when printed; the explicit print() is
    # required inside a function and when the script is run via source().
    print(p)
    invisible(p)
  }

  # Step 8: Plot - Top by mean RPM
  df$color_mean <- ifelse(df$highlight_meanRPM, "red", "darkblue")
  save_manhattan_plot(
    df, "color_mean", "highlight_meanRPM",
    "manhattan_plot_top_miRNAs_based_on_mean_RPM.png"
  )

  # Step 9: Plot - Selected miRNAs
  df$color_selected <- ifelse(df$highlight_selected, "red", "darkblue")
  save_manhattan_plot(
    df, "color_selected", "highlight_selected",
    "manhattan_plot_most_differentially_expressed_miRNAs.png"
  )

Example Raw Data

    "","parental_cells_1","parental_cells_2","parental_cells_3","untreated_1","untreated_2","scr_control_1","scr_control_2","scr_control_3","DMSO_control_1","DMSO_control_2","DMSO_control_3","scr_DMSO_control_1","scr_DMSO_control_2","scr_DMSO_control_3","sT_knockdown_1","sT_knockdown_2","sT_knockdown_3"
    "hsa-miR-375",34533,3262377,1301496,100825,188119,153531,349072,21074,323775,336095,82391,194233,450228,51320,364245,772745,111276
    "hsa-miR-10b-5p",9041,7118756,2898458,89378,300984,171106,420457,30970,244186,358048,79946,202764,460488,64718,404386,432872,99274
    "hsa-let-7a-5p",117441,5512997,1790596,60180,90384,100759,131398,7423,208599,187416,33942,99479,172489,17403,214496,169542,41342
    "hsa-let-7b-5p",67432,814417,267664,69114,87227,44756,126032,6136,125020,121212,28550,52388,315105,13506,170644,216544,37364
    "hsa-miR-182-5p",6309,1361486,524716,42333,88821,63989,100756,2591,99036,104442,9956,74656,140172,6816,150818,151969,15236
    "hsa-miR-30a-5p",8721,948558,400525,14555,29211,27433,56620,418,67797,60160,2198,37562,112903,1134,89586,102199,2749
    "hsa-let-7f-5p",8950,1379796,501830,9482,18786,19152,25105,1097,35743,32119,4992,19641,29458,2611,37753,31201,5464
    "hsa-miR-191-5p",5319,187270,64891,17598,40244,9230,51212,2764,30164,48766,5275,20608,87755,9099,113397,85939,2608
    "hsa-miR-92a-3p",6202,466546,186103,9520,25668,20740,16677,1354,28347,33256,5931,21893,16776,3690,16799,20150,6742
    "hsa-miR-30d-5p",7668,480810,189297,8698,19449,11807,26133,332,24259,28667,1447,17094,44136,853,35161,37334,1542
    "hsa-miR-320a",4558,61397,19613,46980,78771,8831,237689,1470,24235,90901,2678,19811,222769,3872,76030,307737,2645
    "hsa-miR-486-5p",7236,116907,41502,6994,12458,11211,13947,1232,16559,19186,4688,12813,12954,1767,13088,17925,4386
    "hsa-miR-26a-5p",7787,556023,193310,4516,7921,8216,12146,730,15873,15392,2673,8557,18329,1703,25686,13053,3582
    "hsa-miR-378a-3p",2771,97674,38061,12733,60619,5156,157491,1151,14690,42554,2166,9806,108211,3328,58851,190501,1880
    "hsa-miR-423-5p",832,19336,6629,15116,40901,4269,38871,1090,12925,30694,1943,11965,63670,5767,32188,45385,2476
    "hsa-miR-93-5p",8360,336124,124845,3186,6539,5086,13468,121,11579,14276,618,5501,20073,361,19077,24599,644
    "hsa-miR-25-3p",2264,202212,84611,3218,8618,4036,20093,361,9774,14423,1514,5694,26041,1247,24033,46581,1924
    "hsa-let-7i-5p",6916,463608,184832,1876,3820,3259,6402,185,8490,8160,1032,4088,7530,470,8197,10634,1027
    "hsa-miR-181a-5p",4531,133716,47864,2879,6362,4313,8111,153,7554,8516,1077,4485,14439,533,13982,10684,1482
    "hsa-miR-30c-5p",1038,124393,49852,1592,4234,3036,4471,52,6945,7165,210,3745,7523,129,7161,4287,279
    "hsa-miR-148a-3p",448,439462,186294,1840,4740,3691,5857,525,6879,6337,1094,4959,6094,990,6399,6527,1590
    "hsa-miR-103a-3p",9950,316450,119774,1614,2530,3312,9204,190,5862,5666,653,4019,6595,390,7861,12440,940
    "hsa-let-7g-5p",8977,507218,178238,1663,2563,2572,6304,211,5486,5458,989,2676,7290,510,8191,8110,1301
    "hsa-miR-16-5p",3108,273138,96609,1621,3897,2418,6767,243,5200,6464,779,3178,7436,510,8496,6497,943
    "hsa-miR-769-5p",405,75785,29561,1168,3628,2156,5473,252,4465,5318,1059,3457,6846,633,6831,5787,1568
    "hsa-miR-146b-5p",354,80850,28700,2174,3161,1834,4325,100,3937,3779,288,3025,9058,363,10583,7922,362
    "hsa-miR-196a-5p",1361,113874,38288,1777,3074,1893,3981,104,3932,4291,506,2058,5900,316,6358,5839,594
    "hsa-miR-30e-5p",1070,147574,55848,936,1604,2255,2556,30,3822,3247,134,2572,3733,76,4280,3211,170
    "hsa-miR-200c-3p",7532,118630,38580,1465,3558,2559,5566,466,3807,6115,1632,3008,5706,1002,6467,5903,2384
    "hsa-miR-92b-3p",2340,78803,28318,1672,2249,3339,2488,187,3480,2980,830,2987,2072,293,2654,2559,823
    "hsa-miR-345-5p",439,27482,11486,1368,2869,1625,9583,47,3395,4737,258,2124,13198,139,6879,11281,248
    "hsa-miR-98-5p",1390,220110,74613,716,1168,1982,2470,99,3073,2615,471,1487,2115,237,2781,3424,390
    "hsa-let-7d-5p",1612,36438,11128,724,1148,1460,2190,44,2846,2642,250,1471,2238,162,2200,2146,354
    "hsa-miR-106b-3p",1000,18194,8302,1023,2106,1650,5202,34,2786,4840,142,1960,6501,89,6581,7052,209
    "hsa-miR-151a-3p",940,50844,20614,1123,2186,1317,3103,104,2748,3537,416,2348,6036,322,6857,5410,501
    "hsa-miR-423-3p",473,21186,8905,2029,3062,1262,11931,161,2669,3863,679,1955,9282,496,3890,13902,910
    "hsa-miR-9-5p",876,272251,119702,601,1293,1038,1670,63,2639,2000,353,1172,2409,172,3382,2448,454
    "hsa-miR-885-3p",373,3458,1123,3490,4454,613,3781,63,2490,4312,108,1918,12307,194,8321,6196,140
    "hsa-miR-200b-3p",2250,79100,30268,737,1969,2322,3554,52,2468,3343,198,3273,3457,193,3859,3264,320
    "hsa-miR-181b-5p",1089,31926,12240,1210,1488,1288,2379,64,2326,3140,323,1108,5134,208,4514,4266,274
    "hsa-miR-186-5p",677,21082,7797,776,2212,1591,5698,120,2016,3253,371,1785,6175,298,3723,6488,571
    "hsa-miR-181d-5p",929,76318,28372,756,1060,1000,4864,57,1942,2736,340,998,5815,232,3884,6836,466
    "hsa-miR-183-5p",847,54855,19776,824,1137,1242,2801,83,1881,2258,269,1418,2692,170,2367,3509,421
    "hsa-miR-26b-5p",2164,214186,74552,518,803,1090,2181,172,1797,1956,653,1031,2385,503,3527,3143,752
    "hsa-miR-320b",73,349,99,2395,6147,684,10886,5,1660,6171,20,1415,13452,21,5202,14559,21
    "hsa-miR-181c-5p",488,66293,25997,496,1407,986,3665,76,1607,1980,294,1221,5804,243,2968,4268,512
    "hsa-miR-30a-3p",23,18099,7876,946,1383,952,3233,79,1534,2332,308,1396,5988,401,7291,7917,506
    "hsa-miR-7-5p",526,50288,20079,490,541,708,1095,34,1277,1467,81,650,1322,41,1821,1619,90
    "hsa-miR-101-3p",63,35823,16223,480,1303,682,1503,58,1208,1491,147,1051,1581,81,2218,1883,204
    "hsa-miR-151a-5p|hsa-miR-151b",976,9344,3234,356,692,828,1176,15,1194,1228,50,868,1588,36,1490,978,66
    "hsa-miR-92a-1-5p",110,4993,1743,636,1140,1086,1407,27,1191,1667,90,1280,2098,92,1822,1648,245
    "hsa-miR-20a-5p",865,109223,38996,308,532,704,1469,17,1142,1172,83,900,1835,55,1824,1890,88
    "hsa-miR-190b",308,51627,19416,256,416,428,2568,102,1090,1453,488,423,2427,291,1787,4725,489
    "hsa-miR-1307-3p",111,8751,3680,368,722,442,1726,11,1059,1376,84,670,1579,21,944,2229,79
    "hsa-miR-24-3p",724,58135,21371,452,696,400,2739,58,1057,1011,303,601,2545,212,2017,4056,252
    "hsa-miR-1-3p",932,276107,128481,875,926,666,1334,145,1045,1201,457,861,1176,190,2555,1807,430
    "hsa-miR-532-5p",152,16899,7525,396,1174,622,2020,57,1043,1677,210,896,2132,204,2131,4051,227
    "hsa-miR-744-5p",980,8950,3197,479,668,628,1103,18,972,1242,108,559,1288,44,1090,1430,113
    "hsa-miR-130b-3p",258,7803,3037,688,1517,318,2330,13,957,1133,12,787,4032,16,3854,4966,33
    "hsa-miR-192-5p",60,11210,4454,269,474,639,1158,20,948,1128,82,802,1287,80,1689,1322,85
    "hsa-miR-3615",78,1503,642,268,344,238,1602,1,877,805,25,537,2549,14,612,2883,35
    "hsa-miR-1224-5p",74,1116,344,694,1264,135,1305,65,876,1723,136,882,3643,249,2182,1845,50
    "hsa-miR-17-5p",1102,65959,24350,194,665,533,1324,14,851,1306,82,842,1196,62,1767,1721,87
    "hsa-miR-130b-5p",291,11405,3876,274,529,451,476,20,832,595,93,618,655,35,1021,518,110
    "hsa-miR-301a-3p",61,43874,17611,161,310,337,928,28,826,777,198,428,879,98,991,855,198
    "hsa-miR-148a-5p",105,14869,6081,348,613,392,1150,19,794,693,124,488,1137,84,1044,1329,154
    "hsa-miR-107",1048,29568,12292,226,461,375,1154,22,785,890,62,454,1089,34,1197,1462,62
    "hsa-miR-200a-3p",649,45836,16810,184,684,833,1214,19,772,909,54,987,1136,26,1626,1217,72
    "hsa-miR-27b-3p",346,15959,6048,204,690,480,1106,9,751,1012,31,430,1544,10,1408,1354,46
    "hsa-miR-196b-5p",315,56050,20888,261,422,467,531,56,744,720,146,266,849,104,815,804,199
    "hsa-miR-885-5p",60,4387,1793,410,441,604,570,8,728,702,55,594,687,36,922,734,62
    "hsa-miR-19b-3p",44,46497,20811,304,1171,384,2380,40,727,1052,143,475,2640,72,2695,2681,182
    "hsa-miR-181a-2-3p",41,2966,1304,272,468,422,313,8,692,845,63,371,663,20,1126,613,67
    "hsa-miR-873-3p",102,5299,1957,767,1717,208,4009,77,686,1944,276,596,3868,255,2004,4425,236
    "hsa-miR-21-5p",124,54200,23746,278,290,448,848,13,682,864,73,567,886,25,1212,839,65
    "hsa-miR-421",51,4594,1689,185,138,192,347,16,586,314,60,312,507,56,398,595,50
    "hsa-miR-128-3p",147,3435,1325,209,609,212,927,10,515,668,39,394,680,17,728,1154,34
    "hsa-miR-425-5p",298,13138,4499,267,609,363,1650,27,492,972,108,397,1872,72,759,1884,147
    "hsa-miR-877-5p",114,1014,353,299,269,114,243,1,483,426,9,305,765,29,871,502,9
    "hsa-miR-625-3p",19,3201,1475,151,318,312,372,3,473,591,52,331,473,23,531,347,26
    "hsa-miR-1180-3p",331,3493,1265,283,343,238,815,5,418,478,94,325,810,53,536,1038,98
    "hsa-miR-30b-5p",89,28134,11724,298,419,534,544,19,401,935,87,368,1037,42,514,368,83
    "hsa-miR-30e-3p",15,11217,4550,178,270,328,702,23,396,422,76,273,743,52,671,839,116
    "hsa-miR-148b-3p",335,62635,29143,86,243,237,315,34,383,247,43,166,226,36,374,391,63
    "hsa-miR-941",590,10640,3991,141,270,231,354,29,350,381,30,172,364,42,402,238,41
    "hsa-miR-122-5p",0,0,0,625,126,250,190,6,346,225,9,581,190,2,1942,253,4
    "hsa-miR-95-3p",193,1087,394,90,370,401,571,1,345,555,13,338,576,0,641,467,18
    "hsa-miR-92b-5p",43,834,270,94,440,84,238,9,333,400,27,200,1109,54,704,327,21
    "hsa-miR-483-5p",0,445,147,117,1361,48,259,138,320,1598,328,70,1762,232,564,270,508
    "hsa-let-7c-5p",135,2637,1038,222,335,228,455,8,320,646,48,529,601,24,471,534,36
    "hsa-miR-197-3p",89,2006,834,97,159,154,158,11,318,168,52,184,225,22,131,128,22
    "hsa-miR-133a-3p",452,19488,7714,302,650,386,946,22,310,462,146,502,970,107,1026,864,156
    "hsa-miR-708-5p",293,22077,8483,103,162,209,394,5,300,279,51,162,466,15,584,403,44
    "hsa-miR-760",60,771,278,209,315,149,515,1,298,292,26,235,642,9,591,792,16
    "hsa-miR-141-3p",198,5911,2362,132,160,318,338,0,292,314,7,328,320,2,501,319,10
    "hsa-miR-22-3p",88,4854,1881,109,234,172,299,9,287,259,21,200,401,5,395,510,45
    "hsa-miR-181c-3p",32,12886,5202,53,209,107,411,4,275,228,39,145,376,24,504,497,51
    "hsa-miR-25-5p",209,907,353,207,247,306,235,3,263,417,20,328,571,14,546,444,29
    "hsa-miR-429",45,8117,3117,50,249,318,455,2,262,260,31,190,454,11,629,554,6
    "hsa-miR-340-5p",49,25006,10179,58,261,142,297,18,250,261,40,226,182,34,405,324,48
    "hsa-miR-200a-5p",31,2757,1043,87,113,179,279,10,248,150,30,259,634,26,1137,695,37
    "hsa-miR-454-3p",128,20684,7584,80,80,65,231,6,237,232,23,164,210,29,264,285,26
    "hsa-miR-425-3p",96,3316,1345,76,321,186,579,14,232,324,51,168,1043,45,478,707,39
    "hsa-miR-484",281,5775,2227,168,353,120,394,5,227,304,40,155,425,6,312,302,13
    "hsa-miR-1270",104,1035,434,189,226,183,433,13,224,541,39,173,999,23,1232,1187,36
    "hsa-miR-342-3p",117,6675,2169,170,245,150,555,22,220,401,57,204,694,31,356,453,28
    "hsa-miR-185-5p",94,1450,552,152,852,95,926,5,220,334,8,153,1063,8,1154,1312,9
    "hsa-let-7e-5p",173,3800,1440,90,110,89,184,7,214,228,80,157,202,38,346,308,77
    "hsa-miR-339-5p",65,1151,470,114,370,161,636,1,205,493,10,183,1428,13,833,547,30
    "hsa-miR-629-5p",26,1989,764,114,368,149,973,13,190,307,17,111,778,23,350,1726,36
    "hsa-miR-454-5p",8,2713,979,53,145,78,171,4,188,285,26,207,124,16,223,296,32
    "hsa-miR-7706",69,1304,490,114,214,156,322,0,186,212,18,47,238,18,238,603,27
    "hsa-let-7a-3p",6,4014,1498,33,270,121,279,3,179,180,31,171,431,15,485,312,10
    "hsa-miR-1226-5p",5,23,8,81,52,169,73,6,179,218,2,180,42,6,100,92,5
    "hsa-miR-3180|hsa-miR-3180-3p",7,308,87,267,659,9,680,6,171,1111,9,123,1816,36,483,629,2
    "hsa-miR-143-3p",0,14,6,83,238,127,189,1,170,242,3,179,248,3,404,394,6
    "hsa-miR-132-3p",14,4593,1814,95,345,137,252,19,168,291,65,119,194,41,333,274,98
    "hsa-miR-139-3p",33,951,297,396,1087,56,393,30,154,475,49,196,903,120,844,673,24
    "hsa-miR-3187-3p",24,348,119,74,103,18,311,0,152,128,4,96,198,4,58,598,3
    "hsa-miR-660-5p",42,14230,6086,78,241,165,367,11,151,241,34,192,483,24,474,576,36
    "hsa-miR-23b-3p",63,2574,1030,47,49,45,92,1,150,245,1,70,188,2,110,67,4
    "hsa-let-7d-3p",164,1466,508,80,222,191,248,2,148,237,22,182,254,21,122,167,8
    "hsa-miR-342-5p",35,1405,488,136,162,100,164,26,147,187,56,137,280,54,431,173,119
    "hsa-miR-339-3p",23,2262,798,66,208,109,429,1,139,143,8,149,606,9,542,765,5
    "hsa-miR-130a-3p",50,2277,947,75,94,52,251,0,139,116,1,119,272,3,269,442,6
    "hsa-miR-30c-2-3p",19,1421,668,25,92,27,164,0,129,112,20,71,185,6,309,206,28
    "hsa-miR-320c",17,25,12,214,390,41,1085,2,128,480,2,161,1304,7,477,1499,3
    "hsa-miR-18a-5p",52,6597,2513,0,0,6,94,0,124,63,5,31,31,5,43,61,2
    "hsa-miR-23a-3p",44,1148,473,47,114,65,156,3,122,134,3,50,148,5,218,144,6
    "hsa-miR-328-3p",67,1241,530,34,73,27,131,5,113,76,20,16,138,9,82,72,15
    "hsa-miR-409-3p",11,198,98,112,166,41,281,2,111,171,33,45,228,8,244,355,16
    "hsa-miR-34a-5p",100,10066,3383,65,69,26,960,19,109,351,71,132,845,50,589,1125,58
    "hsa-miR-589-5p",27,377,134,32,49,55,81,0,107,93,3,26,66,0,68,96,3
    "hsa-miR-361-5p",246,10554,3534,59,188,153,203,12,107,189,47,72,178,19,210,257,40
    "hsa-miR-1246",4,20,2,112,738,40,1106,1,106,518,1,92,995,1,413,967,3
    "hsa-miR-106b-5p",163,5590,1969,32,166,134,228,0,106,127,2,133,70,10,197,120,25
    "hsa-miR-3127-5p",12,62,22,78,117,18,257,8,101,183,10,60,379,28,350,806,16
    "hsa-miR-500a-3p",12,1217,470,66,84,32,94,6,100,73,24,60,202,17,78,178,22
    "hsa-miR-3131",2,49,31,27,187,21,143,4,97,163,0,58,169,1,162,199,2
    "hsa-miR-301b-3p",16,15397,5867,65,16,77,109,12,97,105,32,40,60,11,154,139,33
    "hsa-miR-15b-5p",609,6681,2409,83,47,45,159,1,95,104,13,89,144,13,109,58,31
    "hsa-miR-10a-5p",8,1492,607,46,94,88,144,2,94,182,6,138,116,4,118,115,14
    "hsa-miR-200b-5p",28,101,37,38,108,71,118,4,90,84,2,48,277,5,297,259,0
    "hsa-miR-361-3p",106,2463,1016,31,67,43,105,1,88,82,5,28,221,0,151,157,8
    "hsa-miR-625-5p",40,3917,1346,6,13,37,77,6,87,43,11,39,176,7,149,116,3
    "hsa-miR-887-3p",21,545,186,24,70,11,122,0,86,61,2,47,120,0,42,52,2
    "hsa-miR-708-3p",11,2405,977,32,115,87,129,1,86,104,4,69,141,3,121,160,9
    "hsa-miR-19a-3p",13,14791,6422,118,419,137,716,6,84,509,46,128,589,42,782,722,51
    "hsa-miR-330-3p",14,486,148,73,393,39,192,9,82,362,26,98,284,31,328,391,4
    "hsa-miR-671-5p",27,721,229,41,64,21,217,0,82,55,1,97,143,2,129,276,0
    "hsa-miR-324-5p",123,1886,782,16,122,30,287,6,81,81,12,137,590,13,284,290,41
    "hsa-miR-181a-3p",6,1199,510,51,23,183,28,2,80,92,7,34,135,2,134,53,13
    "hsa-miR-210-3p",68,30874,14090,18,74,33,290,28,77,189,109,43,460,64,224,348,178
    "hsa-miR-411-5p",0,1,3,25,55,8,19,0,75,7,1,6,15,0,53,116,0
    "hsa-miR-1468-5p",12,1112,404,48,59,25,171,17,74,195,27,134,274,35,161,417,36
    "hsa-let-7b-3p",15,1366,424,30,27,52,11,0,70,37,6,55,63,5,37,13,1
    "hsa-miR-324-3p",70,291,90,64,114,116,396,0,69,220,0,58,491,0,255,377,3
    "hsa-miR-340-3p",14,2099,728,0,37,56,40,1,68,47,1,90,9,6,25,19,7
    "hsa-miR-4677-3p",1,1395,547,0,35,18,23,5,68,88,14,69,21,0,38,48,7
    "hsa-miR-873-5p",8,1826,692,21,43,24,164,0,68,132,12,36,119,8,143,192,4
    "hsa-miR-652-3p",72,1113,369,14,27,0,49,0,67,59,4,16,90,2,54,62,17
    "hsa-miR-4286",20,2,0,19,79,18,75,0,66,148,0,65,199,0,201,77,0
    "hsa-miR-671-3p",19,273,115,20,46,13,146,3,66,143,6,21,155,3,140,128,5
    "hsa-miR-218-5p",68,11916,4970,30,58,41,116,3,65,121,15,70,179,3,178,137,12
    "hsa-miR-505-3p",45,687,339,10,67,1,131,0,65,24,2,71,111,3,52,156,1
    "hsa-miR-382-5p",5,245,73,57,177,18,310,1,64,125,9,12,330,20,230,1000,0
    "hsa-miR-6721-5p",15,139,52,4,0,19,0,0,62,46,1,18,18,0,30,18,0
    "hsa-miR-140-3p",36,1859,937,15,74,44,159,2,62,142,15,76,298,15,156,362,20
    "hsa-miR-5187-5p",6,44,8,7,94,1,52,1,61,92,9,78,190,12,94,92,15
    "hsa-miR-548av-5p|hsa-miR-548k",9,956,388,8,48,19,48,1,61,74,7,96,16,7,0,18,2
    "hsa-miR-1908-5p",45,125,58,26,25,36,55,0,60,34,0,46,66,0,23,106,0
    "hsa-miR-3940-5p",0,0,0,24,38,30,25,0,58,23,0,56,47,0,30,84,0
    "hsa-miR-335-3p",15,4166,1655,17,21,86,120,4,58,114,14,194,47,18,133,174,15
    "hsa-miR-15a-5p",79,3126,1105,18,67,50,142,0,58,93,8,46,131,0,170,113,2
    "hsa-miR-3605-3p",10,285,87,0,0,10,0,0,57,33,9,39,26,1,9,15,5
    "hsa-miR-18a-3p",20,396,170,60,106,23,230,0,56,130,11,85,288,12,151,189,16
    "hsa-miR-4788",17,274,106,13,23,20,54,5,52,26,4,24,57,0,138,80,4
    "hsa-miR-3200-3p",24,650,272,16,51,21,154,2,51,29,6,46,102,7,41,67,15
    "hsa-miR-219a-1-3p",9,316,105,198,223,13,557,0,51,232,10,51,691,30,240,1145,19
    "hsa-miR-330-5p",1,238,118,0,0,20,0,0,49,0,2,0,0,0,15,0,0
    "hsa-miR-940",30,127,45,11,16,37,100,0,49,123,0,0,171,0,107,92,0
    "hsa-miR-3174",7,174,56,16,43,1,0,2,49,68,3,34,45,1,69,14,1
    "hsa-miR-6730-5p",6,14,1,13,43,0,7,1,48,41,0,33,67,1,11,27,0
    "hsa-miR-151b",18,13,4,16,36,31,104,0,46,80,0,32,144,0,92,121,0
    "hsa-miR-504-3p",0,4,1,7,72,0,6,2,46,24,0,0,117,4,16,13,0
    "hsa-miR-129-5p",4,301,92,23,74,0,183,0,44,42,0,10,229,2,81,502,2
    "hsa-miR-769-3p",56,1608,582,21,25,0,111,0,44,56,2,51,126,0,23,83,3
    "hsa-miR-4326",1,167,69,0,0,12,34,0,42,15,3,19,48,0,20,2,1
    "hsa-miR-1276",9,135,44,0,0,0,0,0,42,17,0,0,49,0,14,0,0
    "hsa-miR-6847-5p",3,36,9,0,0,0,0,0,41,0,0,17,4,1,13,52,0
    "hsa-miR-3605-5p",4,366,109,102,178,19,134,18,40,215,20,16,383,26,280,115,18
    "hsa-miR-33b-3p",22,16,8,0,0,31,0,0,38,12,0,35,1,0,10,12,0
    "hsa-miR-335-5p",2,1518,535,33,56,46,145,10,38,25,5,65,55,14,136,91,10
    "hsa-miR-877-3p",14,269,91,14,28,13,11,1,37,9,1,12,72,0,0,20,4
    "hsa-miR-148b-5p",7,1317,512,3,7,0,22,1,36,14,5,16,64,5,50,0,8
    "hsa-miR-128-1-5p",32,385,158,14,2,2,12,0,36,49,6,11,59,2,67,40,8
    "hsa-miR-576-3p",2,391,138,28,0,12,88,2,36,0,9,49,98,0,28,107,1
    "hsa-miR-320d",5,2,1,65,177,11,289,0,35,131,1,41,382,0,170,389,0
    "hsa-miR-3928-3p",11,114,44,36,175,16,201,0,35,105,0,71,305,5,150,299,3
    "hsa-miR-6891-5p",0,0,1,1,0,4,6,0,35,5,0,21,10,0,10,12,0
    "hsa-miR-4476",1,11,1,5,0,14,7,0,35,9,0,0,8,0,13,0,0
    "hsa-miR-937-3p",9,105,61,17,19,56,22,2,34,74,12,23,44,6,4,87,18
    "hsa-miR-204-5p",0,155,59,5,27,0,0,6,33,33,0,0,15,0,58,16,1
    "hsa-miR-486-3p",14,342,101,21,16,1,75,0,33,56,10,6,70,0,90,73,0
    "hsa-miR-151a-5p",38,913,310,0,17,0,7,0,33,41,1,18,26,1,2,38,9
    "hsa-miR-659-5p",1,132,56,7,0,12,8,0,33,0,0,0,10,3,0,0,0
    "hsa-miR-144-5p",0,26,8,1,0,29,29,0,32,30,5,37,6,2,1,9,0
    "hsa-miR-501-3p",4,568,174,20,88,6,90,0,32,74,10,62,64,14,32,204,4
    "hsa-miR-197-5p",3,5,7,13,51,0,26,0,31,34,3,15,65,3,20,19,0
    "hsa-miR-1301-3p",66,587,208,11,53,23,100,0,31,55,7,15,51,1,32,21,16
    "hsa-miR-5010-5p",2,7,5,15,190,35,40,0,30,79,10,28,141,5,45,61,12
    "hsa-miR-193b-3p",20,776,302,40,64,12,67,1,30,96,13,34,263,2,31,130,18
    "hsa-miR-2682-5p",1,120,63,5,0,17,44,0,30,117,11,0,70,4,166,94,1
    "hsa-miR-191-3p",16,99,54,3,35,2,20,0,30,50,0,23,57,0,12,75,3
    "hsa-miR-577",3,2156,808,8,37,0,61,0,30,27,13,0,51,2,58,36,10
    "hsa-miR-126-5p",1,50,21,10,0,27,31,0,30,14,0,9,0,0,64,28,0
    "hsa-miR-1296-5p",65,740,299,9,33,34,103,6,30,77,11,18,129,15,69,34,10
    "hsa-miR-193b-5p",3,59,11,73,149,10,136,5,29,125,0,17,155,17,78,129,11
    "hsa-miR-93-3p",32,853,334,19,61,59,192,4,29,39,15,41,173,7,104,88,2
    "hsa-miR-149-3p",2,7,5,6,0,31,15,1,28,32,1,22,40,2,23,16,0
    "hsa-miR-16-2-3p",17,1180,441,20,73,26,71,1,28,68,5,36,95,4,96,50,17
    "hsa-miR-3074-5p",4,74,40,5,0,12,72,2,28,35,4,24,32,1,14,11,0
    "hsa-miR-4667-5p",0,14,2,12,52,13,53,2,27,38,0,40,105,12,25,34,0
    "hsa-miR-27a-3p",2,1136,434,8,10,36,70,1,26,30,0,11,26,0,46,16,1
    "hsa-miR-3158-3p",1,105,32,0,0,9,72,0,26,28,0,0,47,3,22,58,0
    "hsa-miR-331-3p",33,667,212,0,25,0,18,0,26,39,5,22,50,4,37,29,0
    "hsa-miR-149-5p",29,814,431,45,25,63,64,1,26,104,11,90,216,6,133,103,40
    "hsa-miR-451a",0,9,5,25,55,78,126,10,25,71,4,76,111,3,195,75,12
    "hsa-miR-7854-3p",1,5,4,31,7,0,6,0,25,12,0,9,29,6,17,12,0
    "hsa-miR-126-3p",6,620,318,10,47,35,1,0,25,0,0,21,22,3,77,28,0
    "hsa-miR-374b-5p",5,703,263,0,0,11,24,0,25,82,0,0,10,0,26,0,9
    "hsa-miR-6858-5p",0,0,0,16,0,9,11,0,24,0,0,0,39,0,59,0,2
    "hsa-miR-432-5p",0,1,2,25,23,50,78,6,24,103,4,48,115,5,202,85,16
    "hsa-miR-146a-5p",6,792,253,25,24,18,22,0,24,11,5,16,95,10,34,16,10
    "hsa-miR-502-3p",4,78,18,4,0,0,14,0,24,8,0,4,26,0,22,22,0
    "hsa-miR-221-3p",0,0,0,0,10,45,68,0,23,23,7,31,43,2,90,58,0
    "hsa-miR-3127-3p",0,4,2,0,0,0,0,0,23,0,0,0,19,0,18,13,0
    "hsa-miR-210-5p",2,1624,638,0,3,51,0,0,23,17,7,49,12,1,0,0,7
    "hsa-miR-4739",0,0,0,0,0,0,0,0,23,0,0,0,13,1,0,0,2
    "hsa-miR-1269a",0,24,17,0,0,0,0,0,23,12,0,0,36,1,16,0,0
    "hsa-miR-550a-3-5p|hsa-miR-550a-5p",6,267,96,0,9,0,32,0,22,14,2,12,22,0,38,23,0
    "hsa-miR-2277-5p",2,99,43,0,1,11,19,0,22,18,0,0,11,0,0,10,0
    "hsa-miR-3909",2,445,148,0,17,0,31,0,22,53,2,0,20,0,0,11,0
    "hsa-miR-3200-5p",1,38,11,0,21,0,24,0,22,0,0,0,15,0,11,21,0
    "hsa-miR-20b-5p",19,2713,900,0,20,0,19,0,22,0,1,22,1,0,1,20,0
    "hsa-miR-766-5p",5,11,4,15,196,13,129,5,21,116,6,54,157,0,93,159,0
    "hsa-miR-652-5p",1,41,34,0,0,0,0,0,21,0,0,0,0,0,0,0,0
    "hsa-miR-6514-5p",7,75,39,46,0,1,64,0,21,5,0,13,32,0,36,59,2
    "hsa-miR-33a-5p",1,13,13,0,2,10,31,0,21,0,0,4,0,0,49,6,0
    "hsa-miR-4743-5p",1,5,8,0,0,0,39,0,21,6,0,5,0,0,45,15,0
    "hsa-miR-4786-5p",1,34,13,4,0,7,0,0,21,26,0,0,0,4,0,16,0
    "hsa-miR-29b-1-5p",1,24,12,0,1,21,0,1,21,0,0,23,0,0,0,0,0
    "hsa-miR-1292-5p",2,27,6,0,53,0,85,0,20,24,5,14,57,2,10,95,0
    "hsa-miR-6793-5p",0,12,5,2,19,0,0,0,20,38,0,0,33,4,0,6,1
    "hsa-miR-27b-5p",13,1177,434,0,0,24,27,0,20,92,1,14,94,4,88,42,8
    "hsa-miR-6734-5p",0,5,1,0,0,0,0,0,19,0,0,0,7,0,19,0,0
    "hsa-miR-1538",1,27,18,0,0,0,18,0,19,7,0,0,32,0,7,37,0
    "hsa-miR-195-5p",18,1300,537,9,20,10,11,0,19,0,0,1,34,0,0,12,0
    "hsa-miR-4750-5p",3,38,14,26,11,0,28,1,19,54,0,12,49,8,27,115,0
    "hsa-miR-6894-5p",1,1,3,15,21,6,0,0,18,0,0,0,9,0,0,30,4
    "hsa-miR-504-5p",41,108,41,0,16,7,4,0,18,39,3,17,27,0,9,9,0
    "hsa-miR-382-3p",0,8,5,0,7,0,0,0,18,0,0,0,0,0,13,41,1
    "hsa-miR-4741",0,5,0,0,0,0,42,0,18,21,0,0,20,0,11,21,0
    "hsa-miR-363-3p",11,630,202,0,0,0,9,0,18,14,2,6,1,0,0,0,1
    "hsa-miR-6794-5p",1,9,4,55,59,20,53,1,18,77,0,17,147,3,88,57,0
    "hsa-miR-212-3p",22,269,136,8,25,7,11,0,17,47,0,0,15,2,0,59,1
    "hsa-miR-3188",12,93,30,27,0,0,47,2,17,0,3,14,13,5,0,48,0
    "hsa-miR-190a-5p",0,2441,792,0,0,2,25,0,17,0,2,0,11,0,17,42,2
    "hsa-miR-939-5p",1,71,15,11,0,0,7,0,17,0,0,0,13,0,0,11,0
    "hsa-miR-4746-5p",5,208,65,16,58,29,22,0,17,55,4,50,69,2,144,102,7
    "hsa-miR-3613-3p",1,119,35,12,42,19,16,2,16,28,1,26,0,1,10,0,5
    "hsa-miR-3682-3p",0,4,0,0,0,0,0,0,16,0,0,0,5,0,0,0,0
    "hsa-miR-505-5p",4,111,48,23,10,8,25,3,16,17,0,13,35,1,19,0,0
    "hsa-miR-3130-5p",0,10,3,0,0,0,0,0,16,0,0,0,18,0,15,0,0
    "hsa-miR-4738-3p",0,0,0,0,24,0,24,0,16,0,1,0,0,0,0,0,0
    "hsa-miR-4646-5p",0,10,0,0,24,0,16,1,16,7,1,0,42,1,11,9,3
    "hsa-miR-6867-5p",2,9,0,0,0,0,28,0,16,7,2,11,42,0,12,31,0
    "hsa-miR-7111-3p",1,5,6,10,0,0,18,0,15,0,0,0,14,0,0,0,0
    "hsa-miR-127-3p",0,36,12,13,19,6,20,0,15,8,14,36,5,3,57,24,5
    "hsa-miR-6726-5p",0,0,3,15,49,0,0,0,15,24,0,13,46,0,32,48,0
    "hsa-miR-1225-3p",0,1,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0
    "hsa-miR-590-3p",0,463,234,0,0,21,10,0,15,13,2,10,0,0,0,9,1
    "hsa-miR-4660",2,8,1,0,0,0,0,0,15,0,0,0,0,0,0,0,0
    "hsa-miR-6738-3p",0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0
    "hsa-miR-1307-5p",19,354,185,14,54,23,66,0,14,71,0,59,86,2,175,68,4
    "hsa-miR-629-3p",0,12,11,0,0,0,10,0,14,0,0,4,11,0,0,3,0
    "hsa-miR-6862-5p",2,6,4,17,19,0,31,0,14,36,0,0,51,0,31,42,0
    "hsa-miR-412-5p",0,1,1,4,28,0,7,0,14,34,0,0,18,0,0,40,0
    "hsa-miR-1253",1,1,0,0,9,0,13,0,14,0,0,0,7,0,29,8,0
    "hsa-miR-98-3p",1,423,133,0,34,9,31,0,14,0,0,11,14,0,4,0,0
    "hsa-miR-33b-5p",14,0,0,10,15,22,35,0,14,42,0,25,21,0,21,27,0
    "hsa-miR-6877-5p",6,91,34,5,30,7,69,0,14,46,0,0,46,1,37,45,0
    "hsa-miR-664a-3p",0,89,27,20,0,0,0,0,14,14,0,0,7,0,0,0,0
    "hsa-miR-4707-3p",2,56,32,33,15,0,84,0,14,0,2,7,73,5,42,101,0
    "hsa-miR-206",69,81,28,280,125,56,142,2,14,135,4,115,649,4,371,329,7
    "hsa-miR-943",1,51,32,0,21,0,0,0,13,0,0,0,0,0,0,0,0
    "hsa-miR-3138",0,0,0,0,0,0,16,0,13,15,0,0,0,0,21,0,0
    "hsa-miR-26b-3p",3,210,77,12,0,0,0,8,13,7,0,23,18,0,45,0,1
    "hsa-miR-152-3p",10,854,328,8,51,0,24,3,13,33,5,17,67,0,0,88,9
    "hsa-miR-487b-3p",0,42,11,0,0,0,33,0,13,0,0,13,16,0,0,22,0
    "hsa-miR-99b-3p",0,3,0,0,0,0,0,0,13,6,2,0,0,0,0,0,0
    "hsa-miR-195-3p",10,477,159,0,106,0,14,0,13,19,3,0,18,0,19,21,0
    "hsa-miR-942-5p",4,133,32,17,0,11,10,0,13,0,1,0,49,0,0,5,0
    "hsa-miR-155-5p",8,128,33,41,23,49,13,0,13,144,36,0,78,1,78,61,18
    "hsa-miR-1908-3p",1,14,9,0,0,0,25,0,13,0,0,0,11,0,0,5,0
    "hsa-miR-3180-5p",0,4,1,0,0,0,0,0,13,0,0,0,0,0,0,0,0
    "hsa-miR-4763-3p",1,0,0,0,4,0,0,0,13,8,0,14,0,0,0,0,0
    "hsa-miR-4466",0,6,1,0,0,0,0,0,13,0,0,0,0,0,0,0,0
    "hsa-miR-4520-3p",0,22,14,30,19,13,59,5,12,17,9,15,134,7,127,94,4
    "hsa-miR-3918",0,7,1,12,0,0,11,0,12,28,2,0,10,0,32,24,0
    "hsa-miR-6812-5p",0,0,2,0,0,16,0,0,12,0,1,9,8,0,6,0,0
    "hsa-miR-501-5p",24,321,121,0,16,0,21,0,12,0,0,21,50,0,0,85,4
    "hsa-miR-6788-5p",0,9,1,5,8,0,0,2,12,19,0,0,3,0,9,7,0
    "hsa-let-7f-2-3p",0,334,104,8,13,0,10,0,12,26,4,39,33,3,55,19,0
    "hsa-miR-5002-5p",1,11,2,0,0,17,0,0,12,0,0,0,0,0,0,0,0
    "hsa-miR-4421",6,173,71,0,7,16,17,0,12,67,0,0,101,0,40,60,0
    "hsa-miR-3691-3p",0,9,3,0,0,0,0,0,12,20,2,0,0,0,0,12,0
    "hsa-miR-654-3p",4,44,22,14,13,13,33,0,11,21,0,4,32,0,31,27,1
    "hsa-miR-6785-5p",1,0,0,0,18,0,0,0,11,11,0,0,4,0,0,0,0
    "hsa-miR-96-5p",7,3263,1378,0,40,41,28,11,11,16,3,19,8,3,36,46,1
    "hsa-miR-616-3p",0,13,4,0,10,0,43,0,11,67,0,0,76,3,39,96,0
    "hsa-miR-503-5p",2,81,27,0,0,0,10,0,11,0,0,0,0,0,0,1,0
    "hsa-miR-500a-5p",6,242,80,0,0,0,0,0,11,0,0,0,0,0,9,0,0
    "hsa-miR-222-3p",0,0,0,0,19,15,14,0,11,0,1,0,21,0,0,0,2
    "hsa-miR-2116-3p",1,19,6,0,0,0,0,0,11,0,0,0,0,0,0,6,0
    "hsa-miR-1233-3p",0,3,1,0,0,0,0,0,11,0,0,0,0,0,0,0,0
    "hsa-miR-5188",0,0,2,0,0,0,0,0,11,0,0,0,0,0,0,0,0
    "hsa-miR-4423-5p",0,0,1,0,0,0,0,0,11,0,0,0,7,0,0,0,0
    "hsa-miR-6765-5p",0,0,0,0,0,0,0,0,11,0,0,0,17,0,0,0,0
    "hsa-miR-139-5p",31,549,271,10,110,15,35,1,11,2,4,2,30,3,0,45,8
    "hsa-miR-3663-3p",0,9,1,9,17,0,28,0,10,0,0,14,18,0,17,16,0
    "hsa-miR-1255b-5p",0,13,8,0,6,0,21,0,10,24,0,0,0,6,27,20,0
    "hsa-miR-1910-5p",3,3,1,8,0,0,31,0,10,14,0,0,10,0,0,59,0
    "hsa-miR-4647",2,8,0,0,18,0,29,0,10,29,0,27,77,0,64,57,6
    "hsa-miR-1343-3p",0,26,10,0,0,0,0,0,10,0,0,0,0,0,0,8,0
    "hsa-miR-106a-5p",14,1490,566,2,12,4,20,0,10,16,2,2,4,0,10,29,2
    "hsa-miR-378i",7,0,0,14,66,3,180,0,10,51,0,14,120,0,73,245,0
    "hsa-miR-3691-5p",0,83,32,26,0,0,0,0,9,10,1,0,5,0,3,17,0
    "hsa-miR-6855-5p",1,2,1,0,0,0,0,0,9,0,0,0,0,0,0,0,0
    "hsa-miR-3679-5p",6,33,14,4,28,0,15,0,9,8,0,0,54,1,48,81,0
    "hsa-miR-4748",1,4,1,31,0,1,30,2,9,9,0,0,35,0,24,14,0
    "hsa-miR-223-5p",0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0
    "hsa-miR-5581-3p",0,8,3,0,0,0,0,0,9,3,0,0,0,0,11,0,0
    "hsa-miR-4706",0,0,2,0,18,0,23,0,9,0,0,18,27,0,0,0,0
    "hsa-miR-2110",9,107,35,19,60,5,39,0,9,3,2,13,51,2,84,29,4
    "hsa-miR-1305",0,63,16,0,0,0,0,0,9,0,0,0,0,0,0,0,0
    "hsa-miR-1286",3,10,7,0,28,1,13,0,9,11,0,1,27,1,19,28,0
    "hsa-miR-4747-5p",0,0,2,0,18,0,3,0,9,19,0,3,28,0,14,5,6
    "hsa-miR-17-3p",10,289,119,0,0,0,0,0,9,0,0,19,27,1,15,25,0
    "hsa-miR-365a-5p",2,112,20,12,7,11,8,2,9,11,7,0,36,5,15,27,8
    "hsa-miR-133b",4,72,34,0,22,8,51,0,8,32,1,18,60,0,25,32,1
    "hsa-miR-150-5p",2,42,12,10,22,29,0,0,8,0,0,18,0,1,14,6,0
    "hsa-miR-889-3p",1,5,7,0,0,0,0,0,8,7,0,0,0,0,8,0,0
    "hsa-miR-30b-3p",29,535,167,43,0,12,25,0,8,0,0,0,39,0,48,8,20
    "hsa-miR-4707-5p",0,28,8,0,25,0,0,0,8,0,0,8,0,0,0,19,0
    "hsa-miR-6802-5p",1,7,1,0,0,0,0,0,8,0,0,0,0,0,6,6,0
    "hsa-miR-4648",0,9,0,0,0,9,7,0,8,0,0,0,10,0,0,16,0
    "hsa-miR-6747-5p",0,0,0,0,0,0,0,0,8,0,0,0,2,5,0,0,0
    "hsa-miR-676-3p",4,153,61,0,0,19,54,0,8,0,0,0,56,0,12,49,1
    "hsa-miR-5009-5p",0,11,0,0,13,0,0,0,8,3,0,18,11,1,0,34,0
    "hsa-miR-4749-5p",1,27,2,4,4,8,6,0,8,0,3,0,18,1,24,15,0
    "hsa-miR-1224-3p",9,95,87,18,0,41,0,0,8,0,5,22,5,0,0,8,0
    "hsa-miR-4689",1,14,0,0,45,0,0,0,8,19,0,0,30,1,21,8,2
    "hsa-miR-532-3p",32,357,124,10,24,21,48,0,8,9,3,50,109,0,84,94,3
    "hsa-miR-30c-1-3p",10,115,56,6,10,0,5,0,8,0,1,0,6,0,0,0,1
    "hsa-miR-6845-5p",0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,5,0
    "hsa-miR-6871-5p",0,14,5,0,0,5,23,0,7,11,0,0,21,6,11,31,0
    "hsa-miR-212-5p",7,237,86,0,0,0,0,0,7,9,0,0,0,0,0,0,0
    "hsa-miR-6859-5p",0,6,1,0,0,0,5,2,7,0,3,0,14,2,10,16,0
    "hsa-miR-185-3p",4,73,16,0,0,1,9,1,7,0,0,0,6,0,0,15,0
    "hsa-miR-6882-5p",0,44,24,0,16,0,6,0,7,13,0,0,8,0,0,22,0
    "hsa-miR-1250-5p",1,89,29,0,5,0,10,0,7,0,0,0,0,0,0,6,0
    "hsa-miR-6500-3p",2,31,17,0,0,0,0,0,7,0,0,0,0,2,16,4,0
    "hsa-miR-543",3,31,7,12,0,11,14,0,7,28,7,0,34,1,59,14,0
    "hsa-miR-6817-3p",2,1,1,10,23,0,7,0,6,0,0,5,13,0,0,0,0
    "hsa-miR-6803-3p",1,45,15,0,0,11,10,0,6,0,0,0,0,0,22,12,0
    "hsa-miR-4668-5p",0,7,1,0,0,0,0,0,6,0,0,0,0,0,0,0,0
    "hsa-miR-7111-5p",0,0,0,0,11,0,0,0,6,0,0,0,0,0,10,0,0
    "hsa-miR-6825-5p",0,3,1,5,7,0,0,0,6,52,0,0,13,2,25,8,0
    "hsa-miR-3944-3p",0,6,4,0,0,0,12,0,6,0,0,0,0,0,0,0,0
    "hsa-miR-15b-3p",6,538,204,0,1,9,10,0,6,0,1,0,20,0,6,23,3
    "hsa-miR-323a-3p",0,4,0,0,0,0,0,0,5,0,0,0,0,2,0,0,0
    "hsa-miR-26a-2-3p",0,114,47,4,0,0,0,0,5,0,0,0,0,0,13,0,0
    "hsa-miR-1275",9,13,1,0,9,0,19,0,5,17,0,0,12,0,20,7,0
    "hsa-miR-7977",1,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0
    "hsa-miR-1343-5p",0,1,2,2,0,0,0,0,5,19,0,0,4,0,0,26,0
    "hsa-miR-3159",0,55,20,0,0,0,16,2,5,14,0,0,0,0,0,16,0
    "hsa-miR-194-3p",1,32,12,0,0,0,0,0,5,0,0,0,0,0,0,15,0
    "hsa-mir-378c",1,18,14,49,18,20,77,0,5,13,0,18,40,1,175,755,0
    "hsa-miR-204-3p",0,0,0,15,0,0,12,0,4,18,0,10,0,0,0,12,0
    "hsa-miR-4467",1,12,5,0,0,0,0,0,4,0,0,0,0,0,0,0,0
    "hsa-miR-3115",4,16,4,0,0,1,33,0,4,13,0,0,15,0,0,23,0
    "hsa-miR-6128",0,0,0,3,21,2,50,0,4,13,0,4,39,0,23,66,0
    "hsa-miR-378d",1,17,8,5,15,1,58,1,4,14,0,5,46,0,17,75,0
    "hsa-miR-6747-3p",2,35,12,0,0,0,0,0,3,0,0,0,0,0,0,0,0
    "hsa-miR-664a-5p",2,2,3,0,0,0,1,0,3,0,0,0,5,0,24,2,0
    "hsa-miR-99a-5p",0,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0
    "hsa-miR-320e",3,6,4,7,5,0,20,0,3,13,0,0,22,2,15,33,0
    "hsa-miR-1290",0,1,1,4,35,2,10,0,3,9,0,4,14,0,5,9,0
    "hsa-miR-4687-3p",1,14,6,1,0,0,7,0,3,0,0,0,16,0,24,0,0
    "hsa-miR-6838-5p",0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0
    "hsa-miR-6807-5p",0,10,0,0,0,0,45,0,2,0,0,0,0,0,19,0,0
    "hsa-miR-6511a-5p|hsa-miR-6511b-5p",4,88,57,5,15,0,15,1,2,32,0,18,22,3,11,8,0
    "hsa-miR-3064-5p",2,25,10,0,0,0,4,0,2,0,0,0,0,0,0,0,0
    "hsa-miR-374a-3p",2,2288,964,0,13,60,8,2,2,0,1,30,18,2,31,6,3
    "hsa-miR-378g",2,18,6,6,16,0,85,1,2,2,1,0,62,4,23,80,0
    "hsa-miR-4429",0,0,0,1,4,0,5,0,2,1,0,2,7,0,7,19,0
    "hsa-miR-144-3p",0,0,0,0,0,0,21,0,2,12,0,17,0,0,0,0,0
    "hsa-let-7c",1,15,10,0,0,0,0,0,2,1,0,0,0,0,0,5,0
    "hsa-miR-7974",14,878,330,0,0,0,0,0,1,0,0,0,0,0,0,0,0
    "hsa-miR-1271-5p",0,36,16,0,0,0,0,0,1,0,0,0,1,0,0,0,0
    "hsa-miR-654-5p",5,65,24,0,8,9,0,0,1,0,6,0,6,0,0,23,0
    "hsa-miR-2467-5p",0,318,125,3,18,18,10,0,1,28,0,3,31,3,15,1,0
    "hsa-miR-1244",0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
    "hsa-miR-373-5p",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
    "hsa-miR-550a-3p",8,141,61,0,14,0,0,0,1,0,0,0,25,0,18,18,0
    "hsa-miR-6511a-5p",0,10,3,18,0,11,0,0,1,0,0,0,0,0,0,11,0
    "hsa-miR-378e",1,11,12,2,13,0,20,0,1,4,0,0,24,0,21,52,0
    "hsa-miR-378h",1,2,1,5,9,0,28,0,1,11,1,3,20,0,13,33,0
    "hsa-miR-374c-5p",0,4,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0
    "hsa-miR-4318",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548i",0,20,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4520-2-3p",0,4,1,0,0,0,0,0,0,0,0,0,6,0,31,4,0
    "hsa-miR-215-5p",2,13,6,0,0,3,2,0,0,4,0,2,2,0,10,2,0
    "hsa-mir-196a-2",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-550a-3-5p",1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
    "hsa-miR-548av-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-107",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-28",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-373-3p",0,2,2,22,35,0,346,0,0,136,1,0,164,0,0,934,2
    "hsa-miR-371a-5p",0,2,0,8,15,0,28,0,0,136,0,0,9,0,17,88,3
    "hsa-miR-4661-5p",0,31,10,0,0,15,0,0,0,61,0,0,18,0,0,30,0
    "hsa-miR-765",0,6,10,0,0,17,0,0,0,53,0,0,24,0,0,8,0
    "hsa-miR-6777-5p",3,11,11,68,69,11,47,12,0,47,4,1,66,7,60,42,0
    "hsa-miR-29a-3p",12,902,352,0,0,0,25,5,0,45,16,0,24,3,0,21,0
    "hsa-miR-27a-5p",0,496,187,0,0,14,0,0,0,45,1,0,0,1,17,26,1
    "hsa-miR-4664-3p",4,48,26,0,9,0,29,0,0,40,3,0,8,2,0,54,0
    "hsa-miR-6767-5p",2,42,2,24,0,0,54,7,0,37,2,0,65,8,76,76,10
    "hsa-miR-125b-2-3p",1,384,152,0,35,18,1,0,0,36,3,13,29,0,16,45,7
    "hsa-miR-378a-5p",7,254,65,0,0,6,7,0,0,36,0,0,1,0,19,9,1
    "hsa-miR-744-3p",1,47,26,0,45,0,24,0,0,35,0,18,33,4,8,15,2
    "hsa-miR-6780a-5p",0,16,5,3,14,0,11,1,0,33,1,0,44,6,0,15,14
    "hsa-miR-10b-3p",10,101,32,22,19,20,86,0,0,30,0,0,15,0,10,52,0
    "hsa-miR-4511",0,23,7,0,0,0,0,0,0,30,0,0,26,0,0,7,0
    "hsa-miR-1306-5p",15,89,22,14,46,8,7,0,0,28,0,23,32,2,38,11,8
    "hsa-miR-6511a-3p",5,155,39,18,0,0,2,0,0,27,3,0,4,0,0,0,1
    "hsa-miR-365b-5p",0,4,8,7,16,0,0,0,0,27,3,0,16,0,13,11,5
    "hsa-miR-6884-5p",0,3,2,0,0,0,0,0,0,26,0,17,11,0,0,0,0
    "hsa-miR-1287-5p",6,133,48,19,14,0,55,0,0,26,1,16,60,0,24,46,1
    "hsa-miR-6805-5p",0,3,6,0,9,8,0,0,0,24,0,0,0,0,0,0,4
    "hsa-miR-28-3p",3,97,52,22,20,13,47,1,0,23,15,28,79,12,31,58,17
    "hsa-miR-548b-5p",6,158,63,0,0,0,9,0,0,23,0,15,35,0,0,7,0
    "hsa-miR-548l",1,77,26,0,8,15,0,0,0,22,2,19,0,0,7,18,0
    "hsa-miR-125b-5p",3,125,51,20,0,0,0,0,0,21,2,0,5,0,0,2,0
    "hsa-miR-6751-5p",1,1,1,3,0,0,0,0,0,20,1,0,10,0,0,0,0
    "hsa-miR-3934-5p",3,149,66,4,0,13,7,6,0,20,6,18,27,9,38,38,0
    "hsa-miR-548av-3p|hsa-miR-548o-3p",3,137,47,0,0,26,17,0,0,19,0,0,4,2,0,0,0
    "hsa-miR-493-5p",0,6,3,0,41,0,3,0,0,19,0,0,25,4,64,34,0
    "hsa-miR-548u",0,20,8,0,0,0,0,0,0,19,0,0,0,0,0,0,0
    "hsa-miR-3929",0,42,10,0,7,0,3,0,0,18,0,0,16,0,0,19,0
    "hsa-miR-3133",0,6,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0
    "hsa-miR-548e-3p",0,30,12,0,0,0,0,0,0,18,0,0,0,0,0,0,0
    "hsa-miR-548b-3p",0,46,12,0,0,0,0,0,0,18,0,0,0,0,5,11,0
    "hsa-miR-4665-5p",1,3,2,0,11,0,0,0,0,17,1,0,9,1,0,13,0
    "hsa-miR-194-5p",4,702,285,7,0,0,0,0,0,17,1,23,0,0,0,11,2
    "hsa-miR-1273d",0,8,5,0,0,0,0,0,0,17,0,0,5,0,0,0,0
    "hsa-miR-372-3p",0,0,1,0,0,0,0,0,0,17,0,0,0,0,0,20,0
    "hsa-miR-199a-3p|hsa-miR-199b-3p",2,72,31,8,0,21,26,0,0,17,0,15,8,0,47,32,0
    "hsa-miR-641",5,417,159,0,20,40,17,0,0,17,0,0,44,0,60,16,10
    "hsa-miR-628-3p",1,44,20,0,17,0,0,0,0,17,0,0,0,0,0,0,0
    "hsa-let-7f-1-3p",1,60,28,0,3,0,0,0,0,16,0,9,0,0,18,0,0
    "hsa-miR-323b-3p",0,0,1,0,13,0,0,0,0,16,0,6,14,0,0,25,0
    "hsa-miR-3173-3p",1,8,1,0,0,3,0,0,0,16,0,0,0,0,30,0,1
    "hsa-miR-6832-5p",1,31,7,14,0,0,0,0,0,15,2,0,16,1,59,15,3
    "hsa-miR-3938",2,95,19,0,0,0,10,0,0,15,12,0,31,0,11,9,9
    "hsa-miR-3198",0,3,8,0,0,0,35,0,0,15,5,10,28,0,38,0,0
    "hsa-miR-6778-5p",0,0,0,0,0,0,0,0,0,15,0,0,3,0,0,0,0
    "hsa-miR-3065-5p",16,943,345,9,0,66,26,0,0,15,6,36,72,1,91,48,6
    "hsa-miR-3911",2,27,14,0,0,0,18,0,0,15,0,0,18,0,6,0,0
    "hsa-miR-3620-3p",0,6,5,0,0,0,0,0,0,15,0,0,0,0,0,0,0
    "hsa-miR-6799-5p",0,0,0,5,0,0,0,0,0,14,0,0,0,0,0,0,0
    "hsa-miR-6765-3p",2,11,2,0,0,8,0,0,0,14,0,0,6,0,0,0,0
    "hsa-miR-5189-5p",0,0,0,0,0,0,0,0,0,14,2,0,19,2,12,0,0
    "hsa-miR-449a",0,122,49,0,0,0,4,0,0,14,0,13,0,0,0,0,0
    "hsa-miR-346",2,60,19,0,0,0,0,0,0,14,0,0,32,0,22,3,2
    "hsa-miR-5088-5p",0,6,0,0,0,0,16,0,0,14,3,0,37,0,0,0,0
    "hsa-miR-1277-5p",0,234,109,0,0,0,0,0,0,13,0,0,10,0,0,0,0
    "hsa-miR-642a-5p",4,107,47,0,0,0,0,0,0,13,2,0,12,0,0,14,0
    "hsa-miR-5001-5p",1,12,4,0,50,0,67,0,0,13,5,0,21,1,24,68,0
    "hsa-miR-184",0,6,2,6,32,5,19,1,0,13,0,0,11,1,1,23,8
    "hsa-miR-6801-5p",0,5,2,0,0,0,3,0,0,13,0,0,0,0,19,0,0
    "hsa-miR-5683",0,40,14,0,0,0,0,0,0,13,0,0,0,0,0,0,0
    "hsa-miR-6800-5p",0,0,2,11,0,28,27,0,0,12,0,17,8,0,0,0,0
    "hsa-miR-152-5p",0,23,3,0,0,0,9,0,0,12,0,0,0,0,0,0,0
    "hsa-miR-4783-3p",0,12,10,0,0,0,0,0,0,12,0,0,0,0,0,11,1
    "hsa-miR-6780b-5p",0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0
    "hsa-miR-6511b-3p",11,202,61,13,11,0,5,0,0,12,2,14,31,0,17,36,10
    "hsa-miR-485-3p",4,7,13,0,0,0,14,0,0,12,2,0,0,4,0,0,0
    "hsa-miR-4658",0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0
    "hsa-miR-5699-3p",0,2,1,0,0,0,12,0,0,11,0,0,0,0,0,0,0
    "hsa-miR-6764-5p",6,54,22,0,0,0,35,0,0,11,0,0,14,0,0,40,0
    "hsa-miR-5697",0,3,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0
    "hsa-miR-365a-3p|hsa-miR-365b-3p",0,542,170,8,0,0,9,0,0,11,1,4,1,0,13,0,0
    "hsa-miR-6755-5p",0,9,7,0,0,0,4,0,0,11,0,0,11,0,0,0,0
    "hsa-miR-3124-5p",4,15,5,0,21,7,6,0,0,11,0,0,20,2,27,26,0
    "hsa-miR-2276-3p",0,1,3,0,0,0,0,0,0,10,0,0,8,0,13,14,0
    "hsa-miR-6790-5p",0,0,1,0,0,0,0,0,0,10,0,13,15,0,0,12,0
    "hsa-miR-615-3p",23,446,190,0,5,0,32,0,0,10,0,38,53,1,0,45,0
    "hsa-miR-183-3p",12,374,85,0,0,9,0,0,0,10,2,12,0,0,16,0,0
    "hsa-miR-551b-3p",9,241,88,8,0,8,10,0,0,10,0,0,0,0,0,11,1
    "hsa-miR-301a-5p",1,398,153,0,1,0,0,0,0,10,0,0,0,0,16,7,0
    "hsa-miR-6727-5p",0,1,2,0,8,0,0,0,0,10,0,0,2,0,37,6,0
    "hsa-miR-141-5p",4,905,355,1,0,0,10,0,0,10,6,20,0,0,0,0,2
    "hsa-miR-1306-3p",19,141,42,0,0,14,52,0,0,9,0,0,53,0,52,37,7
    "hsa-miR-656-3p",0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0
    "hsa-miR-1226-3p",7,98,45,6,0,0,4,0,0,9,0,0,0,2,0,3,0
    "hsa-miR-2278",0,1,1,0,0,0,0,0,0,9,0,0,0,0,0,0,0
    "hsa-miR-1237-3p",8,16,3,7,0,0,6,0,0,9,0,0,9,0,6,13,0
    "hsa-miR-636",1,12,2,0,8,0,0,0,0,8,0,0,0,0,0,0,0
    "hsa-miR-1225-5p",0,0,0,0,0,0,0,0,0,8,0,0,5,0,4,2,0
    "hsa-miR-6763-5p",0,0,0,1,4,0,0,0,0,8,0,0,6,0,28,17,0
    "hsa-miR-6738-5p",0,0,0,0,0,0,0,0,0,8,0,0,5,0,0,0,0
    "hsa-miR-6819-5p",0,0,2,0,0,0,0,0,0,7,0,0,0,0,0,0,0
    "hsa-miR-203a-3p",0,122,46,0,0,0,0,0,0,7,0,0,7,0,2,15,0
    "hsa-miR-5698",0,11,1,0,0,1,7,0,0,7,3,0,9,0,0,7,0
    "hsa-miR-3173-5p",2,12,8,0,0,3,11,0,0,7,2,0,7,0,0,9,0
    "hsa-miR-6777-3p",9,23,5,0,6,0,6,0,0,7,0,0,0,0,0,14,0
    "hsa-miR-378f",0,16,6,2,7,1,23,0,0,6,0,1,8,1,7,16,0
    "hsa-miR-6834-5p",0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0
    "hsa-miR-8072",0,2,1,1,0,12,0,0,0,6,0,11,0,0,0,14,0
    "hsa-miR-4745-5p",2,1,1,0,0,7,0,0,0,5,0,0,4,0,0,0,0
    "hsa-miR-6887-3p",0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0
    "hsa-miR-5089-5p",0,7,1,0,0,0,0,0,0,4,0,0,0,0,0,0,0
    "hsa-mir-320b-1",0,0,0,0,3,0,10,0,0,3,0,0,10,0,4,5,0
    "hsa-mir-320a",0,0,0,0,3,0,9,0,0,3,0,0,10,0,8,4,0
    "hsa-miR-33a-3p",1,6,5,0,0,0,0,0,0,3,0,0,0,0,0,0,0
    "hsa-miR-876-3p",4,60,28,0,16,0,0,0,0,3,0,0,13,0,3,1,0
    "hsa-miR-378b",0,1,0,2,2,0,10,0,0,3,0,1,4,0,2,15,0
    "hsa-miR-3180-3p",0,0,0,0,0,0,1,0,0,3,0,0,2,0,0,9,0
    "hsa-miR-6842-3p",2,22,19,0,0,0,0,0,0,3,0,0,0,0,0,0,0
    "hsa-miR-6741-3p",0,42,9,0,0,0,0,0,0,3,8,0,0,0,0,5,1
    "hsa-mir-320b-2",0,0,0,1,3,0,26,0,0,3,0,0,19,0,8,2,0
    "hsa-miR-6849-5p",0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0
    "hsa-miR-6740-5p",0,5,0,17,17,1,17,0,0,2,0,0,30,2,0,30,0
    "hsa-miR-378c",0,20,8,0,2,2,19,0,0,2,0,0,2,0,0,31,0
    "hsa-miR-30d-3p",3,657,238,0,15,0,18,0,0,2,0,0,10,2,1,21,0
    "hsa-let-7g-3p",0,53,30,0,0,0,0,0,0,1,0,18,12,0,0,0,0
    "hsa-let-7a-3",0,3,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0
    "hsa-miR-374a-5p",3,864,413,0,0,0,0,0,0,0,0,0,5,0,0,12,0
    "hsa-mir-320e",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-320c-2",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-483-3p",0,250,108,0,0,19,8,1,0,0,8,0,0,0,9,0,0
    "hsa-miR-3162-5p",0,0,0,0,0,0,0,0,0,0,6,0,23,0,0,10,0
    "hsa-miR-598-3p",1,90,39,0,50,0,42,2,0,0,5,13,0,0,0,60,0
    "hsa-miR-6511b-5p",0,27,6,6,0,6,26,1,0,0,5,11,0,0,8,0,0
    "hsa-miR-6794-3p",0,9,0,0,0,0,0,0,0,0,4,0,3,0,0,29,0
    "hsa-miR-4449",1,5,3,0,11,12,0,0,0,0,3,0,44,1,23,11,2
    "hsa-miR-1289",0,73,19,10,0,14,13,0,0,0,3,0,16,0,8,0,0
    "hsa-miR-4525",0,16,0,0,0,0,0,3,0,0,3,0,13,8,0,0,0
    "hsa-miR-6515-3p",0,6,2,0,0,0,0,0,0,0,3,12,0,0,0,0,0
    "hsa-miR-134-5p",6,150,36,10,15,1,36,1,0,0,3,23,48,0,20,33,0
    "hsa-miR-6812-3p",5,10,5,0,0,13,0,0,0,0,3,0,0,0,0,0,0
    "hsa-miR-3913-5p",0,46,21,0,0,0,0,0,0,0,3,0,6,0,0,0,0
    "hsa-miR-219a-5p",0,106,21,0,10,0,0,0,0,0,2,0,0,0,20,12,2
    "hsa-miR-3065-3p",6,475,158,0,0,0,18,0,0,0,2,0,0,2,0,31,3
    "hsa-miR-1229-3p",0,21,6,0,0,0,0,0,0,0,2,0,11,0,0,11,0
    "hsa-miR-34c-3p",0,0,0,0,0,0,0,0,0,0,2,0,17,0,0,11,0
    "hsa-miR-548ap-5p|hsa-miR-548j-5p",0,37,10,0,13,0,0,0,0,0,2,0,0,0,0,0,0
    "hsa-miR-5699-5p",1,19,4,11,0,0,0,0,0,0,2,0,0,0,28,27,0
    "hsa-miR-6788-3p",1,36,13,0,0,0,0,0,0,0,2,0,0,0,0,0,0
    "hsa-miR-8061",0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,25,0
    "hsa-miR-3187-5p",0,12,4,0,0,0,0,0,0,0,2,0,0,0,0,5,0
    "hsa-miR-628-5p",2,199,64,0,0,0,0,0,0,0,2,0,0,1,10,12,4
    "hsa-miR-132-5p",7,304,90,0,27,2,10,0,0,0,2,12,0,0,0,0,0
    "hsa-miR-6873-3p",0,7,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0
    "hsa-miR-200c-5p",0,67,29,0,0,0,14,0,0,0,1,0,0,0,0,0,0
    "hsa-miR-766-3p",2,203,48,0,0,19,33,0,0,0,1,19,20,0,0,13,0
    "hsa-miR-1976",0,39,18,0,21,0,0,0,0,0,1,0,0,0,0,11,0
    "hsa-miR-6807-3p",3,16,4,0,0,0,0,0,0,0,1,0,0,0,0,15,0
    "hsa-miR-485-5p",0,10,4,0,16,0,0,0,0,0,1,0,0,0,0,14,4
    "hsa-miR-548k",0,12,5,0,1,0,0,0,0,0,1,0,0,0,0,0,0
    "hsa-miR-874-3p",0,14,4,0,0,0,5,0,0,0,1,0,0,0,0,10,0
    "hsa-miR-4745-3p",2,7,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0
    "hsa-miR-7705",0,33,9,0,0,0,0,0,0,0,1,0,0,0,0,0,0
    "hsa-miR-4793-3p",1,37,35,0,16,0,0,0,0,0,1,0,0,2,0,11,0
    "hsa-miR-576-5p",2,130,52,0,15,0,0,0,0,0,1,0,9,0,16,0,0
    "hsa-miR-6728-5p",0,13,8,20,0,0,0,0,0,0,1,0,0,0,0,11,0
    "hsa-miR-362-5p",33,2841,566,16,4,17,0,2,0,0,1,0,41,1,0,26,0
    "hsa-miR-9-3p",2,366,154,0,10,0,0,0,0,0,1,0,8,3,24,26,0
    "hsa-miR-548ad-5p|hsa-miR-548ae-5p|hsa-miR-548ay-5p",2,16,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0
    "hsa-miR-551b-5p",15,232,86,7,13,0,46,0,0,0,0,43,34,0,0,59,0
    "hsa-miR-4521",14,1333,438,0,0,0,2,0,0,0,0,0,0,1,0,0,0
    "hsa-miR-497-5p",13,507,191,0,0,0,22,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-32-3p",10,56,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6783-3p",8,52,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-146b-3p",8,141,55,0,0,0,0,0,0,0,0,0,7,0,16,9,2
    "hsa-miR-6720-3p",7,143,85,8,0,2,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-182-3p",6,23,8,0,0,0,0,0,0,0,0,13,8,0,0,0,0
    "hsa-miR-424-3p",6,21,4,6,29,0,13,0,0,0,0,0,0,0,0,11,0
    "hsa-miR-6760-3p",5,4,2,0,0,10,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3176",4,29,17,0,0,0,0,0,0,0,0,0,0,0,0,11,0
    "hsa-miR-7155-3p",4,7,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
    "hsa-miR-6758-5p",4,10,8,0,36,0,0,0,0,0,0,0,0,0,25,9,0
    "hsa-miR-6786-3p",4,24,4,0,0,0,0,0,0,0,0,0,0,1,15,0,0
    "hsa-miR-1254",4,71,25,9,0,15,10,0,0,0,0,3,21,1,22,0,3
    "hsa-miR-4804-5p",4,15,2,0,16,0,0,0,0,0,0,8,0,2,0,0,0
    "hsa-miR-188-5p",4,151,85,7,0,0,1,0,0,0,0,0,4,0,0,0,0
    "hsa-miR-92a-2-5p",3,47,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-32-5p",3,28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-24-2-5p",3,338,140,2,0,13,21,0,0,0,0,0,0,0,20,0,0
    "hsa-miR-2355-3p",3,25,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1303",3,39,10,0,0,0,0,0,0,0,0,0,0,1,19,16,0
    "hsa-miR-193a-5p",3,40,12,5,0,0,0,0,0,0,0,0,12,0,8,0,0
    "hsa-miR-3651",3,16,9,6,9,0,17,0,0,0,0,0,0,0,0,2,0
    "hsa-miR-1260b",3,0,0,4,0,0,1,0,0,0,0,0,6,0,15,0,0
    "hsa-miR-4458",3,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1273h-3p",2,40,24,15,27,18,18,0,0,0,0,0,0,0,0,4,0
    "hsa-miR-4485-3p",2,25,1,0,0,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-22-5p",2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-937-5p",2,5,0,0,0,0,18,0,0,0,0,0,0,0,0,9,0
    "hsa-miR-7851-3p",2,14,8,0,0,0,19,0,0,0,0,0,0,0,18,0,0
    "hsa-miR-3653-3p",2,11,3,0,0,0,2,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-876-5p",2,10,12,0,17,0,0,0,0,0,0,0,0,0,29,13,0
    "hsa-miR-3179",2,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6762-3p",2,16,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-138-5p",2,27,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1179",2,738,227,0,0,0,23,2,0,0,0,9,0,0,14,68,1
    "hsa-miR-1247-5p",2,7,1,0,0,14,9,0,0,0,0,0,0,0,0,5,0
    "hsa-miR-1227-3p",2,10,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4646-3p",2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-374b-3p",2,120,31,0,17,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1236-3p",2,0,1,0,0,0,16,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548e-5p",2,14,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-133a-5p",2,81,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3912-3p",2,74,35,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4781-3p",2,59,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6882-3p",2,1,1,9,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-381-3p",2,82,49,0,0,0,0,1,0,0,0,0,19,0,0,6,0
    "hsa-miR-6886-3p",2,4,0,11,0,0,0,0,0,0,0,4,0,0,0,0,0
    "hsa-miR-196a-3p",2,271,107,0,1,0,0,0,0,0,0,0,8,0,18,20,0
    "hsa-miR-34c-5p",2,30,9,0,0,0,0,0,0,0,0,0,23,0,24,8,6
    "hsa-miR-548s",1,13,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6800-3p",1,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0
    "hsa-miR-4498",1,12,5,0,0,0,0,0,0,0,0,0,0,0,0,6,0
    "hsa-miR-4517",1,55,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6894-3p",1,6,2,6,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1234-3p",1,16,4,0,0,0,0,0,0,0,0,0,0,0,6,0,0
    "hsa-miR-5579-3p",1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-887-5p",1,8,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4638-5p",1,9,2,0,19,0,0,0,0,0,0,0,19,0,0,4,0
    "hsa-miR-3146",1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6840-5p",1,17,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7855-5p",1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6865-5p",1,2,2,0,0,0,0,0,0,0,0,0,6,0,0,0,0
    "hsa-miR-6766-5p",1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6820-3p",1,3,2,0,0,0,7,0,0,0,0,0,9,0,0,0,0
    "hsa-miR-3609",1,11,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0
    "hsa-miR-6759-5p",1,5,3,0,0,0,0,0,0,0,0,0,4,0,0,5,0
    "hsa-miR-6808-3p",1,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4440",1,57,11,8,0,0,1,0,0,0,0,0,7,0,0,0,0
    "hsa-miR-129-1-3p",1,10,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1914-3p",1,2,0,0,0,0,12,0,0,0,0,0,0,0,0,7,0
    "hsa-miR-6852-5p",1,22,6,0,0,0,0,0,0,0,0,0,5,0,0,19,0
    "hsa-miR-6769a-3p",1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-579-5p",1,4,3,0,17,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3122",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-23a-5p",1,27,7,0,12,0,9,0,0,0,0,0,0,2,0,0,0
    "hsa-miR-6873-5p",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-301b-5p",1,30,4,0,0,0,0,0,0,0,0,0,0,0,6,0,0
    "hsa-miR-3120-5p",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1185-1-3p",1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3136-5p",1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6775-3p",1,4,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-610",1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-592",1,45,17,0,0,0,0,0,0,0,0,0,6,0,0,0,0
    "hsa-miR-6893-5p",1,1,2,0,0,0,4,2,0,0,0,14,0,0,0,0,0
    "hsa-miR-3936",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0
    "hsa-miR-2277-3p",1,12,5,0,12,0,14,0,0,0,0,0,0,0,0,5,0
    "hsa-miR-6515-5p",1,17,2,11,34,0,21,0,0,0,0,0,10,0,22,15,0
    "hsa-miR-1236-5p",1,0,1,4,0,0,0,0,0,0,0,0,10,0,0,8,0
    "hsa-miR-3607-3p",1,48,33,0,0,0,1,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3157-3p",1,35,6,0,0,0,9,0,0,0,0,0,0,0,19,0,4
    "hsa-miR-1257",1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6857-3p",1,14,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-140-5p",1,201,72,1,0,0,0,0,0,0,0,0,0,0,22,0,0
    "hsa-miR-6751-3p",1,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3661",1,7,3,4,0,0,0,0,0,0,0,0,30,0,0,24,0
    "hsa-miR-1909-5p",1,6,2,0,0,2,0,0,0,0,0,0,12,0,0,9,0
    "hsa-miR-1273h-5p",1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6866-5p",1,23,7,0,2,0,0,0,0,0,0,0,16,0,0,12,3
    "hsa-miR-574-5p",1,0,0,7,8,0,0,0,0,0,0,0,0,0,27,2,0
    "hsa-miR-6820-5p",1,8,4,0,0,0,8,0,0,0,0,0,11,0,0,0,0
    "hsa-miR-4717-3p",1,23,20,0,13,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6850-5p",1,5,0,0,22,1,0,0,0,0,0,0,7,0,10,0,0
    "hsa-miR-6886-5p",1,0,2,0,0,0,10,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5587-3p",1,7,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-31-5p",1,97,25,0,0,0,0,0,0,0,0,0,17,0,0,0,0
    "hsa-miR-495-3p",1,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4708-3p",1,23,9,0,0,0,3,0,0,0,0,0,0,0,0,2,7
    "hsa-miR-4753-5p",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-942-3p",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5010-3p",1,45,27,0,0,0,0,0,0,0,0,0,3,0,0,0,0
    "hsa-miR-491-5p",1,8,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6827-5p",1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-101-5p",1,154,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6770-3p",1,10,8,0,19,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6853-3p",1,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6895-3p",1,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3190-3p",1,13,4,0,14,0,0,0,0,0,0,0,11,0,18,13,0
    "hsa-miR-487a-5p",1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-150-3p",1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3192-5p",1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-2355-5p",1,47,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4798-5p",1,18,22,0,0,0,0,0,0,0,0,0,0,0,1,0,0
    "hsa-miR-6726-3p",1,4,8,3,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-153-3p",0,424,163,0,5,0,6,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7-1-3p",0,347,113,0,1,0,20,0,0,0,0,0,17,0,12,7,0
    "hsa-miR-338-3p",0,289,100,0,0,0,0,1,0,0,0,0,0,0,0,0,3
    "hsa-miR-29b-3p",0,98,57,0,9,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-542-3p",0,93,42,0,0,0,0,0,0,0,0,18,0,0,0,0,0
    "hsa-miR-20a-3p",0,91,40,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1255a",0,81,25,0,0,0,0,1,0,0,0,0,0,0,36,0,3
    "hsa-miR-6798-3p",0,79,36,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-137",0,77,34,0,0,0,0,0,0,0,0,0,8,0,0,0,0
    "hsa-miR-3677-3p",0,66,21,0,0,0,10,0,0,0,0,0,9,0,18,0,0
    "hsa-miR-550a-5p",0,56,23,0,0,0,0,1,0,0,0,0,0,0,12,0,1
    "hsa-miR-135b-5p",0,53,17,0,0,0,0,0,0,0,0,0,0,0,0,15,0
    "hsa-miR-24-1-5p",0,52,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6716-3p",0,51,13,0,6,0,0,0,0,0,0,0,16,0,0,0,0
    "hsa-miR-217",0,51,22,0,0,0,0,0,0,0,0,0,14,0,0,0,6
    "hsa-miR-135b-3p",0,48,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-616-5p",0,46,11,0,4,0,13,0,0,0,0,0,8,0,0,0,2
    "hsa-miR-331-5p",0,42,10,0,0,0,0,0,0,0,0,0,0,0,12,0,0
    "hsa-miR-627-5p",0,39,16,0,0,15,0,0,0,0,0,0,0,0,0,9,0
    "hsa-miR-3940-3p",0,39,4,0,25,0,17,0,0,0,0,0,9,0,0,0,0
    "hsa-miR-5001-3p",0,37,13,0,0,0,0,0,0,0,0,0,0,0,0,11,0
    "hsa-miR-6806-3p",0,36,19,0,18,0,0,0,0,0,0,0,12,0,0,0,0
    "hsa-miR-4775",0,35,16,0,0,0,0,0,0,0,0,0,0,0,20,8,0
    "hsa-miR-675-5p",0,34,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3139",0,33,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-203b-3p",0,33,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1266-5p",0,32,9,0,0,0,0,0,0,0,0,22,0,0,0,36,0
    "hsa-miR-345-3p",0,31,8,0,0,0,22,0,0,0,0,0,10,0,0,11,0
    "hsa-miR-450b-5p",0,31,17,0,0,0,0,0,0,0,0,0,13,0,0,0,0
    "hsa-miR-660-3p",0,31,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-21-3p",0,29,29,0,0,0,3,0,0,0,0,21,11,0,0,0,0
    "hsa-miR-3145-3p",0,27,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-20b-3p",0,26,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-424-5p",0,26,10,0,0,0,0,0,0,0,0,0,0,2,0,0,0
    "hsa-miR-18b-5p",0,24,4,0,0,0,6,0,0,0,0,0,0,0,0,4,0
    "hsa-miR-449c-5p",0,24,21,0,0,0,0,0,0,0,0,0,0,1,0,0,2
    "hsa-miR-7703",0,24,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-585-3p",0,23,10,0,0,0,0,0,0,0,0,0,0,0,0,9,0
    "hsa-miR-362-3p",0,23,9,0,0,0,0,0,0,0,0,0,4,0,0,0,0
    "hsa-miR-3120-3p",0,20,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-124-5p",0,20,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-216b-5p",0,20,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6720-5p",0,19,10,0,0,0,0,0,0,0,0,0,12,0,0,0,0
    "hsa-miR-19b-1-5p",0,19,12,0,0,0,9,0,0,0,0,0,0,0,0,19,0
    "hsa-miR-590-5p",0,19,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-589-3p",0,19,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4742-3p",0,19,5,0,0,0,9,0,0,0,0,0,0,0,0,8,0
    "hsa-miR-548n",0,18,10,0,0,0,0,0,0,0,0,0,0,0,0,0,2
    "hsa-miR-6770-5p",0,18,15,0,0,0,29,0,0,0,0,0,18,0,6,0,0
    "hsa-miR-5696",0,18,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-539-3p",0,18,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-219b-3p",0,17,7,0,0,0,19,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3149",0,17,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-196b-3p",0,17,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548j-5p",0,17,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ba",0,17,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-103a-2-5p",0,16,19,0,0,0,0,0,0,0,0,6,0,0,0,0,0
    "hsa-miR-455-5p",0,16,5,0,0,0,0,0,0,0,0,0,0,0,0,16,0
    "hsa-miR-642a-3p",0,16,4,0,0,0,0,0,0,0,0,0,16,11,16,0,1
    "hsa-miR-664b-5p",0,16,3,0,0,0,0,0,0,0,0,0,12,0,0,1,0
    "hsa-miR-4999-5p",0,16,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-455-3p",0,15,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4640-3p",0,15,3,4,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4726-5p",0,15,4,0,0,0,2,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3074-3p",0,15,3,0,0,0,0,0,0,0,0,0,8,0,0,0,0
    "hsa-miR-3662",0,15,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-95-5p",0,14,7,0,0,0,0,0,0,0,0,14,0,0,36,0,0
    "hsa-miR-3684",0,14,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6837-3p",0,14,13,0,0,15,0,0,0,0,0,0,9,0,34,0,0
    "hsa-miR-5695",0,14,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-675-3p",0,14,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3150a-5p",0,14,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3150a-3p",0,14,2,0,0,0,0,0,0,0,0,0,8,0,0,18,0
    "hsa-miR-3177-5p",0,13,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3143",0,13,8,0,0,0,0,0,0,0,0,0,0,0,0,0,1
    "hsa-miR-193a-3p",0,13,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548at-5p",0,13,7,0,15,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-545-5p",0,13,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5688",0,13,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0
    "hsa-miR-450a-5p",0,13,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6802-3p",0,13,4,0,0,0,10,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3939",0,12,8,0,15,0,0,0,0,0,0,0,13,0,0,0,0
    "hsa-miR-16-1-3p",0,12,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-216a-3p",0,12,4,0,0,0,0,0,0,0,0,0,0,0,16,0,0
    "hsa-miR-6833-3p",0,12,3,0,0,0,6,0,0,0,0,0,10,0,0,10,0
    "hsa-miR-6516-5p",0,12,2,0,16,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6750-3p",0,12,3,0,0,0,15,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4667-3p",0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-153-5p",0,11,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7112-3p",0,11,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-26a-1-3p",0,11,4,0,0,0,1,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-181b-3p",0,11,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6508-3p",0,11,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0
    "hsa-miR-627-3p",0,11,4,0,0,0,0,0,0,0,0,0,0,0,12,5,0
    "hsa-miR-5583-3p",0,11,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6513-5p",0,10,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-34a-3p",0,10,6,0,0,0,0,0,0,0,0,0,22,0,0,0,0
    "hsa-miR-6516-3p",0,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7156-5p",0,10,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-597-3p",0,10,7,0,0,0,0,0,0,0,0,0,0,0,17,0,0
    "hsa-miR-7114-3p",0,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-488-3p",0,10,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5690",0,10,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3685",0,9,4,0,0,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-6823-5p",0,9,3,0,0,0,13,0,0,0,0,0,7,0,0,9,0
    "hsa-miR-4504",0,9,3,0,0,0,0,0,0,0,0,0,0,0,0,10,0
    "hsa-miR-6510-3p",0,9,4,0,0,0,0,0,0,0,0,0,8,0,0,11,0
    "hsa-miR-142-5p",0,9,1,23,37,10,0,0,0,0,0,19,20,0,44,18,0
    "hsa-miR-4766-3p",0,9,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1284",0,9,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-18b-3p",0,9,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4461",0,9,5,0,0,0,9,0,0,0,0,18,0,0,0,1,0
    "hsa-miR-3617-5p",0,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6737-3p",0,9,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-211-5p",0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548al",0,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-23b-5p",0,8,6,0,0,0,0,0,0,0,0,0,11,0,0,28,0
    "hsa-miR-5680",0,8,6,0,0,0,0,0,0,0,0,0,6,0,17,1,0
    "hsa-miR-99b-5p",0,8,1,0,0,0,0,0,0,0,0,0,0,0,23,11,0
    "hsa-miR-147b",0,8,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6864-5p",0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-643",0,8,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7156-3p",0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-874-5p",0,8,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0
    "hsa-miR-548a-3p",0,8,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6513-3p",0,8,2,0,0,0,5,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ah-3p|hsa-miR-548av-3p",0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-30c-2",0,8,2,0,14,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-103a-1",0,8,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0
    "hsa-miR-6762-5p",0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3128",0,7,4,0,0,0,0,0,0,0,0,0,15,0,0,0,0
    "hsa-miR-363-5p",0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-28-5p",0,7,3,0,0,11,13,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6784-3p",0,7,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-199b-5p",0,7,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3688-3p",0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5003-3p",0,7,2,0,0,0,13,0,0,0,0,9,14,0,20,12,0
    "hsa-miR-6797-3p",0,7,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-542-5p",0,7,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4749-3p",0,7,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-34b-5p",0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1249-3p",0,7,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-580-3p",0,7,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5004-5p",0,7,1,0,0,0,25,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5094",0,7,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4671-3p",0,7,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-let-7i-3p",0,7,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-129-2-3p",0,6,6,0,0,0,0,0,0,0,0,0,0,2,0,0,0
    "hsa-let-7a-1",0,6,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0
    "hsa-miR-6826-5p",0,6,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0
    "hsa-miR-3191-3p",0,6,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6728-3p",0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6862-3p",0,6,1,0,0,0,0,0,0,0,0,0,11,0,0,0,0
    "hsa-miR-450a-1-3p",0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548az-5p",0,6,12,0,0,0,0,0,0,0,0,0,10,2,0,0,0
    "hsa-miR-3619-5p",0,6,4,0,0,4,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6804-3p",0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4289",0,6,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5004-3p",0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-570-3p",0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-502-5p",0,6,3,0,0,0,0,0,0,0,0,0,0,0,13,1,0
    "hsa-miR-3177-3p",0,6,3,14,0,0,15,0,0,0,0,0,30,0,8,4,0
    "hsa-miR-3925-5p",0,6,1,0,0,0,18,0,0,0,0,0,27,3,0,28,0
    "hsa-miR-4760-5p",0,6,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4728-3p",0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6804-5p",0,6,2,0,0,15,20,0,0,0,0,0,0,0,0,13,0
    "hsa-miR-6855-3p",0,6,1,0,0,7,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5187-3p",0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3157-5p",0,6,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6868-3p",0,6,11,0,0,0,13,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4474-3p",0,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6848-5p",0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-100-5p",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-199a-5p",0,5,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0
    "hsa-miR-6813-5p",0,5,4,0,0,0,8,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4751",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6769a-5p",0,5,0,0,6,0,8,0,0,0,0,0,7,0,0,0,0
    "hsa-miR-1278",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4641",0,5,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6732-3p",0,5,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0
    "hsa-miR-767-5p",0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6819-3p",0,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6750-5p",0,5,1,0,0,0,0,0,0,0,0,0,15,0,0,8,0
    "hsa-miR-6754-3p",0,5,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-2115-3p",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4747-3p",0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-124-3p",0,5,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0
    "hsa-miR-6754-5p",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3163",0,5,2,0,0,0,5,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4754",0,5,1,0,0,0,7,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4762-3p",0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7-2-3p",0,5,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6840-3p",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-676-5p",0,5,4,0,0,0,11,0,0,0,0,0,10,0,0,15,0
    "hsa-miR-4666a-5p",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3942-3p",0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-377-3p",0,5,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-186-3p",0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3116",0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-579-3p",0,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-125a-5p",0,4,6,0,0,0,11,0,0,0,0,0,0,0,0,5,2
    "hsa-miR-3064-3p",0,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3160-3p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6735-3p",0,4,6,0,0,0,0,0,0,0,0,0,7,0,0,10,0
    "hsa-miR-6879-3p",0,4,5,0,0,13,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6796-3p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-612",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ad-5p|hsa-miR-548ae-5p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
    "hsa-miR-6793-3p",0,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6743-3p",0,4,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6739-3p",0,4,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4671-5p",0,4,2,0,0,0,0,0,0,0,0,0,18,0,0,0,0
    "hsa-miR-496",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4457",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6852-3p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6851-5p",0,4,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0
    "hsa-miR-548t-5p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0
    "hsa-miR-6767-3p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-218-1-3p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3616-5p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4640-5p",0,4,1,0,0,0,0,0,0,0,0,0,8,0,0,0,0
    "hsa-miR-6755-3p",0,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6818-5p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4656",0,4,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0
    "hsa-miR-6818-3p",0,4,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ap-5p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3125",0,4,0,0,17,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-548ab",0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,2
    "hsa-miR-4685-3p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-188-3p",0,4,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6841-5p",0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6859-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-449b-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4730",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-664b-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4762-5p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-1304-5p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4803",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6885-5p",0,3,3,11,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4473",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548am-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6730-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6742-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7110-3p",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-29a-5p",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6827-3p",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5091",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6844",0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-154-5p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7845-5p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4683",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4528",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-219b-5p",0,3,0,0,0,0,0,0,0,0,0,0,14,0,0,22,0
    "hsa-miR-7106-3p",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1285-3p",0,3,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0
    "hsa-miR-6880-5p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6885-3p",0,3,2,0,0,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-4654",0,3,3,0,0,6,9,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1252-5p",0,3,2,8,0,14,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-541-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0
    "hsa-miR-597-5p",0,3,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4794",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-2467-3p",0,3,0,0,0,0,19,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-933",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3193",0,3,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4632-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3121-3p",0,3,2,0,0,0,0,0,0,0,0,13,0,0,16,0,0
    "hsa-miR-190a-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6733-5p",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3164",0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6746-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3165",0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-573",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6783-5p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4716-3p",0,3,4,4,14,0,15,0,0,0,0,0,13,0,0,11,0
    "hsa-miR-1302",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-624-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5002-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6889-5p",0,3,2,11,11,0,0,0,0,0,0,0,9,0,0,24,0
    "hsa-miR-6866-3p",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-545-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3675-3p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6854-5p",0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1243",0,3,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6796-5p",0,3,1,0,0,16,0,0,0,0,0,0,0,0,0,12,0
    "hsa-let-7b",0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-192-3p",0,2,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-509-3p",0,2,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-379-5p",0,2,1,31,17,11,27,0,0,0,0,0,23,0,41,64,0
    "hsa-mir-106a",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1285-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-499a-5p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-615-5p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5587-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6871-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4690-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4713-5p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6851-3p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3923",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4638-3p",0,2,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0
    "hsa-miR-548o-3p",0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4750-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1911-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6773-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6814-3p",0,2,1,0,0,0,5,0,0,0,0,15,0,0,0,0,0
    "hsa-miR-624-5p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-136-3p",0,2,0,0,0,0,13,0,0,0,0,0,0,0,23,0,4
    "hsa-miR-219a-2-3p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6829-5p",0,2,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0
    "hsa-miR-6744-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6895-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4639-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-556-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5090",0,2,0,7,0,0,0,0,0,0,0,0,8,0,0,0,0
    "hsa-miR-4712-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4753-3p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6780b-3p",0,2,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0
    "hsa-miR-3664-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7151-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4723-3p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1292-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ag",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4684-3p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7152-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4677-5p",0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6797-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ak",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6792-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4470",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6874-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-581",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6884-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4642",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7109-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6791-3p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6874-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6888-3p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4687-5p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-642b",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548aj-5p|hsa-miR-548g-5p|hsa-miR-548x-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-651-5p",0,2,0,0,0,0,0,4,0,0,0,0,0,0,0,10,0
    "hsa-miR-3610",0,2,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0
    "hsa-miR-6752-3p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6734-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548w",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-586",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3680-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4725-3p",0,2,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0
    "hsa-miR-548au-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3145-5p",0,2,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0
    "hsa-miR-409-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4787-3p",0,2,2,0,0,0,0,0,0,0,0,0,14,0,0,0,0
    "hsa-miR-3692-5p",0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5682",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7112-5p",0,2,2,0,0,0,0,0,0,0,0,0,0,0,20,0,0
    "hsa-miR-4501",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ai|hsa-miR-570-5p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6830-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-449b-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7151-3p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-488-5p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5000-3p",0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-515-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1304-3p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3917",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4691-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-551a",0,2,3,0,0,0,1,0,0,0,0,0,11,0,16,0,0
    "hsa-miR-3616-3p",0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4709-3p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6816-5p",0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548av-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4791",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4714-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1914-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6890-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0
    "hsa-miR-6878-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548as-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6795-5p",0,1,0,0,6,8,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4800-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6798-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-216a-5p",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5006-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1298-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4695-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6768-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3134",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4522",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-578",0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4743-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6831-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-585-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1262",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-500b-3p",0,1,1,0,13,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-491-3p",0,1,1,0,0,0,0,0,0,0,0,0,0,0,21,0,0
    "hsa-miR-6780a-3p",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4655-5p",0,1,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7114-5p",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1323",0,1,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6836-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-6776-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5095",0,1,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5692a",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ah-3p|hsa-miR-548p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5196-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6760-5p",0,1,0,0,0,0,0,0,0,0,0,18,23,0,16,0,0
    "hsa-miR-6842-5p",0,1,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0
    "hsa-miR-6739-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6505-5p",0,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-187-3p",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-31-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-670-3p",0,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-600",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6733-3p",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4740-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6854-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6781-3p",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6741-5p",0,1,2,0,14,0,7,0,0,0,0,0,0,0,15,0,0
    "hsa-miR-5582-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548au-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4734",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0
    "hsa-miR-5088-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-96-3p",0,1,3,0,0,0,0,0,0,0,0,18,0,0,0,0,0
    "hsa-miR-3529-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-516a-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6865-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,7,5,0
    "hsa-miR-1277-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1238-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3135a",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ad-5p|hsa-miR-548ae-5p|hsa-miR-548ay-5p|hsa-miR-548d-5p",0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-181b-2",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4639-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6779-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-106a-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4755-3p",0,1,0,0,0,0,0,0,0,0,0,0,9,2,0,0,0
    "hsa-mir-92a-1",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4459",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6729-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-588",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1231",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-668-3p",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6835-5p",0,1,3,0,12,0,11,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4786-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548d-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6125",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
    "hsa-miR-6735-5p",0,1,4,0,0,0,0,0,0,0,0,30,14,0,0,0,0
    "hsa-miR-4446-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4714-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-934",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3153",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-369-3p",0,1,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-154-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-2113",0,1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-493-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,21,0,0
    "hsa-miR-4723-5p",0,1,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6749-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-518a-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-3142",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548ay-3p",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3142",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3942-5p",0,1,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6769b-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6742-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1537-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-338-5p",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6505-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4533",0,1,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-508-3p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7152-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4664-5p",0,1,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0
    "hsa-miR-3659",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4701-5p",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3908",0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6857-5p",0,1,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6861-3p",0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3183",0,1,1,0,0,0,0,0,0,0,0,0,9,0,0,12,0
    "hsa-miR-6893-3p",0,1,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6845-3p",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1291",0,1,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5003-5p",0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1248",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1297",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-30c-1",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-7-2",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-92a-2",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-29c-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-500b",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-let-7a-2",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-let-7f-1",0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6824-3p",0,0,3,0,0,0,0,0,0,0,0,0,0,0,18,0,0
    "hsa-miR-450a-2-3p",0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6748-5p",0,0,3,0,0,0,0,0,0,0,0,0,11,0,0,9,0
    "hsa-miR-548p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6749-5p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3613-5p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-556-5p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-383-5p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6823-3p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6781-5p",0,0,2,0,0,0,9,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-15a-3p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3151-5p",0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,8,0
    "hsa-miR-328-5p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6832-3p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4764-5p",0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7155-5p",0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0
    "hsa-miR-7108-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4465",0,0,1,0,0,0,0,0,0,0,0,0,0,0,10,0,0
    "hsa-miR-518f-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-2114-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4644",0,0,1,0,3,0,0,0,0,0,0,0,10,0,0,13,0
    "hsa-miR-3192-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3199",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-539-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4254",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6757-5p",0,0,1,0,0,11,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6509-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4523",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1185-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-526b-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-2116-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4784",0,0,1,0,0,0,3,0,0,0,0,0,7,0,24,8,0
    "hsa-miR-6782-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548a-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-559",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4688",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4453",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1909-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-548f-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4515",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-376b-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4798-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-944",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-7113-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,4,0,0
    "hsa-miR-7976",0,0,1,0,0,0,7,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-2276-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6849-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6890-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6869-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3611",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4767",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6880-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-889-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6848-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-642b-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4781-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5580-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6727-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4797-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6887-5p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-492",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-3663-5p",0,0,1,9,0,0,0,1,0,0,0,0,0,0,0,0,0
    "hsa-miR-512-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,18,3
    "hsa-miR-216b-3p",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-548aw",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,6,0
    "hsa-miR-548ah-3p|hsa-miR-548av-3p|hsa-miR-548p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-mir-10a",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6816-3p",0,0,0,0,0,0,12,0,0,0,0,0,8,0,0,0,0
    "hsa-miR-6791-5p",0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-1247-3p",0,0,0,0,0,0,11,0,0,0,0,0,28,3,0,0,0
    "hsa-miR-6737-5p",0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-299-5p",0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-223-3p",0,0,0,0,0,0,3,0,0,0,0,0,0,0,17,9,0
    "hsa-mir-378d-2",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6833-5p",0,0,0,0,0,0,0,1,0,0,0,0,17,0,0,0,0
    "hsa-miR-7108-5p",0,0,0,0,0,0,0,0,0,0,0,22,0,1,22,0,0
    "hsa-miR-6507-3p",0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0
    "hsa-miR-410-3p",0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0
    "hsa-miR-6891-3p",0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0
    "hsa-miR-3620-5p",0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0
    "hsa-miR-1827",0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,1,0
    "hsa-miR-3175",0,0,0,0,1,0,0,0,0,0,0,0,31,0,14,8,0
    "hsa-miR-1288-3p",0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0
    "hsa-miR-3653-5p",0,0,0,0,0,0,0,0,0,0,0,0,17,0,20,1,0
    "hsa-miR-5194",0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,0,0
    "hsa-miR-6881-3p",0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0
    "hsa-miR-299-3p",0,0,0,5,0,0,0,0,0,0,0,0,12,0,0,0,0
    "hsa-miR-6510-5p",0,0,0,0,0,0,0,0,0,0,0,0,11,0,13,0,0
    "hsa-miR-6509-5p",0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0
    "hsa-miR-1180-5p",0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0
    "hsa-miR-4428",0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,13,0
    "hsa-miR-6861-5p",0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0
    "hsa-miR-6501-5p",0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0
    "hsa-miR-1233-5p",0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0
    "hsa-miR-4516",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
    "hsa-miR-494-3p",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
    "hsa-mir-151b",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-4695-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0
    "hsa-miR-145-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,0
    "hsa-miR-4482-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0
    "hsa-miR-5006-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0
    "hsa-miR-3126-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,13,0
    "hsa-miR-602",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0
    "hsa-miR-127-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0
    "hsa-miR-376c-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0
    "hsa-miR-6821-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0
    "hsa-miR-6761-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0
    "hsa-miR-371a-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0
    "hsa-miR-4690-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0
    "hsa-miR-1910-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0
    "hsa-miR-3180",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0
    "hsa-miR-6764-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0
    "hsa-miR-1915-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0
    "hsa-miR-6870-5p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
    "hsa-miR-6821-3p",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
    "hsa-miR-6799-3p",0,0,0,8,13,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-5703",0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
    "hsa-miR-6785-3p",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0

Analysis of the RNA binding protein (RBP) motifs for RNA-Seq and miRNAs (v3)

Leave a reply

There are several alternative R packages and tools to perform motif enrichment analysis for RNA-binding proteins (RBPs), beyond PWMEnrich::motifEnrichment(). Here are the most notable ones:

Tool / Package	Enrichment	Custom Motifs	CLI or R?	RNA-specific?	Notes
PWMEnrich	✅	✅	R	✅	Tried (see pipeline.v1-block3)
RBPmap	✅	❌ (uses own db)	Web/CLI	✅	Tried RBPmap, but it is too slow
Biostrings/TFBSTools	❌ (only scanning)	✅	R	❌	ATtRACT+Biostrings/TFBSTools (tried, pipeline.v1-block3)
rmap	✅ (CLIP-based)	❌	R	✅
Homer	✅	✅	CLI	⚠ RNA optional
MEME (AME, FIMO)	✅	✅	Web/CLI	⚠ Generic	Finally used ATtRACT + FIMO; AME has a bug and could not be run

#It was suggested to me to use “RBPmap” or “GraphProt” for this analysis.

Get 3UTR.fasta, 5UTR.fasta, CDS.fasta and transcripts.fasta

         mRNA Transcript
 ┌────────────┬────────────┬────────────┐
 │   5′ UTR   │     CDS    │   3′ UTR   │
 └────────────┴────────────┴────────────┘
 ↑            ↑            ↑            ↑
 Start        Start        Stop         End
 of           Codon        Codon        of
 Transcript                             Transcript

 ✅ Option 1: Use GENCODE and python scripts (CHOSEN!)

 #Input: up- and down-, all-regulated files
 ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/MKL-1_wt.EV_vs_parental-up.txt    #20086
 ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/MKL-1_wt.EV_vs_parental-down.txt  #634
 ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/WaGa_wt.EV_vs_parental-up.txt     #23832
 ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/WaGa_wt.EV_vs_parental-down.txt   #375

 #Filter the up- and down-regulated gene lists to keep only protein_coding genes before extracting 3' UTRs, because:
 #1. Only protein_coding genes have well-annotated 3' UTRs.
 #   A 3' UTR is defined as the region after the CDS (coding sequence) and before the poly-A tail.
 #   Non-coding RNAs (e.g., lncRNA, snoRNA, miRNA precursors) do not have a CDS, and therefore don't have canonical 3' UTRs.
 #2. In GENCODE, most UTR annotations are only provided for transcripts of gene_type = "protein_coding".

 cd ~/DATA/Data_Ute/RBPs_analysis/extract_3UTR_5UTR_CDS_transcript
 grep ",\"protein_coding\"," ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/MKL-1_wt.EV_vs_parental-up.txt > MKL-1_wt.EV_vs_parental-up_protein_coding.txt
 grep ",\"protein_coding\"," ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/MKL-1_wt.EV_vs_parental-down.txt > MKL-1_wt.EV_vs_parental-down_protein_coding.txt
 grep ",\"protein_coding\"," ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/WaGa_wt.EV_vs_parental-up.txt > WaGa_wt.EV_vs_parental-up_protein_coding.txt
 grep ",\"protein_coding\"," ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/WaGa_wt.EV_vs_parental-down.txt > WaGa_wt.EV_vs_parental-down_protein_coding.txt

 #Visit and Download: GENCODE FTP site https://www.gencodegenes.org/human/
     * GTF annotation file (e.g., gencode.v48.annotation.gtf.gz)
     * Corresponding genome FASTA (e.g., GRCh38.primary_assembly.genome.fa.gz)
 wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_48/gencode.v48.annotation.gtf.gz
 wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_48/GRCh38.primary_assembly.genome.fa.gz
 gunzip gencode.v48.annotation.gtf.gz
 gunzip GRCh38.primary_assembly.genome.fa.gz

 python extract_transcript_parts.py MKL-1_wt.EV_vs_parental-down_protein_coding.txt ~/REFs/gencode.v48.annotation.gtf ~/REFs/GRCh38.primary_assembly.genome.fa MKL-1_down
 python extract_transcript_parts.py MKL-1_wt.EV_vs_parental-up_protein_coding.txt ~/REFs/gencode.v48.annotation.gtf ~/REFs/GRCh38.primary_assembly.genome.fa MKL-1_up  #5988
 python extract_transcript_parts.py WaGa_wt.EV_vs_parental-down_protein_coding.txt ~/REFs/gencode.v48.annotation.gtf ~/REFs/GRCh38.primary_assembly.genome.fa WaGa_down  #93
 python extract_transcript_parts.py WaGa_wt.EV_vs_parental-up_protein_coding.txt ~/REFs/gencode.v48.annotation.gtf ~/REFs/GRCh38.primary_assembly.genome.fa WaGa_up  #6538

 ✅ Option 2-5 see at the end!

Why 3′ UTR?

 🧬 miRNA, RBP, or translation/post-transcriptional regulation
 ➡️ Use 3' UTR sequences

 Because:

 Most miRNA binding and many RBP motifs are located in the 3' UTR.

 It’s the primary region for mRNA stability, localization, and translation regulation.

 🧠 Example: You're looking for binding enrichment of miRNAs or RNA-binding proteins (PUM, HuR, etc.)
 ✅ Input = 3UTR.fasta

 🧪 If you're testing RBPs related to:
 - Translation initiation, upstream ORFs, or 5' cap interaction:
 ➡️ Use 5' UTR

 - Coding mutations, protein-level motifs, or translational efficiency:
 ➡️ Use CDS

 - General transcriptome-wide motif search (no preference):
 ➡️ Use transcripts, or test all regions separately to localize signal

Recommended Workflow with RBPmap https://rbpmap.technion.ac.il (Too slow!)

 RBPmap itself does not compute enrichment p-values or FDR; it's a motif scanning tool.

 To get statistically meaningful RBP enrichments, combine RBPmap with custom permutation testing or Fisher’s exact test + multiple testing correction.

     1. Prepare foreground (target) and background sequences

         Extract 3′ UTRs of:

         📉 Downregulated mRNAs (foreground) — likely targeted by upregulated miRNAs

         ⚪ A control set of 3′ UTRs — e.g., non-differentially expressed protein-coding genes

             grep ",\"protein_coding\"," ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/MKL-1_wt.EV_vs_parental-all.txt > MKL-1_wt.EV_vs_parental-all_protein_coding.txt
             grep ",\"protein_coding\"," ~/DATA/Data_Ute/Data_RNA-Seq_MKL-1+WaGa/results_2025_1/degenes/WaGa_wt.EV_vs_parental-all.txt > WaGa_wt.EV_vs_parental-all_protein_coding.txt

             cut -d',' -f1 MKL-1_wt.EV_vs_parental-all_protein_coding.txt | sort > all_genes.txt  #19239
             cut -d',' -f1 MKL-1_wt.EV_vs_parental-up_protein_coding.txt | sort > up_genes.txt  #5988
             cut -d',' -f1 MKL-1_wt.EV_vs_parental-down_protein_coding.txt | sort > down_genes.txt  #112
             cat up_genes.txt down_genes.txt | sort | uniq > regulated_genes.txt
             comm -23 all_genes.txt regulated_genes.txt > background_genes.txt
             grep -Ff background_genes.txt MKL-1_wt.EV_vs_parental-all_protein_coding.txt > MKL-1_wt.EV_vs_parental-background_protein_coding.txt  #13139

             cut -d',' -f1 WaGa_wt.EV_vs_parental-all_protein_coding.txt | sort > all_genes.txt  #19239
             cut -d',' -f1 WaGa_wt.EV_vs_parental-up_protein_coding.txt | sort > up_genes.txt  #6538
             cut -d',' -f1 WaGa_wt.EV_vs_parental-down_protein_coding.txt | sort > down_genes.txt  #93
             cat up_genes.txt down_genes.txt | sort | uniq > regulated_genes.txt
             comm -23 all_genes.txt regulated_genes.txt > background_genes.txt
             grep -Ff background_genes.txt WaGa_wt.EV_vs_parental-all_protein_coding.txt > WaGa_wt.EV_vs_parental-background_protein_coding.txt  #12608

             python extract_transcript_parts.py MKL-1_wt.EV_vs_parental-background_protein_coding.txt ~/REFs/gencode.v48.annotation.gtf ~/REFs/GRCh38.primary_assembly.genome.fa MKL-1_background
             python extract_transcript_parts.py WaGa_wt.EV_vs_parental-background_protein_coding.txt ~/REFs/gencode.v48.annotation.gtf ~/REFs/GRCh38.primary_assembly.genome.fa WaGa_background

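             foreground.fasta: your target (foreground) sequences, e.g. the 3′ UTRs of down-regulated genes.
             background.fasta: your background control sequences, e.g. the 3′ UTRs of genes that are not significantly differentially expressed.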

     2. Run RBPmap separately on both sets (6 runs in total)

         * Submit both sets of UTRs to RBPmap.
         * Use the same settings (e.g., “human genome”, “high stringency”, "Apply conservation filter" etc.)
         * Choose all RBPs
         * Download motif match outputs for both sets

     3. Count motif hits per RBP in each set

         You now have:
         For each RBP:
         a: number of target 3′ UTRs with a motif match
         b: number of background 3′ UTRs with a motif match
         c: total number of target 3′ UTRs
         d: total number of background 3′ UTRs

     4. Perform Fisher’s Exact Test per RBP

         For each RBP, construct a 2x2 table:

                                 Motif Present   Motif Absent
         Foreground (targets)    a               c - a
         Background              b               d - b

     5. Adjust p-values for multiple testing
     Use Benjamini-Hochberg (FDR) correction (e.g., in Python or R) across all RBPs tested.

     6.✅ Summary

         Step                                      Tool
         Prepare database of RNA-binding motifs    ATtRACT
         3′ UTR extraction                         extract_transcript_parts.py
         Motif scan                                RBPmap or FIMO
         Count motif hits                          Your own parser (Python or R)
         Fisher’s exact test                       scipy.stats or fisher.test()
         FDR correction                            multipletests() or p.adjust()

     python rbp_enrichment.py rbpmap_downregulated.tsv rbpmap_background.tsv rbp_enrichment_results.csv

Quick Drop-In Plan (RBPmap Alternative with FIMO for motif scan)

 1. [ATtRACT + FIMO (MEME suite)]

     ATtRACT: Database of RNA-binding motifs.
     FIMO: Fast and scriptable motif scanning tool.

     #Download RBP motifs (PWM) from ATtRACT DB; Convert to MEME format (if needed); Use FIMO to scan UTR sequences

     grep "Homo_sapiens" ATtRACT_db.txt > attract_human.txt

     #cut -f12 attract_human.txt | sort | uniq > valid_ids.txt

     python filter_short_fasta.py ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/MKL-1_background.3UTR.fasta MKL-1_background.filtered.3UTR.fasta
     ✅ 筛选完成: 总序列 = 70650
     🧹 已移除过短序列 (<16 nt): 1760
     🟢 保留有效序列 (≥16 nt): 68890
     📁 新背景文件保存为: MKL-1_background.filtered.3UTR.fasta
     # 检查背景文件中有多少序列：
     grep -c "^>" MKL-1_background.filtered.3UTR.fasta
     68890
     # 检查背景 FIMO 命中的总序列数：
     cut -f3 fimo_background_MKL-1_background/fimo.tsv | sort | uniq | wc -l
     67841
     python filter_short_fasta.py ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/MKL-1_up.3UTR.fasta MKL-1_up.filtered.3UTR.fasta
     python filter_short_fasta.py ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/MKL-1_down.3UTR.fasta MKL-1_down.filtered.3UTR.fasta
     python filter_short_fasta.py ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_background.3UTR.fasta WaGa_background.filtered.3UTR.fasta
     python filter_short_fasta.py ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_up.3UTR.fasta WaGa_up.filtered.3UTR.fasta
     python filter_short_fasta.py ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_down.3UTR.fasta WaGa_down.filtered.3UTR.fasta

     python convert_attract_pwm_to_meme.py

     fimo --thresh 1e-4 --oc fimo_foreground_MKL-1_down attract_human.meme ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/MKL-1_down.3UTR.fasta
     fimo --thresh 1e-4 --oc fimo_foreground_MKL-1_up attract_human.meme ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/MKL-1_up.3UTR.fasta
     fimo --thresh 1e-4 --oc fimo_background_MKL-1_background attract_human.meme ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/MKL-1_background.3UTR.fasta
     fimo --thresh 1e-4 --oc fimo_foreground_WaGa_down attract_human.meme ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_down.3UTR.fasta
     fimo --thresh 1e-4 --oc fimo_foreground_WaGa_up attract_human.meme ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_up.3UTR.fasta
     fimo --thresh 1e-4 --oc fimo_background_WaGa_background attract_human.meme ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_background.3UTR.fasta

     #Explanation for the table from FIMO (Find Individual Motif Occurrences), which scans sequences to find statistically significant matches to known motifs (e.g., RNA or DNA binding sites).

     Column  Meaning
     motif_id    ID of the motif, as defined in the .meme file
     motif_alt_id    Alternative ID or name for the motif (may be blank or unused)
     sequence_name   Name of the sequence where the motif was found (e.g., gene|transcript|region, such as ENSG00000134871|ENST00000714397|3UTR)
     start   Start position (1-based) of the motif match within the sequence
     stop    End position of the motif match
     strand  Strand on which the motif was found: + (forward) or - (reverse)
     score   Motif match score; higher scores indicate better matches
     p-value Statistical significance of the match (lower is more significant)
     q-value Adjusted p-value (False Discovery Rate corrected)
     matched_sequence    The actual sequence in the input that matches the motif

     ✅ Example Interpretation
     1338 ENSG00000134871|ENST00000714397|3UTR 103 114 + 23.4126 5.96e-08 0.111 GGAGAGAAGGGA
     motif_id: 1338 — a numeric ID from your motif file
     sequence_name: ENSG00000134871|ENST00000714397|3UTR — refers to the gene, transcript, and region (3′ UTR)
     start–stop: 103–114 — the motif occurs from position 103 to 114
     strand: + — found on the positive strand
     score: 23.41 — high score means strong motif match
     p-value: 5.96e-08 — very statistically significant
     q-value: 0.111 — FDR-corrected p-value
     matched_sequence: GGAGAGAAGGGA — the actual sequence match in the UTR

     💡 Tips
     You can map motif_id to RBP (RNA-binding protein) names using an annotation file like ATtRACT_db.txt.
     Typically, q-value < 0.05 is considered significant.
     Duplicate matches in different transcripts of the same gene may occur and are valid.
     Would you like help converting motif_id to RBP names for clarity?

     🧠 In most biological contexts:
         * Counting a motif as present multiple times because it's in several transcripts can inflate significance.
         * If you're using Fisher's exact test (as in enrichment), this transcript-level duplication can bias results.

     ⚠️ Caveat: If you're studying isoform-specific regulation, then transcript-level data may be valuable and shouldn't be collapsed. But for most general RBP enrichment or gene expression studies, the gene-level collapse is preferred.

     #Keep only one match per gene (based on Ensembl Gene ID like ENSG00000134871) for each RBP motif, even if multiple transcripts have hits.
     #python filter_fimo_best_per_gene.py --input fimo_foreground/fimo.tsv --output fimo_foreground/fimo.filtered.tsv
     convert_gtf_to_Gene_annotation_TSV_file.py  #generate gene_annotation.tsv
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_foreground_MKL-1_down/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_foreground_MKL-1_down/fimo.filtered.tsv \
     --output_annotated fimo_foreground_MKL-1_down/fimo.filtered.annotated.tsv
     #21559
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_foreground_MKL-1_up/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_foreground_MKL-1_up/fimo.filtered.tsv \
     --output_annotated fimo_foreground_MKL-1_up/fimo.filtered.annotated.tsv
     #(736661 rows)
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_background_MKL-1_background/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_background_MKL-1_background/fimo.filtered.tsv \
     --output_annotated fimo_background_MKL-1_background/fimo.filtered.annotated.tsv
     #(1869075 rows)
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_foreground_WaGa_down/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_foreground_WaGa_down/fimo.filtered.tsv \
     --output_annotated fimo_foreground_WaGa_down/fimo.filtered.annotated.tsv
     #(20364 rows)
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_foreground_WaGa_up/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_foreground_WaGa_up/fimo.filtered.tsv \
     --output_annotated fimo_foreground_WaGa_up/fimo.filtered.annotated.tsv
     #(805634 rows)
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_background_WaGa_background/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_background_WaGa_background/fimo.filtered.tsv \
     --output_annotated fimo_background_WaGa_background/fimo.filtered.annotated.tsv
     #(1811615 rows)

     python run_enrichment.py \
         --attract ATtRACT_db.txt \
         --fimo_fg fimo_foreground_MKL-1_up/fimo.filtered.tsv \
         --fimo_bg fimo_background_MKL-1_background/fimo.filtered.tsv \
         --output rbp_enrichment_MKL-1_up.csv \
         --strategy inclusive
     python run_enrichment.py \
         --attract ATtRACT_db.txt \
         --fimo_fg fimo_foreground_MKL-1_down/fimo.filtered.tsv \
         --fimo_bg fimo_background_MKL-1_background/fimo.filtered.tsv \
         --output rbp_enrichment_MKL-1_down.csv
     python run_enrichment.py \
         --attract ATtRACT_db.txt \
         --fimo_fg fimo_foreground_WaGa_up/fimo.filtered.tsv \
         --fimo_bg fimo_background_WaGa_background/fimo.filtered.tsv \
         --output rbp_enrichment_WaGa_up.csv
     python run_enrichment.py \
         --attract ATtRACT_db.txt \
         --fimo_fg fimo_foreground_WaGa_down/fimo.filtered.tsv \
         --fimo_bg fimo_background_WaGa_background/fimo.filtered.tsv \
         --output rbp_enrichment_WaGa_down.csv

     python plot_volcano.py --csv rbp_enrichment_MKL-1_up.csv --output MKL-1_volcano_up.pdf --title "Upregulated MKL-1"
     python plot_rbp_heatmap.py \
     --csvs rbp_enrichment_MKL-1_up.csv rbp_enrichment_MKL-1_down.csv \
     --labels Upregulated Downregulated \
     --output MKL-1_rbp_enrichment_heatmap.pdf

     #Column Meaning
     #a  Number of unique foreground UTRs hit by the RBP
     #b  Number of unique background UTRs hit by the RBP
     #c  Total number of foreground UTRs
     #d  Total number of background UTRs (⬅️ this is the value you're asking about)
     #p_value, fdr   From Fisher's exact test on enrichment

     #-- Get all genes the number 1621 refers to --
     #AGO2,1621,5050,5732,12987,1.0,1.0   #MKL-1_up
     #motif_ids are 414 and 399
     grep "^414" fimo.filtered.annotated.tsv > AGO2.txt
     grep "^399" fimo.filtered.annotated.tsv >> AGO2.txt
     cut -d$'\t' -f11 AGO2.txt | sort -u > AGO2_uniq.txt
     wc -l AGO2_uniq.txt
     #1621 AGO2_uniq.txt

     #工具 功能  关注点 应用场景
     FIMO    精确查找 motif 出现位置 motif 在什么位置出现   找出具体结合位点
     AME 统计 motif 富集情况   哪些 motif 在某组序列中更富集  比较 motif 是否显著出现更多

     如你还在做差异表达后的RBP富集分析，可以考虑先用 FIMO 扫描，再用你自己写的代码 + Fisher’s exact test 做类似 AME 的工作，或直接用 AME 做分析

     # Generate attract_human.meme incl. Gene_name!
     #python generate_named_meme.py pwm.txt attract_human.txt
     python generate_attract_human_meme.py pwm.txt ATtRACT_db.txt

     #ERROR during running ame --> DEBUG!
     #--control ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_all.3UTR.fasta \
     ame --control --shuffle-- \
     --oc ame_out \
     --scoring avg \
     --method fisher --verbose 5 ../Data_RNA-Seq_MKL-1+WaGa/motif_analysis/WaGa_down.3UTR.fasta attract_human.meme

 2. GraphProt2 (ALTERNATIVE_TODO)

     ML-based tool using sequence + structure

     Pre-trained models for many RBPs

     ✅ Advantages:

     Local, GPU/CPU supported

     More biologically realistic (includes structure)

miRNAs motif analysis using ATtRACT + FIMO

 ✅ Goal

     * Extract their sequences
     * Generate a background set
     * Run RBP enrichment (e.g., with RBPmap or FIMO)
     * Get p-adjusted enrichment stats (e.g., Fisher + BH)

     Input_1. DE results (differential expression file from smallRNA-seq)
         #Input: up- and down-, all-regulated files
         #~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7/miRNAs/EV_vs_parental-up.txt  #83
         #~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7/miRNAs/EV_vs_parental-down.txt  #34
         #~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7/miRNAs/EV_vs_parental-all.txt  #1304
         ~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7/miRNAs/untreated_vs_parental_cells-up.txt  #66
         ~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7/miRNAs/untreated_vs_parental_cells-down.txt  #38
         ~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7/miRNAs/untreated_vs_parental_cells-all.txt  #1304
         #Format: 1st column = miRNA ID (e.g., hsa-miR-21-5p), optionally with other stats.

     Input_2. Reference FASTA (Reference sequences from miRBase or GENCODE)
         #From miRBase: https://mirbase.org/download/  https://mirbase.org/download/CURRENT/
         ##miRBase_v21
         #mature.fa.gz → contains mature miRNA sequences
         #hairpin.fa.gz → for pre-miRNAs

         cp ~/DATA/Data_Ute/Data_Ute_smallRNA_7/summaries_exo7/miRNAs/untreated_vs_parental_cells-*.txt .
         #"hsa-miR-3180|hsa-miR-3180-3p"
         #>hsa-miR-3180 MIMAT0018178 Homo sapiens miR-3180
         #UGGGGCGGAGCUUCCGGAG
         #>hsa-miR-3180-3p MIMAT0015058 Homo sapiens miR-3180-3p
         #UGGGGCGGAGCUUCCGGAGGCC

     5.1 (Optional, not used!)

         #python extract_miRNA_fasta.py EV_vs_parental-up.txt mature_v21.fa up_mature_miRNAs.fa --unmatched up_mature_unmatched.txt  #84+0
         #python extract_miRNA_fasta.py EV_vs_parental-up.txt hairpin_v21.fa up_precursor_miRNAs.fa --unmatched up_precursor_unmatched.txt  #0
         #python extract_miRNA_fasta.py EV_vs_parental-down.txt mature_v21.fa down_mature_miRNAs.fa --unmatched down_mature_unmatched.txt  #34+0
         #python extract_miRNA_fasta.py EV_vs_parental-down.txt hairpin_v21.fa down_precursor_miRNAs.fa --unmatched down_precursor_unmatched.txt  #0
         #python extract_miRNA_fasta.py EV_vs_parental-all.txt mature_v21.fa all_mature_miRNAs.fa --unmatched all_mature_unmatched.txt         #1304+16
         #python extract_miRNA_fasta.py EV_vs_parental-all.txt hairpin_v21.fa all_precursor_miRNAs.fa --unmatched all_precursor_unmatched.txt  #16
         python extract_miRNA_fasta.py untreated_vs_parental_cells-up.txt mature_v21.fa up_mature_miRNAs.fa --unmatched up_mature_unmatched.txt  #67+0
         python extract_miRNA_fasta.py untreated_vs_parental_cells-up.txt hairpin_v21.fa up_precursor_miRNAs.fa --unmatched up_precursor_unmatched.txt  #0
         python extract_miRNA_fasta.py untreated_vs_parental_cells-down.txt mature_v21.fa down_mature_miRNAs.fa --unmatched down_mature_unmatched.txt  #38+0
         python extract_miRNA_fasta.py untreated_vs_parental_cells-down.txt hairpin_v21.fa down_precursor_miRNAs.fa --unmatched down_precursor_unmatched.txt  #0
         python extract_miRNA_fasta.py untreated_vs_parental_cells-all.txt mature_v21.fa all_mature_miRNAs.fa --unmatched all_mature_unmatched.txt         #1304+16
         python extract_miRNA_fasta.py untreated_vs_parental_cells-all.txt hairpin_v21.fa all_precursor_miRNAs.fa --unmatched all_precursor_unmatched.txt  #16

     5.2 (Advanced)
         Extract Sequences + Background Set

         Inputs:
             * up_miRNA.txt and down_miRNA.txt: DE results (first column = miRNA name, e.g., hsa-miR-21-5p)
             * mature.fa or hairpin.fa from miRBase

         Outputs:
             * mirna_up.fa
             * mirna_down.fa
             * mirna_background.fa

         #Use all remaining miRNAs as background:
         python prepare_miRNA_sets.py untreated_vs_parental_cells-up.txt untreated_vs_parental_cells-down.txt mature_v21.fa mirna --full-background
         mv mirna_background.fa mirna_full-background.fa
         #Use a random subset as background. Note that the generated background has as many records as the larger of the up and down sets; in this case that is the up set (84 records):
         python prepare_miRNA_sets.py untreated_vs_parental_cells-up.txt untreated_vs_parental_cells-down.txt mature_v21.fa mirna
         # grep ">" mature_v21.fa | wc -l  #35828
         # grep ">" mirna_full-background.fa | wc -l  #35710-->35723
         # grep ">" mirna_up.fa | wc -l  #84
         # grep ">" mirna_down.fa | wc -l  #34
         # grep ">" mirna_background.fa | wc -l  #84-->67
         # #35,710 + 84 + 34 = 35,828

     🔬 What You Can Do Next
     Goal    Tool    Input
     * RBP motif enrichment in pre-miRNAs    RBPmap, FIMO, AME   up_precursor_miRNAs.fa
     * Motif comparison (up vs down miRNAs)  DREME, MEME, HOMER  Up/down mature miRNAs
     * Build background for enrichment   Random subset of other miRNAs   Filtered from hairpin.fa

     fimo --thresh 1e-4 --oc fimo_mirna_down attract_human.meme mirna_down.fa
     fimo --thresh 1e-4 --oc fimo_mirna_up attract_human.meme mirna_up.fa
     fimo --thresh 1e-4 --oc fimo_mirna_full-background attract_human.meme mirna_full-background.fa
     fimo --thresh 1e-4 --oc fimo_mirna_background attract_human.meme mirna_background.fa
     #END

     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_mirna_down/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_mirna_down/fimo.filtered.tsv \
     --output_annotated fimo_mirna_down/fimo.filtered.annotated.tsv  #21
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_mirna_up/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_mirna_up/fimo.filtered.tsv \
     --output_annotated fimo_mirna_up/fimo.filtered.annotated.tsv  #48
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_mirna_full-background/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_mirna_full-background/fimo.filtered.tsv \
     --output_annotated fimo_mirna_full-background/fimo.filtered.annotated.tsv  #896
     python filter_fimo_best_per_gene_annotated.py \
     --input fimo_mirna_background/fimo.tsv \
     --annot Homo_sapiens.GRCh38.gene_annotation.tsv \
     --output_filtered fimo_mirna_background/fimo.filtered.tsv \
     --output_annotated fimo_mirna_background/fimo.filtered.annotated.tsv  #57

     python run_enrichment_miRNAs.py \
         --attract ATtRACT_db.txt \
         --fimo_fg fimo_mirna_up/fimo.filtered.tsv \
         --fimo_bg fimo_mirna_full-background/fimo.filtered.tsv \
         --output rbp_enrichment_mirna_up.csv \
         --strategy inclusive
     python run_enrichment_miRNAs.py \
         --attract ATtRACT_db.txt \
         --fimo_fg fimo_mirna_down/fimo.filtered.tsv \
         --fimo_bg fimo_mirna_full-background/fimo.filtered.tsv \
         --output rbp_enrichment_mirna_down.csv \
         --strategy inclusive
     #python run_enrichment_miRNAs.py \
     #    --attract ATtRACT_db.txt \
     #    --fimo_fg fimo_mirna_up/fimo.filtered.tsv \
     #    --fimo_bg fimo_mirna_background/fimo.filtered.tsv \
     #    --output rbp_enrichment_mirna_up_on_subset-background.csv \
     #    --strategy inclusive
     #python run_enrichment_miRNAs.py \
     #    --attract ATtRACT_db.txt \
     #    --fimo_fg fimo_mirna_down/fimo.filtered.tsv \
     #    --fimo_bg fimo_mirna_background/fimo.filtered.tsv \
     #    --output rbp_enrichment_mirna_down_on_subset-background.csv \
     #    --strategy inclusive

     #FXR2   1 (hsa-miR-92b-5p)  1   1   118 0.0168067226890756  0.365546218487395
     #ORB2   1 (hsa-miR-4748)    1   1   118 0.0168067226890756  0.365546218487395

     #-- Get the miRNAs that the count 1 refers to (FXR2 and ORB2 rows above) --
     grep "^FXR2" ATtRACT_db.txt
     #motif_id is M020_0.6
     grep "^M020_0.6" fimo_mirna_up/fimo.filtered.annotated.tsv > FXR2.txt
     grep "^M020_0.6" fimo_mirna_up/fimo.filtered.annotated.tsv
     #cut -d$'\t' -f11 AGO2.txt | sort -u > AGO2_uniq.txt
     #wc -l AGO2_uniq.txt (1621 records)

     grep "^ORB2" ATtRACT_db.txt
     grep "^M120_0.6" fimo_mirna_up/fimo.filtered.annotated.tsv

RBP Enrichment from RBPmap Results (NOT implemented!) 🔹 Use RBPmap output (typically CSV or TSV) 🔹 Compare hit counts in input vs background 🔹 Perform Fisher’s exact test + Benjamini-Hochberg correction 🔹 Plot significantly enriched RBPs

     📁 Requirements
     You’ll need:

     File    Description
     rbpmap_up.tsv   RBPmap result file for upregulated set
     rbpmap_background.tsv   RBPmap result file for background set

     📝 These should have columns like:

     Motif Name or Protein

     Sequence Name or Sequence ID
     (If different, I’ll show you how to adjust.)

     python analyze_rbpmap_enrichment.py rbpmap_up.tsv rbpmap_background.tsv enriched_up.csv enriched_up_plot.png

     ✅ Output
     enriched_up.csv
     RBP FG_hits BG_hits pval    padj    enriched
     ELAVL1  24  2   0.0001  0.003   ✅
     HNRNPA1 15  10  0.04    0.06    ❌

     enriched_up_plot.png
     Barplot showing top significant RBPs (lowest FDR)

     🧰 Customization Options
     Would you like:

         * Support for multiple RBPmap files at once?
         * To match by RBP family?
         * A full report (PDF/HTML) of top hits?
         * Let me know, and I’ll tailor the next script!

The planned pipeline is very reasonable and scientifically sound for RBP (RNA-binding protein) enrichment using miRNAs as input for FIMO motif scanning and downstream analysis.

 Here’s a breakdown of why your steps make sense, plus a few important suggestions:

 ✅ YOUR PIPELINE LOGIC (EXCELLENT)
 1. Motif Scanning with FIMO

 fimo --thresh 1e-4 --oc fimo_mirna_* attract_human.meme mirna_*.fa
 ✔️ Uses ATtRACT motifs to scan miRNA sequences.

 ✔️ Applies a reasonable p-value threshold (1e-4) to detect motif hits.

 📌 If you want more sensitivity (but more noise), you could try 1e-3; for higher stringency, 1e-5.

 2. Filter FIMO hits to best per gene and annotate

 python filter_fimo_best_per_gene_annotated.py --input ... --annot ... --output ...
 ✔️ Filters redundant hits.

 ✔️ Annotates with gene symbols or other metadata — essential for interpretability.

 3. RBP Enrichment

 python run_enrichment.py --fimo_fg ... --fimo_bg ... --attract ... --output ...
 ✔️ Uses foreground (up/down-regulated) and background (non-differential) miRNAs.

 ✔️ The --strategy inclusive is typical and includes all motif hits, not just strongest per gene or region.

 ✔️ Running up and down separately = ✅ best practice.

 🔍 MINOR IMPROVEMENTS / SUGGESTIONS
 ✅ Use the same background for both up and down analyses:
 You already are — good!

 This ensures your enrichment comparison is balanced and interpretable.

 ⚠️ Check motif directionality (optional):
 Some RBPs are strand-specific — but since miRNAs are single-stranded, and your FASTA is from mature sequences, you’re probably okay.

 ✅ Consider including summary stats:
 How many motifs hit in up/down/background?

 How many unique RBPs?

 This helps interpret enrichment p-values.

 🧪 Statistical validation (optional but helpful):
 If you have many miRNAs (e.g. >30), consider bootstrapping enrichment from random subsets of the background to assess robustness.

 ✅ FINAL VERDICT
 Your pipeline is:

 ✔️ Technically correct

 ✔️ Biologically appropriate

 ✔️ Reproducible and clear

 🧠 Well-structured for miRNA-RBP motif enrichment

Why background differs between RNA-seq and miRNA motif analysis:

 In RNA-seq differential expression analysis, especially for gene-level enrichment (e.g., GO or pathway analysis), the background typically includes all genes detected or expressed, not just a random subset. The logic is:

 ✅ “If a gene was testable (i.e., expressed above a threshold), it should be part of the universe we consider when evaluating enrichment.”

 This is because the statistical model assumes:

 Your universe is all genes tested.

 You're asking: Are my differentially expressed genes overrepresented in some category, compared to all testable genes?

 🔬 Why a random subset background was used in the miRNA motif pipeline (e.g., RBP enrichment):
 In the prepare_miRNA_sets.py script, the subset was used for one reason only:

 📉 To balance the number of background sequences relative to the foreground (up/down) — especially for statistical tests like Fisher's exact test.

 But this is optional and not required for correctness. A larger background improves statistical power, but must match distributional properties (e.g., GC content, sequence length) to avoid bias.

 ✅ So: Should you use all non-differentially expressed miRNAs as background?
 Yes, that is often more appropriate, assuming:

 You have a full set of detectable/testable miRNAs (e.g., all from the FASTA).

 You exclude the foreground (up/down) sets.

 The background matches the general properties of the foreground set.

RBP enrichment via FIMO (another description; it is the same as the workflow in point 4)

 1. Collect the 3′ UTR sequences: Use the 3UTR.fasta file generated earlier, filtered to protein-coding and downregulated genes.

 2. Prepare Motif Database (MEME format)

     * ATtRACT: https://attract.cnic.es
     * RBPDB: http://rbpdb.ccbr.utoronto.ca
     * Ray2013 (CISBP-RNA motifs) — available via MEME Suite
     * [RBPmap motifs (if downloadable)]
     #Example format: rbp_motifs.meme

 2. Run FIMO to Scan for RBP Motifs (Similar to RBPmap)

     fimo --oc fimo_up rbp_motifs.meme mirna_up.fa
     fimo --oc fimo_down rbp_motifs.meme mirna_down.fa
     fimo --oc fimo_background rbp_motifs.meme mirna_background.fa
     #This produces fimo.tsv in each output folder.

 3. Run RBP motif enrichment with AME (Analysis of Motif Enrichment) from the MEME Suite. Note that FIMO followed by run_enrichment.py is equivalent to AME; however, running AME directly returns an ERROR:

     ame \
     --control control_3UTRs.fasta \
     --oc ame_out \
     --scoring avg \
     --method fisher \
     3UTR.fasta \
     rbp_motifs.meme

     Where:

     * 3UTR.fasta = your downregulated genes’ 3′ UTRs
     * control_3UTRs.fasta = background UTRs (e.g., random protein-coding genes not downregulated)
     * rbp_motifs.meme = motif file from RBPDB or Ray2013

 4. Interpret Results: Output includes RBP motifs enriched in your downregulated mRNAs' 3′ UTRs.

     You can then link enriched RBPs to known interactions with your upregulated miRNAs, or explore their regulatory roles.

 5. ✅ Bonus: Predict Which mRNAs Are Targets of Your miRNAs

     Use tools like: miRanda, TargetScan, miRDB

     Then intersect predicted targets with your downregulated genes to identify likely functional interactions.

 6. Summary

     Goal    Input   Tool / Approach
     RBP enrichment  3UTR.fasta of downregulated genes   AME with RBP motifs
     Background/control  3′ UTRs from non-differential or upregulated genes
     Link miRNA to targets   Use TargetScan / miRanda    Intersect with down genes

 7. Would you like:

     * Ready-to-use RBP motif .meme file?
     * Script to generate background sequences?
     * Visualization options for the enrichment results?

Other options to get sequences of 3UTR, 5UTR, CDS and mRNA transcripts

 ✅ Option 2: Use Ensembl BioMart (web-based, no coding) --> takes too long!

     Go to Ensembl BioMart https://www.ensembl.org/biomart/martview/7b826bcbd0cec79021977f8dc12a8f61

     Select:

     Database: Ensembl Genes
     Dataset: Homo sapiens genes (GRCh38 or latest)

     Click on “Filters” → expand Region or Gene to limit your selection (optional).
     Click on “Attributes”:
     Under Sequences, check:
     Sequences
     3' UTR sequences

     Optionally add gene IDs, transcript IDs, etc.

     Click “Results” to view/download the FASTA of 3' UTRs.

 ✅ Option 3: Use GENCODE (precompiled annotations) and gffread

     Use a tool like gffread (from the Cufflinks or gffread package) to extract 3' UTRs:

         #gffread gencode.v44.annotation.gtf -g GRCh38.primary_assembly.genome.fa -w all_utrs.fa -U
         #gffread -w three_prime_utrs.fa -g GRCh38.fa -x cds.fa -y proteins.fa -U -F gencode.gtf

         grep -P "\tthree_prime_utr\t" gencode.v48.annotation.gtf > three_prime_utrs.gtf
         gtf2bed < three_prime_utrs.gtf > three_prime_utrs.bed
         bedtools getfasta -fi GRCh38.primary_assembly.genome.fa -bed three_prime_utrs.bed -name -s > three_prime_utrs.fa

         gffread gencode.v48.annotation.gtf -g GRCh38.primary_assembly.genome.fa -U -w all_with_utrs.fa

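     Note: gffread -w writes full spliced transcript sequences (including UTRs), and the -U flag discards single-exon transcripts rather than extracting UTRs; filter post hoc for only 3' UTRs if needed.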

 ✅ Option 4: Use Bioconductor in R (UCSC-ID, not suitable!)

     # Install if not already installed
     if (!requireNamespace("BiocManager", quietly = TRUE))
         install.packages("BiocManager")
     BiocManager::install("GenomicFeatures")
     BiocManager::install("txdbmaker")
     #sudo apt-get update
     #sudo apt-get install libmariadb-dev
     #(optional)sudo apt-get install libmysqlclient-dev
     install.packages("RMariaDB")

     # Load library
     library(GenomicFeatures)

     # Create TxDb object for human genome
     txdb <- txdbmaker::makeTxDbFromUCSC(genome="hg38", tablename="refGene")

     # Extract 3' UTRs by transcript
     utr3 <- threeUTRsByTranscript(txdb, use.names=TRUE)

 # View or export as needed

 ✅ Option 5: Extract 3′ UTRs Using UCSC Table Browser (GUI method)
     🔗 Website:
     UCSC Table Browser

     🔹 Step-by-Step Instructions
     1. Set the basic parameters:
     Clade: Mammal

     Genome: Human

     Assembly: GRCh38/hg38

     Group: Genes and Gene Predictions

     Track: GENCODE v44 (or latest)

     Table: knownGene or wgEncodeGencodeBasicV44

     Choose knownGene for RefSeq-like models or wgEncodeGencodeBasicV44 for GENCODE

     2. Region:
     Select: genome (default)

     3. Output format:
     Select: sequence

     4. Click "get output"
     🔹 Sequence Retrieval Options:
     On the next page (after clicking "get output"), you’ll see sequence options.

     Configure as follows:
     ✅ Output format: FASTA

     ✅ Which part of the gene: Select only
     → UTRs → 3' UTR only

     ✅ Header options: choose if you want gene name,

⚡️ Bonus: Combine with miRNA-mRNA predictions

 Once you have RBPs enriched in downregulated mRNAs, you can intersect:
     * Which RBPs overlap miRNA binding regions (e.g., via CLIPdb or POSTAR)
     * Check if miRNAs and RBPs compete or co-bind
 This can lead to identifying miRNA-RBP regulatory modules.

Reports

Please find attached the results of the RNA-binding protein (RBP) enrichment analysis using FIMO and the ATtRACT motif database, along with a brief description of the procedures used for both the 3′ UTR-based analysis (RNA-seq) and the miRNA-based analysis (small RNA-seq).

    1. RBP Motif Enrichment from RNA-seq (3′ UTRs)

    We focused on 3′ UTRs, as they are key regulatory regions for RBPs. Sequences shorter than 16 nucleotides were excluded. Using FIMO (from the MEME suite) with motifs from the ATtRACT database, we scanned both foreground and background 3′ UTR sets to identify motif occurrences.

    Foreground: Differentially expressed transcripts (e.g., MKL-1 up/down, WaGa up/down)
    Background: All non-differentially expressed transcripts

    Analysis: Fisher’s exact test was used to assess motif enrichment; p-values were adjusted using the Benjamini–Hochberg method.

    Output files (RNA-seq):

        * rbp_enrichment_MKL-1_down.xlsx / .png
        * rbp_enrichment_MKL-1_up.xlsx / .png
        * rbp_enrichment_WaGa_down.xlsx / .png
        * rbp_enrichment_WaGa_up.xlsx / .png

    2. RBP Motif Enrichment from Small RNA-seq (miRNAs)

    This analysis focused on differentially expressed miRNAs, using mature miRNA sequences from miRBase. We scanned for RBP binding motifs within these sequences using FIMO and assessed motif enrichment relative to background sets.

    Foreground: DE miRNAs (up/down) from small RNA-seq comparisons
    Background: All other miRNAs from miRBase

    Analysis: FIMO was used with --thresh 1e-4, followed by annotation and filtering. Enrichment was assessed using Fisher’s test + BH correction.

    Output files (miRNAs):

        * rbp_enrichment_mirna_down.xlsx
        * rbp_enrichment_mirna_up.xlsx

    How to Interpret the Numbers
    Each row in the result tables represents one RBP and its enrichment statistics:

    a: foreground genes/sequences with the motif
    b: background genes/sequences with the motif
    c: total number of foreground genes/sequences
    d: total number of background genes/sequences

    These values are used to compute p-values and FDRs.

    For example, in rbp_enrichment_MKL-1_up.xlsx, AGO2 has a = 1621, meaning FIMO detected AGO2 motifs in 1,621 genes in the MKL-1 upregulated set. These genes are listed in AGO2_uniq.txt.

    Similarly, for the miRNA analysis (e.g., rbp_enrichment_mirna_up.xlsx and rbp_enrichment_mirna_down.xlsx), the numbers represent counts of unique miRNAs with at least one significant motif hit. As examples, I calculated the detailed membership for FXR2 and ORB2.

Post-processing of DAMIAN results

Leave a reply

Prepare input raw data

 # -- Ringversuch --
 ~/DATA/Data_Damian/241213_VH00358_120_AAG523FM5_Ringversuch
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20579/01_RV1_DNA_S1_R1_001.fastq.gz RV1_DNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20579/01_RV1_DNA_S1_R2_001.fastq.gz RV1_DNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20580/02_RV2_DNA_S2_R1_001.fastq.gz RV2_DNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20580/02_RV2_DNA_S2_R2_001.fastq.gz RV2_DNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20581/03_RV3_DNA_S3_R1_001.fastq.gz RV3_DNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20581/03_RV3_DNA_S3_R2_001.fastq.gz RV3_DNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20582/04_RV4_DNA_S4_R1_001.fastq.gz RV4_DNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20582/04_RV4_DNA_S4_R2_001.fastq.gz RV4_DNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20583/05_RV5_DNA_S5_R1_001.fastq.gz RV5_DNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20583/05_RV5_DNA_S5_R2_001.fastq.gz RV5_DNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20584/06_RV6_DNA_S6_R1_001.fastq.gz RV6_DNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20584/06_RV6_DNA_S6_R2_001.fastq.gz RV6_DNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20585/07_RV1_RNA_S7_R1_001.fastq.gz RV1_RNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20585/07_RV1_RNA_S7_R2_001.fastq.gz RV1_RNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20586/08_RV2_RNA_S8_R1_001.fastq.gz RV2_RNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20586/08_RV2_RNA_S8_R2_001.fastq.gz RV2_RNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20587/09_RV3_RNA_S9_R1_001.fastq.gz RV3_RNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20587/09_RV3_RNA_S9_R2_001.fastq.gz RV3_RNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20588/10_RV4_RNA_S10_R1_001.fastq.gz RV4_RNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20588/10_RV4_RNA_S10_R2_001.fastq.gz RV4_RNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20589/11_RV5_RNA_S11_R1_001.fastq.gz RV5_RNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20589/11_RV5_RNA_S11_R2_001.fastq.gz RV5_RNA_R2.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20590/12_RV6_RNA_S12_R1_001.fastq.gz RV6_RNA_R1.fastq.gz
 ln ../241213_VH00358_120_AAG523FM5_Ringversuch/p20590/12_RV6_RNA_S12_R2_001.fastq.gz RV6_RNA_R2.fastq.gz

Prepare virus database and select 8 representatives for the eight given viruses from the database

 # -- Download all genomes --
 # enterovirus D68
 # HSV-1
 # HSV-2
 # Influenza A H1N1
 # Cytomegalovirus AD169 (Human herpesvirus 5 (HHV-5), more commonly known as Cytomegalovirus (CMV))
 # Influenza A H3N2
 # Monkeypox
 # HIV-1

 esearch -db nucleotide -query "txid42789[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_42789_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_42789_ncbi.fasta complete_42789_ncbi.fasta    #899
 esearch -db nucleotide -query "txid10298[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10298_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_10298_ncbi.fasta complete_10298_ncbi.fasta    #162
 esearch -db nucleotide -query "txid10310[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10310_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_10310_ncbi.fasta complete_10310_ncbi.fasta    #33
 esearch -db nucleotide -query "txid1323429[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_1323429_ncbi.fasta
 python ~/Scripts/filter_fasta2.py genome_1323429_ncbi.fasta complete_1323429_ncbi.fasta    #465
 esearch -db nucleotide -query "txid10360[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10360_ncbi.fasta
 python ~/Scripts/filter_fasta2.py genome_10360_ncbi.fasta complete_10360_ncbi.fasta    #1
 esearch -db nucleotide -query "txid41857[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_41857_ncbi.fasta
 python ~/Scripts/filter_fasta2.py genome_41857_ncbi.fasta complete_41857_ncbi.fasta    #120
 esearch -db nucleotide -query "txid10244[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10244_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_10244_ncbi.fasta complete_10244_ncbi.fasta    #2525
 esearch -db nucleotide -query "txid11676[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_11676_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_11676_ncbi.fasta complete_11676_ncbi.fasta    #485995-->7416

 # ---- Alternatively, using ENA instead to download the genomes ----
 # https://www.ebi.ac.uk/ena/browser/view/11676 (1138065 records)
 # #Click "Sequence" and download "Counts" (1132648) and "Taxon descendants count" (1138065) if there is enough time! Download date: 09.04.2025.
 # python ~/Scripts/filter_fasta.py  ena_11676_sequence.fasta complete_11676_ena.fasta  #1138065-->????

 # Virus Name    NCBI TaxID
 # ------------------------
 # Enterovirus D68   42789                             >PQ895337.1 Enterovirus D68 isolate SH2024-25870
 # HSV-1 (Herpes Simplex Virus 1)    10298             >PQ569920.1 Human alphaherpesvirus 1 isolate MacIntyre, complete genome
 # HSV-2 (Herpes Simplex Virus 2)    10310             >OM370995.1 Human alphaherpesvirus 2 strain G, complete genome

     samtools faidx complete_42789_ncbi.fasta PQ895337.1 > Enterovirus_D68_isolate_SH2024-25870.fasta
     samtools faidx complete_10298_ncbi.fasta PQ569920.1 > HSV-1_isolate_MacIntyre.fasta
     samtools faidx complete_10310_ncbi.fasta OM370995.1 > HSV-2_strain_G.fasta

 # Influenza A virus (H1N1)  1323429
 # The Influenza A virus (H1N1) genome is composed of eight single-stranded negative-sense RNA segments, and the total genome size is approximately 13,500 nucleotides (13.5 kb).
 # Segment   Gene    Protein Product(s)  Approx. Length (nt)
 # 1 PB2 Polymerase basic 2  ~2,341
 # 2 PB1 Polymerase basic 1, PB1-F2  ~2,341
 # 3 PA  Polymerase acidic   ~2,233
 # 4 HA  Hemagglutinin   ~1,778
 # 5 NP  Nucleoprotein   ~1,565
 # 6 NA  Neuraminidase   ~1,413
 # 7 M   Matrix proteins (M1, M2)    ~1,027
 # 8 NS  Nonstructural (NS1, NS2)    ~890

 # >LC662544.1 Influenza A virus (H1N1) A/PR/8/34 NEP, NS1 genes for nonstructural protein 2, nonstructural protein 1, complete cds
 # >LC662543.1 Influenza A virus (H1N1) A/PR/8/34 M2, M1 genes for matrix protein 2, matrix protein 1, complete cds
 # >LC662542.1 Influenza A virus (H1N1) A/PR/8/34 NA gene for neuraminidase, complete cds
 # >LC662541.1 Influenza A virus (H1N1) A/PR/8/34 NP gene for nucleoprotein, complete cds
 # >LC662540.1 Influenza A virus (H1N1) A/PR/8/34 HA gene for haemagglutinin, complete cds
 # >LC662539.1 Influenza A virus (H1N1) A/PR/8/34 PA, PA-X genes for polymerase PA, PA-X protein, complete cds
 # >LC662538.1 Influenza A virus (H1N1) A/PR/8/34 PB1, PB1-F2 genes for polymerase PB1, PB1-F2 protein, complete cds
 # >LC662537.1 Influenza A virus (H1N1) A/PR/8/34 PB2 gene for polymerase PB2, complete cds

     samtools faidx complete_1323429_ncbi.fasta LC662537.1 > H1N1_A-PR-8-34_PB2.fasta
     samtools faidx complete_1323429_ncbi.fasta LC662538.1 > H1N1_A-PR-8-34_PB1.fasta
     samtools faidx complete_1323429_ncbi.fasta LC662539.1 > H1N1_A-PR-8-34_PA.fasta
     samtools faidx complete_1323429_ncbi.fasta LC662540.1 > H1N1_A-PR-8-34_HA.fasta
     samtools faidx complete_1323429_ncbi.fasta LC662541.1 > H1N1_A-PR-8-34_NP.fasta
     samtools faidx complete_1323429_ncbi.fasta LC662542.1 > H1N1_A-PR-8-34_NA.fasta
     samtools faidx complete_1323429_ncbi.fasta LC662543.1 > H1N1_A-PR-8-34_M.fasta
     samtools faidx complete_1323429_ncbi.fasta LC662544.1 > H1N1_A-PR-8-34_NS.fasta

 # Human cytomegalovirus AD169   10360: >X17403.1 Human cytomegalovirus strain AD169, complete genome
     # Human_cytomegalovirus_strain_AD169.fasta is needed when the representative genomes are
     # concatenated into viruses_representative.fasta, so it has to be extracted like the others.
     # NOTE(review): assumes the single record kept in complete_10360_ncbi.fasta is X17403.1
     # (the accession named in the report) - confirm with: grep ">" complete_10360_ncbi.fasta
     samtools faidx complete_10360_ncbi.fasta X17403.1 > Human_cytomegalovirus_strain_AD169.fasta

 # Influenza A virus (H3N2)  41857

 # >LC817411.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 8, complete sequence
 # >LC817410.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 7, complete sequence
 # >LC817409.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 6, complete sequence
 # >LC817408.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 5, complete sequence
 # >LC817407.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 4, complete sequence
 # >LC817406.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 3, complete sequence
 # >LC817405.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 2, complete sequence
 # >LC817404.1 Influenza A virus H3N2 A_Fukushima_OR808_2023 RNA, seqment 1, complete sequence

     samtools faidx complete_41857_ncbi.fasta LC817404.1 > H3N2_A-Fukushima-OR808-2023_PB2.fasta
     samtools faidx complete_41857_ncbi.fasta LC817405.1 > H3N2_A-Fukushima-OR808-2023_PB1.fasta
     samtools faidx complete_41857_ncbi.fasta LC817406.1 > H3N2_A-Fukushima-OR808-2023_PA.fasta
     samtools faidx complete_41857_ncbi.fasta LC817407.1 > H3N2_A-Fukushima-OR808-2023_HA.fasta
     samtools faidx complete_41857_ncbi.fasta LC817408.1 > H3N2_A-Fukushima-OR808-2023_NP.fasta
     samtools faidx complete_41857_ncbi.fasta LC817409.1 > H3N2_A-Fukushima-OR808-2023_NA.fasta
     samtools faidx complete_41857_ncbi.fasta LC817410.1 > H3N2_A-Fukushima-OR808-2023_M.fasta
     samtools faidx complete_41857_ncbi.fasta LC817411.1 > H3N2_A-Fukushima-OR808-2023_NS.fasta

 # Monkeypox virus   10244: >OP689666.1 Monkeypox virus isolate MPXV/Germany/2022/RKI513, complete genome
     samtools faidx complete_10244_ncbi.fasta OP689666.1 > Monkeypox_isolate_MPXV-Germany-2022-RKI513.fasta

 # Human immunodeficiency virus 1    11676: >AJ866558.1 Human immunodeficiency virus 1 complete genome, isolate 01IC-PCI127
     samtools faidx complete_11676_ncbi.fasta AJ866558.1 >  HIV-1_isolate_01IC-PCI127.fasta

 # -- Selected genomes saved in the fasta-files --
 # Enterovirus_D68_isolate_SH2024-25870.fasta
 # HSV-1_isolate_MacIntyre.fasta
 # HSV-2_strain_G.fasta
 # H1N1_A-PR-8-34_PB2.fasta
 # H1N1_A-PR-8-34_PB1.fasta
 # H1N1_A-PR-8-34_PA.fasta
 # H1N1_A-PR-8-34_HA.fasta
 # H1N1_A-PR-8-34_NP.fasta
 # H1N1_A-PR-8-34_NA.fasta
 # H1N1_A-PR-8-34_M.fasta
 # H1N1_A-PR-8-34_NS.fasta
 # Human_cytomegalovirus_strain_AD169.fasta
 # H3N2_A-Fukushima-OR808-2023_PB2.fasta
 # H3N2_A-Fukushima-OR808-2023_PB1.fasta
 # H3N2_A-Fukushima-OR808-2023_PA.fasta
 # H3N2_A-Fukushima-OR808-2023_HA.fasta
 # H3N2_A-Fukushima-OR808-2023_NP.fasta
 # H3N2_A-Fukushima-OR808-2023_NA.fasta
 # H3N2_A-Fukushima-OR808-2023_M.fasta
 # H3N2_A-Fukushima-OR808-2023_NS.fasta
 # Monkeypox_isolate_MPXV-Germany-2022-RKI513.fasta
 # HIV-1_isolate_01IC-PCI127.fasta

(Optional) Run the first round of vrap (--virus=viruses_selected.fasta)

 ln -s ~/Tools/vrap/ .
 mamba activate /home/jhuang/miniconda3/envs/vrap

 cd ~/DATA/Data_Damian/vrap_Ringversuch
 # Concatenate the filtered genome sets of all eight target viruses (one file per TaxID, in the
 # order of the download list: 42789, 10298, 10310, 1323429, 10360, 41857, 10244, 11676).
 # Each file must appear exactly once: listing a file twice duplicates its sequence IDs in the
 # database, and omitting one drops that virus from the first-round search.
 cat complete_42789_ncbi.fasta complete_10298_ncbi.fasta complete_10310_ncbi.fasta complete_1323429_ncbi.fasta complete_10360_ncbi.fasta complete_41857_ncbi.fasta complete_10244_ncbi.fasta complete_11676_ncbi.fasta > viruses_selected.fasta

 #Run vrap (first round): replace --virus to the specific taxonomy (e.g. viruses_selected.fasta) --> change virus_user_db --> specific_bacteria_user_db
 (vrap) for sample in RV1_DNA RV2_DNA RV3_DNA RV4_DNA RV5_DNA RV6_DNA  RV1_RNA RV2_RNA RV3_RNA RV4_RNA RV5_RNA RV6_RNA; do
     vrap/vrap.py  -1 ${sample}_R1.fastq.gz -2 ${sample}_R2.fastq.gz  -o vrap_${sample} --bt2idx=/home/jhuang/REFs/genome --host=/home/jhuang/REFs/genome.fa --virus=/home/jhuang/DATA/Data_Damian/vrap_Ringversuch/viruses_selected.fasta --nt=/mnt/nvme1n1p1/blast/nt --nr=/mnt/nvme1n1p1/blast/nr  -t 100 -l 200  -g
 done

Run the second round of vrap (--host=viruses_representative.fasta)

 cat Enterovirus_D68_isolate_SH2024-25870.fasta HSV-1_isolate_MacIntyre.fasta HSV-2_strain_G.fasta H1N1_A-PR-8-34_PB2.fasta H1N1_A-PR-8-34_PB1.fasta H1N1_A-PR-8-34_PA.fasta H1N1_A-PR-8-34_HA.fasta H1N1_A-PR-8-34_NP.fasta H1N1_A-PR-8-34_NA.fasta H1N1_A-PR-8-34_M.fasta H1N1_A-PR-8-34_NS.fasta Human_cytomegalovirus_strain_AD169.fasta H3N2_A-Fukushima-OR808-2023_PB2.fasta H3N2_A-Fukushima-OR808-2023_PB1.fasta H3N2_A-Fukushima-OR808-2023_PA.fasta H3N2_A-Fukushima-OR808-2023_HA.fasta H3N2_A-Fukushima-OR808-2023_NP.fasta H3N2_A-Fukushima-OR808-2023_NA.fasta H3N2_A-Fukushima-OR808-2023_M.fasta H3N2_A-Fukushima-OR808-2023_NS.fasta Monkeypox_isolate_MPXV-Germany-2022-RKI513.fasta HIV-1_isolate_01IC-PCI127.fasta > viruses_representative.fasta

 # Run vrap (second round): select some representative viruses from the Excel files generated by the previous step and use them as --host
 (vrap) for sample in RV1_DNA RV2_DNA RV3_DNA RV4_DNA RV5_DNA RV6_DNA  RV1_RNA RV2_RNA RV3_RNA RV4_RNA RV5_RNA RV6_RNA; do
     vrap/vrap_until_bowtie2.py  -1 ${sample}_R1.fastq.gz -2 ${sample}_R2.fastq.gz  -o vrap_${sample}_on_representatives --host /home/jhuang/DATA/Data_Damian/vrap_Ringversuch/viruses_representative.fasta   -t 100 -l 200  --gbt2 --noblast
 done

Generate the mapping statistics for the sam-files generated from last step

 # For every sample: convert the bowtie2 output of the second vrap round to a sorted, indexed BAM
 # and append the samtools flagstat summary to LOG_mapping in the starting directory.
 for sample in RV1_DNA RV2_DNA RV3_DNA RV4_DNA RV5_DNA RV6_DNA  RV1_RNA RV2_RNA RV3_RNA RV4_RNA RV5_RNA RV6_RNA; do
     echo "-----${sample}_on_representatives------" >> LOG_mapping
     #cd vrap_${sample}_on_${virus}/bowtie
     bowtie_dir="vrap_${sample}_on_representatives/bowtie"
     # Skip the sample when its directory is missing: with an unchecked cd the commands below
     # would run in the wrong directory and the final "cd ../.." would leave the project directory,
     # breaking every following iteration.
     if ! cd "${bowtie_dir}"; then
         echo "Cannot enter ${bowtie_dir}; sample skipped" >> LOG_mapping
         continue
     fi
     # Rename the vrap output to *.sam; on a rerun "mapped" is already renamed, so reuse mapped.sam
     if [ -e mapped ]; then
         mv mapped mapped.sam 2>> ../../LOG_mapping
     fi
     # Convert SAM to BAM, then sort and index
     samtools view -S -b mapped.sam > mapped.bam 2>> ../../LOG_mapping
     samtools sort mapped.bam -o mapped_sorted.bam 2>> ../../LOG_mapping
     samtools index mapped_sorted.bam 2>> ../../LOG_mapping
     # Write flagstat output to log (go up two levels to write correctly)
     samtools flagstat mapped_sorted.bam >> ../../LOG_mapping 2>&1
     cd ../..
 done

 #draw some plots for some representative isolates which were found in the first round (see Excel-file).
 samtools depth -m 0 -a mapped_sorted.bam > coverage.txt
 grep "PQ895337.1" coverage.txt > PQ895337_coverage.txt
 grep "PQ569920.1" coverage.txt > PQ569920_coverage.txt

         # Plot the per-position read depth of one reference sequence as a line chart.
         import pandas as pd
         import matplotlib.pyplot as plt

         # Load coverage data: PQ895337_coverage.txt holds the lines of the "samtools depth"
         # output that match PQ895337.1; the file has no header row, so the three tab-separated
         # columns are named here.
         df = pd.read_csv("PQ895337_coverage.txt", sep="\t", header=None, names=["chr", "pos", "coverage"])

         # Plot depth (y) against position (x) and open the figure in an interactive window
         plt.figure(figsize=(10,4))
         plt.plot(df["pos"], df["coverage"], color="blue", linewidth=0.5)
         plt.xlabel("Genomic Position")
         plt.ylabel("Coverage Depth")
         plt.title("BAM Coverage Plot")
         plt.show()

Report

 Subject: Mapping Results and Selected Reference Genomes

 Dear XXXX,

 Please find below the results. For each of the viruses you sent me, a representative isolate has been selected, as listed below:

 Selected Reference Isolates:

     Enterovirus D68:
         PQ895337.1 – Enterovirus D68 isolate SH2024-25870

     HSV-1 (Herpes Simplex Virus 1):
         PQ569920.1 – Human alphaherpesvirus 1 isolate MacIntyre, complete genome

     HSV-2 (Herpes Simplex Virus 2):
         OM370995.1 – Human alphaherpesvirus 2 strain G, complete genome

     Influenza A virus (H1N1):

         LC662537.1 – Influenza A virus (H1N1) A/PR/8/34 PB2 gene for polymerase PB2, complete cds
         LC662538.1 – Influenza A virus (H1N1) A/PR/8/34 PB1, PB1-F2 genes for polymerase PB1, PB1-F2 protein, complete cds
         LC662539.1 – Influenza A virus (H1N1) A/PR/8/34 PA, PA-X genes for polymerase PA, PA-X protein, complete cds
         LC662540.1 – Influenza A virus (H1N1) A/PR/8/34 HA gene for haemagglutinin, complete cds
         LC662541.1 – Influenza A virus (H1N1) A/PR/8/34 NP gene for nucleoprotein, complete cds
         LC662542.1 – Influenza A virus (H1N1) A/PR/8/34 NA gene for neuraminidase, complete cds
         LC662543.1 – Influenza A virus (H1N1) A/PR/8/34 M2, M1 genes for matrix protein 2, matrix protein 1, complete cds
         LC662544.1 – Influenza A virus (H1N1) A/PR/8/34 NEP, NS1 genes for nonstructural protein 2, nonstructural protein 1, complete cds

     Cytomegalovirus (strain AD169):
         X17403.1 – Human cytomegalovirus strain AD169, complete genome

     Influenza A virus (H3N2):

         LC817404.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 PB2 gene, complete sequence
         LC817405.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 PB1 gene, complete sequence
         LC817406.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 PA gene, complete sequence
         LC817407.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 HA gene, complete sequence
         LC817408.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 NP gene, complete sequence
         LC817409.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 NA gene, complete sequence
         LC817410.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 M gene, complete sequence
         LC817411.1 – Influenza A virus H3N2 A_Fukushima_OR808_2023 NS gene, complete sequence

     Monkeypox virus:
         OP689666.1 – Isolate MPXV/Germany/2022/RKI513, complete genome

     Human Immunodeficiency Virus 1 (HIV-1):
         AJ866558.1 – Isolate 01IC-PCI127, complete genome

 Mapping Results:

 Then, we mapped the paired-end reads from 12 samples of the Ringversuch project against the reference genomes listed above. The following are the mapping statistics. Coverage plots are attached for each case where reads map to the reference genome (see attachments).

 Mapping statistics:

     RV1_DNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV2_DNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV3_DNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV4_DNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV5_DNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV6_DNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV1_RNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV2_RNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV3_RNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV4_RNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV5_RNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)
     RV6_RNA_on_Enterovirus_D68_isolate_SH2024-25870: 0 + 0 mapped (0.00% : N/A)

Variant calling for Data_Pietschmann_229ECoronavirus_Mutations_2025 (via docker own_viral_ngs)

Leave a reply

Input data:

 ln -s ../raw_data_2024/hCoV229E_Rluc_R1.fastq.gz hCoV229E_Rluc_R1.fastq.gz
 ln -s ../raw_data_2024/hCoV229E_Rluc_R2.fastq.gz hCoV229E_Rluc_R2.fastq.gz
 ln -s ../raw_data_2024/p10_DMSO_R1.fastq.gz p10_DMSO_R1.fastq.gz
 ln -s ../raw_data_2024/p10_DMSO_R2.fastq.gz p10_DMSO_R2.fastq.gz
 ln -s ../raw_data_2024/p10_K22_R1.fastq.gz p10_K22_R1.fastq.gz
 ln -s ../raw_data_2024/p10_K22_R2.fastq.gz p10_K22_R2.fastq.gz
 ln -s ../raw_data_2024/p10_K7523_R1.fastq.gz p10_K7523_R1.fastq.gz
 ln -s ../raw_data_2024/p10_K7523_R2.fastq.gz p10_K7523_R2.fastq.gz
 ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20606/p16_DMSO_S29_R1_001.fastq.gz p16_DMSO_R1.fastq.gz
 ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20606/p16_DMSO_S29_R2_001.fastq.gz p16_DMSO_R2.fastq.gz
 ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20607/p16_K22_S30_R1_001.fastq.gz p16_K22_R1.fastq.gz
 ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20607/p16_K22_S30_R2_001.fastq.gz p16_K22_R2.fastq.gz
 ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20608/p16_X7523_S31_R1_001.fastq.gz p16_X7523_R1.fastq.gz
 ln -s ../raw_data_2025/250506_VH00358_136_AAG3YJ5M5/p20608/p16_X7523_S31_R2_001.fastq.gz p16_X7523_R2.fastq.gz

Variant calling using snippy

 ln -s ~/Tools/bacto/db/ .;
 ln -s ~/Tools/bacto/envs/ .;
 ln -s ~/Tools/bacto/local/ .;
 cp ~/Tools/bacto/Snakefile .;
 cp ~/Tools/bacto/bacto-0.1.json .;
 cp ~/Tools/bacto/cluster.json .;

 #download PP810610.gb from GenBank
 mv ~/Downloads/sequence\(2\).gb db/PP810610.gb

 #setting the following in bacto-0.1.json
     "fastqc": false,
     "taxonomic_classifier": false,
     "assembly": true,
     "typing_ariba": false,
     "typing_mlst": true,
     "pangenome": true,
     "variants_calling": true,
     "phylogeny_fasttree": true,
     "phylogeny_raxml": true,
     "recombination": false, (due to gubbins-error set false)
     "genus": "Alphacoronavirus",
     "kingdom": "Viruses",
     "species": "Human coronavirus 229E",
     "mykrobe": {
         "species": "corona"
     },
     "reference": "db/PP810610.gb"

 mamba activate /home/jhuang/miniconda3/envs/bengal3_ac3
 (bengal3_ac3) /home/jhuang/miniconda3/envs/snakemake_4_3_1/bin/snakemake --printshellcmds

Summarize all SNPs and Indels from the snippy result directory.

 #Output: snippy/summary_snps_indels.csv
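 # IMPORTANT: adapt the array "isolates" to the sample names of this project before running; the following is an example from another project: isolates = ["AYE-S", "AYE-Q", "AYE-WT on Tig4", "AYE-craA on Tig4", "AYE-craA-1 on Cm200", "AYE-craA-2 on Cm200"]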
 python3 ~/Scripts/summarize_snippy_res.py snippy
 cd snippy
 #grep -v "None,,,,,,None,None" summary_snps_indels.csv > summary_snps_indels_.csv

Calling variants using spandx (almost the same results as those from viral-ngs!)

 mamba activate /home/jhuang/miniconda3/envs/spandx
 mkdir ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/PP810610
 cp PP810610.gb  ~/miniconda3/envs/spandx/share/snpeff-5.1-2/data/PP810610/genes.gbk
 vim ~/miniconda3/envs/spandx/share/snpeff-5.1-2/snpEff.config
 /home/jhuang/miniconda3/envs/spandx/bin/snpEff build PP810610    #-d
 ~/Scripts/genbank2fasta.py PP810610.gb
 mv PP810610.gb_converted.fna PP810610.fasta    #rename the sequence header (e.g. "PP810610.1 xxxxx") to "PP810610" in the fasta-file so that it matches the snpEff database name; NOTE(review): this note previously named NC_001348 - confirm
 ln -s /home/jhuang/Tools/spandx/ spandx
 (spandx) nextflow run spandx/main.nf --fastq "trimmed/*_P_{1,2}.fastq" --ref PP810610.fasta --annotation --database PP810610 -resume

 # Rerun SNP_matrix.sh due to the error ERROR_CHROMOSOME_NOT_FOUND in the variants annotation
 cd Outputs/Master_vcf
 (spandx) cp -r ../../snippy/hCoV229E_Rluc/reference .
 (spandx) cp ../../spandx/bin/SNP_matrix.sh ./
 #Note that ${variant_genome_path}=PP810610 in the following command, but it is no longer used after the command replacement.
 #Adapt "snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -v ${variant_genome_path} out.vcf > out.annotated.vcf" to
 #"/home/jhuang/miniconda3/envs/bengal3_ac3/bin/snpEff eff -no-downstream -no-intergenic -ud 100 -formatEff -c reference/snpeff.config -dataDir . ref out.vcf > out.annotated.vcf" in SNP_matrix.sh
 (spandx) bash SNP_matrix.sh PP810610 .

Calling inter-host variants by merging the results from snippy+spandx (Manually!)

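 # Inter-host variants: genetic differences of the same virus between two different hosts; these variants may be related to the host's immune response, the disease manifestation, or the mode of viral transmission.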
 cp All_SNPs_indels_annotated.txt All_SNPs_indels_annotated_backup.txt
 vim All_SNPs_indels_annotated.txt

 #in the file ids: grep "$(echo -e '\t')353$(echo -e '\t')" All_SNPs_indels_annotated.txt >> All_SNPs_indels_annotated_.txt
 #Replace \n with " All_SNPs_indels_annotated.txt >> All_SNPs_indels_annotated_.txt\ngrep "
 #Replace grep " --> grep "$(echo -e '\t')
 #Replace " All_ --> $(echo -e '\t')" All_

 # Potential intra-host variants: 10871, 19289, 23435.
 CHROM   POS     REF     ALT     TYPE    hCoV229E_Rluc_trimmed   p10_DMSO_trimmed        p10_K22_trimmed p10_K7523_trimmed       p16_DMSO_trimmed        p16_K22_trimmed p16_X7523_trimmed       Effect  Impact  Functional_Class        Codon_change    Protein_and_nucleotide_change   Amino_Acid_Length       Gene_name       Biotype
 PP810610        1464    T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        gTt/gCt p.Val416Ala/c.1247T>C   6757    CDS_1   protein_coding
 PP810610        1699    C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  gtC/gtT p.Val494Val/c.1482C>T   6757    CDS_1   protein_coding
 PP810610        6691    C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  tgC/tgT p.Cys2158Cys/c.6474C>T  6757    CDS_1   protein_coding
 PP810610        6919    C       G       SNP     G       G       G       G       G       G       G       synonymous_variant      LOW     SILENT  ggC/ggG p.Gly2234Gly/c.6702C>G  6757    CDS_1   protein_coding
 PP810610        7294    T       A       SNP     A       A       A       A       A       A       A       missense_variant        MODERATE        MISSENSE        agT/agA p.Ser2359Arg/c.7077T>A  6757    CDS_1   protein_coding
 * PP810610       10871   C       T       SNP     C       C/T     T       C/T     C/T     T       C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu3552Phe/c.10654C>T 6757    CDS_1   protein_coding
 PP810610        14472   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        aTg/aCg p.Met4752Thr/c.14255T>C 6757    CDS_1   protein_coding
 PP810610        15458   T       C       SNP     C       C       C       C       C       C       C       synonymous_variant      LOW     SILENT  Ttg/Ctg p.Leu5081Leu/c.15241T>C 6757    CDS_1   protein_coding
 PP810610        16035   C       A       SNP     A       A       A       A       A       A       A       stop_gained     HIGH    NONSENSE        tCa/tAa p.Ser5273*/c.15818C>A   6757    CDS_1   protein_coding
 PP810610        17430   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        tTa/tCa p.Leu5738Ser/c.17213T>C 6757    CDS_1   protein_coding
 * PP810610       19289   G       T       SNP     G       G       T       G       G       G/T     G       missense_variant        MODERATE        MISSENSE        Gtt/Ttt p.Val6358Phe/c.19072G>T 6757    CDS_1   protein_coding
 PP810610        21183   T       G       SNP     G       G       G       G       G       G       G       missense_variant        MODERATE        MISSENSE        tTt/tGt p.Phe230Cys/c.689T>G    1173    CDS_2   protein_coding
 PP810610        22636   T       G       SNP     G       G       G       G       G       G       G       missense_variant        MODERATE        MISSENSE        aaT/aaG p.Asn714Lys/c.2142T>G   1173    CDS_2   protein_coding
 PP810610        23022   T       C       SNP     C       C       C       C       C       C       C       missense_variant        MODERATE        MISSENSE        tTa/tCa p.Leu843Ser/c.2528T>C   1173    CDS_2   protein_coding
 * PP810610       23435   C       T       SNP     C       C       T       C/T     C       C/T     C/T     missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu981Phe/c.2941C>T   1173    CDS_2   protein_coding
 PP810610        24512   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        Ctc/Ttc p.Leu36Phe/c.106C>T     88      CDS_4   protein_coding
 PP810610        24781   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        aCt/aTt p.Thr36Ile/c.107C>T     77      CDS_5   protein_coding
 PP810610        25163   C       T       SNP     T       T       T       T       T       T       T       missense_variant        MODERATE        MISSENSE        Ctt/Ttt p.Leu82Phe/c.244C>T     225     CDS_6   protein_coding
 PP810610        25264   C       T       SNP     T       T       T       T       T       T       T       synonymous_variant      LOW     SILENT  gtC/gtT p.Val115Val/c.345C>T    225     CDS_6   protein_coding
 PP810610        26838   G       T       SNP     T       T       T       T       T       T       T

Calling intra-host variants using viral-ngs

 # Intra-host variants: within a single infected individual, several different viral variants may coexist in different cells or organs.

 #How to run and debug the viral-ngs docker?
 # ---- DEBUG_2025_1: using docker instead ----
 mkdir viralngs; cd viralngs
 ln -s ~/Tools/viral-ngs_docker/Snakefile Snakefile
 ln -s  ~/Tools/viral-ngs_docker/bin bin
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/refsel.acids refsel.acids
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/lastal.acids lastal.acids
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/config.yaml config.yaml
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-runs.txt samples-runs.txt
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-depletion.txt samples-depletion.txt
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-metagenomics.txt samples-metagenomics.txt
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly.txt samples-assembly.txt
 cp  ~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2024/samples-assembly-failures.txt samples-assembly-failures.txt
 # Adapt the samples-*.txt files

 mkdir viralngs/data
 mkdir viralngs/data/00_raw

 # Align the trimmed paired-end reads of each sample to the reference and store unsorted BAM
 # files in bams/ (read groups are added and the files are sorted in the next step).
 mkdir -p bams    # -p: do not fail when the directory already exists (rerun-safe)
 ref_fa="PP810610.fasta"
 # The BWA index depends only on the reference, so build it once instead of once per sample
 bwa index "${ref_fa}"
 #for sample in hCoV229E_Rluc p10_DMSO p10_K22; do
 for sample in p10_K7523 p16_DMSO p16_K22 p16_X7523; do
     bwa mem -M -t 16 "${ref_fa}" "trimmed/${sample}_trimmed_P_1.fastq" "trimmed/${sample}_trimmed_P_2.fastq" \
         | samtools view -bS - > "bams/${sample}_genome_alignment.bam"
 done

 conda activate viral-ngs4
 #for sample in hCoV229E_Rluc p10_DMSO p10_K22; do
 #for sample in p10_K7523 p16_DMSO p16_K22 p16_X7523; do
 for sample in p16_K22; do
     picard AddOrReplaceReadGroups I=bams/${sample}_genome_alignment.bam O=~/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2025/viralngs/data/00_raw/${sample}.bam SORT_ORDER=coordinate CREATE_INDEX=true RGPL=illumina RGID=$sample RGSM=$sample RGLB=standard RGPU=$sample VALIDATION_STRINGENCY=LENIENT; \
 done
 conda deactivate

 # -- ! Firstly, leave samples-assembly.txt empty so that only the depletion step is run!
 docker run -it -v /mnt/md1/DATA_D/Data_Pietschmann_229ECoronavirus_Mutations_2025/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk   own_viral_ngs bash
 cd /work
 snakemake --directory /work --printshellcmds --cores 40

 # -- ! Secondly, manually run the assembly steps
 # --> Iteratively add the unfinished assemblies to the list, replacing one each time, and run "snakemake --directory /work --printshellcmds --cores 40"

     # # ---- NOTE that the following steps need rerun --> DOES NOT WORK, USE STRATEGY ABOVE ----
     # #for sample in p10_K22 p10_K7523; do
     # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
     #     bin/read_utils.py merge_bams data/01_cleaned/${sample}.cleaned.bam tmp/01_cleaned/${sample}.cleaned.bam --picardOptions SORT_ORDER=queryname
     #     bin/read_utils.py rmdup_mvicuna_bam tmp/01_cleaned/${sample}.cleaned.bam data/01_per_sample/${sample}.cleaned.bam --JVMmemory 30g
     # done
     #
     # #Note that the error generated by nextflow is from the step gapfill_gap2seq!
     # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
     #     bin/assembly.py assemble_spades data/01_per_sample/${sample}.taxfilt.bam /home/jhuang/REFs/viral_ngs_dbs/trim_clip/contaminants.fasta tmp/02_assembly/${sample}.assembly1-spades.fasta --nReads 10000000 --threads 15 --memLimitGb 12
     # done
     # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
     # for sample in p10_K22 p10_K7523; do
     #     bin/assembly.py order_and_orient tmp/02_assembly/${sample}.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/${sample}.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/${sample}.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta --threads 15
     # done
     #
     # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
     #     bin/assembly.py gapfill_gap2seq tmp/02_assembly/${sample}.assembly2-scaffolded.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly2-gapfilled.fasta --memLimitGb 12 --maskErrors --randomSeed 0 --loglevel DEBUG
     # done

 #IMPORTANT: Rerun the following commands!
 for sample in hCoV229E_Rluc  p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do

     bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index  --loglevel DEBUG
 done

     # for sample in hCoV229E_Rluc p10_DMSO p10_K22 p10_K7523  p16_DMSO p16_K22 p16_X7523; do
     #     bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly3-modify.fasta data/01_per_sample/${sample}.cleaned.bam tmp/02_assembly/${sample}.assembly4-refined.fasta --outVcf tmp/02_assembly/${sample}.assembly3.vcf.gz --min_coverage 2 --novo_params '-r Random -l 20 -g 40 -x 20 -t 502' --threads 15  --loglevel DEBUG
     #     bin/assembly.py refine_assembly tmp/02_assembly/${sample}.assembly4-refined.fasta data/01_per_sample/${sample}.cleaned.bam data/02_assembly/${sample}.fasta --outVcf tmp/02_assembly/${sample}.assembly4.vcf.gz --min_coverage 3 --novo_params '-r Random -l 20 -g 40 -x 20 -t 100' --threads 15  --loglevel DEBUG
     # done

 # -- ! Thirdly set the samples-assembly.txt completely and run "snakemake --directory /work --printshellcmds --cores 40"

Merge intra- and inter-host variants, comparing the variants to the alignments of the assemblies to confirm their correctness.

 cat NC_001348.fasta viralngs/data/02_assembly/VZV_20S.fasta viralngs/data/02_assembly/VZV_60S.fasta > aligned_1.fasta
 mafft --clustalout aligned_1.fasta > aligned_1.aln
 #~/Scripts/convert_fasta_to_clustal.py aligned_1.fasta_orig aligned_1.aln
 ~/Scripts/convert_clustal_to_clustal.py aligned_1.aln aligned_1_.aln
 #manually delete the positions with all or '-' in aligned_1_.aln
 ~/Scripts/check_sequence_differences.py aligned_1_.aln
 ~/Scripts/check_sequence_differences.py aligned_1_.aln > aligned_1.res
 grep -v " = n" aligned_1.res > aligned_1_.res

 cat NC_001348.fasta viralngs/tmp/02_assembly/VZV_20S.assembly4-refined.fasta viralngs/tmp/02_assembly/VZV_60S.assembly4-refined.fasta > aligned_1.fasta
 mafft --clustalout aligned_1.fasta > aligned_1.aln
 ~/Scripts/convert_clustal_to_clustal.py aligned_1.aln aligned_1_.aln
 ~/Scripts/check_sequence_differences.py aligned_1_.aln > aligned_1.res
 grep -v " = n" aligned_1.res > aligned_1_.res

 #Differences found at the following positions (150):
 Position 8956: OP297860.1 = A, HSV1_S1-1 = A, HSV-Klinik_S2-1 = G
 Position 8991: OP297860.1 = A, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C
 Position 8992: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = C
 Position 8995: OP297860.1 = T, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C
 Position 9190: OP297860.1 = T, HSV1_S1-1 = A, HSV-Klinik_S2-1 = T
 * Position 13659: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G
 * Position 47969: OP297860.1 = C, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C
 * Position 53691: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G
 * Position 55501: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = C
 * Position 63248: OP297860.1 = G, HSV1_S1-1 = T, HSV-Klinik_S2-1 = G
 Position 63799: OP297860.1 = T, HSV1_S1-1 = C, HSV-Klinik_S2-1 = T
 * Position 64328: OP297860.1 = C, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C
 Position 65179: OP297860.1 = T, HSV1_S1-1 = T, HSV-Klinik_S2-1 = C
 * Position 65225: OP297860.1 = G, HSV1_S1-1 = G, HSV-Klinik_S2-1 = A
 * Position 95302: OP297860.1 = C, HSV1_S1-1 = A, HSV-Klinik_S2-1 = C

 gunzip isnvs.annot.txt.gz
 ~/Scripts/filter_isnv.py isnvs.annot.txt 0.05
 cut -d$'\t' filtered_isnvs.annot.txt -f1-7
 chr     pos     sample  patient time    alleles iSNV_freq
 OP297860        13203   HSV1_S1 HSV1_S1         T,C,A   1.0
 OP297860        13203   HSV-Klinik_S2   HSV-Klinik_S2           T,C,A   1.0
 OP297860        13522   HSV1_S1 HSV1_S1         G,T     1.0
 OP297860        13522   HSV-Klinik_S2   HSV-Klinik_S2           G,T     0.008905554253573941
 OP297860        13659   HSV1_S1 HSV1_S1         G,T     1.0
 OP297860        13659   HSV-Klinik_S2   HSV-Klinik_S2           G,T     0.008383233532934131

 ~/Scripts/convert_clustal_to_fasta.py aligned_1_.aln aligned_1.fasta
 samtools faidx aligned_1.fasta
 samtools faidx aligned_1.fasta OP297860.1 > OP297860.1.fasta
 samtools faidx aligned_1.fasta HSV1_S1-1 > HSV1_S1-1.fasta
 samtools faidx aligned_1.fasta HSV-Klinik_S2-1 > HSV-Klinik_S2-1.fasta
 seqkit seq OP297860.1.fasta -w 70 > OP297860.1_w70.fasta
 diff OP297860.1_w70.fasta ../../refsel_db/refsel.fasta

Consensus sequences of each and of all isolates

 cp data/02_assembly/*.fasta ./
 # Rename every per-sample consensus from <sample>.fasta to <sample>.fa and
 # append it to all.fa. Only ONE sample list can be active at a time: with two
 # stacked `for` headers and a single `done` the loop is never closed.
 # Alternative sample list (swap the comment marker to use it instead):
 #for sample in 838_S1 840_S2 820_S3 828_S4 815_S5 834_S6 808_S7 811_S8 837_S9 768_S10 773_S11 767_S12 810_S13 814_S14 10121-16_S15 7510-15_S16 828-17_S17 8806-15_S18 9881-16_S19 8981-14_S20; do
 for sample in p953-84660-tsek p938-16972-nra p942-88507-nra p943-98523-nra p944-103323-nra p947-105565-nra p948-112830-nra; do
     mv "${sample}.fasta" "${sample}.fa"
     # Append only the new sequence; `cat all.fa ... >> all.fa` would read the
     # very file it is writing to.
     cat "${sample}.fa" >> all.fa
 done
 cat RSV_dedup.fa all.fa > RSV_all.fa
 mafft --adjustdirection RSV_all.fa > RSV_all.aln
 snp-sites RSV_all.aln -o RSV_all_.aln

Download all Human alphaherpesvirus 3 (Varicella-zoster virus) genomes

 Human alphaherpesvirus 3
 acronym: HHV-3 VZV
 equivalent: Human herpes virus 3

 Human alphaherpesvirus 3 (Varicella-zoster virus)
     * Human herpesvirus 3 strain Dumas
     * Human herpesvirus 3 strain Oka vaccine
     * Human herpesvirus 3 VZV-32

 #Taxonomy ID: 10335
 esearch -db nucleotide -query "txid10335[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10335_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_10335_ncbi.fasta complete_genome_10335_ncbi.fasta  #2041-->165
 # ---- Download related genomes from ENA ----
 https://www.ebi.ac.uk/ena/browser/view/10335
 #Click "Sequence" and download "Counts" (2003) and "Taxon descendants count" (2005) if there is enough time! Download date: 11.03.2025.
 python ~/Scripts/filter_fasta.py  ena_10335_sequence.fasta complete_genome_10335_ena_taxon_descendants_count.fasta  #2005-->153
 #python ~/Scripts/filter_fasta.py ena_10335_sequence_Counts.fasta complete_genome_10335_ena_Counts.fasta  #xxx, 5.8G
 https://www.ebi.ac.uk/ena/browser/view/10239
 https://www.ebi.ac.uk/ena/browser/view/2497569
 https://www.ebi.ac.uk/ena/browser/view/Taxon:2497569
 ena_10239_sequence.fasta
 esearch -db nucleotide -query "txid10239[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10239_ncbi.fasta

Using Multi-CAR for scaffolding the contigs (If not useful, choose another scaffolding tool, e.g. https://github.com/malonge/RagTag)

  All contigs over 500 bp were successfully scaffolded to the graft genome using Multi-CAR (13), resulting in a chromosomal assembly of 4,506,689 bp.
  https://genome.cs.nthu.edu.tw/Multi-CAR/
  https://github.com/ablab-nthu/Multi-CSAR

Using the bowtie of vrap to map the reads on ref_genome/reference.fasta (The reference refers to the closest related genome found from the list generated by vrap)

 (vrap) vrap/vrap.py  -1 trimmed/VZV_20S_trimmed_P_1.fastq -2 trimmed/VZV_20S_trimmed_P_2.fastq  -o VZV_20S_on_X04370 --host /home/jhuang/DATA/Data_Huang_Human_herpesvirus_3/X04370.fasta   -t 100 -l 200  -g
 cd bowtie
 mv mapped mapped.sam
 samtools view -S -b mapped.sam > mapped.bam
 samtools sort mapped.bam -o mapped_sorted.bam
 samtools index mapped_sorted.bam
 samtools view -H mapped_sorted.bam
 samtools flagstat mapped_sorted.bam

Show the bw on IGV

Reports

 diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly4-refined.fasta

 diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly1-spades.fasta
 diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly2-scaffolded.fasta
 diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly2-gapfilled.fasta
 diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly3-modify.fasta
 diff data/02_assembly/2040_04.fasta tmp/02_assembly/2040_04.assembly4-refined.fasta
 ./2040_04.assembly2-alternate_sequences.fasta
 ./2040_04.assembly2-scaffold_ref.fasta

How to debug and construct the docker image own_viral_ngs?

Leave a reply

    mkdir viralngs; cd viralngs
    ln -s ~/Tools/viral-ngs_docker/Snakefile Snakefile
    ln -s  ~/Tools/viral-ngs_docker/bin bin
    cp  ~/Tools/viral-ngs_docker/refsel.acids refsel.acids
    cp  ~/Tools/viral-ngs_docker/lastal.acids lastal.acids
    cp  ~/Tools/viral-ngs_docker/config.yaml config.yaml
    cp  ~/Tools/viral-ngs_docker/samples-runs.txt samples-runs.txt
    cp  ~/Tools/viral-ngs_docker/samples-depletion.txt samples-depletion.txt
    cp  ~/Tools/viral-ngs_docker/samples-metagenomics.txt samples-metagenomics.txt
    cp  ~/Tools/viral-ngs_docker/samples-assembly.txt samples-assembly.txt
    cp  ~/Tools/viral-ngs_docker/samples-assembly-failures.txt samples-assembly-failures.txt

    docker run -it -v /mnt/md1/DATA/Data_Huang_Human_herpesvirus_3/viralngs:/work -v /home/jhuang/Tools/viral-ngs_docker:/home/jhuang/Tools/viral-ngs_docker -v /home/jhuang/REFs:/home/jhuang/REFs -v /home/jhuang/Tools/GenomeAnalysisTK-3.6:/home/jhuang/Tools/GenomeAnalysisTK-3.6 -v /home/jhuang/Tools/novocraft_v3:/home/jhuang/Tools/novocraft_v3 -v /usr/local/bin/gatk:/usr/local/bin/gatk   own_viral_ngs bash
    cd /work
    snakemake --directory /work --printshellcmds --cores 40

    #BUG_1: FileNotFoundError: [Errno 2] No such file or directory: '/home/jhuang/Tools/samtools-1.9/samtools': '/home/jhuang/Tools/samtools-1.9/samtools'
    #DEBUG_1 (DEPRECATED):
            # - In docker install independent samtools
            conda create -n samtools-1.9-env samtools=1.9 -c bioconda -c conda-forge
            # - persistence the modified docker, next time run own docker image
            docker ps
            #CONTAINER ID   IMAGE                              COMMAND   CREATED         STATUS         PORTS     NAMES
            #881a1ad6a990   quay.io/broadinstitute/viral-ngs   "bash"    8 minutes ago   Up 8 minutes             intelligent_yalow
            docker commit 881a1ad6a990 own_viral_ngs
            docker image ls
            docker run -it own_viral_ngs bash
            #Change the path as "/opt/miniconda/envs/samtools-1.9-env/bin/samtools" in /work/bin/tools/samtools.py
            #         If a tool other than samtools cannot be installed, use the same method above to install it in own_viral_ngs!
    #DEBUG_1_BETTER_SIMPLE: TOOL_VERSION = '1.6' --> '1.9' in ~/Tools/viral-ngs_docker/bin/tools/samtools.py

    #BUG_2:
            bin/taxon_filter.py deplete data/00_raw/2040_04.bam tmp/01_cleaned/2040_04.raw.bam tmp/01_cleaned/2040_04.bmtagger_depleted.bam tmp/01_cleaned/2040_04.rmdup.bam data/01_cleaned/2040_04.cleaned.bam --bmtaggerDbs /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/hg19 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/metagenomics_contaminants_v3 /home/jhuang/REFs/viral_ngs_dbs/bmtagger_dbs_remove/GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA --blastDbs /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/hybsel_probe_adapters /home/jhuang/REFs/viral_ngs_dbs/blast_dbs_remove/metag_v3.ncRNA.mRNA.mitRNA.consensus --threads 15 --srprismMemory 14250 --JVMmemory 50g --loglevel DEBUG
            #2025-05-23 09:58:45,326 - __init__:445:_attempt_install - DEBUG - Currently installed version of blast: 2.7.1-h4422958_6
            #2025-05-23 09:58:45,327 - __init__:448:_attempt_install - DEBUG - Expected version of blast:            2.6.0
            #2025-05-23 09:58:45,327 - __init__:449:_attempt_install - DEBUG - Incorrect version of blast installed. Removing it...
    #DEBUG_2: TOOL_VERSION = "2.6.0" --> "2.7.1" in ~/Tools/viral-ngs_docker/bin/tools/blast.py

    #BUG_3:
            bin/read_utils.py bwamem_idxstats data/01_cleaned/1762_04.cleaned.bam /home/jhuang/REFs/viral_ngs_dbs/spikeins/ercc_spike-ins.fasta --outStats reports/spike_count/1762_04.spike_count.txt --minScoreToFilter 60 --loglevel DEBUG
    #DEBUG_3: TOOL_VERSION = "0.7.15" --> "0.7.17" in ~/Tools/viral-ngs_docker/bin/tools/bwa.py

    #BUG_4: FileNotFoundError: [Errno 2] No such file or directory: '/usr/local/bin/trimmomatic': '/usr/local/bin/trimmomatic'
    #DEBUG_4: TOOL_VERSION = "0.36" --> "0.38" in ~/Tools/viral-ngs_docker/bin/tools/trimmomatic.py

    #BUG_5: FileNotFoundError: [Errno 2] No such file or directory: '/usr/bin/spades.py': '/usr/bin/spades.py'
    #DEBUG_5:  TOOL_VERSION = "0.36" --> "0.38" in ~/Tools/viral-ngs_docker/bin/tools/trimmomatic.py
    #                def install_and_get_path(self):
    #                        # the conda version wraps the jar file with a shell script
    #                        return 'trimmomatic'

    #BUG_6: bin/assembly.py order_and_orient tmp/02_assembly/2039_04.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/2039_04.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/2039_04.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta --threads 15 --loglevel DEBUG
    2025-05-23 17:40:19,526 - __init__:445:_attempt_install - DEBUG - Currently installed version of mummer4: 4.0.0beta2-pl526hf484d3e_4
    2025-05-23 17:40:19,527 - __init__:448:_attempt_install - DEBUG - Expected version of mummer4:            4.0.0rc1
    2025-05-23 17:40:19,527 - __init__:449:_attempt_install - DEBUG - Incorrect version of mummer4 installed. Removing it..
    DEBUG_6:  TOOL_VERSION = "4.0.0rc1" --> "4.0.0beta2" in ~/Tools/viral-ngs_docker/bin/tools/mummer.py

    #BUG_7: bin/assembly.py order_and_orient tmp/02_assembly/2039_04.assembly1-spades.fasta refsel_db/refsel.fasta tmp/02_assembly/2039_04.assembly2-scaffolded.fasta --min_pct_contig_aligned 0.05 --outAlternateContigs tmp/02_assembly/2039_04.assembly2-alternate_sequences.fasta --nGenomeSegments 1 --outReference tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta --threads 15 --loglevel DEBUG
            File "bin/assembly.py", line 549, in

            base_counts = [sum([len(seg.seq.replace("N", "")) for seg in scaffold]) \
            AttributeError: 'Seq' object has no attribute 'replace'
    #DEBUG_7: base_counts = [sum([len(seg.seq.replace("N", "")) for seg in scaffold]) --> base_counts = [sum([len(seg.seq.ungap('N')) for seg in scaffold]) in ~/Tools/viral-ngs_docker/bin/assembly.py

    #BUG_8: bin/assembly.py refine_assembly tmp/02_assembly/1243_2.assembly3-modify.fasta data/01_per_sample/1243_2.cleaned.bam tmp/02_assembly/1243_2.assembly4-refined.fasta --outVcf tmp/02_assembly/1243_2.assembly3.vcf.gz --min_coverage 2 --novo_params '-r Random -l 20 -g 40 -x 20 -t 502' --threads 15 --loglevel DEBUG
            File "/work/bin/tools/gatk.py", line 75, in execute
            FileNotFoundError: [Errno 2] No such file or directory: '/usr/local/bin/gatk': '/usr/local/bin/gatk'
    #DEBUG_8: -v /usr/local/bin/gatk:/usr/local/bin/gatk in 'docker run' and change default python in the script via a shebang; TOOL_VERSION = "3.8" --> "3.6" in ~/Tools/viral-ngs_docker/bin/tools/gatk.py

    #BUG_9: pyyaml is missing!
    #DEBUG_9: NO_ERROR if rerun!
            bin/assembly.py impute_from_reference tmp/02_assembly/2039_04.assembly2-gapfilled.fasta tmp/02_assembly/2039_04.assembly2-scaffold_ref.fasta tmp/02_assembly/2039_04.assembly3-modify.fasta --newName 2039_04 --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index --loglevel DEBUG
            #for sample in 2039_04 2040_04; do    # alternative sample list
            for sample in 1762_04 1243_2 875_04; do
                bin/assembly.py impute_from_reference tmp/02_assembly/${sample}.assembly2-gapfilled.fasta tmp/02_assembly/${sample}.assembly2-scaffold_ref.fasta tmp/02_assembly/${sample}.assembly3-modify.fasta --newName ${sample} --replaceLength 55 --minLengthFraction 0.05 --minUnambig 0.05 --index --loglevel DEBUG
            done

    #BUG_10: bin/reports.py consolidate_fastqc reports/fastqc/2039_04/align_to_self reports/fastqc/2040_04/align_to_self reports/fastqc/1762_04/align_to_self reports/fastqc/1243_2/align_to_self reports/fastqc/875_04/align_to_self reports/summary.fastqc.align_to_self.txt
    #DEBUG_10: File "bin/intrahost.py", line 527 and line 579 in merge_to_vcf
            # #MODIFIED_BACK
            samp_to_seqIndex[sampleName] = seq.seq.ungap('-')
            #samp_to_seqIndex[sampleName] = seq.seq.replace("-", "")

    #BUG_11: bin/interhost.py multichr_mafft ref_genome/reference.fasta data/02_assembly/2039_04.fasta data/02_assembly/2040_04.fasta data/02_assembly/1762_04.fasta data/02_assembly/1243_2.fasta data/02_assembly/875_04.fasta data/03_multialign_to_ref --ep 0.123 --maxiters 1000 --preservecase --localpair --outFilePrefix aligned --sampleNameListFile data/03_multialign_to_ref/sampleNameList.txt --threads 15 --loglevel DEBUG
            2025-05-26 15:04:19,014 - cmd:195:main_argparse - INFO - command: bin/interhost.py multichr_mafft inFastas=['ref_genome/reference.fasta', 'data/02_assembly/2039_04.fasta', 'data/02_assembly/2040_04.fasta', 'data/02_assembly/1762_04.fasta', 'data/02_assembly/1243_2.fasta', 'data/02_assembly/875_04.fasta'] localpair=True globalpair=None preservecase=True reorder=None gapOpeningPenalty=1.53 ep=0.123 verbose=False outputAsClustal=None maxiters=1000 outDirectory=data/03_multialign_to_ref outFilePrefix=aligned sampleRelationFile=None sampleNameListFile=data/03_multialign_to_ref/sampleNameList.txt threads=15 loglevel=DEBUG tmp_dir=/tmp tmp_dirKeep=False
            2025-05-26 15:04:19,014 - cmd:209:main_argparse - DEBUG - using tempDir: /tmp/tmp-interhost-multichr_mafft-nuws9mhp
            2025-05-26 15:04:21,085 - __init__:445:_attempt_install - DEBUG - Currently installed version of mafft: 7.402-0
            2025-05-26 15:04:21,085 - __init__:448:_attempt_install - DEBUG - Expected version of mafft: 7.221
            2025-05-26 15:04:21,085 - __init__:449:_attempt_install - DEBUG - Incorrect version of mafft installed. Removing it...
    #DEBUG_11: TOOL_VERSION = "7.221" --> "7.402" in ~/Tools/viral-ngs_docker/bin/tools/mafft.py

Processing Data_Tam_RNAseq_2025_LB_vs_Mac_ATCC19606

Leave a reply

Targets

 Could you please assist me with processing RNA-seq data? The reference genome is CP059040. I aim to analyze the data using PCA, a Venn diagram, and KEGG and GO annotation enrichment analysis.
 The samples are labeled as follows (where 'x' indicates the replicate number):

     LB-AB-x
     LB-IJ-x
     LB-W1-x
     LB-WT19606-x
     LB-Y1-x
     Mac-AB-x
     Mac-IJ-x
     Mac-W1-x
     Mac-WT19606-x
     Mac-Y1-x

Download the raw data

 ./lnd login -u X101SC25015922-Z02-J002 -p m*********5
 ./lnd list
 ./lnd cp -d oss://  ./
 ./lnd cp oss://CP2024102300053 .  #Error
 ./lnd list oss://CP2024102300053
 ./lnd cp -d oss://CP2024102300053/H101SC25015922/RSMR00204 .
 #CP2024102300053/H101SC25015922/RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002

Prepare raw data

 mkdir raw_data; cd raw_data

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-AB-1/LB-AB-1_1.fq.gz LB-AB-r1_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-AB-1/LB-AB-1_2.fq.gz LB-AB-r1_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-AB-2/LB-AB-2_1.fq.gz LB-AB-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-AB-2/LB-AB-2_2.fq.gz LB-AB-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-AB-3/LB-AB-3_1.fq.gz LB-AB-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-AB-3/LB-AB-3_2.fq.gz LB-AB-r3_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-IJ-1/LB-IJ-1_1.fq.gz LB-IJ-r1_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-IJ-1/LB-IJ-1_2.fq.gz LB-IJ-r1_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-IJ-2/LB-IJ-2_1.fq.gz LB-IJ-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-IJ-2/LB-IJ-2_2.fq.gz LB-IJ-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-IJ-4/LB-IJ-4_1.fq.gz LB-IJ-r4_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-IJ-4/LB-IJ-4_2.fq.gz LB-IJ-r4_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-W1-1/LB-W1-1_1.fq.gz LB-W1-r1_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-W1-1/LB-W1-1_2.fq.gz LB-W1-r1_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-W1-2/LB-W1-2_1.fq.gz LB-W1-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-W1-2/LB-W1-2_2.fq.gz LB-W1-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-W1-3/LB-W1-3_1.fq.gz LB-W1-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-W1-3/LB-W1-3_2.fq.gz LB-W1-r3_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-WT19606-2/LB-WT19606-2_1.fq.gz LB-WT19606-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-WT19606-2/LB-WT19606-2_2.fq.gz LB-WT19606-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-WT19606-3/LB-WT19606-3_1.fq.gz LB-WT19606-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-WT19606-3/LB-WT19606-3_2.fq.gz LB-WT19606-r3_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-WT19606-4/LB-WT19606-4_1.fq.gz LB-WT19606-r4_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-WT19606-4/LB-WT19606-4_2.fq.gz LB-WT19606-r4_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-Y1-2/LB-Y1-2_1.fq.gz LB-Y1-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-Y1-2/LB-Y1-2_2.fq.gz LB-Y1-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-Y1-3/LB-Y1-3_1.fq.gz LB-Y1-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-Y1-3/LB-Y1-3_2.fq.gz LB-Y1-r3_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-Y1-4/LB-Y1-4_1.fq.gz LB-Y1-r4_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/LB-Y1-4/LB-Y1-4_2.fq.gz LB-Y1-r4_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-AB-1/Mac-AB-1_1.fq.gz Mac-AB-r1_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-AB-1/Mac-AB-1_2.fq.gz Mac-AB-r1_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-AB-2/Mac-AB-2_1.fq.gz Mac-AB-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-AB-2/Mac-AB-2_2.fq.gz Mac-AB-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-AB-3/Mac-AB-3_1.fq.gz Mac-AB-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-AB-3/Mac-AB-3_2.fq.gz Mac-AB-r3_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-IJ-1/Mac-IJ-1_1.fq.gz Mac-IJ-r1_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-IJ-1/Mac-IJ-1_2.fq.gz Mac-IJ-r1_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-IJ-2/Mac-IJ-2_1.fq.gz Mac-IJ-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-IJ-2/Mac-IJ-2_2.fq.gz Mac-IJ-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-IJ-4/Mac-IJ-4_1.fq.gz Mac-IJ-r4_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-IJ-4/Mac-IJ-4_2.fq.gz Mac-IJ-r4_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-W1-1/Mac-W1-1_1.fq.gz Mac-W1-r1_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-W1-1/Mac-W1-1_2.fq.gz Mac-W1-r1_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-W1-2/Mac-W1-2_1.fq.gz Mac-W1-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-W1-2/Mac-W1-2_2.fq.gz Mac-W1-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-W1-3/Mac-W1-3_1.fq.gz Mac-W1-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-W1-3/Mac-W1-3_2.fq.gz Mac-W1-r3_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-WT19606-2/Mac-WT19606-2_1.fq.gz Mac-WT19606-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-WT19606-2/Mac-WT19606-2_2.fq.gz Mac-WT19606-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-WT19606-3/Mac-WT19606-3_1.fq.gz Mac-WT19606-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-WT19606-3/Mac-WT19606-3_2.fq.gz Mac-WT19606-r3_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-WT19606-4/Mac-WT19606-4_1.fq.gz Mac-WT19606-r4_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-WT19606-4/Mac-WT19606-4_2.fq.gz Mac-WT19606-r4_R2.fq.gz

 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-Y1-2/Mac-Y1-2_1.fq.gz Mac-Y1-r2_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-Y1-2/Mac-Y1-2_2.fq.gz Mac-Y1-r2_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-Y1-3/Mac-Y1-3_1.fq.gz Mac-Y1-r3_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-Y1-3/Mac-Y1-3_2.fq.gz Mac-Y1-r3_R2.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-Y1-4/Mac-Y1-4_1.fq.gz Mac-Y1-r4_R1.fq.gz
 ln -s ../RSMR00204/X101SC25015922-Z02/X101SC25015922-Z02-J002/01.RawData/Mac-Y1-4/Mac-Y1-4_2.fq.gz Mac-Y1-r4_R2.fq.gz

Preparing the directory trimmed

 # Adapter/quality-trim every paired-end sample with Trimmomatic (PE mode).
 # Paired output goes to trimmed/, reads that lost their mate go to
 # trimmed_unpaired/; stderr of the whole loop is collected in trimmomatic_pe.log.
 # -p: do not fail when the directories already exist (e.g. on a rerun).
 mkdir -p trimmed trimmed_unpaired
 for sample_id in LB-AB-r1 LB-AB-r2 LB-AB-r3  LB-IJ-r1 LB-IJ-r2 LB-IJ-r4  LB-W1-r1 LB-W1-r2 LB-W1-r3  LB-WT19606-r2 LB-WT19606-r3 LB-WT19606-r4  LB-Y1-r2 LB-Y1-r3 LB-Y1-r4    Mac-AB-r1 Mac-AB-r2 Mac-AB-r3  Mac-IJ-r1 Mac-IJ-r2 Mac-IJ-r4  Mac-W1-r1 Mac-W1-r2 Mac-W1-r3  Mac-WT19606-r2 Mac-WT19606-r3 Mac-WT19606-r4  Mac-Y1-r2 Mac-Y1-r3 Mac-Y1-r4; do
     java -jar /home/jhuang/Tools/Trimmomatic-0.36/trimmomatic-0.36.jar PE -threads 100 \
         raw_data/${sample_id}_R1.fq.gz raw_data/${sample_id}_R2.fq.gz \
         trimmed/${sample_id}_R1.fq.gz trimmed_unpaired/${sample_id}_R1.fq.gz \
         trimmed/${sample_id}_R2.fq.gz trimmed_unpaired/${sample_id}_R2.fq.gz \
         ILLUMINACLIP:/home/jhuang/Tools/Trimmomatic-0.36/adapters/TruSeq3-PE-2.fa:2:30:10:8:TRUE \
         LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 AVGQUAL:20
 # Exactly one `done` closes the loop; the redirect applies to every iteration.
 done 2> trimmomatic_pe.log

Preparing samplesheet.csv

 sample,fastq_1,fastq_2,strandedness
 LB-AB-r1,LB-AB-r1_R1.fq.gz,LB-AB-r1_R2.fq.gz,auto
 LB-AB-r2,LB-AB-r2_R1.fq.gz,LB-AB-r2_R2.fq.gz,auto
 LB-AB-r3,LB-AB-r3_R1.fq.gz,LB-AB-r3_R2.fq.gz,auto
 LB-IJ-r1,LB-IJ-r1_R1.fq.gz,LB-IJ-r1_R2.fq.gz,auto
 LB-IJ-r2,LB-IJ-r2_R1.fq.gz,LB-IJ-r2_R2.fq.gz,auto
 LB-IJ-r4,LB-IJ-r4_R1.fq.gz,LB-IJ-r4_R2.fq.gz,auto
 LB-W1-r1,LB-W1-r1_R1.fq.gz,LB-W1-r1_R2.fq.gz,auto
 LB-W1-r2,LB-W1-r2_R1.fq.gz,LB-W1-r2_R2.fq.gz,auto
 LB-W1-r3,LB-W1-r3_R1.fq.gz,LB-W1-r3_R2.fq.gz,auto
 LB-WT19606-r2,LB-WT19606-r2_R1.fq.gz,LB-WT19606-r2_R2.fq.gz,auto
 LB-WT19606-r3,LB-WT19606-r3_R1.fq.gz,LB-WT19606-r3_R2.fq.gz,auto
 LB-WT19606-r4,LB-WT19606-r4_R1.fq.gz,LB-WT19606-r4_R2.fq.gz,auto
 LB-Y1-r2,LB-Y1-r2_R1.fq.gz,LB-Y1-r2_R2.fq.gz,auto
 LB-Y1-r3,LB-Y1-r3_R1.fq.gz,LB-Y1-r3_R2.fq.gz,auto
 LB-Y1-r4,LB-Y1-r4_R1.fq.gz,LB-Y1-r4_R2.fq.gz,auto
 Mac-AB-r1,Mac-AB-r1_R1.fq.gz,Mac-AB-r1_R2.fq.gz,auto
 Mac-AB-r2,Mac-AB-r2_R1.fq.gz,Mac-AB-r2_R2.fq.gz,auto
 Mac-AB-r3,Mac-AB-r3_R1.fq.gz,Mac-AB-r3_R2.fq.gz,auto
 Mac-IJ-r1,Mac-IJ-r1_R1.fq.gz,Mac-IJ-r1_R2.fq.gz,auto
 Mac-IJ-r2,Mac-IJ-r2_R1.fq.gz,Mac-IJ-r2_R2.fq.gz,auto
 Mac-IJ-r4,Mac-IJ-r4_R1.fq.gz,Mac-IJ-r4_R2.fq.gz,auto
 Mac-W1-r1,Mac-W1-r1_R1.fq.gz,Mac-W1-r1_R2.fq.gz,auto
 Mac-W1-r2,Mac-W1-r2_R1.fq.gz,Mac-W1-r2_R2.fq.gz,auto
 Mac-W1-r3,Mac-W1-r3_R1.fq.gz,Mac-W1-r3_R2.fq.gz,auto
 Mac-WT19606-r2,Mac-WT19606-r2_R1.fq.gz,Mac-WT19606-r2_R2.fq.gz,auto
 Mac-WT19606-r3,Mac-WT19606-r3_R1.fq.gz,Mac-WT19606-r3_R2.fq.gz,auto
 Mac-WT19606-r4,Mac-WT19606-r4_R1.fq.gz,Mac-WT19606-r4_R2.fq.gz,auto
 Mac-Y1-r2,Mac-Y1-r2_R1.fq.gz,Mac-Y1-r2_R2.fq.gz,auto
 Mac-Y1-r3,Mac-Y1-r3_R1.fq.gz,Mac-Y1-r3_R2.fq.gz,auto
 Mac-Y1-r4,Mac-Y1-r4_R1.fq.gz,Mac-Y1-r4_R2.fq.gz,auto

 #mv trimmed/* .

nextflow run

 #Example1: http://xgenes.com/article/article-content/157/prepare-virus-gtf-for-nextflow-run/
 #docker pull nfcore/rnaseq
 ln -s /home/jhuang/Tools/nf-core-rnaseq-3.12.0/ rnaseq

 # ---- SUCCESSFUL with directly downloaded gff3 and fasta from NCBI using docker after replacing 'CDS' with 'exon' ----
 (host_env) /usr/local/bin/nextflow run rnaseq/main.nf --input samplesheet.csv --outdir results    --fasta "/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine/CP059040.fasta" --gff "/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine/CP059040_m.gff"        -profile docker -resume  --max_cpus 55 --max_memory 512.GB --max_time 2400.h    --save_align_intermeds --save_unaligned --save_reference    --aligner 'star_salmon'    --gtf_group_features 'gene_id'  --gtf_extra_attributes 'gene_name' --featurecounts_group_type 'gene_biotype' --featurecounts_feature_type 'transcript'

Import data and pca-plot

  #mamba activate r_env

 #install.packages("ggfun")
 # Import the required libraries
 library("AnnotationDbi")
 library("clusterProfiler")
 library("ReactomePA")
 library(gplots)
 library(tximport)
 library(DESeq2)
 #library("org.Hs.eg.db")
 library(dplyr)
 library(tidyverse)
 #install.packages("devtools")
 #devtools::install_version("gtable", version = "0.3.0")
 library(gplots)
 library("RColorBrewer")
 #install.packages("ggrepel")
 library("ggrepel")
 # install.packages("openxlsx")
 library(openxlsx)
 library(EnhancedVolcano)
 library(DESeq2)
 library(edgeR)

 setwd("~/DATA/Data_Tam_RNAseq_2025_LB_vs_Mac_ATCC19606/results/star_salmon")
 # Define paths to your Salmon output quantification files

 # Named character vector of Salmon quant.sf paths, one entry per sample.
 # Names have the form "<medium>-<strain>_<replicate>" and the matching path is
 # "./<medium>-<strain>-<replicate>/quant.sf". Entries are ordered LB first,
 # then Mac, with strains and replicates in the order listed below.
 files <- local({
   # Replicate labels present for each strain (the same set for LB and Mac).
   reps_by_strain <- list(
     AB      = c("r1", "r2", "r3"),
     IJ      = c("r1", "r2", "r4"),
     W1      = c("r1", "r2", "r3"),
     WT19606 = c("r2", "r3", "r4"),
     Y1      = c("r2", "r3", "r4")
   )
   strain_col <- rep(names(reps_by_strain), lengths(reps_by_strain))
   rep_col <- unlist(reps_by_strain, use.names = FALSE)
   medium_col <- rep(c("LB", "Mac"), each = length(strain_col))
   # strain_col / rep_col (15 entries) are recycled across both media (30 entries).
   group <- paste0(medium_col, "-", strain_col)
   setNames(
     paste0("./", group, "-", rep_col, "/quant.sf"),
     paste0(group, "_", rep_col)
   )
 })

 # Transcript-level import of the Salmon quantifications (txOut = TRUE: no gene summarization)
 txi <- tximport(files, type = "salmon", txIn = TRUE, txOut = TRUE)
 # Sample annotation for DESeq2.
 # Background: adeA and adeB belong to the AdeABC efflux pump (multidrug resistance);
 # the AdeIJK efflux pump consists of adeI (membrane fusion protein), adeJ (RND transporter)
 # and adeK (outer membrane factor).
 # The condition of a sample is its name without the trailing "_r<N>" replicate suffix,
 # e.g. "LB-AB_r1" -> "LB-AB".
 condition <- factor(sub("_r[0-9]+$", "", names(files)))
 colData <- data.frame(condition = condition, row.names = names(files))

 # ------------------------
 # 1️⃣ Setup and input files
 # ------------------------

 # Read in transcript-to-gene mapping (headerless table; the three columns are named below)
 tx2gene <- read.table("salmon_tx2gene.tsv", header=FALSE, stringsAsFactors=FALSE)
 colnames(tx2gene) <- c("transcript_id", "gene_id", "gene_name")

 # Two-column version (transcript_id, gene_id) passed to tximport for gene-level summarization
 tx2gene_geneonly <- tx2gene[, c("transcript_id", "gene_id")]

 # -------------------------------
 # 2️⃣ Transcript-level counts
 # -------------------------------
 # Create DESeqDataSet directly from tximport (transcript-level).
 # NOTE: `txi` must still be the transcript-level import here (txOut = TRUE).
 dds_tx <- DESeqDataSetFromTximport(txi, colData=colData, design=~condition)
 write.csv(counts(dds_tx), file="transcript_counts.csv")

 # --------------------------------
 # 3️⃣ Gene-level summarization
 # --------------------------------
 # Re-import Salmon data summarized at gene level (kept in a separate object, `txi_gene`)
 txi_gene <- tximport(files, type="salmon", tx2gene=tx2gene_geneonly, txOut=FALSE)

 # Create DESeqDataSet for gene-level counts (single-factor design on `condition`)
 #dds <- DESeqDataSetFromTximport(txi_gene, colData=colData, design=~condition+replicate)
 dds <- DESeqDataSetFromTximport(txi_gene, colData=colData, design=~condition)

 # --------------------------------
 # 4️⃣ Raw counts table (with gene names)
 # --------------------------------
 # Extract raw (un-normalized) gene-level counts; the gene id moves from the row names into a column
 counts_data <- as.data.frame(counts(dds, normalized=FALSE))
 counts_data$gene_id <- rownames(counts_data)

 # Add gene names. all.x=TRUE keeps every gene of the count table even without a name;
 # merge() returns the rows sorted by gene_id and with fresh row names.
 # NOTE(review): a gene_id mapped to several gene_name values would be duplicated here - confirm the mapping is 1:1.
 tx2gene_unique <- unique(tx2gene[, c("gene_id", "gene_name")])
 counts_data <- merge(counts_data, tx2gene_unique, by="gene_id", all.x=TRUE)

 # Reorder columns: gene_id, gene_name, then the per-sample count columns
 count_cols <- setdiff(colnames(counts_data), c("gene_id", "gene_name"))
 counts_data <- counts_data[, c("gene_id", "gene_name", count_cols)]

 # --------------------------------
 # 5️⃣ Calculate CPM
 # --------------------------------
 library(edgeR)
 library(openxlsx)

 # Numeric count matrix: every column of counts_data except the two annotation columns
 annotation_cols <- c("gene_id", "gene_name")
 count_matrix <- as.matrix(counts_data[, setdiff(colnames(counts_data), annotation_cols)])

 # CPM = count / library size * 1e6, where the library size is the plain column sum
 # (equivalent to the commented-out edgeR call below)
 #cpm_matrix <- cpm(count_matrix, normalized.lib.sizes=FALSE)
 total_counts <- colSums(count_matrix)
 cpm_matrix <- as.data.frame(sweep(count_matrix, 2, total_counts, FUN = "/") * 1e6)

 # Put gene_id and gene_name back in front of the CPM columns
 cpm_counts <- cbind(counts_data[, annotation_cols], cpm_matrix)

 # --------------------------------
 # 6️⃣ Save outputs
 # --------------------------------
 # Raw counts (CSV + XLSX) and CPM (XLSX); gene ids are already a column, so no row names are written
 write.csv(counts_data, "gene_raw_counts.csv", row.names = FALSE)
 write.xlsx(counts_data, "gene_raw_counts.xlsx", rowNames = FALSE)
 write.xlsx(cpm_counts, "gene_cpm_counts.xlsx", rowNames = FALSE)

 # -- (Optional) Save the rlog-transformed counts --
 dim(counts(dds))
 head(counts(dds), 10)
 rld <- rlogTransformation(dds)
 rlog_counts <- assay(rld)
 # The gene ids live only in the row names of the rlog matrix; write.xlsx() drops row names
 # unless rowNames = TRUE, which would leave the exported values without gene labels.
 write.xlsx(as.data.frame(rlog_counts), "gene_rlog_transformed_counts.xlsx", rowNames = TRUE)

 # ---- (Optional for NACHREIHEN) split the factos media and strain from condition (for comparison Mac vs LB) ----
 # AdeIJK vs. AdeABC Efflux Pumps
 #     * AdeIJK is the "housekeeping" pump — always active, broadly expressed, contributing to background resistance.
 #     * AdeABC is the "emergency" pump — induced under stress or mutations, more potent in contributing to clinical multidrug resistance.
 #LB = Luria-Bertani broth (a standard rich growth medium)
 #Mac = MacConkey agar or broth (selective for Gram-negative bacteria)
 # - Growth medium   Media or Condition, GrowthMedium
 # - Bacterial strain/genotype   Strain or Isolate, Genotype, SampleType
 # Split the combined condition into its two factors. Sample order in `files`:
 # 15 LB samples followed by 15 Mac samples; within each medium the strains
 # AB, IJ, W1, WT19606, Y1 with three replicates each.
 media <- factor(rep(c("LB", "Mac"), each = 15))
 strain <- factor(rep(rep(c("AB", "IJ", "W1", "WT19606", "Y1"), each = 3), times = 2))
 # Guard against the annotation getting out of sync with the sample list
 stopifnot(length(media) == length(files), length(strain) == length(files))
 # Define the colData for DESeq2
 colData <- data.frame(media = media, strain = strain, row.names = names(files))
 # -- gene-level count data --
 # (The transcript-level DESeqDataSet that used to be built here was overwritten
 #  immediately by the gene-level one below, so it is no longer created.)
 #write.csv(counts(dds), file="transcript_counts_media_strain.csv")  #check correctness, it should be identical to transcript_counts.csv
 # Read in the tx2gene map from salmon_tx2gene.tsv
 tx2gene <- read.table("salmon_tx2gene.tsv", header = FALSE, stringsAsFactors = FALSE)
 # Set the column names
 colnames(tx2gene) <- c("transcript_id", "gene_id", "gene_name")
 # Keep only transcript_id and gene_id (gene_name is not needed by tximport)
 tx2gene <- tx2gene[, 1:2]
 # Import and summarize the Salmon data at gene level; note this replaces `txi`
 txi <- tximport(files, type = "salmon", tx2gene = tx2gene, txOut = FALSE)
 # Gene-level dataset with the two-factor design; note this replaces `dds`
 dds <- DESeqDataSetFromTximport(txi, colData = colData, design = ~media+strain)
 #dds <- dds[rowSums(counts(dds) > 3) > 2, ]    #3796->????
 #write.csv(counts(dds, normalized=FALSE), file="gene_counts_media_strain.csv")  #check correctness, it should be identical to gene_counts.csv
 #dds <- dds[rowSums(counts(dds) > 3) > 2, ]    #3796->????
 #write.csv(counts(dds, normalized=FALSE), file="gene_counts_media_strain.csv")  #check correctness, it should be identical to gene_counts.csv
 # ---- (Optional for NACHREIHEN) END ----

 # -- pca --
 # print() makes the ggplot render even when the script is run via source()
 png("pca2.png", 1200, 800)
 print(plotPCA(rld, intgroup = c("condition")))
 dev.off()
 # -- heatmap --
 # Sample-to-sample Euclidean distances on the rlog values, clustered hierarchically
 png("heatmap2.png", 1200, 800)
 distsRL <- dist(t(assay(rld)))
 mat <- as.matrix(distsRL)
 hc <- hclust(distsRL)
 hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100)
 heatmap.2(mat, Rowv=as.dendrogram(hc),symm=TRUE, trace="none",col = rev(hmcol), margin=c(13, 13))
 dev.off()

 # -- pca_media_strain --
 # `rld` was computed from the ~condition dataset, so its colData may lack the
 # media/strain columns that plotPCA(intgroup = ...) requires. Derive them from
 # the sample names ("<medium>-<strain>_r<N>") when they are missing.
 if (is.null(rld$media)) {
   rld$media <- factor(sub("-.*$", "", colnames(rld)))
 }
 if (is.null(rld$strain)) {
   rld$strain <- factor(sub("^[^-]+-(.*)_r[0-9]+$", "\\1", colnames(rld)))
 }
 png("pca_media.png", 1200, 800)
 print(plotPCA(rld, intgroup = c("media")))
 dev.off()
 png("pca_strain.png", 1200, 800)
 print(plotPCA(rld, intgroup = c("strain")))
 dev.off()

(Optional; ERROR --> needs to be debugged!) Estimate size factors and dispersion values.

 #Size Factors: These are used to normalize the read counts across different samples. The size factor for a sample accounts for differences in sequencing depth (i.e., the total number of reads) and other technical biases between samples. After normalization with size factors, the counts should be comparable across samples. Size factors are usually calculated in a way that they reflect the median or mean ratio of gene expression levels between samples, assuming that most genes are not differentially expressed.
 #Dispersion: This refers to the variability or spread of gene expression measurements. In RNA-seq data analysis, each gene has its own dispersion value, which reflects how much the counts for that gene vary between different samples, more than what would be expected just due to the Poisson variation inherent in counting. Dispersion is important for accurately modeling the data and for detecting differentially expressed genes.
 #So in summary, size factors are specific to samples (used to make counts comparable across samples), and dispersion values are specific to genes (reflecting variability in gene expression).

 sizeFactors(dds)
 #NULL

 # Why sizeFactors(dds) can stay NULL: a dataset built from tximport carries an
 # 'avgTxLength' assay, and estimateSizeFactors() then stores gene-by-sample
 # normalization factors instead of one size factor per sample.
 # To obtain classic per-sample size factors, drop avgTxLength first, and also drop
 # normalization factors left over from an earlier run (they would otherwise still
 # be used by counts(dds, normalized=TRUE)).
 assays(dds)$avgTxLength <- NULL
 assays(dds)[["normalizationFactors"]] <- NULL
 # Estimate size factors
 dds <- estimateSizeFactors(dds)
 sizeFactors(dds)
 # Example output (sample names are from a different dataset):
 #control_r1 control_r2  HSV.d2_r1  HSV.d2_r2  HSV.d4_r1  HSV.d4_r2  HSV.d6_r1
 #2.3282468  2.0251928  1.8036883  1.3767551  0.9341929  1.0911693  0.5454526
 #HSV.d6_r2  HSV.d8_r1  HSV.d8_r2
 #0.4604461  0.5799834  0.6803681

 # Estimate dispersions (after the final size factors are in place)
 dds <- estimateDispersions(dds)

 # Removed: estimateSizeFactors(dds, controlGenes = NULL, use = FALSE)
 # 'use' is not an argument of estimateSizeFactors() and controlGenes must be a
 # numeric or logical index, so that call could only fail.

 # When run on the virus data alone, the following BUG occurred:
 # sizeFactors(dds) was still NULL --> BUG --> use the manual size-factor calculation instead!
                     HeLa_TO_r1                      HeLa_TO_r2
                     0.9978755                       1.1092227
 data.frame(genes = rownames(dds), dispersions = dispersions(dds))

 #Given the raw counts, the control_r1 and control_r2 samples seem to have a much lower sequencing depth (total read count) than the other samples. Therefore, when normalization methods are applied, the normalization factors for these control samples will be relatively high, boosting the normalized counts.
 # 1/0.9978755 = 1.002129023
 # 1/1.1092227 = 0.901532217
 #bamCoverage --bam ../markDuplicates/${sample}Aligned.sortedByCoord.out.bam -o ${sample}_norm.bw --binSize 10 --scaleFactor  --effectiveGenomeSize 2864785220
 bamCoverage --bam ../markDuplicates/HeLa_TO_r1Aligned.sortedByCoord.out.markDups.bam -o HeLa_TO_r1.bw --binSize 10 --scaleFactor 1.002129023     --effectiveGenomeSize 2864785220
 bamCoverage --bam ../markDuplicates/HeLa_TO_r2Aligned.sortedByCoord.out.markDups.bam -o HeLa_TO_r2.bw --binSize 10 --scaleFactor  0.901532217        --effectiveGenomeSize 2864785220

 # Raw counts and the counts normalized by DESeq2 (requires size/normalization factors to be estimated)
 raw_counts <- counts(dds)
 normalized_counts <- counts(dds, normalized=TRUE)
 # Optional tab-separated exports of both tables (disabled)
 #write.table(raw_counts, file="raw_counts.txt", sep="\t", quote=F, col.names=NA)
 #write.table(normalized_counts, file="normalized_counts.txt", sep="\t", quote=F, col.names=NA)
 # To convert BAM to bigWig with deepTools, pass the inverse of the DESeq2 size factor as --scaleFactor
 # Manual median-of-ratios size factors.
 #   cds: object for which counts(cds) returns a genes x samples count matrix
 # Returns one scaling factor per sample (column): the median, over genes, of
 # count / geometric-mean-of-that-gene. Genes with a zero count in any sample
 # have geometric mean 0 and are excluded from the median.
 estimSf <- function (cds){
     # Get the count matrix
     cts <- counts(cds)
     # Geometric mean computed on the log scale: prod(x)^(1/length(x)) overflows
     # to Inf for large counts or many samples, which would turn every ratio of
     # that gene into 0 and bias the median. log(0) = -Inf still yields 0 here.
     geomMean <- function(x) exp(mean(log(x)))
     # Geometric mean of every gene (row)
     gm.mean  <-  apply(cts, 1, geomMean)
     # Zero values are set to NA (avoids the subsequent division by 0)
     gm.mean[gm.mean == 0] <- NA
     # Divide each row by its geometric mean (MARGIN = 1: STATS has one entry per row)
     cts <- sweep(cts, 1, gm.mean, FUN="/")
     # Median ratio per sample (column), ignoring the excluded genes
     med <- apply(cts, 2, median, na.rm=TRUE)
     # Return the scaling factors
     return(med)
 }
 #https://dputhier.github.io/ASG/practicals/rnaseq_diff_Snf2/rnaseq_diff_Snf2.html
 #http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization
 #https://hbctraining.github.io/DGE_workshop/lessons/02_DGE_count_normalization.html
 #https://hbctraining.github.io/DGE_workshop/lessons/04_DGE_DESeq2_analysis.html
 #https://genviz.org/module-04-expression/0004/02/01/DifferentialExpression/
 #DESeq2’s median of ratios [1]
 #EdgeR’s trimmed mean of M values (TMM) [2]
 #http://www.nathalievialaneix.eu/doc/html/TP1_normalization.html  #very good website!
 # Sanity check: dividing each sample (column) of the raw counts by its size factor
 # should reproduce normalized_counts; the sum counts the cells that differ (expected 0).
 # Requires sizeFactors(dds) to be non-NULL.
 test_normcount <- sweep(raw_counts, 2, sizeFactors(dds), "/")
 sum(test_normcount != normalized_counts)

Select the differentially expressed genes

 #https://galaxyproject.eu/posts/2020/08/22/three-steps-to-galaxify-your-tool/
 #https://www.biostars.org/p/282295/
 #https://www.biostars.org/p/335751/
 #> dds$condition
 #LB-AB       LB-IJ       LB-W1       LB-WT19606  LB-Y1       Mac-AB     Mac-IJ      Mac-W1      Mac-WT19606 Mac-Y1
 #CONSOLE: mkdir star_salmon/degenes

 # All result tables are written into ./degenes (created if it does not exist yet)
 dir.create("degenes", showWarnings = FALSE)
 setwd("degenes")

 # Write the "<comparison>-all.txt", "-up.txt" and "-down.txt" tables.
 #   dds         DESeqDataSet on which DESeq() has been run
 #   variable    design variable the coefficients belong to ("condition" or "media")
 #   comparisons coefficient names without the variable prefix, as listed by
 #               resultsNames(dds), e.g. "Mac_vs_LB" or "LB.AB_vs_LB.WT19606"
 # Up/down sets use padj <= 0.05 and |log2FoldChange| >= 2; subset() drops rows with NA padj.
 write_de_tables <- function(dds, variable, comparisons) {
   for (i in comparisons) {
     res <- results(dds, name = paste(variable, i, sep = "_"))
     res <- res[!is.na(res$log2FoldChange), ]
     res_df <- as.data.frame(res)

     write.csv(res_df[order(res_df$pvalue), ], file = paste(i, "all.txt", sep = "-"))
     up <- subset(res_df, padj <= 0.05 & log2FoldChange >= 2)
     down <- subset(res_df, padj <= 0.05 & log2FoldChange <= -2)
     write.csv(up[order(up$log2FoldChange, decreasing = TRUE), ], file = paste(i, "up.txt", sep = "-"))
     write.csv(down[order(abs(down$log2FoldChange), decreasing = TRUE), ], file = paste(i, "down.txt", sep = "-"))
   }
   invisible(NULL)
 }

 # Sample annotation carrying the combined condition and its two components,
 # so that both designs below can be built from the same gene-level import.
 sample_info <- data.frame(
   condition = condition,
   media = factor(sub("-.*$", "", as.character(condition))),
   strain = factor(sub("^[^-]+-", "", as.character(condition))),
   row.names = names(files)
 )

 #---- mutant strains vs wild type within each medium (design ~condition) ----
 # Previously each relevel()/DESeq() pair only overwrote `clist`, so the single
 # export loop at the end wrote the last comparison only. Every fit is now
 # exported right after it has been computed.
 dds_condition <- DESeqDataSetFromTximport(txi_gene, colData = sample_info, design = ~condition)
 for (medium in c("LB", "Mac")) {
   #---- relevel to control ----
   dds_condition$condition <- relevel(dds_condition$condition, paste0(medium, "-WT19606"))
   dds_condition <- DESeq(dds_condition, betaPrior = FALSE)
   print(resultsNames(dds_condition))
   # "-" in the level names appears as "." in the coefficient names
   clist <- paste0(medium, ".", c("AB", "IJ", "W1", "Y1"), "_vs_", medium, ".WT19606")
   write_de_tables(dds_condition, "condition", clist)
 }

 #---- medium effect (design ~media+strain), in both directions ----
 # - If the experiment is about growth, gene expression or general behaviour of the
 #   bacteria without selective pressure, LB is the better control.
 # - If the behaviour under selective pressure is of interest (e.g. selecting for
 #   Gram-negative bacteria, testing antibiotic resistance, or distinguishing lactose
 #   fermenters), MacConkey is the more suitable control.
 dds <- DESeqDataSetFromTximport(txi_gene, colData = sample_info, design = ~media+strain)
 for (reference in c("LB", "Mac")) {
   dds$media <- relevel(dds$media, reference)
   dds <- DESeq(dds, betaPrior = FALSE)
   print(resultsNames(dds))
   clist <- paste0(setdiff(c("LB", "Mac"), reference), "_vs_", reference)
   write_de_tables(dds, "media", clist)
 }

 # -- Under host-env --
 grep -P "\tgene\t" CP059040.gff > CP059040_gene.gff
 # Run replace_gene_names.py on the all/up/down table of every comparison:
 # <comparison>-<set>.txt -> <comparison>-<set>.csv
 gff=/home/jhuang/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine/CP059040_gene.gff
 for cmp in LB.AB_vs_LB.WT19606 LB.IJ_vs_LB.WT19606 LB.W1_vs_LB.WT19606 LB.Y1_vs_LB.WT19606 \
            Mac.AB_vs_Mac.WT19606 Mac.IJ_vs_Mac.WT19606 Mac.W1_vs_Mac.WT19606 Mac.Y1_vs_Mac.WT19606 \
            Mac_vs_LB LB_vs_Mac; do
   for set in all up down; do
     python3 ~/Scripts/replace_gene_names.py "$gff" "${cmp}-${set}.txt" "${cmp}-${set}.csv"
   done
 done

 # Build the Excel report and the volcano plot for one comparison.
 #   comparison  file prefix, e.g. "Mac_vs_LB"; reads "<comparison>-all.csv" and writes
 #               "Gene_Expression_<comparison>.xlsx" and "<comparison>.png"
 #   subtitle    subtitle shown on the volcano plot
 #   padj_floor  value substituted for padj == 0 so that -log10(padj) stays finite
 #   height      height of the PNG in pixels (width is fixed at 1200)
 # Returns (invisibly) the de-duplicated table with GeneName as row names.
 make_volcano_report <- function(comparison, subtitle, padj_floor = 1e-12, height = 1200) {
   res <- read.csv(paste0(comparison, "-all.csv"))
   # Replace empty GeneName with the GeneID stripped of its "gene-" prefix
   res$GeneName <- ifelse(
     res$GeneName == "" | is.na(res$GeneName),
     gsub("gene-", "", res$GeneID),
     res$GeneName
   )
   # Some names occur more than once; in Mac_vs_LB these were
   #  "bfr"  "lipA" "ahpF" "pcaF" "alr"  "pcaD" "cydB" "lpdA" "pgaC" "ppk1"
   #  "pcaF" "tuf"  "galE" "murI" "yccS" "rrf"  "rrf"  "arsB" "ptsP" "umuD"
   #  "map"  "pgaB" "rrf"  "rrf"  "rrf"  "pgaD" "uraH" "benE"
   # 1st strategy (not used): keep the first occurrence
   #res <- res[!duplicated(res$GeneName), ]
   # 2nd strategy (used): keep the row with the smallest padj for each GeneName
   res <- res %>%
     group_by(GeneName) %>%
     slice_min(padj, with_ties = FALSE) %>%
     ungroup()
   res <- as.data.frame(res)
   # Sort by padj (ascending), then by log2FoldChange (descending)
   res <- res[order(res$padj, -res$log2FoldChange), ]

   # Up/down-regulated genes: |log2FoldChange| > 2 and padj < 1e-2.
   # which() discards comparisons that evaluate to NA (padj is NA for filtered genes);
   # plain logical indexing would add one all-NA row per such gene to the sheets.
   up_regulated <- res[which(res$log2FoldChange > 2 & res$padj < 1e-2), ]
   down_regulated <- res[which(res$log2FoldChange < -2 & res$padj < 1e-2), ]

   # Workbook with the complete table plus the two filtered tables
   wb <- createWorkbook()
   sheets <- list(Complete_Data = res,
                  Up_Regulated = up_regulated,
                  Down_Regulated = down_regulated)
   for (sheet_name in names(sheets)) {
     addWorksheet(wb, sheet_name)
     writeData(wb, sheet_name, sheets[[sheet_name]])
   }
   saveWorkbook(wb, paste0("Gene_Expression_", comparison, ".xlsx"), overwrite = TRUE)

   # Gene names become the row names (used as point labels) and the column is dropped
   rownames(res) <- res$GeneName
   res$GeneName <- NULL
   # Replace padj = 0 with a small value
   res$padj[which(res$padj == 0)] <- padj_floor

   png(paste0(comparison, ".png"), width = 1200, height = height)
   # Close the device even if plotting fails
   on.exit(dev.off(), add = TRUE)
   # Inside a function the plot object has to be printed explicitly
   print(EnhancedVolcano(res,
                         lab = rownames(res),
                         x = 'log2FoldChange',
                         y = 'padj',
                         pCutoff = 1e-2,
                         FCcutoff = 2,
                         title = '',
                         subtitleLabSize = 18,
                         pointSize = 3.0,
                         labSize = 5.0,
                         colAlpha = 1,
                         legendIconSize = 4.0,
                         drawConnectors = TRUE,
                         widthConnectors = 0.5,
                         colConnectors = 'black',
                         subtitle = as.expression(subtitle)))
   invisible(res)
 }

 # ---- Mac_vs_LB ---- (taller plot and a much smaller padj floor than the strain comparisons)
 make_volcano_report("Mac_vs_LB", "Mac versus LB", padj_floor = 1e-150, height = 2000)

 # ---- LB.AB / LB.IJ / LB.W1 / LB.Y1 vs LB.WT19606 ----
 for (strain_id in c("AB", "IJ", "W1", "Y1")) {
   make_volcano_report(
     paste0("LB.", strain_id, "_vs_LB.WT19606"),
     paste0("LB.", strain_id, " versus LB.WT19606")
   )
 }

 # ---- Mac.AB / Mac.IJ / Mac.W1 / Mac.Y1 versus Mac.WT19606 ----
 # The four comparisons go through identical steps, so they are processed in
 # one loop. The loop runs at top level, so after it finishes res,
 # up_regulated, down_regulated and wb hold the values of the LAST
 # comparison (Mac.Y1), as in the unrolled version.
 mac_comparisons <- c(
   "Mac.AB_vs_Mac.WT19606",
   "Mac.IJ_vs_Mac.WT19606",
   "Mac.W1_vs_Mac.WT19606",
   "Mac.Y1_vs_Mac.WT19606"
 )

 for (comparison in mac_comparisons) {
   # Input: <comparison>-all.csv
   res <- read.csv(paste0(comparison, "-all.csv"))

   # Replace empty or missing GeneName with the GeneID minus its "gene-" text
   res$GeneName <- ifelse(
     res$GeneName == "" | is.na(res$GeneName),
     gsub("gene-", "", res$GeneID),
     res$GeneName
   )
   # Names that occur more than once (kept only for interactive inspection)
   duplicated_genes <- res[duplicated(res$GeneName), "GeneName"]

   # Keep exactly one row per GeneName: the one with the smallest padj
   res <- res %>%
     group_by(GeneName) %>%
     slice_min(padj, with_ties = FALSE) %>%
     ungroup()
   res <- as.data.frame(res)
   # Sort by padj (ascending), then by log2FoldChange (descending)
   res <- res[order(res$padj, -res$log2FoldChange), ]

   # Significant genes: |log2FoldChange| > 2 and padj < 1e-2.
   # which() drops rows where the test is NA; plain logical indexing would
   # add an all-NA row for every gene with a missing padj or log2FoldChange.
   up_regulated <- res[which(res$log2FoldChange > 2 & res$padj < 1e-2), ]
   down_regulated <- res[which(res$log2FoldChange < -2 & res$padj < 1e-2), ]

   # Workbook with three sheets: complete table, up-regulated, down-regulated
   wb <- createWorkbook()
   addWorksheet(wb, "Complete_Data")
   writeData(wb, "Complete_Data", res)
   addWorksheet(wb, "Up_Regulated")
   writeData(wb, "Up_Regulated", up_regulated)
   addWorksheet(wb, "Down_Regulated")
   writeData(wb, "Down_Regulated", down_regulated)
   saveWorkbook(wb, paste0("Gene_Expression_", comparison, ".xlsx"),
                overwrite = TRUE)

   # Use GeneName as row names (unique after de-duplication) so the plot can
   # label points with rownames(res), then drop the column.
   rownames(res) <- res$GeneName
   res$GeneName <- NULL
   # Inside a loop nothing is auto-printed, so print explicitly.
   print(head(res))

   # Replace padj = 0 with a small value (the plot shows padj on a log scale)
   res$padj[which(res$padj == 0)] <- 1e-12

   #library(EnhancedVolcano)
   png(paste0(comparison, ".png"), width = 1200, height = 1200)
   #max.overlaps = 10
   # The plot object must be print()ed inside a loop to reach the device;
   # `finally` closes the png device even if plotting fails.
   tryCatch(
     print(EnhancedVolcano(res,
                           lab = rownames(res),
                           x = 'log2FoldChange',
                           y = 'padj',
                           pCutoff = 1e-2,
                           FCcutoff = 2,
                           title = '',
                           subtitleLabSize = 18,
                           pointSize = 3.0,
                           labSize = 5.0,
                           colAlpha = 1,
                           legendIconSize = 4.0,
                           drawConnectors = TRUE,
                           widthConnectors = 0.5,
                           colConnectors = 'black',
                           # "A_vs_B" -> "A versus B"
                           subtitle = sub("_vs_", " versus ", comparison,
                                          fixed = TRUE))),
     finally = dev.off()
   )
 }

 #TODO: annotate the Gene_Expression_xxx_vs_yyy.xlsx

Clustering the genes and drawing the heatmap

 #http://xgenes.com/article/article-content/150/draw-venn-diagrams-using-matplotlib/
 #http://xgenes.com/article/article-content/276/go-terms-for-s-epidermidis/

 # Print (not execute) one `cut` command per comparison and direction; each
 # printed command would extract the first comma-separated field of
 # <comparison>-up.txt / -down.txt into <comparison>-up.id / -down.id.
 for i in \
     Mac_vs_LB \
     LB.AB_vs_LB.WT19606 LB.IJ_vs_LB.WT19606 \
     LB.W1_vs_LB.WT19606 LB.Y1_vs_LB.WT19606 \
     Mac.AB_vs_Mac.WT19606 Mac.IJ_vs_Mac.WT19606 \
     Mac.W1_vs_Mac.WT19606 Mac.Y1_vs_Mac.WT19606
 do
   printf '%s\n' "cut -d',' -f1-1 ${i}-up.txt > ${i}-up.id"
   printf '%s\n' "cut -d',' -f1-1 ${i}-down.txt > ${i}-down.id"
 done
 #5 LB.AB_vs_LB.WT19606-down.id
 #20 LB.AB_vs_LB.WT19606-up.id
 #64 LB.IJ_vs_LB.WT19606-down.id
 #69 LB.IJ_vs_LB.WT19606-up.id
 #23 LB.W1_vs_LB.WT19606-down.id
 #97 LB.W1_vs_LB.WT19606-up.id
 #9 LB.Y1_vs_LB.WT19606-down.id
 #20 LB.Y1_vs_LB.WT19606-up.id
 #20 Mac.AB_vs_Mac.WT19606-down.id
 #29 Mac.AB_vs_Mac.WT19606-up.id
 #65 Mac.IJ_vs_Mac.WT19606-down.id
 #197 Mac.IJ_vs_Mac.WT19606-up.id
 #359 Mac_vs_LB-down.id
 #308 Mac_vs_LB-up.id
 #290 Mac.W1_vs_Mac.WT19606-down.id
 #343 Mac.W1_vs_Mac.WT19606-up.id
 #75 Mac.Y1_vs_Mac.WT19606-down.id
 #0 Mac.Y1_vs_Mac.WT19606.png-down.id
 #0 Mac.Y1_vs_Mac.WT19606.png-up.id
 #68 Mac.Y1_vs_Mac.WT19606-up.id
 #2061 total

 # Concatenate every *.id file and keep each distinct line once, sorted.
 cat *.id | sort -u > ids
 # Manual clean-up of "ids" before it is read in R:
 #   - delete the "GeneName" line
 #   - add Gene_Id as the first line and delete the "" entries
 # Note: GeneID (not GeneName) is used as the index, since the .txt files contain only GeneID.
 # Genes of interest: the Gene_Id column of the "ids" file.
 GOI <- read.csv("ids")$Gene_Id    #1329
 # NOTE(review): `rld` and assay() are defined elsewhere in the workflow.
 RNASeq.NoCellLine <- assay(rld)
 #install.packages("gplots")
 library("gplots")

 # Drop IDs with no matching row instead of failing with an opaque
 # "subscript out of bounds" error (e.g. a leftover header line in "ids").
 unknown_ids <- setdiff(GOI, rownames(RNASeq.NoCellLine))
 if (length(unknown_ids) > 0) {
   warning("Ignoring ", length(unknown_ids),
           " ID(s) from 'ids' not found in the expression matrix: ",
           paste(head(unknown_ids, 10), collapse = ", "), call. = FALSE)
   GOI <- intersect(GOI, rownames(RNASeq.NoCellLine))
 }
 # drop = FALSE keeps a matrix even when a single gene is selected.
 datamat <- RNASeq.NoCellLine[GOI, , drop = FALSE]
 #datamat = RNASeq.NoCellLine
 write.csv(as.data.frame(datamat), file ="DEGs_heatmap_expression_data.txt")

 # Rows with zero variance make the Pearson correlation undefined; remove them.
 constant_rows <- apply(datamat, 1, var) == 0
 if (any(constant_rows)) {
   cat("Removing", sum(constant_rows), "constant rows.\n")
   datamat <- datamat[!constant_rows, , drop = FALSE]
 }

 #clustering methods: "ward.D", "ward.D2", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).  pearson or spearman
 # Row (gene) clustering on 1 - Pearson correlation
 hr <- hclust(as.dist(1 - cor(t(datamat), method = "pearson")), method = "complete")
 # Column (sample) clustering on 1 - Spearman correlation; the heatmap below
 # does not use it (Colv = NA), but it is kept for interactive use.
 hc <- hclust(as.dist(1 - cor(datamat, method = "spearman")), method = "complete")
 # Cut the gene tree at max height / 1.15 to define the clusters.
 mycl <- cutree(hr, h = max(hr$height) / 1.15)

 # One colour per cluster. The previous palette held "LIGHTRED" (not a valid
 # R colour name) and listed "MAGENTA" twice; both entries were replaced.
 cluster_palette <- c("YELLOW", "BLUE", "ORANGE", "MAGENTA", "CYAN", "RED",
                      "GREEN", "MAROON", "LIGHTBLUE", "PINK", "PURPLE",
                      "LIGHTCYAN", "SALMON", "LIGHTGREEN")
 if (max(mycl) > length(cluster_palette)) {
   stop("cutree() produced ", max(mycl), " clusters but only ",
        length(cluster_palette), " colours are defined.", call. = FALSE)
 }
 mycol <- cluster_palette[as.vector(mycl)]

 #png("DEGs_heatmap.png", width=900, height=800)
 #cex.lab=10, labRow="",
 png("DEGs_heatmap.png", width = 1200, height = 1000)
 # Row-scaled heatmap with the gene dendrogram only; row labels are hidden.
 heatmap.2(as.matrix(datamat), Rowv = as.dendrogram(hr), Colv = NA,
           dendrogram = 'row', labRow = "",
           scale = 'row', trace = 'none', col = bluered(75), cexCol = 1.8,
           RowSideColors = mycol, margins = c(10, 2), cexRow = 1.5,
           srtCol = 30, lhei = c(1, 8), lwid = c(2, 8))  #rownames(datamat)
 #heatmap.2(datamat, Rowv=as.dendrogram(hr), col=bluered(75), scale="row", RowSideColors=mycol, trace="none", margin=c(5,5), sepwidth=c(0,0), dendrogram = 'row', Colv = 'false', density.info='none', labRow="", srtCol=30, lhei=c(0.1,2))
 dev.off()
 #### cluster members #####
 # One file per cluster 1..5, listing the names of the genes assigned to it.
 cluster_member_files <- c(
   'cluster1_YELLOW.txt',
   'cluster2_DARKBLUE.txt',
   'cluster3_DARKORANGE.txt',
   'cluster4_DARKMAGENTA.txt',
   'cluster5_DARKCYAN.txt'
 )
 for (cluster_id in seq_along(cluster_member_files)) {
   members <- names(mycl)[mycl == cluster_id]
   write.csv(members, file = cluster_member_files[cluster_id])
 }
 # Optional conversion of the text files to Excel:
 #~/Tools/csv2xls-0.4/csv_to_xls.py cluster*.txt -d',' -o DEGs_heatmap_cluster_members.xls
 #~/Tools/csv2xls-0.4/csv_to_xls.py DEGs_heatmap_expression_data.txt -d',' -o DEGs_heatmap_expression_data.xls;

 #### (NOT_WORKING) cluster members with annotations #####
 # Does not work for the bacteria: it is not a model species, so no Ensembl
 # mart can be used for `mart = ensembl`. Kept as a template.
 # NOTE(review): getBM() and `ensembl` are not set up in this section; they
 # presumably come from biomaRt and an earlier useMart() call -- confirm.
 #
 # Each cluster is annotated and written to its OWN file. The earlier version
 # overwrote `data` five times, so only cluster 5 was annotated, and then
 # wrote that same table to all five files.
 cluster_annotation_files <- c(
   "cluster1_YELLOW.csv",
   "cluster2_DARKBLUE.csv",
   "cluster3_DARKORANGE.csv",
   "cluster4_DARKMAGENTA.csv",
   "cluster5_DARKCYAN.csv"
 )
 bm_attributes <- c('ensembl_gene_id', 'external_gene_name', 'gene_biotype',
                    'entrezgene_id', 'chromosome_name', 'start_position',
                    'end_position', 'strand', 'description')

 for (cluster_id in seq_along(cluster_annotation_files)) {
   cluster_genes <- names(mycl)[mycl == cluster_id]
   data <- as.data.frame(
     datamat[rownames(datamat) %in% cluster_genes, , drop = FALSE]
   )
   total_genes <- nrow(data)
   if (total_genes == 0) {
     # Nothing to annotate for this cluster.
     next
   }

   # Collect one annotated row per gene, then bind once after the loop.
   annotated_rows <- vector("list", total_genes)
   for (i in seq_len(total_genes)) {
     gene <- rownames(data)[i]
     result <- getBM(attributes = bm_attributes,
                     filters = 'ensembl_gene_id',
                     values = gene,
                     mart = ensembl)
     # If multiple rows are returned, take the first one
     if (nrow(result) > 1) {
       result <- result[1, ]
     }
     # No hit: keep the gene with empty annotation fields
     if (nrow(result) == 0) {
       result <- data.frame(ensembl_gene_id = gene,
                            external_gene_name = NA,
                            gene_biotype = NA,
                            entrezgene_id = NA,
                            chromosome_name = NA,
                            start_position = NA,
                            end_position = NA,
                            strand = NA,
                            description = NA)
     }
     # Gene information followed by that gene's expression values
     annotated_rows[[i]] <- cbind(result, data[gene, , drop = FALSE])
     # Print progress every 100 genes
     if (i %% 100 == 0) {
       cat(sprintf("Processed gene %d out of %d\n", i, total_genes))
     }
   }
   annotated_data <- do.call(rbind, annotated_rows)
   # Save the annotated data of this cluster
   write.csv(annotated_data, cluster_annotation_files[cluster_id],
             row.names = FALSE)
 }
 #~/Tools/csv2xls-0.4/csv_to_xls.py cluster*.csv -d',' -o DEGs_heatmap_clusters.xls

KEGG and GO annotations in non-model organisms

https://www.biobam.com/functional-analysis/

Assign KEGG and GO Terms (see diagram above)

Since your organism is non-model, standard R databases (org.Hs.eg.db, etc.) won’t work. You’ll need to manually retrieve KEGG and GO annotations.

Option 1 (KEGG Terms): EggNog based on orthology and phylogenies
```
 EggNOG-mapper assigns both KEGG Orthology (KO) IDs and GO terms.

 Install EggNOG-mapper:

     # Create a dedicated environment named eggnog_env (Python 3.8) with eggnog-mapper
     # taken from the conda-forge and bioconda channels, then activate it.
     mamba create -n eggnog_env python=3.8 eggnog-mapper -c conda-forge -c bioconda  # installed version: eggnog-mapper_2.1.12
     mamba activate eggnog_env

 Run annotation:

     #diamond makedb --in eggnog6.prots.faa -d eggnog_proteins.dmnd
     # Directory that receives the eggNOG databases (inside the eggnog_env environment).
     EGGNOG_DATA_DIR=/home/jhuang/mambaforge/envs/eggnog_env/lib/python3.8/site-packages/data/
     # -p: create missing parent directories and do not fail if it already exists (safe to rerun).
     mkdir -p "$EGGNOG_DATA_DIR"
     download_eggnog_data.py --dbname eggnog.db -y --data_dir "$EGGNOG_DATA_DIR"
     #NOT_WORKING: emapper.py -i CP059040_gene.fasta -o eggnog_dmnd_out --cpu 60 -m diamond[hmmer,mmseqs] --dmnd_db /home/jhuang/REFs/eggnog_data/data/eggnog_proteins.dmnd
     # Rewrite the FASTA headers with a local helper script before annotation.
     python ~/Scripts/update_fasta_header.py CP059040_protein_.fasta CP059040_protein.fasta
     # Annotate the protein FASTA; output files are prefixed with eggnog_out.
     emapper.py -i CP059040_protein.fasta -o eggnog_out --cpu 60 --resume
     #----> result annotations.tsv: Contains KEGG, GO, and other functional annotations.
     #---->  470.IX87_14445:
         * 470 likely refers to the organism or strain (e.g., Acinetobacter baumannii ATCC 19606 or another related strain).
         * IX87_14445 would refer to a specific gene or protein within that genome.

 Extract KEGG KO IDs from annotations.emapper.annotations.
```
Option 2 (GO Terms from ‘Blast2GO 5 Basic’, saved in blast2go_annot.annot): Using Blast/Diamond + Blast2GO_GUI based on sequence alignment + GO mapping
- ‘Load protein sequences’ (Tags: NONE, generated columns: Nr, SeqName) –>
- Buttons ‘blast’ (Tags: BLASTED, generated columns: Description, Length, #Hits, e-Value, sim mean),
- Button ‘mapping’ (Tags: MAPPED, generated columns: #GO, GO IDs, GO Names), “Mapping finished – Please proceed now to annotation.”
- Button ‘annot’ (Tags: ANNOTATED, generated columns: Enzyme Codes, Enzyme Names), “Annotation finished.”
  - Used parameter ‘Annotation CutOff’: The Blast2GO Annotation Rule seeks to find the most specific GO annotations with a certain level of reliability. An annotation score is calculated for each candidate GO which is composed by the sequence similarity of the Blast Hit, the evidence code of the source GO and the position of the particular GO in the Gene Ontology hierarchy. This annotation score cutoff select the most specific GO term for a given GO branch which lies above this value.
  - Used parameter ‘GO Weight’ is a value which is added to Annotation Score of a more general/abstract Gene Ontology term for each of its more specific, original source GO terms. In this case, more general GO terms which summarise many original source terms (those ones directly associated to the Blast Hits) will have a higher Annotation Score.
or blast2go_cli_v1.5.1 (NOT_USED)
```
     #https://help.biobam.com/space/BCD/2250407989/Installation
     #see ~/Scripts/blast2go_pipeline.sh
```
Option 3 (GO Terms from ‘Blast2GO 5 Basic’, saved in blast2go_annot.annot2): Interpro based protein families / domains –> Button interpro
- Button ‘interpro’ (Tags: INTERPRO, generated columns: InterPro IDs, InterPro GO IDs, InterPro GO Names) –> “InterProScan Finished – You can now merge the obtained GO Annotations.”
MERGE the results of InterPro GO IDs (Option 3) to GO IDs (Option 2) and generate final GO IDs
- Button ‘interpro’/’Merge InterProScan GOs to Annotation’ –> “Merge (add and validate) all GO terms retrieved via InterProScan to the already existing GO annotation.” –> “Finished merging GO terms from InterPro with annotations. Maybe you want to run ANNEX (Annotation Augmentation).” #* Button ‘annot’/’ANNEX’ –> “ANNEX finished. Maybe you want to do the next step: Enzyme Code Mapping.”
```
 #-- before merging (blast2go_annot.annot) --
 #H0N29_18790     GO:0004842      ankyrin repeat domain-containing protein
 #H0N29_18790     GO:0085020
 #-- after merging (blast2go_annot.annot2) -->
 #H0N29_18790     GO:0031436      ankyrin repeat domain-containing protein
 #H0N29_18790     GO:0070531
 #H0N29_18790     GO:0004842
 #H0N29_18790     GO:0005515
 #H0N29_18790     GO:0085020
```
Option 4 (NOT_USED): RFAM for non-coding RNA

Option 5 (NOT_USED): PSORTb for subcellular localizations

Option 6 (NOT_USED): KAAS (KEGG Automatic Annotation Server)
```
* Go to KAAS
* Upload your FASTA file.
* Select an appropriate gene set.
* Download the KO assignments.
```

Find the Closest KEGG Organism Code (NOT_USED)

Since your species isn’t directly in KEGG, use a closely related organism.

* Check available KEGG organisms:

     # Packages used for the KEGG lookups in this section.
     library(clusterProfiler)
     library(KEGGREST)

     # Retrieve KEGG's organism listing; it is searched with grep() afterwards.
     kegg_organisms <- keggList("organism")

     # Pick the closest relative (e.g., zebrafish "dre" for fish, Arabidopsis "ath" for plants).

     # Case-insensitive search for Acinetobacter entries in the organism list;
     # value = TRUE returns the matching entries themselves rather than their positions.
     grep("Acinetobacter", kegg_organisms, ignore.case = TRUE, value = TRUE)
     # Taxonomic class: Gammaproteobacteria
     # Next: extract the KO IDs from the eggNOG results for "Acinetobacter baumannii strain ATCC 19606"

Find the Closest KEGG Organism for a Non-Model Species

If your organism is not in KEGG, search for the closest relative:
```
     # Example: list every entry of kegg_organisms that contains "fish" (case-insensitive).
     grep("fish", kegg_organisms, ignore.case = TRUE, value = TRUE)  # Example search
```
For KEGG pathway enrichment in non-model species, use “ko” instead of a species code (this code has been integrated into point 4):
```
     # NOTE(review): gene_list is not defined in this snippet; presumably a vector of KO IDs (Kxxxxx) -- confirm.
     kegg_enrich <- enrichKEGG(gene = gene_list, organism = "ko")  # "ko" = KEGG Orthology
```

Perform KEGG and GO Enrichment in R (under dir ~/DATA/Data_Tam_RNAseq_2025_LB_vs_Mac_ATCC19606/results/star_salmon/degenes)

     #BiocManager::install("GO.db")
     #BiocManager::install("AnnotationDbi")

     # Load required libraries
     library(openxlsx)  # For Excel file handling
     library(dplyr)     # For data manipulation
     library(tidyr)
     library(stringr)
     library(clusterProfiler)  # For KEGG and GO enrichment analysis
     #library(org.Hs.eg.db)  # Replace with appropriate organism database
     library(GO.db)
     library(AnnotationDbi)

     setwd("~/DATA/Data_Tam_RNAseq_2025_LB_vs_Mac_ATCC19606/results/star_salmon/degenes")
     # PREPARING go_terms and ec_terms: annot_* file: cut -f1-2 -d$'\t' blast2go_annot.annot2 > blast2go_annot.annot2_
     # Step 1: Load the two-column (gene ID, term) Blast2GO annotation file.
     # fill = TRUE pads short lines instead of stopping with an error.
     annot_df <- read.table("/home/jhuang/b2gWorkspace_Tam_RNAseq_2024/blast2go_annot.annot2_",
                         header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)

     # Only the first two columns are used. Enforce that explicitly, so a file
     # that was not cut to two columns cannot leave unnamed extra columns.
     stopifnot(ncol(annot_df) >= 2)
     annot_df <- annot_df[, 1:2]
     colnames(annot_df) <- c("GeneID", "Term")

     # Step 2: Collapse the terms per gene into one comma-separated string.
     # GO terms are the rows whose Term starts with "GO:" ...
     go_terms <- annot_df %>%
       filter(grepl("^GO:", Term)) %>%
       group_by(GeneID) %>%
       summarize(GOs = paste(Term, collapse = ","), .groups = "drop")
     # ... and EC numbers the rows whose Term starts with "EC:".
     ec_terms <- annot_df %>%
       filter(grepl("^EC:", Term)) %>%
       group_by(GeneID) %>%
       summarize(EC = paste(Term, collapse = ","), .groups = "drop")

     # Load the results of ONE comparison. To process another one, change
     # `comparison` to any of:
     #   Mac_vs_LB, LB.AB_vs_LB.WT19606, LB.IJ_vs_LB.WT19606, LB.W1_vs_LB.WT19606,
     #   LB.Y1_vs_LB.WT19606, Mac.AB_vs_Mac.WT19606, Mac.IJ_vs_Mac.WT19606,
     #   Mac.W1_vs_Mac.WT19606, Mac.Y1_vs_Mac.WT19606
     comparison <- "Mac.Y1_vs_Mac.WT19606"
     res <- read.csv(paste0(comparison, "-all.csv"))

     # Replace empty or missing GeneName with the GeneID minus its "gene-" text
     res$GeneName <- ifelse(
         res$GeneName == "" | is.na(res$GeneName),
         gsub("gene-", "", res$GeneID),
         res$GeneName
     )

     # Names that occur more than once (kept only for interactive inspection)
     duplicated_genes <- res[duplicated(res$GeneName), "GeneName"]

     # Remove duplicated genes by keeping the row with the smallest padj
     res <- res %>%
       group_by(GeneName) %>%
       slice_min(padj, with_ties = FALSE) %>%
       ungroup()

     res <- as.data.frame(res)
     # Sort res first by padj (ascending) and then by log2FoldChange (descending)
     res <- res[order(res$padj, -res$log2FoldChange), ]

     # Read eggnog annotations
     # NOTE(review): this file lives in the 2024 AUM/MHB/Urine project folder;
     # presumably reused on purpose because the genome is the same -- confirm.
     eggnog_data <- read.delim("~/DATA/Data_Tam_RNAseq_2024_AUM_MHB_Urine_ATCC19606/eggnog_out.emapper.annotations.txt", header = TRUE, sep = "\t")
     # Remove the "gene-" prefix from GeneID in res to match eggnog 'query' format
     res$GeneID <- gsub("gene-", "", res$GeneID)
     # Merge eggnog data with res based on GeneID
     res <- res %>% left_join(eggnog_data, by = c("GeneID" = "query"))

     # Add the Blast2GO GO/EC terms. Both tables carry EC and GOs columns, so
     # the joins yield .x (eggnog) and .y (Blast2GO) versions; the Blast2GO
     # ones are kept under the plain names.
     res_updated <- res %>%
       left_join(go_terms, by = "GeneID") %>%
       left_join(ec_terms, by = "GeneID") %>%
       dplyr::select(-EC.x, -GOs.x) %>%
       dplyr::rename(EC = EC.y, GOs = GOs.y)

     # Significant genes: |log2FoldChange| > 2 and padj < 0.01.
     # which() drops rows where the test is NA; plain logical indexing would
     # add all-NA rows, which would then reach the enrichment gene lists.
     up_regulated <- res_updated[which(res_updated$log2FoldChange > 2 & res_updated$padj < 0.01), ]
     down_regulated <- res_updated[which(res_updated$log2FoldChange < -2 & res_updated$padj < 0.01), ]

     # Workbook with three annotated sheets: complete, up- and down-regulated
     wb <- createWorkbook()
     addWorksheet(wb, "Complete_Data")
     writeData(wb, "Complete_Data", res_updated)
     addWorksheet(wb, "Up_Regulated")
     writeData(wb, "Up_Regulated", up_regulated)
     addWorksheet(wb, "Down_Regulated")
     writeData(wb, "Down_Regulated", down_regulated)
     # The file is named after the comparison. It used to be the fixed name
     # "Gene_Expression_with_Annotations_Urine_vs_MHB.xlsx", which did not match
     # the input and was overwritten by every comparison.
     saveWorkbook(wb, paste0("Gene_Expression_with_Annotations_", comparison, ".xlsx"), overwrite = TRUE)

     # Set GeneName as row names after the join, then drop the column
     rownames(res_updated) <- res_updated$GeneName
     res_updated <- res_updated %>% dplyr::select(-GeneName)
     # -- BREAK_1 --

     # ---- Perform KEGG enrichment analysis (up_regulated) ----
     # Step 0: KEGG KO <-> GeneID pairs of the up-regulated genes
     kegg_to_geneid_up <- up_regulated %>%
       dplyr::select(KEGG_ko, GeneID) %>%
       filter(!is.na(KEGG_ko)) %>%  # Remove missing KEGG KO entries
       mutate(KEGG_ko = str_remove(KEGG_ko, "ko:"))  # Remove 'ko:' prefix if present
     # Step 1: One row per single KO ID; a gene can carry several
     # comma-separated IDs, and "-" marks a gene without KO assignment.
     kegg_to_geneid_clean <- kegg_to_geneid_up %>%
       mutate(KEGG_ko = str_remove_all(KEGG_ko, "ko:")) %>%  # Remove 'ko:' prefixes
       separate_rows(KEGG_ko, sep = ",") %>%  # Each KEGG ID on its own row
       filter(KEGG_ko != "-") %>%  # Remove invalid KEGG IDs ("-")
       distinct()  # Remove any duplicate mappings

     # Step 2: Enrichment on the individual KO IDs. Passing the raw column
     # would hand over strings such as "K00001,K00002" and "-", which match no
     # KO entry, so multi-KO genes would drop out of the analysis.
     gene_list_kegg_up <- unique(kegg_to_geneid_clean$KEGG_ko)
     kegg_enrichment_up <- enrichKEGG(gene = gene_list_kegg_up, organism = 'ko')
     if (is.null(kegg_enrichment_up)) {
       stop("enrichKEGG() returned no result for the up-regulated genes.",
            call. = FALSE)
     }

     # -- convert the GeneID (Kxxxxxx) to the true GeneID --
     # Step 3: Split the "/"-separated KO IDs of every pathway, map each KO to
     # its GeneID(s), and collapse back to one "/"-separated string per pathway.
     expanded_kegg <- kegg_enrichment_up %>%
       as.data.frame() %>%
       separate_rows(geneID, sep = "/") %>%  # Split multiple KEGG IDs (Kxxxxx)
       left_join(kegg_to_geneid_clean, by = c("geneID" = "KEGG_ko"),
                 relationship = "many-to-many") %>%  # one KO can map to several genes and vice versa
       distinct() %>%  # Remove duplicate matches
       group_by(ID) %>%
       summarise(across(everything(), ~ paste(unique(na.omit(.)), collapse = "/")),
                 .groups = "drop")  # Re-collapse results
     #dplyr::glimpse(expanded_kegg)

     # Step 4: Replace the KO-based geneID column by the real GeneIDs
     kegg_enrichment_up_df <- as.data.frame(kegg_enrichment_up) %>%
       dplyr::select(-geneID) %>%  # Remove old geneID column
       left_join(expanded_kegg %>% dplyr::select(ID, GeneID), by = "ID") %>%  # Merge new GeneID column
       dplyr::rename(geneID = GeneID)  # Rename column back to geneID

     # ---- Perform KEGG enrichment analysis (down_regulated) ----
     # Step 1: KEGG KO <-> GeneID pairs of the down-regulated genes
     kegg_to_geneid_down <- down_regulated %>%
       dplyr::select(KEGG_ko, GeneID) %>%
       filter(!is.na(KEGG_ko)) %>%  # Remove missing KEGG KO entries
       mutate(KEGG_ko = str_remove(KEGG_ko, "ko:"))  # Remove 'ko:' prefix if present
     # -- BREAK_2 --

     # Step 2: One row per single KO ID; a gene can carry several
     # comma-separated IDs, and "-" marks a gene without KO assignment.
     kegg_to_geneid_down_clean <- kegg_to_geneid_down %>%
       mutate(KEGG_ko = str_remove_all(KEGG_ko, "ko:")) %>%  # Remove 'ko:' prefixes
       separate_rows(KEGG_ko, sep = ",") %>%  # Each KEGG ID on its own row
       filter(KEGG_ko != "-") %>%  # Remove invalid KEGG IDs ("-")
       distinct()  # Remove duplicate mappings

     # Step 3: Enrichment on the individual KO IDs. Passing the raw column
     # would hand over strings such as "K00001,K00002" and "-", which match no
     # KO entry, so multi-KO genes would drop out of the analysis.
     gene_list_kegg_down <- unique(kegg_to_geneid_down_clean$KEGG_ko)
     kegg_enrichment_down <- enrichKEGG(gene = gene_list_kegg_down, organism = 'ko')
     if (is.null(kegg_enrichment_down)) {
       stop("enrichKEGG() returned no result for the down-regulated genes.",
            call. = FALSE)
     }

     # --- Convert KEGG gene IDs (Kxxxxxx) to actual GeneIDs ---
     # Step 4: Split the "/"-separated KO IDs of every pathway, map each KO to
     # its GeneID(s), and collapse back to one "/"-separated string per pathway.
     expanded_kegg_down <- kegg_enrichment_down %>%
       as.data.frame() %>%
       separate_rows(geneID, sep = "/") %>%  # Split multiple KEGG IDs (Kxxxxx)
       left_join(kegg_to_geneid_down_clean, by = c("geneID" = "KEGG_ko"),
                 relationship = "many-to-many") %>%  # Handle many-to-many mappings
       distinct() %>%  # Remove duplicate matches
       group_by(ID) %>%
       summarise(across(everything(), ~ paste(unique(na.omit(.)), collapse = "/")),
                 .groups = "drop")  # Re-collapse results

     # Step 5: Replace the KO-based geneID column by the real GeneIDs
     kegg_enrichment_down_df <- as.data.frame(kegg_enrichment_down) %>%
       dplyr::select(-geneID) %>%  # Remove old geneID column
       left_join(expanded_kegg_down %>% dplyr::select(ID, GeneID), by = "ID") %>%  # Merge new GeneID column
       dplyr::rename(geneID = GeneID)  # Rename column back to geneID
     # View the updated dataframe
     head(kegg_enrichment_down_df)

     # Start a fresh workbook and store each KEGG enrichment table on its own
     # sheet (sheet name = list name).
     wb <- createWorkbook()
     kegg_sheets <- list(
       KEGG_Enrichment_Up = kegg_enrichment_up_df,
       KEGG_Enrichment_Down = kegg_enrichment_down_df
     )
     for (sheet_name in names(kegg_sheets)) {
       addWorksheet(wb, sheet_name)
       writeData(wb, sheet_name, as.data.frame(kegg_sheets[[sheet_name]]))
     }

     # Foreground gene lists for the GO enrichment tests
     # (149 up-regulated and 65 down-regulated genes in the original run).
     gene_list_go_up <- up_regulated$GeneID
     gene_list_go_down <- down_regulated$GeneID

     # Background (universe): every gene in res_updated (3646 in the original run).
     background_genes <- res_updated$GeneID

     # Term-to-gene table for enricher(): GO term first, gene ID second, with
     # comma-separated GO lists expanded to one term per row.
     go_annotation <- tidyr::separate_rows(
       res_updated[, c("GOs", "GeneID")],
       GOs,
       sep = ","
     )
     # -- BREAK_3 --

     # Look up the GO term name for each GO ID in GO.db.
     #
     # go_ids: vector of GO IDs (e.g. the ID column of an enricher() result).
     # Returns a character vector of the same length; IDs that GO.db cannot
     # resolve (for example obsolete terms) are reported via message() and
     # yield NA_character_. vapply() keeps the result a character vector even
     # when go_ids is empty.
     lookup_go_terms <- function(go_ids) {
       vapply(as.character(go_ids), function(go_id) {
         term <- tryCatch(
           AnnotationDbi::select(GO.db, keys = go_id, columns = "TERM", keytype = "GOID"),
           error = function(e) {
             message(paste("Error for GO term:", go_id))  # Report which GO ID failed
             data.frame(TERM = NA_character_)
           }
         )
         if (nrow(term) > 0) as.character(term$TERM[1]) else NA_character_
       }, character(1), USE.NAMES = FALSE)
     }

     # Run enricher() for one gene list against the custom GO annotation
     # (go_annotation) and background (background_genes) defined above, convert
     # the result to a data frame and fill its Description column with the GO
     # term names.
     run_go_enrichment <- function(genes) {
       enrichment <- enricher(
         gene = genes,                    # Genes of interest
         TERM2GENE = go_annotation,       # Custom GO annotation
         pvalueCutoff = 0.05,             # Significance threshold
         pAdjustMethod = "BH",
         universe = background_genes      # Background gene set
       )
       enrichment <- as.data.frame(enrichment)
       enrichment$Description <- lookup_go_terms(enrichment$ID)
       enrichment
     }

     ## Remove the 'p.adjust' column since no adjusted methods have been applied --> In this version we have used pvalue filtering (see above)!
     #go_enrichment_up <- go_enrichment_up[, !names(go_enrichment_up) %in% "p.adjust"]
     #go_enrichment_down <- go_enrichment_down[, !names(go_enrichment_down) %in% "p.adjust"]

     go_enrichment_up <- run_go_enrichment(gene_list_go_up)      # Up-regulated genes
     go_enrichment_down <- run_go_enrichment(gene_list_go_down)  # Down-regulated genes

     ## Print the updated data frame
     #print(go_enrichment_up)

     # Append the GO enrichment tables to the workbook, one sheet each
     # (sheet name = list name), then write the workbook to disk.
     go_sheets <- list(
       GO_Enrichment_Up = go_enrichment_up,
       GO_Enrichment_Down = go_enrichment_down
     )
     for (sheet_name in names(go_sheets)) {
       addWorksheet(wb, sheet_name)
       writeData(wb, sheet_name, as.data.frame(go_sheets[[sheet_name]]))
     }

     # Save the workbook with the KEGG and GO enrichment results
     saveWorkbook(wb, "KEGG_and_GO_Enrichments_Urine_vs_MHB.xlsx", overwrite = TRUE)

     #Error for GO term: GO:0006807: replace "GO:0006807 obsolete nitrogen compound metabolic process"
     #obsolete nitrogen compound metabolic process #https://www.ebi.ac.uk/QuickGO/term/GO:0006807
     #TODO: highlight the rows in yellow where p.adjust <= 0.05 in GO_enrichment!

     #mv KEGG_and_GO_Enrichments_Urine_vs_MHB.xlsx KEGG_and_GO_Enrichments_Mac_vs_LB.xlsx
     #Mac_vs_LB
     #LB.AB_vs_LB.WT19606
     #LB.IJ_vs_LB.WT19606
     #LB.W1_vs_LB.WT19606
     #LB.Y1_vs_LB.WT19606
     #Mac.AB_vs_Mac.WT19606
     #Mac.IJ_vs_Mac.WT19606
     #Mac.W1_vs_Mac.WT19606
     #Mac.Y1_vs_Mac.WT19606

(DEBUG) Draw the Venn diagram to compare the total DEGs across AUM, Urine, and MHB, irrespective of up- or down-regulation.

         library(openxlsx)

         # Function to read and clean gene ID files
         # Read a plain-text gene ID file (one ID per line) and return the cleaned
         # IDs as a character vector: double quotes are stripped, surrounding
         # whitespace is trimmed, and empty or NA entries are dropped.
         read_gene_ids <- function(file_path) {
           cleaned <- trimws(gsub('"', '', readLines(file_path), fixed = TRUE))
           is_blank <- is.na(cleaned) | cleaned == ""
           cleaned[!is_blank]
         }

         # Pool the up- and down-regulated gene IDs of each comparison.
         #
         # degs:   list of gene-ID vectors ordered as c(<all up files>, <all down
         #         files>), with the comparisons in the same order in both halves.
         # labels: one short name per comparison.
         # Returns a list named by `labels`; each element is the up vector
         # followed by the down vector of that comparison. Pairing is positional,
         # so it does not depend on the ".1" suffixes that make.unique() adds to
         # the list names.
         pool_up_down <- function(degs, labels) {
           n <- length(labels)
           stopifnot(length(degs) == 2 * n)
           setNames(lapply(seq_len(n), function(i) c(degs[[i]], degs[[i + n]])), labels)
         }

         # Strip the "-up.id" / "-down.id" suffix from a file name. The dot is
         # escaped and the pattern anchored so only the literal suffix matches.
         strip_direction_suffix <- function(files) {
           sub("-(up|down)\\.id$", "", files)
         }

         strain_labels <- c("AB", "IJ", "W1", "Y1")

         # LB group: -up.id and -down.id file of every comparison
         lb_files_up <- c("LB.AB_vs_LB.WT19606-up.id", "LB.IJ_vs_LB.WT19606-up.id",
                         "LB.W1_vs_LB.WT19606-up.id", "LB.Y1_vs_LB.WT19606-up.id")
         lb_files_down <- c("LB.AB_vs_LB.WT19606-down.id", "LB.IJ_vs_LB.WT19606-down.id",
                         "LB.W1_vs_LB.WT19606-down.id", "LB.Y1_vs_LB.WT19606-down.id")
         lb_files <- c(lb_files_up, lb_files_down)

         # One gene-ID vector per file; the down entry of a comparison gets the
         # make.unique() suffix ".1" because it shares its name with the up entry.
         lb_degs <- setNames(lapply(lb_files, read_gene_ids),
                             make.unique(strip_direction_suffix(lb_files)))

         # Total DEGs (up + down) per LB comparison
         lb_degs_ <- pool_up_down(lb_degs, strain_labels)

         # Mac group: -up.id and -down.id file of every comparison
         mac_files_up <- c("Mac.AB_vs_Mac.WT19606-up.id", "Mac.IJ_vs_Mac.WT19606-up.id",
                         "Mac.W1_vs_Mac.WT19606-up.id", "Mac.Y1_vs_Mac.WT19606-up.id")
         mac_files_down <- c("Mac.AB_vs_Mac.WT19606-down.id", "Mac.IJ_vs_Mac.WT19606-down.id",
                         "Mac.W1_vs_Mac.WT19606-down.id", "Mac.Y1_vs_Mac.WT19606-down.id")
         mac_files <- c(mac_files_up, mac_files_down)

         mac_degs <- setNames(lapply(mac_files, read_gene_ids),
                              make.unique(strip_direction_suffix(mac_files)))

         # Total DEGs (up + down) per Mac comparison
         mac_degs_ <- pool_up_down(mac_degs, strain_labels)

         # Shorten sheet names to at most 31 characters.
         #
         # names_list: character vector of proposed sheet names.
         # Returns a character vector of the same length, named by the original
         # names, with every entry cut to its first 31 characters. vapply() keeps
         # the return type character even for empty input, and substr() passes NA
         # names through as NA instead of failing.
         # NOTE(review): truncation can make two long names identical; duplicates
         # are not resolved here.
         truncate_sheet_name <- function(names_list) {
           vapply(as.character(names_list), substr, character(1), start = 1, stop = 31)
         }

         # Build the gene sets behind the LB Venn diagram.
         # lb_degs_ is expected to be a list with the gene-ID vectors AB, IJ, W1, Y1.

         # Pairwise intersections (genes shared by two comparisons, regardless of
         # whether they also occur in further comparisons)
         inter_lb_ab_ij <- intersect(lb_degs_$AB, lb_degs_$IJ)
         inter_lb_ab_w1 <- intersect(lb_degs_$AB, lb_degs_$W1)
         inter_lb_ab_y1 <- intersect(lb_degs_$AB, lb_degs_$Y1)
         inter_lb_ij_w1 <- intersect(lb_degs_$IJ, lb_degs_$W1)
         inter_lb_ij_y1 <- intersect(lb_degs_$IJ, lb_degs_$Y1)
         inter_lb_w1_y1 <- intersect(lb_degs_$W1, lb_degs_$Y1)

         # Three-way intersections
         inter_lb_ab_ij_w1 <- Reduce(intersect, list(lb_degs_$AB, lb_degs_$IJ, lb_degs_$W1))
         inter_lb_ab_ij_y1 <- Reduce(intersect, list(lb_degs_$AB, lb_degs_$IJ, lb_degs_$Y1))
         inter_lb_ab_w1_y1 <- Reduce(intersect, list(lb_degs_$AB, lb_degs_$W1, lb_degs_$Y1))
         inter_lb_ij_w1_y1 <- Reduce(intersect, list(lb_degs_$IJ, lb_degs_$W1, lb_degs_$Y1))

         # Four-way intersection
         inter_lb_ab_ij_w1_y1 <- Reduce(intersect, list(lb_degs_$AB, lb_degs_$IJ, lb_degs_$W1, lb_degs_$Y1))

         venn_list_lb <- list()

         # Genes exclusive to one comparison: the set minus the union of the
         # other three sets. Subtracting whole sets (rather than a hand-picked
         # list of intersections) guarantees that every pairwise overlap,
         # including W1/Y1, is removed.
         venn_list_lb[["LB.AB_only"]] <- setdiff(lb_degs_$AB, Reduce(union, list(lb_degs_$IJ, lb_degs_$W1, lb_degs_$Y1)))
         venn_list_lb[["LB.IJ_only"]] <- setdiff(lb_degs_$IJ, Reduce(union, list(lb_degs_$AB, lb_degs_$W1, lb_degs_$Y1)))
         venn_list_lb[["LB.W1_only"]] <- setdiff(lb_degs_$W1, Reduce(union, list(lb_degs_$AB, lb_degs_$IJ, lb_degs_$Y1)))
         venn_list_lb[["LB.Y1_only"]] <- setdiff(lb_degs_$Y1, Reduce(union, list(lb_degs_$AB, lb_degs_$IJ, lb_degs_$W1)))

         # Pairwise intersections
         venn_list_lb[["LB.AB_AND_LB.IJ"]] <- inter_lb_ab_ij
         venn_list_lb[["LB.AB_AND_LB.W1"]] <- inter_lb_ab_w1
         venn_list_lb[["LB.AB_AND_LB.Y1"]] <- inter_lb_ab_y1
         venn_list_lb[["LB.IJ_AND_LB.W1"]] <- inter_lb_ij_w1
         venn_list_lb[["LB.IJ_AND_LB.Y1"]] <- inter_lb_ij_y1
         venn_list_lb[["LB.W1_AND_LB.Y1"]] <- inter_lb_w1_y1

         # Three-way intersections
         venn_list_lb[["LB.AB_AND_LB.IJ_AND_LB.W1"]] <- inter_lb_ab_ij_w1
         venn_list_lb[["LB.AB_AND_LB.IJ_AND_LB.Y1"]] <- inter_lb_ab_ij_y1
         venn_list_lb[["LB.AB_AND_LB.W1_AND_LB.Y1"]] <- inter_lb_ab_w1_y1
         venn_list_lb[["LB.IJ_AND_LB.W1_AND_LB.Y1"]] <- inter_lb_ij_w1_y1

         # Four-way intersection
         venn_list_lb[["LB.AB_AND_LB.IJ_AND_LB.W1_AND_LB.Y1"]] <- inter_lb_ab_ij_w1_y1

         # Build the gene sets behind the Mac Venn diagram.
         # mac_degs_ is expected to be a list with the gene-ID vectors AB, IJ, W1, Y1.

         # Pairwise intersections (genes shared by two comparisons, regardless of
         # whether they also occur in further comparisons)
         inter_mac_ab_ij <- intersect(mac_degs_$AB, mac_degs_$IJ)
         inter_mac_ab_w1 <- intersect(mac_degs_$AB, mac_degs_$W1)
         inter_mac_ab_y1 <- intersect(mac_degs_$AB, mac_degs_$Y1)
         inter_mac_ij_w1 <- intersect(mac_degs_$IJ, mac_degs_$W1)
         inter_mac_ij_y1 <- intersect(mac_degs_$IJ, mac_degs_$Y1)
         inter_mac_w1_y1 <- intersect(mac_degs_$W1, mac_degs_$Y1)

         # Three-way intersections
         inter_mac_ab_ij_w1 <- Reduce(intersect, list(mac_degs_$AB, mac_degs_$IJ, mac_degs_$W1))
         inter_mac_ab_ij_y1 <- Reduce(intersect, list(mac_degs_$AB, mac_degs_$IJ, mac_degs_$Y1))
         inter_mac_ab_w1_y1 <- Reduce(intersect, list(mac_degs_$AB, mac_degs_$W1, mac_degs_$Y1))
         inter_mac_ij_w1_y1 <- Reduce(intersect, list(mac_degs_$IJ, mac_degs_$W1, mac_degs_$Y1))

         # Four-way intersection
         inter_mac_ab_ij_w1_y1 <- Reduce(intersect, list(mac_degs_$AB, mac_degs_$IJ, mac_degs_$W1, mac_degs_$Y1))

         venn_list_mac <- list()

         # Genes exclusive to one comparison: the set minus the union of the
         # other three sets. Subtracting whole sets (rather than a hand-picked
         # list of intersections) guarantees that every pairwise overlap,
         # including W1/Y1, is removed.
         venn_list_mac[["Mac.AB_only"]] <- setdiff(mac_degs_$AB, Reduce(union, list(mac_degs_$IJ, mac_degs_$W1, mac_degs_$Y1)))
         venn_list_mac[["Mac.IJ_only"]] <- setdiff(mac_degs_$IJ, Reduce(union, list(mac_degs_$AB, mac_degs_$W1, mac_degs_$Y1)))
         venn_list_mac[["Mac.W1_only"]] <- setdiff(mac_degs_$W1, Reduce(union, list(mac_degs_$AB, mac_degs_$IJ, mac_degs_$Y1)))
         venn_list_mac[["Mac.Y1_only"]] <- setdiff(mac_degs_$Y1, Reduce(union, list(mac_degs_$AB, mac_degs_$IJ, mac_degs_$W1)))

         # Pairwise intersections
         venn_list_mac[["Mac.AB_AND_Mac.IJ"]] <- inter_mac_ab_ij
         venn_list_mac[["Mac.AB_AND_Mac.W1"]] <- inter_mac_ab_w1
         venn_list_mac[["Mac.AB_AND_Mac.Y1"]] <- inter_mac_ab_y1
         venn_list_mac[["Mac.IJ_AND_Mac.W1"]] <- inter_mac_ij_w1
         venn_list_mac[["Mac.IJ_AND_Mac.Y1"]] <- inter_mac_ij_y1
         venn_list_mac[["Mac.W1_AND_Mac.Y1"]] <- inter_mac_w1_y1

         # Three-way intersections
         venn_list_mac[["Mac.AB_AND_Mac.IJ_AND_Mac.W1"]] <- inter_mac_ab_ij_w1
         venn_list_mac[["Mac.AB_AND_Mac.IJ_AND_Mac.Y1"]] <- inter_mac_ab_ij_y1
         venn_list_mac[["Mac.AB_AND_Mac.W1_AND_Mac.Y1"]] <- inter_mac_ab_w1_y1
         venn_list_mac[["Mac.IJ_AND_Mac.W1_AND_Mac.Y1"]] <- inter_mac_ij_w1_y1

         # Four-way intersection
         venn_list_mac[["Mac.AB_AND_Mac.IJ_AND_Mac.W1_AND_Mac.Y1"]] <- inter_mac_ab_ij_w1_y1

         # Export the per-file gene ID lists for inspection (optional)
         write.xlsx(x = lb_degs, file = "LB_DEGs.xlsx")
         write.xlsx(x = mac_degs, file = "Mac_DEGs.xlsx")

         # Export the Venn sets of both groups; sheet names are shortened with
         # truncate_sheet_name() before writing.
         write.xlsx(
           x = venn_list_lb,
           file = "Venn_LB_Genes_Intersect.xlsx",
           sheetName = truncate_sheet_name(names(venn_list_lb)),
           rowNames = FALSE
         )
         write.xlsx(
           x = venn_list_mac,
           file = "Venn_Mac_Genes_Intersect.xlsx",
           sheetName = truncate_sheet_name(names(venn_list_mac)),
           rowNames = FALSE
         )

         # Four-set Venn diagram of the LB group, saved as a 7 x 7 inch PNG at 300 dpi
         venn1 <- ggvenn(
           data = lb_degs_,
           fill_color = c("skyblue", "tomato", "gold", "orchid"),
           stroke_size = 0.4,
           set_name_size = 5
         )
         ggsave(filename = "Venn_LB_Genes.png", plot = venn1, width = 7, height = 7, dpi = 300)

         # Four-set Venn diagram of the Mac group, same output settings
         venn2 <- ggvenn(
           data = mac_degs_,
           fill_color = c("lightgreen", "slateblue", "plum", "orange"),
           stroke_size = 0.4,
           set_name_size = 5
         )
         ggsave(filename = "Venn_Mac_Genes.png", plot = venn2, width = 7, height = 7, dpi = 300)

         cat("✅ All Venn intersection sets exported to Excel successfully.\n")

How to correlate RNA-seq Data with Mass Spectrometry Proteomics Data?

Leave a reply

Correlating RNA-seq data with mass spectrometry (MS)-based proteomics data is a powerful way to link transcript-level expression with protein-level abundance. Here’s a step-by-step outline of how to approach it:

Preprocessing and Normalization

For RNA-Seq data:
- Obtain gene-level expression data, usually as raw counts or TPM (transcripts per million) / FPKM (fragments per kilobase of transcript per million mapped reads).
- Normalize the data (e.g., using DESeq2’s variance stabilizing transformation (VST) or edgeR’s TMM normalization).
For MS proteomics data:
- Quantify protein abundances, often using spectral counts, iBAQ, LFQ intensities, or other measures.
- Log-transform the data if needed to stabilize variance.
Data Mapping and Integration
- Gene/Protein Mapping: Use gene symbols, Ensembl IDs, or UniProt IDs to map transcript-level data (RNA-seq) to protein-level data (MS). Be cautious of differences in annotation – e.g., some genes might have multiple protein isoforms.
- Common Identifiers:
  - Convert all IDs to a common identifier (e.g., gene symbols or Ensembl IDs).
  - Remove entries without matching pairs to ensure one-to-one correspondence.
Data Filtering
- Filter out lowly expressed genes/proteins or those not reliably detected in both datasets.
- Optionally, keep only genes/proteins of interest or those with high coverage.
Correlation Analysis
- For each matched gene/protein pair, calculate correlation (usually Pearson or Spearman) across the samples.
  
  Steps:
  - Construct a table with rows as genes/proteins and columns as samples.
  - For each row, you’ll have two vectors:
    - RNA expression (e.g., normalized RNA counts)
    - Protein abundance (e.g., log-transformed LFQ intensity)
  - Calculate:
```
  from scipy.stats import pearsonr, spearmanr

  rna_vector = [...]
  protein_vector = [...]

  pearson_corr, _ = pearsonr(rna_vector, protein_vector)
  spearman_corr, _ = spearmanr(rna_vector, protein_vector)
```
Visualize and Interpret
- Plot scatter plots of RNA vs protein levels for:
  - All genes/proteins together (aggregate view)
  - Selected genes of interest
- Plot correlation coefficients:
  - Histogram of all gene/protein correlations
  - Heatmap if you have sample-wise data
- Assess overall agreement:
  - Typically, moderate correlation (~0.3–0.6) is observed in many studies.
Consider Batch Effects and Biological Variability
- If the datasets come from different experiments or platforms, consider batch correction methods (e.g., ComBat from the sva R package).
- Be mindful that:
  - Post-transcriptional regulation affects how well mRNA levels correlate with protein levels.
  - Some genes/proteins might show no correlation due to translational regulation, stability, etc.
Summary Workflow

✅ Preprocess & normalize both datasets ✅ Map genes/proteins to common IDs ✅ Filter to shared, high-quality data ✅ Calculate correlations ✅ Visualize and interpret

Python script that walks through the key steps of correlating RNA-seq data with proteomics data:

 import pandas as pd
 import numpy as np
 from scipy.stats import pearsonr, spearmanr
 import matplotlib.pyplot as plt
 import seaborn as sns

 # --- Step 1: Load your data ---

 # Example: CSVs with genes/proteins as rows, samples as columns
 rna_data = pd.read_csv('rna_seq_data.csv', index_col=0)  # genes x samples
 protein_data = pd.read_csv('proteomics_data.csv', index_col=0)  # proteins x samples

 # --- Step 2: Map genes to proteins (assuming same identifiers) ---

 # Filter to common genes/proteins
 common_genes = rna_data.index.intersection(protein_data.index)

 # Pair the samples by column name so that value i of the RNA vector and value i
 # of the protein vector always come from the same sample. If the two files do
 # not share sample names but have the same number of columns, fall back to
 # pairing by column position and say so.
 common_samples = rna_data.columns.intersection(protein_data.columns)
 if len(common_samples) >= 2:
     rna_data_filtered = rna_data.loc[common_genes, common_samples]
     protein_data_filtered = protein_data.loc[common_genes, common_samples]
 elif rna_data.shape[1] == protein_data.shape[1]:
     print("Warning: fewer than two shared sample names; pairing samples by column position")
     rna_data_filtered = rna_data.loc[common_genes]
     protein_data_filtered = protein_data.loc[common_genes]
 else:
     raise ValueError("RNA and protein tables share fewer than two sample names "
                      "and have different numbers of columns; cannot pair samples")

 print(f"Number of common genes/proteins: {len(common_genes)}")
 print(f"Number of samples used: {rna_data_filtered.shape[1]}")

 # --- Step 3: Log transform if needed (optional) ---

 rna_data_log = np.log2(rna_data_filtered + 1)
 protein_data_log = np.log2(protein_data_filtered + 1)

 # --- Step 4: Calculate gene-wise correlations across samples ---

 pearson_corrs = []
 spearman_corrs = []

 for gene in common_genes:
     rna_vector = rna_data_log.loc[gene].to_numpy(dtype=float)
     protein_vector = protein_data_log.loc[gene].to_numpy(dtype=float)

     # Use only samples where both measurements are finite; a correlation is
     # undefined with fewer than two points or when either vector is constant.
     valid = np.isfinite(rna_vector) & np.isfinite(protein_vector)
     rna_valid = rna_vector[valid]
     protein_valid = protein_vector[valid]

     if rna_valid.size < 2 or np.ptp(rna_valid) == 0 or np.ptp(protein_valid) == 0:
         pearson_corr = np.nan
         spearman_corr = np.nan
     else:
         pearson_corr, _ = pearsonr(rna_valid, protein_valid)
         spearman_corr, _ = spearmanr(rna_valid, protein_valid)

     pearson_corrs.append(pearson_corr)
     spearman_corrs.append(spearman_corr)

 # Save results
 correlation_df = pd.DataFrame({
     'Gene': common_genes,
     'Pearson': pearson_corrs,
     'Spearman': spearman_corrs
 })
 correlation_df.to_csv('gene_protein_correlations.csv', index=False)
 print("Saved gene-wise correlation data to 'gene_protein_correlations.csv'")

 # --- Step 5: Visualize the correlation distributions ---

 sns.histplot(correlation_df['Pearson'], bins=30, kde=True, color='skyblue')
 plt.xlabel('Pearson Correlation')
 plt.title('Distribution of Pearson Correlations (RNA vs Protein)')
 plt.show()

 sns.histplot(correlation_df['Spearman'], bins=30, kde=True, color='salmon')
 plt.xlabel('Spearman Correlation')
 plt.title('Distribution of Spearman Correlations (RNA vs Protein)')
 plt.show()

 # --- Step 6: Scatter plot for a selected gene/protein ---

 example_gene = common_genes[0]  # change to your gene of interest
 plt.scatter(rna_data_log.loc[example_gene], protein_data_log.loc[example_gene])
 plt.xlabel('Log2 RNA Expression')
 plt.ylabel('Log2 Protein Abundance')
 plt.title(f'RNA vs Protein for {example_gene}')
 plt.grid(True)
 plt.show()

 # Key Notes:
 #✅ Replace the filenames (rna_seq_data.csv and proteomics_data.csv) with your actual files.
 #✅ The script expects rows to be genes/proteins and columns to be samples.
 #✅ Samples are matched by column name; use the same sample names in both files.
 #✅ Modify or add steps if you have different normalization needs (e.g., DESeq2 normalization).

R script that covers the same steps as above:

 # --- Load libraries ---
 library(ggplot2)
 library(dplyr)

 # --- Step 1: Load your data ---
 # Example: CSVs with genes/proteins as rows, samples as columns
 rna_data <- read.csv("rna_seq_data.csv", row.names = 1)
 protein_data <- read.csv("proteomics_data.csv", row.names = 1)

 # --- Step 2: Find common genes/proteins and pair the samples ---
 common_genes <- intersect(rownames(rna_data), rownames(protein_data))
 if (length(common_genes) == 0) {
   stop("No shared gene/protein identifiers between the two tables", call. = FALSE)
 }

 # Pair samples by column name so that element i of the RNA vector and element i
 # of the protein vector always belong to the same sample. If the files do not
 # share sample names but have the same number of columns, fall back to pairing
 # by column position with a warning.
 common_samples <- intersect(colnames(rna_data), colnames(protein_data))
 if (length(common_samples) >= 2) {
   rna_data_filtered <- rna_data[common_genes, common_samples, drop = FALSE]
   protein_data_filtered <- protein_data[common_genes, common_samples, drop = FALSE]
 } else if (ncol(rna_data) == ncol(protein_data)) {
   warning("Fewer than two shared sample names; pairing samples by column position",
           call. = FALSE)
   rna_data_filtered <- rna_data[common_genes, , drop = FALSE]
   protein_data_filtered <- protein_data[common_genes, , drop = FALSE]
 } else {
   stop("RNA and protein tables share fewer than two sample names and have ",
        "different numbers of columns; cannot pair samples", call. = FALSE)
 }

 cat("Number of common genes/proteins:", length(common_genes), "\n")
 cat("Number of samples used:", ncol(rna_data_filtered), "\n")

 # --- Step 3: Log-transform if needed (optional) ---
 rna_data_log <- log2(rna_data_filtered + 1)
 protein_data_log <- log2(protein_data_filtered + 1)

 # --- Step 4: Calculate gene-wise correlations across samples ---
 # Correlate row i of the RNA table with row i of the protein table. Samples
 # with a missing value in either table are ignored for that gene.
 gene_wise_cor <- function(method) {
   vapply(seq_along(common_genes), function(i) {
     cor(as.numeric(rna_data_log[i, ]), as.numeric(protein_data_log[i, ]),
         method = method, use = "pairwise.complete.obs")
   }, numeric(1))
 }
 pearson_corrs <- gene_wise_cor("pearson")
 spearman_corrs <- gene_wise_cor("spearman")

 # Save the results
 correlation_df <- data.frame(
   Gene = common_genes,
   Pearson = pearson_corrs,
   Spearman = spearman_corrs
 )

 write.csv(correlation_df, "gene_protein_correlations.csv", row.names = FALSE)
 cat("Saved gene-wise correlation data to 'gene_protein_correlations.csv'\n")

 # --- Step 5: Visualize the correlation distributions ---
 # print() is explicit so the plots are also drawn when the script is source()d.
 print(
   ggplot(correlation_df, aes(x = Pearson)) +
     geom_histogram(bins = 30, fill = "skyblue", color = "black") +
     labs(title = "Distribution of Pearson Correlations (RNA vs Protein)",
          x = "Pearson Correlation", y = "Frequency") +
     theme_minimal()
 )

 print(
   ggplot(correlation_df, aes(x = Spearman)) +
     geom_histogram(bins = 30, fill = "salmon", color = "black") +
     labs(title = "Distribution of Spearman Correlations (RNA vs Protein)",
          x = "Spearman Correlation", y = "Frequency") +
     theme_minimal()
 )

 # --- Step 6: Scatter plot for a selected gene/protein ---
 example_gene <- common_genes[1]  # change this to your gene of interest
 df_example <- data.frame(
   RNA = as.numeric(rna_data_log[example_gene, ]),
   Protein = as.numeric(protein_data_log[example_gene, ])
 )

 print(
   ggplot(df_example, aes(x = RNA, y = Protein)) +
     geom_point() +
     labs(title = paste("RNA vs Protein for", example_gene),
          x = "Log2 RNA Expression", y = "Log2 Protein Abundance") +
     theme_minimal() +
     geom_smooth(method = "lm", se = FALSE, color = "red")
 )

 # Key Notes:
 #✅ Replace "rna_seq_data.csv" and "proteomics_data.csv" with your real file names.
 #✅ Rows: genes/proteins, columns: samples.
 #✅ Samples are matched by column name; use the same sample names in both files.
 #✅ Change example_gene to any gene of interest for plotting.
 #Tweak this for the new dataset or extend it with batch correction or other normalizations?

All tools and services of BV-BRC

Leave a reply

Genomics

Genome Assembly
Genome Annotation
Comprehensive Genome Analysis (B)
BLAST
Primer Design
Similar Genome Finder
Genome Alignment
Variation Analysis
Tn-Seq Analysis

Phylogenomics

Bacterial Genome Tree
Viral Genome Tree
Gene/Protein Tree

Protein Tools

MSA and SNP Analysis
Meta-CATS
Proteome Comparison
Protein Family Sorter
Comparative Systems
Docking

Metagenomics

Taxonomic Classification
Metagenomic Binning
Metagenomic Read Mapping

Transcriptomics

RNA-Seq Analysis
Expression Import

Utilities

Fastq Utilities
ID Mapper

Viral Tools

SARS-CoV-2 Genome Analysis
SARS-CoV-2 Wastewater Analysis
Influenza Sequence Submission
Influenza HA Subtype Conversion
Subspecies Classification
Viral Assembly

Outbreak Tracker

Measles 2025
Mpox 2024
Influenza H5N1 2024
SARS-CoV-2

DAMIAN Post-processing for Flavivirus and FSME

Leave a reply

Prepare input raw data

 ~/DATA/Data_DAMIAN_Post-processing_Flavivirus_and_FSME

 ln ./240621_M03701_0312_000000000-GHL9N/p20534/7448_7501_S0_R1_001.fastq.gz p20534_7448_R1.fastq.gz
 ln ./240621_M03701_0312_000000000-GHL9N/p20534/7448_7501_S0_R2_001.fastq.gz p20534_7448_R2.fastq.gz

Prepare virus database and select 8 representatives for the eight given viruses from the database

 # -- Download genomes --
 # ---- Date is 13.06.2025. ----
 #Taxonomy ID: 3044782
 #Die Gattung Orthoflavivirus (früher Flavivirus) umfasst behüllte Viren mit einem positivsträngigen RNA-Einzelstrang als Genom, die durch Arthropoden (Zecken und Stechmücken) als Vektoren auf Vögel und Säugetiere übertragen werden.
 #The English name for Flavivirus is simply: Flavivirus
 #It is both the scientific and common name for the genus of viruses in the family Flaviviridae. This genus includes several well-known viruses such as:
         * Dengue virus
         * Zika virus
         * West Nile virus
         * Yellow fever virus
         * Tick-borne encephalitis virus (TBEV / FSME virus)

 esearch -db nucleotide -query "txid3044782[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_3044782_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_3044782_ncbi.fasta complete_genome_3044782_ncbi.fasta  #96579-->9431
 #https://www.ebi.ac.uk/ena/browser/view/3044782

 #Download FSME (tick-borne encephalitis virus, TBEV) genomes
 esearch -db nucleotide -query "txid11084[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_11084_ncbi.fasta
 python ~/Scripts/filter_fasta.py genome_11084_ncbi.fasta complete_genome_11084_ncbi.fasta  #3426-->219
 #https://www.ebi.ac.uk/ena/browser/view/11084

 samtools faidx complete_genome_11084_ncbi.fasta PV626569.1 > PV626569.fasta

Run the second round of vrap (–host==${virus}.fasta)

 #cat FluB_PB1.fasta FluB_PB2.fasta FluB_PA.fasta FluB_HA.fasta FluB_NP.fasta FluB_NB_NA.fasta FluB_M1_BM2.fasta FluB_NEP_NS1.fasta > FluB.fasta

 # Run vrap (second round): select some representative viruses from the Excel files generated by the last step and pass them as --host
 (vrap) for sample in p20534_7448; do
     vrap/vrap_until_bowtie2.py  -1 ${sample}_R1.fastq.gz -2 ${sample}_R2.fastq.gz  -o vrap_${sample}_on_FSME --host /home/jhuang/DATA/Data_DAMIAN_Post-processing_Flavivirus_and_FSME/PV626569.fasta   -t 100 -l 200  --gbt2 --noblast
 done

 (vrap) for sample in p20534_7448; do
     vrap/vrap_until_bowtie2.py  -1 ${sample}_R1.fastq.gz -2 ${sample}_R2.fastq.gz  -o vrap_${sample}_on_Flavivirus --host /home/jhuang/DATA/Data_DAMIAN_Post-processing_Flavivirus_and_FSME/complete_genome_3044782_ncbi.fasta   -t 100 -l 200  --gbt2 --noblast
 done

Generate the mapping statistics for the sam-files generated from last step

 # Convert, sort, index and summarise the bowtie2 alignment of each vrap run
 # (one run per sample and reference set). All messages and the flagstat
 # summary are appended to LOG_mapping in the directory the loop is started from.
 log_file="$PWD/LOG_mapping"
 for sample in p20534_7448; do
     for target in FSME Flavivirus; do
         bowtie_dir="vrap_${sample}_on_${target}/bowtie"
         echo "-----${sample}_on_${target}------" >> "$log_file"
         if [ ! -d "$bowtie_dir" ]; then
             echo "Skipped: ${bowtie_dir} not found" >> "$log_file"
             continue
         fi
         # Run in a subshell so the working directory of the caller never changes,
         # even when a step fails.
         (
             cd "$bowtie_dir" || exit 1
             # Rename the alignment file to mapped.sam (skipped on re-runs where
             # the rename has already happened)
             if [ ! -e mapped.sam ]; then
                 mv mapped mapped.sam 2>> "$log_file" || exit 1
             fi
             # Convert SAM to BAM, sort, index; stop at the first failing step
             samtools view -S -b mapped.sam > mapped.bam 2>> "$log_file" &&
             samtools sort mapped.bam -o mapped_sorted.bam 2>> "$log_file" &&
             samtools index mapped_sorted.bam 2>> "$log_file" &&
             # Write flagstat output to the log
             samtools flagstat mapped_sorted.bam >> "$log_file" 2>&1
             #samtools idxstats mapped_sorted.bam >> "$log_file" 2>&1
         )
     done
 done

 (bakta) jhuang@WS-2290C:/mnt/md1/DATA/Data_DAMIAN_Post-processing_Flavivirus_and_FSME/vrap_p20534_7448_on_FSME/bowtie$ samtools flagstat mapped_sorted.bam
 7836046 + 0 in total (QC-passed reads + QC-failed reads)
 7836046 + 0 primary
 0 + 0 secondary
 0 + 0 supplementary
 0 + 0 duplicates
 0 + 0 primary duplicates
 0 + 0 mapped (0.00% : N/A)
 0 + 0 primary mapped (0.00% : N/A)
 5539082 + 0 paired in sequencing
 2769541 + 0 read1
 2769541 + 0 read2
 0 + 0 properly paired (0.00% : N/A)
 0 + 0 with itself and mate mapped
 0 + 0 singletons (0.00% : N/A)
 0 + 0 with mate mapped to a different chr
 0 + 0 with mate mapped to a different chr (mapQ>=5)

 (bakta) jhuang@WS-2290C:/mnt/md1/DATA/Data_DAMIAN_Post-processing_Flavivirus_and_FSME/vrap_p20534_7448_on_Flavivirus/bowtie$ samtools flagstat mapped_sorted.bam
 7836234 + 0 in total (QC-passed reads + QC-failed reads)
 7836234 + 0 primary
 0 + 0 secondary
 0 + 0 supplementary
 0 + 0 duplicates
 0 + 0 primary duplicates
 52 + 0 mapped (0.00% : N/A)
 52 + 0 primary mapped (0.00% : N/A)
 5539458 + 0 paired in sequencing
 2769729 + 0 read1
 2769729 + 0 read2
 0 + 0 properly paired (0.00% : N/A)
 4 + 0 with itself and mate mapped
 13 + 0 singletons (0.00% : N/A)
 0 + 0 with mate mapped to a different chr
 0 + 0 with mate mapped to a different chr (mapQ>=5)

 samtools view -F 4 mapped_sorted.bam > mapped_reads.sam
 awk '{print $3}' mapped_reads.sam | sort | uniq -c
 52 KY766069.1 Zika virus isolate Pf13/251013-18, complete genome

 # ------------------ DEBUG ----------------------
 samtools idxstats mapped_sorted.bam | cut -f 1

 for ref in PV424649.1 PV424650.1 PV424648.1 PV424643.1 PV424646.1 PV424645.1 PV424644.1 PV424647.1; do
     echo "Reference: $ref"
     samtools view -b mapped_sorted.bam "$ref" | samtools flagstat -
 done

 When I run samtools flagstat mapped_sorted.bam

 49572521 + 0 in total (QC-passed reads + QC-failed reads)
 0 + 0 secondary
 0 + 0 supplementary
 0 + 0 duplicates
 1169 + 0 mapped (0.00% : N/A)
 38247374 + 0 paired in sequencing
 19123687 + 0 read1
 19123687 + 0 read2
 884 + 0 properly paired (0.00% : N/A)
 934 + 0 with itself and mate mapped
 227 + 0 singletons (0.00% : N/A)
 0 + 0 with mate mapped to a different chr
 0 + 0 with mate mapped to a different chr (mapQ>=5)

 However, when I run for ref in PV424649.1 PV424650.1 PV424648.1 PV424643.1 PV424646.1 PV424645.1 PV424644.1 PV424647.1; do
         echo "Reference: $ref"
         samtools view -b mapped_sorted.bam "$ref" | samtools flagstat -
         done

 Reference: PV424647.1
 83 + 0 in total (QC-passed reads + QC-failed reads)
 0 + 0 secondary
 0 + 0 supplementary
 0 + 0 duplicates
 72 + 0 mapped (86.75% : N/A)
 82 + 0 paired in sequencing
 41 + 0 read1
 41 + 0 read2
 56 + 0 properly paired (68.29% : N/A)
 60 + 0 with itself and mate mapped
 11 + 0 singletons (13.41% : N/A)
 0 + 0 with mate mapped to a different chr
 0 + 0 with mate mapped to a different chr (mapQ>=5)

 I also want to get the same totals as "samtools flagstat mapped_sorted.bam". How?

 samtools view -b mapped_sorted.bam PV424649.1

 # Per-reference flagstat without a region query: stream the whole BAM as SAM,
 # keep the header plus the records assigned to one reference, and re-encode.
 for ref in PV424649.1 PV424650.1 PV424648.1 PV424643.1 PV424646.1 PV424645.1 PV424644.1 PV424647.1; do
     echo "Reference: $ref"
     # Compare column 3 (RNAME) for exact equality. A plain grep on "$ref" would
     # treat the "." in the accession as a wildcard and would also keep records
     # that only mention the reference elsewhere (e.g. as the mate's reference),
     # which inflates the per-reference counts.
     samtools view -h mapped_sorted.bam \
         | awk -F '\t' -v ref="$ref" '/^@/ || $3 == ref' \
         | samtools view -Sb - \
         | samtools flagstat -
 done

 # ---- DEBUG END ----

 # Draw coverage plots for some representative isolates that were found in the first round (see Excel file).
 # Write the per-position depth of the whole BAM to coverage.txt.
 # NOTE(review): -a is expected to report every position, including zero-depth ones,
 # and -m 0 to lift the depth cap on older samtools releases - confirm for the installed version.
 samtools depth -m 0 -a mapped_sorted.bam > coverage.txt
 # Optional (currently disabled): split coverage.txt into one file per FluB segment.
 #grep "PV424649.1" coverage.txt > FluB_PB1_coverage.txt
 #grep "PV424650.1" coverage.txt > FluB_PB2_coverage.txt
 #grep "PV424648.1" coverage.txt > FluB_PA_coverage.txt
 #grep "PV424643.1" coverage.txt > FluB_HA_coverage.txt
 #grep "PV424646.1" coverage.txt > FluB_NP_coverage.txt
 #grep "PV424645.1" coverage.txt > FluB_NB_NA_coverage.txt
 #grep "PV424644.1" coverage.txt > FluB_M1_BM2_coverage.txt
 #grep "PV424647.1" coverage.txt > FluB_NEP_NS1_coverage.txt

         import pandas as pd
         import matplotlib.pyplot as plt

         # Load the tab-separated coverage table written to coverage.txt
         # (columns: reference name, position, depth).
         df = pd.read_csv("coverage.txt", sep="\t", header=None, names=["chr", "pos", "coverage"])

         # coverage.txt holds every reference of the BAM and positions restart for
         # each of them, so plotting the whole table as one line would zig-zag
         # across references. Draw one figure per reference instead.
         for ref, ref_df in df.groupby("chr", sort=False):
             # References without a single covered position would only yield a
             # flat line at zero; skip them to keep the number of figures manageable.
             if ref_df["coverage"].max() <= 0:
                 continue
             plt.figure(figsize=(10,4))
             plt.plot(ref_df["pos"], ref_df["coverage"], color="blue", linewidth=0.5)
             plt.xlabel("Genomic Position")
             plt.ylabel("Coverage Depth")
             # Include the reference name so the figures can be told apart.
             plt.title(f"BAM Coverage Plot ({ref})")

         # Show all figures created above.
         plt.show()

Report

 Subject: Mapping Results for Flavivirus and FSME (Sample P20534)

 I have re-analyzed sample P20534 (7448) with a focus on Flaviviruses and FSME.

 Using curated reference sets from NCBI (Taxonomy ID 3044782 for Flavivirus, comprising 9,431 complete genomes—see attached flavivirus_names.txt for details; and Taxonomy ID 11084 for FSME, with 219 complete genomes), I performed targeted mapping. The key findings are summarized below:

     * Total reads: 7,836,234
     * Mapped to Flavivirus: 52 reads
       All 52 reads mapped specifically to Zika virus (KY766069.1, complete genome of isolate Pf13/251013-18)
     * Mapped to FSME: No significant hits detected

 Please find attached a coverage plot for the Zika virus genome (KY766069).

Preparing a database containing all representative viruses from NCBI Virus (https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/); all records (18,708) were downloaded on 26.05.2025

 # ------------ Manually update the internal viral databases --------------
 ##https://www.ebi.ac.uk/ena/browser/view/10239
 #esearch -db nucleotide -query "txid10239[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > genome_10239_ncbi.fasta
 #esearch -db protein -query "txid11520[Organism:exp]" | efetch -format fasta -email j.huang@uke.de > protein_11520_ncbi.fasta
 #mv ~/Tools/vrap/database/viral_db/nucleotide.fa ~/Tools/vrap/database/viral_db/nucleotide_Human_alphaherpesvirus_1.fa
 #mv ~/Tools/vrap/database/viral_db/protein.fa ~/Tools/vrap/database/viral_db/protein_Human_alphaherpesvirus_1.fa
 #cp genome_11520_ncbi.fasta ~/Tools/vrap/database/viral_db/nucleotide.fa
 #cp protein_11520_ncbi.fasta ~/Tools/vrap/database/viral_db/protein.fa
 #cd ~/Tools/vrap/database/viral_db
 #~/Tools/vrap/external_tools/blast/makeblastdb -in nucleotide.fa -dbtype nucl -parse_seqids -out virus_nucleotide
 #~/Tools/vrap/external_tools/blast/makeblastdb -in protein.fa -dbtype prot -parse_seqids -out virus_protein
 #vrap/vrap_noassembly.py  -1 AW005486_R1.fastq.gz -2 AW005486_R2.fastq.gz -o vrap_AW005486_on_InfluB  --bt2idx=/home/jhuang/REFs/genome  --host=/home/jhuang/REFs/genome.fa --virus=/mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/complete_11520_ncbi.fasta  -t 20 -l 200  -g

 # ----------- Three databases ----------
 #db is [virus_user_db]
 /home/jhuang/Tools/vrap/external_tools/blast/makeblastdb -in /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/blast/custom_viral_seq.fa -dbtype nucl -parse_seqids -out /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/blast/db/virus >> /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/vrap.log 2>> /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/vrap.log

 #db is ~/Tools/vrap/database/viral_db/nucleotide.fa  [Human alphaherpesvirus 1] [virus_nt_db]
 /home/jhuang/Tools/vrap/external_tools/blast/blastn -num_threads 20 -query /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/vrap_contig.fasta -db "/home/jhuang/Tools/vrap/database/viral_db/viral_nucleotide /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/blast/db/virus"  -evalue 1e-4 -outfmt "6 qseqid qstart qend sstart send evalue length pident sseqid stitle qcovs qcovhsp sacc slen qlen" -max_target_seqs 1 -out /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/blast/blastn.csv >> /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/vrap.log
 Warning: [blastn] Examining 5 or more matches is recommended

 #db is ~/Tools/vrap/database/viral_db/protein.fa [Human alphaherpesvirus 1] [virus_aa_db]
 /home/jhuang/Tools/vrap/external_tools/blast/blastx -num_threads 20 -query /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/blast/blastn.fa -db "/home/jhuang/Tools/vrap/database/viral_db/viral_protein"  -evalue 1e-6 -outfmt "6 qseqid qstart qend sstart send evalue length pident sseqid stitle qcovs qcovhsp sacc slen qlen" -max_target_seqs 1 -out /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/blast/blastx.csv >> /mnt/md1/DATA/Data_Nicole_DAMIAN_Post-processing_Pathoprobe_FluB_Links/vrap_AW005486_on_InfluB/vrap.log

阳光房漏水怎么办？丁基胶带才是最佳密封选择

Leave a reply

TODO: 以后只用丁基胶带粘Wintergarten上玻璃的侧边和金属的连接处！

HSButyl FoilBand https://www.hsbutyl.com/de/product/foilband-butyl-tape/

Das Premium Glasfalz Silikon OTTOSEAL S120 310ml Alle Farben (Transparent) Innen- und Außen https://www.ebay.de/itm/266565929135?var=566331892689

丁基胶带（Butyl Tape）简介：
- 材料：由丁基橡胶制成，是一种柔软、自粘、防水的密封带。
- 特点： ✅ 强力粘附：适用于玻璃、金属等材料 ✅ 防水防漏：密封性优良 ✅ 耐高低温、抗紫外线：适合户外使用 ✅ 无毒无味：环保 ✅ 易施工：可剪裁、手贴，干净整洁
- 适用场景：
  - 屋顶玻璃、窗户缝隙密封
  - 房车、冷藏车、金属屋顶防水修补
  - 管道和通风口密封
沥青胶带（Bitumen Tape）简介：
- 材料：由改性沥青制成，表面有铝箔，常用于建筑屋顶大面积防水。
- 特点：
  
  ✅ 防水性能强 ❌ 易脏、气味重 ❌ 夏天会软化、流淌，污染玻璃 ❌ 冬天易变硬、粘性下降 ❌ 不适合用于玻璃细缝密封

✅ 哪种更适合密封阳光房玻璃缝？

比较项 丁基胶带（推荐）    沥青胶带（不推荐）
与玻璃的兼容性 ✅ 非常好   ❌ 易弄脏玻璃，附着差
抗紫外线能力  ✅ 优秀，适合阳光房长期使用  ❌ 容易老化、融化
外观美观    ✅ 整洁，可选灰黑色  ❌ 黑色粗糙，难清理
温度适应性   ✅ 热不融、冷不裂   ❌ 高温融化、低温变硬
安装简便    ✅ 自粘易贴、整洁   ❌ 易弄脏手、施工复杂
气味环保    ✅ 无味环保  ❌ 有强烈沥青味道

3 使用建议：

* 清洁玻璃与框架表面（干净、干燥、无油）
* 贴上丁基胶带，沿缝隙压实贴牢
* 可选：在外部再加一层铝箔反光带或密封压条，增加防晒耐久性
* 使用橡胶滚轮压实效果更佳

4 德国购买关键词：

* Butyl-Dichtband
* UV-beständig
* Für Glas und Aluminium
* 推荐品牌：Tesa、Sika、3M 等
* 购买渠道：Amazon.de、OBI、Bauhaus、Hornbach

✅ 总结结论：你要密封Wintergarten屋顶玻璃缝隙，请优先选择丁基胶带，不要使用沥青胶带。前者更干净、耐久、环保，且长期使用不影响玻璃美观与结构密封性。

Microbial bioinformatics

Microbial bioinformatics uses computational tools to analyze genomes, track evolution, and study functions in microorganisms, including bacteria and viruses.

Author Archives: gene_x

Visualization and Export of miRNA Expression Profiles Using Manhattan Plots in R

Analysis of the RNA binding protein (RBP) motifs for RNA-Seq and miRNAs (v3)

Post-processing of DAMIAN results

Variant calling for Data_Pietschmann_229ECoronavirus_Mutations_2025 (via docker own_viral_ngs)

How to debug and construct the docker docker own_viral_ngs?

Processing Data_Tam_RNAseq_2025_LB_vs_Mac_ATCC19606

How to correlate RNA-seq Data with Mass Spectrometry Proteomics Data?

All tools and services of BV-BRC

DAMIAN Post-processing for Flavivirus and FSME

阳光房漏水怎么办？丁基胶带才是最佳密封选择