Pipeline to Create Circos DE Plots from DESeq2 Output (Data_JuliaFuchs_RNAseq_2025)

  • circos_Moxi_18h_vs_Untreated_18h
  • circos_Mitomycin_18h_vs_Untreated_18h
    #Set the comparison condition in the R-script
    comparison <- "Moxi_18h_vs_Untreated_18h"
    # e.g.
    # comparison <- "Mitomycin_18h_vs_Untreated_18h"
    # comparison <- "Mitomycin_8h_vs_Untreated_8h"
    # comparison <- "Moxi_8h_vs_Untreated_8h"
    # comparison <- "Mitomycin_4h_vs_Untreated_4h"
    # comparison <- "Moxi_4h_vs_Untreated_4h"
    (r_env) Rscript make_circos_from_deseq.R

    (circos-env) cd circos_Moxi_18h_vs_Untreated_18h
    (circos-env) touch karyotype.txt (see the step2)
    (circos-env) touch circos.conf (see the step3)
    cp circos_Moxi_18h_vs_Untreated_18h/karyotype.txt circos_Moxi_8h_vs_Untreated_8h/
    cp circos_Moxi_18h_vs_Untreated_18h/karyotype.txt circos_Moxi_4h_vs_Untreated_4h/
    cp circos_Moxi_18h_vs_Untreated_18h/karyotype.txt circos_Mitomycin_18h_vs_Untreated_18h/
    cp circos_Moxi_18h_vs_Untreated_18h/karyotype.txt circos_Mitomycin_8h_vs_Untreated_8h/
    cp circos_Moxi_18h_vs_Untreated_18h/karyotype.txt circos_Mitomycin_4h_vs_Untreated_4h/
    cp circos_Moxi_18h_vs_Untreated_18h/circos.conf circos_Moxi_8h_vs_Untreated_8h/
    cp circos_Moxi_18h_vs_Untreated_18h/circos.conf circos_Moxi_4h_vs_Untreated_4h/
    cp circos_Moxi_18h_vs_Untreated_18h/circos.conf circos_Mitomycin_18h_vs_Untreated_18h/
    cp circos_Moxi_18h_vs_Untreated_18h/circos.conf circos_Mitomycin_8h_vs_Untreated_8h/
    cp circos_Moxi_18h_vs_Untreated_18h/circos.conf circos_Mitomycin_4h_vs_Untreated_4h/

    (circos-env) circos -conf circos.conf
    #or (circos-env) /home/jhuang/mambaforge/envs/circos-env/bin/circos  -conf /mnt/md1/DATA/Data_JuliaFuchs_RNAseq_2025/results/star_salmon/degenes/circos/circos.conf

    #(circos-env) jhuang@WS-2290C:/mnt/md1/DATA/Data_JuliaFuchs_RNAseq_2025/results/star_salmon/degenes$ find . -name "*circos.png"
    mv ./circos_Moxi_4h_vs_Untreated_4h/circos.png circos_Moxi_4h_vs_Untreated_4h.png
    mv ./circos_Moxi_18h_vs_Untreated_18h/circos.png circos_Moxi_18h_vs_Untreated_18h.png
    mv ./circos_Moxi_8h_vs_Untreated_8h/circos.png circos_Moxi_8h_vs_Untreated_8h.png
    mv ./circos_Mitomycin_18h_vs_Untreated_18h/circos.png circos_Mitomycin_18h_vs_Untreated_18h.png
    mv ./circos_Mitomycin_8h_vs_Untreated_8h/circos.png circos_Mitomycin_8h_vs_Untreated_8h.png
    mv ./circos_Mitomycin_4h_vs_Untreated_4h/circos.png circos_Mitomycin_4h_vs_Untreated_4h.png
  1. touch karyotype.txt
    chr - CP052959.1 CP052959.1 0 2706926 grey
  1. touch circos.conf
    karyotype = karyotype.txt

<ideogram>

<spacing>
    default = 0.005r
    </spacing>

    radius       = 0.80r
    thickness    = 20p
    fill         = yes

    show_label   = yes
    label_radius = 1.05r
    label_size   = 30p
    label_font   = bold
    label_parallel = yes
    </ideogram>

    # --- Wichtig: Schalter auf Top-Level, NICHT im 
<ticks>-Block ---
    show_ticks       = yes
    show_tick_labels = yes

<ticks>
    # Starte direkt an der äußeren Ideogramm-Kante
    radius      = dims(ideogram,radius_outer)
    orientation = out          # Ticks nach außen zeichnen (oder 'in' für nach innen)
    color       = black
    thickness   = 2p
    size        = 8p

    # kleine Ticks alle 100 kb, ohne Label

<tick>
    spacing      = 50000
    size         = 8p
    thickness    = 3p
    color        = black
    show_label   = no
    </tick>

    # große Ticks alle 500 kb, mit Label in Mb

<tick>
    spacing      = 100000
    size         = 12p
    thickness    = 5p
    color        = black

    show_label   = yes
    label_size   = 18p
    label_offset = 6p
    multiplier   = 0.000001     # in Mb
    format       = %.1f
    suffix       = " Mb"
    </tick>

    </ticks>

<plots>

    # Density of up-regulated genes

<plot>
    show      = yes
    type      = histogram
    file      = data/density_up.txt
    r0        = 0.88r
    r1        = 0.78r
    fill_color = red
    thickness = 1p
    </plot>

    # Density of down-regulated genes

<plot>
    show      = yes
    type      = histogram
    file      = data/density_down.txt
    r0        = 0.78r
    r1        = 0.68r
    fill_color = blue
    thickness = 1p
    </plot>

    # Scatter of individual significantly DE genes

<plot>
    show           = yes
    type           = scatter
    file           = data/genes_scatter.txt
    r0             = 0.46r
    r1             = 0.76r
    glyph          = circle
    glyph_size     = 5p
    stroke_thickness = 0

    min            = -15
    max            =  15

<rules>

<rule>
    condition = var(value) > 0
    color     = red
    </rule>

<rule>
    condition = var(value) < 0
    color     = blue
    </rule>
    </rules>

    </plot>

    </plots>

<image>
    <<include etc/image.conf>>
    </image>

    <<include etc/colors_fonts_patterns.conf>>
    <<include etc/housekeeping.conf>>
  1. Rscript make_circos_from_deseq.R

     ############################################################
     # make_circos_from_deseq.R
     #
     # - Read DESeq2 results (annotated CSV)
     # - Read BED annotation
     # - Merge by GeneID
     # - Classify genes (up / down / ns)
     # - Create Circos input files:
     #     * genes_scatter.txt
     #     * density_up.txt
     #     * density_down.txt
     # - Create annotated versions of the above
     # - Export annotated tables into one Excel workbook
     ############################################################
    
     ###############################
     # 0. Single parameter to set
     ###############################
    
     # Just change this line for each comparison:
     #comparison <- "Moxi_18h_vs_Untreated_18h"
     # e.g.
     #comparison <- "Mitomycin_18h_vs_Untreated_18h"
     #comparison <- "Mitomycin_8h_vs_Untreated_8h"
     #comparison <- "Moxi_8h_vs_Untreated_8h"
     #comparison <- "Mitomycin_4h_vs_Untreated_4h"
     comparison <- "Moxi_4h_vs_Untreated_4h"
    
     ###############################
     # 0. Derived settings
     ###############################
    
     # DESeq2 result file (annotated)
     deseq_file <- paste0(comparison, "-all_annotated.csv")
    
     # BED file with gene coordinates (constant for this genome)
     bed_file   <- "CP052959_m.bed"
    
     # Create a filesystem-friendly directory name from comparison
     safe_comp  <- gsub("[^A-Za-z0-9_]+", "_", comparison)
    
     # Output directory for Circos files (e.g. circos_Moxi_18h_vs_Untreated_18h)
     out_dir    <- paste0("circos_", safe_comp)
    
     # Thresholds for significance
     padj_cutoff <- 0.05
     lfc_cutoff  <- 1      # |log2FC| >= 1
    
     # Bin size for density in bp (e.g. 10000 = 10 kb)
     bin_size <- 10000
    
     # Comparison label for Excel / plots (human-readable)
     comparison_label <- comparison
    
     options(scipen = 999) # turn off scientific notation
    
     ###############################
     # 1. Setup & packages
     ###############################
    
     if (!dir.exists(out_dir)) dir.create(out_dir, showWarnings = FALSE)
     data_dir <- file.path(out_dir, "data")
     if (!dir.exists(data_dir)) dir.create(data_dir, showWarnings = FALSE)
    
     # openxlsx for Excel export
     if (!requireNamespace("openxlsx", quietly = TRUE)) {
     stop("Package 'openxlsx' is required. Please install it with install.packages('openxlsx').")
     }
     library(openxlsx)
    
     ###############################
     # 2. Read data
     ###############################
    
     message("Reading DESeq2 results from: ", deseq_file)
     deseq <- read.csv(deseq_file, stringsAsFactors = FALSE)
    
     # Check required columns in DESeq2 table
     required_cols <- c("GeneID", "log2FoldChange", "padj")
     if (!all(required_cols %in% colnames(deseq))) {
     stop("DESeq2 table must contain columns: ", paste(required_cols, collapse = ", "))
     }
    
     message("Reading BED annotation from: ", bed_file)
     bed_cols <- c("chr","start","end","gene_id","score",
                 "strand","thickStart","thickEnd",
                 "itemRgb","blockCount","blockSizes","blockStarts")
    
     annot <- read.table(
     bed_file,
     header = FALSE,
     sep = "\t",
     stringsAsFactors = FALSE,
     col.names = bed_cols
     )
    
     ###############################
     # 3. Merge DESeq2 + annotation
     ###############################
    
     # Match DESeq2 GeneID (e.g. "gene-HJI06_09365") to BED gene_id
     merged <- merge(
     deseq,
     annot[, c("chr", "start", "end", "gene_id")],
     by.x = "GeneID",
     by.y = "gene_id"
     )
    
     if (nrow(merged) == 0) {
     stop("No overlap between DESeq2 GeneID and BED gene_id.")
     }
    
     # Midpoint of each gene
     merged$mid <- round((merged$start + merged$end) / 2)
    
     ###############################
     # 4. Classify genes
     ###############################
    
     merged$regulation <- "ns"
     merged$regulation[merged$padj < padj_cutoff & merged$log2FoldChange >=  lfc_cutoff]  <- "up"
     merged$regulation[merged$padj < padj_cutoff & merged$log2FoldChange <= -lfc_cutoff] <- "down"
    
     table_reg <- table(merged$regulation)
     message("Regulation counts: ", paste(names(table_reg), table_reg, collapse = " | "))
    
     ###############################
     # 5. Scatter files (per-gene)
     ###############################
    
     scatter <- merged[merged$regulation != "ns", ]
    
     # Circos scatter input: chr  start  end  value
     scatter_file <- file.path(data_dir, "genes_scatter.txt")
     scatter_out  <- scatter[, c("chr", "mid", "mid", "log2FoldChange")]
    
     write.table(scatter_out,
                 scatter_file,
                 quote = FALSE, sep = "\t",
                 row.names = FALSE, col.names = FALSE)
    
     message("Written Circos scatter file: ", scatter_file)
    
     # Annotated scatter for Excel / inspection
     scatter_annot <- scatter[, c(
     "chr",
     "mid",              # start
     "mid",              # end
     "log2FoldChange",
     "GeneID",
     "padj",
     "regulation"
     )]
     colnames(scatter_annot)[1:4] <- c("chr", "start", "end", "log2FoldChange")
    
     scatter_annot_file <- file.path(data_dir, "genes_scatter_annotated.tsv")
     write.table(scatter_annot,
                 scatter_annot_file,
                 quote = FALSE, sep = "\t",
                 row.names = FALSE, col.names = TRUE)
    
     message("Written annotated scatter file: ", scatter_annot_file)
    
     ###############################
     # 6. Density function (bins)
     ###############################
    
     bin_chr <- function(df_chr, bin_size, direction = c("up", "down")) {
     direction <- match.arg(direction)
    
     chr_name <- df_chr$chr[1]
     max_pos  <- max(df_chr$mid)
    
     # number of bins
     n_bins <- ceiling((max_pos + 1) / bin_size)
    
     starts <- seq(0, by = bin_size, length.out = n_bins)
     ends   <- starts + bin_size
    
     # init counts & gene lists
     counts    <- integer(n_bins)
     gene_list <- vector("list", n_bins)
    
     df_dir <- df_chr[df_chr$regulation == direction, ]
    
     if (nrow(df_dir) > 0) {
         # bin index for each gene
         bin_index <- floor(df_dir$mid / bin_size) + 1
         bin_index[bin_index < 1]        <- 1
         bin_index[bin_index > n_bins]   <- n_bins
    
         # accumulate counts and GeneIDs
         for (i in seq_len(nrow(df_dir))) {
         idx <- bin_index[i]
         counts[idx] <- counts[idx] + 1L
         gene_list[[idx]] <- c(gene_list[[idx]], df_dir$GeneID[i])
         }
     }
    
     gene_ids <- vapply(
         gene_list,
         function(x) {
         if (length(x) == 0) "" else paste(unique(x), collapse = ";")
         },
         character(1)
     )
    
     data.frame(
         chr      = chr_name,
         start    = as.integer(starts),
         end      = as.integer(ends),
         value    = as.integer(counts),
         gene_ids = gene_ids,
         stringsAsFactors = FALSE
     )
     }
    
     ###############################
     # 7. Density up/down for all chromosomes
     ###############################
    
     chr_list <- split(merged, merged$chr)
    
     density_up_list   <- lapply(chr_list, bin_chr, bin_size = bin_size, direction = "up")
     density_down_list <- lapply(chr_list, bin_size = bin_size, FUN = bin_chr, direction = "down")
    
     density_up   <- do.call(rbind, density_up_list)
     density_down <- do.call(rbind, density_down_list)
    
     # Plain Circos input (no gene_ids)
     density_up_file   <- file.path(data_dir, "density_up.txt")
     density_down_file <- file.path(data_dir, "density_down.txt")
    
     write.table(density_up[, c("chr", "start", "end", "value")],
                 density_up_file,
                 quote = FALSE, sep = "\t",
                 row.names = FALSE, col.names = FALSE)
    
     write.table(density_down[, c("chr", "start", "end", "value")],
                 density_down_file,
                 quote = FALSE, sep = "\t",
                 row.names = FALSE, col.names = FALSE)
    
     message("Written Circos density files: ",
             density_up_file, " and ", density_down_file)
    
     # Annotated density files (with gene_ids)
     density_up_annot_file   <- file.path(data_dir, "density_up_annotated.tsv")
     density_down_annot_file <- file.path(data_dir, "density_down_annotated.tsv")
    
     write.table(density_up,
                 density_up_annot_file,
                 quote = FALSE, sep = "\t",
                 row.names = FALSE, col.names = TRUE)
    
     write.table(density_down,
                 density_down_annot_file,
                 quote = FALSE, sep = "\t",
                 row.names = FALSE, col.names = TRUE)
    
     message("Written annotated density files: ",
             density_up_annot_file, " and ", density_down_annot_file)
    
     ###############################
     # 8. Export annotated tables to Excel
     ###############################
    
     excel_file <- file.path(
     out_dir,
     paste0("circos_annotations_", comparison_label, ".xlsx")
     )
    
     wb <- createWorkbook()
    
     addWorksheet(wb, "scatter_points")
     writeData(wb, "scatter_points", scatter_annot)
    
     addWorksheet(wb, "density_up")
     writeData(wb, "density_up", density_up)
    
     addWorksheet(wb, "density_down")
     writeData(wb, "density_down", density_down)
    
     saveWorkbook(wb, excel_file, overwrite = TRUE)
    
     message("Excel workbook written to: ", excel_file)
    
     message("Done.")

Leave a Reply

Your email address will not be published. Required fields are marked *