#!/usr/bin/env Rscript
# =============================================================================
# BioF3 Epigenomics Module 03: DiffBind differential binding analysis
# =============================================================================
#
# Uses DiffBind built-in tamoxifen ER ChIP-seq data (11 samples,
# Responsive vs Resistant breast cancer cell lines) to demonstrate:
#   1. Sample correlation heatmap
#   2. PCA on binding affinity
#   3. Differential binding analysis (DESeq2 backend)
#   4. MA plot of differential peaks
#   5. Volcano plot
#   6. Binding affinity heatmap of top DB sites
#
# Usage:
#   Rscript scripts/epigenomics/epi03_diffbind_sci.R
# =============================================================================

options(stringsAsFactors = FALSE)
set.seed(42)

args <- commandArgs(trailingOnly = FALSE)
file_arg <- grep("^--file=", args, value = TRUE)
script_path <- if (length(file_arg) > 0) {
  normalizePath(sub("^--file=", "", file_arg[[1]]), mustWork = TRUE)
} else {
  normalizePath("scripts/epigenomics/epi03_diffbind_sci.R", mustWork = FALSE)
}
script_dir <- dirname(script_path)
project_root <- dirname(dirname(script_dir))
output_dir <- Sys.getenv(
  "BIOF3_OUTPUT_DIR",
  file.path(project_root, "static", "img", "tutorial", "epigenomics", "epi03")
)
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

suppressPackageStartupMessages({
  library(DiffBind)
  library(ggplot2)
  library(dplyr)
  library(patchwork)
  library(pheatmap)
  library(RColorBrewer)
})

biof3_colors <- c(green="#0f766e", mint="#43d1ae", blue="#2563eb",
                  amber="#f59e0b", red="#dc2626", slate="#475569",
                  violet="#7c3aed", pink="#ec4899")
theme_biof3 <- function(base_size = 12) {
  theme_classic(base_size = base_size) +
    theme(axis.text = element_text(color = "#111827"),
          axis.title = element_text(color = "#111827", face = "bold"),
          plot.title = element_text(color = "#12342f", face = "bold", hjust = 0),
          plot.subtitle = element_text(color = "#526760", hjust = 0),
          legend.title = element_text(face = "bold"),
          strip.background = element_blank(),
          strip.text = element_text(color = "#12342f", face = "bold"))
}
save_plot <- function(p, name, width = 8, height = 5) {
  ggsave(file.path(output_dir, name), p, width = width, height = height, dpi = 300, bg = "white")
}

# ---- load data -----------------------------------------------------------
message("Loading DiffBind tamoxifen data ...")
data(tamoxifen_counts)
message("Samples: ", length(tamoxifen$samples$SampleID))
message("Consensus peaks: ", tamoxifen$totalMerged)

# ---- figure 1: sample correlation heatmap --------------------------------
message("Figure 1: sample correlation heatmap")
png(file.path(output_dir, "01-correlation.png"),
    width = 9, height = 7, units = "in", res = 300, bg = "white")
plot(tamoxifen, main = "Sample correlation (binding affinity)")
dev.off()

# ---- figure 2: PCA -------------------------------------------------------
message("Figure 2: PCA")
p2 <- dba.plotPCA(tamoxifen, attributes = DBA_CONDITION, label = DBA_ID)
if (inherits(p2, "gg")) {
  p2 <- p2 + labs(title = "PCA on binding affinity") + theme_biof3()
  save_plot(p2, "02-pca.png", width = 9, height = 6)
} else {
  png(file.path(output_dir, "02-pca.png"), width = 9, height = 6, units = "in", res = 300, bg = "white")
  dba.plotPCA(tamoxifen, attributes = DBA_CONDITION, label = DBA_ID)
  dev.off()
}

# ---- differential analysis -----------------------------------------------
message("Running differential binding (Responsive vs Resistant) ...")
tamoxifen <- dba.contrast(tamoxifen, categories = DBA_CONDITION)
tamoxifen <- dba.analyze(tamoxifen, method = DBA_DESEQ2)
db_report <- dba.report(tamoxifen)
message("DB sites (FDR < 0.05): ", length(db_report))

# ---- figure 3: MA plot ---------------------------------------------------
message("Figure 3: MA plot")
p3 <- dba.plotMA(tamoxifen, method = DBA_DESEQ2)
if (inherits(p3, "gg")) {
  p3 <- p3 + labs(title = "MA plot: Responsive vs Resistant") + theme_biof3()
  save_plot(p3, "03-ma-plot.png", width = 9, height = 6)
} else {
  png(file.path(output_dir, "03-ma-plot.png"), width = 9, height = 6, units = "in", res = 300, bg = "white")
  dba.plotMA(tamoxifen, method = DBA_DESEQ2)
  dev.off()
}

# ---- figure 4: volcano ---------------------------------------------------
message("Figure 4: volcano")
p4 <- dba.plotVolcano(tamoxifen, method = DBA_DESEQ2)
if (inherits(p4, "gg")) {
  p4 <- p4 + labs(title = "Volcano: Responsive vs Resistant") + theme_biof3()
  save_plot(p4, "04-volcano.png", width = 9, height = 6)
} else {
  png(file.path(output_dir, "04-volcano.png"), width = 9, height = 6, units = "in", res = 300, bg = "white")
  dba.plotVolcano(tamoxifen, method = DBA_DESEQ2)
  dev.off()
}

# ---- figure 5: binding affinity heatmap of DB sites ----------------------
message("Figure 5: DB sites heatmap")
png(file.path(output_dir, "05-db-heatmap.png"),
    width = 9, height = 7, units = "in", res = 300, bg = "white")
dba.plotHeatmap(tamoxifen, contrast = 1, correlations = FALSE,
               main = "Top differential binding sites")
dev.off()

# ---- figure 6: Venn of DB sites by direction -----------------------------
message("Figure 6: DB sites by direction")
png(file.path(output_dir, "06-db-venn.png"),
    width = 8, height = 6, units = "in", res = 300, bg = "white")
dba.plotVenn(tamoxifen, contrast = 1, bDB = TRUE, bGain = TRUE, bLoss = TRUE,
            main = "Gained vs Lost binding sites")
dev.off()

message("=== epigenomics epi03 DiffBind figures ===")
print(list.files(output_dir, pattern = "\\.png$", full.names = FALSE))
message("Done.")
sessionInfo()
