#!/usr/bin/env Rscript
# =============================================================================
# BioF3 Proteomics Module 03: GO + Reactome enrichment on DEP results
# =============================================================================
#
# Runs DEP pipeline on UbiLength, then:
#   - Maps gene names to Entrez IDs
#   - GO BP over-representation on Ubi6 vs Ctrl DE proteins
#   - Reactome pathway enrichment
#   - GSEA on full ranked protein list
#   - Produces 6 figures
#
# Usage:
#   Rscript scripts/proteomics/prot03_enrichment_sci.R
# =============================================================================

options(stringsAsFactors = FALSE)
set.seed(42)

args <- commandArgs(trailingOnly = FALSE)
file_arg <- grep("^--file=", args, value = TRUE)
script_path <- if (length(file_arg) > 0) {
  normalizePath(sub("^--file=", "", file_arg[[1]]), mustWork = TRUE)
} else {
  normalizePath("scripts/proteomics/prot03_enrichment_sci.R", mustWork = FALSE)
}
script_dir <- dirname(script_path)
project_root <- dirname(dirname(script_dir))
output_dir <- Sys.getenv(
  "BIOF3_OUTPUT_DIR",
  file.path(project_root, "static", "img", "tutorial", "proteomics", "prot03")
)
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

suppressPackageStartupMessages({
  library(DEP)
  library(clusterProfiler)
  library(org.Hs.eg.db)
  library(ReactomePA)
  library(enrichplot)
  library(ggplot2)
  library(dplyr)
  library(patchwork)
  library(stringr)
  library(SummarizedExperiment)
})

biof3_colors <- c(green="#0f766e", mint="#43d1ae", blue="#2563eb",
                  amber="#f59e0b", red="#dc2626", slate="#475569",
                  violet="#7c3aed", pink="#ec4899")
theme_biof3 <- function(base_size = 12) {
  theme_classic(base_size = base_size) +
    theme(axis.text = element_text(color = "#111827"),
          axis.title = element_text(color = "#111827", face = "bold"),
          plot.title = element_text(color = "#12342f", face = "bold", hjust = 0),
          plot.subtitle = element_text(color = "#526760", hjust = 0),
          legend.title = element_text(face = "bold"),
          strip.background = element_blank(),
          strip.text = element_text(color = "#12342f", face = "bold"))
}
save_plot <- function(p, name, width = 8, height = 5) {
  ggsave(file.path(output_dir, name), p, width = width, height = height, dpi = 300, bg = "white")
}

# ---- DEP pipeline (same as prot02) --------------------------------------
message("Running DEP pipeline ...")
data(UbiLength); data(UbiLength_ExpDesign)
data_unique <- make_unique(UbiLength, "Gene.names", "Protein.IDs", delim = ";")
lfq_cols <- grep("LFQ.intensity.", colnames(data_unique))
data_se <- make_se(data_unique, lfq_cols, UbiLength_ExpDesign)
data_filt <- filter_missval(data_se, thr = 0)
data_norm <- normalize_vsn(data_filt)
data_imp <- impute(data_norm, fun = "MinProb", q = 0.01)
data_diff <- test_diff(data_imp, type = "control", control = "Ctrl")
dep <- add_rejections(data_diff, alpha = 0.05, lfc = 1)

# ---- extract DE results for Ubi6 vs Ctrl --------------------------------
res_df <- as.data.frame(rowData(dep))
lfc_col <- grep("Ubi6_vs_Ctrl_diff", colnames(res_df), value = TRUE)[1]
p_col <- grep("Ubi6_vs_Ctrl_p.adj", colnames(res_df), value = TRUE)[1]
res_df$lfc <- res_df[[lfc_col]]
res_df$padj <- res_df[[p_col]]
res_df <- res_df |> filter(!is.na(padj))

# map gene names to Entrez
id_map <- AnnotationDbi::select(org.Hs.eg.db, keys = res_df$name,
                                keytype = "SYMBOL", columns = "ENTREZID") |>
  distinct(SYMBOL, .keep_all = TRUE) |> filter(!is.na(ENTREZID))
res_df <- res_df |> left_join(id_map, by = c("name" = "SYMBOL"))
message("Mapped proteins: ", sum(!is.na(res_df$ENTREZID)))

sig_up <- res_df |> filter(padj < 0.05 & lfc > 1) |> pull(ENTREZID) |> na.omit()
sig_down <- res_df |> filter(padj < 0.05 & lfc < -1) |> pull(ENTREZID) |> na.omit()
sig_all <- c(sig_up, sig_down)
universe <- res_df$ENTREZID[!is.na(res_df$ENTREZID)]
message("DE proteins (up/down): ", length(sig_up), " / ", length(sig_down))

# ---- figure 1: GO BP dotplot --------------------------------------------
message("Figure 1: GO BP dotplot")
ego <- enrichGO(gene = sig_all, universe = universe, OrgDb = org.Hs.eg.db,
                keyType = "ENTREZID", ont = "BP", pAdjustMethod = "BH",
                pvalueCutoff = 0.05, readable = TRUE)
if (!is.null(ego) && nrow(ego@result) > 0) {
  ego <- tryCatch(simplify(ego, cutoff = 0.7), error = function(e) ego)
  p1 <- dotplot(ego, showCategory = 15, label_format = 50) +
    scale_color_gradient(low = biof3_colors["red"], high = biof3_colors["blue"]) +
    labs(title = "GO biological process enrichment (Ubi6 vs Ctrl)",
         subtitle = "Top 15 BP terms from DE proteins (padj<0.05, |LFC|>1)") +
    theme_biof3()
  save_plot(p1, "01-go-bp-dotplot.png", width = 10, height = 8)
}

# ---- figure 2: Reactome pathway -----------------------------------------
message("Figure 2: Reactome pathway")
ereact <- enrichPathway(gene = sig_all, organism = "human", universe = universe,
                        pAdjustMethod = "BH", pvalueCutoff = 0.05, readable = TRUE)
if (!is.null(ereact) && nrow(ereact@result) > 0) {
  p2 <- dotplot(ereact, showCategory = 15, label_format = 50) +
    scale_color_gradient(low = biof3_colors["red"], high = biof3_colors["blue"]) +
    labs(title = "Reactome pathway enrichment (Ubi6 vs Ctrl)",
         subtitle = "Top 15 Reactome pathways") +
    theme_biof3()
  save_plot(p2, "02-reactome-dotplot.png", width = 10, height = 8)
} else {
  message("  No Reactome terms passed; skipping figure 2")
}

# ---- figure 3: GO emap ---------------------------------------------------
message("Figure 3: GO enrichment map")
if (!is.null(ego) && nrow(ego@result) > 0) {
  ego_pw <- pairwise_termsim(ego)
  p3 <- emapplot(ego_pw, showCategory = 20) +
    labs(title = "GO BP term similarity network",
         subtitle = "Connected terms share DE proteins; clusters = related biology") +
    theme_biof3() +
    theme(axis.text = element_blank(), axis.ticks = element_blank(),
          axis.title = element_blank(), axis.line = element_blank())
  save_plot(p3, "03-go-emap.png", width = 10, height = 8)
}

# ---- figure 4: up vs down GO comparison ---------------------------------
message("Figure 4: up vs down GO comparison")
ego_up <- enrichGO(gene = sig_up, universe = universe, OrgDb = org.Hs.eg.db,
                   keyType = "ENTREZID", ont = "BP", pvalueCutoff = 0.1, readable = TRUE)
ego_down <- enrichGO(gene = sig_down, universe = universe, OrgDb = org.Hs.eg.db,
                     keyType = "ENTREZID", ont = "BP", pvalueCutoff = 0.1, readable = TRUE)

up_terms <- if (!is.null(ego_up) && nrow(ego_up@result) > 0) {
  head(ego_up@result |> arrange(p.adjust), 8) |>
    mutate(direction = "Up", neglogp = -log10(p.adjust),
           Description = str_trunc(Description, 50))
} else { data.frame() }

down_terms <- if (!is.null(ego_down) && nrow(ego_down@result) > 0) {
  head(ego_down@result |> arrange(p.adjust), 8) |>
    mutate(direction = "Down", neglogp = -log10(p.adjust),
           Description = str_trunc(Description, 50))
} else { data.frame() }

cmp_df <- bind_rows(up_terms, down_terms)
if (nrow(cmp_df) > 0) {
  p4 <- ggplot(cmp_df, aes(x = reorder(Description, neglogp), y = neglogp, fill = direction)) +
    geom_col(width = 0.7) +
    coord_flip() +
    facet_wrap(~ direction, scales = "free_y", ncol = 1) +
    scale_fill_manual(values = c(Up = biof3_colors[["red"]], Down = biof3_colors[["blue"]]), guide = "none") +
    labs(title = "GO BP: upregulated vs downregulated proteins",
         subtitle = "Separate enrichment for up and down DE proteins",
         x = NULL, y = "-log10(p.adjust)") +
    theme_biof3()
  save_plot(p4, "04-go-up-vs-down.png", width = 11, height = 8)
}

# ---- figure 5: GSEA on ranked list --------------------------------------
message("Figure 5: GSEA GO BP")
rnk <- res_df |> filter(!is.na(ENTREZID) & !is.na(padj) & padj > 0) |>
  mutate(stat = sign(lfc) * -log10(padj)) |> arrange(desc(stat))
rank_vec <- setNames(rnk$stat, rnk$ENTREZID)
rank_vec <- rank_vec[!duplicated(names(rank_vec))]

gsea_go <- gseGO(geneList = rank_vec, OrgDb = org.Hs.eg.db, ont = "BP",
                 keyType = "ENTREZID", pAdjustMethod = "BH", pvalueCutoff = 0.25,
                 minGSSize = 10, maxGSSize = 500, verbose = FALSE, seed = TRUE)
message("GSEA terms passing: ", nrow(gsea_go@result))

if (nrow(gsea_go@result) > 0) {
  gsea_df <- gsea_go@result |> arrange(desc(NES))
  top_nes <- bind_rows(head(gsea_df, 8), tail(gsea_df, 8)) |>
    distinct(ID, .keep_all = TRUE) |>
    mutate(direction = ifelse(NES > 0, "Activated", "Suppressed"),
           Description = str_trunc(Description, 50))
  p5 <- ggplot(top_nes, aes(x = reorder(Description, NES), y = NES, fill = direction)) +
    geom_col(width = 0.7) + coord_flip() +
    scale_fill_manual(values = c(Activated = biof3_colors[["red"]],
                                 Suppressed = biof3_colors[["blue"]])) +
    labs(title = "GSEA (GO BP): top activated and suppressed terms",
         subtitle = "Ranked by NES; proteins ranked by sign(LFC)*-log10(padj)",
         x = NULL, y = "NES", fill = NULL) +
    theme_biof3()
  save_plot(p5, "05-gsea-waterfall.png", width = 11, height = 7)
}

# ---- figure 6: GSEA running-score for top term --------------------------
message("Figure 6: GSEA running-score")
if (nrow(gsea_go@result) > 0) {
  pick_id <- gsea_go@result$ID[which.max(abs(gsea_go@result$NES))]
  p6 <- gseaplot2(gsea_go, geneSetID = pick_id, pvalue_table = TRUE,
                  color = biof3_colors[["red"]])
  ggsave(file.path(output_dir, "06-gsea-running-score.png"),
         p6, width = 10, height = 7, dpi = 300, bg = "white")
}

message("=== proteomics prot03 enrichment figures ===")
print(list.files(output_dir, pattern = "\\.png$", full.names = FALSE))
message("Done.")
sessionInfo()
