#!/usr/bin/env Rscript
# =============================================================================
# BioF3 Module 09 (教程 08): TCR/BCR 真实示例 — scRepertoire + 配套 Seurat
# =============================================================================
#
# 这个脚本在单细胞实践教程 08 章（TCR/BCR 测序分析）里用到。它：
#   1. 使用 scRepertoire 包自带的 8 个 10x VDJ 样本（P17~P20，B/L 配对）
#   2. 用 combineTCR 合并并注解样本来源
#   3. 算克隆数量、丰度、稳态、多样性、样本间共享
#   4. 把 clonotype 合并进配套的 500 细胞 Seurat 对象，画 UMAP 上的克隆扩增分布
#   5. 输出 6 张真实 PNG 到 static/img/tutorial/single-cell/module09/
#
# 数据来源：
#   scRepertoire::contig_list —— 8 个样本的 filtered_contig_annotations
#   scRepertoire::scRep_example —— 配套的 500 细胞 Seurat（已 UMAP + 聚类）
#   都是包内数据，无需下载
#
# 用法：
#   Rscript scripts/module09_tcr_sci.R
#
# 可选环境变量：
#   BIOF3_OUTPUT_DIR=/path/to/static/img/tutorial/single-cell/module09
#
# 依赖：
#   scRepertoire (>= 2.0), Seurat, ggplot2, dplyr, patchwork
# =============================================================================

options(stringsAsFactors = FALSE)
set.seed(42)

# ---- 路径 ----------------------------------------------------------------
args <- commandArgs(trailingOnly = FALSE)
file_arg <- grep("^--file=", args, value = TRUE)
script_path <- if (length(file_arg) > 0) {
  normalizePath(sub("^--file=", "", file_arg[[1]]), mustWork = TRUE)
} else {
  normalizePath("scripts/single-cell/sc09_tcr_sci.R", mustWork = FALSE)
}
script_dir <- dirname(script_path)
project_root <- if (basename(script_dir) == "scripts" && basename(dirname(script_dir)) == "static") {
  dirname(dirname(script_dir))
} else {
  dirname(script_dir)
}
output_dir <- Sys.getenv(
  "BIOF3_OUTPUT_DIR",
  file.path(project_root, "static", "img", "tutorial", "modules", "module09")
)
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

# ---- 依赖 ----------------------------------------------------------------
suppressPackageStartupMessages({
  library(scRepertoire)
  library(Seurat)
  library(ggplot2)
  library(dplyr)
  library(patchwork)
})

biof3_colors <- c(
  green = "#0f766e", mint  = "#43d1ae", blue  = "#2563eb",
  amber = "#f59e0b", red   = "#dc2626", slate = "#475569",
  violet = "#7c3aed", pink = "#ec4899"
)

theme_biof3 <- function(base_size = 12) {
  theme_classic(base_size = base_size) +
    theme(
      axis.text  = element_text(color = "#111827"),
      axis.title = element_text(color = "#111827", face = "bold"),
      plot.title = element_text(color = "#12342f", face = "bold", hjust = 0),
      plot.subtitle = element_text(color = "#526760", hjust = 0),
      legend.title = element_text(face = "bold"),
      legend.position = "right",
      strip.background = element_blank(),
      strip.text = element_text(color = "#12342f", face = "bold")
    )
}

save_plot <- function(p, name, width = 8, height = 5) {
  ggsave(file.path(output_dir, name), p, width = width, height = height, dpi = 300, bg = "white")
}

# ---- 载入内置数据 --------------------------------------------------------
data("contig_list")
data("scRep_example")

# 8 个样本：P17/P18/P19/P20 各自 B（外周血）和 L（病灶/淋巴）
sample_ids <- c("P17B", "P17L", "P18B", "P18L", "P19B", "P19L", "P20B", "P20L")
patient_ids <- substr(sample_ids, 1, 3)    # "P17","P17","P18"...
tissue_ids  <- substr(sample_ids, 4, 4)    # "B","L","B","L"...

message("8 个样本的 contig 行数：")
print(sapply(contig_list, nrow))

# ---- combineTCR ---------------------------------------------------------
message("combineTCR: 合并并注解来源...")
combined <- combineTCR(
  contig_list,
  samples = sample_ids,
  removeNA = FALSE,
  removeMulti = FALSE
)
# 之后很多 group.by 要用 sample 列，这里手动补一列 tissue 供后面自定义用
for (i in seq_along(combined)) {
  combined[[i]]$tissue <- tissue_ids[i]
}
message("合并后 combined 长度：", length(combined))
message("第一个样本的列：", paste(colnames(combined[[1]]), collapse = ", "))

# ---- 图 1：各样本独立克隆数 -------------------------------------------
message("图 1：clonalQuant — 各样本独立克隆数")
p1 <- clonalQuant(combined, cloneCall = "strict", chain = "both", scale = TRUE) +
  scale_fill_manual(values = unname(biof3_colors), guide = "none") +
  labs(title = "Unique clonotypes per sample (scaled)",
       subtitle = "Unique clonotypes / total cells; removes sample-size bias",
       x = NULL, y = "Unique clones / total cells") +
  theme_biof3() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
save_plot(p1, "01-clonal-quant.png", width = 9, height = 5)

# ---- 图 2：克隆丰度分布 -----------------------------------------------
message("图 2：clonalAbundance — 克隆丰度")
p2 <- clonalAbundance(combined, cloneCall = "strict", scale = FALSE) +
  scale_color_manual(values = rep(unname(biof3_colors), length.out = length(combined))) +
  labs(title = "Clonal abundance distribution",
       subtitle = "Clones ranked by frequency; a longer tail indicates stronger expansion",
       x = "Clone frequency rank", y = "Number of clones") +
  theme_biof3()
save_plot(p2, "02-clonal-abundance.png", width = 9, height = 5.5)

# ---- 图 3：克隆稳态 -----------------------------------------------------
message("图 3：clonalHomeostasis — 克隆按大小分档")
p3 <- clonalHomeostasis(
  combined,
  cloneCall = "strict",
  cloneSize = c(Rare = 1e-4, Small = 0.001, Medium = 0.01, Large = 0.1, Hyperexpanded = 1)
) +
  labs(title = "Clonal homeostasis by size category",
       subtitle = "Higher Hyperexpanded fraction = stronger antigen-driven expansion",
       x = NULL, y = "Relative proportion") +
  theme_biof3() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
save_plot(p3, "03-clonal-homeostasis.png", width = 10, height = 5.5)

# ---- 图 4：克隆多样性 --------------------------------------------------
message("图 4：clonalDiversity — Shannon 多样性")
p4 <- clonalDiversity(
  combined,
  cloneCall = "strict",
  group.by  = "sample",
  metric    = "shannon"
) +
  scale_color_manual(values = rep(unname(biof3_colors), length.out = length(combined)), guide = "none") +
  labs(title = "Shannon diversity per sample",
       subtitle = "Lower Shannon = repertoire dominated by a few large clones",
       x = NULL, y = "Shannon index") +
  theme_biof3() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
save_plot(p4, "04-clonal-diversity.png", width = 9, height = 5.5)

# ---- 图 5：样本间克隆共享 ---------------------------------------------
message("图 5：clonalOverlap — 样本间共享克隆")
p5 <- clonalOverlap(combined, cloneCall = "strict", method = "morisita") +
  scale_fill_gradient(low = "white", high = biof3_colors["green"], na.value = "white") +
  labs(title = "Pairwise clonal overlap (Morisita index)",
       subtitle = "Closer to 1 = more shared dominant clones; same-patient B/L pairs are highest",
       x = NULL, y = NULL) +
  theme_biof3() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
save_plot(p5, "05-clonal-overlap.png", width = 8.5, height = 6.5)

# ---- 图 6：克隆在 UMAP 上的分布 ---------------------------------------
message("图 6：combineExpression + DimPlot — 克隆扩增在 UMAP 上的分布")
# scRep_example 的 barcodes 是 "P17B_xxx-1" 格式，combined 做完也是这种带前缀的 id
sc <- combineExpression(
  combined, scRep_example,
  cloneCall = "strict",
  group.by  = "sample",
  proportion = TRUE,
  cloneSize  = c(Rare = 1e-4, Small = 0.001, Medium = 0.01, Large = 0.1, Hyperexpanded = 1)
)

message("combineExpression 后新增列：")
print(setdiff(colnames(sc@meta.data), colnames(scRep_example@meta.data)))

# cloneSize 是一个 factor，控制颜色顺序
clone_size_levels <- c("Rare (0 < X <= 1e-04)",
                       "Small (1e-04 < X <= 0.001)",
                       "Medium (0.001 < X <= 0.01)",
                       "Large (0.01 < X <= 0.1)",
                       "Hyperexpanded (0.1 < X <= 1)")
clone_size_colors <- c("#cbd5e1", biof3_colors["mint"], biof3_colors["amber"],
                       biof3_colors["red"],  biof3_colors["violet"])

p6 <- DimPlot(sc, group.by = "cloneSize", order = TRUE, pt.size = 0.7) +
  scale_color_manual(values = clone_size_colors, na.value = "#e5e7eb",
                     breaks = clone_size_levels) +
  labs(title = "Clonal expansion on UMAP",
       subtitle = "Each dot is a T cell; color indicates clone-size category") +
  theme_biof3()
save_plot(p6, "06-umap-clone-size.png", width = 9.5, height = 6)

# ---- 输出清单 -----------------------------------------------------------
message("=== 已生成 module09 TCR 图 ===")
print(list.files(output_dir, pattern = "\\.png$", full.names = FALSE))
message("输出目录：", normalizePath(output_dir))
message("完成。")

sessionInfo()
