#!/usr/bin/env Rscript
# =============================================================================
# BioF3 Module 07 (教程 06): 细胞-细胞通讯真实示例 — PBMC 3k + CellChat
# =============================================================================
#
# 这个脚本在单细胞实践教程 06 章（细胞通讯）里用到。它：
#   1. 读 PBMC 3k 真实矩阵（同 module01/03/04/05 一致）
#   2. 做 Seurat 标准流程 + marker-based 粗注释
#   3. 用 CellChat + CellChatDB.human 推断细胞间通讯
#   4. 输出 6 张真实 PNG 到 static/img/tutorial/single-cell/module07/
#
# PBMC 3k 是健康外周血，细胞间通讯不如肿瘤微环境剧烈，但 T/B/NK/
# monocyte/DC 之间的 MHC-II、CD45、CXCR/CCR 一族信号足够当教学演示。
#
# 用法：
#   Rscript scripts/module07_cellchat_sci.R
#
# 可选环境变量：
#   BIOF3_DATA_DIR=/path/to/data
#   BIOF3_OUTPUT_DIR=/path/to/static/img/tutorial/single-cell/module07
#
# 依赖：
#   Matrix, Seurat, CellChat, ggplot2, dplyr, scales, RColorBrewer,
#   patchwork, igraph, ComplexHeatmap, NMF
# =============================================================================

options(stringsAsFactors = FALSE)
set.seed(42)

# ---- 路径 ----------------------------------------------------------------
args <- commandArgs(trailingOnly = FALSE)
file_arg <- grep("^--file=", args, value = TRUE)
script_path <- if (length(file_arg) > 0) {
  normalizePath(sub("^--file=", "", file_arg[[1]]), mustWork = TRUE)
} else {
  normalizePath("scripts/single-cell/sc07_cellchat_sci.R", mustWork = FALSE)
}
script_dir <- dirname(script_path)
project_root <- if (basename(script_dir) == "scripts" && basename(dirname(script_dir)) == "static") {
  dirname(dirname(script_dir))
} else {
  dirname(script_dir)
}

data_root <- Sys.getenv("BIOF3_DATA_DIR", file.path(path.expand("~"), "biof3-data"))
pbmc_dir  <- file.path(data_root, "pbmc3k")
output_dir <- Sys.getenv(
  "BIOF3_OUTPUT_DIR",
  file.path(project_root, "static", "img", "tutorial", "modules", "module07")
)

dir.create(pbmc_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

pbmc_url <- paste0(
  "https://cf.10xgenomics.com/samples/cell-exp/1.1.0/pbmc3k/",
  "pbmc3k_filtered_gene_bc_matrices.tar.gz"
)
pbmc_tar <- file.path(pbmc_dir, "pbmc3k_filtered_gene_bc_matrices.tar.gz")
matrix_dir <- file.path(pbmc_dir, "filtered_gene_bc_matrices", "hg19")

# ---- 依赖 ---------------------------------------------------------------
suppressPackageStartupMessages({
  library(Matrix)
  library(Seurat)
  library(CellChat)
  library(patchwork)
  library(ggplot2)
  library(dplyr)
  library(scales)
  library(RColorBrewer)
  library(igraph)
})

# ---- 下载 PBMC 3k -------------------------------------------------------
if (!file.exists(pbmc_tar)) {
  message("下载 PBMC 3k...")
  download.file(pbmc_url, destfile = pbmc_tar, mode = "wb", quiet = FALSE)
}
if (!file.exists(file.path(matrix_dir, "matrix.mtx"))) {
  message("解压 PBMC 3k...")
  utils::untar(pbmc_tar, exdir = pbmc_dir)
}

# ---- 读入 + 预处理 ------------------------------------------------------
message("读取 PBMC 3k...")
counts <- Seurat::Read10X(data.dir = matrix_dir)
pbmc <- Seurat::CreateSeuratObject(counts, project = "PBMC3K",
                                   min.cells = 3, min.features = 200)
pbmc[["percent.mt"]] <- Seurat::PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)

pbmc <- NormalizeData(pbmc, verbose = FALSE)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
pbmc <- ScaleData(pbmc, verbose = FALSE)
pbmc <- RunPCA(pbmc, npcs = 30, verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:15, verbose = FALSE)
pbmc <- FindClusters(pbmc, resolution = 0.5, verbose = FALSE)

# 粗注释（同 module06 trajectory 的思路）
marker_signature <- list(
  "B"        = c("MS4A1", "CD79A"),
  "NK"       = c("GNLY", "NKG7"),
  "Monocyte" = c("CD14", "LYZ"),
  "T CD4"    = c("IL7R", "CCR7", "CD4"),
  "T CD8"    = c("CD8A", "CD8B"),
  "DC"       = c("FCER1A", "CST3"),
  "Platelet" = "PPBP"
)
for (ct in names(marker_signature)) {
  pbmc <- AddModuleScore(pbmc, features = list(marker_signature[[ct]]),
                          name = paste0("score_", ct), nbin = 12)
}
score_mat <- pbmc@meta.data[, paste0("score_", names(marker_signature), "1"), drop = FALSE]
colnames(score_mat) <- names(marker_signature)
pbmc$celltype <- factor(
  colnames(score_mat)[apply(score_mat, 1, which.max)],
  levels = names(marker_signature)
)

message("细胞数：", ncol(pbmc), "；每类细胞数：")
print(table(pbmc$celltype))

# ---- CellChat ------------------------------------------------------------
message("构建 CellChat 对象...")
data.input <- GetAssayData(pbmc, assay = "RNA", layer = "data")
meta <- data.frame(
  labels = pbmc$celltype,
  row.names = colnames(pbmc)
)
cellchat <- createCellChat(object = data.input, meta = meta, group.by = "labels")
cellchat@DB <- CellChatDB.human

message("过表达基因和 L-R 对...")
cellchat <- subsetData(cellchat)
future::plan("sequential")   # 避免 parallel 带来的问题
cellchat <- identifyOverExpressedGenes(cellchat)
cellchat <- identifyOverExpressedInteractions(cellchat)

message("计算通讯概率...")
cellchat <- computeCommunProb(cellchat, type = "triMean", trim = 0.1, raw.use = TRUE)
cellchat <- filterCommunication(cellchat, min.cells = 10)
cellchat <- computeCommunProbPathway(cellchat)
cellchat <- aggregateNet(cellchat)
cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP")

message("激活的信号通路：", paste(cellchat@netP$pathways, collapse = ", "))

# ---- 保存辅助 -----------------------------------------------------------
save_plot <- function(expr, name, width = 8, height = 6) {
  png(file.path(output_dir, name), width = width, height = height, units = "in", res = 300, bg = "white")
  on.exit(dev.off(), add = TRUE)
  print(expr)
  invisible(NULL)
}

save_base <- function(expr, name, width = 8, height = 6) {
  png(file.path(output_dir, name), width = width, height = height, units = "in", res = 300, bg = "white")
  expr
  dev.off()
}

# ---- 图 1：通讯数量圆圈图 ----------------------------------------------
message("图 1：通讯数量")
groupSize <- as.numeric(table(cellchat@idents))
save_base({
  netVisual_circle(
    cellchat@net$count,
    vertex.weight = groupSize,
    weight.scale = TRUE,
    label.edge = FALSE,
    title.name = "Number of interactions"
  )
}, "01-count-circle.png", width = 7.5, height = 7)

# ---- 图 2：通讯强度圆圈图 ----------------------------------------------
message("图 2：通讯强度")
save_base({
  netVisual_circle(
    cellchat@net$weight,
    vertex.weight = groupSize,
    weight.scale = TRUE,
    label.edge = FALSE,
    title.name = "Interaction strength"
  )
}, "02-weight-circle.png", width = 7.5, height = 7)

# ---- 图 3：每类细胞作为 sender 的通讯网络 -------------------------------
message("图 3：每类细胞的发送网络")
mat <- cellchat@net$weight
senders <- rownames(mat)
# 只画前 6 个 cluster（PBMC 只有 7 类，排除 Platelet 如果太少）
show_senders <- senders[1:min(6, length(senders))]

png(file.path(output_dir, "03-senders-grid.png"),
    width = 12, height = 8, units = "in", res = 300, bg = "white")
par(mfrow = c(2, 3), xpd = TRUE, mar = c(1, 1, 3, 1))
for (s in show_senders) {
  mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
  mat2[s, ] <- mat[s, ]
  netVisual_circle(
    mat2,
    vertex.weight = groupSize,
    weight.scale = TRUE,
    edge.weight.max = max(mat),
    title.name = s
  )
}
dev.off()

# ---- 图 4：信号通路热图（outgoing / incoming）-------------------------
message("图 4：sender/receiver 角色热图")
ht1 <- netAnalysis_signalingRole_heatmap(
  cellchat, pattern = "outgoing",
  width = 5, height = 7, font.size = 10
)
ht2 <- netAnalysis_signalingRole_heatmap(
  cellchat, pattern = "incoming",
  width = 5, height = 7, font.size = 10
)
png(file.path(output_dir, "04-signaling-role.png"),
    width = 12, height = 8, units = "in", res = 300, bg = "white")
ComplexHeatmap::draw(ht1 + ht2, ht_gap = grid::unit(1, "cm"))
dev.off()

# ---- 图 5：MHC-II 信号通路（PBMC 里很可靠存在）-------------------------
message("图 5：MHC-II 信号通路")
pw <- cellchat@netP$pathways
pick_pathway <- intersect(c("MHC-II", "MHC-I", "APP", "CD45", "CLEC"), pw)[1]
if (is.na(pick_pathway)) pick_pathway <- pw[1]
message("  选通路：", pick_pathway)

save_base({
  netVisual_aggregate(cellchat, signaling = pick_pathway, layout = "chord")
}, "05-pathway-chord.png", width = 8.5, height = 8)

# ---- 图 6：气泡图（top interactions）-----------------------------------
message("图 6：top L-R 气泡图")
p6 <- netVisual_bubble(
  cellchat,
  sources.use    = seq_len(nlevels(cellchat@idents)),
  targets.use    = seq_len(nlevels(cellchat@idents)),
  remove.isolate = FALSE,
  angle.x        = 45,
  thresh         = 0.05
)
ggsave(file.path(output_dir, "06-bubble-topLR.png"), p6,
       width = 12, height = 10, dpi = 300, bg = "white")

# ---- 输出清单 -----------------------------------------------------------
message("=== 已生成 module07 细胞通讯图 ===")
print(list.files(output_dir, pattern = "\\.png$", full.names = FALSE))
message("输出目录：", normalizePath(output_dir))
message("完成。")

sessionInfo()
