当前位置: 首页 > news >正文

WNN 多模态整合 | Seurat 单细胞多组学整合流程

测试环境:CentOS7.9, R4.3.2, Seurat 4.4.0, SeuratObject 4.1.4
2024.10.23

# WNN
library(ggplot2)
library(dplyr)
library(patchwork)

1. 导入数据

(1). load counts of RNA and protein

# Load the RNA and protein (ADT) count matrices from one 10x HDF5 file.
# The HDF5 high-level shared library is loaded explicitly before hdf5r is
# attached (presumably because it is installed in a non-standard location).
dyn.load('/home/wangjl/.local/lib/libhdf5_hl.so.100')
library(hdf5r)
library(Seurat)
dat <- Read10X_h5("/datapool/wangjl/others/hanlu/raw/GSE210079/GSM6459763_32-3mo_raw_feature_bc_matrix.h5")
str(dat)
# Two matrices: RNA and 55 proteins.
names(dat) # "Gene Expression"  "Antibody Capture"
str(dat$`Gene Expression`)
dat$`Gene Expression`[1:4, 1:5]
# make sure cell id are the same
all.equal(colnames(dat[["Gene Expression"]]), colnames(dat[["Antibody Capture"]])) # TRUE

(2). use RNA data to create Obj

# Build the Seurat object from the RNA counts; the project label is "A1".
scRNA <- CreateSeuratObject(
  counts = dat[["Gene Expression"]],
  project = "A1"
)

(3). add protein mat

# https://zhuanlan.zhihu.com/p/567253121
# Wrap the antibody counts in their own assay and attach it under "ADT".
adt_assay <- CreateAssayObject(counts = dat[["Antibody Capture"]])
scRNA[["ADT"]] <- adt_assay

# (4). check
# protein names
rownames(scRNA[["ADT"]])
# assays
Assays(scRNA) # "RNA" "ADT"
# check default assay, or change default assay
DefaultAssay(scRNA) # "RNA"

2. 每个模态分别分析

要分别分析到PCA结束。

bm <- scRNA

## QC ====
bm # 655671 (presumably the number of barcodes before filtering)
# Percentage of counts coming from features whose name starts with "MT-".
bm[["percent.mt"]] <- PercentageFeatureSet(bm, pattern = "^MT-")
# VlnPlot(bm, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(bm, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(bm, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
# Dashed red lines mark the thresholds used by the filter step
# (percent.mt = 10; nFeature_RNA = 300 and 5000).
(plot1 + geom_hline(yintercept = 10, linetype = 2, color = "red")) +
  (plot2 + geom_hline(yintercept = c(300, 5000), linetype = 2, color = "red")) # fig1

在这里插入图片描述
Fig1

(1)Filter

# Keep cells with 300 < nFeature_RNA < 5000 and percent.mt < 10.
bm <- subset(
  bm,
  subset = nFeature_RNA > 300 & nFeature_RNA < 5000 & percent.mt < 10
)
bm # 7837

(2)for RNA

# RNA modality: normalize, pick 3000 variable features, scale, then PCA.
DefaultAssay(bm) <- "RNA"
bm <- NormalizeData(bm) %>%
  FindVariableFeatures(nfeatures = 3000) %>%
  ScaleData() %>%
  # RunPCA takes the number of components via `npcs`; it has no `dims`
  # argument, so the component count is stated with the documented name.
  RunPCA(npcs = 50)
DimPlot(bm, reduction = "pca")
ElbowPlot(bm, ndims = 50) # fig2

(3)for protein

# Protein (ADT) modality: CLR-normalize, scale, then PCA.
DefaultAssay(bm) <- "ADT"
# we will use all ADT features for dimensional reduction
VariableFeatures(bm) <- rownames(bm[["ADT"]])
bm <- NormalizeData(bm, normalization.method = "CLR", margin = 2)
bm <- ScaleData(bm)
# we set a dimensional reduction name ("apca") to avoid overwriting the
# RNA PCA that is stored under "pca"
bm <- RunPCA(bm, reduction.name = "apca")
ElbowPlot(bm, ndims = 50, reduction = "apca") # fig2

在这里插入图片描述
Fig2

3. 整合模态

# Identify multimodal neighbors. These will be stored in the neighbors slot,
# and can be accessed using bm2[['weighted.nn']] (weighted nearest neighbors).
# The WNN graph can be accessed at bm2[["wknn"]] (weighted kNN graph),
# and the SNN graph used for clustering at bm2[["wsnn"]] (weighted SNN graph).
# Cell-specific modality weights can be accessed at bm2$RNA.weight.
bm2 <- FindMultiModalNeighbors(
  bm,
  reduction.list = list("pca", "apca"),
  dims.list = list(1:30, 1:20)
  # modality.weight.name = c("RNA.weight", "ADT.weight")
  # The weight names must have the same length as reduction.list;
  # otherwise the default, assay + ".weight", is used.
)
bm2@graphs |> names() # [1] "wknn" "wsnn"

4. 基于wnn的下游分析

(1)UMAP和细胞分群

# UMAP on the weighted nearest-neighbor object, then clustering on the
# weighted SNN graph.
bm2 <- RunUMAP(
  bm2,
  nn.name = "weighted.nn",
  reduction.name = "wnn.umap",
  reduction.key = "wnnUMAP_"
)
bm2 <- FindClusters(
  bm2,
  graph.name = "wsnn",
  algorithm = 1,
  resolution = 0.7,
  verbose = TRUE
)
# 0.3,0.4,0.6 too small;  0.8 too large;
# p1 is reused later when the three UMAPs are shown side by side.
p1 <- DimPlot(bm2, reduction = "wnn.umap", label = TRUE, group.by = "wsnn_res.0.7") +
  ggtitle("WNN")
p1 # fig3

在这里插入图片描述
Fig3 (same as Fig8)

(2)模态权重:按cluster统计

# Per-cluster modality weights. bm2 is used here because bm3 is only
# created in the following section.
head(bm2@meta.data)
VlnPlot(
  bm2,
  features = c("RNA.weight", "nFeature_RNA", "ADT.weight", "nFeature_ADT"),
  group.by = "wsnn_res.0.7",
  sort = FALSE, # whether to sort the groups
  pt.size = 0,
  ncol = 2
) + NoLegend() # Fig3B
# For every cell the weights of the two modalities sum to 1.
all(abs((bm2@meta.data$RNA.weight + bm2@meta.data$ADT.weight) - 1) < 1e-10) # TRUE

在这里插入图片描述
Fig3B

5. 和单一模态的比较

# Continue on a copy (bm3) for the single-modality comparison and make
# RNA its default assay.
bm3 <- bm2
DefaultAssay(bm3) <- "RNA" # RNA
DefaultAssay(bm3) # RNA

(1) 单模态UMAP

# One UMAP per modality, each stored under its own reduction name and key.
bm3 <- RunUMAP(bm3, reduction = "pca", dims = 1:30, assay = "RNA",
               reduction.name = "rna.umap", reduction.key = "rnaUMAP_")
bm3 <- RunUMAP(bm3, reduction = "apca", dims = 1:20, assay = "ADT",
               reduction.name = "adt.umap", reduction.key = "adtUMAP_")
bm3@reductions |> names() # [1] "pca"      "apca"     "wnn.umap" "rna.umap" "adt.umap"

p2 <- DimPlot(
  bm3,
  reduction = "rna.umap", # group.by = 'celltype.l2',
  label = TRUE, # label.size = 2.5,
  repel = TRUE
) + ggtitle("RNA") + NoLegend()
p3 <- DimPlot(
  bm3,
  reduction = "adt.umap", # group.by = 'celltype.l2',
  label = TRUE, # label.size = 2.5,
  repel = TRUE
) + ggtitle("ADT") + NoLegend()
# p1 is the WNN UMAP plot created in the WNN clustering step.
p2 + p3 + p1 # Fig3

# Disabled example kept for reference; the condition is always false.
if (FALSE) {
  p4 <- FeaturePlot(bm3, features = c("adt_CD45RA", "adt_CD14.1", "adt_CD161"),
                    reduction = "wnn.umap", max.cutoff = 2,
                    cols = c("lightgrey", "darkgreen"), ncol = 3)
  p5 <- FeaturePlot(bm3, features = c("rna_PTPRC", "rna_CD14", "rna_KLRB1"),
                    reduction = "wnn.umap", max.cutoff = 3, ncol = 3)
  p4 / p5
}

# Look up the exact feature names before plotting them.
grep("CD45", bm3@assays$ADT@var.features, value = TRUE) # "CD45RA" "CD45"   "CD4.1"  "CD45RO"
grep("FCGR3A", rownames(bm3@assays$RNA@counts), value = TRUE)
FeatureScatter(bm3, feature1 = "adt_CD4.1", feature2 = "adt_CD8a")
FeatureScatter(bm3, feature1 = "adt_CD45", feature2 = "adt_CD8a") # Fig4

在这里插入图片描述
Fig4

# RNA UMAP: protein markers (top row) and the matching transcripts
# (bottom row) drawn on the RNA-only embedding.
pC1 <- FeaturePlot(
  bm3,
  features = c("adt_CD45RA", "adt_CD45RO", "adt_CD3", "adt_CD4.1", "adt_CD8a", "adt_CD19.1"),
  reduction = "rna.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "darkgreen"),
  ncol = 6
) & NoLegend()
pC1
pC2 <- FeaturePlot(
  bm3,
  features = c("rna_PTPRC", "rna_CCR7", "rna_CD3D", "rna_CD4", "rna_CD8A", "rna_CD19"),
  reduction = "rna.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "navy"),
  ncol = 6
) & NoLegend()
pC2
pC1 / pC2 # Fig5

在这里插入图片描述
Fig5

# ADT UMAP: protein markers (top row) and the matching transcripts
# (bottom row) drawn on the ADT-only embedding.
pC1 <- FeaturePlot(
  bm3,
  features = c("adt_CD45RA", "adt_CD45RO", "adt_CD3", "adt_CD4.1", "adt_CD8a", "adt_CD19.1"),
  reduction = "adt.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "darkgreen"),
  ncol = 6
) & NoLegend()
pC1
pC2 <- FeaturePlot(
  bm3,
  features = c("rna_PTPRC", "rna_CCR7", "rna_CD3D", "rna_CD4", "rna_CD8A", "rna_CD19"),
  reduction = "adt.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "navy"),
  ncol = 6
) & NoLegend()
pC2
pC1 / pC2 # Fig6

在这里插入图片描述
Fig6

# WNN UMAP: protein markers (top row) and the matching transcripts
# (bottom row) drawn on the weighted-nearest-neighbor embedding.
pC1 <- FeaturePlot(
  bm3,
  features = c("adt_CD45RA", "adt_CD45RO", "adt_CD3", "adt_CD4.1", "adt_CD8a", "adt_CD19.1"),
  reduction = "wnn.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "darkgreen"),
  ncol = 6
) & NoLegend()
pC1
pC2 <- FeaturePlot(
  bm3,
  features = c("rna_PTPRC", "rna_CCR7", "rna_CD3D", "rna_CD4", "rna_CD8A", "rna_CD19"),
  reduction = "wnn.umap",
  max.cutoff = 2,
  cols = c("lightgrey", "navy"),
  ncol = 6
) & NoLegend()
pC2
pC1 / pC2 # Fig7

在这里插入图片描述
Fig7 效果似乎不好,CD4+和CD8+依旧不清晰。
也没有其他更优的参数可以调试。
也就是wnn不一定适合所有该类型(RNA + ADT)的样本。

(2) 单模态细胞聚类/cell cluster

# RNA-only clustering: neighbor graph on the first 30 RNA PCs.
DefaultAssay(bm3) <- "RNA"
bm3@graphs |> names() # [1] "wknn" "wsnn"
bm3 <- FindNeighbors(bm3, dims = 1:30, reduction = "pca")
bm3@graphs |> names() # [1] "wknn"    "wsnn"    "RNA_nn"  "RNA_snn"
bm3 <- FindClusters(bm3, graph.name = "RNA_snn", algorithm = 1, resolution = 0.5)
table(bm3@meta.data$RNA_snn_res.0.5)

# ADT-only clustering: neighbor graph on the first 20 ADT PCs.
DefaultAssay(bm3) <- "ADT"
DefaultAssay(bm3) # ADT
bm3 <- FindNeighbors(bm3, dims = 1:20, reduction = "apca")
bm3@graphs |> names() # [1] "wknn"    "wsnn"    "RNA_nn"  "RNA_snn" "ADT_nn"  "ADT_snn"
bm3 <- FindClusters(bm3, graph.name = "ADT_snn", algorithm = 1, resolution = 0.5)
table(bm3@meta.data$ADT_snn_res.0.5)

# Each embedding is colored by the clustering computed from the same input.
pB1 <- DimPlot(
  bm3,
  reduction = "rna.umap",
  group.by = "RNA_snn_res.0.5",
  label = TRUE, # label.size = 2.5,
  repel = FALSE
) + ggtitle("RNA umap & its cluster")
pB2 <- DimPlot(
  bm3,
  reduction = "adt.umap",
  group.by = "ADT_snn_res.0.5",
  label = TRUE, # label.size = 2.5,
  repel = FALSE
) + ggtitle("ADT umap & its cluster")
pB3 <- DimPlot(bm3, reduction = "wnn.umap", group.by = "wsnn_res.0.7", label = TRUE) +
  ggtitle("WNN")
pB1 + pB2 + pB3 # Fig8

在这里插入图片描述
Fig8 (WNN panel same as Fig3)

  • https://satijalab.org/seurat/articles/weighted_nearest_neighbor_analysis
http://www.lryc.cn/news/467137.html

相关文章:

  • 【Linux】磁盘文件系统(inode)、软硬链接
  • 网安加·百家讲坛 | 徐一丁:金融机构网络安全合规浅析
  • 九、pico+Unity交互开发——触碰抓取
  • 老机MicroServer Gen8再玩 OCP万兆光口+IT直通
  • jmeter 从多个固定字符串中随机取一个值的方法
  • priority_queue (优先级队列的使用和模拟实现)
  • VisionPro 手部骨骼跟踪 Skeletal Hand Tracking 虚拟首饰
  • class 9: vue.js 3 组件化基础(2)父子组件间通信
  • Laravel|Lumen项目配置信息config原理
  • 2024系统分析师考试---论区块链技术及其应用
  • 为您的 Raspberry Pi 项目选择正确的实时操作系统(RTOS)
  • 鸿蒙应用的Tabs 组件怎么使用
  • 第四天 文件操作与异常处理
  • 【密码分析学 笔记】ch3 3.1 差分分析
  • Go:strings包的基本使用
  • uniapp,获取头部高度
  • 开发面试题-更新中...
  • 【Jmeter】jmeter指定jdk版本启动
  • 数据处理利器:图片识别转Excel表格让数据录入变简单
  • 【WPF】中Binding的应用
  • 华为OD机试2024年真题(基站维修工程师)
  • 在MySQL中为啥引入批量键访问(Batch Key Access, BKA)
  • 912.排序数组(归并排序)
  • 使用 cmake 在 x86 系统中为 arm 系统交叉编译程序
  • 软考(网工)——网络规划设计
  • 即插即用特征融合模块,即用即涨点!
  • 蓝桥算法双周赛 第 19 场 小白入门赛
  • Cursor零基础小白教程系列「进阶」 - Cursor 智能代码补全详解(Tab)
  • 数据结构《顺序表》
  • 视频分享网站毕业设计基于SpringBootSSM框架