当前位置：首页 > news >正文

【单细胞-第三节多样本数据分析】

news 2025/7/27 22:46:05

文件在单细胞\5_GC_py\1_single_cell\1.GSE183904.Rmd
GSE183904
数据原文

1.获取临床信息

筛选样本可以参考临床信息

rm(list = ls())
library(tinyarray)
a = geo_download("GSE183904")$pd
head(a)
table(a$Characteristics_ch1) #统计各样本有多少

2.批量读取

学会如何读取特定的样本

if(!file.exists("f.Rdata")){#untar("GSE183904_RAW.tar",exdir = "GSE183904_RAW")fs = dir("GSE183904_RAW/")[c(2,7)] #dir("GSE183904_RAW/"),列出所有文件#为了省点内存只做2个样本，去掉[c(2,7)]即做全部样本f = lapply(paste0("GSE183904_RAW/",fs),read.csv,row.names = 1)#row.names = 1写在lapply的括号里，但是它是read.csv的参数fs = stringr::str_split_i(fs,"_",1)names(f) = fssave(f,file = "f.Rdata")
}
load("f.Rdata")
library(Seurat)
scelist = list()
for(i in 1:length(f)){scelist[[i]] <- CreateSeuratObject(counts = f[[i]], project = names(f)[[i]])print(dim(scelist[[i]]))
}
sce.all = merge(scelist[[1]],scelist[-1])
sce.all = JoinLayers(sce.all)  #连接数据head(sce.all@meta.data)
table(sce.all$orig.ident)

3.质控指标

sce.all[["percent.mt"]] <- PercentageFeatureSet(sce.all, pattern = "^MT-")
sce.all[["percent.rp"]] <- PercentageFeatureSet(sce.all, pattern = "^RP[SL]")
sce.all[["percent.hb"]] <- PercentageFeatureSet(sce.all, pattern = "^HB[^(P)]")head(sce.all@meta.data, 3)VlnPlot(sce.all, features = c("nFeature_RNA","nCount_RNA", "percent.mt","percent.rp","percent.hb"),ncol = 3,pt.size = 0, group.by = "orig.ident")

4.整合降维聚类分群

f = "obj.Rdata"
library(harmony)
if(!file.exists(f)){sce.all = sce.all %>% NormalizeData() %>%  FindVariableFeatures() %>%  ScaleData(features = rownames(.)) %>%  RunPCA(pc.genes = VariableFeatures(.))  %>%RunHarmony("orig.ident") %>% #RunHarmony 包，整合多个样本，处理多样本的必备步骤FindNeighbors(dims = 1:15, reduction = "harmony") %>% FindClusters(resolution = 0.5) %>% RunUMAP(dims = 1:15,reduction = "harmony") %>% #reduction = "harmony"必须写上RunTSNE(dims = 1:15,reduction = "harmony")save(sce.all,file = f)
}
load(f)
ElbowPlot(sce.all)
UMAPPlot(sce.all,label = T)
TSNEPlot(sce.all,label = T)

5.手动注释

markers = read.delim("GCmarker.txt",header = F,sep = ";")
library(tidyr)
markers = separate_rows(markers,V2,sep = ",") #拆分marker
markers = split(markers$V2,markers$V1)
DotPlot(sce.all,features = markers,cols = "RdYlBu")+RotatedAxis()
ggplot2::ggsave("dotplot.png",height = 10,width = 25)
writeLines(paste0(as.character(0:13),","))
names(markers)celltype = read.csv("celltype.csv",header = F) #自己照着DotPlot图填的
celltypenew.cluster.ids <- celltype$V2
names(new.cluster.ids) <- levels(sce.all)
seu.obj <- RenameIdents(sce.all, new.cluster.ids)
save(seu.obj,file = "seu.obj.Rdata")
p1 <- DimPlot(seu.obj, reduction = "tsne", label = TRUE, pt.size = 0.5) + NoLegend()
p1

6.自动注释

SingleR完成自主注释，不同的是scRNA = sce.all

library(celldex)
library(SingleR)
ls("package:celldex")
f = "ref_BlueprintEncode.RData"
if(!file.exists(f)){ref <- celldex::BlueprintEncodeData()save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = sce.all
test = scRNA@assays$RNA@layers$data
rownames(test) = Features(scRNA)
colnames(test) = Cells(scRNA)
pred.scRNA <- SingleR(test = test, ref = ref,labels = ref$label.main, clusters = scRNA@active.ident)
pred.scRNA$pruned.labels
#查看注释准确性 
plotScoreHeatmap(pred.scRNA, clusters=pred.scRNA@rownames, fontsize.row = 9,show_colnames = T)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
levels(scRNA)
p2 <- DimPlot(scRNA, reduction = "tsne",label = T,pt.size = 0.5) + NoLegend()
p1+p2

7.marker基因

找不同细胞类型间的差异基因

f = "markers.Rdata"
if(!file.exists(f)){allmarkers <- FindAllMarkers(seu.obj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)save(allmarkers,file = f)
}
load(f)
head(allmarkers)

如果想自行修改orig.ident:使用下边的代码：

sce.all@meta.data$orig.ident=rep(c("a","b"),times= c(ncol(scelist[[1]]),
ncol(scelistl[[2]])))

查看全文

http://www.lryc.cn/news/529202.html

(java) IO流

2025年1月个人工作生活总结

线性调整器——耗能型调整器

【2025美赛D题】为更美好的城市绘制路线图建模｜建模过程+完整代码论文全解全析

【Numpy核心编程攻略：Python数据处理、分析详解与科学计算】1.28 存储之道：跨平台数据持久化方案

拼车（1094）

基于Python的人工智能患者风险评估预测模型构建与应用研究（下）

＜ OS 有关＞ Android 手机 SSH 客户端 app: connectBot

向量和矩阵算法笔记

uniapp使用uni.navigateBack返回页面时携带参数到上个页面

Python 梯度下降法（二）：RMSProp Optimize

Android Studio 正式版 10 周年回顾，承载 Androider 的峥嵘十年

sem_wait的概念和使用案列

集合的奇妙世界：Python集合的经典、避坑与实战

专业视角深度解析：DeepSeek的核心优势何在？

MySQL 索引存储结构

【ComfyUI专栏】如何使用Git命令行安装非Manager收录节点

python算法和数据结构刷题[1]：数组、矩阵、字符串

数据分析系列--④RapidMiner进行关联分析(案例)

1/30每日一题

vim的多文件操作

设计转换Apache Hive的HQL语句为Snowflake SQL语句的Python程序方法

CAPL与外部接口

无公网IP 外网访问本地部署夫人 hello-algo

实验四 XML

Autosar-Os是怎么运行的？（内存保护）

1.获取临床信息

2.批量读取

3.质控指标

4.整合降维聚类分群

5.手动注释

6.自动注释

7.marker基因

相关文章：