概述
MultiBaC包: 能够消除在不同批次中生成的不同组学之间的批次效应。因此,MultiBaC是一种利用现有数据集跨组学技术进行元分析的有效工具。
1. 包和演示数据集的加载
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("MultiBaC")
library(MultiBaC)
data("multiyeast")
2. 处理单一组学数据
# createMbac {MultiBaC}:This function creates a list object to be used by
# MultiBaC function from a set of matrix R objects.
# batch source :different labs generating data, time or lab technician etc.
data_RNA <- createMbac (inputOmics = list(A.rna, B.rna, C.rna),
batchFactor = c("A", "B", "C"),
experimentalDesign = list("A" = c("Glu+", "Glu+",
"Glu+", "Glu-",
"Glu-", "Glu-"),
"B" = c("Glu+", "Glu+",
"Glu-", "Glu-"),
"C" = c("Glu+", "Glu+",
"Glu-", "Glu-")),
omicNames = "RNA")
custom_col <- c("brown2", "dodgerblue", "forestgreen") # 颜色表示不同批次
custom_pch <- c(19,19,19,1,1,1,
19,19,1,1,
19,19,1,1) # 形状表示样品的不同处理
### 原始数据,PCA作图查看批次效应
# ?plot.mbac
plot(data_RNA, typeP="pca.org",
bty = "L", # 只有下框
pch = custom_pch, # 点形状表示样本处理方式
cex = 3, # 点的大小
col.per.group = custom_col, # 不同批次颜色
# 注释文字
legend.text = c("Color: Batch", names(data_RNA$ListOfBatches),
"Fill: Cond.", unique(colData(data_RNA$ListOfBatches$A)$tfactor)),
# 注释位置,形状,大小,颜色等
args.legend = list("x" = "topright",
"pch" = c(NA, 15, 15, 15,
NA, 15, 0),
"col" = c(NA, "brown2", "dodgerblue", "forestgreen",
NA, 1, 1),
"bty" = "n",
"cex" = 1))
par(mfrow = c(1,2))
### 去除批次效应 (ARSyN:ASCA Removal of Systematic Noise)
#Interaction = FALSE, experimental factors和bacth factor没有相互作用
arsyn_1 <- ARSyNbac(data_RNA, modelName = "RNA", Variability = 0.95,
batchEstimation = TRUE,
Interaction = FALSE)
# 校正后数据的PCA作图
plot(arsyn_1, typeP="pca.cor", bty = "L",
pch = custom_pch, cex = 3, col.per.group = custom_col,
legend.text = c("Color: Batch", names(data_RNA$ListOfBatches),
"Fill: Cond.", unique(colData(data_RNA$ListOfBatches$A)$tfactor)),
args.legend = list("x" = "topright",
"pch" = c(NA, 15, 15, 15,
NA, 15, 0),
"col" = c(NA, "brown2", "dodgerblue", "forestgreen",
NA, 1, 1),
"bty" = "n",
"cex" = 1.2))
# plot函数同时显示校正前和校正后的PCA图
plot(arsyn_1, typeP="pca.both", bty = "L",
pch = custom_pch, cex = 1, col.per.group = custom_col,
legend.text = c("Color: Batch", names(data_RNA$ListOfBatches),
"Fill: Cond.", unique(colData(data_RNA$ListOfBatches$A)$tfactor)),
args.legend = list("x" = "topright",
"pch" = c(NA, 15, 15, 15,
NA, 15, 0),
"col" = c(NA, "brown2", "dodgerblue", "forestgreen",
NA, 1, 1),
"bty" = "n",
"cex" = 0.5))
# plot_pca函数显示校正前和校正后的PCA图
plot_pca(arsyn_1, col.by.batch = TRUE, col.per.group = NULL,
comp2plot = c(1, 2), typeP = "pca.both",
legend.text = c("Color: Batch", names(data_RNA$ListOfBatches),
"Fill: Cond.", unique(colData(data_RNA$ListOfBatches$A)$tfactor)),
args.legend = list("x" = "topright",
"pch" = c(NA, 15, 15, 15,
NA, 15, 0),
"col" = c(NA, "brown2", "dodgerblue", "forestgreen",
NA, 1, 1),
"bty" = "n",
"cex" = 0.5))
par(mfrow = c(1,2))
# Interaction = TRUE, experimental factors和bacth factor有相互作用
arsyn_2 <- ARSyNbac(data_RNA, modelName = "RNA", Variability = 0.95,
batchEstimation = TRUE,
Interaction = TRUE)
plot(arsyn_2, typeP="pca.cor", bty = "L",
pch = custom_pch, cex = 3, col.per.group = custom_col,
legend.text = c("Color: Batch", names(data_RNA$ListOfBatches),
"Fill: Cond.", unique(colData(data_RNA$ListOfBatches$A)$tfactor)),
args.legend = list("x" = "topright",
"pch" = c(NA, 15, 15, 15,
NA, 15, 0),
"col" = c(NA, "brown2", "dodgerblue", "forestgreen",
NA, 1, 1),
"bty" = "n",
"cex" = 1.2))
# Displays the structure and the content of the object of class mbac.
summary(data_RNA)
3. 处理多组学数据
# 不同batch要含有共同的组学数据,这里为rna数据。
my_mbac <- createMbac (inputOmics = list(A.rna, A.gro,
B.rna, B.ribo,
C.rna, C.par),
batchFactor = c("A", "A",
"B", "B",
"C", "C"),
experimentalDesign = list("A" = c("Glu+", "Glu+",
"Glu+", "Glu-",
"Glu-", "Glu-"),
"B" = c("Glu+", "Glu+",
"Glu-", "Glu-"),
"C" = c("Glu+", "Glu+",
"Glu-", "Glu-")),
omicNames = c("RNA", "GRO",
"RNA", "RIBO",
"RNA", "PAR"))
# This function uses linear models to estimate the batch effect
# magnitude using the common data across batches.
batchEstPlot(my_mbac)
## 一步法校正
# MultiBaC: performs a multi-omic, multi-batch correction
my_final_mbac <- MultiBaC (my_mbac,
test.comp = NULL, scale = FALSE,
center = TRUE, crossval = NULL,
Interaction = FALSE,
Variability = 0.9,
showplot = TRUE,
showinfo = TRUE)
## 分多步校正,结果一样
# genModelList {MultiBaC}:This function performs PLS models for every batch.
# A PLS model is generated for each non-common omic in each batch.
# PLS:multivariate partial least squares (PLS)
my_mbac_2 <- genModelList (my_mbac, test.comp = NULL,
scale = FALSE, center = TRUE,
crossval = NULL,
showinfo = TRUE)
# genMissingOmics {MultiBaC}:This function generates for all the batches
# the omic data they had not originally.
# This is the previous step to apply ARSyNbac [1] correction.
multiBatchDesign <- genMissingOmics(my_mbac_2)
# batchCorrection {MultiBaC}: This function performs the ARSyNbac
# correction [1] for each omic contained in mulBatchDesign input object.
my_finalwise_mbac <- batchCorrection(my_mbac_2,
multiBatchDesign = multiBatchDesign,
Interaction = FALSE,
Variability = 0.90)
plot(my_final_mbac, typeP = "batch")
plot (my_finalwise_mbac, typeP = "batch")
# 校正前
plot (my_final_mbac, typeP = "pca.org",
cex.axis = 1, cex.lab = 1, cex = 3, bty = "L",
cex.main = 1.2, pch = 19)
# 校正后
plot (my_final_mbac, typeP = "pca.cor",
cex.axis = 1, cex.lab = 1, cex = 3, bty = "L",
cex.main = 1.2, pch = 19)
custom_col <- c("brown2", "dodgerblue", "forestgreen")
custom_pch <- c(19,19,19,1,1,1,15,15,15,0,0,0, # batch A
19,19,1,1,17,17,2,2, # batch B
19,19,1,1,18,18,5,5) # batch C
plot(my_final_mbac, typeP = "pca.both", col.by.batch = TRUE,
col.per.group = custom_col, comp2plot = 1:3,
cex.axis = 1.3, cex.lab = 1.2, cex = 3, bty = "L",
cex.main = 1.7, pch = custom_pch,
legend.text = c("Color", names(my_final_mbac$ListOfBatches),
"Shape", c("RNA", "GRO", "RIBO", "PAR"),
"Fill", levels(colData(my_final_mbac$ListOfBatches$A)$tfactor)),
args.legend = list("x" = "topright",
"pch" = c(NA, 15, 15, 15,
NA, 19, 15, 17, 18,
NA, 19, 1),
"col" = c(NA, "brown2", "dodgerblue", "forestgreen",
NA, rep(1, 4),
NA, 1, 1),
"bty" = "n",
"cex" = 2))
# Q2_plot(mbac, ...), Q2 plot of PLS models is displayed.
# mbac:Object of class mbac generated by *MultiBaC* or *genModelList*.
Q2_plot(my_final_mbac)
4. 提取校正后的表达矩阵
### 得到校正后的表达矩阵
A_rna_cor <- assays(my_final_mbac$CorrectedData$A)$RNA
AA_rna_cor <- assays(my_finalwise_mbac$CorrectedData$A)$RNA
head(A_rna_cor)
head(AA_rna_cor)
table(A_rna_cor==AA_rna_cor)
head(A.rna)
最后
以上就是动听硬币为你收集整理的MultiBaC包消除不同组学数据之间的批次效应的全部内容,希望文章能够帮你解决MultiBaC包消除不同组学数据之间的批次效应所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复