我是靠谱客的博主 傲娇发带,这篇文章主要介绍R包minfi处理DNA甲基化芯片数据,现在分享给大家,希望可以做个参考。

minfi:Analyze Illumina Infinium DNA methylation arrays

1. 数据读入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
if (!require("BiocManager", quietly = TRUE))   install.packages("BiocManager") BiocManager::install("minfi",force = TRUE) BiocManager::install("minfiData") library(minfi) library(minfiData) ### 1. 数据读入 # RGsetEx # ## RGsetEx: RGChannelSet, 622,399 features # class(RGsetEx) #"RGChannelSet" # MsetEx <- preprocessRaw(RGsetEx)   # class(MsetEx)  # "MethylSet" # ## MsetEx: MethylSet, 485,512 features # GMsetEx <- mapToGenome(MsetEx) # class(GMsetEx) #"GenomicMethylSet" # ## GMsetEx: GenomicMethylSet, 485,512 features ## ----baseDir------------------------------------------------------------------ baseDir <- system.file("extdata", package = "minfiData") list.files(baseDir) list.files(file.path(baseDir, "5723646052")) ## 读Illumina methylation sample sheet(csv结尾的文件),含有样本信息以及文件位置 targets <- read.metharray.sheet(baseDir) # targets 为data.frame # Usage # read.metharray.sheet(base, pattern = "csv$", ignore.case = TRUE, #                      recursive = TRUE, verbose = TRUE)   # sub(baseDir, "", targets$Basename) # 替换 ## 通过样本信息(targets为data.frame)读取甲基化芯片数据idat RGset <- read.metharray.exp(targets = targets) class(RGset) # "RGChannelSet" # 只读取某一文件夹下面的idat文件 RGset2 <- read.metharray.exp(file.path(baseDir, "5723646052")) # 读取baseDir下所有子文件夹下面的idat文件 RGset3 <- read.metharray.exp(baseDir, recursive = TRUE)

2. 质控

复制代码
1
2
3
4
5
6
7
8
9
10
11
### 2. 质控 ## 甲基化β值的豆状密度图。 par(mar=c(5,6,4,2))  # c(bottom, left, top, right) densityBeanPlot(RGset, sampNames=names, sampGroups=group) ## β值密度曲线 densityPlot(RGset,sampGroups=group) # 检测失败的位点 detP<- detectionP(RGset, type = "m+u") failed <- detP>0.01 colMeans(failed) # Fraction of failed positions per sample sum(rowMeans(failed)>0.5) # How many positions failed in >50% of samples

3.数据类型转化

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
### 3.数据类型转化 # 产生没有标准的MethylSet类型 Mset <- preprocessRaw(RGset) # quantile normalization gmSet <- preprocessQuantile(Mset) # GMset <- mapToGenome(Mset) # getMeth(GMset) # getUnmeth(GMset) ## 对于RGset,Mset,GMset, gmSet对象 都有这些函数 pd <- pData(gmSet) # 表型数据 getBeta(gmSet) group <- pd$Sample_Group # 样品分组 annotation(gmSet) # 注释信息 getAnnotatio(gmSet) ids <- rownames(gmSet)  # 探针id名 names <- colnames(gmSet)  # 样品名 beta <- getBeta(gmSet) #β值 #assays(RGset)[[1]]  # assays(RGset)$Green #assays(RGset)[2] # assays(RGset)$Red

4. 读入TCGA数据和GEO数据

复制代码
1
2
3
4
5
6
7
8
### 4. 读入TCGA数据和GEO数据 data_file <- "/Users/zhengxueming/test/test_READ.methylation450.tsv" gmSet_tcga <- readTCGA(data_file) class(gmSet_tcga) #"GenomicRatioSet" ?readGEORawFile ?read.metharray ?read.metharray.exp  ?read.metharray.sheet

5.甲基化差异化分析

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
### 5.甲基化差异化分析 ## 检测差异甲基化位点 # 要用normalization后的gmSet数据 pd <- pData(gmSet) beta <- getBeta(gmSet) group <- pd$Sample_Group dmp <- dmpFinder(beta, pheno = group, type = "categorical") class(dump) # "data.frame" # Usage # dmpFinder(dat, pheno, type = c("categorical", "continuous"), #           qCutoff = 1, shrinkVar = FALSE) # Continuous phenotypes are tested with linear regression,  # while an F-test is used for categorical phenotypes. head(dmp) ## 选择统计学差异的位点 # sum(dmp$qval < 0.05, na.rm=TRUE) dmp_sig <- dmp[which(dmp$qval < 0.05),] head(dmp_sig) ##检测差异甲基化区域 designMatrix <- model.matrix(~ gmSet$status) #运行时间较长,并行处理 require(doParallel) registerDoParallel(cores = 2)  bumps <- bumphunter(gmSet, design = designMatrix, B = 10,                     type = "Beta", cutoff = 0.25) class(bumps) #"list" names(bumps)  dim(bumps$table) head(bumps$coef) length(bumps$coef) head(bumps$pvaluesMarginal) bumps$pvaluesMarginal head(bumps$fitted) bumps_table <- bumps$table dim(bumps_table) head(bumps_table) summary(bumps_table$fwer) bumps_table_sig <- bumps_table[which(bumps_table$p.value < 0.01),] head(bumps_table_sig) dim(bumps_table_sig) bumps_table_sig[which(bumps_table_sig$chr=="chr7"),] # dat <- dummyData() # # Enable parallelization # require(doParallel) # registerDoParallel(cores = 2) # # Find bumps # dmrs <- bumphunter(dat$mat, design=dat$design, chr=dat$chr, pos=dat$pos, #                     cluster=dat$cluster, coef=2, cutoff= 0.28, nullMethod="bootstrap", #                     smooth=TRUE, B=250, verbose=TRUE, #                     smoothFunction=loessByCluster) # head(dmrs$table[,1:4], n =3)

参考:

https://www.bioconductor.org/packages/release/bioc/manuals/minfi/man/minfi.pdf


 

最后

以上就是傲娇发带最近收集整理的关于R包minfi处理DNA甲基化芯片数据的全部内容,更多相关R包minfi处理DNA甲基化芯片数据内容请搜索靠谱客的其他文章。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(54)

评论列表共有 0 条评论

立即
投稿
返回
顶部