minfi:Analyze Illumina Infinium DNA methylation arrays
1. 数据读入
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("minfi",force = TRUE) BiocManager::install("minfiData") library(minfi) library(minfiData) ### 1. 数据读入 # RGsetEx # ## RGsetEx: RGChannelSet, 622,399 features # class(RGsetEx) #"RGChannelSet" # MsetEx <- preprocessRaw(RGsetEx) # class(MsetEx) # "MethylSet" # ## MsetEx: MethylSet, 485,512 features # GMsetEx <- mapToGenome(MsetEx) # class(GMsetEx) #"GenomicMethylSet" # ## GMsetEx: GenomicMethylSet, 485,512 features ## ----baseDir------------------------------------------------------------------ baseDir <- system.file("extdata", package = "minfiData") list.files(baseDir) list.files(file.path(baseDir, "5723646052")) ## 读Illumina methylation sample sheet(csv结尾的文件),含有样本信息以及文件位置 targets <- read.metharray.sheet(baseDir) # targets 为data.frame # Usage # read.metharray.sheet(base, pattern = "csv$", ignore.case = TRUE, # recursive = TRUE, verbose = TRUE) # sub(baseDir, "", targets$Basename) # 替换 ## 通过样本信息(targets为data.frame)读取甲基化芯片数据idat RGset <- read.metharray.exp(targets = targets) class(RGset) # "RGChannelSet" # 只读取某一文件夹下面的idat文件 RGset2 <- read.metharray.exp(file.path(baseDir, "5723646052")) # 读取baseDir下所有子文件夹下面的idat文件 RGset3 <- read.metharray.exp(baseDir, recursive = TRUE)
2. 质控
复制代码
1
2
3
4
5
6
7
8
9
10
11### 2. 质控 ## 甲基化β值的豆状密度图。 par(mar=c(5,6,4,2)) # c(bottom, left, top, right) densityBeanPlot(RGset, sampNames=names, sampGroups=group) ## β值密度曲线 densityPlot(RGset,sampGroups=group) # 检测失败的位点 detP<- detectionP(RGset, type = "m+u") failed <- detP>0.01 colMeans(failed) # Fraction of failed positions per sample sum(rowMeans(failed)>0.5) # How many positions failed in >50% of samples
3.数据类型转化
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19### 3.数据类型转化 # 产生没有标准的MethylSet类型 Mset <- preprocessRaw(RGset) # quantile normalization gmSet <- preprocessQuantile(Mset) # GMset <- mapToGenome(Mset) # getMeth(GMset) # getUnmeth(GMset) ## 对于RGset,Mset,GMset, gmSet对象 都有这些函数 pd <- pData(gmSet) # 表型数据 getBeta(gmSet) group <- pd$Sample_Group # 样品分组 annotation(gmSet) # 注释信息 getAnnotatio(gmSet) ids <- rownames(gmSet) # 探针id名 names <- colnames(gmSet) # 样品名 beta <- getBeta(gmSet) #β值 #assays(RGset)[[1]] # assays(RGset)$Green #assays(RGset)[2] # assays(RGset)$Red
4. 读入TCGA数据和GEO数据
复制代码
1
2
3
4
5
6
7
8### 4. 读入TCGA数据和GEO数据 data_file <- "/Users/zhengxueming/test/test_READ.methylation450.tsv" gmSet_tcga <- readTCGA(data_file) class(gmSet_tcga) #"GenomicRatioSet" ?readGEORawFile ?read.metharray ?read.metharray.exp ?read.metharray.sheet
5.甲基化差异化分析
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51### 5.甲基化差异化分析 ## 检测差异甲基化位点 # 要用normalization后的gmSet数据 pd <- pData(gmSet) beta <- getBeta(gmSet) group <- pd$Sample_Group dmp <- dmpFinder(beta, pheno = group, type = "categorical") class(dump) # "data.frame" # Usage # dmpFinder(dat, pheno, type = c("categorical", "continuous"), # qCutoff = 1, shrinkVar = FALSE) # Continuous phenotypes are tested with linear regression, # while an F-test is used for categorical phenotypes. head(dmp) ## 选择统计学差异的位点 # sum(dmp$qval < 0.05, na.rm=TRUE) dmp_sig <- dmp[which(dmp$qval < 0.05),] head(dmp_sig) ##检测差异甲基化区域 designMatrix <- model.matrix(~ gmSet$status) #运行时间较长,并行处理 require(doParallel) registerDoParallel(cores = 2) bumps <- bumphunter(gmSet, design = designMatrix, B = 10, type = "Beta", cutoff = 0.25) class(bumps) #"list" names(bumps) dim(bumps$table) head(bumps$coef) length(bumps$coef) head(bumps$pvaluesMarginal) bumps$pvaluesMarginal head(bumps$fitted) bumps_table <- bumps$table dim(bumps_table) head(bumps_table) summary(bumps_table$fwer) bumps_table_sig <- bumps_table[which(bumps_table$p.value < 0.01),] head(bumps_table_sig) dim(bumps_table_sig) bumps_table_sig[which(bumps_table_sig$chr=="chr7"),] # dat <- dummyData() # # Enable parallelization # require(doParallel) # registerDoParallel(cores = 2) # # Find bumps # dmrs <- bumphunter(dat$mat, design=dat$design, chr=dat$chr, pos=dat$pos, # cluster=dat$cluster, coef=2, cutoff= 0.28, nullMethod="bootstrap", # smooth=TRUE, B=250, verbose=TRUE, # smoothFunction=loessByCluster) # head(dmrs$table[,1:4], n =3)
参考:
https://www.bioconductor.org/packages/release/bioc/manuals/minfi/man/minfi.pdf
最后
以上就是傲娇发带最近收集整理的关于R包minfi处理DNA甲基化芯片数据的全部内容,更多相关R包minfi处理DNA甲基化芯片数据内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复