Chapter 6 KEGG analysis
注释包KEGG.db自2012年起就没有再更新过。在clusterProfiler中,enrichKEGG(用于KEGG通路分析)和enrichMKEGG(用于KEGG模块分析)支持在线下载最新的KEGG数据进行富集分析;也可以通过将use_internal_data参数显式设置为TRUE来继续使用KEGG.db。
有了这个功能以后,支持的物种不再局限于先前版本所支持的那些,而可以是KEGG数据库中具有KEGG注释数据的任何物种。用户需要将相应物种名称的缩写(即KEGG物种代码,例如hsa)传递给organism参数,KEGG支持的物种完整列表见 http://www.genome.jp/kegg/catalog/org_list.html 。此外,通过指定organism = "ko",还支持KEGG Orthology(KO)数据库。
clusterProfiler提供了search_kegg_organism()函数,用于帮助搜索所支持的物种。
library(clusterProfiler)
# Look up a supported organism by its KEGG code.
search_kegg_organism("ece", by = "kegg_code")
## kegg_code scientific_name
## 366 ece Escherichia coli O157:H7 EDL933 (EHEC)
## common_name
## 366 <NA>
# Search the supported organisms by scientific name, then inspect the
# dimensions and the first rows of the result.
ecoli <- search_kegg_organism("Escherichia coli", by = "scientific_name")
dim(ecoli)
## [1] 65 3
head(ecoli)
## kegg_code scientific_name
## 361 eco Escherichia coli K-12 MG1655
## 362 ecj Escherichia coli K-12 W3110
## 363 ecd Escherichia coli K-12 DH10B
## 364 ebw Escherichia coli BW2952
## 365 ecok Escherichia coli K-12 MDS42
## 366 ece Escherichia coli O157:H7 EDL933 (EHEC)
## common_name
## 361 <NA>
## 362 <NA>
## 363 <NA>
## 364 <NA>
## 365 <NA>
## 366 <NA>
6.1 KEGG over-representation test
# Load the example `geneList` dataset from the DOSE package.
data(geneList, package = "DOSE")

# Keep the names of the entries whose absolute value exceeds 2.
gene <- names(geneList[abs(geneList) > 2])

# KEGG pathway over-representation test for organism code "hsa".
kk <- enrichKEGG(
  gene = gene,
  organism = "hsa",
  pvalueCutoff = 0.05
)
head(kk)
## ID
## hsa04110 hsa04110
## hsa04114 hsa04114
## hsa04218 hsa04218
## hsa04061 hsa04061
## hsa03320 hsa03320
## hsa04914 hsa04914
## Description
## hsa04110 Cell cycle
## hsa04114 Oocyte meiosis
## hsa04218 Cellular senescence
## hsa04061 Viral protein interaction with cytokine and cytokine receptor
## hsa03320 PPAR signaling pathway
## hsa04914 Progesterone-mediated oocyte maturation
## GeneRatio BgRatio pvalue p.adjust
## hsa04110 11/93 124/7932 1.799177e-07 3.544378e-05
## hsa04114 10/93 128/7932 2.188761e-06 2.155930e-04
## hsa04218 10/93 160/7932 1.613634e-05 9.988265e-04
## hsa04061 8/93 100/7932 2.028074e-05 9.988265e-04
## hsa03320 7/93 76/7932 2.745818e-05 1.081852e-03
## hsa04914 7/93 99/7932 1.502807e-04 4.934216e-03
## qvalue
## hsa04110 3.465782e-05
## hsa04114 2.108123e-04
## hsa04218 9.766778e-04
## hsa04061 9.766778e-04
## hsa03320 1.057863e-03
## hsa04914 4.824802e-03
## geneID
## hsa04110 8318/991/9133/890/983/4085/7272/1111/891/4174/9232
## hsa04114 991/9133/983/4085/51806/6790/891/9232/3708/5241
## hsa04218 2305/4605/9133/890/983/51806/1111/891/776/3708
## hsa04061 3627/10563/6373/4283/6362/6355/9547/1524
## hsa03320 4312/9415/9370/5105/2167/3158/5346
## hsa04914 9133/890/983/4085/6790/891/5241
## Count
## hsa04110 11
## hsa04114 10
## hsa04218 10
## hsa04061 8
## hsa03320 7
## hsa04914 7
输入的Gene ID可以是以下几种类型之一:kegg、ncbi-geneid、ncbi-proteinid 或 uniprot(通过keyType参数指定,默认为kegg)。
6.2 KEGG Gene Set Enrichment Analysis
# KEGG pathway gene set enrichment analysis for organism code "hsa".
# NOTE(review): `nPerm` may be deprecated in newer clusterProfiler
# releases -- confirm against the installed version before reuse.
kk2 <- gseKEGG(
  geneList = geneList,
  organism = "hsa",
  nPerm = 1000,
  minGSSize = 120,
  pvalueCutoff = 0.05,
  verbose = FALSE
)
head(kk2)
## ID Description setSize
## hsa04510 hsa04510 Focal adhesion 188
## hsa04151 hsa04151 PI3K-Akt signaling pathway 322
## hsa03013 hsa03013 RNA transport 131
## hsa05152 hsa05152 Tuberculosis 162
## hsa04062 hsa04062 Chemokine signaling pathway 165
## hsa04218 hsa04218 Cellular senescence 143
## enrichmentScore NES pvalue p.adjust
## hsa04510 -0.4188582 -1.706291 0.001430615 0.02322097
## hsa04151 -0.3482755 -1.497042 0.002614379 0.02322097
## hsa03013 0.4116488 1.735751 0.003095975 0.02322097
## hsa05152 0.3745153 1.630500 0.003154574 0.02322097
## hsa04062 0.3754101 1.633635 0.003184713 0.02322097
## hsa04218 0.4153718 1.772207 0.003194888 0.02322097
## qvalues rank leading_edge
## hsa04510 0.01576976 2183 tags=27%, list=17%, signal=23%
## hsa04151 0.01576976 1997 tags=23%, list=16%, signal=20%
## hsa03013 0.01576976 3383 tags=40%, list=27%, signal=29%
## hsa05152 0.01576976 2823 tags=34%, list=23%, signal=27%
## hsa04062 0.01576976 1298 tags=21%, list=10%, signal=19%
## hsa04218 0.01576976 1155 tags=17%, list=9%, signal=16%
## core_enrichment
## hsa04510 5595/5228/7424/1499/4636/83660/7059/5295/1288/23396/3910/3371/3082/1291/394/3791/7450/596/3685/1280/3675/595/2318/3912/1793/1278/1277/1293/10398/55742/2317/7058/25759/56034/3693/3480/5159/857/1292/3908/3909/63923/3913/1287/3679/7060/3479/10451/80310/1311/1101
## hsa04151 627/2252/7059/92579/5563/5295/6794/1288/7010/3910/3371/3082/1291/4602/3791/1027/90993/3441/3643/1129/2322/1975/7450/596/3685/1942/2149/1280/4804/3675/595/2261/7248/2246/4803/3912/1902/1278/1277/2846/2057/1293/2247/55970/5618/7058/10161/56034/3693/4254/3480/4908/5159/1292/3908/2690/3909/8817/9223/4915/3551/2791/63923/3913/9863/3667/1287/3679/7060/3479/80310/1311/5105/2066/1101
## hsa03013 10460/1978/55110/54913/9688/8894/11260/10799/9631/4116/5042/8761/6396/23165/8662/10248/55706/79833/9775/29107/23636/5905/9513/5901/10775/10557/4927/79902/1981/26986/11171/10762/8480/8891/11097/26019/10940/4686/9972/81929/10556/3646/9470/387082/1977/57122/8563/7514/79023/3837/9818/56000
## hsa05152 820/51806/6772/64581/3126/3112/8767/3654/1054/1051/3458/1520/11151/1594/50617/54205/91860/8877/3329/637/3689/7096/2207/3929/4360/5603/929/533/3452/6850/7124/1509/3569/7097/1378/8772/64170/3119/843/2213/8625/3920/2215/3587/5594/3593/9103/3592/6300/9114/10333/3109/3108/1432/3552
## hsa04062 3627/10563/6373/4283/6362/6355/2921/6364/3576/6352/10663/1230/6772/6347/6351/3055/1237/1236/4067/6354/114/3702/6361/1794/1234/6367/6375/6374/2919/409/4793/2792/6360/5880
## hsa04218
6.3 KEGG Module over-representation test
KEGG模块(KEGG Module)是人工定义的功能单元的集合。在某些情况下,基于KEGG模块的结果更易于直接解读。
# KEGG module over-representation test for organism code "hsa".
mkk <- enrichMKEGG(
  gene = gene,
  organism = "hsa"
)
6.4 KEGG Module Gene Set Enrichment Analysis
# KEGG module gene set enrichment analysis for organism code "hsa".
mkk2 <- gseMKEGG(
  geneList = geneList,
  organism = "hsa"
)