- 针对芯片数据,可用芯片对应的R包,如下:
1.1 Affymetrix芯片处理
# Annotate a random sample of Affymetrix HG-U95Av2 probes with Entrez IDs,
# gene symbols and gene names, then write the IDs to plain-text files.
#
# Run this in a fresh R session rather than clearing the workspace with
# rm(list = ls()), which would also delete unrelated user objects.
library("hgu95av2.db")
ls("package:hgu95av2.db") # available mappings (36 per the original note)

# Flatten each annotation map into a data frame with a probe_id column.
probe2entrezID <- toTable(hgu95av2ENTREZID)
probe2symbol <- toTable(hgu95av2SYMBOL)
probe2genename <- toTable(hgu95av2GENENAME)

# Draw 30 probes at random from those that have an Entrez ID mapping.
my_probe <- sample(unique(mappedLkeys(hgu95av2ENTREZID)), 30)

# Look up each sampled probe; match() keeps the rows in my_probe order and
# yields an all-NA row for a probe absent from a given table.
tmp1 <- probe2symbol[match(my_probe, probe2symbol$probe_id), ]
tmp2 <- probe2entrezID[match(my_probe, probe2entrezID$probe_id), ]
tmp3 <- probe2genename[match(my_probe, probe2genename$probe_id), ]

# One value per line, no header, no row names, no quoting.
write.table(my_probe, "my_probe.txt",
            quote = FALSE, col.names = FALSE, row.names = FALSE)
write.table(tmp1$symbol, "my_symbol.txt",
            quote = FALSE, col.names = FALSE, row.names = FALSE)
write.table(tmp2$gene_id, "my_geneID.txt",
            quote = FALSE, col.names = FALSE, row.names = FALSE)
1.2 Illumina芯片的探针:
# Annotate a random sample of Illumina HumanHT-12 v4 probes with Entrez IDs,
# gene symbols and gene names, printing each lookup result.
library("illuminaHumanv4.db")
ls("package:illuminaHumanv4.db") # list the mappings the package provides

# Flatten each annotation map into a data frame with a probe_id column.
probe2entrezID <- toTable(illuminaHumanv4ENTREZID)
probe2symbol <- toTable(illuminaHumanv4SYMBOL)
probe2genename <- toTable(illuminaHumanv4GENENAME)

# Draw 30 probes at random from those that have an Entrez ID mapping.
my_probe <- sample(unique(mappedLkeys(illuminaHumanv4ENTREZID)), 30)

# Print the annotation rows for the sampled probes, in my_probe order;
# a probe missing from a table shows up as an all-NA row.
probe2symbol[match(my_probe, probe2symbol$probe_id), ]
probe2entrezID[match(my_probe, probe2entrezID$probe_id), ]
probe2genename[match(my_probe, probe2genename$probe_id), ]
- 基因数据
# Convert between Entrez gene IDs and gene symbols using org.Hs.eg.db.
# The example IDs and symbols are sampled from the gene side (right keys)
# of the illuminaHumanv4.db maps.
library("illuminaHumanv4.db")
ls("package:illuminaHumanv4.db")

# 30 random Entrez IDs and 30 random symbols that have at least one probe.
my_entrez_gene <- sample(unique(mappedRkeys(illuminaHumanv4ENTREZID)), 30)
my_symbol_gene <- sample(unique(mappedRkeys(illuminaHumanv4SYMBOL)), 30)

library("org.Hs.eg.db")
ls("package:org.Hs.eg.db")

# Data frame with gene_id and symbol columns.
entrezID2symbol <- toTable(org.Hs.egSYMBOL)

# Entrez ID -> symbol, then symbol -> Entrez ID; rows follow the query order
# and an ID or symbol not present in the table gives an all-NA row.
entrezID2symbol[match(my_entrez_gene, entrezID2symbol$gene_id), ]
entrezID2symbol[match(my_symbol_gene, entrezID2symbol$symbol), ]
此外,还有biomaRt包
# Convert Entrez gene IDs to HGNC symbols and Ensembl gene IDs via biomaRt,
# for a hard-coded example and for an ID list read from geneid.txt.

# Install and load the required packages through the legacy biocLite script.
# NOTE(review): newer Bioconductor releases replace biocLite with
# BiocManager::install() - confirm which one the target R version supports.
source("http://bioconductor.org/biocLite.R")
options(BioC_mirror = "http://mirrors.ustc.edu.cn/bioc/")
biocLite("org.Hs.eg.db")
library(org.Hs.eg.db)
biocLite("biomaRt")
library(biomaRt)

ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Attributes to retrieve for every query below.
# NOTE(review): recent Ensembl releases appear to name the Entrez attribute
# and filter "entrezgene_id" - verify with listAttributes(ensembl).
id_attributes <- c("entrezgene", "hgnc_symbol", "ensembl_gene_id")

# Example: two Entrez IDs. Query once, show the result, then save it
# (the original issued the identical remote query twice).
entrzID <- c("672", "1")
genesymbol <- getBM(attributes = id_attributes, filters = "entrezgene",
                    values = entrzID, mart = ensembl)
genesymbol
write.table(genesymbol, file = "symbol.xls", sep = "\t", quote = FALSE)

# Batch conversion: geneid.txt holds the gene IDs to convert.
tmp <- read.table("geneid.txt")
# Pass the ID column as a plain vector; read.table() returns a data frame
# and getBM() takes a vector of values for a single filter.
# presumably the IDs are in the first column - verify against geneid.txt
tmp2 <- getBM(attributes = id_attributes, filters = "entrezgene",
              values = tmp[[1]], mart = ensembl)
write.table(tmp2, file = "xxx.xls", sep = "\t", quote = FALSE)
包的安装:
# Install (if needed) and load the hgu95av2.db annotation package through
# the legacy biocLite script, using the USTC Bioconductor mirror.
# NOTE(review): newer Bioconductor releases replace biocLite with
# BiocManager::install() - confirm which one the target R version supports.
source("http://bioconductor.org/biocLite.R")
options(BioC_mirror = "http://mirrors.ustc.edu.cn/bioc/")

# The original sourced the installer but never called it, so library()
# failed on a machine without the package. Install only when it is missing.
if (!requireNamespace("hgu95av2.db", quietly = TRUE)) {
  biocLite("hgu95av2.db")
}

library("hgu95av2.db")
ls("package:hgu95av2.db") # available mappings (36 per the original note)