## ---------------------------------------------------------------------
## A. BASIC USAGE
## ---------------------------------------------------------------------
## listMarts() and listDatasets() come from the biomaRt package. First
## show the BioMart databases served by www.ensembl.org, then list the
## datasets available in the "ENSEMBL_MART_ENSEMBL" database:
library(biomaRt)
listMarts(host = "www.ensembl.org")
datasets <- listDatasets(
  useMart(biomart = "ENSEMBL_MART_ENSEMBL", host = "www.ensembl.org")
)
head(datasets)
## Keep only the rows whose dataset name contains "elegans" (any case):
datasets[grepl("elegans", datasets$dataset, ignore.case = TRUE), ]
## Build a TxDb holding the full transcript dataset for Worm:
txdb1 <- makeTxDbFromBiomart(dataset = "celegans_gene_ensembl")
txdb1
## Build a TxDb holding an incomplete transcript dataset for Human,
## restricted to the transcript ids listed here:
transcript_ids <- c(
  "ENST00000013894", "ENST00000268655", "ENST00000313243",
  "ENST00000435657", "ENST00000384428", "ENST00000478783"
)
txdb2 <- makeTxDbFromBiomart(
  dataset = "hsapiens_gene_ensembl",
  transcript_ids = transcript_ids
)
txdb2  # note that these annotations match the GRCh38 genome assembly
## ---------------------------------------------------------------------
## B. USING A HOST OTHER THAN www.ensembl.org
## ---------------------------------------------------------------------
## A typical use case is to access the "ENSEMBL_MART_ENSEMBL" BioMart
## database on a mirror e.g. on useast.ensembl.org. The mirror is named
## once here so that the check and the query below always target the
## same host:
mirror_host <- "useast.ensembl.org"
## A gotcha when using a mirror is that the name of the database on the
## mirror might be different! We can check this with listMarts() from
## the biomaRt package:
listMarts(host = mirror_host)
## Therefore in addition to setting 'host' to the mirror we might also
## need to specify the 'biomart' argument:
txdb3 <- makeTxDbFromBiomart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  transcript_ids = transcript_ids,
  host = mirror_host
)
txdb3
## ---------------------------------------------------------------------
## C. USING FILTERS
## ---------------------------------------------------------------------
## listFilters() from the biomaRt package shows the filter names that
## are valid for a given mart:
mart <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "www.ensembl.org"
)
head(listFilters(mart))
## Restrict the transcript dataset to Ensembl gene ENSG00000011198:
my_filter <- list(ensembl_gene_id = "ENSG00000011198")
txdb4 <- makeTxDbFromBiomart(
  dataset = "hsapiens_gene_ensembl",
  filter = my_filter
)
txdb4
## Look at the retrieved transcripts and at their lengths:
transcripts(txdb4, columns = c("tx_id", "tx_name", "gene_id"))
transcriptLengths(txdb4)
## ---------------------------------------------------------------------
## D. RETRIEVING CHROMOSOME INFORMATION ONLY
## ---------------------------------------------------------------------
## Fetch the chromosome information for the Worm dataset; the outer
## parentheses display the value that was just assigned:
(chrominfo <- getChromInfoFromBiomart(dataset = "celegans_gene_ensembl"))
## Run the code above in your browser using DataLab