## ---------------------------------------------------------------------
## BASIC EXAMPLES
## ---------------------------------------------------------------------
df <- data.frame(chr="chr1", start=11:15, end=12:16,
strand=c("+","-","+","*","."), score=1:5)
df
makeGRangesFromDataFrame(df) # strand value "." is replaced with "*"
## The strand column is optional:
df <- data.frame(chr="chr1", start=11:15, end=12:16, score=1:5)
makeGRangesFromDataFrame(df)
gr <- makeGRangesFromDataFrame(df, keep.extra.columns=TRUE)
gr2 <- as(df, "GRanges") # equivalent to the above
stopifnot(identical(gr, gr2))
gr2 <- GRanges(df) # equivalent to the above
stopifnot(identical(gr, gr2))
makeGRangesFromDataFrame(df, ignore.strand=TRUE)
makeGRangesFromDataFrame(df, keep.extra.columns=TRUE,
ignore.strand=TRUE)
makeGRangesFromDataFrame(df, seqinfo=paste0("chr", 4:1))
makeGRangesFromDataFrame(df, seqinfo=c(chrM=NA, chr1=500, chrX=100))
makeGRangesFromDataFrame(df, seqinfo=Seqinfo(paste0("chr", 4:1)))
## ---------------------------------------------------------------------
## ABOUT AUTOMATIC DETECTION OF THE seqnames/start/end/strand COLUMNS
## ---------------------------------------------------------------------
## Automatic detection of the seqnames/start/end/strand columns is
## case insensitive:
df <- data.frame(ChRoM="chr1", StarT=11:15, stoP=12:16,
STRAND=c("+","-","+","*","."), score=1:5)
makeGRangesFromDataFrame(df)
## It also ignores a common prefix between the start and end columns:
df <- data.frame(seqnames="chr1", tx_start=11:15, tx_end=12:16,
strand=c("+","-","+","*","."), score=1:5)
makeGRangesFromDataFrame(df)
## The common prefix between the start and end columns is used to
## disambiguate between more than one seqnames column:
df <- data.frame(chrom="chr1", tx_start=11:15, tx_end=12:16,
tx_chr="chr2", score=1:5)
makeGRangesFromDataFrame(df)
## ---------------------------------------------------------------------
## 0-BASED VS 1-BASED START POSITIONS
## ---------------------------------------------------------------------
if (require(rtracklayer)) {
session <- browserSession()
genome(session) <- "sacCer2"
query <- ucscTableQuery(session, "Most Conserved")
df <- getTable(query)
## A common pitfall is to forget that the UCSC Table Browser uses the
## "0-based start" convention:
gr0 <- makeGRangesFromDataFrame(df, keep.extra.columns=TRUE)
head(gr0)
min(start(gr0))
## The start positions need to be converted into 1-based positions,
## to adhere to the convention used in Bioconductor:
gr1 <- makeGRangesFromDataFrame(df, keep.extra.columns=TRUE,
starts.in.df.are.0based=TRUE)
head(gr1)
}
Run the code above in your browser using DataLab