Learn R Programming

Matrix.utils (version 0.9.8)

merge.Matrix: Merges two Matrices or matrix-like objects

Description

Implementation of merge for Matrix objects. When called explicitly as merge.Matrix, it also works on matrix, data.frame, and vector objects, and serves as a much faster alternative to the built-in merge.

Usage

# S3 method for Matrix
merge(
  x,
  y,
  by.x,
  by.y,
  all.x = TRUE,
  all.y = TRUE,
  out.class = class(x)[1],
  fill.x = ifelse(is(x, "sparseMatrix"), FALSE, NA),
  fill.y = fill.x,
  ...
)

join.Matrix( x, y, by.x, by.y, all.x = TRUE, all.y = TRUE, out.class = class(x)[1], fill.x = ifelse(is(x, "sparseMatrix"), FALSE, NA), fill.y = fill.x, ... )

Arguments

x, y

Matrix or matrix-like object

by.x

vector indicating the names to match from Matrix x

by.y

vector indicating the names to match from Matrix y

all.x

logical; if TRUE, then each value in x will be included even if it has no matching values in y

all.y

logical; if TRUE, then each value in y will be included even if it has no matching values in x

out.class

the class of the output object. Defaults to the class of x. Note that some output classes are not possible because of the limits of R's coercion capabilities; for example, a character matrix cannot be converted to a Matrix.

fill.x, fill.y

the value to put in merged columns where there is no match. Defaults to 0/FALSE for sparse matrices in order to preserve sparsity, and to NA for all other classes.

...

arguments to be passed to or from methods. Currently ignored

Details

all.x/all.y correspond to the four types of database joins in the following way:

left

all.x=TRUE, all.y=FALSE

right

all.x=FALSE, all.y=TRUE

inner

all.x=FALSE, all.y=FALSE

full

all.x=TRUE, all.y=TRUE

Note that NA values will match other NA values.

Examples

Run this code
# NOT RUN {
orders<-Matrix(as.matrix(data.frame(orderNum=1:1000, 
 customer=sample(100,1000,TRUE)))) 
 cancelledOrders<-Matrix(as.matrix(data.frame(orderNum=sample(1000,100), 
 cancelled=1))) 
skus<-Matrix(as.matrix(data.frame(orderNum=sample(1000,10000,TRUE), 
sku=sample(1000,10000,TRUE), amount=runif(10000)))) 
a<-merge(orders,cancelledOrders,orders[,'orderNum'],cancelledOrders[,'orderNum'])
b<-merge(orders,cancelledOrders,orders[,'orderNum'],cancelledOrders[,'orderNum'],all.x=FALSE)
c<-merge(orders,skus,orders[,'orderNum'],skus[,'orderNum'])

#The above Matrices could be converted to matrices or data.frames and handled by other methods.
#However, this is not possible in the sparse case, which can be handled by this function:
sm<-cbind2(1:200000,rsparsematrix(200000,10000,density=.0001))
sm2<-cbind2(sample(1:200000,50000,TRUE),rsparsematrix(200000,10,density=.01))
sm3<-merge.Matrix(sm,sm2,by.x=sm[,1],by.y=sm2[,1])

 
# }
# NOT RUN {
#merge.Matrix can also handle many other data types, such as data frames, and is generally fast.
orders<-data.frame(orderNum=as.character(sample(1e5, 1e6, TRUE)),
   sku=sample(1e3, 1e6, TRUE),
   customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE)
cancelledOrders<-data.frame(orderNum=as.character(sample(1e5,1e4)),
   cancelled=1,stringsAsFactors=FALSE)
system.time(a<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'],
   cancelledOrders[,'orderNum']))
system.time(b<-merge.data.frame(orders,cancelledOrders,all.x = TRUE,all.y=TRUE))
system.time(c<-dplyr::full_join(orders,cancelledOrders))
system.time({require(data.table);
d<-merge(data.table(orders),data.table(cancelledOrders),
   by='orderNum',all=TRUE,allow.cartesian=TRUE)})

orders<-data.frame(orderNum=sample(1e5, 1e6, TRUE), sku=sample(1e3, 1e6,
TRUE), customer=sample(1e4,1e6,TRUE),stringsAsFactors=FALSE) 
cancelledOrders<-data.frame(orderNum=sample(1e5,1e4),cancelled=1,stringsAsFactors=FALSE)
system.time(b<-merge.Matrix(orders,cancelledOrders,orders[,'orderNum'], 
cancelledOrders[,'orderNum'])) 
system.time(e<-dplyr::full_join(orders,cancelledOrders)) 
system.time({require(data.table);
 d<-merge(data.table(orders),data.table(cancelledOrders),
 by='orderNum',all=TRUE,allow.cartesian=TRUE)})

#In certain cases, merge.Matrix can be much faster than alternatives. 
one<-as.character(1:1000000) 
two<-as.character(sample(1:1000000,1e5,TRUE)) 
system.time(b<-merge.Matrix(one,two,one,two)) 
system.time(c<-dplyr::full_join(data.frame(key=one),data.frame(key=two))) 
system.time({require(data.table);
 d<-merge(data.table(data.frame(key=one)),data.table(data.frame(key=two)),
 by='key',all=TRUE,allow.cartesian=TRUE)})
# }
# NOT RUN {
# }

Run the code above in your browser using DataLab