# Two toy embedding matrices: five words (rows) by two dimensions (columns)
M = rbind(c(0, 0), c(1, 2), c(2, 0), c(3, 2), c(4, 0))
X = rbind(c(0, 0), c(-2, 1), c(0, 2), c(-2, 3), c(0, 4))
dimnames(M) = dimnames(X) = list(
  cc("A, B, C, D, E"),  # words
  cc("dim1, dim2")      # dimensions
)

# Draw the path through the rows of M (red) and of X (blue)
ggplot() +
  geom_path(
    mapping = aes(x = dim1, y = dim2),
    data = as.data.frame(M),
    color = "red"
  ) +
  geom_path(
    mapping = aes(x = dim1, y = dim2),
    data = as.data.frame(X),
    color = "blue"
  ) +
  coord_equal()

# Usage 1: pass two matrices (these can also be `embed` objects)
XR = orth_procrustes(M, X)
XR  # aligned with M

# Draw the path through the rows of the aligned result
ggplot() +
  geom_path(
    mapping = aes(x = dim1, y = dim2),
    data = as.data.frame(XR)
  ) +
  coord_equal()

# Usage 2: pass two `wordvec` objects
M.wv = as_wordvec(M)
X.wv = as_wordvec(X)
XR.wv = orth_procrustes(M.wv, X.wv)
XR.wv  # aligned with M.wv

# M and X must have the same set and order of words
# and the same number of word vector dimensions.
# The function extracts only the words shared by both inputs
# and puts them in the same order as in M.
Y = rbind(X, X[rev(rownames(X)), ])
rownames(Y)[seq_len(5)] = cc("F, G, H, I, J")
M.wv = as_wordvec(M)
Y.wv = as_wordvec(Y)
M.wv  # words: A, B, C, D, E
Y.wv  # words: F, G, H, I, J, E, D, C, B, A
YR.wv = orth_procrustes(M.wv, Y.wv)
YR.wv  # aligned with M.wv, with the same order of words
# Run the code above in your browser using DataLab