# NOT RUN {
# }
# NOT RUN {
# Example: convert a document-term matrix built with the tm package into a
# wordspace DSM object, then transpose it into a term-document matrix.
library(tm)         # tm package needs to be installed
library(wordspace)  # provides as.dsm(), which is called below

data(crude)  # news messages on crude oil from Reuters corpus
cat(as.character(crude[[1]]), "\n")  # a text example

# Some pre-processing: normalise whitespace, lowercase the text, strip
# punctuation and drop English stopwords.
corpus <- tm_map(crude, stripWhitespace)
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeWords, stopwords("english"))
cat(as.character(corpus[[1]]), "\n")  # pre-processed text

dtm <- DocumentTermMatrix(corpus)  # document-term matrix
inspect(dtm[1:5, 90:99])           # rows = documents

wordspace_dtm <- as.dsm(dtm, verbose = TRUE)  # convert to DSM
# The converted term-frequency counts are stored in the M component of the
# DSM. The S component is meant for weighted scores, which have not been
# computed here, so indexing $S would just print NULL.
print(wordspace_dtm$M[1:5, 90:99])  # same part of dtm as above

wordspace_tdm <- t(wordspace_dtm)  # convert to term-document matrix
print(wordspace_tdm)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab