if(interactive()){
# Open an ODBC connection to the Dallas database in Aster.
# The <...> placeholders must be replaced with real connection settings.
conn <- odbcDriverConnect(
  connection = "driver={Aster ODBC Driver};
server=<dbhost>;port=2406;database=<dbname>;uid=<user>;pwd=<pw>"
)

# Stop-word list handed to computeTfIdf below.
stopwords <- c("a", "an", "the", "with")

# 2-gram tf-idf on the offense table. The document id is the hour of
# offensestarttime cast to int and divided by 6; the text comes from the
# offense description and narrative columns.
daypart_tfidf_2gram <- computeTfIdf(
  conn,
  "public.dallaspoliceall",
  docId = "extract('hour' from offensestarttime)::int/6",
  textColumns = c('offensedescription', 'offensenarrative'),
  parser = nGram(2, delimiter = '[ \\t\\b\\f\\r:\"]+'),
  stopwords = stopwords
)
# Translate a single slice code into a human-readable label.
#
# Despite its name, this maps more than race codes: sex codes ("M", "F"),
# the codes "0" to "3" (Night, Morning, Day, Evening) and "C" are all
# recognised.
#
# ch: a single code; it is coerced with as.character() before matching,
#     so both 0 and "0" are accepted.
# Returns the matching label, or "Unknown" for any other code.
toRace <- function(ch) {
  code <- as.character(ch)
  switch(code,
    "0" = "Night",
    "1" = "Morning",
    "2" = "Day",
    "3" = "Evening",
    "M" = "Male",
    "F" = "Female",
    "C" = "C",
    "Unknown"
  )
}
# Draw a word cloud for one document slice of a tf-idf result and write it
# to a PNG path under 'wordclouds/'.
#
# tf_df:    object whose `rs` element is a data frame with a `docid` column,
#           a `term` column and a column named by `metric`.
# metric:   name of the column in `tf_df$rs` used as the word weight
#           (e.g. 'tf_idf'); also used in the title and file name.
# slice:    docid value selecting which rows of `tf_df$rs` to plot; passed
#           through toRace() to produce the label in the title and file name.
# n:        n-gram size, used only for the title and file name.
# maxWords: passed to createWordcloud() as maxWords.
# size:     passed to createWordcloud() as both width and height.
#
# Returns whatever createWordcloud() returns.
createDallasWordcloud <- function(tf_df, metric, slice, n, maxWords=25, size=750) {
  # Index the columns explicitly so the `slice` argument can never be
  # shadowed by a like-named column of the result set.
  rs <- tf_df$rs
  words <- rs[rs[["docid"]] == slice, ]

  # Palette: Set1 reversed, with the 1st and 3rd reversed colours removed.
  pal <- rev(brewer.pal(8, "Set1"))[c(-3, -1)]

  # The label must come from `slice`; the previous code referenced an
  # undefined variable `race` when building the title.
  slice_label <- toRace(slice)

  createWordcloud(words$term, words[[metric]],
    maxWords = maxWords, scale = c(4, 0.5), palette = pal,
    title = paste("Top ", metric, "Offense", n, "- grams for", slice_label),
    file = paste0('wordclouds/', metric, '_offense_', n, 'gram_', slice_label, '.png'),
    width = size, height = size
  )
}
# Render the 2-gram tf-idf word cloud for docid 0 (labelled "Night" by toRace).
createDallasWordcloud(daypart_tfidf_2gram, 'tf_idf', 0, n=2, maxWords=200, size=1300)
# Close the ODBC connection opened at the top of the example so the
# database handle is not leaked once the example is done.
odbcClose(conn)
}
# Run the code above in your browser using DataLab