# NOT RUN {
## All Citations
## Sample sentences containing both in-text and parenthetical citations.
x <- c(
  "Hello World (V. Raptor, 1986) bye",
  "Narcissism is not dead (Rinker, 2014)",
  "The R Core Team (2014) has many members.",
  paste(
    "Bunn (2005) said, \"As for elegance, R is refined, tasteful, and",
    "beautiful. When I grow up, I want to marry R.\""
  ),
  "It is wrong to blame ANY tool for our own shortcomings (Baer, 2005).",
  "Wickham's (in press) Tidy Data should be out soon.",
  "Rinker's (n.d.) dissertation not so much.",
  "I always consult xkcd comics for guidance (Foo, 2012; Bar, 2014).",
  "Uwe Ligges (2007) says, \"RAM is cheap and thinking hurts\""
)

## Default call, extraction, counts of the extracted citations, and a call
## with a custom replacement string.
rm_citation(x)
ex_citation(x)
as_count(ex_citation(x))
rm_citation(x, replacement = "[CITATION HERE]")
# }
# NOT RUN {
## Tabulate the extracted citations, sort the table by ascending count and
## hand it to vect2df() together with the labels "citation" and "count".
qdapTools::vect2df(
  sort(table(unlist(rm_citation(x, extract = TRUE)))),
  "citation",
  "count"
)
# }
# NOT RUN {
## In-Text
ex_citation(x, pattern = "@rm_citation2")

## Parenthetical
ex_citation(x, pattern = "@rm_citation3")
# }
# NOT RUN {
## Mining Citation
## Install pacman only when it is missing. requireNamespace() tests for the
## package without attaching it; pacman is only ever called below through
## the qualified pacman::p_load().
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(qdap, qdapTools, dplyr, ggplot2)

## Fetch the example document; it is read back below by its bare file name.
url_dl("http://umlreading.weebly.com/uploads/2/5/2/5/25253346/whole_language_timeline-updated.docx")

## Read the document, drop non-ASCII characters and split the result at the
## "References" heading.
parts <- read_docx("whole_language_timeline-updated.docx") %>%
  rm_non_ascii() %>%
  split_vector(split = "References", include = TRUE, regex = TRUE)

## First piece of the split (the text ahead of "References").
parts[[1]]
## Citations extracted from the unbag()-collapsed text, passed through c()
c(ex_citation(unbag(parts[[1]])))

## Counts
as_count(ex_citation(unbag(parts[[1]])))

## By line
ex_citation(parts[[1]])
## Frequency
## One row per distinct citation with its count `n`, sorted from least to
## most frequent. The factor levels are fixed in that sorted order so later
## plots keep the same ordering on the citation axis.
## tibble() replaces the deprecated data_frame().
cites <- parts[[1]] %>%
  unbag() %>%
  ex_citation() %>%
  c() %>%
  tibble(citation = .) %>%
  count(citation) %>%
  arrange(n) %>%
  mutate(citation = factor(citation, levels = citation))
## Distribution of citations (find locations and then plot)
## For every citation, find each literal (fixed = TRUE) occurrence in the
## collapsed text and widen the matched span by 5 characters on both sides.
cite_locs <- do.call(rbind, lapply(cites[[1]], function(cite) {
  hits <- gregexpr(cite, unbag(parts[[1]]), fixed = TRUE)[[1]]
  hit_lengths <- attr(hits, "match.length", exact = TRUE)
  data.frame(
    citation = cite,
    start = hits - 5,
    end = hits + 5 + hit_lengths
  )
}))
## Draw each padded citation span as a yellow segment on a black background.
## The x axis runs from 0 to 25 characters past the collapsed text length.
ggplot(cite_locs) +
  geom_segment(
    aes(x = start, xend = end, y = citation, yend = citation),
    size = 3,
    color = "yellow"
  ) +
  xlab("Duration") +
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, nchar(unbag(parts[[1]])) + 25)
  ) +
  theme_grey() +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.border = element_rect(colour = "grey50", fill = NA, size = 1),
    panel.grid.major = element_line(color = "grey20"),
    panel.grid.minor = element_line(color = "grey20"),
    axis.text = element_text(color = "grey50"),
    axis.title = element_text(color = "grey50")
  )
# }
# Run the code above in your browser using DataLab