library(tidytext)
library(tibble)
library(dplyr)
library(ggplot2)
library(forcats)
# Analyzing Facebook IPO text ----
# NOTE(review): `ipo` is not created in this script; it is presumably an
# object with `facebook`, `google` and `linkedin` text elements that must be
# loaded before running -- confirm where it comes from.
facebook <- tibble(text = ipo$facebook, company = "Facebook")

# Tokenise the text into one word per row, drop stop words, and plot the
# 20 most frequent remaining words as a bar chart ordered by frequency.
facebook %>%
  unnest_tokens(word, text) %>%
  # Explicit join key: avoids the "Joining with `by = ...`" message and keeps
  # the join from silently changing if another shared column ever appears.
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE) %>%
  slice_head(n = 20) %>%
  ggplot(aes(y = fct_reorder(word, n), x = n, fill = n)) +
  geom_col() +
  labs(
    title = "Top 20 most common words in Facebook IPO",
    x = "Frequency",
    y = "Word"
  )
# Comparisons to Google and LinkedIn IPO texts ----
google <- tibble(text = ipo$google, company = "Google")
linkedin <- tibble(text = ipo$linkedin, company = "LinkedIn")
ipo_texts <- bind_rows(facebook, google, linkedin)

# Rank words by tf-idf within each company and plot the top-scoring words,
# one facet per company.
ipo_texts %>%
  unnest_tokens(word, text) %>%
  count(company, word, sort = TRUE) %>%
  bind_tf_idf(word, company, n) %>%
  group_by(company) %>%
  # slice_max() keeps ties by default, so a facet may show more than 15 words.
  slice_max(tf_idf, n = 15) %>%
  ungroup() %>%
  ggplot(
    aes(
      x = tf_idf,
      # Order words separately inside each facet; a single global factor
      # would misplace any word that appears in more than one company's list.
      y = reorder_within(word, tf_idf, company),
      fill = company
    )
  ) +
  geom_col(show.legend = FALSE) +
  # Strip the per-facet suffix that reorder_within() appends to the labels.
  scale_y_reordered() +
  facet_wrap(~company, ncol = 3, scales = "free") +
  labs(x = "tf-idf", y = NULL)
# Run the code above in your browser using DataLab