end_mark: Sentence End Marks

Description

end_mark - Grab the sentence end marks for a transcript. This can be useful for categorizing sentences by type.

end_mark_by - Grab the sentence end marks for a transcript, summarized by grouping variable(s).

Usage

end_mark(text.var, missing.end.mark = "_", missing.text = NA,
  other.endmarks = NULL)

end_mark_by(text.var, grouping.var, digits = 3, percent = FALSE,
  zero.replace = 0, ...)

Arguments

text.var

The text variable.

missing.end.mark

A value to use for sentences with missing endmarks.

missing.text

A value to use for sentences with missing (NA) text.

other.endmarks

Other 1-2 character endmarks to search for.

grouping.var

The grouping variables. Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.

digits

Integer; number of decimal places to round when printing.

percent

Logical. If TRUE the output is given as a percent. If FALSE the output is given as a proportion.

zero.replace

Value to replace 0 values with.

...

Other arguments passed to end_mark.

Value

Returns a character vector of qdap end marks for each sentence. End marks include:
"." - Declarative sentence.
"?" - Question sentence.
"!" - Exclamatory sentence.
"|" - Incomplete sentence.
"*." - Imperative-declarative sentence.
"*?" - Imperative-question sentence (unlikely to occur).
"*!" - Imperative-exclamatory sentence.
"*|" - Imperative-incomplete sentence.
"no.em" - No end mark.
"blank" - Empty cell/NA.

Examples

Run this code

# Basic usage: get the end mark of each sentence in a text column.
end_mark(DATA.SPLIT$state)
end_mark(mraja1spl$dialogue)
# Tabulate how often each end mark occurs, then plot the end_mark() result.
table(end_mark(mraja1spl$dialogue))
plot(end_mark(mraja1spl$dialogue))
# Use the end marks as a row filter on the original data.
# NOTE(review): htruncdf() presumably prints a truncated view of the subset;
# confirm against its own help page.
ques <- mraja1spl[end_mark(mraja1spl$dialogue) == "?", ] # keep rows whose end mark is "?"
htruncdf(ques)
non.ques <- mraja1spl[end_mark(mraja1spl$dialogue) != "?", ] # keep rows whose end mark is not "?"
htruncdf(non.ques, 20)
ques.per <- mraja1spl[end_mark(mraja1spl$dialogue) %in% c(".", "?"), ] # keep rows whose end mark is "." or "?"
htruncdf(ques.per, 20)

# End marks by grouping variable (here, person). The outer parentheses
# print the result as well as assigning it to x_by.
(x_by <- end_mark_by(DATA.SPLIT$state, DATA.SPLIT$person))
# Accessor functions applied to the end_mark_by() result; see each
# function's help page for what it returns.
scores(x_by)
counts(x_by)
proportions(x_by)
preprocessed(x_by)
# The result of each accessor can also be passed to plot().
plot(scores(x_by))
plot(counts(x_by))
plot(proportions(x_by))
plot(preprocessed(x_by))

#================================#
## End Marks Over Time Examples ##
#================================#
##EXAMPLE 1
# End marks computed separately for each piece of the dialogue, split by time.
sentpres <- lapply(
  with(pres_debates2012, split(dialogue, time)),
  function(x) end_mark(x)
)

# One plot of cumulative() per element of sentpres. The y-axis label is
# blanked on every plot except the second, and the x-axis label is removed
# on every plot except the third.
sentplots <- lapply(seq_along(sentpres), function(idx) {
  fig <- plot(cumulative(sentpres[[idx]]))
  if (idx != 2) {
    fig <- fig + ylab("")
  }
  if (idx != 3) {
    fig <- fig + xlab(NULL)
  }
  fig + ggtitle(paste("Debate", idx))
})

# Lay all of the plots out together.
library(grid)
library(gridExtra)
do.call(grid.arrange, sentplots)

##EXAMPLE 2
# End marks computed separately for each piece of the dialogue, split by act.
sentraj <- lapply(
  with(rajSPLIT, split(dialogue, act)),
  function(x) end_mark(x)
)

# One plot of cumulative() per element of sentraj, titled with the act
# label taken from qcv(). The y-axis label is blanked on every plot except
# the second, and the x-axis label is removed on every plot except the third.
sentplots2 <- lapply(seq_along(sentraj), function(idx) {
  fig <- plot(cumulative(sentraj[[idx]]))
  if (idx != 2) {
    fig <- fig + ylab("")
  }
  if (idx != 3) {
    fig <- fig + xlab(NULL)
  }
  act <- qcv(I, II, III, IV, V)
  fig + ggtitle(paste("Act", act[idx]))
})

## ggplot2 function to extract legend
##
## Builds the plot with ggplot_build(), converts it with ggplot_gtable(),
## and returns the grob named "guide-box" (the legend).
##
## a.gplot: a plot object accepted by ggplot_build().
## Returns: the first grob whose name is "guide-box".
## Errors:  if the built plot has no grob named "guide-box".
g_legend <- function(a.gplot) {
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  grobs <- tmp[["grobs"]]
  # vapply() always yields a character vector (sapply() may return a list),
  # so the comparison below is well defined; a missing name becomes NA.
  grob_names <- vapply(grobs, function(x) {
    nm <- x[["name"]]
    if (is.null(nm)) NA_character_ else as.character(nm)[1L]
  }, character(1))
  leg <- which(grob_names == "guide-box")
  if (length(leg) == 0L) {
    stop("no \"guide-box\" grob found; does the plot have a legend?",
         call. = FALSE)
  }
  # Use the first match so that `[[` always receives a single index.
  grobs[[leg[1L]]]
}

## remove legends from plots
# Each plot in sentplots2 loses its legend and both axis labels.
sentplots3 <- lapply(sentplots2, function(act_plot) {
  bare <- act_plot + theme(legend.position="none")
  bare <- bare + xlab(NULL)
  bare + ylab(NULL)
})

# The legend extracted from the first plot is stored as element 6 of the list.
sentplots3[[6]] <- g_legend(sentplots2[[1]])

# Lay out every element of sentplots3 together.
do.call(grid.arrange, sentplots3)

Run the code above in your browser using DataLab