# NOT RUN {
# Sample data used by the examples that follow: five timestamp strings and one
# numeric value per timestamp.
dts <- c(
  "2005-01-02 18:47:22",
  "2005-12-24 16:30:58",
  "2005-10-28 07:30:05",
  "2005-12-28 07:01:05",
  "2006-01-24 00:01:10"
)
y <- c(2.0, 2.2, 3.4, 2.5, 1.8)

# The strings are converted to POSIXct locally before the data frame is passed
# to createDataFrame().
df <- createDataFrame(
  data.frame(
    time = as.POSIXct(dts),
    y = y
  )
)
# }
# NOT RUN {
# }
# NOT RUN {
# Show the original timestamp next to the calendar fields derived from it.
head(
  select(
    df,
    df$time,
    year(df$time),
    quarter(df$time),
    month(df$time),
    dayofmonth(df$time),
    dayofweek(df$time),
    dayofyear(df$time),
    weekofyear(df$time)
  )
)

# Count and average of y, grouped by year.
head(
  agg(
    groupBy(df, year(df$time)),
    count(df$y),
    avg(df$y)
  )
)

# Average of y, grouped by month.
head(
  agg(
    groupBy(df, month(df$time)),
    avg(df$y)
  )
)
# }
# NOT RUN {
# }
# NOT RUN {
# Pull the hour, minute and second fields out of each timestamp.
head(
  select(
    df,
    hour(df$time),
    minute(df$time),
    second(df$time)
  )
)

# Average of y, grouped in turn by day of month, hour and minute.
head(
  agg(
    groupBy(df, dayofmonth(df$time)),
    avg(df$y)
  )
)
head(
  agg(
    groupBy(df, hour(df$time)),
    avg(df$y)
  )
)
head(
  agg(
    groupBy(df, minute(df$time)),
    avg(df$y)
  )
)
# }
# NOT RUN {
# }
# NOT RUN {
# Show each timestamp alongside last_day() and month() of the same column.
head(
  select(
    df,
    df$time,
    last_day(df$time),
    month(df$time)
  )
)
# }
# NOT RUN {
# }
# NOT RUN {
# Start from the raw character timestamps and derive date / timestamp columns
# from them, both with and without an explicit pattern.
tmp <- createDataFrame(data.frame(time_string = dts))
tmp2 <- mutate(
  tmp,
  date1 = to_date(tmp$time_string),
  # NOTE(review): this date-only pattern (also used for time2) is applied to
  # strings that carry a time of day as well; newer Spark parsers may be strict
  # about the unparsed remainder -- confirm against the target Spark version.
  date2 = to_date(tmp$time_string, "yyyy-MM-dd"),
  # Four-letter year, consistent with the "yyyy" used by the other patterns.
  date3 = date_format(tmp$time_string, "MM/dd/yyyy"),
  time1 = to_timestamp(tmp$time_string),
  time2 = to_timestamp(tmp$time_string, "yyyy-MM-dd")
)
head(tmp2)
# }
# NOT RUN {
# }
# NOT RUN {
# Convert the timestamp column with unix_timestamp() and back again with
# from_unixtime(), each time with and without an explicit pattern argument.
tmp <- mutate(
  df,
  to_unix = unix_timestamp(df$time),
  to_unix2 = unix_timestamp(df$time, "yyyy-MM-dd HH"),
  from_unix = from_unixtime(unix_timestamp(df$time)),
  from_unix2 = from_unixtime(unix_timestamp(df$time), "yyyy-MM-dd HH:mm")
)
head(tmp)
# }
# NOT RUN {
# }
# NOT RUN {
# Sliding one-minute windows that advance every 15 seconds and are offset
# 10 seconds past the minute, e.g. 09:00:10-09:01:10, 09:00:25-09:01:25,
# 09:00:40-09:01:40, ...
window(
  df$time,
  windowDuration = "1 minute",
  slideDuration = "15 seconds",
  startTime = "10 seconds"
)

# Tumbling one-minute windows offset 15 seconds past the minute,
# e.g. 09:00:15-09:01:15, 09:01:15-09:02:15, ...
window(
  df$time,
  windowDuration = "1 minute",
  startTime = "15 seconds"
)

# Sliding thirty-second windows that advance every 10 seconds,
# e.g. 09:00:00-09:00:30, 09:00:10-09:00:40, ...
window(
  df$time,
  windowDuration = "30 seconds",
  slideDuration = "10 seconds"
)
# }
# NOT RUN {
# }
# NOT RUN {
# Apply trunc() to the timestamp with the "year" / "yy" and "month" / "mon"
# format strings, shown next to the original value.
head(
  select(
    df,
    df$time,
    trunc(df$time, "year"),
    trunc(df$time, "yy"),
    trunc(df$time, "month"),
    trunc(df$time, "mon")
  )
)
# }
# NOT RUN {
# }
# NOT RUN {
# Apply date_trunc() to the timestamp at hour, minute, week and quarter
# granularity, shown next to the original value.
head(
  select(
    df,
    df$time,
    date_trunc("hour", df$time),
    date_trunc("minute", df$time),
    date_trunc("week", df$time),
    date_trunc("quarter", df$time)
  )
)
# }
# NOT RUN {
# Select current_date() and current_timestamp() as columns of df.
head(
  select(
    df,
    current_date(),
    current_timestamp()
  )
)
# }
# Run the code above in your browser using DataLab