# NOT RUN {
# Dataframe used throughout this doc
df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
head(tmp2)
head(select(tmp, posexplode(tmp$v1)))
head(select(tmp, sort_array(tmp$v1)))
head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
head(select(tmp3, map_keys(tmp3$v3)))
head(select(tmp3, map_values(tmp3$v3)))
# }
# NOT RUN {
# }
# NOT RUN {
# Converts a struct into a JSON object
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
# Converts an array of structs into a JSON array
df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# Converts a map into a JSON object
df2 <- sql("SELECT map('name', 'Bob')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# Converts an array of maps into a JSON array
df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
schema <- structType(structField("date", "string"))
head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
df2 <- sql("SELECT named_struct('name', 'Bob') as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
schema <- structType(structField("name", "string"))
head(select(df2, from_json(df2$people_json, schema)))
head(select(df2, from_json(df2$people_json, "name STRING")))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- createDataFrame(data.frame(
id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
))
head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))
# }
Run the code above in your browser using DataLab