# NOT RUN {
# Dataframe used throughout this doc
df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1), shuffle(tmp$v1)))
head(select(tmp, array_max(tmp$v1), array_min(tmp$v1), array_distinct(tmp$v1)))
head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
head(select(tmp, reverse(tmp$v1), array_remove(tmp$v1, 21)))
head(select(tmp, array_transform("v1", function(x) x * 10)))
head(select(tmp, array_exists("v1", function(x) x > 120)))
head(select(tmp, array_forall("v1", function(x) x >= 8.0)))
head(select(tmp, array_filter("v1", function(x) x < 10)))
head(select(tmp, array_aggregate("v1", lit(0), function(acc, y) acc + y)))
head(select(
tmp,
array_aggregate("v1", lit(0), function(acc, y) acc + y, function(acc) acc / 10)))
tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
head(tmp2)
head(select(tmp, posexplode(tmp$v1)))
head(select(tmp, slice(tmp$v1, 2L, 2L)))
head(select(tmp, sort_array(tmp$v1)))
head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
head(select(tmp3, map_entries(tmp3$v3), map_keys(tmp3$v3), map_values(tmp3$v3)))
head(select(tmp3, element_at(tmp3$v3, "Valiant"), map_concat(tmp3$v3, tmp3$v3)))
head(select(tmp3, transform_keys("v3", function(k, v) upper(k))))
head(select(tmp3, transform_values("v3", function(k, v) v * 10)))
head(select(tmp3, map_filter("v3", function(k, v) v < 42)))
tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
head(select(tmp4, array_except(tmp4$v4, tmp4$v5), array_intersect(tmp4$v4, tmp4$v5)))
head(select(tmp4, array_union(tmp4$v4, tmp4$v5)))
head(select(tmp4, arrays_zip(tmp4$v4, tmp4$v5)))
head(select(tmp, concat(df$mpg, df$cyl, df$hp)))
head(select(tmp4, arrays_zip_with(tmp4$v4, tmp4$v5, function(x, y) x * y)))
tmp5 <- mutate(df, v6 = create_array(df$model, df$model))
head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", "NULL")))
tmp6 <- mutate(df, v7 = create_array(create_array(df$model, df$model)))
head(select(tmp6, flatten(tmp6$v7)))
tmp7 <- mutate(df, v8 = create_array(df$model, df$cyl), v9 = create_array(df$model, df$hp))
head(select(tmp7, arrays_zip_with("v8", "v9", function(x, y) (x * y) %% 3)))
head(select(tmp7, map_from_arrays(tmp7$v8, tmp7$v9)))
tmp8 <- mutate(df, v10 = create_array(struct(df$model, df$cyl)))
head(select(tmp8, map_from_entries(tmp8$v10)))
# }
# NOT RUN {
# }
# NOT RUN {
# Converts a struct into a JSON object
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
# Converts an array of structs into a JSON array
df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# Converts a map into a JSON object
df2 <- sql("SELECT map('name', 'Bob') as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# Converts an array of maps into a JSON array
df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# Converts a map into a pretty JSON object
df2 <- sql("SELECT map('name', 'Bob') as people")
df2 <- mutate(df2, people_json = to_json(df2$people, pretty = TRUE))
# }
# NOT RUN {
# }
# NOT RUN {
# Converts a struct into a CSV string
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
select(df2, to_csv(df2$d, dateFormat = 'dd/MM/yyyy'))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
schema <- structType(structField("date", "string"))
head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
df2 <- sql("SELECT named_struct('name', 'Bob') as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
schema <- structType(structField("name", "string"))
head(select(df2, from_json(df2$people_json, schema)))
head(select(df2, from_json(df2$people_json, "name STRING")))
head(select(df2, from_json(df2$people_json, schema_of_json(head(df2)$people_json))))
# }
# NOT RUN {
# }
# NOT RUN {
json <- "{\"name\":\"Bob\"}"
df <- sql("SELECT * FROM range(1)")
head(select(df, schema_of_json(json)))
# }
# NOT RUN {
# }
# NOT RUN {
csv <- "Amsterdam,2018"
df <- sql(paste0("SELECT '", csv, "' as csv"))
schema <- "city STRING, year INT"
head(select(df, from_csv(df$csv, schema)))
head(select(df, from_csv(df$csv, structType(schema))))
head(select(df, from_csv(df$csv, schema_of_csv(csv))))
# }
# NOT RUN {
# }
# NOT RUN {
csv <- "Amsterdam,2018"
df <- sql("SELECT * FROM range(1)")
head(select(df, schema_of_csv(csv)))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- createDataFrame(data.frame(
id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
))
head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))
# }
Run the code above in your browser using DataLab