Learn R Programming

SparkR (version 2.3.0)

column_collection_functions: Collection functions for Column operations

Description

Collection functions defined for Column.

Usage

array_contains(x, value)

explode(x)

explode_outer(x)

from_json(x, schema, ...)

map_keys(x)

map_values(x)

posexplode(x)

posexplode_outer(x)

size(x)

sort_array(x, asc = TRUE)

to_json(x, ...)

# S4 method for Column to_json(x, ...)

# S4 method for Column,characterOrstructType from_json(x, schema, as.json.array = FALSE, ...)

# S4 method for Column array_contains(x, value)

# S4 method for Column map_keys(x)

# S4 method for Column map_values(x)

# S4 method for Column explode(x)

# S4 method for Column size(x)

# S4 method for Column sort_array(x, asc = TRUE)

# S4 method for Column posexplode(x)

# S4 method for Column explode_outer(x)

# S4 method for Column posexplode_outer(x)

Arguments

x

Column to compute on. Note the difference in the following methods:

  • to_json: it is the column containing the struct, array of the structs, the map or array of maps.

  • from_json: it is the column containing the JSON string.

value

a value to be checked if contained in the column

schema

a structType object to use as the schema when parsing the JSON string. Since Spark 2.3, a DDL-formatted string is also supported for the schema.

...

additional argument(s). In to_json and from_json, this contains additional named properties that control how the conversion is performed; it accepts the same options as the JSON data source.

asc

a logical flag indicating the sorting order. If TRUE, sorting is in ascending order; if FALSE, sorting is in descending order.

as.json.array

a logical flag indicating whether the input string is a JSON array of objects (TRUE) or a single object (FALSE).

Details

to_json: Converts a column containing a structType, array of structType, a mapType or array of mapType into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.

from_json: Parses a column containing a JSON string into a Column of structType with the specified schema or array of structType if as.json.array is set to TRUE. If the string is unparseable, the Column will contain the value NA.

array_contains: Returns null if the array is null, true if the array contains the value, and false otherwise.

map_keys: Returns an unordered array containing the keys of the map.

map_values: Returns an unordered array containing the values of the map.

explode: Creates a new row for each element in the given array or map column.

size: Returns length of array or map.

sort_array: Sorts the input array in ascending or descending order according to the natural ordering of the array elements.

posexplode: Creates a new row for each element with position in the given array or map column.

explode_outer: Creates a new row for each element in the given array or map column. Unlike explode, if the array/map is null or empty then null is produced.

posexplode_outer: Creates a new row for each element with position in the given array or map column. Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced.

Examples

Run this code
# NOT RUN {
# Dataframe used throughout this doc
df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
head(tmp2)
head(select(tmp, posexplode(tmp$v1)))
head(select(tmp, sort_array(tmp$v1)))
head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
head(select(tmp3, map_keys(tmp3$v3)))
head(select(tmp3, map_values(tmp3$v3)))
# }
# NOT RUN {
# }
# NOT RUN {
# Converts a struct into a JSON object
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))

# Converts an array of structs into a JSON array
df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))

# Converts a map into a JSON object
df2 <- sql("SELECT map('name', 'Bob') as people")
df2 <- mutate(df2, people_json = to_json(df2$people))

# Converts an array of maps into a JSON array
df2 <- sql("SELECT array(map('name', 'Bob'), map('name', 'Alice')) as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
schema <- structType(structField("date", "string"))
head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
df2 <- sql("SELECT named_struct('name', 'Bob') as people")
df2 <- mutate(df2, people_json = to_json(df2$people))
schema <- structType(structField("name", "string"))
head(select(df2, from_json(df2$people_json, schema)))
head(select(df2, from_json(df2$people_json, "name STRING")))
# }
# NOT RUN {
# }
# NOT RUN {
df2 <- createDataFrame(data.frame(
  id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
))

head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))
# }

Run the code above in your browser using DataLab