# NOT RUN {
df <- createDataFrame(iris)
df1 <- dapply(df, function(x) { x }, schema(df))
collect(df1)
# filter and add a column
df <- createDataFrame(
list(list(1L, 1, "1"), list(2L, 2, "2"), list(3L, 3, "3")),
c("a", "b", "c"))
schema <- structType(structField("a", "integer"), structField("b", "double"),
structField("c", "string"), structField("d", "integer"))
df1 <- dapply(
df,
function(x) {
y <- x[x[1] > 1, ]
y <- cbind(y, y[1] + 1L)
},
schema)
# The schema also can be specified in a DDL-formatted string.
schema <- "a INT, d DOUBLE, c STRING, d INT"
df1 <- dapply(
df,
function(x) {
y <- x[x[1] > 1, ]
y <- cbind(y, y[1] + 1L)
},
schema)
collect(df1)
# the result
# a b c d
# 1 2 2 2 3
# 2 3 3 3 4
# }
Run the code above in your browser using DataLab