# Attach packages
library(rearrr)
library(dplyr)
has_ggplot <- require(ggplot2) # Attach if installed
# Fix the random number generator state so the example is reproducible
set.seed(2)

# Build the example data frame:
# three columns of 50 uniform random numbers (drawn in the order x, y, z)
# and a numeric grouping column with five groups of ten consecutive rows
df <- data.frame(
  x = runif(n = 50),
  y = runif(n = 50),
  z = runif(n = 50),
  g = rep(as.numeric(1:5), each = 10)
)
# Move the data points into clusters
# (the results are not assigned, so they are printed at top level)

# Cluster the x and y columns within each group in g
cluster_groups(
  data = df,
  cols = c("x", "y"),
  group_col = "g"
)

# Same columns and groups, with `multiplier` set to 0.1
cluster_groups(
  data = df,
  cols = c("x", "y"),
  group_col = "g",
  multiplier = 0.1
)

# Only the x column, with `multiplier` set to 0.1
cluster_groups(
  data = df,
  cols = "x",
  group_col = "g",
  multiplier = 0.1
)
#
# Plotting clusters
#

# Cluster x and y for each group in g
df_clustered <- cluster_groups(
  data = df,
  group_col = "g",
  cols = c("x", "y")
)

# Draw the clustered points on top of the original data points
# As the data is random, the clusters might overlap
# Only runs when ggplot2 could be attached
if (has_ggplot) {
  ggplot(
    data = df_clustered,
    mapping = aes(x = x_clustered, y = y_clustered, color = factor(g))
  ) +
    # Original data (small, semi-transparent points)
    geom_point(mapping = aes(x = x, y = y), size = 0.8, alpha = 0.3) +
    # Clustered data (inherits the plot-level aesthetics)
    geom_point() +
    labs(x = "x", y = "y", color = "g") +
    theme_minimal()
}
#
# Maintain original group centroids
#

# Cluster x and y for each group in g, with `keep_centroids` enabled
df_clustered <- cluster_groups(
  data = df,
  group_col = "g",
  cols = c("x", "y"),
  keep_centroids = TRUE
)

# Draw the clustered points on top of the original data points
# As the data is random, the clusters might overlap
# Only runs when ggplot2 could be attached
if (has_ggplot) {
  ggplot(
    data = df_clustered,
    mapping = aes(x = x_clustered, y = y_clustered, color = factor(g))
  ) +
    # Original data (small, semi-transparent points)
    geom_point(mapping = aes(x = x, y = y), size = 0.8, alpha = 0.3) +
    # Clustered data (inherits the plot-level aesthetics)
    geom_point() +
    labs(x = "x", y = "y", color = "g") +
    theme_minimal()
}
#
# Three dimensions
#

# Cluster x, y and z for each group in g
df_clustered <- cluster_groups(
  data = df,
  group_col = "g",
  cols = c("x", "y", "z")
)

# The plotly example is wrapped in `if (FALSE)`, so it is never executed
# when the script runs; it is kept as a reference for plotting in 3d
if (FALSE) {
  # 3d scatter plot of the clustered coordinates, colored by g
  plotly::plot_ly(
    x = df_clustered[["x_clustered"]],
    y = df_clustered[["y_clustered"]],
    z = df_clustered[["z_clustered"]],
    color = df_clustered[["g"]],
    type = "scatter3d",
    mode = "markers"
  )
}
# Run the code above in your browser using DataLab