Chapter 7 Demographic
7.1 Merge cluster data with demographic data
# Combine the selected microvan columns (column 2 and columns 33-39) with the
# cluster assignment stored in EFA_kmeans$cluster, then move `cluster` to the
# first column.
demo_cluster <- cbind(microvan[, c(2, 33:39)], cluster = EFA_kmeans$cluster) %>%
  as.data.frame() %>%
  # Namespace-qualified, like the dplyr:: calls later in this chapter, so a
  # select() from another attached package cannot mask it.
  dplyr::select(cluster, dplyr::everything())

# Long format for the faceted plots: one row per (cluster, variable, value).
# pivot_longer() replaces the superseded gather(). The column names are the
# same; only the row order differs (row-major instead of column-major), which
# the plots built from `df` in this chapter do not depend on.
df <- demo_cluster %>%
  tidyr::pivot_longer(
    cols = -cluster,
    names_to = "variable",
    values_to = "value"
  ) %>%
  as.data.frame()

# Interactive table of the merged data, 10 rows per page.
DT::datatable(demo_cluster, options = list(pageLength = 10))
7.2 Histogram
# factorize cluster so it is treated as a discrete fill/outline grouping
df$cluster <- factor(df$cluster)
# Fix the facet order of the variables; any name not listed here becomes NA.
df$variable <- factor(
  df$variable,
  levels = c(
    "mvliking", "age", "income", "miles",
    "educ", "numkids", "recycle", "female"
  )
)

# plot: overlaid, semi-transparent histograms per cluster, one facet per
# variable with independent axes.
demo_cluster_hist <- ggplot(
  df,
  aes(value, fill = cluster, color = cluster)
) +
  # bins = 30 is the value stat_bin() was silently falling back to. Stating it
  # keeps the figure unchanged and stops the "Pick better value with
  # `binwidth`" message from being emitted when the plot is saved.
  geom_histogram(alpha = 0.3, position = "identity", bins = 30) +
  facet_wrap(~variable, scales = "free", ncol = 4) +
  theme_economist_white()

# Save the figure as a 2400 x 1800 pixel PNG under plotDir.
demo_cluster_hist_path <- file.path(plotDir, "demo_cluster_hist.png")
ggsave(
  filename = demo_cluster_hist_path,
  plot = demo_cluster_hist,
  width = 2400,
  height = 1800,
  units = "px"
)

7.3 Ridge plot
# Ridge plot: binned ridgelines (20 bins) of `value`, one ridge per cluster,
# faceted by variable with independent axes and blank axis titles.
demo_cluster_ridge <- ggplot(
  df,
  aes(x = value, y = cluster, fill = cluster)
) +
  geom_density_ridges(stat = "binline", bins = 20, alpha = 0.3) +
  facet_wrap(~variable, ncol = 4, scales = "free") +
  labs(x = "", y = "") +
  theme_economist_white()

# Write the figure to plotDir as a 2400 x 1800 pixel PNG.
demo_cluster_ridge_path <- file.path(plotDir, "demo_cluster_ridge.png")
ggsave(
  demo_cluster_ridge_path,
  plot = demo_cluster_ridge,
  units = "px",
  width = 2400,
  height = 1800
)
7.3.1 Count of Subjects in Clusters
# Number of subjects per cluster: one row per cluster, ordered by cluster,
# with the tally in `n_subjects`. count() replaces the previous
# group_by() + mutate(n()) + distinct() + arrange() chain and returns an
# ungrouped result.
demo_cluster_count <- demo_cluster %>%
  dplyr::count(cluster, name = "n_subjects")

# DT table (row names hidden)
DT::datatable(demo_cluster_count, rownames = FALSE)
7.3.2 Stat
7.3.2.1 female
library(ggstatsplot)

# Working copy of the merged data with categorical columns for ggbarstats().
# The factor levels are hard-coded, so any cluster value other than 1-3 (or
# female value other than 0/1) becomes NA.
DF <- demo_cluster
DF$cluster <- factor(DF$cluster, levels = c(1, 2, 3))
DF$female <- factor(DF$female, levels = c(0, 1))
custom_colors <- c("#B2182B", "white", "#4D4D4D")

# The same string is used for the PNG file name and the plot title.
png_name <- "female"
female_path <- file.path(plotDir, paste0(png_name, ".png"))

# female
# Build the plot before opening the device, so a failure in ggbarstats() does
# not leave a PNG device open.
# NOTE(review): ggbarstats() appears to map `x` to the bar fill, so this colour
# scale probably has no visible effect - confirm whether scale_fill_manual()
# was intended.
female_plot <- ggbarstats(
  data = DF,
  # type = "nonparametric",
  x = cluster,
  y = female,
  title = png_name
  # ggtheme = ggthemes::theme_tufte(base_size = 12)
) +
  scale_color_manual(values = custom_colors)

# Explicit print(): ggplot objects are only drawn automatically at top level
# with auto-printing on, so under source() the PNG would otherwise be blank.
png(
  filename = female_path,
  width = 400,
  height = 600
)
print(female_plot)
dev.off()## quartz_off_screen
## 2

7.3.2.2 Education
# educ
# Bar chart of educ within each cluster, titled "educ" and drawn on the
# current graphics device. Options left disabled by the author:
#   type = "nonparametric"
#   ggtheme = ggthemes::theme_tufte(base_size = 12)
ggbarstats(DF, x = educ, y = cluster, title = "educ") +
  scale_color_manual(values = custom_colors)
#### Recycle
# The same string is used for the PNG file name and the plot title. It was
# previously "female" (copy-paste), which made this plot overwrite female.png
# and carry the wrong title.
png_name <- "recycle"
recycle_path <- file.path(plotDir, paste0(png_name, ".png"))

# Bar chart of recycle within each cluster. Built before the device is opened
# so a failure in ggbarstats() does not leave a PNG device open. The stray
# trailing comma after `title` has been removed.
recycle_plot <- ggbarstats(
  data = DF,
  # type = "nonparametric",
  y = cluster,
  x = recycle,
  title = png_name
  # ggtheme = ggthemes::theme_tufte(base_size = 12)
)

# Explicit print(): ggplot objects are only drawn automatically at top level
# with auto-printing on, so under source() the PNG would otherwise be blank.
png(
  filename = recycle_path,
  width = 400,
  height = 600
)
print(recycle_plot)
dev.off()## quartz_off_screen
## 2
#ggbarstats(
# data = DF,
#type = "nonparametric",
#y = recycle,
#x = cluster,
#title = png_name,
# ggtheme = ggthemes::theme_tufte(base_size = 12)
#)
7.3.3 Kruskal–Wallis H test
# Kruskal-Wallis test of each demographic variable against cluster.
# Each result is converted to a plain data frame and kept under its own name,
# in case later sections refer to the individual tests.
# NOTE(review): `female` (and `cluster`) are factors in DF at this point;
# confirm that a rank-based test on the two-level `female` factor is intended.
kwh_test_educ <- DF %>%
  rstatix::kruskal_test(educ ~ cluster) %>%
  as.data.frame()
kwh_test_numkids <- DF %>%
  rstatix::kruskal_test(numkids ~ cluster) %>%
  as.data.frame()
kwh_test_mvliking <- DF %>%
  rstatix::kruskal_test(mvliking ~ cluster) %>%
  as.data.frame()
kwh_test_age <- DF %>%
  rstatix::kruskal_test(age ~ cluster) %>%
  as.data.frame()
kwh_test_income <- DF %>%
  rstatix::kruskal_test(income ~ cluster) %>%
  as.data.frame()
kwh_test_miles <- DF %>%
  rstatix::kruskal_test(miles ~ cluster) %>%
  as.data.frame()
kwh_test_female <- DF %>%
  rstatix::kruskal_test(female ~ cluster) %>%
  as.data.frame()
kwh_test_recycle <- DF %>%
  rstatix::kruskal_test(recycle ~ cluster) %>%
  as.data.frame()

# Stack all eight results into one summary table. kwh_test_numkids was
# computed above but previously left out of this list, so it never appeared
# in the table.
kwh_test <- dplyr::bind_rows(
  kwh_test_educ,
  kwh_test_numkids,
  kwh_test_mvliking,
  kwh_test_age,
  kwh_test_income,
  kwh_test_miles,
  kwh_test_female,
  kwh_test_recycle
)
DT::datatable(kwh_test)