# Load the birthweight data, normalise the column names, and recode the
# numerically coded categorical columns into labelled factors.
# The recoding steps have to live inside mutate(): as bare `name = value`
# lines after a pipe they are not valid R.
bwt_df =
  read_csv("./birthweight.csv") %>%
  janitor::clean_names() %>%
  mutate(
    # sex is stored as 1/2; relabel the factor levels
    babysex = as.factor(babysex),
    babysex = fct_recode(babysex, "male" = "1", "female" = "2"),
    # father's race codes -> labels
    frace = as.factor(frace),
    frace = fct_recode(frace, "white" = "1", "black" = "2", "asian" = "3",
                       "puerto rican" = "4", "other" = "8"),
    malform = as.logical(malform),
    # mother's race codes -> labels
    mrace = as.factor(mrace),
    mrace = fct_recode(mrace, "white" = "1", "black" = "2", "asian" = "3",
                       "puerto rican" = "4")
  )
# NOTE(review): the chain previously ended in a dangling `%>%` with no step
# after it. If a further step (e.g. subsampling rows) was intended, add it
# to the pipeline above.
Inputs for “glmnet”
# Predictor matrix for glmnet: expand every column other than bwt through
# model.matrix() and drop the first column, which is the intercept.
x = model.matrix(bwt ~ ., data = bwt_df)[, -1]
# Response vector: the bwt column.
y = bwt_df[["bwt"]]
Fit Lasso!
# Decreasing grid of penalty values, from 10^3 down to 10^-2 in steps of 0.1
# on the log10 scale; used to illustrate how the lasso behaves as the
# penalty changes.
lambda = 10^seq(from = 3, to = -2, by = -0.1)
# Fit the model at every penalty value in the lambda grid.
lasso_fit = glmnet(x = x, y = y, lambda = lambda)

# Cross-validate over the same grid and keep the lambda that cv.glmnet
# reports as lambda.min (the "best" lambda).
lasso_cv = cv.glmnet(x = x, y = y, lambda = lambda)
lambda_opt = lasso_cv[["lambda.min"]]
This is the plot for lasso
# Coefficient paths: one line per term across the lambda grid (log10 scale),
# with a vertical blue marker at lambda_opt. The legend is hidden.
lasso_fit %>%
  broom::tidy() %>%
  select(term, lambda, estimate) %>%
  # term/lambda combinations absent from the tidy output get an estimate of 0
  complete(term, lambda, fill = list(estimate = 0)) %>%
  # the intercept is not drawn
  filter(term != "(Intercept)") %>%
  ggplot(aes(x = log(lambda, 10), y = estimate, group = term, color = term)) +
  geom_path() +
  geom_vline(xintercept = log(lambda_opt, 10), color = "blue", size = 1.2) +
  theme(legend.position = "none")
# Cross-validation results: the tidied `estimate` column of lasso_cv plotted
# against log10(lambda).
# The chain previously ended on a bare `+` with no layer, which does not
# parse; a point layer is added so the plot actually draws.
broom::tidy(lasso_cv) %>%
  ggplot(aes(x = log(lambda, 10), y = estimate)) +
  geom_point()
# Load the Pokemon data, normalise the column names, and keep only the two
# columns that the clustering below works on.
poke_df =
  "./pokemon.csv" %>%
  read_csv() %>%
  janitor::clean_names() %>%
  select(hp, speed)
# Scatterplot of speed against hp before any clustering.
# The chain previously ended on a bare `+` with no layer, which does not
# parse; a point layer is added so the plot actually draws.
poke_df %>%
  ggplot(aes(x = hp, y = speed)) +
  geom_point()
Run K means
# Run k-means on the hp/speed table, asking for 3 clusters.
kmeans_fit = kmeans(poke_df, centers = 3)
# Attach the cluster assignment from the k-means fit to the original rows;
# the plot that follows reads it from the .cluster column.
poke_df = kmeans_fit %>% broom::augment(poke_df)
# Scatterplot of speed against hp, coloured by the assigned cluster.
# The chain previously ended on a bare `+` with no layer, which does not
# parse; a point layer is added so the plot actually draws.
poke_df %>%
  ggplot(aes(x = hp, y = speed, color = .cluster)) +
  geom_point()
# Fit k-means for k = 2, 3 and 4 and keep, for each k, the fitted object and
# a copy of the data with that fit's cluster labels attached.
# The two list-columns have to be created inside mutate(): as bare
# `name = value` lines after a pipe they are not valid R.
clusts =
  tibble(k = 2:4) %>%
  mutate(
    # Cluster on hp and speed only. By this point poke_df also carries the
    # .cluster column from the earlier 3-cluster fit, and feeding those
    # labels back into kmeans() would let them influence the new clusters.
    km_fit = map(k, ~kmeans(select(poke_df, hp, speed), centers = .x)),
    augmented = map(km_fit, ~broom::augment(.x, poke_df))
  )
clusts %>%
select(-km_fit) %>%
unnest(augmented) %>%
ggplot(aes(hp, speed, color = .cluster)) +
geom_point(aes(color = .cluster)) +