
library(modelr)
# 10-fold cross-validation of a simple linear regression on `women`.
cv10 <- crossv_kfold(women, 10)
cv10
# Fit one model per training split, then score each model on its own
# held-out split. The score is computed with modelr::mse(), so the column
# is named `mse` to match the metric it actually holds.
cv10 %>%
  mutate(
    models = map(train, ~ lm(weight ~ height, data = .x)),
    mse = map2_dbl(models, test, modelr::mse)
  )
bootstrap求均值置信区间
# Bootstrap confidence interval for a sample mean, using infer.
df <- tibble(
  height = c(
    167, 155, 166, 161, 168, 163, 179, 164, 178, 156,
    161, 163, 168, 163, 163, 169, 162, 174, 172, 172
  )
)
library(infer)
# Draw 1000 bootstrap resamples and record the mean of each one.
boot_means <- df %>%
  specify(response = height) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")
boot_means
# Hand-computed limits: bootstrap mean minus / plus 1.96 bootstrap SDs.
with(boot_means, mean(stat) - 1.96 * sd(stat))
with(boot_means, mean(stat) + 1.96 * sd(stat))
# Interval from infer's standard-error method, centred on the bootstrap mean.
boot_ci <- get_confidence_interval(
  boot_means,
  point_estimate = mean(boot_means$stat),
  level = 0.95,
  type = "se"
)
boot_ci
# Plot the bootstrap distribution with the interval shaded.
visualize(boot_means) +
  shade_ci(endpoints = boot_ci)
bootstrap求回归系数置信区间
# Bootstrap confidence interval for a regression slope, using infer.
# Reference fit: ordinary least squares and its classical intervals.
mod <- lm(height ~ weight, data = women)
summary(mod)
confint(mod)
# Draw 1000 bootstrap resamples and record the fitted slope of each one.
boot_lm <- women %>%
  specify(height ~ weight) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "slope")
mean(boot_lm$stat)
# Hand-computed limits: bootstrap mean minus / plus 1.96 bootstrap SDs.
with(boot_lm, mean(stat) - 1.96 * sd(stat))
with(boot_lm, mean(stat) + 1.96 * sd(stat))
# Interval from infer's standard-error method, centred on the bootstrap mean.
boot_ci <- get_confidence_interval(
  boot_lm,
  point_estimate = mean(boot_lm$stat),
  level = 0.95,
  type = "se"
)
boot_ci
# Plot the bootstrap distribution with the interval shaded.
visualize(boot_lm) +
  shade_ci(endpoints = boot_ci)
一般的实验设计,都包括下面的步骤