10折交叉验证

Untitled

library(modelr)

# 10-fold cross-validation of the simple regression weight ~ height on the
# built-in `women` data set.
# NOTE(review): mutate(), map(), map2_dbl() and %>% are not provided by
# modelr itself; this assumes dplyr/purrr (e.g. library(tidyverse)) are
# attached elsewhere -- confirm.
cv10 <- crossv_kfold(women, k = 10)
cv10

# Fit one linear model per training split, then score each model on its
# matching held-out split. The score is modelr::mse() (mean squared error),
# so the column is named `test_mse`; the previous name `r2` mislabelled it.
cv10 %>%
  mutate(
    models = map(train, ~ lm(weight ~ height, data = .x)),
    test_mse = map2_dbl(models, test, modelr::mse)
  )

Bootstrap抽样法

bootstrap求均值置信区间

# Sample of 20 height measurements used by the bootstrap example below.
df <- tibble(
  height = c(
    167, 155, 166, 161, 168, 163, 179, 164, 178, 156,
    161, 163, 168, 163, 163, 169, 162, 174, 172, 172
  )
)

library(infer)
# Bootstrap distribution of the sample mean of `height`.
boot_means <- df %>%
  specify(response = height) %>%
  generate(reps = 1000, type = "bootstrap") %>% # 1000 bootstrap resamples
  calculate(stat = "mean") # statistic: sample mean
boot_means

# Observed statistic from the original sample. A standard-error interval is
# centred on this value, not on the mean of the bootstrap distribution.
obs_mean <- df %>%
  specify(response = height) %>%
  calculate(stat = "mean")
obs_mean

# Manual 95% interval: observed mean +/- 1.96 * bootstrap standard error.
boot_se <- sd(boot_means$stat)
obs_mean$stat - 1.96 * boot_se
obs_mean$stat + 1.96 * boot_se

# Same interval computed by infer; `point_estimate` is the observed statistic.
boot_ci <- boot_means %>%
  get_confidence_interval(
    point_estimate = obs_mean,
    level = 0.95,
    type = "se"
  )
boot_ci

# Plot the bootstrap distribution with the interval shaded.
visualize(boot_means) +
  shade_ci(endpoints = boot_ci)

bootstrap求回归系数置信区间

# Classical least-squares fit of height on weight for the built-in `women`
# data; printed for comparison with the bootstrap results.
mod <- lm(formula = height ~ weight, data = women)

# Coefficient table with standard errors, then the confidence intervals
# for the coefficients (confint()'s default level).
summary(mod)
confint(mod)

# Bootstrap distribution of the regression slope of height on weight.
boot_lm <- women %>%
  specify(height ~ weight) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "slope")

# Observed slope from the original sample. A standard-error interval is
# centred on this value, not on the mean of the bootstrap distribution.
obs_slope <- women %>%
  specify(height ~ weight) %>%
  calculate(stat = "slope")
obs_slope

# Mean of the bootstrap slopes, shown for comparison with the observed slope.
mean(boot_lm$stat)

# Manual 95% interval: observed slope +/- 1.96 * bootstrap standard error.
boot_se <- sd(boot_lm$stat)
obs_slope$stat - 1.96 * boot_se
obs_slope$stat + 1.96 * boot_se

# Same interval computed by infer; `point_estimate` is the observed statistic.
boot_ci <- boot_lm %>%
  get_confidence_interval(
    point_estimate = obs_slope,
    level = 0.95,
    type = "se"
  )
boot_ci

# Plot the bootstrap distribution with the interval shaded.
visualize(boot_lm) +
  shade_ci(endpoints = boot_ci)

实验设计

一般的实验设计,都包括下面的步骤: