• 9. Bootstrap script

Overview

Here I provide streamlined versions of R scripts to estimate uncertainty by a bootstrap.

Code for selecting data from a few columns from RILs planted at GC

library(readr)
library(dplyr)
ril_link <- "https://raw.githubusercontent.com/ybrandvain/datasets/refs/heads/master/clarkia_rils.csv"
ril_data <- readr::read_csv(ril_link) 

gc_rils <- ril_data |>
  filter(location == "GC", !is.na(prop_hybrid), ! is.na(mean_visits))|>
  select(petal_color, petal_area_mm, num_hybrid, offspring_genotyped, prop_hybrid, mean_visits , asd_mm )|>
  mutate(id = factor(1:n()))

Functions

The key “workhorse” functions I will use are:

The specify() to generate() to calculate() pipeline from the infer package.
summarize() from dplyr.
sd() from base R. We’ll use this to get the standard deviation of the bootstrap distribution - aka the “bootstrap standard error.”
quantile() from base R. We’ll use this to get the upper and lower tails of the bootstrap distirbution to find the “bootstrap confidence interval.”

Reproducible script

Now here it is!

Set up

library(dplyr)
library(infer)
alpha   <- .05
n_boots <- 5000

For mean prop_hybrid

se	lower_ci	upper_ci
0.023	0.107	0.197

library(dplyr)
library(infer)
gc_rils %>%
    specify(response = prop_hybrid)               |> 
    generate(reps = n_boots, type = "bootstrap")  |> 
    calculate(stat = "mean")|>
    summarise(se = sd(stat),
            lower_ci = quantile(stat, probs = alpha /2,),
            upper_ci = quantile(stat, probs = 1 - alpha /2))

For differences in mean prop_hybrid by petal color

se	lower_ci	upper_ci
0.042	0.149	0.312

gc_rils                                                          |>
  filter(!is.na(petal_color))                                    |>
  specify(prop_hybrid ~ petal_color)                             |>                      
  generate(reps = n_boots, type = "bootstrap")                   |>                   
  calculate(stat = "diff in means", order = c("pink", "white"))  |>
  summarise(se = sd(stat),
            lower_ci = quantile(stat, probs = alpha /2,),
            upper_ci = quantile(stat, probs = 1 - alpha /2))

# A tibble: 1 × 3
      se lower_ci upper_ci
   <dbl>    <dbl>    <dbl>
1 0.0423    0.147    0.312

For slopes

se	lower_ci	upper_ci
0.00173	0.00217	0.00896

gc_rils                                           |>
  filter(!is.na(petal_area_mm))                   |>
  specify(prop_hybrid ~ petal_area_mm)            |>  # The linear model we're looking into
  generate(reps = n_boots, type = "bootstrap")    |>  # Bootstrapping
  calculate(stat = "slope")                       |>
  summarise(se = sd(stat),
            lower_ci = quantile(stat, probs = alpha /2,),
            upper_ci = quantile(stat, probs = 1 - alpha /2))

# A tibble: 1 × 3
       se lower_ci upper_ci
    <dbl>    <dbl>    <dbl>
1 0.00178  0.00201  0.00900