• 9. Bootstrap script

Overview

Here I provide streamlined versions of R scripts to estimate uncertainty by a bootstrap.

Code for selecting data from a few columns from RILs planted at GC
library(readr)
library(dplyr)
ril_link <- "https://raw.githubusercontent.com/ybrandvain/datasets/refs/heads/master/clarkia_rils.csv"
ril_data <- readr::read_csv(ril_link) 

gc_rils <- ril_data |>
  filter(location == "GC", !is.na(prop_hybrid), ! is.na(mean_visits))|>
  select(petal_color, petal_area_mm, num_hybrid, offspring_genotyped, prop_hybrid, mean_visits , asd_mm )|>
  mutate(id = factor(1:n()))

Functions

The key “workhorse” functions I will use are:

  • The specify() to generate() to calculate() pipeline from the infer package.

  • summarize() from dplyr.

  • sd() from base R. We’ll use this to get the standard deviation of the bootstrap distribution - aka the “bootstrap standard error.”

  • quantile() from base R. We’ll use this to get the upper and lower tails of the bootstrap distirbution to find the “bootstrap confidence interval.”

Reproducible script

Now here it is!

Set up

library(dplyr)
library(infer)
alpha   <- .05
n_boots <- 5000

For mean prop_hybrid

se lower_ci upper_ci
0.023 0.107 0.197
library(dplyr)
library(infer)
gc_rils %>%
    specify(response = prop_hybrid)               |> 
    generate(reps = n_boots, type = "bootstrap")  |> 
    calculate(stat = "mean")|>
    summarise(se = sd(stat),
            lower_ci = quantile(stat, probs = alpha /2,),
            upper_ci = quantile(stat, probs = 1 - alpha /2))

For differences in mean prop_hybrid by petal color

se lower_ci upper_ci
0.042 0.149 0.312
gc_rils                                                          |>
  filter(!is.na(petal_color))                                    |>
  specify(prop_hybrid ~ petal_color)                             |>                      
  generate(reps = n_boots, type = "bootstrap")                   |>                   
  calculate(stat = "diff in means", order = c("pink", "white"))  |>
  summarise(se = sd(stat),
            lower_ci = quantile(stat, probs = alpha /2,),
            upper_ci = quantile(stat, probs = 1 - alpha /2))
# A tibble: 1 × 3
      se lower_ci upper_ci
   <dbl>    <dbl>    <dbl>
1 0.0423    0.147    0.312

For slopes

se lower_ci upper_ci
0.00173 0.00217 0.00896
gc_rils                                           |>
  filter(!is.na(petal_area_mm))                   |>
  specify(prop_hybrid ~ petal_area_mm)            |>  # The linear model we're looking into
  generate(reps = n_boots, type = "bootstrap")    |>  # Bootstrapping
  calculate(stat = "slope")                       |>
  summarise(se = sd(stat),
            lower_ci = quantile(stat, probs = alpha /2,),
            upper_ci = quantile(stat, probs = 1 - alpha /2))
# A tibble: 1 × 3
       se lower_ci upper_ci
    <dbl>    <dbl>    <dbl>
1 0.00178  0.00201  0.00900