hypothesize

A consistent API for hypothesis testing in R, designed around principles from Structure and Interpretation of Computer Programs (SICP): primitive tests, means of combination and transformation, and a uniform generic interface.

Installation

install.packages("hypothesize")

Or install the development version from GitHub:

# install.packages("devtools")
devtools::install_github("queelius/hypothesize")

The Interface

Every hypothesis test implements the same generic methods:

# Create a Wald test
w <- wald_test(estimate = 2.5, se = 0.8, null_value = 0)

pval(w)                    # p-value
#> [1] 0.00178
test_stat(w)               # test statistic
#> [1] 9.77
dof(w)                     # degrees of freedom
#> [1] 1
is_significant_at(w, 0.05) # significance check
#> [1] TRUE
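Because every test type answers the same generics, heterogeneous tests can be handled with a single code path. A minimal sketch mixing two of the test constructors described in this README:

```r
# A list can mix test types; the generics dispatch correctly on each.
tests <- list(
  wald_test(estimate = 2.5, se = 0.8),
  lrt(null_loglik = -150, alt_loglik = -140, dof = 3)
)
vapply(tests, test_stat, numeric(1))  # one call works for every test type
vapply(tests, pval, numeric(1))
```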

Primitive Tests

Z-Test (simplest case)

# Test if population mean equals 100 (known sigma = 15)
set.seed(42)
x <- rnorm(30, mean = 105, sd = 15)
z_test(x, mu0 = 100, sigma = 15)
#> Hypothesis test (z_test)
#> -----------------------------
#> Test statistic: 2.20140742409738
#> P-value: 0.0277071938999057
#> Degrees of freedom: Inf
#> Significant at 5% level: TRUE

Wald Test (general parameters)

# Test if a regression coefficient equals zero
wald_test(estimate = 1.8, se = 0.7)
#> Hypothesis test (wald_test)
#> -----------------------------
#> Test statistic: 6.61224489795918
#> P-value: 0.0101279905493907
#> Degrees of freedom: 1
#> Significant at 5% level: TRUE

Likelihood Ratio Test (model comparison)

# From raw log-likelihoods
lrt(null_loglik = -150, alt_loglik = -140, dof = 3)
#> Hypothesis test (likelihood_ratio_test)
#> -----------------------------
#> Test statistic: 20
#> P-value: 0.000169742435552826
#> Degrees of freedom: 3
#> Significant at 5% level: TRUE

# Or from fitted models -- dof derived automatically
set.seed(42)
x <- 1:50; y <- 2 + 3 * x + rnorm(50, sd = 5)
lrt(logLik(lm(y ~ 1)), logLik(lm(y ~ x)))
#> Hypothesis test (likelihood_ratio_test)
#> -----------------------------
#> Test statistic: 203.015531891017
#> P-value: 4.58985868918993e-46
#> Degrees of freedom: 1
#> Significant at 5% level: TRUE

Combining Tests (Closure Property)

Fisher’s method combines independent p-values—and returns a hypothesis test:

# Three studies, none individually significant
fisher_combine(0.08, 0.12, 0.06)
#> Hypothesis test (fisher_combined_test)
#> -----------------------------
#> Test statistic: 14.9188057945368
#> P-value: 0.0208977112449541
#> Degrees of freedom: 6
#> Significant at 5% level: TRUE
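Because the combined result is itself a hypothesis test, it answers the same generics as any primitive test, and its p-value can in turn enter a further combination. A sketch:

```r
combined <- fisher_combine(0.08, 0.12, 0.06)
pval(combined)  # the combined object answers the usual generics
#> [1] 0.0208977112449541

# Closure in action: the combined p-value can itself be combined again
fisher_combine(pval(combined), 0.5)
```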

Transforming Tests (Higher-Order Functions)

Adjust p-values for multiple testing:

tests <- list(
  wald_test(estimate = 2.5, se = 1.0),
  wald_test(estimate = 1.8, se = 0.9),
  wald_test(estimate = 1.2, se = 0.7)
)

# Original p-values
vapply(tests, pval, numeric(1))
#> [1] 0.0124 0.0455 0.0865

# Bonferroni-adjusted
vapply(adjust_pval(tests, method = "bonferroni"), pval, numeric(1))
#> [1] 0.0373 0.1365 0.2594
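Since adjust_pval returns hypothesis tests rather than bare numbers, the other generics apply to the adjusted objects too. A short sketch checking which results survive the correction (continuing with the tests list above):

```r
# Only the first test remains significant after Bonferroni adjustment
adjusted <- adjust_pval(tests, method = "bonferroni")
vapply(adjusted, is_significant_at, logical(1), 0.05)
#> [1]  TRUE FALSE FALSE
```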

Duality: Tests ↔ Confidence Intervals

w <- wald_test(estimate = 5.0, se = 1.2)
confint(w)
#> lower upper 
#>  2.65  7.35
confint(w, level = 0.99)
#> lower upper 
#>  1.91  8.09
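The duality means the two views agree: the 95% interval excludes the null value 0 exactly when the test rejects at the 5% level. A quick check:

```r
w <- wald_test(estimate = 5.0, se = 1.2)
confint(w)                 # [2.65, 7.35] does not contain 0 ...
is_significant_at(w, 0.05) # ... so the test rejects at the 5% level
#> [1] TRUE
```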

Extending the Package

Create new test types by calling hypothesis_test():

# Custom chi-squared goodness-of-fit wrapper
chisq_gof <- function(observed, expected) {
  stat <- sum((observed - expected)^2 / expected)
  df <- length(observed) - 1
  hypothesis_test(
    stat = stat,
    p.value = pchisq(stat, df, lower.tail = FALSE),
    dof = df,
    superclasses = "chisq_gof_test"
  )
}

chisq_gof(observed = c(45, 35, 20), expected = c(40, 40, 20))
#> Hypothesis test (chisq_gof_test)
#> -----------------------------
#> Test statistic: 1.25
#> P-value: 0.53526142851899
#> Degrees of freedom: 2
#> Significant at 5% level: FALSE
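Because the wrapper delegates to hypothesis_test(), the custom test inherits the full generic interface for free:

```r
# The custom test answers the same generics as the built-in tests
g <- chisq_gof(observed = c(45, 35, 20), expected = c(40, 40, 20))
pval(g)
#> [1] 0.53526142851899
is_significant_at(g, 0.05)
#> [1] FALSE
```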

Learn More

See vignette("introduction", package = "hypothesize") for a full tutorial.