A/B Power Calculator

Sample size, MDE, and runtime with CUPED/CUPAC variance reduction, multi-arm corrections, sequential monitoring, and design effects.

Use this tool to plan online experiments.

These are normal-approximation calculations (Wald test). They work well for large-scale online experiments, but be cautious with:

  • Very low conversion rates (< 0.5%) — consider exact tests
  • Heavy-tailed metrics (revenue, session duration) — consider bootstrap or robust methods
  • Sequential monitoring — the boundaries below are approximate; use a proper spending function library for production
  • CUPED/CUPAC — the variance-reduction % is an estimate; actual reduction depends on covariate predictive power

Reference

CUPED (Controlled-experiment Using Pre-Experiment Data) reduces variance by regressing out pre-experiment covariate information. If the covariate explains ρ² of the outcome variance, effective variance drops by a factor of (1 − ρ²), which is equivalent to multiplying your sample size by 1/(1 − ρ²).

Common variance-reduction magnitudes:

  Covariate                           Typical ρ²   VR (%)
  Pre-period of same metric (1 wk)    0.15–0.30    15–30%
  Pre-period of same metric (4 wk)    0.25–0.50    25–50%
  CUPAC (ML model of outcome)         0.30–0.60    30–60%
  Multiple covariates (MLRATE)        0.40–0.70    40–70%

Tip: run a pre-experiment analysis regressing Y on your covariates to estimate ρ² before committing to a VR assumption.
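The CUPED adjustment and the resulting variance reduction can be sketched in a few lines of NumPy. This is a minimal illustration on simulated data (the covariate strength 0.6 is an arbitrary choice, not a recommendation); the achieved reduction equals the squared sample correlation between covariate and outcome, matching the (1 − ρ²) factor above.

```python
import numpy as np

rng = np.random.default_rng(0)

# Simulated pre-period covariate X and outcome Y (hypothetical data):
# the covariate explains roughly 0.36 / 1.36 ~ 26% of Var(Y)
n = 10_000
x = rng.normal(size=n)
y = 0.6 * x + rng.normal(size=n)

# CUPED: theta = Cov(X, Y) / Var(X), then subtract theta * (X - mean(X))
theta = np.cov(x, y)[0, 1] / np.var(x, ddof=1)
y_cuped = y - theta * (x - x.mean())

# Achieved variance reduction equals the squared sample correlation
rho2 = np.corrcoef(x, y)[0, 1] ** 2
vr_estimated = 1 - np.var(y_cuped, ddof=1) / np.var(y, ddof=1)

# Equivalent sample-size factor: n_required scales by (1 - rho2)
sample_size_factor = 1 - vr_estimated
```

Note that `y_cuped` has the same mean as `y`, so treatment-effect estimates are unchanged while their standard errors shrink by √(1 − ρ²).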

For cluster-randomized experiments (e.g., randomizing at the market, store, or page level), the effective sample size is reduced by the design effect:

\[\text{DEFF} = 1 + (m - 1)\rho\]

where m = average cluster size and ρ = intraclass correlation (ICC). The SE multiplier is √DEFF.

  Scenario                      Typical ICC   m     SE mult
  Users within geo-markets      0.001–0.01    500   1.2–2.4
  Sessions within users         0.05–0.15     10    1.2–1.5
  Students within classrooms    0.10–0.25     25    1.8–2.6
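As a quick check of the formula above, a small helper (hypothetical function names) computes DEFF and the SE multiplier for the geo-market row of the table:

```python
import math

def design_effect(m: float, icc: float) -> float:
    """DEFF = 1 + (m - 1) * rho for average cluster size m and ICC rho."""
    return 1.0 + (m - 1.0) * icc

def se_multiplier(m: float, icc: float) -> float:
    """Standard-error inflation from cluster randomization: sqrt(DEFF)."""
    return math.sqrt(design_effect(m, icc))

# Users within geo-markets: m = 500 at the low end of the ICC range (0.001)
deff = design_effect(500, 0.001)   # 1 + 499 * 0.001 = 1.499
mult = se_multiplier(500, 0.001)   # sqrt(1.499) ~ 1.22
```

Even a tiny ICC matters at large cluster sizes: with m = 500, an ICC of 0.001 already inflates the SE by about 22%.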

When testing k treatment arms against a single control, the family-wise error rate (FWER) inflates. Common corrections:

  • Bonferroni: α* = α/k. Conservative but simple.
  • Šidák: α* = 1 − (1 − α)^(1/k). Slightly less conservative; assumes independence.
  • Dunnett: exact correction for many-to-one comparisons (approximated here). Accounts for correlation from shared control.

In practice, Dunnett is preferred for the many-to-one comparison structure typical in A/B/n tests.
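The Bonferroni and Šidák adjustments are simple enough to spell out directly (the Dunnett critical value requires numerical integration over the correlated test statistics, so it is omitted here). A minimal sketch with hypothetical helper names:

```python
def bonferroni_alpha(alpha: float, k: int) -> float:
    """Per-comparison level: alpha / k."""
    return alpha / k

def sidak_alpha(alpha: float, k: int) -> float:
    """Per-comparison level: 1 - (1 - alpha)^(1/k); assumes independent tests."""
    return 1.0 - (1.0 - alpha) ** (1.0 / k)

# Three treatment arms vs one control at family-wise alpha = 0.05
a_bonf = bonferroni_alpha(0.05, 3)   # ~0.0167
a_sidak = sidak_alpha(0.05, 3)       # ~0.0170, slightly less conservative
```

The gap between the two is small at typical α and k; Dunnett's correction is looser still because it exploits the positive correlation induced by the shared control arm.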

Cohen’s d (continuous metrics): 0.2 = small, 0.5 = medium, 0.8 = large.

Cohen’s h (proportions): uses the arcsine transformation, h = 2 arcsin(√p₁) − 2 arcsin(√p₀). Same thresholds as d.

In online experiments, effects are typically small (d < 0.1). A 10% relative lift on a 10% conversion rate gives h ≈ 0.03 — firmly in “small” territory, which is why large sample sizes are needed.
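The arcsine-based effect size is a one-liner; this sketch reproduces the worked example above (a 10% relative lift on a 10% baseline, i.e. p₀ = 0.10 → p₁ = 0.11):

```python
import math

def cohens_h(p1: float, p0: float) -> float:
    """Cohen's h = 2*arcsin(sqrt(p1)) - 2*arcsin(sqrt(p0))."""
    return 2.0 * math.asin(math.sqrt(p1)) - 2.0 * math.asin(math.sqrt(p0))

# 10% relative lift on a 10% baseline
h = cohens_h(0.11, 0.10)  # ~0.033, well below the 0.2 "small" threshold
```

The arcsine transformation stabilizes variance across the [0, 1] range, which is why h uses the same 0.2 / 0.5 / 0.8 thresholds as d.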

O’Brien–Fleming (OBF): conservative early boundaries (very hard to reject early), aggressive later. Nominal α at the final look is close to the unadjusted level. Preferred when you don’t expect to stop early but want the option.

Pocock: constant boundaries across looks. Easier to reject early but requires a higher bar at the final look. Preferred when early stopping is a realistic goal.

Both are implemented here as approximations. For production sequential designs, use a proper alpha-spending function (Lan–DeMets) via packages like gsDesign (R), sequential (R), or statsmodels (Python).
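In the same approximate spirit, the classic O'Brien–Fleming boundary shape can be sketched as z_k = z_final · √(K/k) over K equally spaced looks. The scaling gives only the *shape*; the final-look constant below (~2.04 for K = 5, two-sided α = 0.05) is the commonly tabulated value and should be taken from a group-sequential package such as gsDesign for real designs.

```python
import math

def obf_boundaries(z_final: float, num_looks: int) -> list[float]:
    """O'Brien-Fleming boundary shape: z_k = z_final * sqrt(K / k).

    z_final should come from a group-sequential table or gsDesign;
    this scaling only reproduces the conservative-early pattern."""
    K = num_looks
    return [z_final * math.sqrt(K / k) for k in range(1, K + 1)]

def pocock_boundaries(c: float, num_looks: int) -> list[float]:
    """Pocock: one constant critical value at every look (~2.41 for K = 5)."""
    return [c] * num_looks

bounds = obf_boundaries(2.04, 5)
# The first look demands |z| > 4.5; the final look (~2.04) is near 1.96
```

The contrast is visible immediately: OBF starts near |z| > 4.5 and relaxes toward the unadjusted level, while Pocock holds one elevated bar throughout, which is why it costs more power at the final look.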