Simulation for testing average hazard ratio and sample size under non-proportional hazards • gsDesign2

library(gsDesign2)
library(gsDesign)
library(ggplot2)
library(dplyr)
library(tibble)
library(simtrial)
library(survival)
library(knitr)

Introduction

This document demonstrates a simple simulation for unit testing of average hazard ratio function (AHR()). The details on calculating average hazard ratio is already available in vignette("AHRVignette"), and the purpose of this vignette is to show how a simulation can be conducted in order to approximate the average hazard ratio. The results were used for unit test to see whether the simulation results could be good approximation of the actual results from AHR().

The simulation is wrapped into a function which gives the users the flexibility the design assumptions. The simulations are based on targeted event only, but for other scenarios, similar approach can be taken.

Intial setup

We begin by setting two parameters that will be used throughout in simulations used to verify accuracy of power approximations; either could be customized for each simulation. First, we set the number of simulations to be performed. You can increase this to improve accuracy of simulation estimates of power.

nsim <- 2000
block <- rep(c("Experimental", "Control"), 2)
strata <- tibble::tibble(Stratum = "All", p = 1)

Design scenario

We set up the design parameters. Enrollment ramps up over the course of the first 4 months follow-up by a steady state enrollment thereafter. This will be adjusted proportionately to power the trial later. The control group has a piecewise exponential distribution with median 9 for the first 3 months and 18 thereafter. The hazard ratio of the experimental group versus control is 0.9 for the first 3 months followed by 0.6 thereafter.

enrollRates <- tibble::tibble(
  Stratum = "All",
  duration = c(2, 2, 10),
  rate = c(3, 6, 9)
)
failRates <- tibble::tibble(
  Stratum = "All",
  duration = c(3, 100),
  failRate = log(2) / c(9, 18),
  hr = c(.9, .6),
  dropoutRate = rep(.001, 2)
)

The Fleming-Harrington weights can be defined in case the users want to run any weighted logrank tests, and is defined as rg as follows.

rg <- tibble(rho = 0, gamma = 0)

For this simulation, we consider the sample size and the events are given as N and events and the bounds for the interim analysis are provided as bounds.

N <- enrollRates %>% summarise(N = sum(rate * duration))
events <- c(20.4, 48.9, 66.1)
bounds <- tibble::tibble(
  k = 1:3,
  upper = c(2.962588, 2.359018, 2.014084),
  lower = c(qnorm(.05), qnorm(.1), -Inf)
)

The simulation for the average hazard ratio for \(k = 3\) interim analysis can be then conducted as follows:

K <- length(events)
fr <- simtrial::simfix2simPWSurv(failRates = failRates)
simresult <- NULL
for (i in 1:nsim) {
  sim <- simtrial::simPWSurv(
    n = as.numeric(N),
    enrollRates = enrollRates,
    failRates = fr$failRates,
    dropoutRates = fr$dropoutRates,
    strata = strata,
    block = block
  )
  for (e in 1:K) {
    dt <- simtrial::getCutDateForCount(x = sim, count = events[e])
    ds <- sim %>% simtrial::cutData(dt)
    res.cox <- coxph(Surv(time = tte, event = event) ~ Treatment + strata(Stratum), data = ds)
    Cox.coef <- res.cox$coefficients
    Z <- sim %>%
      cutDataAtCount(events[e]) %>% # cut simulation for analysis at targeted events
      tensurv(txval = "Experimental") %>%
      tenFH(rg = rg)
    simresult <- rbind(
      simresult,
      tibble(
        sim = i,
        k = e,
        Events = events[e],
        Z = -Z$Z, # Change sign for Z
        Time = dt,
        Cox.coef = Cox.coef
      )
    )
  }
}

simresult <- tibble(simresult, N) %>% mutate(N = as.integer(N))
simresult

After combining the simulation results they can be summarized with respect to the analysis time as

simresult %>%
  full_join(bounds, by = "k") %>%
  tidyr::gather(c("upper", "lower"), key = "Bounds", value = "value") %>%
  group_by(k, Bounds) %>%
  summarize(
    n = n(),
    Time = mean(Time),
    AHR = exp(mean(Cox.coef)),
    z = unique(value),
    Events = unique(Events)
  ) %>%
  mutate_if(is.numeric, round, digits = 4) %>%
  arrange(desc(Bounds)) %>%
  select(k, Bounds, Time, Events, AHR, z)

Putting them all into a function

Finally, the whole simulation approach is wrapped into a function sim_gsd where the users can modify the design assumptions based on their desired design.

sim_gsd <- function(nsim = 1000,
                    block = rep(c("Experimental", "Control"), 2),
                    strata = tibble::tibble(Stratum = "All", p = 1),
                    enrollRates = tibble::tibble(
                      Stratum = "All",
                      duration = c(2, 2, 10),
                      rate = c(3, 6, 9)
                    ),
                    failRates = tibble::tibble(
                      Stratum = "All",
                      duration = c(3, 100),
                      failRate = log(2) / c(9, 18),
                      hr = c(.9, .6),
                      dropoutRate = rep(.001, 2)
                    ),
                    rg = tibble(rho = 0, gamma = 0),
                    N = NULL,
                    # events = c(158.954, 200.636, 252.077),
                    events = c(20.4, 48.9, 66.1),
                    bounds = tibble::tibble(
                      k = 1:3,
                      upper = c(2.962588, 2.359018, 2.014084),
                      lower = c(qnorm(.05), qnorm(.1), -Inf)
                    )) {
  N <- ifelse(is.null(N), enrollRates %>% summarise(N = sum(rate * duration)), N)
  K <- length(events)
  fr <- simtrial::simfix2simPWSurv(failRates = failRates)
  simresult <- NULL
  for (i in 1:nsim) {
    sim <- simtrial::simPWSurv(
      n = as.numeric(N),
      enrollRates = enrollRates,
      failRates = fr$failRates,
      dropoutRates = fr$dropoutRates,
      strata = strata,
      block = block
    )
    for (e in 1:K) {
      dt <- simtrial::getCutDateForCount(x = sim, count = events[e])
      ds <- sim %>% simtrial::cutData(dt)
      res.cox <- coxph(Surv(time = tte, event = event) ~ Treatment + strata(Stratum), data = ds)
      Cox.coef <- res.cox$coefficients
      Z <- sim %>%
        cutDataAtCount(events[e]) %>% # cut simulation for analysis at targeted events
        tensurv(txval = "Experimental") %>%
        tenFH(rg = rg)
      simresult <- rbind(
        simresult,
        tibble(
          sim = i,
          k = e,
          Events = events[e],
          Z = -Z$Z, # Change sign for Z
          Time = dt,
          Cox.coef = Cox.coef
        )
      )
    }
  }

  simresult <- tibble(simresult, N) %>% mutate(N = as.integer(N))
  res <- simresult %>%
    full_join(bounds, by = "k") %>%
    tidyr::gather(c("upper", "lower"), key = "Bounds", value = "value") %>%
    group_by(k, Bounds) %>%
    summarize(
      n = n(),
      Time = mean(Time),
      AHR = exp(mean(Cox.coef)),
      z = unique(value),
      Events = unique(Events)
    ) %>%
    mutate_if(is.numeric, round, digits = 4) %>%
    arrange(desc(Bounds)) %>%
    select(k, Bounds, Time, Events, AHR, z)
  res <- tibble::tibble(
    Analysis = res$k,
    Bound = res$Bounds,
    Z = res$z,
    Time = res$Time,
    AHR = res$AHR,
    Events = res$Events
  )

  res <- tibble(res, N) %>% mutate(N = as.integer(N))
  return(res)
}

Here is a small simulation of size 10 to show the results of sim_gsd

sim_gsd(nsim = 100)