Skip to content

Commit

Permalink
Merge pull request #114 from jdblischak/data.table-pr
Browse files Browse the repository at this point in the history
Convert sim_fixed_n() to use data.table internally
  • Loading branch information
nanxstats authored Oct 11, 2023
2 parents 3419ba5 + 04b36d0 commit 0f06d96
Show file tree
Hide file tree
Showing 25 changed files with 825 additions and 303 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches: [main, master]
pull_request:
branches: [main, master]
workflow_dispatch:

name: R-CMD-check

Expand All @@ -27,6 +28,7 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
SIMTRIAL_TEST_BACKWARDS_COMPATIBILITY_REF: 341f77f0a598dc6d638bd5c48746952a7db88255

steps:
- uses: actions/checkout@v3
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ inst/doc
.Rhistory
.RData
.Ruserdata
tests/testthat/fixtures/backwards-compatibility
9 changes: 7 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: simtrial
Type: Package
Title: Clinical Trial Simulation
Version: 0.3.0
Version: 0.3.0.1
Authors@R: c(
person("Keaven", "Anderson", email = "[email protected]", role = c("aut")),
person("Yilong", "Zhang", email = "[email protected]", role = c("aut")),
Expand All @@ -17,6 +17,7 @@ Authors@R: c(
person("Heng", "Zhou", role = c("ctb")),
person("Amin", "Shirazi", role = c("ctb")),
person("Cole", "Manschot", role = c("ctb")),
person("John", "Blischak", role = c("ctb")),
person("Merck & Co., Inc., Rahway, NJ, USA and its affiliates", role = "cph")
)
Description: simtrial provides some basic routines for simulating a
Expand All @@ -36,16 +37,19 @@ VignetteBuilder: knitr
Depends: R (>= 3.5.0)
Imports:
Rcpp,
data.table,
doFuture,
dplyr,
foreach,
future,
magrittr,
methods,
mvtnorm,
stats,
survival,
tibble,
tidyr
tidyr,
utils
Suggests:
Matrix,
bshazard,
Expand All @@ -54,6 +58,7 @@ Suggests:
gsDesign,
knitr,
markdown,
remotes,
rmarkdown,
stringr,
survMisc,
Expand Down
22 changes: 11 additions & 11 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,33 @@ export(sim_pw_surv)
export(simfix2simpwsurv)
export(wlr)
importFrom(Rcpp,sourceCpp)
importFrom(data.table,":=")
importFrom(data.table,.N)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,frankv)
importFrom(data.table,last)
importFrom(data.table,merge.data.table)
importFrom(data.table,rbindlist)
importFrom(data.table,setDF)
importFrom(data.table,setDT)
importFrom(data.table,setorderv)
importFrom(doFuture,"%dofuture%")
importFrom(dplyr,arrange)
importFrom(dplyr,desc)
importFrom(dplyr,filter)
importFrom(dplyr,first)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,lag)
importFrom(dplyr,last)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,right_join)
importFrom(dplyr,row_number)
importFrom(dplyr,select)
importFrom(dplyr,starts_with)
importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(future,plan)
importFrom(magrittr,"%>%")
importFrom(methods,is)
importFrom(mvtnorm,GenzBretz)
importFrom(mvtnorm,pmvnorm)
importFrom(survival,Surv)
importFrom(survival,is.Surv)
importFrom(tibble,as_tibble)
importFrom(tibble,tibble)
importFrom(tidyr,expand)
importFrom(tidyr,replace_na)
useDynLib(simtrial, .registration = TRUE)
73 changes: 35 additions & 38 deletions R/counting_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#' treatment group value.
#'
#' @return
#' A `tibble` grouped by `stratum` and sorted within stratum by `tte`.
#' A data frame grouped by `stratum` and sorted within stratum by `tte`.
#' Remain rows with at least one event in the population, at least one subject
#' is at risk in both treatment group and control group.
#' Other variables in this represent the following within each stratum at
Expand All @@ -57,8 +57,7 @@
#' hypothesis)
#' - `var_o_minus_e`: Variance of `o_minus_e` under the same assumption.
#'
#' @importFrom dplyr group_by arrange desc mutate
#' summarize first filter select lag
#' @importFrom data.table ":=" as.data.table setDF
#'
#' @export
#'
Expand Down Expand Up @@ -95,40 +94,38 @@ counting_process <- function(x, arm) {
stop("counting_process: event indicator must be 0 (censoring) or 1 (event).")
}

ans <- x %>%
group_by(stratum) %>%
arrange(desc(tte)) %>%
mutate(
one = 1,
n_risk_tol = cumsum(one),
n_risk_trt = cumsum(treatment == arm)
) %>%
# Handling ties using Breslow's method
group_by(stratum, mtte = desc(tte)) %>%
summarize(
events = sum(event),
n_event_tol = sum((treatment == arm) * event),
tte = first(tte),
n_risk_tol = max(n_risk_tol),
n_risk_trt = max(n_risk_trt)
) %>%
# Keep calculation for observed time with at least one event,
# at least one subject is at risk in both treatment group and control group.
filter(events > 0, n_risk_tol - n_risk_trt > 0, n_risk_trt > 0) %>%
select(-mtte) %>%
mutate(s = 1 - events / n_risk_tol) %>%
arrange(stratum, tte) %>%
group_by(stratum) %>%
mutate(
# Left continuous Kaplan-Meier Estimator
s = dplyr::lag(cumprod(s), default = 1),
# Observed events minus Expected events in treatment group
o_minus_e = n_event_tol - n_risk_trt / n_risk_tol * events,
# Variance of o_minus_e
var_o_minus_e = (n_risk_tol - n_risk_trt) *
n_risk_trt * events * (n_risk_tol - events) /
n_risk_tol^2 / (n_risk_tol - 1)
)
ans <- as.data.table(x)
ans <- ans[order(tte, decreasing = TRUE), ]
ans[, one := 1]
ans[, `:=`(
n_risk_tol = cumsum(one),
n_risk_trt = cumsum(treatment == arm)
), by = "stratum"]

ans
# Handling ties using Breslow's method
ans[, mtte := -tte]
ans <- ans[, .(
events = sum(event),
n_event_tol = sum((treatment == arm) * event),
tte = tte[1],
n_risk_tol = max(n_risk_tol),
n_risk_trt = max(n_risk_trt)
), by = c("stratum", "mtte")]

# Keep calculation for observed time with at least one event,
# at least one subject is at risk in both treatment group and control group.
ans <- ans[events > 0 & n_risk_tol - n_risk_trt > 0 & n_risk_trt > 0, ]
ans[, mtte := NULL]
ans[, s := 1 - events / n_risk_tol]
ans <- ans[order(stratum, tte), ]
# Left continuous Kaplan-Meier Estimator
ans[, s := c(1, cumprod(s)[-length(s)]), by = "stratum"]
# Observed events minus Expected events in treatment group
ans[, o_minus_e := n_event_tol - n_risk_trt / n_risk_tol * events]
# Variance of o_minus_e
ans[, var_o_minus_e := (n_risk_tol - n_risk_trt) *
n_risk_trt * events * (n_risk_tol - events) /
n_risk_tol^2 / (n_risk_tol - 1)]

return(setDF(ans))
}
16 changes: 7 additions & 9 deletions R/cut_data_by_date.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#'
#' @return A dataset ready for survival analysis.
#'
#' @importFrom dplyr filter mutate select
#' @importFrom data.table ":=" as.data.table setDF
#'
#' @export
#'
Expand All @@ -33,13 +33,11 @@
#' # cut at calendar time 5 after start of randomization
#' sim_pw_surv(n = 20) %>% cut_data_by_date(5)
cut_data_by_date <- function(x, cut_date) {
ans <- x %>%
filter(enroll_time <= cut_date) %>%
mutate(
tte = pmin(cte, cut_date) - enroll_time,
event = fail * (cte <= cut_date)
) %>%
select(tte, event, stratum, treatment)
ans <- as.data.table(x)
ans <- ans[enroll_time <= cut_date, ]
ans[, tte := pmin(cte, cut_date) - enroll_time]
ans[, event := fail * (cte <= cut_date)]
ans <- ans[, c("tte", "event", "stratum", "treatment")]

ans
return(setDF(ans))
}
6 changes: 4 additions & 2 deletions R/cut_data_by_event.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@
#' @param x A time-to-event dataset, for example, generated by [sim_pw_surv()].
#' @param event Event count at which data cutoff is to be made.
#'
#' @return A tibble ready for survival analysis, including columns
#' @return A data frame ready for survival analysis, including columns
#' time to event (`tte`), `event`, the `stratum`, and the `treatment`.
#'
#' @importFrom data.table setDF
#'
#' @export
#'
#' @examples
Expand All @@ -36,5 +38,5 @@
cut_data_by_event <- function(x, event) {
cut_date <- get_cut_date_by_event(x, event)
ans <- x %>% cut_data_by_date(cut_date = cut_date)
ans
return(setDF(ans))
}
18 changes: 8 additions & 10 deletions R/get_cut_date_by_event.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' at which the targeted event count is reached, or if the final event count
#' is never reached, the final `cte` at which an event occurs.
#'
#' @importFrom dplyr ungroup select filter arrange mutate row_number last
#' @importFrom data.table ":=" as.data.table frankv last
#'
#' @export
#'
Expand Down Expand Up @@ -62,14 +62,12 @@
#' y <- cut_data_by_date(x, cut_date = d)
#' table(y$stratum, y$event)
get_cut_date_by_event <- function(x, event) {
y <- x %>%
ungroup() %>%
select(cte, fail) %>%
filter(fail == 1) %>%
select(cte) %>%
arrange(cte) %>%
mutate(eventCount = row_number()) %>%
subset(eventCount <= event)
y <- as.data.table(x)
y <- y[fail == 1, ]
y <- y[, .(cte)]
y <- y[order(cte), ]
y[, eventCount := frankv(y, "cte", ties.method = "first")]
y <- y[eventCount <= event, ]

last(y$cte)
return(last(y$cte))
}
43 changes: 23 additions & 20 deletions R/global.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,43 +21,46 @@

utils::globalVariables(
c(
"atrisk",
"Count",
".",
"Ex1delayedEffect",
"N",
"cte",
"dropoutRate",
"dropoutTime",
"enrollTime",
"dropout_rate",
"dropout_time",
"duration",
"enroll_time",
"event",
"eventCount",
"events",
"Ex1delayedEffect",
"fail",
"fail_rate",
"fail_time",
"hr",
"duration",
"enrollTime",
"event",
"finish",
"hr",
"i",
"lambda",
"max_weight",
"mtte",
"N",
"OminusE",
"n_event_tol",
"n_risk_tol",
"n_risk_trt",
"nbrOfWorkers",
"o_minus_e",
"one",
"origin",
"period",
"rate",
"s",
"S",
"status",
"stratum",
"time",
"treatment",
"tte",
"txevents",
"Var",
"w",
"wOminusE",
"wVar",
"txatrisk"
"var_o_minus_e"
)
)

# Workaround to remove `R CMD check` NOTE "All declared Imports should be used."
# https://r-pkgs.org/dependencies-in-practice.html#how-to-not-use-a-package-in-imports
ignore_unused_imports <- function() {
utils::globalVariables
}
Loading

0 comments on commit 0f06d96

Please sign in to comment.