## -----------------------------------------------------------------------------
#| include: false
has_pkg <- requireNamespace("TemporalHazard", quietly = TRUE) &&
  requireNamespace("ggplot2", quietly = TRUE)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  eval     = has_pkg
)


## -----------------------------------------------------------------------------
#| label: setup
library(TemporalHazard)
library(survival)
library(ggplot2)


## -----------------------------------------------------------------------------
#| label: kul-data
data(cabgkul)
str(cabgkul)


## -----------------------------------------------------------------------------
#| label: kul-weibull
fit_kul <- hazard(
  Surv(int_dead, dead) ~ 1,
  data  = cabgkul,
  dist  = "weibull",
  theta = c(mu = 0.10, nu = 1.0),
  fit   = TRUE
)

fit_kul


## -----------------------------------------------------------------------------
#| label: fig-kul-km
#| fig-cap: "Weibull parametric survival vs. Kaplan-Meier (CABG, KU Leuven)"
#| fig-width: 7
#| fig-height: 4.5
t_grid <- seq(0.01, max(cabgkul$int_dead) * 0.9, length.out = 200)
nd     <- data.frame(time = t_grid)
surv   <- predict(fit_kul, newdata = nd, type = "survival") * 100

km     <- survfit(Surv(int_dead, dead) ~ 1, data = cabgkul)
km_df  <- data.frame(time = km$time, survival = km$surv * 100)

ggplot() +
  geom_step(data = km_df, aes(time, survival, colour = "Kaplan-Meier"),
            linewidth = 0.5) +
  geom_line(data = data.frame(time = t_grid, survival = surv),
            aes(time, survival, colour = "Weibull"), linewidth = 1) +
  scale_colour_manual(values = c("Weibull" = "#0072B2",
                                 "Kaplan-Meier" = "#D55E00")) +
  scale_y_continuous(limits = c(0, 100)) +
  labs(x = "Months after CABG", y = "Freedom from death (%)",
       colour = NULL) +
  theme_minimal() +
  theme(legend.position = "bottom")


## -----------------------------------------------------------------------------
#| label: avc-data
data(avc)
avc <- na.omit(avc)
str(avc)


## -----------------------------------------------------------------------------
#| label: avc-mv
fit_avc <- hazard(
  Surv(int_dead, dead) ~ age + status + mal + com_iv + inc_surg + orifice,
  data  = avc,
  dist  = "weibull",
  theta = c(mu = 0.20, nu = 1.0, rep(0, 6)),
  fit   = TRUE,
  control = list(maxit = 500)
)

fit_avc


## -----------------------------------------------------------------------------
#| label: avc-mp
fit_mp <- hazard(
  Surv(int_dead, dead) ~ 1,
  data   = avc,
  dist   = "multiphase",
  phases = list(
    early    = hzr_phase("cdf", t_half = 0.5, nu = 1, m = 1,
                          fixed = "shapes"),
    constant = hzr_phase("constant")
  ),
  fit     = TRUE,
  control = list(n_starts = 5, maxit = 1000)
)

summary(fit_mp)


## -----------------------------------------------------------------------------
#| label: fig-avc-compare
#| fig-cap: "Single-phase Weibull vs. multiphase model against Kaplan-Meier (AVC)"
#| fig-width: 7
#| fig-height: 4.5
t_grid <- seq(0.01, max(avc$int_dead) * 0.95, length.out = 200)
nd     <- data.frame(time = t_grid)

km_avc <- survfit(Surv(int_dead, dead) ~ 1, data = avc)
km_df  <- data.frame(time = km_avc$time, survival = km_avc$surv * 100)

fit_wb <- hazard(
  Surv(int_dead, dead) ~ 1, data = avc, dist = "weibull",
  theta = c(mu = 0.20, nu = 1.0), fit = TRUE
)

surv_wb <- predict(fit_wb, newdata = nd, type = "survival") * 100
surv_mp <- predict(fit_mp, newdata = nd, type = "survival") * 100

plot_df <- rbind(
  data.frame(time = t_grid, survival = surv_wb, Model = "Single Weibull"),
  data.frame(time = t_grid, survival = surv_mp, Model = "Multiphase (2-phase)")
)

ggplot() +
  geom_step(data = km_df, aes(time, survival), colour = "grey50",
            linewidth = 0.5) +
  geom_line(data = plot_df, aes(time, survival, colour = Model),
            linewidth = 1) +
  scale_colour_manual(values = c("Single Weibull" = "#E69F00",
                                 "Multiphase (2-phase)" = "#0072B2")) +
  scale_y_continuous(limits = c(0, 100)) +
  annotate("text", x = max(t_grid) * 0.6, y = 95, label = "KM (grey)",
           size = 3, colour = "grey50") +
  labs(x = "Months after AVC repair", y = "Freedom from death (%)",
       colour = NULL) +
  theme_minimal() +
  theme(legend.position = "bottom")


## -----------------------------------------------------------------------------
#| label: valves-death
data(valves)
valves <- na.omit(valves)

fit_death <- hazard(
  Surv(int_dead, dead) ~ age_cop + nyha + mechvalv,
  data  = valves,
  dist  = "weibull",
  theta = c(mu = 0.10, nu = 1.0, rep(0, 3)),
  fit   = TRUE,
  control = list(maxit = 500)
)

fit_death


## -----------------------------------------------------------------------------
#| label: valves-pve
fit_pve <- hazard(
  Surv(int_pve, pve) ~ age_cop + nve + mechvalv,
  data  = valves,
  dist  = "weibull",
  theta = c(mu = 0.02, nu = 1.0, rep(0, 3)),
  fit   = TRUE,
  control = list(maxit = 500)
)

fit_pve


## -----------------------------------------------------------------------------
#| label: interval-simulate
set.seed(101)
n         <- 400
visit_gap <- 6   # months between visits

# True event times: Weibull mu = 0.025, nu = 1.0 (median ~28 months)
true_time <- rexp(n, rate = 0.025)  # nu=1 → exponential

n_visits  <- sample(4:8, n, replace = TRUE)

status     <- integer(n)
time       <- numeric(n)
time_lower <- numeric(n)
time_upper <- numeric(n)

for (i in seq_len(n)) {
  visits <- seq(visit_gap, n_visits[i] * visit_gap, by = visit_gap)
  t_ev   <- true_time[i]
  brk    <- findInterval(t_ev, c(0, visits))  # which interval contains t_ev

  if (brk > length(visits)) {
    # Event after last visit: right-censored.
    # time_lower = 0: with H(time_lower) = H(0) = 0, the right-censored
    # contribution reduces to -(H(time) - H(0)) = -H(time), which is correct.
    status[i]     <- 0L
    time[i]       <- max(visits)
    time_lower[i] <- 0
    time_upper[i] <- max(visits)
  } else {
    # Event within the visit window: interval-censored in (lower, upper)
    lower_bound   <- if (brk == 1L) 0 else visits[brk - 1L]
    upper_bound   <- visits[brk]
    status[i]     <- 2L
    time[i]       <- lower_bound
    time_lower[i] <- lower_bound
    time_upper[i] <- upper_bound
  }
}

table(status)


## -----------------------------------------------------------------------------
#| label: interval-fit
fit_ic <- hazard(
  time       = time,
  status     = status,
  time_lower = time_lower,
  time_upper = time_upper,
  dist  = "weibull",
  theta = c(mu = 0.025, nu = 1.0),
  fit   = TRUE
)

fit_ic


## -----------------------------------------------------------------------------
#| label: interval-vs-naive
# Naive: treat each interval-censored row as an exact event at time_upper
# (the visit when the death was first recorded)
fit_naive <- hazard(
  time   = ifelse(status == 2L, time_upper, time),
  status = ifelse(status == 2L, 1L,         status),
  dist  = "weibull",
  theta = c(mu = 0.025, nu = 1.0),
  fit   = TRUE
)

# Compare MLEs; true parameters are mu = 0.025, nu = 1.0
rbind(
  interval_censored = round(coef(fit_ic)[1:2],   4),
  naive_exact_upper = round(coef(fit_naive)[1:2], 4),
  truth             = c(mu = 0.025, nu = 1.0)
)


## -----------------------------------------------------------------------------
#| label: starting-values
nel <- hzr_nelson(cabgkul$int_dead, cabgkul$dead)

# Drop the zero-time boundary point before log-transforming
nel_clean <- nel[nel$time > 0 & nel$cumhaz > 0, ]
log_t <- log(nel_clean$time)
log_H <- log(nel_clean$cumhaz)

lm_fit  <- lm(log_H ~ log_t)
nu_hat  <- unname(coef(lm_fit)[2])
mu_hat  <- exp(unname(coef(lm_fit)[1]) / nu_hat)
c(mu = round(mu_hat, 4), nu = round(nu_hat, 4))


## -----------------------------------------------------------------------------
#| label: fig-cumhaz-shape
#| fig-cap: "Nelson-Aalen cumulative hazard for CABGKUL — three-phase shape visible as two kinks"
#| fig-width: 7
#| fig-height: 4
ggplot(data.frame(time = nel$time, cumhaz = nel$cumhaz),
       aes(time, cumhaz)) +
  geom_step(colour = "#D55E00", linewidth = 0.7) +
  labs(x = "Months after CABG", y = "Cumulative hazard") +
  theme_minimal()


## -----------------------------------------------------------------------------
#| label: overparameterized
set.seed(42)
fit_3ph <- hazard(
  Surv(int_dead, dead) ~ 1,
  data   = avc,
  dist   = "multiphase",
  phases = list(
    early    = hzr_phase("cdf", t_half = 0.5, nu = 1, m = 1,
                          fixed = "shapes"),
    constant = hzr_phase("constant"),
    late     = hzr_phase("g3", tau = 5, gamma = 3, alpha = 1, eta = 1,
                          fixed = "shapes")
  ),
  fit = TRUE, control = list(n_starts = 5, maxit = 1000)
)

# Scale magnitudes: exp(log_mu) ≈ 0 for any phase the data doesn't support
log_mu_idx <- grep("log_mu", names(coef(fit_3ph)))
round(exp(coef(fit_3ph)[log_mu_idx]), 6)


## -----------------------------------------------------------------------------
#| label: nstarts
set.seed(42)
fit_robust <- hazard(
  Surv(int_dead, dead) ~ 1,
  data   = avc,
  dist   = "multiphase",
  phases = list(
    early    = hzr_phase("cdf", t_half = 0.5, nu = 1, m = 1,
                          fixed = "shapes"),
    constant = hzr_phase("constant")
  ),
  fit     = TRUE,
  control = list(n_starts = 10, maxit = 1000)
)
fit_robust$fit$converged

