VPSPulse Mirrors

High-Performance Open-Source Archive

Exploring The Variables Importance

Exploring The Variables Importance

Gabriele Pittarello

2024-11-14

Introduction

Machine learning models catch interactions between covariates. Often they are a black-box but they can be interpreted with SHAP values. We generate two data sets, one from scenario Alpha and one from scenario Delta the plotting functionalities of the ReSurv package.

input_data_0 <- data_generator(
  random_seed = 1,
  scenario = 0,
  time_unit = 1 / 360,
  years = 4,
  yearly_exposure = 200
)

individual_data_0 <- IndividualDataPP(
  data = input_data_0,
  id = NULL,
  categorical_features = "claim_type",
  continuous_features = "AP",
  accident_period = "AP",
  calendar_period = "RP",
  input_time_granularity = "days",
  output_time_granularity = "quarters",
  years = 4
)
# Input data scenario Delta

input_data3 <- data_generator(
  random_seed = 1,
  scenario = 3,
  time_unit = 1 / 360,
  years = 4,
  yearly_exposure = 200
)

individual_data_3 <- IndividualDataPP(
  data = input_data3,
  id = NULL,
  categorical_features = "claim_type",
  continuous_features = "AP",
  accident_period = "AP",
  calendar_period = "RP",
  input_time_granularity = "days",
  output_time_granularity = "quarters",
  years = 4
)

Here we fit Neural Networks and XGB. In order to simplify this vignette, we provide in advance the optimal hyperparameters.

hp_scenario_alpha_xgb <- list(
  params = list(
    booster = "gbtree",
    eta = 0.9887265,
    subsample = 0.7924135 ,
    alpha = 10.85342,
    lambda = 6.213317,
    min_child_weight = 3.042204,
    max_depth = 1
  ),
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

hp_scenario_alpha_nn <- list(
  batch_size = as.integer(5000),
  epochs = as.integer(5500),
  num_workers = 0,
  tie = 'Efron',
  num_layers = 2,
  num_nodes = 10,
  optim = "SGD",
  batch_size = as.integer(5000),
  lr = 0.3023043,
  xi = 0.426443,
  eps = 0,
  activation = "SELU",
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  network_structure = NULL
)

hp_scenario_delta_xgb <- list(params=list(booster="gbtree",
                                          eta=0.2717736,
                                          subsample=0.9043068 ,
                                          alpha=7.789214,
                                          lambda=12.09398 ,
                                          min_child_weight=22.4837 ,
                                          max_depth = 4),
                                          print_every_n = 0,
                                          nrounds=3000,
                                          verbose= FALSE,
                                          early_stopping_rounds = 500)

hp_scenario_delta_nn <- list(
  batch_size = as.integer(5000),
  epochs = as.integer(5500),
  num_workers = 0,
  tie = 'Efron',
  num_layers = 2,
  num_nodes = 2,
  optim = "Adam",
  batch_size = as.integer(5000),
  lr = 0.3542422,
  xi = 0.1803953,
  eps = 0,
  activation = "LeakyReLU",
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  network_structure = NULL
)
resurv_model_xgb_A <-  ReSurv(individual_data_0,
                              hazard_model = "XGB",
                              hparameters = hp_scenario_alpha_xgb)

resurv_model_nn_A <-  ReSurv(individual_data_0,
                             hazard_model = "NN",
                             hparameters = hp_scenario_alpha_nn)

resurv_model_xgb_D <-  ReSurv(individual_data_3,
                              hazard_model = "XGB",
                              hparameters = hp_scenario_delta_xgb)

resurv_model_nn_D <- ReSurv(individual_data_3,
                            hazard_model = "NN",
                            hparameters = hp_scenario_delta_nn)

Shap values (XGB)

plot(resurv_model_xgb_A)
plot(resurv_model_xgb_D)

Shap values (NN)

plot(resurv_model_nn_A, nsamples = 10000)
plot(resurv_model_nn_D, nsamples=10000)

Need mirroring services?
Contact our team at info@vpspulse.com.

Mirror powered by VPSpulse

Infrastructure sponsored by VPSPulse & Secure Payments by ArionPay.