From d5caef66b81e8d4e0a4eed61edacb40e5e6684a2 Mon Sep 17 00:00:00 2001
From: novica
Date: Thu, 4 Dec 2025 18:38:45 +0100
Subject: [PATCH 1/2] fix: vignette builds

---
 vignettes/tuningHyperparameters.Rmd | 74 +++++++++++++++++++----------
 1 file changed, 48 insertions(+), 26 deletions(-)

diff --git a/vignettes/tuningHyperparameters.Rmd b/vignettes/tuningHyperparameters.Rmd
index de9e7d4..dd9165a 100644
--- a/vignettes/tuningHyperparameters.Rmd
+++ b/vignettes/tuningHyperparameters.Rmd
@@ -53,38 +53,60 @@ Folds <- list(
 
 Now we need to define the scoring function. This function should, at a minimum, return a list with a ```Score``` element, which is the model evaluation metric we want to maximize. We can also retain other pieces of information created by the scoring function by including them as named elements of the returned list. In this case, we want to retain the optimal number of rounds determined by the ```xgb.cv```:
 
 ```{r eval = xgbAvail}
+
 scoringFunction <- function(max_depth, min_child_weight, subsample) {
-  dtrain <- xgb.DMatrix(agaricus.train$data,label = agaricus.train$label)
-
-  Pars <- list(
-      booster = "gbtree"
-    , eta = 0.01
-    , max_depth = max_depth
-    , min_child_weight = min_child_weight
-    , subsample = subsample
-    , objective = "binary:logistic"
-    , eval_metric = "auc"
+  # Coerce types explicitly
+  max_depth        <- as.integer(max_depth)[1]
+  min_child_weight <- as.numeric(min_child_weight)[1]
+  subsample        <- as.numeric(subsample)[1]
+
+  dtrain <- xgboost::xgb.DMatrix(
+    data  = agaricus.train$data,
+    label = agaricus.train$label
+  )
+
+  Pars <- list(
+    booster = "gbtree",
+    eta = 0.01,
+    max_depth = max_depth,
+    min_child_weight = min_child_weight,
+    subsample = subsample,
+    objective = "binary:logistic",
+    eval_metric = "auc"
   )
 
-  xgbcv <- xgb.cv(
-      params = Pars
-    , data = dtrain
-    , nround = 100
-    , folds = Folds
-    , prediction = TRUE
-    , showsd = TRUE
-    , early_stopping_rounds = 5
-    , maximize = TRUE
-    , verbose = 0)
-
-  return(
-    list(
-        Score = max(xgbcv$evaluation_log$test_auc_mean)
-      , nrounds = xgbcv$best_iteration
-    )
+  xgbcv <- xgboost::xgb.cv(
+    params = Pars,
+    data = dtrain,
+    nrounds = 100,       # <- canonical argument name
+    folds = Folds,
+    prediction = FALSE,  # set TRUE only if you actually need CV preds
+    showsd = TRUE,
+    early_stopping_rounds = 5,
+    maximize = TRUE,
+    verbose = 0
+  )
+
+  # Compute Score robustly (scalar)
+  score_vec <- xgbcv$evaluation_log$test_auc_mean
+  Score <- as.numeric(max(score_vec, na.rm = TRUE))[1]
+
+  # Derive a scalar "best nrounds" robustly
+  bi <- xgbcv$best_iteration
+  if (is.null(bi) || length(bi) != 1 || is.na(bi)) {
+    # fallback: the iteration where test AUC is maximized
+    bi <- which.max(score_vec)
+    if (length(bi) != 1 || is.na(bi)) bi <- 1L
+  }
+  BestNrounds <- as.integer(bi)[1]
+
+  list(
+    Score = Score,
+    BestNrounds = BestNrounds
   )
 }
+
 ```

From 12bd3cba98424fbcb7d0502bfe2e80f1d613cdf6 Mon Sep 17 00:00:00 2001
From: novica
Date: Thu, 4 Dec 2025 18:54:54 +0100
Subject: [PATCH 2/2] fix: test works

---
 tests/testthat/test-hyperparameterTuning.R | 125 +++++++++++++++------
 1 file changed, 88 insertions(+), 37 deletions(-)

diff --git a/tests/testthat/test-hyperparameterTuning.R b/tests/testthat/test-hyperparameterTuning.R
index 7b5f285..c7c9a25 100644
--- a/tests/testthat/test-hyperparameterTuning.R
+++ b/tests/testthat/test-hyperparameterTuning.R
@@ -18,52 +18,103 @@ testthat::test_that(
     , Fold3 = as.integer(seq(3,nrow(agaricus.train$data),by = 3))
   )
 
-  scoringFunction <- function(
-      max_depth
-    , max_leaves
-    , min_child_weight
-    , subsample
-    , colsample_bytree
-    , gamma
-    , lambda
-    , alpha
-  ) {
-
-    dtrain <- xgb.DMatrix(agaricus.train$data,label = agaricus.train$label)
+
+  scoringFunction <- function(max_depth, max_leaves,
+                              min_child_weight, subsample,
+                              colsample_bytree, gamma, lambda, alpha,
+                              .debug = FALSE) {
+
+    # ---- Type coercion & scalarization ----
+    max_depth        <- as.integer(max_depth)[1]
+    max_leaves       <- as.integer(max_leaves)[1]
+    min_child_weight <- as.numeric(min_child_weight)[1]
+    subsample        <- as.numeric(subsample)[1]
+    colsample_bytree <- as.numeric(colsample_bytree)[1]
+    gamma            <- as.numeric(gamma)[1]
+    lambda           <- as.numeric(lambda)[1]
+    alpha            <- as.numeric(alpha)[1]
+
+    # ---- Data (assumes 'agaricus.train' and 'Folds' exist) ----
+    dtrain <- xgboost::xgb.DMatrix(
+      data = agaricus.train$data,
+      label = agaricus.train$label
+    )
+
+    # Base params
     Pars <- list(
-        booster = "gbtree"
-      , eta = 0.001
-      , max_depth = max_depth
-      , max_leaves = max_leaves
-      , min_child_weight = min_child_weight
-      , subsample = subsample
-      , colsample_bytree = colsample_bytree
-      , gamma = gamma
-      , lambda = lambda
-      , alpha = alpha
-      , objective = "binary:logistic"
-      , eval_metric = "auc"
+      booster = "gbtree",
+      eta = 0.01,
+      max_depth = max_depth,
+      min_child_weight = min_child_weight,
+      subsample = subsample,
+      colsample_bytree = colsample_bytree,
+      gamma = gamma,
+      lambda = lambda,   # L2 reg
+      alpha = alpha,     # L1 reg
+      objective = "binary:logistic",
+      eval_metric = "auc"
     )
 
-    xgbcv <- xgb.cv(
-        params = Pars
-      , data = dtrain
-      , nround = 100
-      , folds = Folds
-      , early_stopping_rounds = 5
-      , maximize = TRUE
-      , verbose = 0
+    # If max_leaves is requested, enable histogram/lossguide (so xgboost uses it)
+    if (!is.na(max_leaves) && max_leaves > 0L) {
+      Pars$tree_method <- "hist"
+      Pars$grow_policy <- "lossguide"
+      Pars$max_leaves  <- max_leaves
+      # It's common to leave max_depth as-is; alternatively set max_depth = 0
+      # Pars$max_depth <- 0L
+    }
+
+    # ---- Safe CV wrapper ----
+    xgbcv <- try(
+      xgboost::xgb.cv(
+        params = Pars,
+        data = dtrain,
+        nrounds = 100,
+        folds = Folds,
+        prediction = FALSE,
+        showsd = TRUE,
+        early_stopping_rounds = 5,
+        maximize = TRUE,
+        verbose = 0
+      ),
+      silent = TRUE
     )
 
-    return(
-      list(
-          Score = max(xgbcv$evaluation_log$test_auc_mean)
-        , nrounds = xgbcv$best_iteration
-      )
+    # On error: return worst score but keep scalars so bayesOpt can proceed
+    if (inherits(xgbcv, "try-error")) {
+      if (isTRUE(.debug)) message("xgb.cv error: ", as.character(xgbcv))
+      return(list(Score = as.numeric(-Inf), BestNrounds = as.integer(1L)))
+    }
+
+    # ---- Scalar Score ----
+    score_vec <- as.numeric(xgbcv$evaluation_log$test_auc_mean)
+    if (!is.null(names(score_vec))) names(score_vec) <- NULL
+    Score <- as.numeric(max(score_vec, na.rm = TRUE))[1]
+
+    # ---- Scalar best nrounds ----
+    bi <- xgbcv$best_iteration
+    if (is.null(bi) || length(bi) != 1L || is.na(bi)) {
+      bi <- which.max(score_vec)
+      if (length(bi) != 1L || is.na(bi)) bi <- 1L
+    }
+    BestNrounds <- as.integer(bi)[1]
+
+    if (isTRUE(.debug)) {
+      cat(sprintf(
+        "DEBUG | Score len=%d val=%.6f | BestNrounds len=%d val=%d\n",
+        length(Score), Score, length(BestNrounds), BestNrounds
+      ))
+    }
+
+    list(
+      Score = Score,              # must be scalar
+      BestNrounds = BestNrounds   # must be scalar
    )
  }
+
+
   bounds <- list(
       max_depth = c(1L, 5L)
     , max_leaves = c(2L,25L)
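
For reference, a minimal sketch (not part of either patch) of how a scoring function with this return shape is consumed by ```ParBayesianOptimization::bayesOpt()```. It assumes ```scoringFunction``` and ```Folds``` are the three-parameter vignette versions patched above; the ```bounds``` values below are illustrative assumptions, not taken from the vignette.

```r
# Usage sketch: assumes ParBayesianOptimization and xgboost are installed,
# and that scoringFunction/Folds from the patched vignette are in scope.
library(ParBayesianOptimization)
library(xgboost)
data(agaricus.train, package = "xgboost")

# Illustrative bounds for the vignette's three tuned parameters
bounds <- list(
    max_depth = c(1L, 5L)
  , min_child_weight = c(0, 25)
  , subsample = c(0.25, 1)
)

set.seed(1234)
optObj <- bayesOpt(
    FUN = scoringFunction   # must return list(Score = <scalar>, ...)
  , bounds = bounds
  , initPoints = 4          # must exceed the number of tuned parameters
  , iters.n = 2
)

getBestPars(optObj)   # named list of the best hyperparameters found
optObj$scoreSummary   # per-evaluation table; extra returned scalars appear as columns
```

The scalar ```Score``` is what ```bayesOpt()``` maximizes; any extra named scalar the function returns, such as ```BestNrounds```, is carried along in ```scoreSummary```, which is why both patches coerce every returned element to length one.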