From 082806eda9cc61069bad26dc5752119db4a717c9 Mon Sep 17 00:00:00 2001 From: amcim Date: Tue, 17 Feb 2026 16:37:52 -0700 Subject: [PATCH 1/3] implemented SVM code --- R/api.R | 100 ++++++++++++++++++++++++++++++++--- man/fit_SVM_contrast.Rd | 23 ++++++++ man/fit_logistic_contrast.Rd | 4 +- package_build.R | 2 +- 4 files changed, 118 insertions(+), 11 deletions(-) create mode 100644 man/fit_SVM_contrast.Rd diff --git a/R/api.R b/R/api.R index 999b35a..2f7c022 100644 --- a/R/api.R +++ b/R/api.R @@ -221,16 +221,17 @@ fit_models <- function(config,data) { contrasts <- config$analysis$contrasts results <- list() + if(is.null(contrasts)){ + stop("Logistic regression with multi-class outcome requires 'contrasts'") + } + + if(!is.list(contrasts)){ + stop("'contrasts' must be a list of length-2 vectors") + } + #Running models and seeing which to run if("logistic" %in% models){ - if(is.null(contrasts)){ - stop("Logistic regression with multi-class outcome requires 'contrasts'") - } - - if(!is.list(contrasts)){ - stop("'contrasts' must be a list of length-2 vectors") - } logistic_results <- list() @@ -253,6 +254,28 @@ fit_models <- function(config,data) { results$logistic <- logistic_results } + if("SVM" %in% models){ + svm_results <- list() + #The fimctopm os called using + svm_results <- furrr::future_pmap( + list( + contrast = contrasts + ), + fit_svm_contrast, + data = data, + outcome = outcome, + predictors = predictors + ) + + names(svm_results) <- vapply( + contrasts, + function(x) paste0(x[[1]], "_vs_", x[[2]]), + character(1) + ) + + + results$svm <- svmresults + } if(length(results) == 0){ stop("No supported models requested") @@ -261,7 +284,7 @@ fit_models <- function(config,data) { } -#' Title fit_logistic_contrast +#' fit_logistic_contrast #' #' @param contrast #' @param data @@ -276,6 +299,7 @@ fit_logistic_contrast <- function(contrast, data, outcome, predictors) { group0 <- contrast[[1]] group1 <- contrast[[2]] + print(group1) subset_idx <- data[[outcome]] %in% c(group0, group1) sub_data <- data[subset_idx, , drop = FALSE] @@ -312,6 +336,66 @@ fit_logistic_contrast <- function(contrast, data, outcome, predictors) { return(fitresult) } +#' Title fit_SVM_contrast +#' +#' @param contrast +#' @param data +#' @param outcome +#' @param predictors +#' +#' @returns fitted list of SVM contrasts +#' +fit_SVM_contrast<-function(contrast, data, outcome, predictors){ + if(length(contrast) != 2){ + stop("Each contrast must have exactly two outcome values") + } + + group0 <- contrast[[1]] + group1 <- contrast[[2]] + + subset_idx <- data[[outcome]] %in% c(group0, group1) + sub_data <- data[subset_idx, , drop = FALSE] + + if(nrow(sub_data)== 0){ + stop("No rows found for contrast ", group0, " vs ", group1) + } + + sub_data[[outcome]] <- ifelse(sub_data[[outcome]] == group1, 1, 0) + + if(length(unique(sub_data[[outcome]])) != 2){ + stop("Contrast does not produce a binary outcome") + } + + fml <- stats::as.formula( + paste(outcome, "~", paste(predictors, collapse = " + ")) + ) + + fit <- e1071::svm( + formula = fml, + data = sub_data, + probability = TRUE + ) + #Using predict from e1071 + preds <- e1071::predict(fit, sub_data, probability = TRUE) + #To get the predictions object consistent with the logistic regression predict format + probs <- attr(pred_obj, "probabilities") + fitresult <- list( + model = fit, + predictions = list( + class = pred_obj, + probabilities = probs + ), + formula = fml, + contrast = c(group0, group1), + n = nrow(sub_data) + ) + return(fitresult) + + + + return(fitresult) +} + #' evaluate_models #' diff --git a/man/fit_SVM_contrast.Rd b/man/fit_SVM_contrast.Rd new file mode 100644 index 0000000..ab4ec50 --- /dev/null +++ b/man/fit_SVM_contrast.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api.R +\name{fit_SVM_contrast} +\alias{fit_SVM_contrast} +\title{Title fit_SVM_contrast} +\usage{ +fit_SVM_contrast(contrast, data, outcome, predictors) +} +\arguments{ +\item{contrast}{} + +\item{data}{} + +\item{outcome}{} + +\item{predictors}{} +} +\value{ +fitted list of SVM contrasts +} +\description{ +Title fit_SVM_contrast +} diff --git a/man/fit_logistic_contrast.Rd b/man/fit_logistic_contrast.Rd index bfc6e5f..fb4aed9 100644 --- a/man/fit_logistic_contrast.Rd +++ b/man/fit_logistic_contrast.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/api.R \name{fit_logistic_contrast} \alias{fit_logistic_contrast} -\title{Title fit_logistic_contrast} +\title{fit_logistic_contrast} \usage{ fit_logistic_contrast(contrast, data, outcome, predictors) } @@ -19,5 +19,5 @@ fit_logistic_contrast(contrast, data, outcome, predictors) fitted list of the logistic regressiong results } \description{ -Title fit_logistic_contrast +fit_logistic_contrast } diff --git a/package_build.R b/package_build.R index 2e5614e..f9ae68e 100644 --- a/package_build.R +++ b/package_build.R @@ -1,6 +1,6 @@ library(devtools) library(roxygen2) - +setwd("~/Rwork/TabularTools") #This creates a folder with the project files in it, only call once #create("tabularTools") From 3306a5491285fda8b76003dbbc43275df0cd92e1 Mon Sep 17 00:00:00 2001 From: amcim Date: Thu, 19 Feb 2026 16:37:39 -0700 Subject: [PATCH 2/3] SVM done, evaluation corrected --- DESCRIPTION | 3 +- R/api.R | 63 +++++++++++++++++++++++++++++------------ man/fit_SVM_contrast.Rd | 6 ++-- runAnalysis.R | 5 ++-- 4 files changed, 53 insertions(+), 24 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 447b5a1..1217f16 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,8 @@ Imports: ggplot2, furrr, future, - pROC + pROC, + e1071 Suggests: testthat (>= 3.0.0), knitr, diff --git a/R/api.R b/R/api.R index 2f7c022..221dddc 100644 --- a/R/api.R +++ b/R/api.R @@ -242,7 +242,8 @@ fit_models <- function(config,data) { fit_logistic_contrast, data = data, outcome = outcome, - predictors = predictors + predictors = predictors, + .options = furrr::furrr_options(seed = TRUE) ) names(logistic_results) <- vapply( @@ -254,7 +255,7 @@ fit_models <- function(config,data) { results$logistic <- logistic_results } - if("SVM" %in% models){ + if("svm" %in% models){ svm_results <- list() #The fimctopm os called using svm_results <- furrr::future_pmap( @@ -264,7 +265,8 @@ fit_models <- function(config,data) { fit_svm_contrast, data = data, outcome = outcome, - predictors = predictors + predictors = predictors, + .options = furrr::furrr_options(seed = TRUE) ) names(svm_results) <- vapply( @@ -274,7 +276,7 @@ fit_models <- function(config,data) { ) - results$svm <- svmresults + results$svm <- svm_results } if(length(results) == 0){ @@ -309,13 +311,19 @@ fit_logistic_contrast <- function(contrast, data, outcome, predictors) { } sub_data[[outcome]] <- ifelse(sub_data[[outcome]] == group1, 1, 0) + cat("Rows before NA removal:", nrow(sub_data), "\n") + + sub_data <- sub_data[complete.cases(sub_data[, c(outcome, predictors)]), ] + + cat("Rows after NA removal:", nrow(sub_data), "\n") if(length(unique(sub_data[[outcome]])) != 2){ stop("Contrast does not produce a binary outcome") } - fml <- stats::as.formula( - paste(outcome, "~", paste(predictors, collapse = " + ")) + fml <- stats::reformulate( + termlabels = predictors, + response = outcome ) fit <- stats::glm( @@ -329,6 +337,7 @@ fit_logistic_contrast <- function(contrast, data, outcome, predictors) { fitresult<- list( model = fit, predictions = preds, + truth=sub_data[[outcome]], formula = fml, contrast = c(group0, group1), n = nrow(sub_data) @@ -345,7 +354,7 @@ fit_logistic_contrast <- function(contrast, data, outcome, predictors) { #' #' @returns fitted list of SVM contrasts #' -fit_SVM_contrast<-function(contrast, data, outcome, predictors){ +fit_svm_contrast<-function(contrast, data, outcome, predictors){ if(length(contrast) != 2){ stop("Each contrast must have exactly two outcome values") } @@ -361,7 +370,13 @@ fit_SVM_contrast<-function(contrast, data, outcome, predictors){ } sub_data[[outcome]] <- ifelse(sub_data[[outcome]] == group1, 1, 0) + cat("Rows before NA removal:", nrow(sub_data), "\n") + + sub_data <- sub_data[complete.cases(sub_data[, c(outcome, predictors)]), ] + cat("Rows after NA removal:", nrow(sub_data), "\n") + #Needs to be factor for classification and to get probabilties + sub_data[[outcome]] <- factor(sub_data[[outcome]]) if(length(unique(sub_data[[outcome]])) != 2){ stop("Contrast does not produce a binary outcome") } @@ -375,25 +390,23 @@ fit_SVM_contrast<-function(contrast, data, outcome, predictors){ data = sub_data, probability = TRUE ) - #Using predict from e1071 - preds <- e1071::predict(fit, sub_data, probability = TRUE) + #Predict gets ran differently with different models + preds <- stats::predict(fit, sub_data, probability = TRUE) #To get the predictions object consistent with the logistic regression predict format - probs <- attr(pred_obj, "probabilities") + prob_matrix <- attr(preds, "probabilities") + attr(preds, "probabilities") <- NULL fitresult <- list( model = fit, predictions = list( - class = pred_obj, - probabilities = probs + class = preds, + probabilities = prob_matrix ), + truth=sub_data[[outcome]], formula = fml, contrast = c(group0, group1), n = nrow(sub_data) ) return(fitresult) - - - - return(fitresult) } @@ -416,15 +429,29 @@ evaluate_models <- function(models){ if(length(models) == 0){ stop("models must contain at least one model", call. = FALSE) } + #Logistic regression is default, if statemens in loop will deal with outputs of other models + #if they do not fit LR logic for(model_name in names(models)){ model_evals <- list() - + print(model_name) for(contrast_name in names(models[[model_name]])){ res <- models[[model_name]][[contrast_name]] preds <- res$predictions - truth <- res$model$y + truth <- res$truth + #########To handle the SVM + if(is.list(preds)){ + prob_matrix <- preds$probabilities + print(prob_matrix) + if(is.null(prob_matrix)){ + stop("Predictions list missing probability matrix.", call. = FALSE) + } + + positive_col <- which(colnames(prob_matrix) == "1") + preds <- prob_matrix[, positive_col] + } + ############## pred_class <- ifelse(preds >= 0.5, 1, 0) diff --git a/man/fit_SVM_contrast.Rd b/man/fit_SVM_contrast.Rd index ab4ec50..9e5549f 100644 --- a/man/fit_SVM_contrast.Rd +++ b/man/fit_SVM_contrast.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/api.R -\name{fit_SVM_contrast} -\alias{fit_SVM_contrast} +\name{fit_svm_contrast} +\alias{fit_svm_contrast} \title{Title fit_SVM_contrast} \usage{ -fit_SVM_contrast(contrast, data, outcome, predictors) +fit_svm_contrast(contrast, data, outcome, predictors) } \arguments{ \item{contrast}{} diff --git a/runAnalysis.R b/runAnalysis.R index ebe0838..7053df6 100644 --- a/runAnalysis.R +++ b/runAnalysis.R @@ -15,9 +15,10 @@ validated=validate_data(cfg,data) pdata=preprocess_data(cfg,data) ### -plan(multisession, workers = 4) -lrmod=fit_models(cfg,data) plan(sequential) + +lrmod=fit_models(cfg,data) +plan(multisession, workers = 4) lrmod=fit_models(cfg,data) ### evals=evaluate_models(lrmod) From ff84428041a5f340cd49ddc4583b2b5f7879473a Mon Sep 17 00:00:00 2001 From: amcim Date: Sun, 22 Feb 2026 14:04:23 -0700 Subject: [PATCH 3/3] fixed failing tests due to fixing prediction bug --- tests/testthat/test-evaluate_models.R | 46 +++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-evaluate_models.R b/tests/testthat/test-evaluate_models.R index af87747..a80fa8a 100644 --- a/tests/testthat/test-evaluate_models.R +++ b/tests/testthat/test-evaluate_models.R @@ -17,7 +17,7 @@ test_that("evaluate_models returns expected structure", { glm = list( "0_vs_1" = list( predictions = c(0.2, 0.8), - model = list(y = c(0, 1)) + truth = c(0, 1) ) ) ) @@ -37,7 +37,7 @@ test_that("evaluate_models returns valid metrics", { glm = list( "0_vs_1" = list( predictions = c(0.1, 0.4, 0.7, 0.9), - model = list(y = c(0, 0, 1, 1)) + truth = c(0, 0, 1, 1) ) ) ) @@ -57,7 +57,7 @@ test_that("evaluate_models returns a confusion matrix", { glm = list( "0_vs_1" = list( predictions = c(0.3, 0.6), - model = list(y = c(0, 1)) + truth = c(0, 1) ) ) ) @@ -68,3 +68,43 @@ test_that("evaluate_models returns a confusion matrix", { expect_s3_class(conf, "table") expect_equal(sum(conf), 2) }) +#Multiple models at once +test_that("evaluate_models works with logistic and svm models together", { + + fake_models <- list( + logistic = list( + "0_vs_1" = list( + predictions = c(0.2, 0.8, 0.6, 0.1), + truth = c(0, 1, 1, 0) + ) + ), + svm = list( + "0_vs_1" = list( + predictions = c(0.3, 0.7, 0.4, 0.9), + truth = c(0, 1, 0, 1) + ) + ) + ) + + res <- evaluate_models(fake_models) + + # Top-level names + expect_named(res, c("logistic", "svm")) + + # Check structure for logistic + expect_named(res$logistic, "0_vs_1") + expect_named(res$logistic$`0_vs_1`, + c("metrics", "confusion", "roc")) + + # Check structure for svm + expect_named(res$svm, "0_vs_1") + expect_named(res$svm$`0_vs_1`, + c("metrics", "confusion", "roc")) + + # Check AUC values exist and are valid + expect_gte(res$logistic$`0_vs_1`$metrics$auc, 0) + expect_lte(res$logistic$`0_vs_1`$metrics$auc, 1) + + expect_gte(res$svm$`0_vs_1`$metrics$auc, 0) + expect_lte(res$svm$`0_vs_1`$metrics$auc, 1) +}) \ No newline at end of file