csglab · Arshammik · Nov 16, 2025
diff --git a/R/feature_selection.R b/R/feature_selection.R
@@ -19,22 +19,31 @@
 #'  # printing the results
 #'  print(HVE[order(-sum_deviance)])
 #' @export
-find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threads=1, verbose=TRUE, ...) {
+find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threads=1, verbose=FALSE, ...) {
 
   # Check if matrices are sparse
   if (!(inherits(m1_matrix, "Matrix") && inherits(m2_matrix, "Matrix"))) {
-    stop("Both 'm1_matrix' and 'm2_matrix' must be sparse matrices of class 'Matrix'.")
+    stop("Both 'm1_matrix' and 'm2_matrix' must be sparse matrices of class 'Matrix'.", call. = FALSE)
   }
   # Check matrix compatibility
   if (!identical(colnames(m1_matrix), colnames(m2_matrix))) {
-    stop("The colnames (barcodes) of inclusion and exclusion matrices are not identical.")
+    stop("The colnames (barcodes) of inclusion and exclusion matrices are not identical.", call. = FALSE)
   }
   if (!identical(rownames(m1_matrix), rownames(m2_matrix))) {
-    stop("The rownames (junction events) of inclusion and exclusion matrices are not identical.")
+    stop("The rownames (junction events) of inclusion and exclusion matrices are not identical.", call. = FALSE)
   }
 
   # Filter rows based on minimum row sum criteria
-  to_keep_events <- which(rowSums(m1_matrix) > min_row_sum & rowSums(m2_matrix) > min_row_sum)
+  m1_sums <- rowSums(m1_matrix)
+  m2_sums <- rowSums(m2_matrix)
+  to_keep_events <- which(m1_sums > min_row_sum & m2_sums > min_row_sum)
+
+  # Check if any events pass the threshold
+  if (length(to_keep_events) == 0) {
+    stop("No events pass the min_row_sum threshold of ", min_row_sum,
+         ". Consider lowering the threshold or checking your data.", call. = FALSE)
+  }
+
   m1_matrix <- m1_matrix[to_keep_events, , drop = FALSE]
   m2_matrix <- m2_matrix[to_keep_events, , drop = FALSE]
 
@@ -58,7 +67,7 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threa
     deviance_values <- tryCatch({
       calcDeviances_ratio(M1_sub, M2_sub, n_threads)
     }, error = function(e) {
-      stop("Error in calcDeviances_ratio function: ", e$message)
+      stop("Error in calcDeviances_ratio function: ", e$message, call. = FALSE)
     })
     deviance_values <- c(deviance_values)
     names(deviance_values) <- rownames(M1_sub)
@@ -116,24 +125,24 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threa
 #' @import Matrix
 #' @importClassesFrom Matrix dgCMatrix dsCMatrix dgTMatrix dsTMatrix
 #' @export
-find_variable_genes <- function(gene_expression_matrix, method = "vst", n_threads = 1, verbose = TRUE, ...) {
+find_variable_genes <- function(gene_expression_matrix, method = "vst", n_threads = 1, verbose = FALSE, ...) {
   # adding the vst method as the default
   method <- match.arg(method, choices = c("vst", "sum_deviance"))
 
   # Verify that gene_expression_matrix is a sparse Matrix
   if (!inherits(gene_expression_matrix, "Matrix")) {
-    stop("The 'gene_expression_matrix' must be a sparse matrix of class 'Matrix'.")
+    stop("The 'gene_expression_matrix' must be a sparse matrix of class 'Matrix'.", call. = FALSE)
   }
 
   if (method == "vst") {
     if(verbose) cat("The method we are using is vst (Seurat)...\n")
     if (!exists("standardizeSparse_variance_vst")) {
-      stop("The function 'standardizeSparse_variance_vst' is not available. Check your C++ source files.")
+      stop("The function 'standardizeSparse_variance_vst' is not available. Check your C++ source files.", call. = FALSE)
     }
     rez_vector <- tryCatch({
       standardizeSparse_variance_vst(matSEXP = gene_expression_matrix)
     }, error = function(e) {
-      stop("Error in standardizeSparse_variance_vst: ", e$message)
+      stop("Error in standardizeSparse_variance_vst: ", e$message, call. = FALSE)
     })
     rez <- data.table::data.table(events = rownames(gene_expression_matrix),
                                   standardize_variance = rez_vector)
@@ -143,7 +152,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
     # Filter rows based on minimum row sum criteria
     to_keep_features <- which(rowSums(gene_expression_matrix) > 0)
     if (length(to_keep_features) == 0) {
-      stop("No genes with a positive row sum were found.")
+      stop("No genes with a positive row sum were found.", call. = FALSE)
     }
     gene_expression_matrix <- gene_expression_matrix[to_keep_features, , drop = FALSE]
 
@@ -174,7 +183,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
         deviance_values <- tryCatch({
           calcNBDeviancesWithThetaEstimation(as(gene_expression_matrix_sub, "dgCMatrix"), n_threads)
         }, error = function(e) {
-          stop("Error in calcNBDeviancesWithThetaEstimation function: ", e$message)
+          stop("Error in calcNBDeviancesWithThetaEstimation function: ", e$message, call. = FALSE)
         })
         deviance_values <- c(deviance_values)
         names(deviance_values) <- rownames(gene_expression_matrix_sub)
@@ -197,7 +206,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
     row_var <- tryCatch({
       splikit::get_rowVar(M = gene_expression_matrix)
     }, error = function(e) {
-      stop("Error in splikit::get_rowVar: ", e$message)
+      stop("Error in splikit::get_rowVar: ", e$message, call. = FALSE)
     })
 
     row_var_cpp_dt <- data.table::data.table(events = rownames(gene_expression_matrix),

diff --git a/R/general_tools.R b/R/general_tools.R
@@ -56,27 +56,36 @@ NULL
 #' print(pseudo_r_square_nagel)
 #'
 #' @export
-get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metric = "CoxSnell", suppress_warnings=TRUE) {
+get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metric = "CoxSnell", suppress_warnings=FALSE) {
 
   # Validate metric parameter
   metric <- match.arg(metric, choices = c("CoxSnell", "Nagelkerke"))
-  
+
   # Check ZDB_matrix (must be dense)
-  if (!is.matrix(ZDB_matrix)) stop("ZDB_matrix must be a dense matrix.")
-  
+  if (!is.matrix(ZDB_matrix)) stop("ZDB_matrix must be a dense matrix.", call. = FALSE)
+
   # Check m1 and m2 (can be sparse or dense)
   if (!is.matrix(m1_inclusion) && !inherits(m1_inclusion, "Matrix")) {
-    stop("m1_inclusion must be either a dense matrix or a sparse Matrix.")
+    stop("m1_inclusion must be either a dense matrix or a sparse Matrix.", call. = FALSE)
   }
   if (!is.matrix(m2_exclusion) && !inherits(m2_exclusion, "Matrix")) {
-    stop("m2_exclusion must be either a dense matrix or a sparse Matrix.")
+    stop("m2_exclusion must be either a dense matrix or a sparse Matrix.", call. = FALSE)
   }
 
+  # Check row dimensions
   if (nrow(m1_inclusion) != nrow(ZDB_matrix)) {
-    stop("m1_inclusion must have the same number of rows as ZDB_matrix.")
+    stop("m1_inclusion must have the same number of rows as ZDB_matrix.", call. = FALSE)
   }
   if (nrow(m2_exclusion) != nrow(ZDB_matrix)) {
-    stop("m2_exclusion must have the same number of rows as ZDB_matrix.")
+    stop("m2_exclusion must have the same number of rows as ZDB_matrix.", call. = FALSE)
+  }
+
+  # Check column dimensions
+  if (ncol(m1_inclusion) != ncol(ZDB_matrix)) {
+    stop("m1_inclusion must have the same number of columns as ZDB_matrix.", call. = FALSE)
+  }
+  if (ncol(m2_exclusion) != ncol(ZDB_matrix)) {
+    stop("m2_exclusion must have the same number of columns as ZDB_matrix.", call. = FALSE)
   }
 
   cat("Input dimension check passed. Proceeding with computation.\n")
@@ -151,7 +160,17 @@ get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metri
     })
   }
 
+  # Warn about NA removal
+  n_before <- nrow(results)
   results <- na.omit(results)
+  n_after <- nrow(results)
+
+  if (n_before > n_after) {
+    n_removed <- n_before - n_after
+    message("Removed ", n_removed, " event(s) with NA values (",
+            round(100 * n_removed / n_before, 1), "% of total).")
+    message("Reasons for NA: insufficient data (n<2), no variation, or convergence failure.")
+  }
 
   cat("Computation completed successfully.\n")
   return(results)
@@ -185,7 +204,7 @@ get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metri
 #' @export
 get_rowVar <- function(M, verbose=FALSE) {
   if (! (is.matrix(M) || inherits(M, "dgCMatrix")) ) {
-    stop("`M` must be either a dense numeric matrix or a dgCMatrix.")
+    stop("`M` must be either a dense numeric matrix or a dgCMatrix.", call. = FALSE)
   }
   if(verbose) message("[get_rowVar] Starting computation")
   result <- rowVariance_cpp(M)

diff --git a/R/star_solo_processing.R b/R/star_solo_processing.R
@@ -982,15 +982,27 @@ make_m2 <- function(m1_inclusion_matrix, eventdata, batch_size = 5000,
 #' @export
 make_eventdata_plus <- function(eventdata, GTF_file_direction) {
 
+  # Validate GTF file path
+  if (!file.exists(GTF_file_direction)) {
+    stop("GTF file not found: ", GTF_file_direction, call. = FALSE)
+  }
+  if (!file.access(GTF_file_direction, mode = 4) == 0) {
+    stop("GTF file is not readable: ", GTF_file_direction, call. = FALSE)
+  }
+
   # Read GTF file as plain text using fread
-  GTF <- data.table::fread(
-    GTF_file_direction,
-    col.names = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attribute"),
-    sep = "\t",
-    header = FALSE,
-    quote = "",
-    showProgress = FALSE
-  )
+  GTF <- tryCatch({
+    data.table::fread(
+      GTF_file_direction,
+      col.names = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attribute"),
+      sep = "\t",
+      header = FALSE,
+      quote = "",
+      showProgress = FALSE
+    )
+  }, error = function(e) {
+    stop("Error reading GTF file: ", e$message, call. = FALSE)
+  })
 
   # Filter for 'gene' entries
   ref_gtf <- GTF[type == "gene"]

diff --git a/configure b/configure
@@ -1,9 +1,23 @@
 #!/bin/bash
 
-if [ "$(uname)" = "Darwin" ]; then
-    cp -f src/Makevars.mac src/Makevars
-else
-    cp -f src/Makevars.linux src/Makevars
-fi
+# Detect operating system and copy appropriate Makevars file
+# Note: Windows typically uses src/Makevars.win directly, but we handle it here for completeness
+
+OS_TYPE="$(uname -s)"
+
+case "${OS_TYPE}" in
+    Darwin*)
+        # macOS
+        cp -f src/Makevars.mac src/Makevars
+        ;;
+    MINGW*|MSYS*|CYGWIN*)
+        # Windows (Git Bash, MSYS2, Cygwin)
+        cp -f src/Makevars.win src/Makevars
+        ;;
+    *)
+        # Linux and other Unix-like systems
+        cp -f src/Makevars.linux src/Makevars
+        ;;
+esac
 
 exit 0
diff --git a/src/calcDeviances.cpp b/src/calcDeviances.cpp
@@ -12,65 +12,70 @@ using namespace Rcpp;
 arma::vec calcDeviances_ratio(const arma::sp_mat& M1,
                               const arma::sp_mat& M2,
                               int num_threads = 1) {
-  int n_rows = M1.n_rows;
-  int n_cols = M1.n_cols;
-  arma::vec dev(n_rows, arma::fill::zeros);
-
-  // Notify about OpenMP availability only once (thread-safe)
-  static std::atomic<bool> has_printed(false);
-  bool expected = false;
-  if (has_printed.compare_exchange_strong(expected, true)) {
-    if (num_threads <= 1) {
-#ifdef _OPENMP
-      Rcpp::Rcout << "Note: OpenMP is available. "
-      "You can speed up this computation by passing "
-      "num_threads > 1 to enable multi-threading.\n";
-#else
-      Rcpp::Rcout << "OpenMP not detected: running in single-thread mode.\n";
-#endif
-    } else {
+  try {
+    int n_rows = M1.n_rows;
+    int n_cols = M1.n_cols;
+    arma::vec dev(n_rows, arma::fill::zeros);
+
+    // Notify about OpenMP availability only once (thread-safe)
+    // Improved to reduce message spam
+    static std::atomic<bool> has_printed(false);
+    bool expected = false;
+    if (has_printed.compare_exchange_strong(expected, true)) {
 #ifdef _OPENMP
-      Rcpp::Rcout << "Running with " << num_threads << " threads.\n";
+      if (num_threads > 1) {
+        Rcpp::Rcout << "OpenMP detected: using " << num_threads << " threads for deviance calculation.\n";
+      }
 #else
-      Rcpp::Rcout << "OpenMP not detected: running in single-thread mode.\n";
+      // Only warn if user requested multi-threading but it's not available
+      if (num_threads > 1) {
+        Rcpp::Rcout << "OpenMP not available: running in single-threaded mode despite num_threads="
+                    << num_threads << " request.\n";
+      }
 #endif
     }
-  }
-
-  // Set the number of threads if OpenMP is available
+
+    // Set the number of threads if OpenMP is available
 #ifdef _OPENMP
-  if (num_threads > 1) {
-    omp_set_num_threads(num_threads);
-  }
+    if (num_threads > 1) {
+      omp_set_num_threads(num_threads);
+    }
 #endif
-  
-  // Parallelize the outer loop if OpenMP is available and num_threads > 1
+
+    // Parallelize the outer loop if OpenMP is available and num_threads > 1
 #ifdef _OPENMP
 #pragma omp parallel for if(num_threads > 1)
 #endif
-  for (int i = 0; i < n_rows; i++) {
-    double sum_y = 0.0, sum_n = 0.0;
-    for (int k = 0; k < n_cols; k++) {
-      double y = M1(i,k), f = M2(i,k);
-      sum_y += y;
-      sum_n += (y + f);
-    }
-    if (sum_n <= 0) { dev[i] = 0; continue; }
-    double p_hat = sum_y / sum_n;
-    if (p_hat <= 0 || p_hat >= 1) { dev[i] = 0; continue; }
-
-    double dev_row = 0.0;
-    for (int k = 0; k < n_cols; k++) {
-      double y = M1(i,k), f = M2(i,k);
-      double n_i = y + f;
-      if (n_i <= 0) continue;
-      if (y > 0)
-        dev_row += 2 * y * std::log(y / (n_i * p_hat));
-      if (n_i - y > 0)
-        dev_row += 2 * (n_i - y) * std::log((n_i - y) / (n_i * (1 - p_hat)));
+    for (int i = 0; i < n_rows; i++) {
+      double sum_y = 0.0, sum_n = 0.0;
+      for (int k = 0; k < n_cols; k++) {
+        double y = M1(i,k), f = M2(i,k);
+        sum_y += y;
+        sum_n += (y + f);
+      }
+      if (sum_n <= 0) { dev[i] = 0; continue; }
+      double p_hat = sum_y / sum_n;
+      if (p_hat <= 0 || p_hat >= 1) { dev[i] = 0; continue; }
+
+      double dev_row = 0.0;
+      for (int k = 0; k < n_cols; k++) {
+        double y = M1(i,k), f = M2(i,k);
+        double n_i = y + f;
+        if (n_i <= 0) continue;
+        if (y > 0)
+          dev_row += 2 * y * std::log(y / (n_i * p_hat));
+        if (n_i - y > 0)
+          dev_row += 2 * (n_i - y) * std::log((n_i - y) / (n_i * (1 - p_hat)));
+      }
+      dev[i] = dev_row;
     }
-    dev[i] = dev_row;
+
+    return dev;
+
+  } catch(std::exception &ex) {
+    forward_exception_to_r(ex);
+  } catch(...) {
+    ::Rf_error("C++ exception in calcDeviances_ratio (unknown reason)");
   }
-
-  return dev;
+  return arma::vec(); // never reached
 }