Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions R/feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,31 @@
#' # printing the results
#' print(HVE[order(-sum_deviance)])
#' @export
find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threads=1, verbose=TRUE, ...) {
find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threads=1, verbose=FALSE, ...) {

# Check if matrices are sparse
if (!(inherits(m1_matrix, "Matrix") && inherits(m2_matrix, "Matrix"))) {
stop("Both 'm1_matrix' and 'm2_matrix' must be sparse matrices of class 'Matrix'.")
stop("Both 'm1_matrix' and 'm2_matrix' must be sparse matrices of class 'Matrix'.", call. = FALSE)
}
# Check matrix compatibility
if (!identical(colnames(m1_matrix), colnames(m2_matrix))) {
stop("The colnames (barcodes) of inclusion and exclusion matrices are not identical.")
stop("The colnames (barcodes) of inclusion and exclusion matrices are not identical.", call. = FALSE)
}
if (!identical(rownames(m1_matrix), rownames(m2_matrix))) {
stop("The rownames (junction events) of inclusion and exclusion matrices are not identical.")
stop("The rownames (junction events) of inclusion and exclusion matrices are not identical.", call. = FALSE)
}

# Filter rows based on minimum row sum criteria
to_keep_events <- which(rowSums(m1_matrix) > min_row_sum & rowSums(m2_matrix) > min_row_sum)
m1_sums <- rowSums(m1_matrix)
m2_sums <- rowSums(m2_matrix)
to_keep_events <- which(m1_sums > min_row_sum & m2_sums > min_row_sum)

# Check if any events pass the threshold
if (length(to_keep_events) == 0) {
stop("No events pass the min_row_sum threshold of ", min_row_sum,
". Consider lowering the threshold or checking your data.", call. = FALSE)
}

m1_matrix <- m1_matrix[to_keep_events, , drop = FALSE]
m2_matrix <- m2_matrix[to_keep_events, , drop = FALSE]

Expand All @@ -58,7 +67,7 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threa
deviance_values <- tryCatch({
calcDeviances_ratio(M1_sub, M2_sub, n_threads)
}, error = function(e) {
stop("Error in calcDeviances_ratio function: ", e$message)
stop("Error in calcDeviances_ratio function: ", e$message, call. = FALSE)
})
deviance_values <- c(deviance_values)
names(deviance_values) <- rownames(M1_sub)
Expand Down Expand Up @@ -116,24 +125,24 @@ find_variable_events <- function(m1_matrix, m2_matrix, min_row_sum = 50, n_threa
#' @import Matrix
#' @importClassesFrom Matrix dgCMatrix dsCMatrix dgTMatrix dsTMatrix
#' @export
find_variable_genes <- function(gene_expression_matrix, method = "vst", n_threads = 1, verbose = TRUE, ...) {
find_variable_genes <- function(gene_expression_matrix, method = "vst", n_threads = 1, verbose = FALSE, ...) {
# adding the vst method as the default
method <- match.arg(method, choices = c("vst", "sum_deviance"))

# Verify that gene_expression_matrix is a sparse Matrix
if (!inherits(gene_expression_matrix, "Matrix")) {
stop("The 'gene_expression_matrix' must be a sparse matrix of class 'Matrix'.")
stop("The 'gene_expression_matrix' must be a sparse matrix of class 'Matrix'.", call. = FALSE)
}

if (method == "vst") {
if(verbose) cat("The method we are using is vst (Seurat)...\n")
if (!exists("standardizeSparse_variance_vst")) {
stop("The function 'standardizeSparse_variance_vst' is not available. Check your C++ source files.")
stop("The function 'standardizeSparse_variance_vst' is not available. Check your C++ source files.", call. = FALSE)
}
rez_vector <- tryCatch({
standardizeSparse_variance_vst(matSEXP = gene_expression_matrix)
}, error = function(e) {
stop("Error in standardizeSparse_variance_vst: ", e$message)
stop("Error in standardizeSparse_variance_vst: ", e$message, call. = FALSE)
})
rez <- data.table::data.table(events = rownames(gene_expression_matrix),
standardize_variance = rez_vector)
Expand All @@ -143,7 +152,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
# Filter rows based on minimum row sum criteria
to_keep_features <- which(rowSums(gene_expression_matrix) > 0)
if (length(to_keep_features) == 0) {
stop("No genes with a positive row sum were found.")
stop("No genes with a positive row sum were found.", call. = FALSE)
}
gene_expression_matrix <- gene_expression_matrix[to_keep_features, , drop = FALSE]

Expand Down Expand Up @@ -174,7 +183,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
deviance_values <- tryCatch({
calcNBDeviancesWithThetaEstimation(as(gene_expression_matrix_sub, "dgCMatrix"), n_threads)
}, error = function(e) {
stop("Error in calcNBDeviancesWithThetaEstimation function: ", e$message)
stop("Error in calcNBDeviancesWithThetaEstimation function: ", e$message, call. = FALSE)
})
deviance_values <- c(deviance_values)
names(deviance_values) <- rownames(gene_expression_matrix_sub)
Expand All @@ -197,7 +206,7 @@ find_variable_genes <- function(gene_expression_matrix, method = "vst", n_thread
row_var <- tryCatch({
splikit::get_rowVar(M = gene_expression_matrix)
}, error = function(e) {
stop("Error in splikit::get_rowVar: ", e$message)
stop("Error in splikit::get_rowVar: ", e$message, call. = FALSE)
})

row_var_cpp_dt <- data.table::data.table(events = rownames(gene_expression_matrix),
Expand Down
37 changes: 28 additions & 9 deletions R/general_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,27 +56,36 @@ NULL
#' print(pseudo_r_square_nagel)
#'
#' @export
get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metric = "CoxSnell", suppress_warnings=TRUE) {
get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metric = "CoxSnell", suppress_warnings=FALSE) {

# Validate metric parameter
metric <- match.arg(metric, choices = c("CoxSnell", "Nagelkerke"))

# Check ZDB_matrix (must be dense)
if (!is.matrix(ZDB_matrix)) stop("ZDB_matrix must be a dense matrix.")
if (!is.matrix(ZDB_matrix)) stop("ZDB_matrix must be a dense matrix.", call. = FALSE)

# Check m1 and m2 (can be sparse or dense)
if (!is.matrix(m1_inclusion) && !inherits(m1_inclusion, "Matrix")) {
stop("m1_inclusion must be either a dense matrix or a sparse Matrix.")
stop("m1_inclusion must be either a dense matrix or a sparse Matrix.", call. = FALSE)
}
if (!is.matrix(m2_exclusion) && !inherits(m2_exclusion, "Matrix")) {
stop("m2_exclusion must be either a dense matrix or a sparse Matrix.")
stop("m2_exclusion must be either a dense matrix or a sparse Matrix.", call. = FALSE)
}

# Check row dimensions
if (nrow(m1_inclusion) != nrow(ZDB_matrix)) {
stop("m1_inclusion must have the same number of rows as ZDB_matrix.")
stop("m1_inclusion must have the same number of rows as ZDB_matrix.", call. = FALSE)
}
if (nrow(m2_exclusion) != nrow(ZDB_matrix)) {
stop("m2_exclusion must have the same number of rows as ZDB_matrix.")
stop("m2_exclusion must have the same number of rows as ZDB_matrix.", call. = FALSE)
}

# Check column dimensions
if (ncol(m1_inclusion) != ncol(ZDB_matrix)) {
stop("m1_inclusion must have the same number of columns as ZDB_matrix.", call. = FALSE)
}
if (ncol(m2_exclusion) != ncol(ZDB_matrix)) {
stop("m2_exclusion must have the same number of columns as ZDB_matrix.", call. = FALSE)
}

cat("Input dimension check passed. Proceeding with computation.\n")
Expand Down Expand Up @@ -151,7 +160,17 @@ get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metri
})
}

# Warn about NA removal
n_before <- nrow(results)
results <- na.omit(results)
n_after <- nrow(results)

if (n_before > n_after) {
n_removed <- n_before - n_after
message("Removed ", n_removed, " event(s) with NA values (",
round(100 * n_removed / n_before, 1), "% of total).")
message("Reasons for NA: insufficient data (n<2), no variation, or convergence failure.")
}

cat("Computation completed successfully.\n")
return(results)
Expand Down Expand Up @@ -185,7 +204,7 @@ get_pseudo_correlation <- function(ZDB_matrix, m1_inclusion, m2_exclusion, metri
#' @export
get_rowVar <- function(M, verbose=FALSE) {
if (! (is.matrix(M) || inherits(M, "dgCMatrix")) ) {
stop("`M` must be either a dense numeric matrix or a dgCMatrix.")
stop("`M` must be either a dense numeric matrix or a dgCMatrix.", call. = FALSE)
}
if(verbose) message("[get_rowVar] Starting computation")
result <- rowVariance_cpp(M)
Expand Down
28 changes: 20 additions & 8 deletions R/star_solo_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -982,15 +982,27 @@ make_m2 <- function(m1_inclusion_matrix, eventdata, batch_size = 5000,
#' @export
make_eventdata_plus <- function(eventdata, GTF_file_direction) {

# Validate GTF file path
if (!file.exists(GTF_file_direction)) {
stop("GTF file not found: ", GTF_file_direction, call. = FALSE)
}
if (!file.access(GTF_file_direction, mode = 4) == 0) {
stop("GTF file is not readable: ", GTF_file_direction, call. = FALSE)
}

# Read GTF file as plain text using fread
GTF <- data.table::fread(
GTF_file_direction,
col.names = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attribute"),
sep = "\t",
header = FALSE,
quote = "",
showProgress = FALSE
)
GTF <- tryCatch({
data.table::fread(
GTF_file_direction,
col.names = c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attribute"),
sep = "\t",
header = FALSE,
quote = "",
showProgress = FALSE
)
}, error = function(e) {
stop("Error reading GTF file: ", e$message, call. = FALSE)
})

# Filter for 'gene' entries
ref_gtf <- GTF[type == "gene"]
Expand Down
24 changes: 19 additions & 5 deletions configure
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
#!/bin/bash

if [ "$(uname)" = "Darwin" ]; then
cp -f src/Makevars.mac src/Makevars
else
cp -f src/Makevars.linux src/Makevars
fi
# Detect operating system and copy appropriate Makevars file
# Note: Windows typically uses src/Makevars.win directly, but we handle it here for completeness

OS_TYPE="$(uname -s)"

case "${OS_TYPE}" in
Darwin*)
# macOS
cp -f src/Makevars.mac src/Makevars
;;
MINGW*|MSYS*|CYGWIN*)
# Windows (Git Bash, MSYS2, Cygwin)
cp -f src/Makevars.win src/Makevars
;;
*)
# Linux and other Unix-like systems
cp -f src/Makevars.linux src/Makevars
;;
esac

exit 0
105 changes: 55 additions & 50 deletions src/calcDeviances.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,65 +12,70 @@ using namespace Rcpp;
arma::vec calcDeviances_ratio(const arma::sp_mat& M1,
const arma::sp_mat& M2,
int num_threads = 1) {
int n_rows = M1.n_rows;
int n_cols = M1.n_cols;
arma::vec dev(n_rows, arma::fill::zeros);

// Notify about OpenMP availability only once (thread-safe)
static std::atomic<bool> has_printed(false);
bool expected = false;
if (has_printed.compare_exchange_strong(expected, true)) {
if (num_threads <= 1) {
#ifdef _OPENMP
Rcpp::Rcout << "Note: OpenMP is available. "
"You can speed up this computation by passing "
"num_threads > 1 to enable multi-threading.\n";
#else
Rcpp::Rcout << "OpenMP not detected: running in single-thread mode.\n";
#endif
} else {
try {
int n_rows = M1.n_rows;
int n_cols = M1.n_cols;
arma::vec dev(n_rows, arma::fill::zeros);

// Notify about OpenMP availability only once (thread-safe)
// Improved to reduce message spam
static std::atomic<bool> has_printed(false);
bool expected = false;
if (has_printed.compare_exchange_strong(expected, true)) {
#ifdef _OPENMP
Rcpp::Rcout << "Running with " << num_threads << " threads.\n";
if (num_threads > 1) {
Rcpp::Rcout << "OpenMP detected: using " << num_threads << " threads for deviance calculation.\n";
}
#else
Rcpp::Rcout << "OpenMP not detected: running in single-thread mode.\n";
// Only warn if user requested multi-threading but it's not available
if (num_threads > 1) {
Rcpp::Rcout << "OpenMP not available: running in single-threaded mode despite num_threads="
<< num_threads << " request.\n";
}
#endif
}
}

// Set the number of threads if OpenMP is available

// Set the number of threads if OpenMP is available
#ifdef _OPENMP
if (num_threads > 1) {
omp_set_num_threads(num_threads);
}
if (num_threads > 1) {
omp_set_num_threads(num_threads);
}
#endif
// Parallelize the outer loop if OpenMP is available and num_threads > 1

// Parallelize the outer loop if OpenMP is available and num_threads > 1
#ifdef _OPENMP
#pragma omp parallel for if(num_threads > 1)
#endif
for (int i = 0; i < n_rows; i++) {
double sum_y = 0.0, sum_n = 0.0;
for (int k = 0; k < n_cols; k++) {
double y = M1(i,k), f = M2(i,k);
sum_y += y;
sum_n += (y + f);
}
if (sum_n <= 0) { dev[i] = 0; continue; }
double p_hat = sum_y / sum_n;
if (p_hat <= 0 || p_hat >= 1) { dev[i] = 0; continue; }

double dev_row = 0.0;
for (int k = 0; k < n_cols; k++) {
double y = M1(i,k), f = M2(i,k);
double n_i = y + f;
if (n_i <= 0) continue;
if (y > 0)
dev_row += 2 * y * std::log(y / (n_i * p_hat));
if (n_i - y > 0)
dev_row += 2 * (n_i - y) * std::log((n_i - y) / (n_i * (1 - p_hat)));
for (int i = 0; i < n_rows; i++) {
double sum_y = 0.0, sum_n = 0.0;
for (int k = 0; k < n_cols; k++) {
double y = M1(i,k), f = M2(i,k);
sum_y += y;
sum_n += (y + f);
}
if (sum_n <= 0) { dev[i] = 0; continue; }
double p_hat = sum_y / sum_n;
if (p_hat <= 0 || p_hat >= 1) { dev[i] = 0; continue; }

double dev_row = 0.0;
for (int k = 0; k < n_cols; k++) {
double y = M1(i,k), f = M2(i,k);
double n_i = y + f;
if (n_i <= 0) continue;
if (y > 0)
dev_row += 2 * y * std::log(y / (n_i * p_hat));
if (n_i - y > 0)
dev_row += 2 * (n_i - y) * std::log((n_i - y) / (n_i * (1 - p_hat)));
}
dev[i] = dev_row;
}
dev[i] = dev_row;

return dev;

} catch(std::exception &ex) {
forward_exception_to_r(ex);
} catch(...) {
::Rf_error("C++ exception in calcDeviances_ratio (unknown reason)");
}

return dev;
return arma::vec(); // never reached
}
Loading
Loading