From 468e8d1b9d166cdce06ca63c7f3b66ee61cd2f91 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 6 Nov 2025 13:52:41 -0500 Subject: [PATCH 01/28] refactor: reorganize the arg order in top level functions to consolidate parallelizations args before using biocParallel --- R/arrayCompartments.R | 26 +++++++++++++------------- R/getCompartments.R | 12 ++++++------ R/scCompartments.R | 22 +++++++++++----------- flake.nix | 1 + man/arrayCompartments.Rd | 36 ++++++++++++++++++------------------ man/scCompartments.Rd | 30 +++++++++++++++--------------- 6 files changed, 64 insertions(+), 63 deletions(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 4bff477c..3658909b 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -6,18 +6,18 @@ #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp #' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) +#' @param group Whether to treat this as a group set of samples #' @param targets Samples/cells to shrink towards +#' @param bootstrap Whether we should perform bootstrapping of inferred compartments +#' @param num.bootstraps How many bootstraps to run #' @param preprocess Whether to preprocess the arrays prior to compartment inference +#' @param array.type What type of array is this ("hm450", "EPIC") +#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") +#' @param other Another arbitrary genome to compute compartments on #' @param parallel Whether to run samples in parallel #' @param cores How many cores to use when running samples in parallel -#' @param bootstrap Whether we should perform bootstrapping of inferred compartments -#' @param num.bootstraps How many bootstraps to run #' @param boot.parallel Whether to run the bootstrapping in parallel #' @param boot.cores How many cores to use for the bootstrapping -#' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param other Another arbitrary 
genome to compute compartments on -#' @param group Whether to treat this as a group set of samples -#' @param array.type What type of array is this ("hm450", "EPIC") #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment @@ -44,18 +44,18 @@ arrayCompartments <- function( obj, res = 1e6, chr = NULL, + group = FALSE, targets = NULL, - preprocess = TRUE, - parallel = TRUE, - cores = 2, bootstrap = TRUE, num.bootstraps = 1000, - boot.parallel = TRUE, - boot.cores = 2, + preprocess = TRUE, + array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, other = NULL, - array.type = c("hm450", "EPIC") + parallel = TRUE, + cores = 2, + boot.parallel = TRUE, + boot.cores = 2 ) { verifySE(obj) verifyCoords(obj) diff --git a/R/getCompartments.R b/R/getCompartments.R index ad8178d6..6c98e55d 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -1,17 +1,17 @@ getCompartments <- function( obj, - assay, res, - parallel, chr, + group, targets, - cores, bootstrap, num.bootstraps, - boot.parallel, - boot.cores, genome, - group + assay, + parallel, + cores, + boot.parallel, + boot.cores ) { if (is.null(chr)) { message("Assuming we want to process all chromosomes.") diff --git a/R/scCompartments.R b/R/scCompartments.R index 251bb099..18a142cc 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -6,16 +6,16 @@ #' @param obj Input SummarizedExperiment object #' @param res Compartment resolution in bp #' @param chr What chromosome to work on (leave as NULL to run on all chromosomes) +#' @param group Whether to treat this as a group set of samples #' @param targets Samples/cells to shrink towards -#' @param parallel Whether to run samples in parallel -#' @param cores How many cores to use when running samples in parallel #' @param bootstrap Whether we should perform bootstrapping of inferred compartments #' @param num.bootstraps How many bootstraps to run -#' @param boot.parallel Whether to run the 
bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") -#' @param group Whether to treat this as a group set of samples #' @param assay What type of single-cell assay is the input data ("atac" or "rna") +#' @param parallel Whether to run samples in parallel +#' @param cores How many cores to use when running samples in parallel +#' @param boot.parallel Whether to run the bootstrapping in parallel +#' @param boot.cores How many cores to use for the bootstrapping #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment @@ -35,16 +35,16 @@ scCompartments <- function( obj, res = 1e6, chr = NULL, + group = FALSE, targets = NULL, - parallel = FALSE, - cores = 2, bootstrap = TRUE, num.bootstraps = 100, - boot.parallel = FALSE, - boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, - assay = c("atac", "rna") + assay = c("atac", "rna"), + parallel = FALSE, + cores = 2, + boot.parallel = FALSE, + boot.cores = 2 ) { verifySE(obj) verifyCoords(obj) diff --git a/flake.nix b/flake.nix index 9ae613c3..fa3879b8 100644 --- a/flake.nix +++ b/flake.nix @@ -17,6 +17,7 @@ ]; Imports = with pkgs.rPackages; [ + BiocParallel DelayedArray DelayedMatrixStats GenomicRanges diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 29fcb711..5239ed9f 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -8,18 +8,18 @@ arrayCompartments( obj, res = 1000000, chr = NULL, + group = FALSE, targets = NULL, - preprocess = TRUE, - parallel = TRUE, - cores = 2, bootstrap = TRUE, num.bootstraps = 1000, - boot.parallel = TRUE, - boot.cores = 2, + preprocess = TRUE, + array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, other = NULL, - array.type = c("hm450", "EPIC") + parallel = TRUE, + cores = 2, + boot.parallel = TRUE, + boot.cores = 2 ) } \arguments{ @@ -29,29 +29,29 @@ 
arrayCompartments( \item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} -\item{targets}{Samples/cells to shrink towards} - -\item{preprocess}{Whether to preprocess the arrays prior to compartment inference} - -\item{parallel}{Whether to run samples in parallel} +\item{group}{Whether to treat this as a group set of samples} -\item{cores}{How many cores to use when running samples in parallel} +\item{targets}{Samples/cells to shrink towards} \item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} \item{num.bootstraps}{How many bootstraps to run} -\item{boot.parallel}{Whether to run the bootstrapping in parallel} +\item{preprocess}{Whether to preprocess the arrays prior to compartment inference} -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{array.type}{What type of array is this ("hm450", "EPIC")} \item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} -\item{group}{Whether to treat this as a group set of samples} - \item{other}{Another arbitrary genome to compute compartments on} -\item{array.type}{What type of array is this ("hm450", "EPIC")} +\item{parallel}{Whether to run samples in parallel} + +\item{cores}{How many cores to use when running samples in parallel} + +\item{boot.parallel}{Whether to run the bootstrapping in parallel} + +\item{boot.cores}{How many cores to use for the bootstrapping} } \value{ A RaggedExperiment of inferred compartments diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index 92193fcb..a2e03278 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -8,16 +8,16 @@ scCompartments( obj, res = 1000000, chr = NULL, + group = FALSE, targets = NULL, - parallel = FALSE, - cores = 2, bootstrap = TRUE, num.bootstraps = 100, - boot.parallel = FALSE, - boot.cores = 2, genome = c("hg19", "hg38", "mm9", "mm10"), - group = FALSE, - assay = c("atac", "rna") + assay = c("atac", "rna"), + parallel = FALSE, + cores = 2, + boot.parallel = 
FALSE, + boot.cores = 2 ) } \arguments{ @@ -27,25 +27,25 @@ scCompartments( \item{chr}{What chromosome to work on (leave as NULL to run on all chromosomes)} -\item{targets}{Samples/cells to shrink towards} - -\item{parallel}{Whether to run samples in parallel} +\item{group}{Whether to treat this as a group set of samples} -\item{cores}{How many cores to use when running samples in parallel} +\item{targets}{Samples/cells to shrink towards} \item{bootstrap}{Whether we should perform bootstrapping of inferred compartments} \item{num.bootstraps}{How many bootstraps to run} -\item{boot.parallel}{Whether to run the bootstrapping in parallel} +\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{assay}{What type of single-cell assay is the input data ("atac" or "rna")} -\item{genome}{What genome to work on ("hg19", "hg38", "mm9", "mm10")} +\item{parallel}{Whether to run samples in parallel} -\item{group}{Whether to treat this as a group set of samples} +\item{cores}{How many cores to use when running samples in parallel} -\item{assay}{What type of single-cell assay is the input data ("atac" or "rna")} +\item{boot.parallel}{Whether to run the bootstrapping in parallel} + +\item{boot.cores}{How many cores to use for the bootstrapping} } \value{ A RaggedExperiment of inferred compartments From 51e0e942eefe7edf157c581df7381bac32e3be8d Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 10:59:00 -0500 Subject: [PATCH 02/28] feat(threads): add BiocParallel worker checking functions --- DESCRIPTION | 1 + NAMESPACE | 1 + R/parallel.R | 80 +++++++++++++++++++++++++++++++++++++++ man/bpnworkers.list.Rd | 18 +++++++++ man/check_worker_count.Rd | 16 ++++++++ man/get_bpnworkers.Rd | 14 +++++++ man/get_nested_params.Rd | 18 +++++++++ man/verify_bp.Rd | 19 ++++++++++ man/verify_workers.Rd | 19 ++++++++++ 9 files changed, 186 insertions(+) create mode 100644 R/parallel.R create mode 
100644 man/bpnworkers.list.Rd create mode 100644 man/check_worker_count.Rd create mode 100644 man/get_bpnworkers.Rd create mode 100644 man/get_nested_params.Rd create mode 100644 man/verify_bp.Rd create mode 100644 man/verify_workers.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ec535185..19e01d04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Depends: HDF5Array Imports: BiocSingular, + BiocParallel, rlang, S4Vectors, IRanges, diff --git a/NAMESPACE b/NAMESPACE index 6ca8ae7b..55d9b3d7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ import(HDF5Array) import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) +importFrom(BiocParallel,bpnworkers) importFrom(BiocSingular,IrlbaParam) importFrom(BiocSingular,runSVD) importFrom(GenomeInfoDb,"seqlevelsStyle<-") diff --git a/R/parallel.R b/R/parallel.R new file mode 100644 index 00000000..908afa46 --- /dev/null +++ b/R/parallel.R @@ -0,0 +1,80 @@ +#' Check that the number of requested workers is valid +#' @keywords internal +check_worker_count <- function(bpparam, boot.parallel, avail_workers = parallelly::availableCores()) { + workers <- get_bpnworkers(bpparam) + total <- sum(Reduce(`*`, workers), workers[1]) + if (verify_workers(total)) { + return(TRUE) + } + + msg <- sprintf( + "Using %1$d outer and %2$d inner workers would require %3$d workers (%1$d + (%1$d x %2$d)) but your system has only %4$d cores. 
+ See parallelly::availableCores() for more information on available resources", + workers[1], + workers[2], + total, + avail_workers + ) + stop(msg) +} + +#' Get the total number of BiocParallelParam workers that will get used +#' From a BiocParallelParam or list of 2 BiocParallelParam objects +#' @keywords internal +get_bpnworkers <- function(bp) { + workers <- bpnworkers.list(bp) +} + +#' Return the number of workers in a list of BiocParallelParam objects +#' @param List of BiocParallelParam objects +#' @importFrom BiocParallel bpnworkers +#' @return A vector of the `bpnworkers` count in each list element +#' @keywords internal +bpnworkers.list <- function(bplist) { + unlist(Map(bpnworkers, bplist)) +} + +#' Verify that the input BiocParallelParam is valid +#' @param A BiocParallelParam or list of 2 BiocParallelParam objects +#' @importFrom BiocParallel bpnworkers +#' @return TRUE if the total `bpnworkers` in the input does not exceed +#' available resources as defined by `parallelly::availableCores()` +#' @keywords internal +verify_bp <- function(bp) { + verify_workers(get_bpnworkers(bp)) +} + +#' Verify that requested thread count is not higher than available +#' @param thread_count The number of workers to check availability +#' @return TRUE if the requested `thread_count` does not exceed available +#' resources as defined by `parallelly::availableCores()` +#' @keywords internal +verify_workers <- function(n_workers) { + avail_workers <- parallelly::availableCores() + n_workers <= avail_workers +} + +#' Set outer and inner params for nester parallelization +#' The outer param is across the input samples/columns and the second is for +#' bootstrapping. If `boot.parallel` is FALSE, the inner param is set to +#' `SerialParam`. 
+#' @keywords internal +get_nested_params <- function(BPPARAM, boot.parallel) { + stopifnot("Only two BiocParallelParam objects can be used" = length(BPPARAM) <= 2) + single_param <- length(BPPARAM) == 1 + BPPARAM <- if (single_param & is.list(BPPARAM)) BPPARAM[[1]] else BPPARAM + + if (boot.parallel) { + if (single_param) { + return(list(outer = BPPARAM, inner = BPPARAM)) + } else { + return(list(outer = BPPARAM[[1]], inner = BPPARAM[[2]])) + } + } + + if (single_param) { + list(outer = BPPARAM, inner = SerialParam()) + } else { + list(outer = BPPARAM[[1]], inner = SerialParam()) + } +} diff --git a/man/bpnworkers.list.Rd b/man/bpnworkers.list.Rd new file mode 100644 index 00000000..762f6eb4 --- /dev/null +++ b/man/bpnworkers.list.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{bpnworkers.list} +\alias{bpnworkers.list} +\title{Return the number of workers in a list of BiocParallelParam objects} +\usage{ +bpnworkers.list(bplist) +} +\arguments{ +\item{List}{of BiocParallelParam objects} +} +\value{ +A vector of the \code{bpnworkers} count in each list element +} +\description{ +Return the number of workers in a list of BiocParallelParam objects +} +\keyword{internal} diff --git a/man/check_worker_count.Rd b/man/check_worker_count.Rd new file mode 100644 index 00000000..62d20728 --- /dev/null +++ b/man/check_worker_count.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{check_worker_count} +\alias{check_worker_count} +\title{Check that the number of requested workers is valid} +\usage{ +check_worker_count( + bpparam, + boot.parallel, + avail_workers = parallelly::availableCores() +) +} +\description{ +Check that the number of requested workers is valid +} +\keyword{internal} diff --git a/man/get_bpnworkers.Rd b/man/get_bpnworkers.Rd new file mode 100644 index 00000000..8bad4025 --- /dev/null +++ b/man/get_bpnworkers.Rd @@ -0,0 
+1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{get_bpnworkers} +\alias{get_bpnworkers} +\title{Get the total number of BiocParallelParam workers that will get used +From a BiocParallelParam or list of 2 BiocParallelParam objects} +\usage{ +get_bpnworkers(bp) +} +\description{ +Get the total number of BiocParallelParam workers that will get used +From a BiocParallelParam or list of 2 BiocParallelParam objects +} +\keyword{internal} diff --git a/man/get_nested_params.Rd b/man/get_nested_params.Rd new file mode 100644 index 00000000..2e7ed1bc --- /dev/null +++ b/man/get_nested_params.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{get_nested_params} +\alias{get_nested_params} +\title{Set outer and inner params for nester parallelization +The outer param is across the input samples/columns and the second is for +bootstrapping. If \code{boot.parallel} is FALSE, the inner param is set to +\code{SerialParam}.} +\usage{ +get_nested_params(BPPARAM, boot.parallel) +} +\description{ +Set outer and inner params for nester parallelization +The outer param is across the input samples/columns and the second is for +bootstrapping. If \code{boot.parallel} is FALSE, the inner param is set to +\code{SerialParam}. 
+} +\keyword{internal} diff --git a/man/verify_bp.Rd b/man/verify_bp.Rd new file mode 100644 index 00000000..0869d5d3 --- /dev/null +++ b/man/verify_bp.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{verify_bp} +\alias{verify_bp} +\title{Verify that the input BiocParallelParam is valid} +\usage{ +verify_bp(bp) +} +\arguments{ +\item{A}{BiocParallelParam or list of 2 BiocParallelParam objects} +} +\value{ +TRUE if the total \code{bpnworkers} in the input does not exceed +available resources as defined by \code{parallelly::availableCores()} +} +\description{ +Verify that the input BiocParallelParam is valid +} +\keyword{internal} diff --git a/man/verify_workers.Rd b/man/verify_workers.Rd new file mode 100644 index 00000000..d3117a1d --- /dev/null +++ b/man/verify_workers.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{verify_workers} +\alias{verify_workers} +\title{Verify that requested thread count is not higher than available} +\usage{ +verify_workers(n_workers) +} +\arguments{ +\item{thread_count}{The number of workers to check availability} +} +\value{ +TRUE if the requested \code{thread_count} does not exceed available +resources as defined by \code{parallelly::availableCores()} +} +\description{ +Verify that requested thread count is not higher than available +} +\keyword{internal} From 2e7b5aed524da0795c1deb7e067e4fde12c51bef Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:02:09 -0500 Subject: [PATCH 03/28] wip: add args for parallelization for scCompartments/arrayCompartments --- NAMESPACE | 1 + R/arrayCompartments.R | 21 +++++++++------------ R/scCompartments.R | 27 ++++++++++++--------------- man/arrayCompartments.Rd | 12 +++--------- man/scCompartments.Rd | 12 +++--------- 5 files changed, 28 insertions(+), 45 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 55d9b3d7..ed3fce7b 100644 --- 
a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) importFrom(BiocParallel,bpnworkers) +importFrom(BiocParallel,bpparam) importFrom(BiocSingular,IrlbaParam) importFrom(BiocSingular,runSVD) importFrom(GenomeInfoDb,"seqlevelsStyle<-") diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 3658909b..43978d79 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -14,10 +14,9 @@ #' @param array.type What type of array is this ("hm450", "EPIC") #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") #' @param other Another arbitrary genome to compute compartments on -#' @param parallel Whether to run samples in parallel -#' @param cores How many cores to use when running samples in parallel -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping +#' @param boot.parallel Whether to run the bootstrapping in parallel. See details. +#' @param BPPARAM BiocParallelParam object to use for parallelization. See details. 
+#' #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment @@ -52,14 +51,14 @@ arrayCompartments <- function( array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), other = NULL, - parallel = TRUE, - cores = 2, boot.parallel = TRUE, - boot.cores = 2 + BPPARAM = bpparam() ) { verifySE(obj) verifyCoords(obj) verifyAssayNames(obj, assay = "array") + bpparams <- get_nested_params(BPPARAM) + check_worker_count(bpparams) # preprocess the arrays if (preprocess) { @@ -83,15 +82,13 @@ arrayCompartments <- function( obj = obj, assay = "array", res = res, - parallel = parallel, chr = chr, targets = targets, - cores = cores, bootstrap = bootstrap, num.bootstraps = num.bootstraps, - boot.parallel = boot.parallel, - boot.cores = boot.cores, genome = genome, - group = group + group = group, + boot.parallel = boot.parallel, + bpparams = bpparams ) } diff --git a/R/scCompartments.R b/R/scCompartments.R index 18a142cc..42a0ba4e 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -12,22 +12,20 @@ #' @param num.bootstraps How many bootstraps to run #' @param genome What genome to work on ("hg19", "hg38", "mm9", "mm10") #' @param assay What type of single-cell assay is the input data ("atac" or "rna") -#' @param parallel Whether to run samples in parallel -#' @param cores How many cores to use when running samples in parallel -#' @param boot.parallel Whether to run the bootstrapping in parallel -#' @param boot.cores How many cores to use for the bootstrapping +#' @param boot.parallel Whether to run the bootstrapping in parallel. See details. +#' @param BPPARAM BiocParallelParam object to use for parallelization. See details. 
+#' #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment -#' @importFrom parallel mclapply #' @import RaggedExperiment +#' @importFrom BiocParallel bpparam #' @export #' @examples #' data("k562_scrna_chr14", package = "compartmap") #' sc_compartments <- scCompartments( #' k562_scrna_chr14, #' chr = "chr14", -#' parallel = FALSE, #' bootstrap = FALSE, #' genome = "hg19" #' ) @@ -41,31 +39,30 @@ scCompartments <- function( num.bootstraps = 100, genome = c("hg19", "hg38", "mm9", "mm10"), assay = c("atac", "rna"), - parallel = FALSE, - cores = 2, boot.parallel = FALSE, - boot.cores = 2 + BPPARAM = bpparam() ) { verifySE(obj) verifyCoords(obj) + bpparams <- get_nested_params(BPPARAM, boot.parallel) + check_worker_count(bpparams) + # which assay are we working on if (!all(assay %in% c("atac", "rna"))) stop("Supported assays are 'atac', and 'rna'.") assay <- tolower(match.arg(assay)) verifyAssayNames(obj, assay = assay) getCompartments( obj = obj, - assay = assay, res = res, chr = chr, + group = group, targets = targets, - parallel = parallel, - cores = cores, bootstrap = bootstrap, num.bootstraps = num.bootstraps, - boot.parallel = boot.parallel, - boot.cores = boot.cores, genome = genome, - group = group + assay = assay, + boot.parallel = boot.parallel, + bpparams = bpparams ) } diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 5239ed9f..a9bbc0fe 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -16,10 +16,8 @@ arrayCompartments( array.type = c("hm450", "EPIC"), genome = c("hg19", "hg38", "mm9", "mm10"), other = NULL, - parallel = TRUE, - cores = 2, boot.parallel = TRUE, - boot.cores = 2 + BPPARAM = bpparam() ) } \arguments{ @@ -45,13 +43,9 @@ arrayCompartments( \item{other}{Another arbitrary genome to compute compartments on} -\item{parallel}{Whether to run samples in parallel} +\item{boot.parallel}{Whether to run the bootstrapping in parallel. 
See details.} -\item{cores}{How many cores to use when running samples in parallel} - -\item{boot.parallel}{Whether to run the bootstrapping in parallel} - -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{BPPARAM}{BiocParallelParam object to use for parallelization. See details.} } \value{ A RaggedExperiment of inferred compartments diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index a2e03278..97c8182e 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -14,10 +14,8 @@ scCompartments( num.bootstraps = 100, genome = c("hg19", "hg38", "mm9", "mm10"), assay = c("atac", "rna"), - parallel = FALSE, - cores = 2, boot.parallel = FALSE, - boot.cores = 2 + BPPARAM = bpparam() ) } \arguments{ @@ -39,13 +37,9 @@ scCompartments( \item{assay}{What type of single-cell assay is the input data ("atac" or "rna")} -\item{parallel}{Whether to run samples in parallel} - -\item{cores}{How many cores to use when running samples in parallel} - -\item{boot.parallel}{Whether to run the bootstrapping in parallel} +\item{boot.parallel}{Whether to run the bootstrapping in parallel. See details.} -\item{boot.cores}{How many cores to use for the bootstrapping} +\item{BPPARAM}{BiocParallelParam object to use for parallelization. 
See details.} } \value{ A RaggedExperiment of inferred compartments From 2f0a56f8ee11eccb6e77b1554d2cba9f581fd538 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:10:20 -0500 Subject: [PATCH 04/28] wip: parallelize precomputeBootstrapMeans --- NAMESPACE | 1 + R/getCompartments.R | 11 +++++------ R/getGlobalMeans.R | 22 ++++++++++++---------- man/getCompartments.Rd | 23 +++++++++++++++++++++++ man/precomputeBootstrapMeans.Rd | 11 ++++------- 5 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 man/getCompartments.Rd diff --git a/NAMESPACE b/NAMESPACE index ed3fce7b..4639307d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ import(HDF5Array) import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) +importFrom(BiocParallel,bplapply) importFrom(BiocParallel,bpnworkers) importFrom(BiocParallel,bpparam) importFrom(BiocSingular,IrlbaParam) diff --git a/R/getCompartments.R b/R/getCompartments.R index 6c98e55d..f6fce380 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -1,3 +1,5 @@ +#' Run compartment inference +#' @importFrom BiocParallel bplapply getCompartments <- function( obj, res, @@ -8,10 +10,8 @@ getCompartments <- function( num.bootstraps, genome, assay, - parallel, - cores, boot.parallel, - boot.cores + bpparams ) { if (is.null(chr)) { message("Assuming we want to process all chromosomes.") @@ -28,11 +28,10 @@ getCompartments <- function( message("Pre-computing the bootstrap global means.") bmeans <- precomputeBootstrapMeans( obj = obj, + BPPARAM = bpparams[[1]], targets = targets, num.bootstraps = num.bootstraps, - assay = assay, - parallel = parallel, - num.cores = cores + assay = assay ) } diff --git a/R/getGlobalMeans.R b/R/getGlobalMeans.R index 09f1ae87..6a71cf92 100644 --- a/R/getGlobalMeans.R +++ b/R/getGlobalMeans.R @@ -49,11 +49,10 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" #' @name precomputeBootstrapMeans #' #' @param obj Input 
SummarizedExperiment object +#' @param BPPARAM BiocParallelParam for parallelizing computation #' @param targets Optional targets to shrink towards #' @param num.bootstraps The number of bootstraps to compute #' @param assay What type of assay the data are from -#' @param parallel Whether to run in parallel -#' @param num.cores How many cores to use for parallel processing #' #' @return A matrix of bootstrapped global means #' @@ -69,11 +68,10 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" #' ) precomputeBootstrapMeans <- function( obj, + BPPARAM, targets = NULL, num.bootstraps = 100, - assay = c("atac", "rna", "array"), - parallel = FALSE, - num.cores = 1 + assay = c("atac", "rna", "array") ) { # this function precomputes the bootstrapped global means # as a default we will make 100 bootstraps @@ -85,11 +83,15 @@ precomputeBootstrapMeans <- function( obj <- getShrinkageTargets(obj, targets) } assay.data <- .getAssay(obj, is.array) - bootMean <- mclapply(1:num.bootstraps, function(b) { - message("Working on bootstrap ", b) - resamp.mat <- .resampleMatrix(assay.data) - computeGlobalMean(resamp.mat) - }, mc.cores = ifelse(parallel, num.cores, 1)) + bootMean <- bplapply( + 1:num.bootstraps, + function(b) { + # message("Working on bootstrap ", b) + resamp.mat <- .resampleMatrix(assay.data) + computeGlobalMean(resamp.mat) + }, + BPPARAM = BPPARAM + ) bootResult <- do.call("cbind", bootMean) rownames(bootResult) <- as.character(granges(obj)) diff --git a/man/getCompartments.Rd b/man/getCompartments.Rd new file mode 100644 index 00000000..8dd97d7e --- /dev/null +++ b/man/getCompartments.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getCompartments.R +\name{getCompartments} +\alias{getCompartments} +\title{Run compartment inference} +\usage{ +getCompartments( + obj, + res, + chr, + group, + targets, + bootstrap, + num.bootstraps, + genome, + assay, + boot.parallel, + bpparams +) +} 
+\description{ +Run compartment inference +} diff --git a/man/precomputeBootstrapMeans.Rd b/man/precomputeBootstrapMeans.Rd index 2f46f8ac..6f8536a4 100644 --- a/man/precomputeBootstrapMeans.Rd +++ b/man/precomputeBootstrapMeans.Rd @@ -6,25 +6,22 @@ \usage{ precomputeBootstrapMeans( obj, + BPPARAM, targets = NULL, num.bootstraps = 100, - assay = c("atac", "rna", "array"), - parallel = FALSE, - num.cores = 1 + assay = c("atac", "rna", "array") ) } \arguments{ \item{obj}{Input SummarizedExperiment object} +\item{BPPARAM}{BiocParallelParam for parallelizing computation} + \item{targets}{Optional targets to shrink towards} \item{num.bootstraps}{The number of bootstraps to compute} \item{assay}{What type of assay the data are from} - -\item{parallel}{Whether to run in parallel} - -\item{num.cores}{How many cores to use for parallel processing} } \value{ A matrix of bootstrapped global means From d650a0c69ac5e90055c8e02d769c445a2dddd1b5 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:10:58 -0500 Subject: [PATCH 05/28] wip: parallelize inference and bootstrapping, --- R/bootstrapCompartments.R | 70 ++++++++++++++++--------------- R/getCompartments.R | 81 ++++++++++++++++++++---------------- man/bootstrapCompartments.Rd | 21 +++++----- 3 files changed, 91 insertions(+), 81 deletions(-) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index 35e9e171..a129faad 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -1,14 +1,14 @@ -#' Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals +#' Non-parametric bootstrapping of compartments and summarization of +#' bootstraps/compute confidence intervals #' #' @name bootstrapCompartments #' #' @param obj List object of computed compartments for a sample with 'pc' and 'gr' as elements #' @param original.obj The original, full input SummarizedExperiment of all samples/cells +#' @param BPPARAM BiocParallelParam for 
parallelizing bootstrapping #' @param bootstrap.samples How many bootstraps to run #' @param chr Which chromosome to operate on #' @param assay What sort of assay are we working on -#' @param parallel Whether to run the bootstrapping in parallel -#' @param cores How many cores to use for parallel processing #' @param targets Targets to shrink towards #' @param res The compartment resolution #' @param genome What genome are we working on @@ -18,7 +18,6 @@ #' @param bootstrap.means Pre-computed bootstrap means matrix #' #' @return Compartment estimates with summarized bootstraps and confidence intervals -#' @importFrom parallel mclapply #' @import SummarizedExperiment #' #' @examples @@ -29,17 +28,16 @@ bootstrapCompartments <- function( obj, original.obj, + BPPARAM, bootstrap.samples = 1000, chr = "chr14", + group = FALSE, assay = c("rna", "atac", "array"), - parallel = TRUE, - cores = 2, targets = NULL, res = 1e6, genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95, svd = NULL, - group = FALSE, bootstrap.means = NULL ) { # function for nonparametric bootstrap of compartments and compute 95% CIs @@ -61,36 +59,40 @@ bootstrapCompartments <- function( } # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") - if (parallel) { - message("Bootstrapping in parallel with ", cores, " cores.") - } else { - message("Not bootstrapping in parallel will take a long time...") - } + # if (parallel) { + # message("Bootstrapping in parallel with ", cores, " cores.") + # } else { + # message("Not bootstrapping in parallel will take a long time...") + # } # bootstrap and recompute compartments - resamp.compartments <- mclapply(1:ncol(bmeans), function(b) { - # get the shrunken bins with new global mean - boot.mean <- as.matrix(bmeans[, b]) - colnames(boot.mean) <- "globalMean" - s.bins <- shrinkBins( - obj, - original.obj, - prior.means = boot.mean, - chr = chr, - res = res, - assay = assay, - genome = genome - ) - 
cor.bins <- getCorMatrix(s.bins, squeeze = !group) + resamp.compartments <- bplapply( + 1:ncol(bmeans), + function(b) { + # get the shrunken bins with new global mean + boot.mean <- as.matrix(bmeans[, b]) + colnames(boot.mean) <- "globalMean" + s.bins <- shrinkBins( + obj, + original.obj, + prior.means = boot.mean, + chr = chr, + res = res, + assay = assay, + genome = genome + ) + cor.bins <- getCorMatrix(s.bins, squeeze = !group) - # Stupid check for perfect correlation with global mean - if (any(is.na(cor.bins$binmat.cor))) { - absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) - } else { - absig <- getABSignal(cor.bins, assay = assay) - } - return(absig) - }, mc.cores = ifelse(parallel, cores, 1)) + # Stupid check for perfect correlation with global mean + if (any(is.na(cor.bins$binmat.cor))) { + absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) + } else { + absig <- getABSignal(cor.bins, assay = assay) + } + return(absig) + }, + BPPARAM = BPPARAM + ) # summarize the bootstraps and compute confidence intervals resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, q = q, assay = assay) diff --git a/R/getCompartments.R b/R/getCompartments.R index f6fce380..414941a6 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -35,54 +35,67 @@ getCompartments <- function( ) } + if (boot.parallel) { + innerBPPARAM <- bpparams[[2]] + } else { + innerBPPARAM <- BiocParallel::SerialParam() + } + if (group) { - compartments.list <- mclapply(chr, function(c) { - .getCompartments( - obj, - obj, - assay = assay, - res = res, - chr = c, - targets = targets, - genome = genome, - bootstrap = bootstrap, - num.bootstraps = num.bootstraps, - prior.means = prior.means, - parallel = boot.parallel, - cores = boot.cores, - group = group, - bootstrap.means = bmeans - ) - }, mc.cores = ifelse(parallel, cores, 1)) + message("Computing group level compartments") + compartments.list <- bplapply( + chr, + function(c) { + .getCompartments( + obj, + obj, + assay = assay, + 
BPPARAM = innerBPPARAM, + res = res, + chr = c, + group = group, + targets = targets, + genome = genome, + prior.means = prior.means, + bootstrap = bootstrap, + num.bootstraps = num.bootstraps, + bootstrap.means = bmeans + ) + }, + BPPARAM = bpparams[[1]] + ) compartments <- sort(unlist(as(compartments.list, "GRangesList"))) return(compartments) } - compartments <- mclapply(columns, function(s) { - obj.sub <- obj[, s] + message("Computing single-cell level compartments") + compartments <- bplapply( + columns, + function(s) { + obj.sub <- obj[, s] - message("Working on ", s) - compartments.list <- lapply(chr, function(c) { + compartments.list <- lapply(chr, function(c) { .getCompartments( obj.sub, obj, assay = assay, + BPPARAM = innerBPPARAM, res = res, chr = c, + group = group, targets = targets, genome = genome, - bootstrap = bootstrap, prior.means = prior.means, + bootstrap = bootstrap, num.bootstraps = num.bootstraps, - parallel = boot.parallel, - cores = boot.cores, - group = group, bootstrap.means = bmeans ) }) - sort(unlist(as(compartments.list, "GRangesList"))) - }, mc.cores = ifelse(parallel, cores, 1), mc.preschedule = F) + sort(unlist(as(compartments.list, "GRangesList"))) + }, + BPPARAM = bpparams[[1]] + ) compartments <- as(compartments, "CompressedGRangesList") RaggedExperiment(compartments, colData = colData(obj)) @@ -93,22 +106,19 @@ getCompartments <- function( obj, original.obj, assay, + BPPARAM, res = 1e6, chr = NULL, + group = FALSE, targets = NULL, genome = c("hg19", "hg38", "mm9", "mm10"), prior.means = NULL, bootstrap = TRUE, num.bootstraps = 1000, - parallel = FALSE, - cores = 2, - group = FALSE, bootstrap.means = NULL ) { genome <- match.arg(genome) - if (parallel) options(mc.cores = cores) - # update message("Computing compartments for ", chr) obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") @@ -163,17 +173,16 @@ getCompartments <- function( bootstrapCompartments( obj, original.obj, + BPPARAM = BPPARAM, bootstrap.samples = 
num.bootstraps, chr = chr, + group = group, assay = assay, - parallel = parallel, - cores = cores, targets = targets, res = res, genome = genome, q = 0.95, svd = obj.svd, - group = group, bootstrap.means = bmeans ) } diff --git a/man/bootstrapCompartments.Rd b/man/bootstrapCompartments.Rd index c9ab1c18..440b511f 100644 --- a/man/bootstrapCompartments.Rd +++ b/man/bootstrapCompartments.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/bootstrapCompartments.R \name{bootstrapCompartments} \alias{bootstrapCompartments} -\title{Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals} +\title{Non-parametric bootstrapping of compartments and summarization of +bootstraps/compute confidence intervals} \usage{ bootstrapCompartments( obj, original.obj, + BPPARAM, bootstrap.samples = 1000, chr = "chr14", + group = FALSE, assay = c("rna", "atac", "array"), - parallel = TRUE, - cores = 2, targets = NULL, res = 1000000, genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95, svd = NULL, - group = FALSE, bootstrap.means = NULL ) } @@ -26,15 +26,15 @@ bootstrapCompartments( \item{original.obj}{The original, full input SummarizedExperiment of all samples/cells} +\item{BPPARAM}{BiocParallelParam for parallelizing bootstrapping} + \item{bootstrap.samples}{How many bootstraps to run} \item{chr}{Which chromosome to operate on} -\item{assay}{What sort of assay are we working on} - -\item{parallel}{Whether to run the bootstrapping in parallel} +\item{group}{Whether this is for group-level inference} -\item{cores}{How many cores to use for parallel processing} +\item{assay}{What sort of assay are we working on} \item{targets}{Targets to shrink towards} @@ -46,15 +46,14 @@ bootstrapCompartments( \item{svd}{The original compartment calls as a GRanges object} -\item{group}{Whether this is for group-level inference} - \item{bootstrap.means}{Pre-computed bootstrap means matrix} } \value{ Compartment estimates with summarized bootstraps and 
confidence intervals } \description{ -Non-parametric bootstrapping of compartments and summarization of bootstraps/compute confidence intervals +Non-parametric bootstrapping of compartments and summarization of +bootstraps/compute confidence intervals } \examples{ From cf7740ef8dfb73c805d73636c1b46a61380cdc4d Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:11:27 -0500 Subject: [PATCH 06/28] chore(flake): update with parallelly --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index fa3879b8..bb277aaa 100644 --- a/flake.nix +++ b/flake.nix @@ -24,6 +24,7 @@ ggplot2 impute Matrix + parallelly reshape2 RMTstat rtracklayer From 2650389549c76153cdd7e169117c580cbdf66046 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:11:45 -0500 Subject: [PATCH 07/28] docs: add parallelization details --- R/arrayCompartments.R | 1 + R/scCompartments.R | 46 +++++++++++++++++++++++++++++++++++++++ man/arrayCompartments.Rd | 46 +++++++++++++++++++++++++++++++++++++++ man/scCompartments.Rd | 47 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 139 insertions(+), 1 deletion(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 43978d79..aab2867d 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -17,6 +17,7 @@ #' @param boot.parallel Whether to run the bootstrapping in parallel. See details. #' @param BPPARAM BiocParallelParam object to use for parallelization. See details. #' +#' @inherit scCompartments details #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment diff --git a/R/scCompartments.R b/R/scCompartments.R index 42a0ba4e..c2cca96f 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -15,6 +15,52 @@ #' @param boot.parallel Whether to run the bootstrapping in parallel. See details. #' @param BPPARAM BiocParallelParam object to use for parallelization. See details. 
#' +#' @details +#' +#' compartmap uses `BiocParallel` to parallelize operations in four +#' configurations. The default setting is to parallelize across columns but not +#' bootstraps using the thread count as reported by `BiocParallel::bpparam()`, +#' which is usually two cores fewer than the number of available cores. +#' Parallel bootstrapping is disabled by default to avoid nested parallelism +#' issues but can be done independent of column-wise parallelization. +#' + +#' ## Available configurations +#' +#' ### Serial bootstrapping +#' +#' - Serially with just one core: +#' `BPPARAM = BiocParallel::SerialParam()` +#' +#' - Parallel across columns and serially across bootstraps: +#' `BPPARAM = BiocParallel::MulticoreParam(n)` where `n` is the number of +#' threads to use +#' +#' See `?BiocParallel::BiocParallelParam` for other parallel backends. Parallel +#' backends may also be passed to `BiocParallel::register()` to make them +#' available to `bpparam()`. + +#' ### Parallel bootstrapping +#' +#' Set `boot.parallel = TRUE` for one of these configurations: +#' +#' - Serially across columns and parallel across bootstraps: Set `BPPARAM = +#' list(SerialParam(), MulticoreParam(n))` +#' +#' - Parallel across both columns and bootstraps: Set `BPPARAM = +#' list(MulticoreParam(outer), MulticoreParam(inner))` where `outer` is the +#' thread count for column-wise operations and `inner` the thread count for +#' bootstrapping. The required number of threads is given by +#' +#' `( outer * inner ) + outer` +#' +#' We recommend using an explicit list of two BiocParallelParam backends over +#' relying on `register()` and `bpparam()` for parallelizing across bootstraps. +#' With nested `bplapply` calls, the registered backend is used for both the +#' outer and inner parallel loops. On a system with 8 available threads if the +#' registered backend asks for 4 workers, it will try to use 20 threads in the +#' nested loops.
Instead to use all 8 cores, set +#' `BPPARAM = list(MulticoreParam(2), MulticoreParam(3))`. #' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index a9bbc0fe..8ac6ea61 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -53,6 +53,52 @@ A RaggedExperiment of inferred compartments \description{ \code{arrayCompartments} returns estimated A/B compartments from methylation array data. } +\details{ +compartmap uses \code{BiocParallel} to parallelize operations in four +configurations. The default setting is to parallelize across columns but not +bootstraps using the thread count as reported by \code{BiocParallel::bpparam()}, +which is usually two cores fewer than the number of available cores. +Parallel bootstrapping is disabled by default to avoid nested parallelism +issues but can be done independent of column-wise parallelization. +\subsection{Available configurations}{ +\subsection{Serial bootstrapping}{ +\itemize{ +\item Serially with just one core: +\code{BPPARAM = BiocParallel::SerialParam()} +\item Parallel across columns and serially across bootstraps: +\code{BPPARAM = BiocParallel::MulticoreParam(n)} where \code{n} is the number of +threads to use +} + +See \code{?BiocParallel::BiocParallelParam} for other parallel backends. Parallel +backends may also be passed to \code{BiocParallel::register()} to make them +available to \code{bpparam()}. +} + +\subsection{Parallel bootstrapping}{ + +Set \code{boot.parallel = TRUE} for one the these configurations: +\itemize{ +\item Serially across columns and parallel across bootstraps: Set `BPPARAM = +list(SerialParam(), MulticoreParam(n))' +\item Parallel across both columns and bootstraps: Set \code{BPPARAM = list(MulticoreParam(outer), MulticoreParam(inner))} where \code{outer} is the +thread count for column-wise operations and \code{inner} the thread count for +bootstrapping. 
The required number of threads is given by +} + +\code{( outer * inner ) + outer} + +We recommend using an explicit list of two BiocParallelParam backends over +relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. +With nested \code{bplapply} calls, the registered backend is used for both the +outer and inner parallel loops. On a system with 8 available threads if the +registered backend asks for 4 workers, it will try to use 20 threads in the +nested loops. Instead to use all 8 cores, set +\code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. +} + +} +} \examples{ if (requireNamespace("minfi", quietly = TRUE)) { diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index 97c8182e..45c9b5e8 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -47,12 +47,57 @@ A RaggedExperiment of inferred compartments \description{ \code{scCompartments} returns estimated A/B compartments from sc-seq data. } +\details{ +compartmap uses \code{BiocParallel} to parallelize operations in four +configurations. The default setting is to parallelize across columns but not +bootstraps using the thread count as reported by \code{BiocParallel::bpparam()}, +which is usually two cores fewer than the number of available cores. +Parallel bootstrapping is disabled by default to avoid nested parallelism +issues but can be done independent of column-wise parallelization. +\subsection{Available configurations}{ +\subsection{Serial bootstrapping}{ +\itemize{ +\item Serially with just one core: +\code{BPPARAM = BiocParallel::SerialParam()} +\item Parallel across columns and serially across bootstraps: +\code{BPPARAM = BiocParallel::MulticoreParam(n)} where \code{n} is the number of +threads to use +} + +See \code{?BiocParallel::BiocParallelParam} for other parallel backends. Parallel +backends may also be passed to \code{BiocParallel::register()} to make them +available to \code{bpparam()}. 
+} + +\subsection{Parallel bootstrapping}{ + +Set \code{boot.parallel = TRUE} for one the these configurations: +\itemize{ +\item Serially across columns and parallel across bootstraps: Set `BPPARAM = +list(SerialParam(), MulticoreParam(n))' +\item Parallel across both columns and bootstraps: Set \code{BPPARAM = list(MulticoreParam(outer), MulticoreParam(inner))} where \code{outer} is the +thread count for column-wise operations and \code{inner} the thread count for +bootstrapping. The required number of threads is given by +} + +\code{( outer * inner ) + outer} + +We recommend using an explicit list of two BiocParallelParam backends over +relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. +With nested \code{bplapply} calls, the registered backend is used for both the +outer and inner parallel loops. On a system with 8 available threads if the +registered backend asks for 4 workers, it will try to use 20 threads in the +nested loops. Instead to use all 8 cores, set +\code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. 
+} + +} +} \examples{ data("k562_scrna_chr14", package = "compartmap") sc_compartments <- scCompartments( k562_scrna_chr14, chr = "chr14", - parallel = FALSE, bootstrap = FALSE, genome = "hg19" ) From 05b021d31edfff19241ace11af281d47a9dcff91 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:12:03 -0500 Subject: [PATCH 08/28] docs(vignettes): update call calls with BPPARAM --- vignettes/compartmap.Rmd | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vignettes/compartmap.Rmd b/vignettes/compartmap.Rmd index db67d8d1..37a0f065 100644 --- a/vignettes/compartmap.Rmd +++ b/vignettes/compartmap.Rmd @@ -54,7 +54,8 @@ k562_compartments <- scCompartments( group = TRUE, bootstrap = FALSE, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) ``` @@ -77,7 +78,8 @@ k562_compartments.boot <- scCompartments( bootstrap = TRUE, num.bootstraps = 10, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) # Flip the domain sign if the sign coherence is discordant in 80% of the bootstraps @@ -281,7 +283,8 @@ k562_scrna_chr14_raw_domains <- scCompartments(k562_scrna_se_chr14, bootstrap = TRUE, num.bootstraps = 10, genome = "hg19", - assay = "rna" + assay = "rna", + BPPARAM = BiocParallel::MulticoreParam(2) ) ``` From b53816a7ab41c9d9177c43481b0e9e83945f8535 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:18:37 -0500 Subject: [PATCH 09/28] add logging --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 19e01d04..dc5a5f79 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Depends: Imports: BiocSingular, BiocParallel, + futile.logger, rlang, S4Vectors, IRanges, From 9fd2cd98a05c4e3d5b42c37eeb442b399ea8f35e Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 11:26:49 -0500 Subject: [PATCH 10/28] fix(getCompartments): remove bpparams setting done by get_nested_params --- 
R/getCompartments.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index 414941a6..48831fc1 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -35,12 +35,6 @@ getCompartments <- function( ) } - if (boot.parallel) { - innerBPPARAM <- bpparams[[2]] - } else { - innerBPPARAM <- BiocParallel::SerialParam() - } - if (group) { message("Computing group level compartments") compartments.list <- bplapply( From eeacd0a2ec462b5b01b1bf75a1f8b05fda7b06e6 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 12:30:45 -0500 Subject: [PATCH 11/28] fix(logging): use futil.logger and move most to debug level reduce verbosity but configurable --- NAMESPACE | 2 ++ R/bootstrapCompartments.R | 5 ----- R/fixCompartments.R | 6 +++--- R/getABSignal.R | 6 +++--- R/getBinMatrix.R | 2 +- R/getCompartments.R | 21 ++++++++++++++------- R/getCorMatrix.R | 4 ++-- R/getGlobalMeans.R | 4 ++-- R/meanSmoother.R | 2 +- R/plotAB.R | 2 +- R/preprocessArrays.R | 4 ++-- R/shrinkBins.R | 2 +- R/summarizeBootstraps.R | 4 ++-- 13 files changed, 34 insertions(+), 30 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4639307d..0bb12199 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,6 +62,8 @@ importFrom(GenomeInfoDb,seqlengths) importFrom(GenomeInfoDb,seqlevels) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,subjectHits) +importFrom(futile.logger,flog.debug) +importFrom(futile.logger,flog.info) importFrom(ggplot2,aes) importFrom(ggplot2,element_blank) importFrom(ggplot2,geom_raster) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index a129faad..7a835302 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -59,11 +59,6 @@ bootstrapCompartments <- function( } # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") - # if (parallel) { - # message("Bootstrapping in parallel with ", cores, " cores.") - # } else { - # message("Not 
bootstrapping in parallel will take a long time...") - # } # bootstrap and recompute compartments resamp.compartments <- bplapply( diff --git a/R/fixCompartments.R b/R/fixCompartments.R index 4adab598..77110e98 100644 --- a/R/fixCompartments.R +++ b/R/fixCompartments.R @@ -21,8 +21,8 @@ setGeneric("fixCompartments", function(x, min.conf = 0.8, parallel = FALSE, core #' @rdname fixCompartments #' @param x GRanges setMethod("fixCompartments", "GRanges", function(x, min.conf = 0.8, parallel = FALSE, cores = 1) { - message("Assuming we only have a single sample to process") - message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.debug("Assuming we only have a single sample to process.") + flog.debug("Fixing compartments using a minimum confidence score of %f %%", min.conf * 100) flipper(x, min.conf) }) @@ -30,7 +30,7 @@ setMethod("fixCompartments", "GRanges", function(x, min.conf = 0.8, parallel = F #' @param x RaggedExperiment setMethod("fixCompartments", "RaggedExperiment", function(x, min.conf = 0.8, parallel = FALSE, cores = 1) { obj <- condenseSE(x, sample.name = colnames(assay(x))) - message("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.info("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") # go through and invert compartments based on the min.conf flip_compartments_lst <- mclapply(obj, flipper, min.conf, mc.cores = ifelse(parallel, cores, 1)) names(flip_compartments_lst) <- names(obj) diff --git a/R/getABSignal.R b/R/getABSignal.R index 7dc2c7c8..a2fd3164 100644 --- a/R/getABSignal.R +++ b/R/getABSignal.R @@ -58,13 +58,13 @@ getABSignal <- function( assay <- match.arg(assay) gr <- x$gr - message("Calculating eigenvectors.") + flog.debug("Calculating eigenvectors.") pc <- getSVD(x$binmat.cor, sing.vec = "right") if (squeeze) pc <- ifisherZ(pc) - message("Smoothing eigenvector.") + flog.debug("Smoothing eigenvector.") gr$pc <- meanSmoother(pc) - 
message("Done smoothing.") + flog.debug("Done smoothing.") if (flipSign(gr, genome)) gr$pc <- -gr$pc gr$compartments <- extractOpenClosed(gr, assay = assay) diff --git a/R/getBinMatrix.R b/R/getBinMatrix.R index 5899c6c0..e16be4e0 100644 --- a/R/getBinMatrix.R +++ b/R/getBinMatrix.R @@ -68,7 +68,7 @@ getBinMatrix <- function( ids <- findOverlaps(genloc, gr.bin, select = "first") binCount <- length(gr.bin) - message(binCount, " bins created...") + flog.debug("%d bins created...", binCount) mat.bin <- apply(mat, 2, function(x) { .summarizeBins(x, binCount, ids, FUN) diff --git a/R/getCompartments.R b/R/getCompartments.R index 48831fc1..b89a4ec2 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -1,5 +1,6 @@ #' Run compartment inference #' @importFrom BiocParallel bplapply +#' @importFrom futile.logger flog.info flog.debug getCompartments <- function( obj, res, @@ -14,7 +15,7 @@ getCompartments <- function( bpparams ) { if (is.null(chr)) { - message("Assuming we want to process all chromosomes.") + flog.info("Assuming we want to process all chromosomes.") chr <- getChrs(obj) } @@ -25,7 +26,7 @@ getCompartments <- function( prior.means <- getGlobalMeans(obj = obj, targets = targets, assay = assay) if (bootstrap) { - message("Pre-computing the bootstrap global means.") + flog.info("Pre-computing the bootstrap global means.") bmeans <- precomputeBootstrapMeans( obj = obj, BPPARAM = bpparams[[1]], @@ -35,8 +36,14 @@ getCompartments <- function( ) } + if (boot.parallel) { + flog.info("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) + } else { + flog.info("Not bootstrapping in parallel will take a long time...") + } + if (group) { - message("Computing group level compartments") + flog.info("Computing group level compartments") compartments.list <- bplapply( chr, function(c) { @@ -44,7 +51,7 @@ getCompartments <- function( obj, obj, assay = assay, - BPPARAM = innerBPPARAM, + BPPARAM = bpparams[[2]], res = res, chr = c, group = group, @@ -63,7 
+70,7 @@ getCompartments <- function( return(compartments) } - message("Computing single-cell level compartments") + flog.info("Computing single-cell level compartments") compartments <- bplapply( columns, function(s) { @@ -74,7 +81,7 @@ getCompartments <- function( obj.sub, obj, assay = assay, - BPPARAM = innerBPPARAM, + BPPARAM = bpparams[[2]], res = res, chr = c, group = group, @@ -114,7 +121,7 @@ getCompartments <- function( genome <- match.arg(genome) # update - message("Computing compartments for ", chr) + flog.debug("Computing compartments for %s", chr) obj <- keepSeqlevels(obj, chr, pruning.mode = "coarse") original.obj <- keepSeqlevels(original.obj, chr, pruning.mode = "coarse") diff --git a/R/getCorMatrix.R b/R/getCorMatrix.R index 914b6495..da2e68fb 100644 --- a/R/getCorMatrix.R +++ b/R/getCorMatrix.R @@ -43,7 +43,7 @@ #' # Calculate correlations #' getCorMatrix(bin.counts) getCorMatrix <- function(binmat, squeeze = FALSE) { - message("Calculating correlations...") + flog.debug("Calculating correlations...") # bind back up the global means and shrunken bins binmat$x <- cbind(binmat$x, binmat$gmeans) binmat.cor <- suppressWarnings(cor(t(binmat$x))) @@ -51,6 +51,6 @@ getCorMatrix <- function(binmat, squeeze = FALSE) { if (squeeze) { binmat.cor <- fisherZ(binmat.cor) } - message("Done...") + flog.debug("Done...") list(gr.cor = binmat$gr, binmat.cor = binmat.cor) } diff --git a/R/getGlobalMeans.R b/R/getGlobalMeans.R index 6a71cf92..cf4ab83b 100644 --- a/R/getGlobalMeans.R +++ b/R/getGlobalMeans.R @@ -30,7 +30,7 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" # check if shrinkage targets are being used if (!is.null(targets)) { stargets <- getShrinkageTargets(obj, targets) - message("Using ", paste(shQuote(targets), collapse = ", "), " as shrinkage targets...") + flog.debug("Using %s as shrinkage targets", paste(shQuote(targets), collapse = ", ")) globalMean.input <- stargets } else { globalMean.input <- obj @@ -86,7 +86,7 
@@ precomputeBootstrapMeans <- function( bootMean <- bplapply( 1:num.bootstraps, function(b) { - # message("Working on bootstrap ", b) + flog.debug("Working on bootstrap ", b) resamp.mat <- .resampleMatrix(assay.data) computeGlobalMean(resamp.mat) }, diff --git a/R/meanSmoother.R b/R/meanSmoother.R index 09be0d7d..f645d2c5 100644 --- a/R/meanSmoother.R +++ b/R/meanSmoother.R @@ -22,7 +22,7 @@ #' @export meanSmoother <- function(mat, k = 1, iters = 2, delta = 0, weights = NULL) { if (k == 0) { - message("Returning unsmoothed mat as 'k' = 0") + flog.debug("Returning unsmoothed mat as 'k' = 0") return(mat) } diff --git a/R/plotAB.R b/R/plotAB.R index 8b4db5b6..d1040a6e 100644 --- a/R/plotAB.R +++ b/R/plotAB.R @@ -113,7 +113,7 @@ plotAB <- function( NAs <- is.na(x) x[!NAs] <- x[!NAs] / sqrt(sum(x[!NAs]^2)) na.count <- sum(NAs) - if (na.count > 0) message(sprintf("[.unitarize] %i missing values were ignored.\n", na.count)) + if (na.count > 0) flog.debug("[.unitarize] %i missing values were ignored.\n", na.count) x } diff --git a/R/preprocessArrays.R b/R/preprocessArrays.R index 4e24d220..1d3694c9 100644 --- a/R/preprocessArrays.R +++ b/R/preprocessArrays.R @@ -40,13 +40,13 @@ preprocessArrays <- function( # this should be default but allows handling if given M-values in Beta slot is.beta <- min(assays(obj)$Beta, na.rm = TRUE) > 0 if (is.beta) { - message("Converting to squeezed M-values.") + flog.debug("Converting to squeezed M-values.") assays(obj.opensea)$Beta <- flogit(assays(obj.opensea)$Beta) } # impute missing values if possible if (any(is.na(minfi::getBeta(obj.opensea)))) { - message("Imputing missing values.") + flog.debug("Imputing missing values.") obj.opensea <- imputeKNN(obj.opensea, assay = "array") } diff --git a/R/shrinkBins.R b/R/shrinkBins.R index 8bf41c22..205b3564 100644 --- a/R/shrinkBins.R +++ b/R/shrinkBins.R @@ -55,7 +55,7 @@ shrinkBins <- function( if (target.count == 1) { stop("Cannot perform targeted bin-level shrinkage with one target 
sample.") } else if (target.count < 4) { - message("Number of means fewer than 4. Using Bayes instead of JSE.") + flog.debug("Number of means fewer than 4. Using Bayes instead of JSE.") jse <- FALSE } } diff --git a/R/summarizeBootstraps.R b/R/summarizeBootstraps.R index b540f354..df8c4d19 100644 --- a/R/summarizeBootstraps.R +++ b/R/summarizeBootstraps.R @@ -21,7 +21,7 @@ summarizeBootstraps <- function(boot.list, est.ab, q = 0.95, assay = c("rna", "a is.atac_or_rna <- assay %in% c("atac", "rna") - message("Summarizing bootstraps.") + flog.debug("Summarizing bootstraps") # filter out failed compartment estimates boot.list <- removeEmptyBoots(boot.list) @@ -37,7 +37,7 @@ summarizeBootstraps <- function(boot.list, est.ab, q = 0.95, assay = c("rna", "a est.ab$boot.open <- .getBootRowSums(1) est.ab$boot.closed <- .getBootRowSums(2) - message("Computing Agresti-Coull 95% confidence intervals.") + flog.debug("Computing Agresti-Coull 95% confidence intervals") .getCI(est.ab, q) } From a39216d711780edeb5e43200e48c45f87307b9fc Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 12:31:57 -0500 Subject: [PATCH 12/28] fix(bootstrap): don't show progress bar in the bootstrap step pollutes progress info with too many bars --- R/bootstrapCompartments.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index 7a835302..07f14a30 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -61,6 +61,7 @@ bootstrapCompartments <- function( # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") # bootstrap and recompute compartments + BiocParallel::bpprogressbar(BPPARAM) <- FALSE resamp.compartments <- bplapply( 1:ncol(bmeans), function(b) { From aa607e137acbb914c00e7a024b50f0c48f854434 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 13:07:59 -0500 Subject: [PATCH 13/28] fix(parallel): correctly calculate total when 
outer/inner is 1 --- R/parallel.R | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/R/parallel.R b/R/parallel.R index 908afa46..aa7dabd4 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -2,19 +2,31 @@ #' @keywords internal check_worker_count <- function(bpparam, boot.parallel, avail_workers = parallelly::availableCores()) { workers <- get_bpnworkers(bpparam) - total <- sum(Reduce(`*`, workers), workers[1]) + total <- required_workers(workers) if (verify_workers(total)) { return(TRUE) } - msg <- sprintf( - "Using %1$d outer and %2$d inner workers would require %3$d workers (%1$d + (%1$d x %2$d)) but your system has only %4$d cores. - See parallelly::availableCores() for more information on available resources", - workers[1], - workers[2], - total, - avail_workers - ) + avail_msg <- sprintf("but your system has only %d cores", avail_workers) + info_msg <- "See parallelly::availableCores(which = 'all') for more information on available resources" + if (workers[1] == 1 | workers[2] == 1) { + msg <- sprintf( + "Requested %d %s workers %s\n%s", + max(workers), + ifelse(workers[1] == 1, "inner", "outer"), + avail_msg, + info_msg + ) + } else { + msg <- sprintf( + "Requested %1$d outer and %2$d inner workers that require %3$d total workers (%1$d + (%1$d x %2$d)) %4$s\n%5$s", + workers[1], + workers[2], + total, + avail_msg, + info_msg + ) + } stop(msg) } @@ -34,6 +46,16 @@ bpnworkers.list <- function(bplist) { unlist(Map(bpnworkers, bplist)) } +required_workers <- function(workers) { + if (workers[1] == 1) { + return(workers[2]) + } + if (workers[2] == 1) { + return(workers[1]) + } + sum(Reduce(`*`, workers), workers[1]) +} + #' Verify that the input BiocParallelParam is valid #' @param A BiocParallelParam or list of 2 BiocParallelParam objects #' @importFrom BiocParallel bpnworkers From 557a3d5dda52d160baccd4ea06f63ca4ac0d8b58 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 13:15:45 -0500 Subject: 
[PATCH 14/28] fix(bootstrap): only message about slow bootstrap if bootstrapping --- R/getCompartments.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index b89a4ec2..6bab517c 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -36,9 +36,9 @@ getCompartments <- function( ) } - if (boot.parallel) { + if (bootstrap & boot.parallel) { flog.info("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) - } else { + } else if (bootstrap & !boot.parallel) { flog.info("Not bootstrapping in parallel will take a long time...") } From 6ad514428000ce7d9ed86fccf953203576befc39 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 13 Nov 2025 13:53:03 -0500 Subject: [PATCH 15/28] fix(logging): move bootstrap message after computing message --- R/getCompartments.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index 6bab517c..a602e736 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -36,14 +36,16 @@ getCompartments <- function( ) } + boot_msg <- "" if (bootstrap & boot.parallel) { - flog.info("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) + boot_msg <- sprintf("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) } else if (bootstrap & !boot.parallel) { - flog.info("Not bootstrapping in parallel will take a long time...") + boot_msg <- "Not bootstrapping in parallel will take a long time..." 
} if (group) { flog.info("Computing group level compartments") + flog.info(boot_msg) compartments.list <- bplapply( chr, function(c) { @@ -71,6 +73,7 @@ getCompartments <- function( } flog.info("Computing single-cell level compartments") + flog.info(boot_msg) compartments <- bplapply( columns, function(s) { From e917ae6e31cbbec087519d097a5bfd649c804586 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 14 Nov 2025 14:01:26 -0500 Subject: [PATCH 16/28] docs(parallel): add info on load balancing different configs --- R/scCompartments.R | 23 +++++++++++++++++++++++ man/arrayCompartments.Rd | 23 +++++++++++++++++++++++ man/scCompartments.Rd | 23 +++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/R/scCompartments.R b/R/scCompartments.R index c2cca96f..a29dfa25 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -54,6 +54,8 @@ #' #' `( outer * inner ) + outer` #' +#' which is more easily calculated as `outer * (inner + 1)`. +#' #' We recommend using an explicit list of two BiocParallelParam backends over #' relying on `register()` and `bpparam()` for parallelizing across bootstraps. #' With nested `bplapply` calls, the registered backend is used for both the @@ -62,6 +64,27 @@ #' nested loops. Instead to use all 8 cores, set #' `BPPARAM = list(MulticoreParam(2), MulticoreParam(3))`. #' +#' ### Load balancing +#' +#' Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +#' in parallel, you can use nested parallelism. If you are working on just 1 +#' chromosome, put all cores into the inner bootstrapping backend. Conversely +#' with multiple chromosmes without bootstrapping, put all available workers in +#' the outer loop. +#' +#' In general, use more 'outer' workers, which loop over chromosmes when `group +#' = TRUE` and cells when `group = FALSE`, than 'inner' workers that loop over +#' bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +#' inner. 
+#' +#' When `group = FALSE`, use `MulticoreParam()` only on the outer workers. We +#' find that parallelizing at both column and bootstrap levels with the +#' single-cell inference is slower than only parallelizing at the column-level. +#' +#' With `group = TRUE`, minimize the difference between the two worker counts: +#' with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +#' inner. +#' #' @return A RaggedExperiment of inferred compartments #' @import SummarizedExperiment #' @import RaggedExperiment diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 8ac6ea61..1162e663 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -88,6 +88,8 @@ bootstrapping. The required number of threads is given by \code{( outer * inner ) + outer} +which is more easily calculated as \code{outer * (inner + 1)}. + We recommend using an explicit list of two BiocParallelParam backends over relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. With nested \code{bplapply} calls, the registered backend is used for both the @@ -97,6 +99,27 @@ nested loops. Instead to use all 8 cores, set \code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. } +\subsection{Load balancing}{ + +Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +in parallel, you can use nested parallelism. If you are working on just 1 +chromosome, put all cores into the inner bootstrapping backend. Conversely +with multiple chromosmes without bootstrapping, put all available workers in +the outer loop. + +In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +inner. + +When \code{group = FALSE}, use \code{MulticoreParam()} only on the outer workers. 
We +find that parallelizing at both column and bootstrap levels with the +single-cell inference is slower than only parallelizing at the column-level. + +With \code{group = TRUE}, minimize the difference between the two worker counts: +with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +inner. +} + } } \examples{ diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index 45c9b5e8..a32103ae 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -82,6 +82,8 @@ bootstrapping. The required number of threads is given by \code{( outer * inner ) + outer} +which is more easily calculated as \code{outer * (inner + 1)}. + We recommend using an explicit list of two BiocParallelParam backends over relying on \code{register()} and \code{bpparam()} for parallelizing across bootstraps. With nested \code{bplapply} calls, the registered backend is used for both the @@ -91,6 +93,27 @@ nested loops. Instead to use all 8 cores, set \code{BPPARAM = list(MulticoreParam(2), MulticoreParam(3))}. } +\subsection{Load balancing}{ + +Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping +in parallel, you can use nested parallelism. If you are working on just 1 +chromosome, put all cores into the inner bootstrapping backend. Conversely +with multiple chromosmes without bootstrapping, put all available workers in +the outer loop. + +In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 +inner. + +When \code{group = FALSE}, use \code{MulticoreParam()} only on the outer workers. We +find that parallelizing at both column and bootstrap levels with the +single-cell inference is slower than only parallelizing at the column-level. 
+ +With \code{group = TRUE}, minimize the difference between the two worker counts: +with 64 total cores, doing 8 outer and 7 inner is faster than 16 outer and 3 +inner. +} + } } \examples{ From 4378c0418e7aeae1683ecdcf84d13526f7f20a00 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 14 Nov 2025 14:03:24 -0500 Subject: [PATCH 17/28] feat(parallel): check if user BPPARAM settings are optimal --- R/arrayCompartments.R | 2 +- R/parallel.R | 22 +++++++++++++++++++++- R/scCompartments.R | 2 +- man/check_worker_count.Rd | 4 +++- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index aab2867d..511d1432 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -59,7 +59,7 @@ arrayCompartments <- function( verifyCoords(obj) verifyAssayNames(obj, assay = "array") bpparams <- get_nested_params(BPPARAM) - check_worker_count(bpparams) + check_worker_count(bpparams, group, length(chr), bootstrap) # preprocess the arrays if (preprocess) { diff --git a/R/parallel.R b/R/parallel.R index aa7dabd4..af82a62e 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -1,9 +1,29 @@ +#' Check and tell users whether input BPPARAM are optimal given group/sc +#' inference and bootstrapping +#' @keywords internal +check_optim <- function(workers, group, chr_count, bootstrap) { + if (group & chr_count < workers[1]) { + flog.info( + "Grouped inference with more outer workers than chromosmes leaves %d of %d workers unused", + workers[1] - chr_count, + workers[1] + ) + if (bootstrap) { + flog.info("Consider using a single core for the outer worker and more cores for the inner bootstrap worker") + } + } + if (!group & bootstrap & workers[1] < workers[2]) { + flog.info("More outer (column-wise) than inner (bootstrap) workers is faster for single-cell inference") + } +} + #' Check that the number of requested workers is valid #' @keywords internal -check_worker_count <- function(bpparam, boot.parallel, avail_workers = 
parallelly::availableCores()) { +check_worker_count <- function(bpparam, group, chr_count, bootstrap, avail_workers = parallelly::availableCores()) { workers <- get_bpnworkers(bpparam) total <- required_workers(workers) if (verify_workers(total)) { + check_optim(workers, group, chr_count, bootstrap) return(TRUE) } diff --git a/R/scCompartments.R b/R/scCompartments.R index a29dfa25..a1986a8f 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -115,7 +115,7 @@ scCompartments <- function( verifyCoords(obj) bpparams <- get_nested_params(BPPARAM, boot.parallel) - check_worker_count(bpparams) + check_worker_count(bpparams, group, length(chr), bootstrap) # which assay are we working on if (!all(assay %in% c("atac", "rna"))) stop("Supported assays are 'atac', and 'rna'.") diff --git a/man/check_worker_count.Rd b/man/check_worker_count.Rd index 62d20728..5dd80962 100644 --- a/man/check_worker_count.Rd +++ b/man/check_worker_count.Rd @@ -6,7 +6,9 @@ \usage{ check_worker_count( bpparam, - boot.parallel, + group, + chr_count, + bootstrap, avail_workers = parallelly::availableCores() ) } From c81ce2e46fed1764eb0be02c9bdc77b9a60f7b7b Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 14 Nov 2025 14:03:41 -0500 Subject: [PATCH 18/28] fix(bootstrapping): don't warn about slow serial bootstrapping Its only slower than parallel when group = TRUE --- R/getCompartments.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/getCompartments.R b/R/getCompartments.R index a602e736..25a4e53f 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -39,8 +39,8 @@ getCompartments <- function( boot_msg <- "" if (bootstrap & boot.parallel) { boot_msg <- sprintf("Bootstrapping in parallel with %d cores", bpnworkers(bpparams[[2]])) - } else if (bootstrap & !boot.parallel) { - boot_msg <- "Not bootstrapping in parallel will take a long time..." 
+ } else if (bootstrap & !boot.parallel & group) { + boot_msg <- "Not bootstrapping in parallel could take a long time..." } if (group) { From 151d60eb18969a49d8ade96aa7b89efe207e2ded Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 11:09:22 -0500 Subject: [PATCH 19/28] fix(parallel): add missing SerialParam import --- NAMESPACE | 1 + R/parallel.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 0bb12199..21e83fac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ import(HDF5Array) import(Matrix) import(RaggedExperiment) import(SummarizedExperiment) +importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) importFrom(BiocParallel,bpnworkers) importFrom(BiocParallel,bpparam) diff --git a/R/parallel.R b/R/parallel.R index af82a62e..b8a8b006 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -100,6 +100,7 @@ verify_workers <- function(n_workers) { #' The outer param is across the input samples/columns and the second is for #' bootstrapping. If `boot.parallel` is FALSE, the inner param is set to #' `SerialParam`. 
+#' @importFrom BiocParallel SerialParam #' @keywords internal get_nested_params <- function(BPPARAM, boot.parallel) { stopifnot("Only two BiocParallelParam objects can be used" = length(BPPARAM) <= 2) From 4c08faca845d7863283e9c527050e03ddfe2763d Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 11:24:26 -0500 Subject: [PATCH 20/28] chore(parallel): fix 'chromosme' typo --- R/parallel.R | 2 +- R/scCompartments.R | 4 ++-- man/arrayCompartments.Rd | 4 ++-- man/scCompartments.Rd | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/parallel.R b/R/parallel.R index b8a8b006..93e5faa7 100644 --- a/R/parallel.R +++ b/R/parallel.R @@ -4,7 +4,7 @@ check_optim <- function(workers, group, chr_count, bootstrap) { if (group & chr_count < workers[1]) { flog.info( - "Grouped inference with more outer workers than chromosmes leaves %d of %d workers unused", + "Grouped inference with more outer workers than chromosomes leaves %d of %d workers unused", workers[1] - chr_count, workers[1] ) diff --git a/R/scCompartments.R b/R/scCompartments.R index a1986a8f..7a839d6f 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -69,10 +69,10 @@ #' Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping #' in parallel, you can use nested parallelism. If you are working on just 1 #' chromosome, put all cores into the inner bootstrapping backend. Conversely -#' with multiple chromosmes without bootstrapping, put all available workers in +#' with multiple chromosomes without bootstrapping, put all available workers in #' the outer loop. #' -#' In general, use more 'outer' workers, which loop over chromosmes when `group +#' In general, use more 'outer' workers, which loop over chromosomes when `group #' = TRUE` and cells when `group = FALSE`, than 'inner' workers that loop over #' bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 #' inner. 
diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 1162e663..57301f69 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -104,10 +104,10 @@ nested loops. Instead to use all 8 cores, set Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping in parallel, you can use nested parallelism. If you are working on just 1 chromosome, put all cores into the inner bootstrapping backend. Conversely -with multiple chromosmes without bootstrapping, put all available workers in +with multiple chromosomes without bootstrapping, put all available workers in the outer loop. -In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +In general, use more 'outer' workers, which loop over chromosomes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over bootstraps. Using 8 outer and 7 inner workers is faster than 7 outer and 8 inner. diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index a32103ae..eae9ca74 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -98,10 +98,10 @@ nested loops. Instead to use all 8 cores, set Unless you have only 1 chromosome or are not bootstrapping/not bootstrapping in parallel, you can use nested parallelism. If you are working on just 1 chromosome, put all cores into the inner bootstrapping backend. Conversely -with multiple chromosmes without bootstrapping, put all available workers in +with multiple chromosomes without bootstrapping, put all available workers in the outer loop. -In general, use more 'outer' workers, which loop over chromosmes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over +In general, use more 'outer' workers, which loop over chromosomes when \code{group = TRUE} and cells when \code{group = FALSE}, than 'inner' workers that loop over bootstraps. 
Using 8 outer and 7 inner workers is faster than 7 outer and 8 inner. From 5ea3e545cbb26939f8a2eede793d01736dd78328 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 11:24:59 -0500 Subject: [PATCH 21/28] docs(parallel): add check_optim --- man/check_optim.Rd | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 man/check_optim.Rd diff --git a/man/check_optim.Rd b/man/check_optim.Rd new file mode 100644 index 00000000..582e2772 --- /dev/null +++ b/man/check_optim.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parallel.R +\name{check_optim} +\alias{check_optim} +\title{Check and tell users whether input BPPARAM are optimal given group/sc +inference and bootstrapping} +\usage{ +check_optim(workers, group, chr_count, bootstrap) +} +\description{ +Check and tell users whether input BPPARAM are optimal given group/sc +inference and bootstrapping +} +\keyword{internal} From f72332db86ebbca9097ee595e0d1ce331724aec2 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 12:01:20 -0500 Subject: [PATCH 22/28] chore(DESCRIPTION): add missing parallelly --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index dc5a5f79..8a33e12d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,7 @@ Imports: GenomicRanges, GenomeInfoDb, parallel, + parallelly, methods, grid, ggplot2, From f84806178335f2924703e21e053d8033eb572bd1 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Fri, 19 Dec 2025 12:34:47 -0500 Subject: [PATCH 23/28] test: remove message tests since they're now handled by futile.logger --- R/fixCompartments.R | 4 ++-- tests/testthat/test-fixCompartments.R | 1 + tests/testthat/test-getCorMatrix.R | 4 ---- tests/testthat/test-getGlobalMeans.R | 8 ++++---- tests/testthat/test-meanSmoother.R | 5 ----- tests/testthat/test-plotAB.R | 6 +----- tests/testthat/test-summarizeBootstraps.R | 2 -- 7 files changed, 8 insertions(+), 22 deletions(-) diff 
--git a/R/fixCompartments.R b/R/fixCompartments.R index 77110e98..a5750c67 100644 --- a/R/fixCompartments.R +++ b/R/fixCompartments.R @@ -22,7 +22,7 @@ setGeneric("fixCompartments", function(x, min.conf = 0.8, parallel = FALSE, core #' @param x GRanges setMethod("fixCompartments", "GRanges", function(x, min.conf = 0.8, parallel = FALSE, cores = 1) { flog.debug("Assuming we only have a single sample to process.") - flog.debug("Fixing compartments using a minimum confidence score of %f %%", min.conf * 100) + flog.debug("Fixing compartments using a minimum confidence score of %d%%", min.conf * 100) flipper(x, min.conf) }) @@ -30,7 +30,7 @@ setMethod("fixCompartments", "GRanges", function(x, min.conf = 0.8, parallel = F #' @param x RaggedExperiment setMethod("fixCompartments", "RaggedExperiment", function(x, min.conf = 0.8, parallel = FALSE, cores = 1) { obj <- condenseSE(x, sample.name = colnames(assay(x))) - flog.info("Fixing compartments using a minimum confidence score of ", min.conf * 100, "%") + flog.info("Fixing compartments using a minimum confidence score of %d%%", min.conf * 100) # go through and invert compartments based on the min.conf flip_compartments_lst <- mclapply(obj, flipper, min.conf, mc.cores = ifelse(parallel, cores, 1)) names(flip_compartments_lst) <- names(obj) diff --git a/tests/testthat/test-fixCompartments.R b/tests/testthat/test-fixCompartments.R index f647a331..fa2f2f09 100644 --- a/tests/testthat/test-fixCompartments.R +++ b/tests/testthat/test-fixCompartments.R @@ -80,6 +80,7 @@ test_that("flipper", { }) test_that("fixCompartments", { + futile.logger::flog.threshold(1) lapply(1:length(gr.expected), function(i) { expect_equal( fixCompartments(gr.input[[i]], min.conf), diff --git a/tests/testthat/test-getCorMatrix.R b/tests/testthat/test-getCorMatrix.R index 50e1d101..acc855af 100644 --- a/tests/testthat/test-getCorMatrix.R +++ b/tests/testthat/test-getCorMatrix.R @@ -10,10 +10,6 @@ test_that("getCorMatrix", { expected.result <- list(gr.cor 
= gr, binmat.cor = expected.cormat) expected.result.squeezed <- list(gr.cor = gr, binmat.cor = expected.cormat.squeezed) - expect_message(getCorMatrix(binmat), "Calculating correlations") - expect_message(getCorMatrix(binmat), "Done") - expect_no_warning(expect_message(getCorMatrix(binmat))) - expect_equal(getCorMatrix(binmat), expected.result) expect_equal(getCorMatrix(binmat, squeeze = TRUE), expected.result.squeezed) }) diff --git a/tests/testthat/test-getGlobalMeans.R b/tests/testthat/test-getGlobalMeans.R index 28c84c99..48733685 100644 --- a/tests/testthat/test-getGlobalMeans.R +++ b/tests/testthat/test-getGlobalMeans.R @@ -65,18 +65,18 @@ test_that("getGlobalMeans", { test_that("precomputeBootstrapMeans", { expected.rownames <- as.character(gr) - boot.mean <- precomputeBootstrapMeans(se.rna, num.bootstraps = 2) + boot.mean <- precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2) expect_equal(rownames(boot.mean), expected.rownames) lapply(1:50, function(boot_count) { - boot.mean <- precomputeBootstrapMeans(se.rna, num.bootstraps = boot_count) + boot.mean <- precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = boot_count) expect_equal(ncol(boot.mean), boot_count) }) expect_error( - precomputeBootstrapMeans(se.rna, num.bootstraps = 2, targets = c(1:4)), + precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2, targets = c(1:4)), "Need 5 or more samples for targeted bootstrapping to work." 
) expect_no_error( - precomputeBootstrapMeans(se.rna, num.bootstraps = 2, targets = c(1:5)) + precomputeBootstrapMeans(se.rna, BiocParallel::SerialParam(), num.bootstraps = 2, targets = c(1:5)) ) }) diff --git a/tests/testthat/test-meanSmoother.R b/tests/testthat/test-meanSmoother.R index dc6b9fe9..2c00347b 100644 --- a/tests/testthat/test-meanSmoother.R +++ b/tests/testthat/test-meanSmoother.R @@ -1,10 +1,5 @@ test_that("meanSmoother", { mat <- matrix(1:5, nrow = 5, ncol = 5) - expect_message( - meanSmoother(mat, k = 0), - "Returning unsmoothed mat as 'k' = 0", - fixed = TRUE - ) expect_equal( meanSmoother(mat, k = 0), mat diff --git a/tests/testthat/test-plotAB.R b/tests/testthat/test-plotAB.R index d04b0867..3634abb3 100644 --- a/tests/testthat/test-plotAB.R +++ b/tests/testthat/test-plotAB.R @@ -34,11 +34,7 @@ test_that(".unitarize", { scaling <- sqrt(sum(centered[-4]^2)) centered / scaling } - expect_message( - compartmap:::.unitarize(vec.withNA, medianCenter = FALSE), - "[.unitarize] 1 missing values were ignored.", - fixed = TRUE - ) + expect_equal(compartmap:::.unitarize(vec.withNA), expected.withNA) expected.withNA.nomedian <- { diff --git a/tests/testthat/test-summarizeBootstraps.R b/tests/testthat/test-summarizeBootstraps.R index 4b6ec355..a330d3a2 100644 --- a/tests/testthat/test-summarizeBootstraps.R +++ b/tests/testthat/test-summarizeBootstraps.R @@ -57,8 +57,6 @@ test_that("summarizeBootstraps", { removeEmptyBoots(list(gr1, gr2)) - expect_message(summarizeBootstraps(list(gr1, gr1), gr1), "Summarizing bootstraps") - tester <- function(one, two, expected) { mcols(gr1) <- data.frame(pc = one) gr1$compartments <- ifelse(mcols(gr1)$pc > 0, "open", "closed") From e049e97a239fedc0d21f58df25dc23d813180802 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Mon, 23 Feb 2026 12:52:05 -0500 Subject: [PATCH 24/28] fix(pkgdown): internalize getCompartments --- R/getCompartments.R | 1 + man/getCompartments.Rd | 1 + 2 files changed, 2 insertions(+) diff --git 
a/R/getCompartments.R b/R/getCompartments.R index 25a4e53f..a2d8af1d 100644 --- a/R/getCompartments.R +++ b/R/getCompartments.R @@ -1,6 +1,7 @@ #' Run compartment inference #' @importFrom BiocParallel bplapply #' @importFrom futile.logger flog.info flog.debug +#' @keywords internal getCompartments <- function( obj, res, diff --git a/man/getCompartments.Rd b/man/getCompartments.Rd index 8dd97d7e..b9440752 100644 --- a/man/getCompartments.Rd +++ b/man/getCompartments.Rd @@ -21,3 +21,4 @@ getCompartments( \description{ Run compartment inference } +\keyword{internal} From fcbd6734c4ec11d1855a9a7f8b5893882a68834c Mon Sep 17 00:00:00 2001 From: James Eapen Date: Mon, 23 Feb 2026 13:48:15 -0500 Subject: [PATCH 25/28] docs(arrayCompartments): remove parallel arg in example --- R/arrayCompartments.R | 1 - man/arrayCompartments.Rd | 1 - 2 files changed, 2 deletions(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 511d1432..3554dd1f 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -34,7 +34,6 @@ #' array_compartments <- arrayCompartments( #' array.data.chr14, #' chr="chr14", -#' parallel=FALSE, #' bootstrap=FALSE, #' genome="hg19", #' array.type="hm450" diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 57301f69..871c5c52 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -129,7 +129,6 @@ if (requireNamespace("minfi", quietly = TRUE)) { array_compartments <- arrayCompartments( array.data.chr14, chr="chr14", - parallel=FALSE, bootstrap=FALSE, genome="hg19", array.type="hm450" From 61e6376bcaff394c4b9bf2d23cafa2cd72db0fda Mon Sep 17 00:00:00 2001 From: James Eapen Date: Mon, 23 Feb 2026 14:05:42 -0500 Subject: [PATCH 26/28] docs(parallel): fix missing params in docstring --- R/parallel.R | 6 +++--- man/bpnworkers.list.Rd | 2 +- man/verify_bp.Rd | 2 +- man/verify_workers.Rd | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/parallel.R b/R/parallel.R index 93e5faa7..154c129d 100644 
--- a/R/parallel.R +++ b/R/parallel.R @@ -58,7 +58,7 @@ get_bpnworkers <- function(bp) { } #' Return the number of workers in a list of BiocParallelParam objects -#' @param List of BiocParallelParam objects +#' @param bplist List of BiocParallelParam objects #' @importFrom BiocParallel bpnworkers #' @return A vector of the `bpnworkers` count in each list element #' @keywords internal @@ -77,7 +77,7 @@ required_workers <- function(workers) { } #' Verify that the input BiocParallelParam is valid -#' @param A BiocParallelParam or list of 2 BiocParallelParam objects +#' @param bp A BiocParallelParam or list of 2 BiocParallelParam objects #' @importFrom BiocParallel bpnworkers #' @return TRUE if the total `bpnworkers` in the input does not exceed #' available resources as defined by `parallelly::availableCores()` @@ -87,7 +87,7 @@ verify_bp <- function(bp) { } #' Verify that requested thread count is not higher than available -#' @param thread_count The number of workers to check availability +#' @param n_workers The number of workers to check availability #' @return TRUE if the requested `thread_count` does not exceed available #' resources as defined by `parallelly::availableCores()` #' @keywords internal diff --git a/man/bpnworkers.list.Rd b/man/bpnworkers.list.Rd index 762f6eb4..727a1a1e 100644 --- a/man/bpnworkers.list.Rd +++ b/man/bpnworkers.list.Rd @@ -7,7 +7,7 @@ bpnworkers.list(bplist) } \arguments{ -\item{List}{of BiocParallelParam objects} +\item{bplist}{List of BiocParallelParam objects} } \value{ A vector of the \code{bpnworkers} count in each list element diff --git a/man/verify_bp.Rd b/man/verify_bp.Rd index 0869d5d3..fabe2928 100644 --- a/man/verify_bp.Rd +++ b/man/verify_bp.Rd @@ -7,7 +7,7 @@ verify_bp(bp) } \arguments{ -\item{A}{BiocParallelParam or list of 2 BiocParallelParam objects} +\item{bp}{A BiocParallelParam or list of 2 BiocParallelParam objects} } \value{ TRUE if the total \code{bpnworkers} in the input does not exceed diff --git 
a/man/verify_workers.Rd b/man/verify_workers.Rd index d3117a1d..8c0ee120 100644 --- a/man/verify_workers.Rd +++ b/man/verify_workers.Rd @@ -7,7 +7,7 @@ verify_workers(n_workers) } \arguments{ -\item{thread_count}{The number of workers to check availability} +\item{n_workers}{The number of workers to check availability} } \value{ TRUE if the requested \code{thread_count} does not exceed available From 1de1a5eb65448d2934fc8a8138ca77a90d7a46ed Mon Sep 17 00:00:00 2001 From: James Eapen Date: Mon, 23 Feb 2026 14:06:07 -0500 Subject: [PATCH 27/28] fix(arrayCompartments): pass boot.parallel to get_nested_params --- R/arrayCompartments.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 3554dd1f..5547aed3 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -57,7 +57,7 @@ arrayCompartments <- function( verifySE(obj) verifyCoords(obj) verifyAssayNames(obj, assay = "array") - bpparams <- get_nested_params(BPPARAM) + bpparams <- get_nested_params(BPPARAM, boot.parallel) check_worker_count(bpparams, group, length(chr), bootstrap) # preprocess the arrays From a54cce5e3901c11549525683d4ad52e44b6be066 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Mon, 23 Feb 2026 15:30:32 -0500 Subject: [PATCH 28/28] docs: do grouped inference for examples with SerialParam --- R/arrayCompartments.R | 4 +++- R/getGlobalMeans.R | 1 + R/scCompartments.R | 4 +++- man/arrayCompartments.Rd | 4 +++- man/precomputeBootstrapMeans.Rd | 1 + man/scCompartments.Rd | 4 +++- 6 files changed, 14 insertions(+), 4 deletions(-) diff --git a/R/arrayCompartments.R b/R/arrayCompartments.R index 5547aed3..da559e06 100644 --- a/R/arrayCompartments.R +++ b/R/arrayCompartments.R @@ -34,9 +34,11 @@ #' array_compartments <- arrayCompartments( #' array.data.chr14, #' chr="chr14", +#' group=TRUE, #' bootstrap=FALSE, #' genome="hg19", -#' array.type="hm450" +#' array.type="hm450", +#' BPPARAM = BiocParallel::SerialParam() #' ) #' } 
arrayCompartments <- function( diff --git a/R/getGlobalMeans.R b/R/getGlobalMeans.R index cf4ab83b..0c3a082e 100644 --- a/R/getGlobalMeans.R +++ b/R/getGlobalMeans.R @@ -63,6 +63,7 @@ getGlobalMeans <- function(obj, targets = NULL, assay = c("atac", "rna", "array" #' data("k562_scrna_chr14", package = "compartmap") #' scrna.bootstrap.global.means <- precomputeBootstrapMeans( #' k562_scrna_chr14, +#' BPPARAM = BiocParallel::SerialParam(), #' assay = "rna", #' num.bootstraps = 2 #' ) diff --git a/R/scCompartments.R b/R/scCompartments.R index 7a839d6f..6d8e9b3a 100644 --- a/R/scCompartments.R +++ b/R/scCompartments.R @@ -95,8 +95,10 @@ #' sc_compartments <- scCompartments( #' k562_scrna_chr14, #' chr = "chr14", +#' group = TRUE, #' bootstrap = FALSE, -#' genome = "hg19" +#' genome = "hg19", +#' BPPARAM = BiocParallel::SerialParam() #' ) scCompartments <- function( obj, diff --git a/man/arrayCompartments.Rd b/man/arrayCompartments.Rd index 871c5c52..45bff8b4 100644 --- a/man/arrayCompartments.Rd +++ b/man/arrayCompartments.Rd @@ -129,9 +129,11 @@ if (requireNamespace("minfi", quietly = TRUE)) { array_compartments <- arrayCompartments( array.data.chr14, chr="chr14", + group=TRUE, bootstrap=FALSE, genome="hg19", - array.type="hm450" + array.type="hm450", + BPPARAM = BiocParallel::SerialParam() ) } } diff --git a/man/precomputeBootstrapMeans.Rd b/man/precomputeBootstrapMeans.Rd index 6f8536a4..edfcd27f 100644 --- a/man/precomputeBootstrapMeans.Rd +++ b/man/precomputeBootstrapMeans.Rd @@ -33,6 +33,7 @@ Pre-compute the global means for bootstrapping compartments data("k562_scrna_chr14", package = "compartmap") scrna.bootstrap.global.means <- precomputeBootstrapMeans( k562_scrna_chr14, + BPPARAM = BiocParallel::SerialParam(), assay = "rna", num.bootstraps = 2 ) diff --git a/man/scCompartments.Rd b/man/scCompartments.Rd index eae9ca74..f727b1ba 100644 --- a/man/scCompartments.Rd +++ b/man/scCompartments.Rd @@ -121,7 +121,9 @@ data("k562_scrna_chr14", package = "compartmap") 
sc_compartments <- scCompartments( k562_scrna_chr14, chr = "chr14", + group = TRUE, bootstrap = FALSE, - genome = "hg19" + genome = "hg19", + BPPARAM = BiocParallel::SerialParam() ) }