From 62009eb3e009757be2c04d5fe390016e759a90d6 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 16:30:10 -0700 Subject: [PATCH 01/17] feat: container error handling closes #21, closes #119, closes #218. This PR introduces the new general ContainerError to wrap all potential container handlers from now on. DOMINO and OI2 are demonstrated using this error by searching for a specific error, or else bubbling the error up if no predicted error happened. --- spras/containers.py | 66 ++++++++++++++++--- spras/domino.py | 21 ++++-- spras/omicsintegrator2.py | 29 ++++++-- .../input/empty/oi2-edges.txt | 1 + .../input/empty/oi2-prizes.txt | 1 + .../input/{ => simple}/oi2-edges.txt | 0 .../input/{ => simple}/oi2-prizes.txt | 0 test/OmicsIntegrator2/test_oi2.py | 16 ++++- 8 files changed, 108 insertions(+), 26 deletions(-) create mode 100644 test/OmicsIntegrator2/input/empty/oi2-edges.txt create mode 100644 test/OmicsIntegrator2/input/empty/oi2-prizes.txt rename test/OmicsIntegrator2/input/{ => simple}/oi2-edges.txt (100%) rename test/OmicsIntegrator2/input/{ => simple}/oi2-prizes.txt (100%) diff --git a/spras/containers.py b/spras/containers.py index a1fda05f2..4af3c036c 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -3,9 +3,11 @@ import re import subprocess from pathlib import Path, PurePath, PurePosixPath +import textwrap from typing import Any, Dict, List, Optional, Tuple, Union import docker +import docker.errors import spras.config as config from spras.logging import indent @@ -120,6 +122,45 @@ def prepare_dsub_cmd(flags: dict): print(f"dsub command: {dsub_command}") return dsub_command +class ContainerError(RuntimeError): + """Raises when anything goes wrong inside a container""" + + error_code: int + stdout: Optional[str] + stderr: Optional[str] + + def __init__(self, message: str, error_code: int, stdout: Optional[str], stderr: Optional[str], *args): + """ + Constructs a new ContainerError. + + @param message: The message to display to the user. This should usually refer to the indent call to differentriate between + general logging done by Snakemake/logging calls. + @param error_code: Also known as exit status; this should generally be non-zero for ContainerErrors. + @param stdout: The standard output stream. If the origin of the stream is unknown, leave it in stdout. + @param stderr: The standard error stream. + """ + + # https://stackoverflow.com/a/26938914/7589775 + self.message = message + + self.error_code = error_code + self.stdout = stdout + self.stderr = stderr + + super(ContainerError, self).__init__(message, error_code, stdout, stderr, *args) + + def streams_contain(self, snippet: str): + stdout = self.stdout if self.stdout else '' + stderr = self.stderr if self.stderr else '' + + return snippet in stdout or snippet in stderr + + # Due to + # https://github.com/snakemake/snakemake/blob/d4890b4da691506b6a258f7534ac41fdb7ef5ab4/src/snakemake/exceptions.py#L18 + # this overrides the tostr implementation to have nicer container errors + def __str__(self): + return self.message + # TODO consider a better default environment variable # TODO environment currently a single string (e.g. 'TMPDIR=/OmicsIntegrator1'), should it be a list? @@ -171,22 +212,29 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm if 'message' in out: # This is the format of a singularity message. # See https://singularityhub.github.io/singularity-cli/api/source/spython.main.html?highlight=execute#spython.main.execute.execute. - if 'return_code' in out and not out['return_code'] == 0: - print(f"(Program exited with non-zero exit code '{out['return_code']}')") + exit_status = int(out['return_code']) if 'return_code' in out else 0 out = ''.join(out['message']) + if exit_status != 0: + message = f'An unexpected non-zero exit status ({exit_status}) occured while running this singularity container:\n' + indent(out) + raise ContainerError(message, exit_status, out, None) else: - print("Note: This is an unknown message format - if you want this pretty printed, please file an issue at https://github.com/Reed-CompBio/spras/issues/new.") + print("Note: The following output is an unknown message format which should be properly handled.") + print("Please file an issue at https://github.com/Reed-CompBio/spras/issues/new with this output.") out = str(out) elif not isinstance(out, str): out = str(out, "utf-8") print(indent(out)) except docker.errors.ContainerError as err: - print(f"(Command formatted as list: `{err.command}`)") - print(f"An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:") - err = str(err.stderr if err.stderr is not None else "", "utf-8") - print(indent(err)) - except Exception as err: - raise err + # TODO: does this lose us any information provided by stdout while the container was running? + # ContainerError doesn't expose any stdout property. + + stderr = err.stderr if err.stderr else '' + stderr = str(stderr, 'utf-8') if isinstance(stderr, bytes) else stderr + + message = textwrap.dedent(f'''\ + (Command formatted as list: `{err.command}`) + An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:\n''') + indent(stderr) + raise ContainerError(message, err.exit_status, None, stderr) # TODO any issue with creating a new client each time inside this function? def run_container_docker(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: str = 'SPRAS=True'): diff --git a/spras/domino.py b/spras/domino.py index 2364300aa..86b9fd795 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -3,7 +3,7 @@ import pandas as pd -from spras.containers import prepare_volume, run_container_and_log +from spras.containers import ContainerError, prepare_volume, run_container_and_log from spras.interactome import ( add_constant, reinsert_direction_col_undirected, @@ -137,12 +137,19 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, if module_threshold is not None: domino_command.extend(['--module_threshold', str(module_threshold)]) - run_container_and_log('DOMINO', - container_framework, - container_suffix, - domino_command, - volumes, - work_dir) + try: + run_container_and_log('DOMINO', + container_framework, + container_suffix, + domino_command, + volumes, + work_dir) + except ContainerError as err: + # TODO: can we more appropiately handle this case? Here, DOMINO + # still outputs to our output folder with a still viable HTML output. + # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 + if not err.streams_contain("ValueError: cannot apply union_all to an empty list"): + raise err # DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes # The filename is determined by the input active_genes and cannot be configured diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index b631da90f..02311b5f9 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -2,7 +2,7 @@ import pandas as pd -from spras.containers import prepare_volume, run_container_and_log +from spras.containers import ContainerError, prepare_volume, run_container_and_log from spras.dataset import Dataset from spras.interactome import reinsert_direction_col_undirected from spras.prm import PRM @@ -119,12 +119,27 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise command.extend(['--seed', str(seed)]) container_suffix = "omics-integrator-2:v2" - run_container_and_log('Omics Integrator 2', - container_framework, - container_suffix, - command, - volumes, - work_dir) + + # We use this later either if we encounter unrecoverable OI2 errors + # or as the main output file we want to post-process in parse_output. + output_tsv = Path(out_dir, 'oi2.tsv') + + try: + run_container_and_log('Omics Integrator 2', + container_framework, + container_suffix, + command, + volumes, + work_dir) + except ContainerError as err: + needle = "all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)" + if not err.streams_contain(needle): + raise err + else: + # https://github.com/Reed-CompBio/spras/issues/218 + # This error occurs when we have an empty dataframe passed to OI2. + Path(output_tsv).write_text("protein1\tprotein2\tcost\n") + # TODO do we want to retain other output files? # TODO if deleting other output files, write them all to a tmp directory and copy diff --git a/test/OmicsIntegrator2/input/empty/oi2-edges.txt b/test/OmicsIntegrator2/input/empty/oi2-edges.txt new file mode 100644 index 000000000..fc3c8cb19 --- /dev/null +++ b/test/OmicsIntegrator2/input/empty/oi2-edges.txt @@ -0,0 +1 @@ +protein1 protein2 cost diff --git a/test/OmicsIntegrator2/input/empty/oi2-prizes.txt b/test/OmicsIntegrator2/input/empty/oi2-prizes.txt new file mode 100644 index 000000000..5cb9a7529 --- /dev/null +++ b/test/OmicsIntegrator2/input/empty/oi2-prizes.txt @@ -0,0 +1 @@ +name prize diff --git a/test/OmicsIntegrator2/input/oi2-edges.txt b/test/OmicsIntegrator2/input/simple/oi2-edges.txt similarity index 100% rename from test/OmicsIntegrator2/input/oi2-edges.txt rename to test/OmicsIntegrator2/input/simple/oi2-edges.txt diff --git a/test/OmicsIntegrator2/input/oi2-prizes.txt b/test/OmicsIntegrator2/input/simple/oi2-prizes.txt similarity index 100% rename from test/OmicsIntegrator2/input/oi2-prizes.txt rename to test/OmicsIntegrator2/input/simple/oi2-prizes.txt diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index 2a0a3e3c1..3aba22966 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -8,11 +8,14 @@ config.init_from_file("config/config.yaml") -TEST_DIR = 'test/OmicsIntegrator2/' -EDGE_FILE = TEST_DIR+'input/oi2-edges.txt' -PRIZE_FILE = TEST_DIR+'input/oi2-prizes.txt' +TEST_DIR = Path('test', 'OmicsIntegrator2') OUT_FILE = Path(TEST_DIR, 'output', 'test.tsv') +EDGE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-edges.txt' +PRIZE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-prizes.txt' + +EDGE_FILE_EMPTY = TEST_DIR / 'input' / 'empty' / 'oi2-edges.txt' +PRIZE_FILE_EMPTY = TEST_DIR / 'input' / 'empty' / 'oi2-prizes.txt' class TestOmicsIntegrator2: """ @@ -58,6 +61,13 @@ def test_oi2_missing(self): OmicsIntegrator2.run(edges=EDGE_FILE, prizes=PRIZE_FILE) + def test_oi2_empty(self): + OUT_FILE.unlink(missing_ok=True) + OmicsIntegrator2.run(edges=EDGE_FILE_EMPTY, + prizes=PRIZE_FILE_EMPTY, + output_file=OUT_FILE) + assert OUT_FILE.exists() + # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') From ea624e02ce3064fb2364e50e2bef6d94edcad01f Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 16:32:25 -0700 Subject: [PATCH 02/17] docs(domino): correct todo error msg --- spras/domino.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/domino.py b/spras/domino.py index 86b9fd795..1609f7fb8 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -145,7 +145,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, volumes, work_dir) except ContainerError as err: - # TODO: can we more appropiately handle this case? Here, DOMINO + # TODO: is this too general of an error to handle? Here, DOMINO # still outputs to our output folder with a still viable HTML output. # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 if not err.streams_contain("ValueError: cannot apply union_all to an empty list"): From 13a549151121ff116430a28683ec97e90c281ed0 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 16:33:34 -0700 Subject: [PATCH 03/17] docs: update cmt on using from None --- spras/containers.py | 13 +++++++------ spras/omicsintegrator2.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/spras/containers.py b/spras/containers.py index 4af3c036c..268f7eece 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -2,8 +2,8 @@ import platform import re import subprocess -from pathlib import Path, PurePath, PurePosixPath import textwrap +from pathlib import Path, PurePath, PurePosixPath from typing import Any, Dict, List, Optional, Tuple, Union import docker @@ -132,7 +132,7 @@ class ContainerError(RuntimeError): def __init__(self, message: str, error_code: int, stdout: Optional[str], stderr: Optional[str], *args): """ Constructs a new ContainerError. - + @param message: The message to display to the user. This should usually refer to the indent call to differentriate between general logging done by Snakemake/logging calls. @param error_code: Also known as exit status; this should generally be non-zero for ContainerErrors. @@ -147,14 +147,14 @@ def __init__(self, message: str, error_code: int, stdout: Optional[str], stderr: self.stdout = stdout self.stderr = stderr - super(ContainerError, self).__init__(message, error_code, stdout, stderr, *args) + super(ContainerError, self).__init__(message, error_code, stdout, stderr, *args) def streams_contain(self, snippet: str): stdout = self.stdout if self.stdout else '' stderr = self.stderr if self.stderr else '' return snippet in stdout or snippet in stderr - + # Due to # https://github.com/snakemake/snakemake/blob/d4890b4da691506b6a258f7534ac41fdb7ef5ab4/src/snakemake/exceptions.py#L18 # this overrides the tostr implementation to have nicer container errors @@ -227,14 +227,15 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm except docker.errors.ContainerError as err: # TODO: does this lose us any information provided by stdout while the container was running? # ContainerError doesn't expose any stdout property. - + stderr = err.stderr if err.stderr else '' stderr = str(stderr, 'utf-8') if isinstance(stderr, bytes) else stderr message = textwrap.dedent(f'''\ (Command formatted as list: `{err.command}`) An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:\n''') + indent(stderr) - raise ContainerError(message, err.exit_status, None, stderr) + # We retrieved all of the information from docker.errors.ContainerError, so here, we ignore the original error. + raise ContainerError(message, err.exit_status, None, stderr) from None # TODO any issue with creating a new client each time inside this function? def run_container_docker(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: str = 'SPRAS=True'): diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 02311b5f9..6d0478fa8 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -139,7 +139,7 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise # https://github.com/Reed-CompBio/spras/issues/218 # This error occurs when we have an empty dataframe passed to OI2. Path(output_tsv).write_text("protein1\tprotein2\tcost\n") - + # TODO do we want to retain other output files? # TODO if deleting other output files, write them all to a tmp directory and copy From 00928765e7451a21223633265a4a16829c77a5b4 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 16:35:11 -0700 Subject: [PATCH 04/17] style: fmt rcal --- spras/domino.py | 10 +++++----- spras/omicsintegrator2.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/spras/domino.py b/spras/domino.py index 1609f7fb8..a92d9683d 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -139,11 +139,11 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, try: run_container_and_log('DOMINO', - container_framework, - container_suffix, - domino_command, - volumes, - work_dir) + container_framework, + container_suffix, + domino_command, + volumes, + work_dir) except ContainerError as err: # TODO: is this too general of an error to handle? Here, DOMINO # still outputs to our output folder with a still viable HTML output. diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 6d0478fa8..9ea2576f1 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -126,11 +126,11 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise try: run_container_and_log('Omics Integrator 2', - container_framework, - container_suffix, - command, - volumes, - work_dir) + container_framework, + container_suffix, + command, + volumes, + work_dir) except ContainerError as err: needle = "all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)" if not err.streams_contain(needle): From 50dc14ed386b090f3452c57fcec2f3759c14a4f1 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 17:00:18 -0700 Subject: [PATCH 05/17] fix: proper domino err handling --- spras/domino.py | 11 +++- .../input/empty/domino-active-genes.txt | 0 test/DOMINO/input/empty/domino-network.txt | 0 .../{ => simple}/domino-active-genes.txt | 0 .../input/{ => simple}/domino-network.txt | 0 .../input/small/domino-active-genes.txt | 2 + test/DOMINO/input/small/domino-network.txt | 4 ++ test/DOMINO/test_domino.py | 58 ++++++++++++------- 8 files changed, 52 insertions(+), 23 deletions(-) create mode 100644 test/DOMINO/input/empty/domino-active-genes.txt create mode 100644 test/DOMINO/input/empty/domino-network.txt rename test/DOMINO/input/{ => simple}/domino-active-genes.txt (100%) rename test/DOMINO/input/{ => simple}/domino-network.txt (100%) create mode 100644 test/DOMINO/input/small/domino-active-genes.txt create mode 100644 test/DOMINO/input/small/domino-network.txt diff --git a/spras/domino.py b/spras/domino.py index a92d9683d..a8ed80539 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -145,10 +145,15 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, volumes, work_dir) except ContainerError as err: - # TODO: is this too general of an error to handle? Here, DOMINO + # TODO: should we let DOMINO output an empty file here? Here, DOMINO # still outputs to our output folder with a still viable HTML output. # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 - if not err.streams_contain("ValueError: cannot apply union_all to an empty list"): + if err.streams_contain("ValueError: cannot apply union_all to an empty list"): + pass + # Occurs when DOMINO gets passed some empty dataframe. + elif err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): + pass + else: raise err # DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes @@ -165,7 +170,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, # Clean up DOMINO intermediate and pickle files slices_file.unlink(missing_ok=True) Path(out_dir, 'network.slices.pkl').unlink(missing_ok=True) - Path(network + '.pkl').unlink(missing_ok=True) + Path(str(network) + '.pkl').unlink(missing_ok=True) @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): diff --git a/test/DOMINO/input/empty/domino-active-genes.txt b/test/DOMINO/input/empty/domino-active-genes.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/DOMINO/input/empty/domino-network.txt b/test/DOMINO/input/empty/domino-network.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/DOMINO/input/domino-active-genes.txt b/test/DOMINO/input/simple/domino-active-genes.txt similarity index 100% rename from test/DOMINO/input/domino-active-genes.txt rename to test/DOMINO/input/simple/domino-active-genes.txt diff --git a/test/DOMINO/input/domino-network.txt b/test/DOMINO/input/simple/domino-network.txt similarity index 100% rename from test/DOMINO/input/domino-network.txt rename to test/DOMINO/input/simple/domino-network.txt diff --git a/test/DOMINO/input/small/domino-active-genes.txt b/test/DOMINO/input/small/domino-active-genes.txt new file mode 100644 index 000000000..167225fce --- /dev/null +++ b/test/DOMINO/input/small/domino-active-genes.txt @@ -0,0 +1,2 @@ +A +C \ No newline at end of file diff --git a/test/DOMINO/input/small/domino-network.txt b/test/DOMINO/input/small/domino-network.txt new file mode 100644 index 000000000..2e4bcdd08 --- /dev/null +++ b/test/DOMINO/input/small/domino-network.txt @@ -0,0 +1,4 @@ +ID_interactor_A ppi ID_interactor_b +A ppi B +B ppi C +C ppi B \ No newline at end of file diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 7f09fa975..efe017084 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -9,9 +9,9 @@ config.init_from_file("config/config.yaml") -TEST_DIR = 'test/DOMINO/' -OUT_FILE_DEFAULT = TEST_DIR+'output/domino-output.txt' -OUT_FILE_OPTIONAL = TEST_DIR+'output/domino-output-thresholds.txt' +TEST_DIR = Path('test', 'DOMINO') +OUT_FILE_DEFAULT = TEST_DIR / 'output' / 'domino-output.txt' +OUT_FILE_OPTIONAL = TEST_DIR / 'output' / 'domino-output-thresholds.txt' class TestDOMINO: @@ -26,34 +26,32 @@ class TestDOMINO: def test_domino_required(self): # Only include required arguments - out_path = Path(OUT_FILE_DEFAULT) - out_path.unlink(missing_ok=True) + OUT_FILE_DEFAULT.unlink(missing_ok=True) DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_DEFAULT) # output_file should be empty - assert out_path.exists() + assert OUT_FILE_DEFAULT.exists() def test_domino_optional(self): # Include optional arguments - out_path = Path(OUT_FILE_OPTIONAL) - out_path.unlink(missing_ok=True) + OUT_FILE_DEFAULT.unlink(missing_ok=True) DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_OPTIONAL, slice_threshold=0.4, module_threshold=0.06) # output_file should be empty - assert out_path.exists() + assert OUT_FILE_OPTIONAL.exists() def test_domino_missing_active_genes(self): # Test the expected error is raised when active_genes argument is missing with pytest.raises(ValueError): # No active_genes DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', output_file=OUT_FILE_DEFAULT) def test_domino_missing_network(self): @@ -61,22 +59,42 @@ def test_domino_missing_network(self): with pytest.raises(ValueError): # No network DOMINO.run( - active_genes=TEST_DIR+'input/domino-active-genes.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_DEFAULT) + def test_domino_small(self): + # Test over small files + # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 + OUT_FILE_DEFAULT.unlink(missing_ok=True) + DOMINO.run( + network=TEST_DIR / 'input' / 'small' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'small' / 'domino-active-genes.txt', + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists() + + def test_domino_empty(self): + # Test over empty files + # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 + OUT_FILE_DEFAULT.unlink(missing_ok=True) + DOMINO.run( + network=TEST_DIR / 'input' / 'empty' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'empty' / 'domino-active-genes.txt', + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists() + + # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') def test_domino_singularity(self): - out_path = Path(OUT_FILE_DEFAULT) - out_path.unlink(missing_ok=True) + OUT_FILE_DEFAULT.unlink(missing_ok=True) # Only include required arguments and run with Singularity DOMINO.run( - network=TEST_DIR+'input/domino-network.txt', - active_genes=TEST_DIR+'input/domino-active-genes.txt', + network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', + active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_DEFAULT, container_framework="singularity") - assert out_path.exists() + assert OUT_FILE_DEFAULT.exists() def test_pre_id_transform(self): """ From 3b6acd709b6818b3e04e1007e04bf0a839e2e5c6 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 16 Jun 2025 16:30:20 +0000 Subject: [PATCH 06/17] fix: wrap slicer in try-except --- spras/containers.py | 2 +- spras/domino.py | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/spras/containers.py b/spras/containers.py index 268f7eece..ac776f132 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -153,7 +153,7 @@ def streams_contain(self, snippet: str): stdout = self.stdout if self.stdout else '' stderr = self.stderr if self.stderr else '' - return snippet in stdout or snippet in stderr + return (snippet in stdout) or (snippet in stderr) # Due to # https://github.com/snakemake/snakemake/blob/d4890b4da691506b6a258f7534ac41fdb7ef5ab4/src/snakemake/exceptions.py#L18 diff --git a/spras/domino.py b/spras/domino.py index a8ed80539..5427b009a 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -113,12 +113,19 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, '--output_file', mapped_slices_file] container_suffix = "domino" - run_container_and_log('slicer', - container_framework, - container_suffix, - slicer_command, - volumes, - work_dir) + try: + run_container_and_log('slicer', + container_framework, + container_suffix, + slicer_command, + volumes, + work_dir) + except ContainerError as err: + # Occurs when DOMINO gets passed some empty dataframe. + if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): + pass + else: + raise err # Make the Python command to run within the container domino_command = ['domino', @@ -145,13 +152,13 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, volumes, work_dir) except ContainerError as err: + # Occurs when DOMINO gets passed some empty dataframe. + if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): + pass # TODO: should we let DOMINO output an empty file here? Here, DOMINO # still outputs to our output folder with a still viable HTML output. # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 - if err.streams_contain("ValueError: cannot apply union_all to an empty list"): - pass - # Occurs when DOMINO gets passed some empty dataframe. - elif err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): + elif err.streams_contain("ValueError: cannot apply union_all to an empty list"): pass else: raise err From 7ce6467f66700cbbfa2859df8220e95b8803ad46 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Tue, 17 Jun 2025 19:55:56 +0000 Subject: [PATCH 07/17] chore: drop oi2 error handling --- spras/omicsintegrator2.py | 21 ++++++------------- .../input/empty/oi2-edges.txt | 1 - .../input/empty/oi2-prizes.txt | 1 - test/OmicsIntegrator2/test_oi2.py | 10 --------- 4 files changed, 6 insertions(+), 27 deletions(-) delete mode 100644 test/OmicsIntegrator2/input/empty/oi2-edges.txt delete mode 100644 test/OmicsIntegrator2/input/empty/oi2-prizes.txt diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 9ea2576f1..040a3f247 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -124,21 +124,12 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise # or as the main output file we want to post-process in parse_output. output_tsv = Path(out_dir, 'oi2.tsv') - try: - run_container_and_log('Omics Integrator 2', - container_framework, - container_suffix, - command, - volumes, - work_dir) - except ContainerError as err: - needle = "all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)" - if not err.streams_contain(needle): - raise err - else: - # https://github.com/Reed-CompBio/spras/issues/218 - # This error occurs when we have an empty dataframe passed to OI2. - Path(output_tsv).write_text("protein1\tprotein2\tcost\n") + run_container_and_log('Omics Integrator 2', + container_framework, + container_suffix, + command, + volumes, + work_dir) # TODO do we want to retain other output files? diff --git a/test/OmicsIntegrator2/input/empty/oi2-edges.txt b/test/OmicsIntegrator2/input/empty/oi2-edges.txt deleted file mode 100644 index fc3c8cb19..000000000 --- a/test/OmicsIntegrator2/input/empty/oi2-edges.txt +++ /dev/null @@ -1 +0,0 @@ -protein1 protein2 cost diff --git a/test/OmicsIntegrator2/input/empty/oi2-prizes.txt b/test/OmicsIntegrator2/input/empty/oi2-prizes.txt deleted file mode 100644 index 5cb9a7529..000000000 --- a/test/OmicsIntegrator2/input/empty/oi2-prizes.txt +++ /dev/null @@ -1 +0,0 @@ -name prize diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index 3aba22966..b26e2a0b3 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -14,9 +14,6 @@ EDGE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-edges.txt' PRIZE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-prizes.txt' -EDGE_FILE_EMPTY = TEST_DIR / 'input' / 'empty' / 'oi2-edges.txt' -PRIZE_FILE_EMPTY = TEST_DIR / 'input' / 'empty' / 'oi2-prizes.txt' - class TestOmicsIntegrator2: """ Run Omics Integrator 2 in the Docker image @@ -61,13 +58,6 @@ def test_oi2_missing(self): OmicsIntegrator2.run(edges=EDGE_FILE, prizes=PRIZE_FILE) - def test_oi2_empty(self): - OUT_FILE.unlink(missing_ok=True) - OmicsIntegrator2.run(edges=EDGE_FILE_EMPTY, - prizes=PRIZE_FILE_EMPTY, - output_file=OUT_FILE) - assert OUT_FILE.exists() - # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system') From bf9ed60c8b04f398419b9c3b4cd80abd109a8019 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sat, 23 Aug 2025 20:12:54 +0000 Subject: [PATCH 08/17] docs: solidify DOMINO wrapping details --- docs/prms/domino.rst | 6 ++++++ spras/domino.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/prms/domino.rst b/docs/prms/domino.rst index a8dac732c..f562cf449 100644 --- a/docs/prms/domino.rst +++ b/docs/prms/domino.rst @@ -8,3 +8,9 @@ DOMINO has two optional parameters: * slice_threshold: the p-value threshold for considering a slice as relevant * module_threshold: the p-value threshold for considering a putative module as final module + +Wrapper Details +=============== + +If the input dataframe is empty or too 'small' (where no modules are found), +SPRAS will instead emit an empty output file. diff --git a/spras/domino.py b/spras/domino.py index b15c76af8..181ae1b83 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -154,7 +154,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, # Occurs when DOMINO gets passed some empty dataframe. if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): pass - # TODO: should we let DOMINO output an empty file here? Here, DOMINO + # Here, DOMINO # still outputs to our output folder with a still viable HTML output. # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 elif err.streams_contain("ValueError: cannot apply union_all to an empty list"): From e3c03faa8444e2b35438a92f6e6dbf2da2b6070e Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 5 Sep 2025 14:12:17 -0700 Subject: [PATCH 09/17] docs: typo --- spras/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/containers.py b/spras/containers.py index 03af7b1f7..b8c8372f9 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -133,7 +133,7 @@ def __init__(self, message: str, error_code: int, stdout: Optional[str], stderr: """ Constructs a new ContainerError. - @param message: The message to display to the user. This should usually refer to the indent call to differentriate between + @param message: The message to display to the user. This should usually refer to the indent call to differentiate between general logging done by Snakemake/logging calls. @param error_code: Also known as exit status; this should generally be non-zero for ContainerErrors. @param stdout: The standard output stream. If the origin of the stream is unknown, leave it in stdout. From ddebead35fdb4ed60b0226348ce5b18bfdfef430 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 5 Sep 2025 14:13:00 -0700 Subject: [PATCH 10/17] docs: typo --- spras/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/containers.py b/spras/containers.py index b8c8372f9..c16913173 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -222,7 +222,7 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm exit_status = int(out['return_code']) if 'return_code' in out else 0 out = ''.join(out['message']) if exit_status != 0: - message = f'An unexpected non-zero exit status ({exit_status}) occured while running this singularity container:\n' + indent(out) + message = f'An unexpected non-zero exit status ({exit_status}) occurred while running this singularity container:\n' + indent(out) raise ContainerError(message, exit_status, out, None) else: print("Note: The following output is an unknown message format which should be properly handled.") From f046e00127c0e71d63ff8c1e79008ee9ff013fa0 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 5 Sep 2025 21:21:44 +0000 Subject: [PATCH 11/17] partial review addressing --- spras/containers.py | 8 ++++++-- spras/omicsintegrator2.py | 4 ---- test/DOMINO/test_domino.py | 2 +- test/OmicsIntegrator2/test_oi2.py | 2 -- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/spras/containers.py b/spras/containers.py index c16913173..97fdc4d38 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -149,11 +149,15 @@ def __init__(self, message: str, error_code: int, stdout: Optional[str], stderr: super(ContainerError, self).__init__(message, error_code, stdout, stderr, *args) - def streams_contain(self, snippet: str): + def streams_contain(self, needle: str): + """ + Checks (with case sensitivity) + if any of the stdout/err streams have the provided needle. + """ stdout = self.stdout if self.stdout else '' stderr = self.stderr if self.stderr else '' - return (snippet in stdout) or (snippet in stderr) + return (needle in stdout) or (needle in stderr) # Due to # https://github.com/snakemake/snakemake/blob/d4890b4da691506b6a258f7534ac41fdb7ef5ab4/src/snakemake/exceptions.py#L18 diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 6d6046b03..51635f640 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -130,10 +130,6 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise container_suffix = "omics-integrator-2:v2" - # We use this later either if we encounter unrecoverable OI2 errors - # or as the main output file we want to post-process in parse_output. - output_tsv = Path(out_dir, 'oi2.tsv') - run_container_and_log('Omics Integrator 2', container_framework, container_suffix, diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index 8a00b078d..dd0e8d10e 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -35,7 +35,7 @@ def test_domino_required(self): def test_domino_optional(self): # Include optional arguments - OUT_FILE_DEFAULT.unlink(missing_ok=True) + OUT_FILE_OPTIONAL.unlink(missing_ok=True) DOMINO.run( network=TEST_DIR / 'input' / 'simple' / 'domino-network.txt', active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py index 86eca8606..85a408689 100644 --- a/test/OmicsIntegrator2/test_oi2.py +++ b/test/OmicsIntegrator2/test_oi2.py @@ -9,8 +9,6 @@ config.init_from_file("config/config.yaml") TEST_DIR = Path('test', 'OmicsIntegrator2') -EDGE_FILE = TEST_DIR / 'input' / 'oi2-edges.txt' -PRIZE_FILE = TEST_DIR / 'input' / 'oi2-prizes.txt' OUT_FILE = TEST_DIR / 'output' / 'test.tsv' EDGE_FILE = TEST_DIR / 'input' / 'simple' / 'oi2-edges.txt' From 4652fe34862a10ddada5cbd9d724dfaca399b891 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 5 Sep 2025 21:28:59 +0000 Subject: [PATCH 12/17] docs: patch dockerpy later? --- spras/containers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spras/containers.py b/spras/containers.py index 97fdc4d38..3ca60fc66 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -236,8 +236,10 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm out = str(out, "utf-8") print(indent(out)) except docker.errors.ContainerError as err: - # TODO: does this lose us any information provided by stdout while the container was running? - # ContainerError doesn't expose any stdout property. + # TODO: do we want to patch dockerpy to preserve stdout on ContainerError? + # ContainerError doesn't expose any stdout property, since it never gets stdout passed in in the first place: + # https://github.com/docker/docker-py/blob/6e6a273573fe77f00776b30de0685162a102e43f/docker/models/containers.py#L897-L908 + # (Specifically, out = container.logs(stdout=False, stderr=True) erases the stdout usually attached to the local `out` variable in the above code.) stderr = err.stderr if err.stderr else '' stderr = str(stderr, 'utf-8') if isinstance(stderr, bytes) else stderr From f68fee915e21e3a1de6eee3ed2961bcc2eda2640 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 5 Sep 2025 21:37:50 +0000 Subject: [PATCH 13/17] docs: link to spras issue --- spras/containers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/spras/containers.py b/spras/containers.py index 3ca60fc66..5263c92c1 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -236,10 +236,8 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm out = str(out, "utf-8") print(indent(out)) except docker.errors.ContainerError as err: - # TODO: do we want to patch dockerpy to preserve stdout on ContainerError? - # ContainerError doesn't expose any stdout property, since it never gets stdout passed in in the first place: - # https://github.com/docker/docker-py/blob/6e6a273573fe77f00776b30de0685162a102e43f/docker/models/containers.py#L897-L908 - # (Specifically, out = container.logs(stdout=False, stderr=True) erases the stdout usually attached to the local `out` variable in the above code.) + # TODO: dockerpy does not share stdout on error. + # https://github.com/Reed-CompBio/spras/issues/388 stderr = err.stderr if err.stderr else '' stderr = str(stderr, 'utf-8') if isinstance(stderr, bytes) else stderr From 33814c1d115245d495bd83dc79b550764a87b5e6 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Fri, 5 Sep 2025 22:09:46 -0700 Subject: [PATCH 14/17] docs: cmt --- spras/domino.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spras/domino.py b/spras/domino.py index 181ae1b83..07eea0810 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -120,7 +120,8 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, volumes, work_dir) except ContainerError as err: - # Occurs when DOMINO gets passed some empty dataframe. + # Occurs when DOMINO gets passed some empty dataframe from network_file. + # This counts as an empty input, so we return an empty output. if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): pass else: @@ -151,7 +152,8 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, volumes, work_dir) except ContainerError as err: - # Occurs when DOMINO gets passed some empty dataframe. + # Occurs when DOMINO gets passed some empty dataframe from network_file. + # This counts as an empty input, so we return an empty output. if err.streams_contain("pandas.errors.EmptyDataError: No columns to parse from file"): pass # Here, DOMINO From e2819a9724b7360bf40dad29d234886d106358c9 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Sat, 6 Sep 2025 08:04:11 +0000 Subject: [PATCH 15/17] fix: access stdout via container logs --- spras/containers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/spras/containers.py b/spras/containers.py index 5263c92c1..4679f3c8c 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -236,17 +236,14 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm out = str(out, "utf-8") print(indent(out)) except docker.errors.ContainerError as err: - # TODO: dockerpy does not share stdout on error. - # https://github.com/Reed-CompBio/spras/issues/388 - - stderr = err.stderr if err.stderr else '' - stderr = str(stderr, 'utf-8') if isinstance(stderr, bytes) else stderr + stdout = str(err.container.logs(stdout=True, stderr=False), 'utf-8') + stderr = str(err.container.logs(stdout=False, stderr=True), 'utf-8') message = textwrap.dedent(f'''\ (Command formatted as list: `{err.command}`) An unexpected non-zero exit status ({err.exit_status}) inside the docker image {err.image} occurred:\n''') + indent(stderr) # We retrieved all of the information from docker.errors.ContainerError, so here, we ignore the original error. - raise ContainerError(message, err.exit_status, None, stderr) from None + raise ContainerError(message, err.exit_status, stdout, stderr) from None # TODO any issue with creating a new client each time inside this function? def run_container_docker(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None): From 93606b3275e80776151be1ee2eda68ea8c204d4f Mon Sep 17 00:00:00 2001 From: Tristan Figueroa-Reid Date: Wed, 8 Oct 2025 20:10:06 +0000 Subject: [PATCH 16/17] chore: rm domino small test --- test/DOMINO/test_domino.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py index dd0e8d10e..849ab1aef 100644 --- a/test/DOMINO/test_domino.py +++ b/test/DOMINO/test_domino.py @@ -61,16 +61,6 @@ def test_domino_missing_network(self): active_genes=TEST_DIR / 'input' / 'simple' / 'domino-active-genes.txt', output_file=OUT_FILE_DEFAULT) - def test_domino_small(self): - # Test over small files - # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 - OUT_FILE_DEFAULT.unlink(missing_ok=True) - DOMINO.run( - network=TEST_DIR / 'input' / 'small' / 'domino-network.txt', - active_genes=TEST_DIR / 'input' / 'small' / 'domino-active-genes.txt', - output_file=OUT_FILE_DEFAULT) - assert OUT_FILE_DEFAULT.exists() - def test_domino_empty(self): # Test over empty files # https://github.com/Reed-CompBio/spras/pull/103#issuecomment-1681526958 From 6873ee88e051a85214ce3a8c50da1d4077714a5b Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Sat, 11 Oct 2025 00:49:51 +0000 Subject: [PATCH 17/17] test: rm small --- test/DOMINO/input/small/domino-active-genes.txt | 2 -- test/DOMINO/input/small/domino-network.txt | 4 ---- 2 files changed, 6 deletions(-) delete mode 100644 test/DOMINO/input/small/domino-active-genes.txt delete mode 100644 test/DOMINO/input/small/domino-network.txt diff --git a/test/DOMINO/input/small/domino-active-genes.txt b/test/DOMINO/input/small/domino-active-genes.txt deleted file mode 100644 index 167225fce..000000000 --- a/test/DOMINO/input/small/domino-active-genes.txt +++ /dev/null @@ -1,2 +0,0 @@ -A -C \ No newline at end of file diff --git a/test/DOMINO/input/small/domino-network.txt b/test/DOMINO/input/small/domino-network.txt deleted file mode 100644 index 2e4bcdd08..000000000 --- a/test/DOMINO/input/small/domino-network.txt +++ /dev/null @@ -1,4 +0,0 @@ -ID_interactor_A ppi ID_interactor_b -A ppi B -B ppi C -C ppi B \ No newline at end of file