From 0b48518b30652dd56f53798a2b72c0da57ac63b7 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 16 Jan 2024 15:35:02 +0100 Subject: [PATCH 001/163] `args.time_delay` -> `args.run.time_delay` in `Runner` --- looper/looper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/looper.py b/looper/looper.py index 32e97a0d8..e6c10718e 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -400,7 +400,7 @@ def __call__(self, args, rerun=False, **compute_kwargs): pipeline_interface=piface, prj=self.prj, compute_variables=comp_vars, - delay=args.time_delay, + delay=args.run.time_delay, extra_args=args.command_extra, extra_args_override=args.command_extra_override, ignore_flags=args.ignore_flags, From 412f3e0f18d38ad008badc20a6aa7c2b76e8fc4d Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 16 Jan 2024 15:52:27 +0100 Subject: [PATCH 002/163] Uncomment `enrich_args()` call --- looper/cli_pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 07c0e8552..34ac756af 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -57,7 +57,7 @@ def main() -> None: f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." 
) - args = enrich_args_via_cfg(args, parser, False) + # args = enrich_args_via_cfg(args, parser, False) divcfg = ( select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None ) From 146ce169bceef77993405e18df35a40edb7436d1 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 16 Jan 2024 15:52:48 +0100 Subject: [PATCH 003/163] Fix `lump` argument type --- looper/command_models/arguments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 9c4f9c007..5367f5cc3 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -95,7 +95,7 @@ class ArgumentEnum(enum.Enum): ) LUMP = Argument( name="lump", - default=(int, None), + default=(float, None), description="Total input file size (GB) to batch into one job", ) LUMPN = Argument( From 7068f1c3cb24d5c38a564814ade851f30f2c0c54 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 16 Jan 2024 15:53:19 +0100 Subject: [PATCH 004/163] Fix argument accessions in `Runner` --- looper/looper.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/looper/looper.py b/looper/looper.py index e6c10718e..38d17d02d 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -401,11 +401,11 @@ def __call__(self, args, rerun=False, **compute_kwargs): prj=self.prj, compute_variables=comp_vars, delay=args.run.time_delay, - extra_args=args.command_extra, - extra_args_override=args.command_extra_override, - ignore_flags=args.ignore_flags, - max_cmds=args.lumpn, - max_size=args.lump, + extra_args=args.run.command_extra, + extra_args_override=args.run.command_extra_override, + ignore_flags=args.run.ignore_flags, + max_cmds=args.run.lumpn, + max_size=args.run.lump, ) submission_conductors[piface.pipe_iface_file] = conductor @@ -414,7 +414,7 @@ def __call__(self, args, rerun=False, **compute_kwargs): self.prj.pipestat_configured_project or self.prj.pipestat_configured ) - for 
sample in select_samples(prj=self.prj, args=args): + for sample in select_samples(prj=self.prj, args=args.run): pl_fails = [] skip_reasons = [] sample_pifaces = self.prj.get_sample_piface( @@ -486,7 +486,7 @@ def __call__(self, args, rerun=False, **compute_kwargs): ) _LOGGER.info("Commands submitted: {} of {}".format(cmd_sub_total, max_cmds)) self.debug[DEBUG_COMMANDS] = "{} of {}".format(cmd_sub_total, max_cmds) - if args.dry_run: + if args.run.dry_run: job_sub_total_if_real = job_sub_total job_sub_total = 0 _LOGGER.info( From 0037c02afcbb7f7c0107e5c07e15456b1bf2516e Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 16 Jan 2024 15:55:00 +0100 Subject: [PATCH 005/163] Print separator line for better visibility --- looper/cli_pydantic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 34ac756af..6128b811c 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -41,6 +41,7 @@ def main() -> None: ) args = parser.parse_typed_args() print(args) + print("#########################################") # here comes adapted `cli_looper.py` code looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) From 081acc43aef154b49a9f11139ab2548e1a69f444 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 10:55:59 +0100 Subject: [PATCH 006/163] Formatting changes --- looper/cli_pydantic.py | 3 +-- looper/command_models/arguments.py | 22 ++++++++-------------- looper/command_models/commands.py | 11 ++++++++--- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 6128b811c..4d11c61b3 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -8,12 +8,11 @@ from pephubclient import PEPHubClient from ubiquerg import VersionInHelpParser -from .command_models.commands import TopLevelParser - from divvy import select_divvy_config from . 
import __version__ from .cli_looper import _proc_resources_spec +from .command_models.commands import TopLevelParser from .const import * from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config from .exceptions import * diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 5367f5cc3..53389d7b6 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -1,5 +1,5 @@ -from copy import copy import enum +from copy import copy from typing import Any import pydantic @@ -117,7 +117,7 @@ class ArgumentEnum(enum.Enum): SETTINGS = Argument( name="settings", default=(str, ""), - description="Path to a YAML settings file with compute settings" + description="Path to a YAML settings file with compute settings", ) PEP_CONFIG = Argument( name="pep_config", @@ -132,25 +132,19 @@ class ArgumentEnum(enum.Enum): SAMPLE_PIPELINE_INTERFACES = Argument( name="sample_pipeline_interfaces", default=(list, []), - description="Paths to looper sample config files" + description="Paths to looper sample config files", ) PROJECT_PIPELINE_INTERFACES = Argument( name="project_pipeline_interfaces", default=(list, []), - description="Paths to looper project config files" - ) + description="Paths to looper project config files", + ) AMEND = Argument( - name="amend", - default=(list, []), - description="List of amendments to activate" + name="amend", default=(list, []), description="List of amendments to activate" ) SEL_FLAG = Argument( - name="sel_flag", - default=(str, ""), - description="Sample selection flag" + name="sel_flag", default=(str, ""), description="Sample selection flag" ) EXC_FLAG = Argument( - name="exc_flag", - default=(str, ""), - description="Sample exclusion flag" + name="exc_flag", default=(str, ""), description="Sample exclusion flag" ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index ec66ac0ab..c1621f9ff 100644 --- a/looper/command_models/commands.py +++ 
b/looper/command_models/commands.py @@ -3,8 +3,9 @@ import pydantic -from .arguments import Argument, ArgumentEnum from ..const import MESSAGE_BY_SUBCOMMAND +from .arguments import Argument, ArgumentEnum + @dataclass class Command: @@ -69,8 +70,12 @@ class TopLevelParser(pydantic.BaseModel): pep_config: Optional[str] = ArgumentEnum.PEP_CONFIG.value.with_reduced_default() output_dir: Optional[str] = ArgumentEnum.OUTPUT_DIR.value.with_reduced_default() config_file: Optional[str] = ArgumentEnum.CONFIG_FILE.value.with_reduced_default() - sample_pipeline_interfaces: Optional[list[str]] = ArgumentEnum.SAMPLE_PIPELINE_INTERFACES.value.with_reduced_default() - project_pipeline_interfaces: Optional[list[str]] = ArgumentEnum.PROJECT_PIPELINE_INTERFACES.value.with_reduced_default() + sample_pipeline_interfaces: Optional[ + list[str] + ] = ArgumentEnum.SAMPLE_PIPELINE_INTERFACES.value.with_reduced_default() + project_pipeline_interfaces: Optional[ + list[str] + ] = ArgumentEnum.PROJECT_PIPELINE_INTERFACES.value.with_reduced_default() amend: Optional[list[str]] = ArgumentEnum.AMEND.value.with_reduced_default() sel_flag: Optional[str] = ArgumentEnum.SEL_FLAG.value.with_reduced_default() exc_flag: Optional[str] = ArgumentEnum.EXC_FLAG.value.with_reduced_default() From 4462d513760850f6dc509f163750160df8718a60 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 10:56:52 +0100 Subject: [PATCH 007/163] Add LOOPER_CONFIG as an argument --- looper/command_models/arguments.py | 5 +++++ looper/command_models/commands.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 53389d7b6..b1ee53525 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -114,6 +114,11 @@ class ArgumentEnum(enum.Enum): default=(str, None), description="Project configuration file", ) + LOOPER_CONFIG = Argument( + name="looper_config", + default=(str, None), + description="Looper 
configuration file (YAML)", + ) SETTINGS = Argument( name="settings", default=(str, ""), diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index c1621f9ff..8886802be 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -70,6 +70,9 @@ class TopLevelParser(pydantic.BaseModel): pep_config: Optional[str] = ArgumentEnum.PEP_CONFIG.value.with_reduced_default() output_dir: Optional[str] = ArgumentEnum.OUTPUT_DIR.value.with_reduced_default() config_file: Optional[str] = ArgumentEnum.CONFIG_FILE.value.with_reduced_default() + looper_config: Optional[ + str + ] = ArgumentEnum.LOOPER_CONFIG.value.with_reduced_default() sample_pipeline_interfaces: Optional[ list[str] ] = ArgumentEnum.SAMPLE_PIPELINE_INTERFACES.value.with_reduced_default() From 8edc9bea4ffcd8a117fcf99f75f6d6f7b598e698 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 10:59:12 +0100 Subject: [PATCH 008/163] Formatting changes --- looper/utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/looper/utils.py b/looper/utils.py index 3796cbc6f..18b12c7c9 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -1,22 +1,21 @@ """ Helpers without an obvious logical home. 
""" import argparse -from collections import defaultdict, namedtuple import glob import itertools -from logging import getLogger import os +import re import sys +from logging import getLogger from typing import * -import re import jinja2 import yaml +from pephubclient.constants import RegistryPath from peppy import Project as peppyProject from peppy.const import * -from ubiquerg import convert_value, expandpath, parse_registry_path -from pephubclient.constants import RegistryPath from pydantic.error_wrappers import ValidationError +from ubiquerg import convert_value, expandpath, parse_registry_path from .const import * from .exceptions import MisconfigurationException, RegistryPathException From c9bdac8d87d5e5eb0566affa09a529c9ed1c7348 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 10:59:25 +0100 Subject: [PATCH 009/163] Un-uncomment `enrich_args_via_cfg()` call --- looper/cli_pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 4d11c61b3..7e593034e 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -57,7 +57,7 @@ def main() -> None: f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." 
) - # args = enrich_args_via_cfg(args, parser, False) + args = enrich_args_via_cfg(args, parser, False) divcfg = ( select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None ) From d1ca6a7af1baad7a88be4674b86629ce1e96f530 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 10:59:58 +0100 Subject: [PATCH 010/163] Modify enrich_args_via_cfg to get nested CLI args --- looper/utils.py | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/looper/utils.py b/looper/utils.py index 18b12c7c9..26d42b293 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -273,18 +273,36 @@ def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): cli_args, _ = aux_parser.parse_known_args() for dest in vars(parser_args): - if dest not in POSITIONAL or not hasattr(result, dest): - if dest in cli_args: - x = getattr(cli_args, dest) - r = convert_value(x) if isinstance(x, str) else x - elif cfg_args_all is not None and dest in cfg_args_all: - if isinstance(cfg_args_all[dest], list): - r = [convert_value(i) for i in cfg_args_all[dest]] + if dest not in ["run"]: + if dest not in POSITIONAL or not hasattr(result, dest): + if dest in cli_args: + x = getattr(cli_args, dest) + r = convert_value(x) if isinstance(x, str) else x + elif cfg_args_all is not None and dest in cfg_args_all: + if isinstance(cfg_args_all[dest], list): + r = [convert_value(i) for i in cfg_args_all[dest]] + else: + r = convert_value(cfg_args_all[dest]) else: - r = convert_value(cfg_args_all[dest]) - else: - r = getattr(parser_args, dest) - setattr(result, dest, r) + r = getattr(parser_args, dest) + setattr(result, dest, r) + else: + nested_result = argparse.Namespace() + dest_value = getattr(parser_args, dest) + for nested_dest in vars(dest_value): + if nested_dest not in POSITIONAL or not hasattr(result, nested_dest): + if nested_dest in cli_args: + x = getattr(cli_args, nested_dest) + r = convert_value(x) if isinstance(x, str) 
else x + elif cfg_args_all is not None and nested_dest in cfg_args_all: + if isinstance(cfg_args_all[nested_dest], list): + r = [convert_value(i) for i in cfg_args_all[nested_dest]] + else: + r = convert_value(cfg_args_all[nested_dest]) + else: + r = getattr(dest_value, nested_dest) + setattr(nested_result, nested_dest, r) + setattr(result, dest, nested_result) return result From a13c5d4f35465ed75f131ca9d069622ea6aa6c7c Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 11:22:04 +0100 Subject: [PATCH 011/163] Refactor `enrich_args_via_cfg` --- looper/utils.py | 53 ++++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/looper/utils.py b/looper/utils.py index 26d42b293..cd80d5208 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -272,37 +272,32 @@ def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): else: cli_args, _ = aux_parser.parse_known_args() - for dest in vars(parser_args): - if dest not in ["run"]: - if dest not in POSITIONAL or not hasattr(result, dest): - if dest in cli_args: - x = getattr(cli_args, dest) - r = convert_value(x) if isinstance(x, str) else x - elif cfg_args_all is not None and dest in cfg_args_all: - if isinstance(cfg_args_all[dest], list): - r = [convert_value(i) for i in cfg_args_all[dest]] - else: - r = convert_value(cfg_args_all[dest]) + + def set_single_arg(argname, default_source_namespace, result_namespace): + if argname not in POSITIONAL or not hasattr(result, argname): + if argname in cli_args: + cli_provided_value = getattr(cli_args, argname) + r = convert_value(cli_provided_value) if isinstance(cli_provided_value, str) else cli_provided_value + elif cfg_args_all is not None and argname in cfg_args_all: + if isinstance(cfg_args_all[argname], list): + r = [convert_value(i) for i in cfg_args_all[argname]] else: - r = getattr(parser_args, dest) - setattr(result, dest, r) + r = convert_value(cfg_args_all[argname]) + else: + r = 
getattr(default_source_namespace, argname) + setattr(result_namespace, argname, r) + + for top_level_argname in vars(parser_args): + if top_level_argname not in ["run"]: + # this argument is a top-level argument + set_single_arg(top_level_argname, parser_args, result) else: - nested_result = argparse.Namespace() - dest_value = getattr(parser_args, dest) - for nested_dest in vars(dest_value): - if nested_dest not in POSITIONAL or not hasattr(result, nested_dest): - if nested_dest in cli_args: - x = getattr(cli_args, nested_dest) - r = convert_value(x) if isinstance(x, str) else x - elif cfg_args_all is not None and nested_dest in cfg_args_all: - if isinstance(cfg_args_all[nested_dest], list): - r = [convert_value(i) for i in cfg_args_all[nested_dest]] - else: - r = convert_value(cfg_args_all[nested_dest]) - else: - r = getattr(dest_value, nested_dest) - setattr(nested_result, nested_dest, r) - setattr(result, dest, nested_result) + # this argument actually is a subcommand + enriched_command_namespace = argparse.Namespace() + command_namespace = getattr(parser_args, top_level_argname) + for argname in vars(command_namespace): + set_single_arg(argname, command_namespace, enriched_command_namespace) + setattr(result, top_level_argname, enriched_command_namespace) return result From df10a9bf83dcc1a174e62e7cbcaa2d29a963fdb1 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 11:59:56 +0100 Subject: [PATCH 012/163] Add `DIVVY` argument --- looper/command_models/arguments.py | 6 ++++++ looper/command_models/commands.py | 1 + 2 files changed, 7 insertions(+) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index b1ee53525..d8cd5f9c1 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -1,5 +1,6 @@ import enum from copy import copy +import os from typing import Any import pydantic @@ -153,3 +154,8 @@ class ArgumentEnum(enum.Enum): EXC_FLAG = Argument( name="exc_flag", default=(str, 
""), description="Sample exclusion flag" ) + DIVVY = Argument( + name="divvy", default=(str, os.getenv("DIVCFG", None)), description=( + "Path to divvy configuration file. Default=$DIVCFG env " + "variable. Currently: {}".format(os.getenv("DIVCFG", None) or "not set")) + ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 8886802be..32f9abdc7 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -50,6 +50,7 @@ def create_model(self) -> type[pydantic.BaseModel]: ArgumentEnum.LUMPN.value, ArgumentEnum.LIMIT.value, ArgumentEnum.SKIP.value, + ArgumentEnum.DIVVY.value ], ) RunParserModel = RunParser.create_model() From 564e01750b0f46a080bef978d651cd0af132e0db Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 12:05:39 +0100 Subject: [PATCH 013/163] Fix `divvy` argument accession --- looper/cli_pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 7e593034e..b03b3885a 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -59,7 +59,7 @@ def main() -> None: args = enrich_args_via_cfg(args, parser, False) divcfg = ( - select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None + select_divvy_config(filepath=args.run.divvy) if hasattr(args.run, "divvy") else None ) # Ignore flags if user is selecting or excluding on flags: if args.sel_flag or args.exc_flag: From d25e18677bb3c565d2ddf1c8eb9b596a0e1715eb Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 19:03:06 +0100 Subject: [PATCH 014/163] Clean up imports --- looper/cli_pydantic.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index b03b3885a..ef1e6411b 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -1,12 +1,9 @@ import os import sys -import logmuse import pydantic_argparse import yaml -from eido import inspect_project from 
pephubclient import PEPHubClient -from ubiquerg import VersionInHelpParser from divvy import select_divvy_config @@ -22,12 +19,8 @@ from .utils import ( dotfile_path, enrich_args_via_cfg, - init_generic_pipeline, - initiate_looper_config, is_registry_path, - read_looper_config_file, read_looper_dotfile, - read_yaml_file, ) From 8e40b21d9db458d1ffe1660f7e9d7e8fe9a626db Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 19:06:17 +0100 Subject: [PATCH 015/163] Add docstring to `cli_pydantic.py` It explicitly points out that this is only a test script. --- looper/cli_pydantic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index ef1e6411b..1ea55e33d 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -1,3 +1,17 @@ +""" +CLI script using `pydantic-argparse` for parsing of arguments + +Arguments / commands are defined in `command_models/` and are given, eventually, as +`pydantic` models, allowing for type-checking and validation of arguments. + +Note: this is only a test script so far, and coexists next to the current CLI +(`cli_looper.py`), which uses `argparse` directly. The goal is to eventually +replace the current CLI with a CLI based on above-mentioned `pydantic` models, +but whether this will happen with `pydantic-argparse` or another, possibly self- +written library is not yet clear. +It is well possible that this script will be removed again. 
+""" + import os import sys From f48c99d0a20b42ffead0a289e8be0bd00b40cc42 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 19:13:12 +0100 Subject: [PATCH 016/163] Add docstrings --- looper/command_models/arguments.py | 4 ++++ looper/command_models/commands.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index d8cd5f9c1..ca3c2824e 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -1,3 +1,7 @@ +""" +Argument definitions via a thin wrapper around `pydantic.fields.FieldInfo` +""" + import enum from copy import copy import os diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 32f9abdc7..9cee62050 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -1,3 +1,7 @@ +""" +`pydantic` models for `looper` commands and a wrapper class. +""" + from dataclasses import dataclass from typing import Optional From bbebe3e33472a89c42bba8454691b6808ae03e6f Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 17 Jan 2024 19:13:25 +0100 Subject: [PATCH 017/163] Refactor to easily support future commands --- looper/command_models/commands.py | 1 + looper/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 9cee62050..a71fd1b43 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -59,6 +59,7 @@ def create_model(self) -> type[pydantic.BaseModel]: ) RunParserModel = RunParser.create_model() +SUPPORTED_COMMANDS = [RunParser] class TopLevelParser(pydantic.BaseModel): """ diff --git a/looper/utils.py b/looper/utils.py index cd80d5208..515f75e74 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -18,8 +18,8 @@ from ubiquerg import convert_value, expandpath, parse_registry_path from .const import * +from .command_models.commands import 
SUPPORTED_COMMANDS from .exceptions import MisconfigurationException, RegistryPathException - _LOGGER = getLogger(__name__) @@ -288,7 +288,7 @@ def set_single_arg(argname, default_source_namespace, result_namespace): setattr(result_namespace, argname, r) for top_level_argname in vars(parser_args): - if top_level_argname not in ["run"]: + if top_level_argname not in [cmd.name for cmd in SUPPORTED_COMMANDS]: # this argument is a top-level argument set_single_arg(top_level_argname, parser_args, result) else: From 8487d4bae42494ad06389a61768ba79bd710e837 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Thu, 18 Jan 2024 17:17:19 +0800 Subject: [PATCH 018/163] HTTP API settings --- looper/api/__init__.py | 0 looper/api/main.py | 10 ++++++++++ requirements/requirements-all.txt | 2 ++ 3 files changed, 12 insertions(+) create mode 100644 looper/api/__init__.py create mode 100644 looper/api/main.py diff --git a/looper/api/__init__.py b/looper/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/looper/api/main.py b/looper/api/main.py new file mode 100644 index 000000000..24715665f --- /dev/null +++ b/looper/api/main.py @@ -0,0 +1,10 @@ +from fastapi import FastAPI +from looper.command_models.commands import RunParserModel + +app = FastAPI(validate_model=True) + + +@app.post("/run") +async def run_endpoint(run_model: RunParserModel): + print(run_model) + return run_model diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 3b8fa208c..34426795c 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -12,3 +12,5 @@ rich>=9.10.0 ubiquerg>=0.5.2 yacman>=0.9.2 pydantic-argparse==0.8.0 +fastapi +uvicorn From d8a106f9320662746e6b8773bb74aebc0f4e811f Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Thu, 18 Jan 2024 18:10:18 +0100 Subject: [PATCH 019/163] Run formatter --- looper/cli_pydantic.py | 4 +++- looper/command_models/arguments.py | 7 +++++-- looper/command_models/commands.py | 3 
++- looper/utils.py | 8 ++++++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 1ea55e33d..0ba540080 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -66,7 +66,9 @@ def main() -> None: args = enrich_args_via_cfg(args, parser, False) divcfg = ( - select_divvy_config(filepath=args.run.divvy) if hasattr(args.run, "divvy") else None + select_divvy_config(filepath=args.run.divvy) + if hasattr(args.run, "divvy") + else None ) # Ignore flags if user is selecting or excluding on flags: if args.sel_flag or args.exc_flag: diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index ca3c2824e..11e7912ef 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -159,7 +159,10 @@ class ArgumentEnum(enum.Enum): name="exc_flag", default=(str, ""), description="Sample exclusion flag" ) DIVVY = Argument( - name="divvy", default=(str, os.getenv("DIVCFG", None)), description=( + name="divvy", + default=(str, os.getenv("DIVCFG", None)), + description=( "Path to divvy configuration file. Default=$DIVCFG env " - "variable. Currently: {}".format(os.getenv("DIVCFG", None) or "not set")) + "variable. 
Currently: {}".format(os.getenv("DIVCFG", None) or "not set") + ), ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index a71fd1b43..3cb040aa2 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -54,13 +54,14 @@ def create_model(self) -> type[pydantic.BaseModel]: ArgumentEnum.LUMPN.value, ArgumentEnum.LIMIT.value, ArgumentEnum.SKIP.value, - ArgumentEnum.DIVVY.value + ArgumentEnum.DIVVY.value, ], ) RunParserModel = RunParser.create_model() SUPPORTED_COMMANDS = [RunParser] + class TopLevelParser(pydantic.BaseModel): """ Top level parser that takes diff --git a/looper/utils.py b/looper/utils.py index 515f75e74..86054fa86 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -20,6 +20,7 @@ from .const import * from .command_models.commands import SUPPORTED_COMMANDS from .exceptions import MisconfigurationException, RegistryPathException + _LOGGER = getLogger(__name__) @@ -272,12 +273,15 @@ def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): else: cli_args, _ = aux_parser.parse_known_args() - def set_single_arg(argname, default_source_namespace, result_namespace): if argname not in POSITIONAL or not hasattr(result, argname): if argname in cli_args: cli_provided_value = getattr(cli_args, argname) - r = convert_value(cli_provided_value) if isinstance(cli_provided_value, str) else cli_provided_value + r = ( + convert_value(cli_provided_value) + if isinstance(cli_provided_value, str) + else cli_provided_value + ) elif cfg_args_all is not None and argname in cfg_args_all: if isinstance(cfg_args_all[argname], list): r = [convert_value(i) for i in cfg_args_all[argname]] From 4acb00fe29df2a9b304041e0e2432ab00970f87d Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 12:13:35 +0800 Subject: [PATCH 020/163] Create an argparse.Namespace --- looper/api/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py 
index 24715665f..f46f97af8 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,10 +1,21 @@ +from argparse import Namespace + from fastapi import FastAPI from looper.command_models.commands import RunParserModel app = FastAPI(validate_model=True) +def create_argparse_namespace(run_model: RunParserModel) -> Namespace: + # Create an argparse namespace from the submitted run model + namespace = Namespace() + for arg in vars(run_model): + setattr(namespace, arg, getattr(run_model, arg)) + return namespace + + @app.post("/run") async def run_endpoint(run_model: RunParserModel): - print(run_model) + argparse_namespace = create_argparse_namespace(run_model) + print(argparse_namespace) return run_model From 3483e7fb5378d52c1435f8a742141a2a440183de Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 15:35:51 +0800 Subject: [PATCH 021/163] Add run function from cli_pydantic --- looper/api/main.py | 145 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 9 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index f46f97af8..69e99ef1d 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,21 +1,148 @@ +import os +import sys from argparse import Namespace +import yaml +from divvy import select_divvy_config from fastapi import FastAPI -from looper.command_models.commands import RunParserModel +from looper.cli_looper import _proc_resources_spec +from looper.command_models.commands import ( # RunParserModel, + SUPPORTED_COMMANDS, + TopLevelParser, +) +from looper.const import * +from looper.divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config +from looper.exceptions import * +from looper.looper import * +from looper.parser_types import * +from looper.project import Project, ProjectContext +from looper.utils import ( + dotfile_path, + enrich_args_via_cfg, + is_registry_path, + read_looper_dotfile, +) +from pephubclient import PEPHubClient app = FastAPI(validate_model=True) -def 
create_argparse_namespace(run_model: RunParserModel) -> Namespace: - # Create an argparse namespace from the submitted run model +def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: + # Create an argparse namespace from the submitted top level model namespace = Namespace() - for arg in vars(run_model): - setattr(namespace, arg, getattr(run_model, arg)) + for arg in vars(top_level_model): + if arg not in [cmd.name for cmd in SUPPORTED_COMMANDS]: + setattr(namespace, arg, getattr(top_level_model, arg)) + else: + command_namespace = Namespace() + command_namespace_args = getattr(top_level_model, arg) + for argname in vars(command_namespace_args): + setattr( + command_namespace, + argname, + getattr(command_namespace_args, argname), + ) + setattr(namespace, arg, command_namespace) return namespace +def run_cmd(args: Namespace): + # here comes adapted `cli_looper.py` code + looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) + try: + looper_config_dict = read_looper_dotfile() + + for looper_config_key, looper_config_item in looper_config_dict.items(): + print(looper_config_key, looper_config_item) + setattr(args, looper_config_key, looper_config_item) + + except OSError: + # parser.print_help(sys.stderr) + raise ValueError( + f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." 
+ ) + + print("#####################################") + print(args) + + # args = enrich_args_via_cfg(args, parser, False) + divcfg = ( + select_divvy_config(filepath=args.run.divvy) + if hasattr(args.run, "divvy") + else None + ) + # Ignore flags if user is selecting or excluding on flags: + if args.sel_flag or args.exc_flag: + args.ignore_flags = True + + # Initialize project + if is_registry_path(args.config_file): + if vars(args)[SAMPLE_PL_ARG]: + p = Project( + amendments=args.amend, + divcfg_path=divcfg, + runp=args.command == "runp", + project_dict=PEPHubClient()._load_raw_pep( + registry_path=args.config_file + ), + **{ + attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args + }, + ) + else: + raise MisconfigurationException( + f"`sample_pipeline_interface` is missing. Provide it in the parameters." + ) + else: + try: + p = Project( + cfg=args.config_file, + amendments=args.amend, + divcfg_path=divcfg, + runp=False, + **{ + attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args + }, + ) + except yaml.parser.ParserError as e: + _LOGGER.error(f"Project config parse failed -- {e}") + sys.exit(1) + + selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME + if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg): + _LOGGER.info( + "Failed to activate '{}' computing package. 
" + "Using the default one".format(selected_compute_pkg) + ) + + with ProjectContext( + prj=p, + selector_attribute="toggle", + selector_include=None, + selector_exclude=None, + selector_flag=None, + exclusion_flag=None, + ) as prj: + command = "run" + if command == "run": + run = Runner(prj) + try: + compute_kwargs = _proc_resources_spec(args) + return run(args, rerun=False, **compute_kwargs) + except SampleFailedException: + sys.exit(1) + except IOError: + _LOGGER.error( + "{} pipeline_interfaces: '{}'".format( + prj.__class__.__name__, prj.pipeline_interface_sources + ) + ) + raise + + @app.post("/run") -async def run_endpoint(run_model: RunParserModel): - argparse_namespace = create_argparse_namespace(run_model) - print(argparse_namespace) - return run_model +async def run_endpoint(top_level_model: TopLevelParser): + print(top_level_model) + argparse_namespace = create_argparse_namespace(top_level_model) + run_cmd(argparse_namespace) + return top_level_model From 73151e931357a1c726b83af10102e1157a172537 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 15:50:31 +0800 Subject: [PATCH 022/163] Adjust enrich_args_via_cfg to http api --- looper/utils.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/looper/utils.py b/looper/utils.py index 515f75e74..911ef3719 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -17,9 +17,10 @@ from pydantic.error_wrappers import ValidationError from ubiquerg import convert_value, expandpath, parse_registry_path -from .const import * from .command_models.commands import SUPPORTED_COMMANDS +from .const import * from .exceptions import MisconfigurationException, RegistryPathException + _LOGGER = getLogger(__name__) @@ -249,7 +250,7 @@ def read_yaml_file(filepath): return data -def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): +def enrich_args_via_cfg(parser_args, aux_parser, test_args=None, http_api=False): """ Read in a looper dotfile and set arguments. 
@@ -266,18 +267,24 @@ def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): else dict() ) result = argparse.Namespace() - if test_args: - cli_args, _ = aux_parser.parse_known_args(args=test_args) + if not http_api: + if test_args: + cli_args, _ = aux_parser.parse_known_args(args=test_args) + else: + cli_args, _ = aux_parser.parse_known_args() else: - cli_args, _ = aux_parser.parse_known_args() - + cli_args = [] def set_single_arg(argname, default_source_namespace, result_namespace): if argname not in POSITIONAL or not hasattr(result, argname): if argname in cli_args: cli_provided_value = getattr(cli_args, argname) - r = convert_value(cli_provided_value) if isinstance(cli_provided_value, str) else cli_provided_value + r = ( + convert_value(cli_provided_value) + if isinstance(cli_provided_value, str) + else cli_provided_value + ) elif cfg_args_all is not None and argname in cfg_args_all: if isinstance(cfg_args_all[argname], list): r = [convert_value(i) for i in cfg_args_all[argname]] From f4d684efdaeb49c5b7fda5a176f7771bee13844d Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 15:52:33 +0800 Subject: [PATCH 023/163] Run adjusted enrich_args_via_cfg in http api --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 69e99ef1d..8540c2088 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -65,7 +65,7 @@ def run_cmd(args: Namespace): print("#####################################") print(args) - # args = enrich_args_via_cfg(args, parser, False) + args = enrich_args_via_cfg(args, None, False, True) divcfg = ( select_divvy_config(filepath=args.run.divvy) if hasattr(args.run, "divvy") From 0141fb362422b9cff2e01589e06bc9f82c41d4bd Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 16:20:42 +0800 Subject: [PATCH 024/163] Re-organize cli_pydantic.py to run looper run via CLI and http-api --- looper/api/main.py | 134 
+++++------------------------------------ looper/cli_pydantic.py | 38 +++++++----- 2 files changed, 39 insertions(+), 133 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 8540c2088..65e16cbd4 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,34 +1,25 @@ -import os -import sys from argparse import Namespace -import yaml -from divvy import select_divvy_config from fastapi import FastAPI -from looper.cli_looper import _proc_resources_spec -from looper.command_models.commands import ( # RunParserModel, - SUPPORTED_COMMANDS, - TopLevelParser, -) -from looper.const import * -from looper.divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config -from looper.exceptions import * -from looper.looper import * -from looper.parser_types import * -from looper.project import Project, ProjectContext -from looper.utils import ( - dotfile_path, - enrich_args_via_cfg, - is_registry_path, - read_looper_dotfile, -) -from pephubclient import PEPHubClient +from looper.cli_pydantic import run_looper +from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser app = FastAPI(validate_model=True) def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: - # Create an argparse namespace from the submitted top level model + """ + Converts a TopLevelParser instance into an argparse.Namespace object. + + This function takes a TopLevelParser instance, and converts it into an + argparse.Namespace object. It includes handling for supported commands + specified in SUPPORTED_COMMANDS. + + :param TopLevelParser top_level_model: An instance of the TopLevelParser + model + :return argparse.Namespace: An argparse.Namespace object representing + the parsed command-line arguments. 
+ """ namespace = Namespace() for arg in vars(top_level_model): if arg not in [cmd.name for cmd in SUPPORTED_COMMANDS]: @@ -46,103 +37,8 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: return namespace -def run_cmd(args: Namespace): - # here comes adapted `cli_looper.py` code - looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) - try: - looper_config_dict = read_looper_dotfile() - - for looper_config_key, looper_config_item in looper_config_dict.items(): - print(looper_config_key, looper_config_item) - setattr(args, looper_config_key, looper_config_item) - - except OSError: - # parser.print_help(sys.stderr) - raise ValueError( - f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." - ) - - print("#####################################") - print(args) - - args = enrich_args_via_cfg(args, None, False, True) - divcfg = ( - select_divvy_config(filepath=args.run.divvy) - if hasattr(args.run, "divvy") - else None - ) - # Ignore flags if user is selecting or excluding on flags: - if args.sel_flag or args.exc_flag: - args.ignore_flags = True - - # Initialize project - if is_registry_path(args.config_file): - if vars(args)[SAMPLE_PL_ARG]: - p = Project( - amendments=args.amend, - divcfg_path=divcfg, - runp=args.command == "runp", - project_dict=PEPHubClient()._load_raw_pep( - registry_path=args.config_file - ), - **{ - attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args - }, - ) - else: - raise MisconfigurationException( - f"`sample_pipeline_interface` is missing. Provide it in the parameters." 
- ) - else: - try: - p = Project( - cfg=args.config_file, - amendments=args.amend, - divcfg_path=divcfg, - runp=False, - **{ - attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args - }, - ) - except yaml.parser.ParserError as e: - _LOGGER.error(f"Project config parse failed -- {e}") - sys.exit(1) - - selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME - if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg): - _LOGGER.info( - "Failed to activate '{}' computing package. " - "Using the default one".format(selected_compute_pkg) - ) - - with ProjectContext( - prj=p, - selector_attribute="toggle", - selector_include=None, - selector_exclude=None, - selector_flag=None, - exclusion_flag=None, - ) as prj: - command = "run" - if command == "run": - run = Runner(prj) - try: - compute_kwargs = _proc_resources_spec(args) - return run(args, rerun=False, **compute_kwargs) - except SampleFailedException: - sys.exit(1) - except IOError: - _LOGGER.error( - "{} pipeline_interfaces: '{}'".format( - prj.__class__.__name__, prj.pipeline_interface_sources - ) - ) - raise - - @app.post("/run") async def run_endpoint(top_level_model: TopLevelParser): - print(top_level_model) argparse_namespace = create_argparse_namespace(top_level_model) - run_cmd(argparse_namespace) + run_looper(argparse_namespace, None, True) return top_level_model diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 1ea55e33d..af566b658 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -14,10 +14,12 @@ import os import sys +from argparse import Namespace import pydantic_argparse import yaml from pephubclient import PEPHubClient +from pydantic_argparse.argparse.parser import ArgumentParser from divvy import select_divvy_config @@ -38,17 +40,9 @@ ) -def main() -> None: - parser = pydantic_argparse.ArgumentParser( - model=TopLevelParser, - prog="looper", - description="pydantic-argparse demo", - add_help=True, - ) - args = 
parser.parse_typed_args() - print(args) - print("#########################################") - +def run_looper( + args: Namespace | TopLevelParser, parser: ArgumentParser, http_api=False +): # here comes adapted `cli_looper.py` code looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) try: @@ -59,14 +53,17 @@ def main() -> None: setattr(args, looper_config_key, looper_config_item) except OSError: - parser.print_help(sys.stderr) + if not http_api: + parser.print_help(sys.stderr) raise ValueError( f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." ) - args = enrich_args_via_cfg(args, parser, False) + args = enrich_args_via_cfg(args, parser, False, http_api) divcfg = ( - select_divvy_config(filepath=args.run.divvy) if hasattr(args.run, "divvy") else None + select_divvy_config(filepath=args.run.divvy) + if hasattr(args.run, "divvy") + else None ) # Ignore flags if user is selecting or excluding on flags: if args.sel_flag or args.exc_flag: @@ -137,5 +134,18 @@ def main() -> None: raise +def main() -> None: + parser = pydantic_argparse.ArgumentParser( + model=TopLevelParser, + prog="looper", + description="pydantic-argparse demo", + add_help=True, + ) + args = parser.parse_typed_args() + print(args) + print("#########################################") + run_looper(args, parser) + + if __name__ == "__main__": main() From ceb2f99d434ca64705dd688c4e633a4976cad0b5 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 09:50:52 +0100 Subject: [PATCH 025/163] Infer currently used subcommand instead of hardcoding it --- looper/cli_pydantic.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 0ba540080..42b01a55b 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -23,7 +23,7 @@ from . 
import __version__ from .cli_looper import _proc_resources_spec -from .command_models.commands import TopLevelParser +from .command_models.commands import SUPPORTED_COMMANDS, TopLevelParser from .const import * from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config from .exceptions import * @@ -49,6 +49,16 @@ def main() -> None: print(args) print("#########################################") + # Find out which subcommand was used + supported_command_names = [cmd.name for cmd in SUPPORTED_COMMANDS] + subcommand_valued_args = [ + (arg, value) + for arg, value in vars(args).items() + if arg and arg in supported_command_names + ] + # Only one subcommand argument will be not `None`, else we found a bug in `pydantic-argparse` + [(subcommand_name, subcommand_args)] = subcommand_valued_args + # here comes adapted `cli_looper.py` code looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) try: @@ -66,8 +76,8 @@ def main() -> None: args = enrich_args_via_cfg(args, parser, False) divcfg = ( - select_divvy_config(filepath=args.run.divvy) - if hasattr(args.run, "divvy") + select_divvy_config(filepath=subcommand_args.divvy) + if hasattr(subcommand_args, "divvy") else None ) # Ignore flags if user is selecting or excluding on flags: @@ -122,8 +132,7 @@ def main() -> None: selector_flag=None, exclusion_flag=None, ) as prj: - command = "run" - if command == "run": + if subcommand_name == "run": run = Runner(prj) try: compute_kwargs = _proc_resources_spec(args) From def280d6823ccda5c8982fea45cd85b3e32c2326 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 09:52:16 +0100 Subject: [PATCH 026/163] Format `setup.py` --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e6d940868..c05314fe6 100644 --- a/setup.py +++ b/setup.py @@ -81,7 +81,7 @@ def get_static(name, condition=None): "console_scripts": [ "looper = looper.__main__:main", "divvy = looper.__main__:divvy_main", - 
"looper-pydantic-argparse = looper.cli_pydantic:main" + "looper-pydantic-argparse = looper.cli_pydantic:main", ], }, scripts=scripts, From 17cf965bfc08b23914af3210096a7dea121383a7 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 10:09:22 +0100 Subject: [PATCH 027/163] Loosen `pydantic-argparse` version constraint --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 3b8fa208c..71c0df877 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -11,4 +11,4 @@ pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.5.2 yacman>=0.9.2 -pydantic-argparse==0.8.0 +pydantic-argparse>=0.8.0 From e0338ff29f52e0a56b31b7d259f98e86bc7adbda Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 10:12:42 +0100 Subject: [PATCH 028/163] Apply suggestions from code review Co-authored-by: Vince --- looper/command_models/__init__.py | 6 +++--- looper/command_models/arguments.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/looper/command_models/__init__.py b/looper/command_models/__init__.py index 92f3a9e69..4258506b0 100644 --- a/looper/command_models/__init__.py +++ b/looper/command_models/__init__.py @@ -1,6 +1,6 @@ """ -This module holds `pydantic` models that describe commands and their arguments. +This package holds `pydantic` models that describe commands and their arguments. -These can be used either with the `pydantic-argparse` library to build a CLI or -by an HTTP API. +These can be used either by an HTTP API or with the `pydantic-argparse` +library to build a CLI. 
""" diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 11e7912ef..658ca051c 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -163,6 +163,6 @@ class ArgumentEnum(enum.Enum): default=(str, os.getenv("DIVCFG", None)), description=( "Path to divvy configuration file. Default=$DIVCFG env " - "variable. Currently: {}".format(os.getenv("DIVCFG", None) or "not set") + "variable. Currently: {}".format(os.getenv("DIVCFG") or "not set") ), ) From 74dbd1fb507853b6b447643a3717178f075c1d07 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 14:27:29 +0100 Subject: [PATCH 029/163] Slight refactor of `create_argparse_namespace` --- looper/api/main.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 65e16cbd4..9ec0418f8 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -21,19 +21,20 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: the parsed command-line arguments. 
""" namespace = Namespace() - for arg in vars(top_level_model): - if arg not in [cmd.name for cmd in SUPPORTED_COMMANDS]: - setattr(namespace, arg, getattr(top_level_model, arg)) + + for argname, value in vars(top_level_model).items(): + if argname not in [cmd.name for cmd in SUPPORTED_COMMANDS]: + setattr(namespace, argname, value) else: command_namespace = Namespace() - command_namespace_args = getattr(top_level_model, arg) - for argname in vars(command_namespace_args): + command_namespace_args = value + for command_argname, command_arg_value in vars(command_namespace_args).items(): setattr( command_namespace, - argname, - getattr(command_namespace_args, argname), + command_argname, + command_arg_value, ) - setattr(namespace, arg, command_namespace) + setattr(namespace, argname, command_namespace) return namespace From ee69d0c64abe57f57932d80658605b2444cbceec Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 14:47:16 +0100 Subject: [PATCH 030/163] Remove `run` from route That's because this endpoint will support _all_ commands, and not only `run`. 
--- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 9ec0418f8..a6addb2fb 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -38,7 +38,7 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: return namespace -@app.post("/run") +@app.post("/") async def run_endpoint(top_level_model: TopLevelParser): argparse_namespace = create_argparse_namespace(top_level_model) run_looper(argparse_namespace, None, True) From e241898a0dceacd2ff037e782e6ae76e79cc6e56 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 14:56:31 +0100 Subject: [PATCH 031/163] Capture stderr / stdout and return in HTTP response --- looper/api/main.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index a6addb2fb..7655c4b65 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,4 +1,6 @@ from argparse import Namespace +from contextlib import redirect_stderr, redirect_stdout +import io from fastapi import FastAPI from looper.cli_pydantic import run_looper @@ -41,5 +43,11 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: @app.post("/") async def run_endpoint(top_level_model: TopLevelParser): argparse_namespace = create_argparse_namespace(top_level_model) - run_looper(argparse_namespace, None, True) - return top_level_model + stdout_stream = io.StringIO() + stderr_stream = io.StringIO() + with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): + run_looper(argparse_namespace, None, True) + return { + "stdout": stdout_stream.getvalue(), + "stderr": stderr_stream.getvalue() + } From a7395559c66c62e42f4c7c1f4020d64447537195 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 15:05:23 +0100 Subject: [PATCH 032/163] Rename `run_endpoint` -> `main_endpoint` --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/looper/api/main.py b/looper/api/main.py index 7655c4b65..338d15ef3 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -41,7 +41,7 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: @app.post("/") -async def run_endpoint(top_level_model: TopLevelParser): +async def main_endpoint(top_level_model: TopLevelParser): argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() From 9b9c0a93496bbb99e8e683e00cc677c938e46975 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 15:13:34 +0100 Subject: [PATCH 033/163] Add response model --- looper/api/main.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 338d15ef3..2143e9358 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -5,6 +5,7 @@ from fastapi import FastAPI from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser +import pydantic app = FastAPI(validate_model=True) @@ -39,15 +40,22 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace +class MainResponse(pydantic.BaseModel): + """ + Response of the main endpoint. 
+ """ + stdout: str = pydantic.Field(description="Standard output produced by `looper` while running a command") + stderr: str = pydantic.Field(description="Standard error output produced by `looper` while running a command") + @app.post("/") -async def main_endpoint(top_level_model: TopLevelParser): +async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): run_looper(argparse_namespace, None, True) - return { - "stdout": stdout_stream.getvalue(), - "stderr": stderr_stream.getvalue() - } + return MainResponse( + stdout=stdout_stream.getvalue(), + stderr=stderr_stream.getvalue() + ) From 1ee1d471e599bbb3367fe88b584d6f63b40e5d5c Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 22:51:06 +0100 Subject: [PATCH 034/163] Add a comment about the endpoint likely being blocking --- looper/api/main.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/looper/api/main.py b/looper/api/main.py index 2143e9358..9b0109c9b 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -54,6 +54,15 @@ async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: stdout_stream = io.StringIO() stderr_stream = io.StringIO() with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): + # TODO: as it stands, because of the `async def`, and the lacking `await` + # in the following line, this endpoint is (I (Simeon) thing) currently blocking. + # We would need to make `run_looper()` return a future, but it inherently does + # not support `async` calls. + # So one option would be to run `run_looper()` in its own thread whose + # termination we can `await`, using `fastapi.run_in_threadpool`. But that fails + # with an error stemming from the `yacman` library about `signal.signal` only + # working in the main thread of the main interpreter. 
We have to investigate + # how to solve this. run_looper(argparse_namespace, None, True) return MainResponse( stdout=stdout_stream.getvalue(), From 97b3157d0a514f715af5b2e2a9f70184bee39972 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 15:57:29 +0800 Subject: [PATCH 035/163] Apply formatter --- looper/api/main.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 9b0109c9b..7091b7dfe 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,11 +1,11 @@ +import io from argparse import Namespace from contextlib import redirect_stderr, redirect_stdout -import io +import pydantic from fastapi import FastAPI from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser -import pydantic app = FastAPI(validate_model=True) @@ -31,7 +31,9 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: else: command_namespace = Namespace() command_namespace_args = value - for command_argname, command_arg_value in vars(command_namespace_args).items(): + for command_argname, command_arg_value in vars( + command_namespace_args + ).items(): setattr( command_namespace, command_argname, @@ -40,12 +42,18 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace + class MainResponse(pydantic.BaseModel): """ Response of the main endpoint. 
""" - stdout: str = pydantic.Field(description="Standard output produced by `looper` while running a command") - stderr: str = pydantic.Field(description="Standard error output produced by `looper` while running a command") + + stdout: str = pydantic.Field( + description="Standard output produced by `looper` while running a command" + ) + stderr: str = pydantic.Field( + description="Standard error output produced by `looper` while running a command" + ) @app.post("/") @@ -65,6 +73,5 @@ async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: # how to solve this. run_looper(argparse_namespace, None, True) return MainResponse( - stdout=stdout_stream.getvalue(), - stderr=stderr_stream.getvalue() + stdout=stdout_stream.getvalue(), stderr=stderr_stream.getvalue() ) From 8f7e2750769ba79dec0e9cae38386ac169681bdc Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 17:14:45 +0800 Subject: [PATCH 036/163] Apply formatter --- looper/command_models/arguments.py | 9 ++++++--- looper/command_models/commands.py | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index ca3c2824e..33aa037e8 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -3,8 +3,8 @@ """ import enum -from copy import copy import os +from copy import copy from typing import Any import pydantic @@ -159,7 +159,10 @@ class ArgumentEnum(enum.Enum): name="exc_flag", default=(str, ""), description="Sample exclusion flag" ) DIVVY = Argument( - name="divvy", default=(str, os.getenv("DIVCFG", None)), description=( + name="divvy", + default=(str, os.getenv("DIVCFG", None)), + description=( "Path to divvy configuration file. Default=$DIVCFG env " - "variable. Currently: {}".format(os.getenv("DIVCFG", None) or "not set")) + "variable. 
Currently: {}".format(os.getenv("DIVCFG", None) or "not set") + ), ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index a71fd1b43..3cb040aa2 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -54,13 +54,14 @@ def create_model(self) -> type[pydantic.BaseModel]: ArgumentEnum.LUMPN.value, ArgumentEnum.LIMIT.value, ArgumentEnum.SKIP.value, - ArgumentEnum.DIVVY.value + ArgumentEnum.DIVVY.value, ], ) RunParserModel = RunParser.create_model() SUPPORTED_COMMANDS = [RunParser] + class TopLevelParser(pydantic.BaseModel): """ Top level parser that takes From 09b057f60ec0bd6511199401b4cb1d7e4d821f80 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 17:16:27 +0800 Subject: [PATCH 037/163] Add arguments for logging These arguments are compatible with logmuse generated parser --- looper/command_models/arguments.py | 18 ++++++++++++++++++ looper/command_models/commands.py | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 33aa037e8..f4e14f76f 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -166,3 +166,21 @@ class ArgumentEnum(enum.Enum): "variable. 
Currently: {}".format(os.getenv("DIVCFG", None) or "not set") ), ) + + # Arguments for logger compatible with logmuse + SILENT = Argument( + name="silent", default=(bool, False), description="Whether to silence logging" + ) + VERBOSITY = Argument( + name="verbosity", + default=(int, None), + description="Alternate mode of expression for logging level that better " + "accords with intuition about how to convey this.", + ) + LOGDEV = Argument( + name="logdev", + default=(bool, False), + description="whether to log in development mode; possibly among other " + "behavioral changes to logs handling, use a more information-rich " + "message format template.", + ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 3cb040aa2..0a5b6df75 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -89,3 +89,8 @@ class TopLevelParser(pydantic.BaseModel): amend: Optional[list[str]] = ArgumentEnum.AMEND.value.with_reduced_default() sel_flag: Optional[str] = ArgumentEnum.SEL_FLAG.value.with_reduced_default() exc_flag: Optional[str] = ArgumentEnum.EXC_FLAG.value.with_reduced_default() + + # arguments for logging + silent: Optional[bool] = ArgumentEnum.SILENT.value.with_reduced_default() + verbosity: Optional[int] = ArgumentEnum.VERBOSITY.value.with_reduced_default() + logdev: Optional[bool] = ArgumentEnum.LOGDEV.value.with_reduced_default() From 4cf48bcea080b8ae27fa157e7b0062888a1cc26c Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 17:17:21 +0800 Subject: [PATCH 038/163] Add logger def to be captured by API and also CLI --- looper/cli_pydantic.py | 54 +++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index af566b658..9f8f152e2 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -16,6 +16,7 @@ import sys from argparse import Namespace +import logmuse import pydantic_argparse 
import yaml from pephubclient import PEPHubClient @@ -25,7 +26,7 @@ from . import __version__ from .cli_looper import _proc_resources_spec -from .command_models.commands import TopLevelParser +from .command_models.commands import SUPPORTED_COMMANDS, TopLevelParser from .const import * from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config from .exceptions import * @@ -36,6 +37,7 @@ dotfile_path, enrich_args_via_cfg, is_registry_path, + read_looper_config_file, read_looper_dotfile, ) @@ -44,22 +46,46 @@ def run_looper( args: Namespace | TopLevelParser, parser: ArgumentParser, http_api=False ): # here comes adapted `cli_looper.py` code - looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) - try: - looper_config_dict = read_looper_dotfile() - - for looper_config_key, looper_config_item in looper_config_dict.items(): - print(looper_config_key, looper_config_item) - setattr(args, looper_config_key, looper_config_item) - - except OSError: - if not http_api: - parser.print_help(sys.stderr) - raise ValueError( - f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." + global _LOGGER + + _LOGGER = logmuse.logger_via_cli(args, make_root=True) + args_command = [ + attr for attr in [cmd.name for cmd in SUPPORTED_COMMANDS] if hasattr(args, attr) + ] + _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args_command)) + + if args.config_file is None: + looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) + try: + if args.looper_config: + looper_config_dict = read_looper_config_file(args.looper_config) + else: + looper_config_dict = read_looper_dotfile() + _LOGGER.info(f"Using looper config ({looper_cfg_path}).") + + for looper_config_key, looper_config_item in looper_config_dict.items(): + setattr(args, looper_config_key, looper_config_item) + + except OSError: + if not http_api: + parser.print_help(sys.stderr) + _LOGGER.warning( + f"Looper config file does not exist. 
Use looper init to create one at {looper_cfg_path}." + ) + sys.exit(1) + else: + _LOGGER.warning( + "This PEP configures looper through the project config. This approach is deprecated and will " + "be removed in future versions. Please use a looper config file. For more information see " + "looper.databio.org/en/latest/looper-config" ) args = enrich_args_via_cfg(args, parser, False, http_api) + + # If project pipeline interface defined in the cli, change name to: "pipeline_interface" + if vars(args)[PROJECT_PL_ARG]: + args.pipeline_interfaces = vars(args)[PROJECT_PL_ARG] + divcfg = ( select_divvy_config(filepath=args.run.divvy) if hasattr(args.run, "divvy") From 6a53856306c8fee7b18fb7c82f89d21338b16baf Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 17:30:16 +0800 Subject: [PATCH 039/163] Add README for the API --- looper/api/README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 looper/api/README.md diff --git a/looper/api/README.md b/looper/api/README.md new file mode 100644 index 000000000..ffcf951e7 --- /dev/null +++ b/looper/api/README.md @@ -0,0 +1,21 @@ +# Looper HTTP API + +## Overview + +This API provides an HTTP interface for running the `looper` commands, allowing users to interact with Looper via HTTP requests. + +## Usage +### Running the API +To run the API, execute the following command: +```bash +cd looper/api +uvicorn main:app --reload +``` +### Example API Usage +To run the `looper run` command through the HTTP API, you can use the following curl command: +```bash +curl -X POST -H "Content-Type: application/json" -d '{"run": {}, "looper_config": ".looper.yaml"}' "http://127.0.0.1:8000" +``` +with the project files in the same `looper/api` folder. + +This example sends a JSON payload with the `run` and `looper_config` parameters to the `/` endpoint. 
From 6c13bb9133e9b79b3d11ab3885a89f250fcff5b9 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Tue, 23 Jan 2024 13:15:02 +0800 Subject: [PATCH 040/163] Adjust run_looper to get which subcommand was used --- looper/cli_pydantic.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index a2356a20d..28ca25add 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -49,10 +49,18 @@ def run_looper( global _LOGGER _LOGGER = logmuse.logger_via_cli(args, make_root=True) - args_command = [ - attr for attr in [cmd.name for cmd in SUPPORTED_COMMANDS] if hasattr(args, attr) + + # Find out which subcommand was used + supported_command_names = [cmd.name for cmd in SUPPORTED_COMMANDS] + subcommand_valued_args = [ + (arg, value) + for arg, value in vars(args).items() + if arg and arg in supported_command_names ] - _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args_command)) + # Only one subcommand argument will be not `None`, else we found a bug in `pydantic-argparse` + [(subcommand_name, subcommand_args)] = subcommand_valued_args + + _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, subcommand_name)) if args.config_file is None: looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) From 7cd099f24689082a25a7c9279130b88dbdf1baaf Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Tue, 23 Jan 2024 17:47:24 +0800 Subject: [PATCH 041/163] Add endpoint "/status" to capture UUID --- looper/api/main.py | 15 ++++++++++++++- looper/cli_pydantic.py | 4 ++++ requirements/requirements-all.txt | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 7091b7dfe..9a455feb4 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -9,6 +9,8 @@ app = FastAPI(validate_model=True) +_UUID = None + def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: """ @@ -58,6 +60,7 @@ class 
MainResponse(pydantic.BaseModel): @app.post("/") async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: + global _UUID argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() @@ -71,7 +74,17 @@ async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: # with an error stemming from the `yacman` library about `signal.signal` only # working in the main thread of the main interpreter. We have to investigate # how to solve this. - run_looper(argparse_namespace, None, True) + _, _UUID = run_looper(argparse_namespace, None, True) return MainResponse( stdout=stdout_stream.getvalue(), stderr=stderr_stream.getvalue() ) + + +@app.get("/status") +async def get_status(): + global _UUID + if _UUID: + print(_UUID) + return {"UUID": _UUID} + else: + return {"UUID": "Not found"} diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 28ca25add..dfeef07de 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -14,6 +14,7 @@ import os import sys +import uuid from argparse import Namespace import logmuse @@ -48,6 +49,7 @@ def run_looper( # here comes adapted `cli_looper.py` code global _LOGGER + _UUID = str(uuid.uuid4()) _LOGGER = logmuse.logger_via_cli(args, make_root=True) # Find out which subcommand was used @@ -155,6 +157,8 @@ def run_looper( run = Runner(prj) try: compute_kwargs = _proc_resources_spec(args) + if http_api: + return (run(args, rerun=False, **compute_kwargs), _UUID) return run(args, rerun=False, **compute_kwargs) except SampleFailedException: sys.exit(1) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 0c3ae375d..bcd2ca204 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -14,3 +14,4 @@ yacman>=0.9.2 pydantic-argparse>=0.8.0 fastapi uvicorn +uuid From 0cf4ecc099fbcf8f0c789e73aea8bd58291d1f60 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Wed, 24 Jan 2024 
18:07:58 +0800 Subject: [PATCH 042/163] Create 2-step job submission and result workflow Co-authored-by: Simeon Carstens --- looper/api/main.py | 68 +++++++++++++++++++++++++----------------- looper/cli_pydantic.py | 4 --- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 9a455feb4..14fb5bad4 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,15 +1,46 @@ import io from argparse import Namespace from contextlib import redirect_stderr, redirect_stdout +from typing import Dict +from uuid import UUID, uuid4 import pydantic from fastapi import FastAPI from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser + +class Job(pydantic.BaseModel): + id: UUID = pydantic.Field(default_factory=uuid4) + status: str = "in_progress" + progress: int = 0 + stdout: str = None + stderr: str = None + + app = FastAPI(validate_model=True) +jobs: Dict[UUID, Job] = {} + + +def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: + argparse_namespace = create_argparse_namespace(top_level_model) + stdout_stream = io.StringIO() + stderr_stream = io.StringIO() + with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): + # TODO: as it stands, because of the `async def`, and the lacking `await` + # in the following line, this endpoint is (I (Simeon) thing) currently blocking. + # We would need to make `run_looper()` return a future, but it inherently does + # not support `async` calls. + # So one option would be to run `run_looper()` in its own thread whose + # termination we can `await`, using `fastapi.run_in_threadpool`. But that fails + # with an error stemming from the `yacman` library about `signal.signal` only + # working in the main thread of the main interpreter. We have to investigate + # how to solve this. 
+ run_looper(argparse_namespace, None, True) -_UUID = None + jobs[job_id].status = "completed" + jobs[job_id].stdout = stdout_stream.getvalue() + jobs[job_id].stderr = stderr_stream.getvalue() def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: @@ -59,32 +90,13 @@ class MainResponse(pydantic.BaseModel): @app.post("/") -async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: - global _UUID - argparse_namespace = create_argparse_namespace(top_level_model) - stdout_stream = io.StringIO() - stderr_stream = io.StringIO() - with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): - # TODO: as it stands, because of the `async def`, and the lacking `await` - # in the following line, this endpoint is (I (Simeon) thing) currently blocking. - # We would need to make `run_looper()` return a future, but it inherently does - # not support `async` calls. - # So one option would be to run `run_looper()` in its own thread whose - # termination we can `await`, using `fastapi.run_in_threadpool`. But that fails - # with an error stemming from the `yacman` library about `signal.signal` only - # working in the main thread of the main interpreter. We have to investigate - # how to solve this. 
- _, _UUID = run_looper(argparse_namespace, None, True) - return MainResponse( - stdout=stdout_stream.getvalue(), stderr=stderr_stream.getvalue() - ) +async def main_endpoint(top_level_model: TopLevelParser) -> Dict: + job = Job() + jobs[job.id] = job + background_async(top_level_model, job.id) + return {"job_id": job.id} -@app.get("/status") -async def get_status(): - global _UUID - if _UUID: - print(_UUID) - return {"UUID": _UUID} - else: - return {"UUID": "Not found"} +@app.get("/status/{job_id}") +async def get_status(job_id: UUID): + return jobs[job_id] diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index dfeef07de..28ca25add 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -14,7 +14,6 @@ import os import sys -import uuid from argparse import Namespace import logmuse @@ -49,7 +48,6 @@ def run_looper( # here comes adapted `cli_looper.py` code global _LOGGER - _UUID = str(uuid.uuid4()) _LOGGER = logmuse.logger_via_cli(args, make_root=True) # Find out which subcommand was used @@ -157,8 +155,6 @@ def run_looper( run = Runner(prj) try: compute_kwargs = _proc_resources_spec(args) - if http_api: - return (run(args, rerun=False, **compute_kwargs), _UUID) return run(args, rerun=False, **compute_kwargs) except SampleFailedException: sys.exit(1) From e68c5425b99a258a0c88b551ce64a08dd7487d85 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 15:44:58 +0100 Subject: [PATCH 043/163] Allow `None` stderr / stdout in job model --- looper/api/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 14fb5bad4..e727f3b9f 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -14,8 +14,8 @@ class Job(pydantic.BaseModel): id: UUID = pydantic.Field(default_factory=uuid4) status: str = "in_progress" progress: int = 0 - stdout: str = None - stderr: str = None + stdout: str | None = None + stderr: str | None = None app = FastAPI(validate_model=True) From 
d0ab17ee6865b3d27f3dd9cacbce999324ffe408 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 15:45:33 +0100 Subject: [PATCH 044/163] Run `run_looper()` in FastAPI background task --- looper/api/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index e727f3b9f..c13ffc959 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -3,6 +3,7 @@ from contextlib import redirect_stderr, redirect_stdout from typing import Dict from uuid import UUID, uuid4 +import fastapi import pydantic from fastapi import FastAPI @@ -22,7 +23,7 @@ class Job(pydantic.BaseModel): jobs: Dict[UUID, Job] = {} -def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: +async def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() @@ -90,10 +91,10 @@ class MainResponse(pydantic.BaseModel): @app.post("/") -async def main_endpoint(top_level_model: TopLevelParser) -> Dict: +async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> Dict: job = Job() jobs[job.id] = job - background_async(top_level_model, job.id) + background_tasks.add_task(background_async, top_level_model, job.id) return {"job_id": job.id} From 903c626a85cd8acf161df319f38fe017bca25c33 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:03:51 +0100 Subject: [PATCH 045/163] Document / make self-documenting `Job` fields --- looper/api/main.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index c13ffc959..37aaa1b92 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -12,11 +12,21 @@ class Job(pydantic.BaseModel): - id: UUID = pydantic.Field(default_factory=uuid4) - status: str = "in_progress" + id: UUID = pydantic.Field( 
+ default_factory=uuid4, + description="The unique identifier of the job" + ) + status: str = pydantic.Field( + default="in_progress", + description="The current status of the job. Can be either `in_progress` or `completed`." + ) progress: int = 0 - stdout: str | None = None - stderr: str | None = None + stdout: str | None = pydantic.Field(default=None, + description="Standard output produced by `looper` while performing the requested action" + ) + stderr: str | None = pydantic.Field(default=None, + description="Standard error output produced by `looper` while performing the requested action" + ) app = FastAPI(validate_model=True) @@ -76,20 +86,6 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace - -class MainResponse(pydantic.BaseModel): - """ - Response of the main endpoint. - """ - - stdout: str = pydantic.Field( - description="Standard output produced by `looper` while running a command" - ) - stderr: str = pydantic.Field( - description="Standard error output produced by `looper` while running a command" - ) - - @app.post("/") async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> Dict: job = Job() From 1714de13a42679c9277252a7f6b1b7e4cfb6f88a Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:08:49 +0100 Subject: [PATCH 046/163] Make `/` route return a 202 (Accepted) HTTP status code --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 37aaa1b92..7826786c1 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -86,7 +86,7 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace -@app.post("/") +@app.post("/", status_code=202) async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> 
Dict: job = Job() jobs[job.id] = job From 76ca1aa41bf4b7fe953242958cf0317f548b62fa Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:20:13 +0100 Subject: [PATCH 047/163] Replace job UUID with a shorter random string The UUID is very, very long, and such a length / collision safety is not needed. So this uses a much shorter (length 6 characters) random job ID created using `secrets.token_urlsafe()`. Empiric testing says that this gives, for two random IDs, a collision probability of ~1e-4. --- looper/api/main.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 7826786c1..46da4bbf1 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,8 +1,8 @@ import io from argparse import Namespace from contextlib import redirect_stderr, redirect_stdout -from typing import Dict -from uuid import UUID, uuid4 +import secrets +from typing import Dict, TypeAlias import fastapi import pydantic @@ -10,10 +10,11 @@ from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser +JobId: TypeAlias = str class Job(pydantic.BaseModel): - id: UUID = pydantic.Field( - default_factory=uuid4, + id: JobId = pydantic.Field( + default_factory=lambda: secrets.token_urlsafe(4), description="The unique identifier of the job" ) status: str = pydantic.Field( @@ -28,12 +29,11 @@ class Job(pydantic.BaseModel): description="Standard error output produced by `looper` while performing the requested action" ) - app = FastAPI(validate_model=True) -jobs: Dict[UUID, Job] = {} +jobs: Dict[str, Job] = {} -async def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: +async def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() @@ -95,5 +95,5 @@ async def 
main_endpoint(top_level_model: TopLevelParser, background_tasks: fasta @app.get("/status/{job_id}") -async def get_status(job_id: UUID): +async def get_status(job_id: JobId): return jobs[job_id] From 3d081ee7f4d1c050c19639e052632107503808cb Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:23:53 +0100 Subject: [PATCH 048/163] Reorder imports --- looper/api/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 46da4bbf1..f74a4b310 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -3,10 +3,11 @@ from contextlib import redirect_stderr, redirect_stdout import secrets from typing import Dict, TypeAlias -import fastapi -import pydantic +import fastapi from fastapi import FastAPI +import pydantic + from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser From 18b293ce5ddad32add960dbf9f753922cff68a07 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 17:03:05 +0100 Subject: [PATCH 049/163] Fix a typo --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index f74a4b310..6c8c8d61a 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -40,7 +40,7 @@ async def background_async(top_level_model: TopLevelParser, job_id: JobId) -> No stderr_stream = io.StringIO() with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): # TODO: as it stands, because of the `async def`, and the lacking `await` - # in the following line, this endpoint is (I (Simeon) thing) currently blocking. + # in the following line, this endpoint is (I (Simeon) think) currently blocking. # We would need to make `run_looper()` return a future, but it inherently does # not support `async` calls. 
# So one option would be to run `run_looper()` in its own thread whose From 33ce8837295fff9cd46b92fb1519e77a85134ffb Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 26 Jan 2024 12:44:44 +0800 Subject: [PATCH 050/163] Fix typing to support Python 3.8 --- looper/cli_pydantic.py | 5 +++++ looper/command_models/arguments.py | 8 ++++---- looper/command_models/commands.py | 12 ++++++------ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 42b01a55b..a93965df2 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -12,6 +12,11 @@ It is well possible that this script will be removed again. """ +# Note: The following import is used for forward annotations (Python 3.8) +# to prevent potential 'TypeError' related to the use of the '|' operator +# with types. +from __future__ import annotations + import os import sys diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 658ca051c..09a813747 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -5,7 +5,7 @@ import enum from copy import copy import os -from typing import Any +from typing import Any, List import pydantic @@ -141,16 +141,16 @@ class ArgumentEnum(enum.Enum): ) SAMPLE_PIPELINE_INTERFACES = Argument( name="sample_pipeline_interfaces", - default=(list, []), + default=(List, []), description="Paths to looper sample config files", ) PROJECT_PIPELINE_INTERFACES = Argument( name="project_pipeline_interfaces", - default=(list, []), + default=(List, []), description="Paths to looper project config files", ) AMEND = Argument( - name="amend", default=(list, []), description="List of amendments to activate" + name="amend", default=(List, []), description="List of amendments to activate" ) SEL_FLAG = Argument( name="sel_flag", default=(str, ""), description="Sample selection flag" diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 
3cb040aa2..be2330d18 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -3,7 +3,7 @@ """ from dataclasses import dataclass -from typing import Optional +from typing import List, Optional, Type import pydantic @@ -23,9 +23,9 @@ class Command: name: str description: str - arguments: list[Argument] + arguments: List[Argument] - def create_model(self) -> type[pydantic.BaseModel]: + def create_model(self) -> Type[pydantic.BaseModel]: """ Creates a `pydantic` model for this command """ @@ -81,11 +81,11 @@ class TopLevelParser(pydantic.BaseModel): str ] = ArgumentEnum.LOOPER_CONFIG.value.with_reduced_default() sample_pipeline_interfaces: Optional[ - list[str] + List[str] ] = ArgumentEnum.SAMPLE_PIPELINE_INTERFACES.value.with_reduced_default() project_pipeline_interfaces: Optional[ - list[str] + List[str] ] = ArgumentEnum.PROJECT_PIPELINE_INTERFACES.value.with_reduced_default() - amend: Optional[list[str]] = ArgumentEnum.AMEND.value.with_reduced_default() + amend: Optional[List[str]] = ArgumentEnum.AMEND.value.with_reduced_default() sel_flag: Optional[str] = ArgumentEnum.SEL_FLAG.value.with_reduced_default() exc_flag: Optional[str] = ArgumentEnum.EXC_FLAG.value.with_reduced_default() From 0b7f237e02436b5ea3ff9eb49a47f4d462bb25eb Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:16:28 +0100 Subject: [PATCH 051/163] Remove `uuid` dependency --- requirements/requirements-all.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index bcd2ca204..0c3ae375d 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -14,4 +14,3 @@ yacman>=0.9.2 pydantic-argparse>=0.8.0 fastapi uvicorn -uuid From 0cf000e961559762e828f0b301fea56c07f0c182 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:21:23 +0100 Subject: [PATCH 052/163] Change `yacman` dependency to a hacked, but threadable version This is a 
temporary hack: use a Yacman branch that makes Yacman's YAMLConfigmanager not capture SIGTERM / SIGKILL (and thus Ctrl+c events). In the near future, this will be replaced by a new version of Yacman that supports a read-only mode. --- requirements/requirements-all.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 0c3ae375d..ac89c71d1 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -10,7 +10,11 @@ pipestat>=0.6.0 pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.5.2 -yacman>=0.9.2 +# This is a temporary hack: use a Yacman branch that makes Yacman's +# YAMLConfigmanager not capture SIGTERM / SIGKILL (and thus Ctrl+c events). +# In the near future, this will be replaced by a new version of Yacman that +# supports a read-only mode. +git+https://github.com/databio/yacman.git@tweag/thread-unsafe pydantic-argparse>=0.8.0 fastapi uvicorn From 429e665e57e9d9c4efb145bb581480ebae074d53 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:23:59 +0100 Subject: [PATCH 053/163] Add lower bound for FastAPI version --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index ac89c71d1..e5fb45b40 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -16,5 +16,5 @@ ubiquerg>=0.5.2 # supports a read-only mode. 
git+https://github.com/databio/yacman.git@tweag/thread-unsafe pydantic-argparse>=0.8.0 -fastapi uvicorn +fastapi>=0.109.0 From b5e723ff830c0ed7194e35d8395d2ac5ae34766f Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:24:15 +0100 Subject: [PATCH 054/163] Add lower bound for uvicorn version --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index e5fb45b40..06adde4f2 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -16,5 +16,5 @@ ubiquerg>=0.5.2 # supports a read-only mode. git+https://github.com/databio/yacman.git@tweag/thread-unsafe pydantic-argparse>=0.8.0 -uvicorn fastapi>=0.109.0 +uvicorn>=0.26.0 From f92be03d3beb7c238e3acc1ac4bdf676a17f80cb Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:27:46 +0100 Subject: [PATCH 055/163] Make background task function non-`async` This allows, together with the hacked, threadable (but _not_ thread-safe) `yacman` version, to run `looper` commands in a non-blocking way. 
--- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 6c8c8d61a..e3ab9a82b 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -34,7 +34,7 @@ class Job(pydantic.BaseModel): jobs: Dict[str, Job] = {} -async def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: +def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() From 693e56e6454efa5bd1198ec4bf12c8a69b4af699 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 10:29:26 +0100 Subject: [PATCH 056/163] Add logging setup Co-authored-by: Zhihan Zhang --- looper/cli_pydantic.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index a93965df2..336ba12d5 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -20,6 +20,7 @@ import os import sys +import logmuse import pydantic_argparse import yaml from pephubclient import PEPHubClient @@ -77,6 +78,13 @@ def main() -> None: parser.print_help(sys.stderr) raise ValueError( f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." 
+ global _LOGGER + + _LOGGER = logmuse.logger_via_cli(args, make_root=True) + args_command = [ + attr for attr in [cmd.name for cmd in SUPPORTED_COMMANDS] if hasattr(args, attr) + ] + _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args_command)) ) args = enrich_args_via_cfg(args, parser, False) From efc70aa53da00556d9e63593cfdef2c6fe9313a3 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 10:31:19 +0100 Subject: [PATCH 057/163] Make CLI script accept `looper-config` argument Co-authored-by: Zhihan Zhang --- looper/cli_pydantic.py | 78 ++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 336ba12d5..945ddd919 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -24,6 +24,7 @@ import pydantic_argparse import yaml from pephubclient import PEPHubClient +from pydantic_argparse.argparse.parser import ArgumentParser from divvy import select_divvy_config @@ -40,20 +41,16 @@ dotfile_path, enrich_args_via_cfg, is_registry_path, + read_looper_config_file, read_looper_dotfile, ) -def main() -> None: - parser = pydantic_argparse.ArgumentParser( - model=TopLevelParser, - prog="looper", - description="pydantic-argparse demo", - add_help=True, - ) - args = parser.parse_typed_args() - print(args) - print("#########################################") +def run_looper(args: TopLevelParser, parser: ArgumentParser): + # here comes adapted `cli_looper.py` code + global _LOGGER + + _LOGGER = logmuse.logger_via_cli(args, make_root=True) # Find out which subcommand was used supported_command_names = [cmd.name for cmd in SUPPORTED_COMMANDS] @@ -65,29 +62,39 @@ def main() -> None: # Only one subcommand argument will be not `None`, else we found a bug in `pydantic-argparse` [(subcommand_name, subcommand_args)] = subcommand_valued_args - # here comes adapted `cli_looper.py` code - looper_cfg_path = os.path.relpath(dotfile_path(), 
start=os.curdir) - try: - looper_config_dict = read_looper_dotfile() - - for looper_config_key, looper_config_item in looper_config_dict.items(): - print(looper_config_key, looper_config_item) - setattr(args, looper_config_key, looper_config_item) - - except OSError: - parser.print_help(sys.stderr) - raise ValueError( - f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." - global _LOGGER + _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, subcommand_name)) - _LOGGER = logmuse.logger_via_cli(args, make_root=True) - args_command = [ - attr for attr in [cmd.name for cmd in SUPPORTED_COMMANDS] if hasattr(args, attr) - ] - _LOGGER.info("Looper version: {}\nCommand: {}".format(__version__, args_command)) + if args.config_file is None: + looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) + try: + if args.looper_config: + looper_config_dict = read_looper_config_file(args.looper_config) + else: + looper_config_dict = read_looper_dotfile() + _LOGGER.info(f"Using looper config ({looper_cfg_path}).") + + for looper_config_key, looper_config_item in looper_config_dict.items(): + setattr(args, looper_config_key, looper_config_item) + + except OSError: + parser.print_help(sys.stderr) + _LOGGER.warning( + f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." + ) + sys.exit(1) + else: + _LOGGER.warning( + "This PEP configures looper through the project config. This approach is deprecated and will " + "be removed in future versions. Please use a looper config file. 
For more information see " + "looper.databio.org/en/latest/looper-config" ) args = enrich_args_via_cfg(args, parser, False) + + # If project pipeline interface defined in the cli, change name to: "pipeline_interface" + if vars(args)[PROJECT_PL_ARG]: + args.pipeline_interfaces = vars(args)[PROJECT_PL_ARG] + divcfg = ( select_divvy_config(filepath=subcommand_args.divvy) if hasattr(subcommand_args, "divvy") @@ -161,5 +168,18 @@ def main() -> None: raise +def main() -> None: + parser = pydantic_argparse.ArgumentParser( + model=TopLevelParser, + prog="looper", + description="pydantic-argparse demo", + add_help=True, + ) + args = parser.parse_typed_args() + print(args) + print("#########################################") + run_looper(args, parser) + + if __name__ == "__main__": main() From d71763b58eab70fc185260b8cf1ea95531d1e72f Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 17:16:27 +0800 Subject: [PATCH 058/163] Add arguments for logging These arguments are compatible with logmuse generated parser --- looper/command_models/arguments.py | 18 ++++++++++++++++++ looper/command_models/commands.py | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 09a813747..2b639e622 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -166,3 +166,21 @@ class ArgumentEnum(enum.Enum): "variable. 
Currently: {}".format(os.getenv("DIVCFG") or "not set") ), ) + + # Arguments for logger compatible with logmuse + SILENT = Argument( + name="silent", default=(bool, False), description="Whether to silence logging" + ) + VERBOSITY = Argument( + name="verbosity", + default=(int, None), + description="Alternate mode of expression for logging level that better " + "accords with intuition about how to convey this.", + ) + LOGDEV = Argument( + name="logdev", + default=(bool, False), + description="Whether to log in development mode; possibly among other " + "behavioral changes to logs handling, use a more information-rich " + "message format template.", + ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index be2330d18..5b437997a 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -89,3 +89,8 @@ class TopLevelParser(pydantic.BaseModel): amend: Optional[List[str]] = ArgumentEnum.AMEND.value.with_reduced_default() sel_flag: Optional[str] = ArgumentEnum.SEL_FLAG.value.with_reduced_default() exc_flag: Optional[str] = ArgumentEnum.EXC_FLAG.value.with_reduced_default() + + # arguments for logging + silent: Optional[bool] = ArgumentEnum.SILENT.value.with_reduced_default() + verbosity: Optional[int] = ArgumentEnum.VERBOSITY.value.with_reduced_default() + logdev: Optional[bool] = ArgumentEnum.LOGDEV.value.with_reduced_default() From 07fea992cefcbdc4f1b81a3ea1877550d0af9739 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 26 Jan 2024 13:40:26 +0100 Subject: [PATCH 059/163] Add `SKIP_FILE_CHECKS`, `PACKAGE` and `COMPUTE` arguments to list --- looper/command_models/arguments.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 2b639e622..ad2b0198c 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -158,6 +158,21 @@ class ArgumentEnum(enum.Enum): EXC_FLAG = Argument( 
name="exc_flag", default=(str, ""), description="Sample exclusion flag" ) + SKIP_FILE_CHECKS = Argument( + name="skip_file_checks", + default=(bool, False), + description="Do not perform input file checks" + ) + PACKAGE = Argument( + name="package", + default=(str, None), + description="Name of computing resource package to use" + ) + COMPUTE = Argument( + name="compute", + default=(List, []), + description="List of key-value pairs (k1=v1)" + ) DIVVY = Argument( name="divvy", default=(str, os.getenv("DIVCFG", None)), From 65e068943c91e4b03d33c4beb06e1dabaf5c2ccc Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 26 Jan 2024 13:44:16 +0100 Subject: [PATCH 060/163] Add `SKIP_FILE_CHECKS`, `PACKAGE` and `COMPUTE` arguments to `run` --- looper/command_models/commands.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 5b437997a..b1900f4e9 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -55,6 +55,9 @@ def create_model(self) -> Type[pydantic.BaseModel]: ArgumentEnum.LIMIT.value, ArgumentEnum.SKIP.value, ArgumentEnum.DIVVY.value, + ArgumentEnum.SKIP_FILE_CHECKS.value, + ArgumentEnum.COMPUTE.value, + ArgumentEnum.PACKAGE.value ], ) RunParserModel = RunParser.create_model() From e45425aa52619b11ba7f5105f2f8338c76cc6b63 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 26 Jan 2024 15:34:20 +0800 Subject: [PATCH 061/163] Add `pipestat` argument to CLI The description is temporarily a placeholder here and we may have a more precise one afterwards. 
--- looper/command_models/arguments.py | 8 ++++++-- looper/command_models/commands.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index ad2b0198c..b23c3a6c0 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -3,8 +3,8 @@ """ import enum -from copy import copy import os +from copy import copy from typing import Any, List import pydantic @@ -181,7 +181,6 @@ class ArgumentEnum(enum.Enum): "variable. Currently: {}".format(os.getenv("DIVCFG") or "not set") ), ) - # Arguments for logger compatible with logmuse SILENT = Argument( name="silent", default=(bool, False), description="Whether to silence logging" @@ -199,3 +198,8 @@ class ArgumentEnum(enum.Enum): "behavioral changes to logs handling, use a more information-rich " "message format template.", ) + PIPESTAT = Argument( + name="pipestat", + default=(str, None), + description="Path to pipestat files.", + ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index b1900f4e9..8ceab93a7 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -92,8 +92,8 @@ class TopLevelParser(pydantic.BaseModel): amend: Optional[List[str]] = ArgumentEnum.AMEND.value.with_reduced_default() sel_flag: Optional[str] = ArgumentEnum.SEL_FLAG.value.with_reduced_default() exc_flag: Optional[str] = ArgumentEnum.EXC_FLAG.value.with_reduced_default() - # arguments for logging silent: Optional[bool] = ArgumentEnum.SILENT.value.with_reduced_default() verbosity: Optional[int] = ArgumentEnum.VERBOSITY.value.with_reduced_default() logdev: Optional[bool] = ArgumentEnum.LOGDEV.value.with_reduced_default() + pipestat: Optional[str] = ArgumentEnum.PIPESTAT.value.with_reduced_default() From 907acc612473a69769c6eea70637dd63ad453895 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 13:17:10 +0100 Subject: [PATCH 062/163] [DELETE ME] hack 
to use local `yacman` copy --- requirements/requirements-all.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 06adde4f2..abc49ab9a 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -14,7 +14,8 @@ ubiquerg>=0.5.2 # YAMLConfigmanager not capture SIGTERM / SIGKILL (and thus Ctrl+c events). # In the near future, this will be replaced by a new version of Yacman that # supports a read-only mode. -git+https://github.com/databio/yacman.git@tweag/thread-unsafe +# git+https://github.com/databio/yacman.git@tweag/thread-unsafe +../yacman/ pydantic-argparse>=0.8.0 fastapi>=0.109.0 uvicorn>=0.26.0 From 44616701f1ca4810f0e08c27c810b386d67963ba Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 18:03:10 +0100 Subject: [PATCH 063/163] Don't call `logmuse.init_logger()` in `looper.__init__.py` Everything will probably still work - after all, `logmuse.logger_via_cli()` is called in `cli_looper.py` / `cli_pydantic.py` which also sets up a logger. 
--- looper/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/looper/__init__.py b/looper/__init__.py index fe751d02d..b0931009e 100644 --- a/looper/__init__.py +++ b/looper/__init__.py @@ -7,10 +7,6 @@ """ -import logmuse - -logmuse.init_logger("looper") - from .divvy import ComputingConfiguration, select_divvy_config from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME from .divvy import NEW_COMPUTE_KEY as COMPUTE_KEY From 643779c0bc2d3860adb28c128587c4c877936573 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 18:07:16 +0100 Subject: [PATCH 064/163] Explicitly initialize `logmuse` logger with `sys.stderr` as stream --- looper/cli_pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 28ca25add..0a2e95811 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -48,7 +48,7 @@ def run_looper( # here comes adapted `cli_looper.py` code global _LOGGER - _LOGGER = logmuse.logger_via_cli(args, make_root=True) + _LOGGER = logmuse.logger_via_cli(args, make_root=True, stream=sys.stderr) # Find out which subcommand was used supported_command_names = [cmd.name for cmd in SUPPORTED_COMMANDS] From a6762fa915c7458fea9c4670b063226451088404 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Tue, 30 Jan 2024 16:12:54 +0800 Subject: [PATCH 065/163] Apply formatter --- looper/api/main.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index e3ab9a82b..3474ac8ca 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,35 +1,38 @@ import io +import secrets from argparse import Namespace from contextlib import redirect_stderr, redirect_stdout -import secrets from typing import Dict, TypeAlias import fastapi -from fastapi import FastAPI import pydantic - +from fastapi import FastAPI from looper.cli_pydantic import run_looper from looper.command_models.commands import 
SUPPORTED_COMMANDS, TopLevelParser JobId: TypeAlias = str + class Job(pydantic.BaseModel): id: JobId = pydantic.Field( default_factory=lambda: secrets.token_urlsafe(4), - description="The unique identifier of the job" + description="The unique identifier of the job", ) status: str = pydantic.Field( default="in_progress", - description="The current status of the job. Can be either `in_progress` or `completed`." + description="The current status of the job. Can be either `in_progress` or `completed`.", ) progress: int = 0 - stdout: str | None = pydantic.Field(default=None, - description="Standard output produced by `looper` while performing the requested action" + stdout: str | None = pydantic.Field( + default=None, + description="Standard output produced by `looper` while performing the requested action", ) - stderr: str | None = pydantic.Field(default=None, - description="Standard error output produced by `looper` while performing the requested action" + stderr: str | None = pydantic.Field( + default=None, + description="Standard error output produced by `looper` while performing the requested action", ) + app = FastAPI(validate_model=True) jobs: Dict[str, Job] = {} From dcb0ce2cd953a3018fc9fa07c1551c5527fe9747 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Tue, 30 Jan 2024 16:25:41 +0800 Subject: [PATCH 066/163] Add documentation for `POST` and `GET` requests --- looper/api/main.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 3474ac8ca..7a73e3ced 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -90,14 +90,28 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace -@app.post("/", status_code=202) -async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> Dict: + +@app.post( + "/", + status_code=202, + summary="Run Looper in 
Background", + description="Create a new job, process data with the specified " + "`top_level_model`, and initiate a background asynchronous task to run " + "looper.", +) +async def main_endpoint( + top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks +) -> Dict: job = Job() jobs[job.id] = job background_tasks.add_task(background_async, top_level_model, job.id) return {"job_id": job.id} -@app.get("/status/{job_id}") +@app.get( + "/status/{job_id}", + summary="Get Job Status", + description="Retrieve the status of a job based on its unique identifier.", +) async def get_status(job_id: JobId): return jobs[job_id] From dacbb03fdadcc3d8aa35e92a01de99e2a8e7929c Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Wed, 31 Jan 2024 11:53:55 +0800 Subject: [PATCH 067/163] Add defaultdict as an import The import of defaultdict was removed somehow on this branch, but it is used in the code (see line 56 and line 680). --- looper/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/looper/utils.py b/looper/utils.py index 86054fa86..982acd72a 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -6,6 +6,7 @@ import os import re import sys +from collections import defaultdict from logging import getLogger from typing import * From fef6b4bc1b92ec4256137a74c8c36b714012e3b5 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Wed, 31 Jan 2024 16:52:13 +0800 Subject: [PATCH 068/163] Modify argument namespace when the command is `run` Otherwise `looper run` will always has args.run.time_delay not found issues --- looper/cli_looper.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/looper/cli_looper.py b/looper/cli_looper.py index 82cb7997f..db936acfc 100644 --- a/looper/cli_looper.py +++ b/looper/cli_looper.py @@ -573,6 +573,44 @@ def _proc_resources_spec(args): return settings_data +RUN_ARGS = [ + "ignore_flags", + "time_delay", + "dry_run", + "command_extra", + "command_extra_override", + "lump", + "lumpn", + "limit", 
+ "skip", + "skip_file_checks", +] + + +def modify_args_namespace(args): + """ + Modify the argument namespace based on the specified conditions. + + If the command in the given arguments is 'run', this function creates a sub-namespace + named 'run' and moves selected arguments specified in RUN_ARGS to the 'run' namespace. + The selected arguments are also removed from the original namespace. + + :param argparse.Namespace: The argparse namespace containing program arguments. + :return argparse.Namespace: The modified argparse namespace. + """ + if args.command == "run": + command_namespace = argparse.Namespace() + for arg in vars(args): + if arg in RUN_ARGS: + setattr(command_namespace, arg, getattr(args, arg)) + + for arg in RUN_ARGS: + if hasattr(args, arg): + delattr(args, arg) + + args.run = command_namespace + return args + def main(test_args=None): """Primary workflow""" global _LOGGER @@ -585,6 +623,7 @@ def main(test_args=None): else: args, remaining_args = parser.parse_known_args() + args = modify_args_namespace(args) cli_use_errors = validate_post_parse(args) if cli_use_errors: parser.print_help(sys.stderr) From df0eb81313969757ac620b831aee3535f64db684 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Wed, 31 Jan 2024 17:58:22 +0800 Subject: [PATCH 069/163] Refactor `modify_args_namespace` Co-authored-by: Simeon Carstens --- looper/cli_looper.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/looper/cli_looper.py b/looper/cli_looper.py index db936acfc..be2e38cb9 100644 --- a/looper/cli_looper.py +++ b/looper/cli_looper.py @@ -10,6 +10,7 @@ from ubiquerg import VersionInHelpParser from . 
import __version__ +from .command_models.commands import RunParser from .const import * from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config from .exceptions import * @@ -573,20 +574,6 @@ def _proc_resources_spec(args): return settings_data -RUN_ARGS = [ - "ignore_flags", - "time_delay", - "dry_run", - "command_extra", - "command_extra_override", - "lump", - "lumpn", - "limit", - "skip", - "skip_file_checks", -] - - def modify_args_namespace(args): """ Modify the argument namespace based on the specified conditions. @@ -598,17 +585,23 @@ def modify_args_namespace(args): :param argparse.Namespace: The argparse namespace containing program arguments. :return argparse.Namespace: The modified argparse namespace. """ - if args.command == "run": + + def add_command_hierarchy(command_args): command_namespace = argparse.Namespace() for arg in vars(args): - if arg in RUN_ARGS: + if arg in command_args: setattr(command_namespace, arg, getattr(args, arg)) - for arg in RUN_ARGS: + for arg in command_args: if hasattr(args, arg): delattr(args, arg) - args.run = command_namespace + setattr(args, args.command, command_namespace) + + if args.command == "run": + run_args = [argument.name for argument in RunParser.arguments] + add_command_hierarchy(run_args) + return args def main(test_args=None): From 247a2043fdc7e389d8293c3a6cd01133daaf903d Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Wed, 31 Jan 2024 17:59:35 +0800 Subject: [PATCH 070/163] Adapt `divvy` argument retrieval to run command Co-authored-by: Simeon Carstens --- looper/cli_looper.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/looper/cli_looper.py b/looper/cli_looper.py index be2e38cb9..c84476155 100644 --- a/looper/cli_looper.py +++ b/looper/cli_looper.py @@ -684,9 +684,16 @@ def main(test_args=None): ) ) - divcfg = ( - select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None - ) + if args.command == "run": + divcfg = ( + 
select_divvy_config(filepath=args.run.divvy) + if hasattr(args.run, "divvy") + else None + ) + else: + divcfg = ( + select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None + ) # Ignore flags if user is selecting or excluding on flags: if args.sel_flag or args.exc_flag: From 6d6fac7317d4f2adf35c6c80ea1890972bebb35d Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 31 Jan 2024 17:36:36 +0100 Subject: [PATCH 071/163] Fix `run` command model options --- looper/command_models/commands.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 8ceab93a7..01a9adcfe 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -52,12 +52,11 @@ def create_model(self) -> Type[pydantic.BaseModel]: ArgumentEnum.COMMAND_EXTRA_OVERRIDE.value, ArgumentEnum.LUMP.value, ArgumentEnum.LUMPN.value, - ArgumentEnum.LIMIT.value, - ArgumentEnum.SKIP.value, ArgumentEnum.DIVVY.value, ArgumentEnum.SKIP_FILE_CHECKS.value, ArgumentEnum.COMPUTE.value, - ArgumentEnum.PACKAGE.value + ArgumentEnum.PACKAGE.value, + ArgumentEnum.SETTINGS.value ], ) RunParserModel = RunParser.create_model() From 6bd9ffbf07b446e0f26db2e74c14588726918a31 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 31 Jan 2024 17:37:58 +0100 Subject: [PATCH 072/163] Take `run` special case into account in `_proc_resources_spec` --- looper/cli_looper.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/looper/cli_looper.py b/looper/cli_looper.py index c84476155..d5f4ba986 100644 --- a/looper/cli_looper.py +++ b/looper/cli_looper.py @@ -546,13 +546,18 @@ def _proc_resources_spec(args): :raise ValueError: if interpretation of the given specification as encoding of key-value pairs fails """ - spec = getattr(args, "compute", None) + if args.command in ("run",): + spec = getattr(args.run, "compute", None) + settings = args.run.settings + else: + spec = 
getattr(args, "compute", None) + settings = args.settings try: - settings_data = read_yaml_file(args.settings) or {} + settings_data = read_yaml_file(settings) or {} except yaml.YAMLError: _LOGGER.warning( "Settings file ({}) does not follow YAML format," - " disregarding".format(args.settings) + " disregarding".format(settings) ) settings_data = {} if not spec: From 9e0cc35c74f05037a10dfe9753c444b3925f3638 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 31 Jan 2024 17:38:54 +0100 Subject: [PATCH 073/163] Pass top-level namespace to `select_samples` --- looper/looper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/looper.py b/looper/looper.py index 38d17d02d..e8259215c 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -414,7 +414,7 @@ def __call__(self, args, rerun=False, **compute_kwargs): self.prj.pipestat_configured_project or self.prj.pipestat_configured ) - for sample in select_samples(prj=self.prj, args=args.run): + for sample in select_samples(prj=self.prj, args=args): pl_fails = [] skip_reasons = [] sample_pifaces = self.prj.get_sample_piface( From 2f8e7a6d147fdce717e16b0d7474c0f9bc61e7e1 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 31 Jan 2024 17:39:17 +0100 Subject: [PATCH 074/163] Take into account `run` special case in `validate_post_parse` --- looper/cli_looper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/looper/cli_looper.py b/looper/cli_looper.py index d5f4ba986..ffc22a7a7 100644 --- a/looper/cli_looper.py +++ b/looper/cli_looper.py @@ -523,7 +523,10 @@ def validate_post_parse(args: argparse.Namespace) -> List[str]: SAMPLE_INCLUSION_OPTNAME, ], ) - if getattr(args, attr, None) + # Depending on the subcommand used, the above options might either be in + # the top-level namespace or in the subcommand namespace (the latter due + # to a `modify_args_namespace()`) + if getattr(args, attr, None)# or (getattr(args.run, attr, None) if hasattr(args, "run") else False) ] if 
len(used_exclusives) > 1: problems.append( @@ -677,7 +680,6 @@ def main(test_args=None): ) args = enrich_args_via_cfg(args, aux_parser, test_args) - # If project pipeline interface defined in the cli, change name to: "pipeline_interface" if vars(args)[PROJECT_PL_ARG]: args.pipeline_interfaces = vars(args)[PROJECT_PL_ARG] From bf35224fa443addfc9536d1f73a55b0325c44085 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 31 Jan 2024 23:17:09 +0100 Subject: [PATCH 075/163] Take into account `run` special case when passing project CLI attrs --- looper/cli_looper.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/looper/cli_looper.py b/looper/cli_looper.py index ffc22a7a7..32176b979 100644 --- a/looper/cli_looper.py +++ b/looper/cli_looper.py @@ -726,14 +726,19 @@ def main(test_args=None): ) else: try: + project_args = { + attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args + } + if args.command == "run": + project_args.update(**{ + attr: getattr(args.run, attr) for attr in CLI_PROJ_ATTRS if attr in args.run + }) p = Project( cfg=args.config_file, amendments=args.amend, divcfg_path=divcfg, runp=args.command == "runp", - **{ - attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args - }, + **project_args, ) except yaml.parser.ParserError as e: _LOGGER.error(f"Project config parse failed -- {e}") From 6e4e9d40e5e75078b88aca47d3898e103aadc8ad Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Thu, 18 Jan 2024 17:17:19 +0800 Subject: [PATCH 076/163] HTTP API settings --- looper/api/__init__.py | 0 looper/api/main.py | 10 ++++++++++ requirements/requirements-all.txt | 3 +++ 3 files changed, 13 insertions(+) create mode 100644 looper/api/__init__.py create mode 100644 looper/api/main.py diff --git a/looper/api/__init__.py b/looper/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/looper/api/main.py b/looper/api/main.py new file mode 100644 index 000000000..24715665f --- /dev/null +++ 
b/looper/api/main.py @@ -0,0 +1,10 @@ +from fastapi import FastAPI +from looper.command_models.commands import RunParserModel + +app = FastAPI(validate_model=True) + + +@app.post("/run") +async def run_endpoint(run_model: RunParserModel): + print(run_model) + return run_model diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 71c0df877..0e793d423 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -12,3 +12,6 @@ rich>=9.10.0 ubiquerg>=0.5.2 yacman>=0.9.2 pydantic-argparse>=0.8.0 +pydantic-argparse==0.8.0 +fastapi +uvicorn From eab51271039a0cfcd8887b4eaef630483880fbe0 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 12:13:35 +0800 Subject: [PATCH 077/163] Create an argparse.Namespace --- looper/api/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 24715665f..f46f97af8 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,10 +1,21 @@ +from argparse import Namespace + from fastapi import FastAPI from looper.command_models.commands import RunParserModel app = FastAPI(validate_model=True) +def create_argparse_namespace(run_model: RunParserModel) -> Namespace: + # Create an argparse namespace from the submitted run model + namespace = Namespace() + for arg in vars(run_model): + setattr(namespace, arg, getattr(run_model, arg)) + return namespace + + @app.post("/run") async def run_endpoint(run_model: RunParserModel): - print(run_model) + argparse_namespace = create_argparse_namespace(run_model) + print(argparse_namespace) return run_model From 72087ee35bc44c85e0ec09fb3072b5c2037281c7 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 15:35:51 +0800 Subject: [PATCH 078/163] Add run function from cli_pydantic --- looper/api/main.py | 145 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 9 deletions(-) diff --git a/looper/api/main.py 
b/looper/api/main.py index f46f97af8..69e99ef1d 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,21 +1,148 @@ +import os +import sys from argparse import Namespace +import yaml +from divvy import select_divvy_config from fastapi import FastAPI -from looper.command_models.commands import RunParserModel +from looper.cli_looper import _proc_resources_spec +from looper.command_models.commands import ( # RunParserModel, + SUPPORTED_COMMANDS, + TopLevelParser, +) +from looper.const import * +from looper.divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config +from looper.exceptions import * +from looper.looper import * +from looper.parser_types import * +from looper.project import Project, ProjectContext +from looper.utils import ( + dotfile_path, + enrich_args_via_cfg, + is_registry_path, + read_looper_dotfile, +) +from pephubclient import PEPHubClient app = FastAPI(validate_model=True) -def create_argparse_namespace(run_model: RunParserModel) -> Namespace: - # Create an argparse namespace from the submitted run model +def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: + # Create an argparse namespace from the submitted top level model namespace = Namespace() - for arg in vars(run_model): - setattr(namespace, arg, getattr(run_model, arg)) + for arg in vars(top_level_model): + if arg not in [cmd.name for cmd in SUPPORTED_COMMANDS]: + setattr(namespace, arg, getattr(top_level_model, arg)) + else: + command_namespace = Namespace() + command_namespace_args = getattr(top_level_model, arg) + for argname in vars(command_namespace_args): + setattr( + command_namespace, + argname, + getattr(command_namespace_args, argname), + ) + setattr(namespace, arg, command_namespace) return namespace +def run_cmd(args: Namespace): + # here comes adapted `cli_looper.py` code + looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) + try: + looper_config_dict = read_looper_dotfile() + + for looper_config_key, looper_config_item in 
looper_config_dict.items(): + print(looper_config_key, looper_config_item) + setattr(args, looper_config_key, looper_config_item) + + except OSError: + # parser.print_help(sys.stderr) + raise ValueError( + f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." + ) + + print("#####################################") + print(args) + + # args = enrich_args_via_cfg(args, parser, False) + divcfg = ( + select_divvy_config(filepath=args.run.divvy) + if hasattr(args.run, "divvy") + else None + ) + # Ignore flags if user is selecting or excluding on flags: + if args.sel_flag or args.exc_flag: + args.ignore_flags = True + + # Initialize project + if is_registry_path(args.config_file): + if vars(args)[SAMPLE_PL_ARG]: + p = Project( + amendments=args.amend, + divcfg_path=divcfg, + runp=args.command == "runp", + project_dict=PEPHubClient()._load_raw_pep( + registry_path=args.config_file + ), + **{ + attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args + }, + ) + else: + raise MisconfigurationException( + f"`sample_pipeline_interface` is missing. Provide it in the parameters." + ) + else: + try: + p = Project( + cfg=args.config_file, + amendments=args.amend, + divcfg_path=divcfg, + runp=False, + **{ + attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args + }, + ) + except yaml.parser.ParserError as e: + _LOGGER.error(f"Project config parse failed -- {e}") + sys.exit(1) + + selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME + if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg): + _LOGGER.info( + "Failed to activate '{}' computing package. 
" + "Using the default one".format(selected_compute_pkg) + ) + + with ProjectContext( + prj=p, + selector_attribute="toggle", + selector_include=None, + selector_exclude=None, + selector_flag=None, + exclusion_flag=None, + ) as prj: + command = "run" + if command == "run": + run = Runner(prj) + try: + compute_kwargs = _proc_resources_spec(args) + return run(args, rerun=False, **compute_kwargs) + except SampleFailedException: + sys.exit(1) + except IOError: + _LOGGER.error( + "{} pipeline_interfaces: '{}'".format( + prj.__class__.__name__, prj.pipeline_interface_sources + ) + ) + raise + + @app.post("/run") -async def run_endpoint(run_model: RunParserModel): - argparse_namespace = create_argparse_namespace(run_model) - print(argparse_namespace) - return run_model +async def run_endpoint(top_level_model: TopLevelParser): + print(top_level_model) + argparse_namespace = create_argparse_namespace(top_level_model) + run_cmd(argparse_namespace) + return top_level_model From 1734c80e7af977c881de8cd09922b87640a61256 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 15:50:31 +0800 Subject: [PATCH 079/163] Adjust enrich_args_via_cfg to http api --- looper/utils.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/looper/utils.py b/looper/utils.py index 531ea01a6..e5f602005 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -18,8 +18,8 @@ from pephubclient.constants import RegistryPath from pydantic.error_wrappers import ValidationError -from .const import * from .command_models.commands import SUPPORTED_COMMANDS +from .const import * from .exceptions import MisconfigurationException, RegistryPathException _LOGGER = getLogger(__name__) @@ -251,7 +251,7 @@ def read_yaml_file(filepath): return data -def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): +def enrich_args_via_cfg(parser_args, aux_parser, test_args=None, http_api=False): """ Read in a looper dotfile and set arguments. 
@@ -268,11 +268,17 @@ def enrich_args_via_cfg(parser_args, aux_parser, test_args=None): else dict() ) result = argparse.Namespace() - if test_args: - cli_args, _ = aux_parser.parse_known_args(args=test_args) + if not http_api: + if test_args: + cli_args, _ = aux_parser.parse_known_args(args=test_args) + else: + cli_args, _ = aux_parser.parse_known_args() else: - cli_args, _ = aux_parser.parse_known_args() + if aux_parser: + cli_args, _ = aux_parser.parse_known_args() + else: + cli_args = [] def set_single_arg(argname, default_source_namespace, result_namespace): if argname not in POSITIONAL or not hasattr(result, argname): From e0e3a6f06c80abd1946ca47a8d7ddc3a5b598016 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 15:52:33 +0800 Subject: [PATCH 080/163] Run adjusted enrich_args_via_cfg in http api --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 69e99ef1d..8540c2088 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -65,7 +65,7 @@ def run_cmd(args: Namespace): print("#####################################") print(args) - # args = enrich_args_via_cfg(args, parser, False) + args = enrich_args_via_cfg(args, None, False, True) divcfg = ( select_divvy_config(filepath=args.run.divvy) if hasattr(args.run, "divvy") From 67182ddcf7c8c73f305c9804ba8283dabe5d7876 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Fri, 19 Jan 2024 16:20:42 +0800 Subject: [PATCH 081/163] Re-organize cli_pydantic.py to run looper run via CLI and http-api --- looper/api/main.py | 134 +++++------------------------------------ looper/cli_pydantic.py | 21 ++++--- 2 files changed, 29 insertions(+), 126 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 8540c2088..65e16cbd4 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,34 +1,25 @@ -import os -import sys from argparse import Namespace -import yaml -from divvy import select_divvy_config from fastapi 
import FastAPI -from looper.cli_looper import _proc_resources_spec -from looper.command_models.commands import ( # RunParserModel, - SUPPORTED_COMMANDS, - TopLevelParser, -) -from looper.const import * -from looper.divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config -from looper.exceptions import * -from looper.looper import * -from looper.parser_types import * -from looper.project import Project, ProjectContext -from looper.utils import ( - dotfile_path, - enrich_args_via_cfg, - is_registry_path, - read_looper_dotfile, -) -from pephubclient import PEPHubClient +from looper.cli_pydantic import run_looper +from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser app = FastAPI(validate_model=True) def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: - # Create an argparse namespace from the submitted top level model + """ + Converts a TopLevelParser instance into an argparse.Namespace object. + + This function takes a TopLevelParser instance, and converts it into an + argparse.Namespace object. It includes handling for supported commands + specified in SUPPORTED_COMMANDS. + + :param TopLevelParser top_level_model: An instance of the TopLevelParser + model + :return argparse.Namespace: An argparse.Namespace object representing + the parsed command-line arguments. 
+ """ namespace = Namespace() for arg in vars(top_level_model): if arg not in [cmd.name for cmd in SUPPORTED_COMMANDS]: @@ -46,103 +37,8 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: return namespace -def run_cmd(args: Namespace): - # here comes adapted `cli_looper.py` code - looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) - try: - looper_config_dict = read_looper_dotfile() - - for looper_config_key, looper_config_item in looper_config_dict.items(): - print(looper_config_key, looper_config_item) - setattr(args, looper_config_key, looper_config_item) - - except OSError: - # parser.print_help(sys.stderr) - raise ValueError( - f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." - ) - - print("#####################################") - print(args) - - args = enrich_args_via_cfg(args, None, False, True) - divcfg = ( - select_divvy_config(filepath=args.run.divvy) - if hasattr(args.run, "divvy") - else None - ) - # Ignore flags if user is selecting or excluding on flags: - if args.sel_flag or args.exc_flag: - args.ignore_flags = True - - # Initialize project - if is_registry_path(args.config_file): - if vars(args)[SAMPLE_PL_ARG]: - p = Project( - amendments=args.amend, - divcfg_path=divcfg, - runp=args.command == "runp", - project_dict=PEPHubClient()._load_raw_pep( - registry_path=args.config_file - ), - **{ - attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args - }, - ) - else: - raise MisconfigurationException( - f"`sample_pipeline_interface` is missing. Provide it in the parameters." 
- ) - else: - try: - p = Project( - cfg=args.config_file, - amendments=args.amend, - divcfg_path=divcfg, - runp=False, - **{ - attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS if attr in args - }, - ) - except yaml.parser.ParserError as e: - _LOGGER.error(f"Project config parse failed -- {e}") - sys.exit(1) - - selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME - if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg): - _LOGGER.info( - "Failed to activate '{}' computing package. " - "Using the default one".format(selected_compute_pkg) - ) - - with ProjectContext( - prj=p, - selector_attribute="toggle", - selector_include=None, - selector_exclude=None, - selector_flag=None, - exclusion_flag=None, - ) as prj: - command = "run" - if command == "run": - run = Runner(prj) - try: - compute_kwargs = _proc_resources_spec(args) - return run(args, rerun=False, **compute_kwargs) - except SampleFailedException: - sys.exit(1) - except IOError: - _LOGGER.error( - "{} pipeline_interfaces: '{}'".format( - prj.__class__.__name__, prj.pipeline_interface_sources - ) - ) - raise - - @app.post("/run") async def run_endpoint(top_level_model: TopLevelParser): - print(top_level_model) argparse_namespace = create_argparse_namespace(top_level_model) - run_cmd(argparse_namespace) + run_looper(argparse_namespace, None, True) return top_level_model diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 831788383..e14be9b45 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -19,6 +19,7 @@ import os import sys +from argparse import Namespace import logmuse import pydantic_argparse @@ -46,7 +47,7 @@ ) -def run_looper(args: TopLevelParser, parser: ArgumentParser): +def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_api=False): # here comes adapted `cli_looper.py` code global _LOGGER @@ -77,11 +78,11 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser): setattr(args, 
looper_config_key, looper_config_item) except OSError: - parser.print_help(sys.stderr) - _LOGGER.warning( + if not http_api: + parser.print_help(sys.stderr) + raise ValueError( f"Looper config file does not exist. Use looper init to create one at {looper_cfg_path}." ) - sys.exit(1) else: _LOGGER.warning( "This PEP configures looper through the project config. This approach is deprecated and will " @@ -89,7 +90,7 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser): "looper.databio.org/en/latest/looper-config" ) - args = enrich_args_via_cfg(args, parser, False) + args = enrich_args_via_cfg(args, parser, False, http_api) # If project pipeline interface defined in the cli, change name to: "pipeline_interface" if vars(args)[PROJECT_PL_ARG]: @@ -97,9 +98,10 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser): divcfg = ( select_divvy_config(filepath=subcommand_args.divvy) - if hasattr(subcommand_args, "divvy") - else None + if hasattr(subcommand_args, "divvy") else None ) + args = enrich_args_via_cfg(args, parser, False, http_api) + # Ignore flags if user is selecting or excluding on flags: if args.sel_flag or args.exc_flag: args.ignore_flags = True @@ -176,6 +178,11 @@ def main() -> None: add_help=True, ) args = parser.parse_typed_args() +<<<<<<< HEAD +======= + print(args) + print("#########################################") +>>>>>>> 0141fb3 (Re-organize cli_pydantic.py to run looper run via CLI and http-api) run_looper(args, parser) From 634665416e9d400b00774b37f800ccefdb014c71 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 14:27:29 +0100 Subject: [PATCH 082/163] Slight refactor of `create_argparse_namespace` --- looper/api/main.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 65e16cbd4..9ec0418f8 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -21,19 +21,20 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> 
Namespace: the parsed command-line arguments. """ namespace = Namespace() - for arg in vars(top_level_model): - if arg not in [cmd.name for cmd in SUPPORTED_COMMANDS]: - setattr(namespace, arg, getattr(top_level_model, arg)) + + for argname, value in vars(top_level_model).items(): + if argname not in [cmd.name for cmd in SUPPORTED_COMMANDS]: + setattr(namespace, argname, value) else: command_namespace = Namespace() - command_namespace_args = getattr(top_level_model, arg) - for argname in vars(command_namespace_args): + command_namespace_args = value + for command_argname, command_arg_value in vars(command_namespace_args).items(): setattr( command_namespace, - argname, - getattr(command_namespace_args, argname), + command_argname, + command_arg_value, ) - setattr(namespace, arg, command_namespace) + setattr(namespace, argname, command_namespace) return namespace From e1f730841ff8427b5d1c4732ba543ee2f5e06c07 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 14:47:16 +0100 Subject: [PATCH 083/163] Remove `run` from route That's because this endpoint will support _all_ commands, and not only `run`. 
--- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 9ec0418f8..a6addb2fb 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -38,7 +38,7 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: return namespace -@app.post("/run") +@app.post("/") async def run_endpoint(top_level_model: TopLevelParser): argparse_namespace = create_argparse_namespace(top_level_model) run_looper(argparse_namespace, None, True) From dd978c88d0b7257e58bf4d7806e82a0fd152d93f Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 14:56:31 +0100 Subject: [PATCH 084/163] Capture stderr / stdout and return in HTTP response --- looper/api/main.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index a6addb2fb..7655c4b65 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,4 +1,6 @@ from argparse import Namespace +from contextlib import redirect_stderr, redirect_stdout +import io from fastapi import FastAPI from looper.cli_pydantic import run_looper @@ -41,5 +43,11 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: @app.post("/") async def run_endpoint(top_level_model: TopLevelParser): argparse_namespace = create_argparse_namespace(top_level_model) - run_looper(argparse_namespace, None, True) - return top_level_model + stdout_stream = io.StringIO() + stderr_stream = io.StringIO() + with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): + run_looper(argparse_namespace, None, True) + return { + "stdout": stdout_stream.getvalue(), + "stderr": stderr_stream.getvalue() + } From e010f75c6e29994fa92927c5cc2aab657c7ae04b Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 15:05:23 +0100 Subject: [PATCH 085/163] Rename `run_endpoint` -> `main_endpoint` --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/looper/api/main.py b/looper/api/main.py index 7655c4b65..338d15ef3 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -41,7 +41,7 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: @app.post("/") -async def run_endpoint(top_level_model: TopLevelParser): +async def main_endpoint(top_level_model: TopLevelParser): argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() From 8af2bb2b76a6850508a344b0e5433a49accfdff8 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 15:13:34 +0100 Subject: [PATCH 086/163] Add response model --- looper/api/main.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 338d15ef3..2143e9358 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -5,6 +5,7 @@ from fastapi import FastAPI from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser +import pydantic app = FastAPI(validate_model=True) @@ -39,15 +40,22 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace +class MainResponse(pydantic.BaseModel): + """ + Response of the main endpoint. 
+ """ + stdout: str = pydantic.Field(description="Standard output produced by `looper` while running a command") + stderr: str = pydantic.Field(description="Standard error output produced by `looper` while running a command") + @app.post("/") -async def main_endpoint(top_level_model: TopLevelParser): +async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): run_looper(argparse_namespace, None, True) - return { - "stdout": stdout_stream.getvalue(), - "stderr": stderr_stream.getvalue() - } + return MainResponse( + stdout=stdout_stream.getvalue(), + stderr=stderr_stream.getvalue() + ) From a89e7bc3eb57f641fd435addf9cf69ba2a2d61ca Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 19 Jan 2024 22:51:06 +0100 Subject: [PATCH 087/163] Add a comment about the endpoint likely being blocking --- looper/api/main.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/looper/api/main.py b/looper/api/main.py index 2143e9358..9b0109c9b 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -54,6 +54,15 @@ async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: stdout_stream = io.StringIO() stderr_stream = io.StringIO() with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): + # TODO: as it stands, because of the `async def`, and the lacking `await` + # in the following line, this endpoint is (I (Simeon) thing) currently blocking. + # We would need to make `run_looper()` return a future, but it inherently does + # not support `async` calls. + # So one option would be to run `run_looper()` in its own thread whose + # termination we can `await`, using `fastapi.run_in_threadpool`. But that fails + # with an error stemming from the `yacman` library about `signal.signal` only + # working in the main thread of the main interpreter. 
We have to investigate + # how to solve this. run_looper(argparse_namespace, None, True) return MainResponse( stdout=stdout_stream.getvalue(), From 1880372a7973de9afcdd35ac6c1bb3c911ae7b89 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 15:57:29 +0800 Subject: [PATCH 088/163] Apply formatter --- looper/api/main.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 9b0109c9b..7091b7dfe 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,11 +1,11 @@ +import io from argparse import Namespace from contextlib import redirect_stderr, redirect_stdout -import io +import pydantic from fastapi import FastAPI from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser -import pydantic app = FastAPI(validate_model=True) @@ -31,7 +31,9 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: else: command_namespace = Namespace() command_namespace_args = value - for command_argname, command_arg_value in vars(command_namespace_args).items(): + for command_argname, command_arg_value in vars( + command_namespace_args + ).items(): setattr( command_namespace, command_argname, @@ -40,12 +42,18 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace + class MainResponse(pydantic.BaseModel): """ Response of the main endpoint. 
""" - stdout: str = pydantic.Field(description="Standard output produced by `looper` while running a command") - stderr: str = pydantic.Field(description="Standard error output produced by `looper` while running a command") + + stdout: str = pydantic.Field( + description="Standard output produced by `looper` while running a command" + ) + stderr: str = pydantic.Field( + description="Standard error output produced by `looper` while running a command" + ) @app.post("/") @@ -65,6 +73,5 @@ async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: # how to solve this. run_looper(argparse_namespace, None, True) return MainResponse( - stdout=stdout_stream.getvalue(), - stderr=stderr_stream.getvalue() + stdout=stdout_stream.getvalue(), stderr=stderr_stream.getvalue() ) From 42119f00a7591d869bd502a74e6728350cfcbefa Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 17:17:21 +0800 Subject: [PATCH 089/163] Add logger def to be captured by API and also CLI --- looper/cli_pydantic.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index e14be9b45..c768db8d0 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -178,11 +178,6 @@ def main() -> None: add_help=True, ) args = parser.parse_typed_args() -<<<<<<< HEAD -======= - print(args) - print("#########################################") ->>>>>>> 0141fb3 (Re-organize cli_pydantic.py to run looper run via CLI and http-api) run_looper(args, parser) From f0c749db6ae0da89dca2f06fdc82601f5f584695 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Mon, 22 Jan 2024 17:30:16 +0800 Subject: [PATCH 090/163] Add README for the API --- looper/api/README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 looper/api/README.md diff --git a/looper/api/README.md b/looper/api/README.md new file mode 100644 index 000000000..ffcf951e7 --- /dev/null +++ b/looper/api/README.md @@ -0,0 +1,21 @@ +# Looper HTTP API + +## 
Overview + +This API provides an HTTP interface for running the `looper` commands, allowing users to interact with Looper via HTTP requests. + +## Usage +### Running the API +To run the API, execute the following command: +```bash +cd looper/api +uvicorn main:app --reload +``` +### Example API Usage +To run the `looper run` command through the HTTP API, you can use the following curl command: +```bash +curl -X POST -H "Content-Type: application/json" -d '{"run": {}, "looper_config": ".looper.yaml"}' "http://127.0.0.1:8000" +``` +with the project files in the same `looper/api` folder. + +This example sends a JSON payload with the `run` and `looper_config` parameters to the `/` endpoint. From 6d146b5da3bb736f6db6e17d8fa146466b5ad883 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Tue, 23 Jan 2024 17:47:24 +0800 Subject: [PATCH 091/163] Add endpoint "\status" to capture UUID --- looper/api/main.py | 15 ++++++++++++++- looper/cli_pydantic.py | 4 ++++ requirements/requirements-all.txt | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 7091b7dfe..9a455feb4 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -9,6 +9,8 @@ app = FastAPI(validate_model=True) +_UUID = None + def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: """ @@ -58,6 +60,7 @@ class MainResponse(pydantic.BaseModel): @app.post("/") async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: + global _UUID argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() @@ -71,7 +74,17 @@ async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: # with an error stemming from the `yacman` library about `signal.signal` only # working in the main thread of the main interpreter. We have to investigate # how to solve this. 
- run_looper(argparse_namespace, None, True) + _, _UUID = run_looper(argparse_namespace, None, True) return MainResponse( stdout=stdout_stream.getvalue(), stderr=stderr_stream.getvalue() ) + + +@app.get("/status") +async def get_status(): + global _UUID + if _UUID: + print(_UUID) + return {"UUID": _UUID} + else: + return {"UUID": "Not found"} diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index c768db8d0..8514cce85 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -19,6 +19,7 @@ import os import sys +import uuid from argparse import Namespace import logmuse @@ -51,6 +52,7 @@ def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_ap # here comes adapted `cli_looper.py` code global _LOGGER + _UUID = str(uuid.uuid4()) _LOGGER = logmuse.logger_via_cli(args, make_root=True) # Find out which subcommand was used @@ -158,6 +160,8 @@ def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_ap run = Runner(prj) try: compute_kwargs = _proc_resources_spec(args) + if http_api: + return (run(args, rerun=False, **compute_kwargs), _UUID) return run(args, rerun=False, **compute_kwargs) except SampleFailedException: sys.exit(1) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 0e793d423..dac9400aa 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -15,3 +15,4 @@ pydantic-argparse>=0.8.0 pydantic-argparse==0.8.0 fastapi uvicorn +uuid From 59869fa2d5d3a207daff62a38ada5e7d4a8335ae Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Wed, 24 Jan 2024 18:07:58 +0800 Subject: [PATCH 092/163] Create 2-step job submission and result workflow Co-authored-by: Simeon Carstens --- looper/api/main.py | 68 +++++++++++++++++++++++++----------------- looper/cli_pydantic.py | 4 --- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 9a455feb4..14fb5bad4 100644 --- a/looper/api/main.py +++ 
b/looper/api/main.py @@ -1,15 +1,46 @@ import io from argparse import Namespace from contextlib import redirect_stderr, redirect_stdout +from typing import Dict +from uuid import UUID, uuid4 import pydantic from fastapi import FastAPI from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser + +class Job(pydantic.BaseModel): + id: UUID = pydantic.Field(default_factory=uuid4) + status: str = "in_progress" + progress: int = 0 + stdout: str = None + stderr: str = None + + app = FastAPI(validate_model=True) +jobs: Dict[UUID, Job] = {} + + +def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: + argparse_namespace = create_argparse_namespace(top_level_model) + stdout_stream = io.StringIO() + stderr_stream = io.StringIO() + with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): + # TODO: as it stands, because of the `async def`, and the lacking `await` + # in the following line, this endpoint is (I (Simeon) thing) currently blocking. + # We would need to make `run_looper()` return a future, but it inherently does + # not support `async` calls. + # So one option would be to run `run_looper()` in its own thread whose + # termination we can `await`, using `fastapi.run_in_threadpool`. But that fails + # with an error stemming from the `yacman` library about `signal.signal` only + # working in the main thread of the main interpreter. We have to investigate + # how to solve this. 
+ run_looper(argparse_namespace, None, True) -_UUID = None + jobs[job_id].status = "completed" + jobs[job_id].stdout = stdout_stream.getvalue() + jobs[job_id].stderr = stderr_stream.getvalue() def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: @@ -59,32 +90,13 @@ class MainResponse(pydantic.BaseModel): @app.post("/") -async def main_endpoint(top_level_model: TopLevelParser) -> MainResponse: - global _UUID - argparse_namespace = create_argparse_namespace(top_level_model) - stdout_stream = io.StringIO() - stderr_stream = io.StringIO() - with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): - # TODO: as it stands, because of the `async def`, and the lacking `await` - # in the following line, this endpoint is (I (Simeon) thing) currently blocking. - # We would need to make `run_looper()` return a future, but it inherently does - # not support `async` calls. - # So one option would be to run `run_looper()` in its own thread whose - # termination we can `await`, using `fastapi.run_in_threadpool`. But that fails - # with an error stemming from the `yacman` library about `signal.signal` only - # working in the main thread of the main interpreter. We have to investigate - # how to solve this. 
- _, _UUID = run_looper(argparse_namespace, None, True) - return MainResponse( - stdout=stdout_stream.getvalue(), stderr=stderr_stream.getvalue() - ) +async def main_endpoint(top_level_model: TopLevelParser) -> Dict: + job = Job() + jobs[job.id] = job + background_async(top_level_model, job.id) + return {"job_id": job.id} -@app.get("/status") -async def get_status(): - global _UUID - if _UUID: - print(_UUID) - return {"UUID": _UUID} - else: - return {"UUID": "Not found"} +@app.get("/status/{job_id}") +async def get_status(job_id: UUID): + return jobs[job_id] diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 8514cce85..c768db8d0 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -19,7 +19,6 @@ import os import sys -import uuid from argparse import Namespace import logmuse @@ -52,7 +51,6 @@ def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_ap # here comes adapted `cli_looper.py` code global _LOGGER - _UUID = str(uuid.uuid4()) _LOGGER = logmuse.logger_via_cli(args, make_root=True) # Find out which subcommand was used @@ -160,8 +158,6 @@ def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_ap run = Runner(prj) try: compute_kwargs = _proc_resources_spec(args) - if http_api: - return (run(args, rerun=False, **compute_kwargs), _UUID) return run(args, rerun=False, **compute_kwargs) except SampleFailedException: sys.exit(1) From 384a898a4dfdf540a7d20d82fa782da8cc41af76 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 15:44:58 +0100 Subject: [PATCH 093/163] Allow `None` stderr / stdout in job model --- looper/api/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 14fb5bad4..e727f3b9f 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -14,8 +14,8 @@ class Job(pydantic.BaseModel): id: UUID = pydantic.Field(default_factory=uuid4) status: str = "in_progress" progress: int = 0 - stdout: str = 
None - stderr: str = None + stdout: str | None = None + stderr: str | None = None app = FastAPI(validate_model=True) From 0161cc68a0ebf1a752378293402004f91251e994 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 15:45:33 +0100 Subject: [PATCH 094/163] Run `run_looper()` in FastAPI background task --- looper/api/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index e727f3b9f..c13ffc959 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -3,6 +3,7 @@ from contextlib import redirect_stderr, redirect_stdout from typing import Dict from uuid import UUID, uuid4 +import fastapi import pydantic from fastapi import FastAPI @@ -22,7 +23,7 @@ class Job(pydantic.BaseModel): jobs: Dict[UUID, Job] = {} -def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: +async def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() @@ -90,10 +91,10 @@ class MainResponse(pydantic.BaseModel): @app.post("/") -async def main_endpoint(top_level_model: TopLevelParser) -> Dict: +async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> Dict: job = Job() jobs[job.id] = job - background_async(top_level_model, job.id) + background_tasks.add_task(background_async, top_level_model, job.id) return {"job_id": job.id} From 67c5d340eb7ecefeb4ed7f5304b14525306b8154 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:03:51 +0100 Subject: [PATCH 095/163] Document / make self-documenting `Job` fields --- looper/api/main.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index c13ffc959..37aaa1b92 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -12,11 +12,21 @@ class 
Job(pydantic.BaseModel): - id: UUID = pydantic.Field(default_factory=uuid4) - status: str = "in_progress" + id: UUID = pydantic.Field( + default_factory=uuid4, + description="The unique identifier of the job" + ) + status: str = pydantic.Field( + default="in_progress", + description="The current status of the job. Can be either `in_progress` or `completed`." + ) progress: int = 0 - stdout: str | None = None - stderr: str | None = None + stdout: str | None = pydantic.Field(default=None, + description="Standard output produced by `looper` while performing the requested action" + ) + stderr: str | None = pydantic.Field(default=None, + description="Standard error output produced by `looper` while performing the requested action" + ) app = FastAPI(validate_model=True) @@ -76,20 +86,6 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace - -class MainResponse(pydantic.BaseModel): - """ - Response of the main endpoint. 
- """ - - stdout: str = pydantic.Field( - description="Standard output produced by `looper` while running a command" - ) - stderr: str = pydantic.Field( - description="Standard error output produced by `looper` while running a command" - ) - - @app.post("/") async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> Dict: job = Job() From 8869378a6e3e37d9acf3b87a7361e0071032ace8 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:08:49 +0100 Subject: [PATCH 096/163] Make `/` route return a 202 (Accepted) HTTP status code --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 37aaa1b92..7826786c1 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -86,7 +86,7 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace -@app.post("/") +@app.post("/", status_code=202) async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> Dict: job = Job() jobs[job.id] = job From e76c135fcc86ff85e7da26211e44b9a2f93a7d32 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:20:13 +0100 Subject: [PATCH 097/163] Replace job UUID with a shorter random string The UUID is very, very long, and such a length / collision safety is not needed. So this uses a much shorter (length 6 characters) random job ID created using `secrets.token_urlsafe()`. Empiric testing says that this gives, for two random IDs, a collision probability of ~1e-4. 
--- looper/api/main.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 7826786c1..46da4bbf1 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,8 +1,8 @@ import io from argparse import Namespace from contextlib import redirect_stderr, redirect_stdout -from typing import Dict -from uuid import UUID, uuid4 +import secrets +from typing import Dict, TypeAlias import fastapi import pydantic @@ -10,10 +10,11 @@ from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser +JobId: TypeAlias = str class Job(pydantic.BaseModel): - id: UUID = pydantic.Field( - default_factory=uuid4, + id: JobId = pydantic.Field( + default_factory=lambda: secrets.token_urlsafe(4), description="The unique identifier of the job" ) status: str = pydantic.Field( @@ -28,12 +29,11 @@ class Job(pydantic.BaseModel): description="Standard error output produced by `looper` while performing the requested action" ) - app = FastAPI(validate_model=True) -jobs: Dict[UUID, Job] = {} +jobs: Dict[str, Job] = {} -async def background_async(top_level_model: TopLevelParser, job_id: UUID) -> None: +async def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() @@ -95,5 +95,5 @@ async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fasta @app.get("/status/{job_id}") -async def get_status(job_id: UUID): +async def get_status(job_id: JobId): return jobs[job_id] From adc451af6df483bcde9bc41f84cf2ee7cbb0a14f Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 16:23:53 +0100 Subject: [PATCH 098/163] Reorder imports --- looper/api/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 46da4bbf1..f74a4b310 100644 --- 
a/looper/api/main.py +++ b/looper/api/main.py @@ -3,10 +3,11 @@ from contextlib import redirect_stderr, redirect_stdout import secrets from typing import Dict, TypeAlias -import fastapi -import pydantic +import fastapi from fastapi import FastAPI +import pydantic + from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser From a3b85de794e128dc16132a3daec6e366737a0b1a Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 24 Jan 2024 17:03:05 +0100 Subject: [PATCH 099/163] Fix a typo --- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index f74a4b310..6c8c8d61a 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -40,7 +40,7 @@ async def background_async(top_level_model: TopLevelParser, job_id: JobId) -> No stderr_stream = io.StringIO() with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): # TODO: as it stands, because of the `async def`, and the lacking `await` - # in the following line, this endpoint is (I (Simeon) thing) currently blocking. + # in the following line, this endpoint is (I (Simeon) think) currently blocking. # We would need to make `run_looper()` return a future, but it inherently does # not support `async` calls. 
# So one option would be to run `run_looper()` in its own thread whose From ea81cb11f22ac36c11cea44ac1ade73e8ac320f5 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:16:28 +0100 Subject: [PATCH 100/163] Remove `uuid` dependency --- requirements/requirements-all.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index dac9400aa..0e793d423 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -15,4 +15,3 @@ pydantic-argparse>=0.8.0 pydantic-argparse==0.8.0 fastapi uvicorn -uuid From 279d24f324c0bc3af548c39f6a7e7a8df5a7a43d Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:21:23 +0100 Subject: [PATCH 101/163] Change `yacman` dependency to a hacked, but threadable version This is a temporary hack: use a Yacman branch that makes Yacman's YAMLConfigmanager not capture SIGTERM / SIGKILL (and thus Ctrl+c events). In the near future, this will be replaced by a new version of Yacman that supports a read-only mode. --- requirements/requirements-all.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 0e793d423..b935989be 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -10,7 +10,11 @@ pipestat>=0.6.0 pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.5.2 -yacman>=0.9.2 +# This is a temporary hack: use a Yacman branch that makes Yacman's +# YAMLConfigmanager not capture SIGTERM / SIGKILL (and thus Ctrl+c events). +# In the near future, this will be replaced by a new version of Yacman that +# supports a read-only mode. 
+git+https://github.com/databio/yacman.git@tweag/thread-unsafe pydantic-argparse>=0.8.0 pydantic-argparse==0.8.0 fastapi From 4c887884c006f21768eb195ae95f613261f1a49e Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:23:59 +0100 Subject: [PATCH 102/163] Add lower bound for FastAPI version --- requirements/requirements-all.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index b935989be..e5fb45b40 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -16,6 +16,5 @@ ubiquerg>=0.5.2 # supports a read-only mode. git+https://github.com/databio/yacman.git@tweag/thread-unsafe pydantic-argparse>=0.8.0 -pydantic-argparse==0.8.0 -fastapi uvicorn +fastapi>=0.109.0 From a82a8f78946d59fa8965e580b688b8c477fc3516 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:24:15 +0100 Subject: [PATCH 103/163] Add lower bound for uvicorn version --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index e5fb45b40..06adde4f2 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -16,5 +16,5 @@ ubiquerg>=0.5.2 # supports a read-only mode. git+https://github.com/databio/yacman.git@tweag/thread-unsafe pydantic-argparse>=0.8.0 -uvicorn fastapi>=0.109.0 +uvicorn>=0.26.0 From f995b472a13b821f41ba791212f47468e04fead9 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 27 Jan 2024 12:27:46 +0100 Subject: [PATCH 104/163] Make background task function non-`async` This allows, together with the hacked, threadable (but _not_ thread-safe) `yacman` version, to run `looper` commands in a non-blocking way. 
--- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 6c8c8d61a..e3ab9a82b 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -34,7 +34,7 @@ class Job(pydantic.BaseModel): jobs: Dict[str, Job] = {} -async def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: +def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: argparse_namespace = create_argparse_namespace(top_level_model) stdout_stream = io.StringIO() stderr_stream = io.StringIO() From 3c545467cdb0d65536c8578e513e5eff232e239f Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 13:17:10 +0100 Subject: [PATCH 105/163] [DELETE ME] hack to use local `yacman` copy --- requirements/requirements-all.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 06adde4f2..abc49ab9a 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -14,7 +14,8 @@ ubiquerg>=0.5.2 # YAMLConfigmanager not capture SIGTERM / SIGKILL (and thus Ctrl+c events). # In the near future, this will be replaced by a new version of Yacman that # supports a read-only mode. -git+https://github.com/databio/yacman.git@tweag/thread-unsafe +# git+https://github.com/databio/yacman.git@tweag/thread-unsafe +../yacman/ pydantic-argparse>=0.8.0 fastapi>=0.109.0 uvicorn>=0.26.0 From 9b3a1daae2bae2214b924c6214b6a379ca56f0fa Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 18:03:10 +0100 Subject: [PATCH 106/163] Don't call `logmuse.init_logger()` in `looper.__init__.py` Everything will probably still work - after all, `logmuse.logger_via_cli()` is called in `cli_looper.py` / `cli_pydantic.py` which also sets up a logger. 
--- looper/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/looper/__init__.py b/looper/__init__.py index fe751d02d..b0931009e 100644 --- a/looper/__init__.py +++ b/looper/__init__.py @@ -7,10 +7,6 @@ """ -import logmuse - -logmuse.init_logger("looper") - from .divvy import ComputingConfiguration, select_divvy_config from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME from .divvy import NEW_COMPUTE_KEY as COMPUTE_KEY From 16f0ab501a66afe8e98c9619abf0cf5b587589b4 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Mon, 29 Jan 2024 18:07:16 +0100 Subject: [PATCH 107/163] Explicitly initialize `logmuse` logger with `sys.stderr` as stream --- looper/cli_pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index c768db8d0..a7da330f7 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -51,7 +51,7 @@ def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_ap # here comes adapted `cli_looper.py` code global _LOGGER - _LOGGER = logmuse.logger_via_cli(args, make_root=True) + _LOGGER = logmuse.logger_via_cli(args, make_root=True, stream=sys.stderr) # Find out which subcommand was used supported_command_names = [cmd.name for cmd in SUPPORTED_COMMANDS] From 2370aa6708fe905d20989a68635a106d73e5c954 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang Date: Tue, 30 Jan 2024 17:55:13 +0800 Subject: [PATCH 108/163] Selectively capture logs from separate jobs Before this commit, the logging stdout we captured will be mixed if we submit several jobs at the same time. This captures outputs for each thread (job) separately. See https://stackoverflow.com/questions/14890997/redirect-stdout-to-a-file-only-for-a-specific-thread. 
Co-authored-by: Simeon Carstens --- looper/api/main.py | 22 +-- looper/api/stdout_redirects.py | 257 +++++++++++++++++++++++++++++++++ 2 files changed, 264 insertions(+), 15 deletions(-) create mode 100644 looper/api/stdout_redirects.py diff --git a/looper/api/main.py b/looper/api/main.py index e3ab9a82b..f34fb4c17 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,6 +1,5 @@ import io from argparse import Namespace -from contextlib import redirect_stderr, redirect_stdout import secrets from typing import Dict, TypeAlias @@ -11,6 +10,10 @@ from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser +import stdout_redirects + +stdout_redirects.enable_proxy() + JobId: TypeAlias = str class Job(pydantic.BaseModel): @@ -36,23 +39,12 @@ class Job(pydantic.BaseModel): def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: argparse_namespace = create_argparse_namespace(top_level_model) - stdout_stream = io.StringIO() - stderr_stream = io.StringIO() - with redirect_stderr(stderr_stream), redirect_stdout(stdout_stream): - # TODO: as it stands, because of the `async def`, and the lacking `await` - # in the following line, this endpoint is (I (Simeon) think) currently blocking. - # We would need to make `run_looper()` return a future, but it inherently does - # not support `async` calls. - # So one option would be to run `run_looper()` in its own thread whose - # termination we can `await`, using `fastapi.run_in_threadpool`. But that fails - # with an error stemming from the `yacman` library about `signal.signal` only - # working in the main thread of the main interpreter. We have to investigate - # how to solve this. 
- run_looper(argparse_namespace, None, True) + stdout_stream = stdout_redirects.redirect() + + run_looper(argparse_namespace, None, True) jobs[job_id].status = "completed" jobs[job_id].stdout = stdout_stream.getvalue() - jobs[job_id].stderr = stderr_stream.getvalue() def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: diff --git a/looper/api/stdout_redirects.py b/looper/api/stdout_redirects.py new file mode 100644 index 000000000..9daffe78b --- /dev/null +++ b/looper/api/stdout_redirects.py @@ -0,0 +1,257 @@ +# copied from https://stackoverflow.com/a/43667367/1193986 +# +# (c) umichscoots 2017 +# License unsepcified. Assumed to be CC-by-sa as is StackOverflow's policy +# +# The class LocalProxy is taken from the werkzeug project +# https://raw.githubusercontent.com/pallets/werkzeug/ef545f0d0bf28cbad02066b4cb7471bea50a93ee/src/werkzeug/local.py +# It is licensed under the BSD-3-Clause License +# +# I guess that means the result is CC-by-SA + + +import sys +import threading +from io import StringIO +from typing import Any, Optional, Union + +# Save all of the objects for use later. +orig___stdout__ = sys.__stdout__ +orig___stderr__ = sys.__stderr__ +orig_stdout = sys.stdout +orig_stderr = sys.stderr +thread_proxies = {} + + +class LocalProxy: + """Acts as a proxy for a werkzeug local. Forwards all operations to + a proxied object. The only operations not supported for forwarding + are right handed operands and any kind of assignment. + Example usage:: + from werkzeug.local import Local + l = Local() + # these are proxies + request = l('request') + user = l('user') + from werkzeug.local import LocalStack + _response_local = LocalStack() + # this is a proxy + response = _response_local() + Whenever something is bound to l.user / l.request the proxy objects + will forward all operations. If no object is bound a :exc:`RuntimeError` + will be raised. 
+ To create proxies to :class:`Local` or :class:`LocalStack` objects, + call the object as shown above. If you want to have a proxy to an + object looked up by a function, you can (as of Werkzeug 0.6.1) pass + a function to the :class:`LocalProxy` constructor:: + session = LocalProxy(lambda: get_current_request().session) + .. versionchanged:: 0.6.1 + The class can be instantiated with a callable as well now. + """ + + __slots__ = ("__local", "__dict__", "__name__", "__wrapped__") + + def __init__( + self, + local: Union[Any, "LocalProxy", "LocalStack"], + name: Optional[str] = None, + ) -> None: + object.__setattr__(self, "_LocalProxy__local", local) + object.__setattr__(self, "__name__", name) + if callable(local) and not hasattr(local, "__release_local__"): + # "local" is a callable that is not an instance of Local or + # LocalManager: mark it as a wrapped function. + object.__setattr__(self, "__wrapped__", local) + + def _get_current_object( + self, + ) -> object: + """Return the current object. This is useful if you want the real + object behind the proxy at a time for performance reasons or because + you want to pass the object into a different context. 
+ """ + if not hasattr(self.__local, "__release_local__"): + return self.__local() + try: + return getattr(self.__local, self.__name__) + except AttributeError: + raise RuntimeError(f"no object bound to {self.__name__}") + + @property + def __dict__(self): + try: + return self._get_current_object().__dict__ + except RuntimeError: + raise AttributeError("__dict__") + + def __repr__(self) -> str: + try: + obj = self._get_current_object() + except RuntimeError: + return f"<{type(self).__name__} unbound>" + return repr(obj) + + def __bool__(self) -> bool: + try: + return bool(self._get_current_object()) + except RuntimeError: + return False + + def __dir__(self): + try: + return dir(self._get_current_object()) + except RuntimeError: + return [] + + def __getattr__(self, name: str) -> Any: + if name == "__members__": + return dir(self._get_current_object()) + return getattr(self._get_current_object(), name) + + def __setitem__(self, key: Any, value: Any) -> None: + self._get_current_object()[key] = value # type: ignore + + def __delitem__(self, key): + del self._get_current_object()[key] + + __setattr__ = lambda x, n, v: setattr(x._get_current_object(), n, v) # type: ignore + __delattr__ = lambda x, n: delattr(x._get_current_object(), n) # type: ignore + __str__ = lambda x: str(x._get_current_object()) # type: ignore + __lt__ = lambda x, o: x._get_current_object() < o + __le__ = lambda x, o: x._get_current_object() <= o + __eq__ = lambda x, o: x._get_current_object() == o # type: ignore + __ne__ = lambda x, o: x._get_current_object() != o # type: ignore + __gt__ = lambda x, o: x._get_current_object() > o + __ge__ = lambda x, o: x._get_current_object() >= o + __hash__ = lambda x: hash(x._get_current_object()) # type: ignore + __call__ = lambda x, *a, **kw: x._get_current_object()(*a, **kw) + __len__ = lambda x: len(x._get_current_object()) + __getitem__ = lambda x, i: x._get_current_object()[i] + __iter__ = lambda x: iter(x._get_current_object()) + __contains__ = lambda 
x, i: i in x._get_current_object() + __add__ = lambda x, o: x._get_current_object() + o + __sub__ = lambda x, o: x._get_current_object() - o + __mul__ = lambda x, o: x._get_current_object() * o + __floordiv__ = lambda x, o: x._get_current_object() // o + __mod__ = lambda x, o: x._get_current_object() % o + __divmod__ = lambda x, o: x._get_current_object().__divmod__(o) + __pow__ = lambda x, o: x._get_current_object() ** o + __lshift__ = lambda x, o: x._get_current_object() << o + __rshift__ = lambda x, o: x._get_current_object() >> o + __and__ = lambda x, o: x._get_current_object() & o + __xor__ = lambda x, o: x._get_current_object() ^ o + __or__ = lambda x, o: x._get_current_object() | o + __div__ = lambda x, o: x._get_current_object().__div__(o) + __truediv__ = lambda x, o: x._get_current_object().__truediv__(o) + __neg__ = lambda x: -(x._get_current_object()) + __pos__ = lambda x: +(x._get_current_object()) + __abs__ = lambda x: abs(x._get_current_object()) + __invert__ = lambda x: ~(x._get_current_object()) + __complex__ = lambda x: complex(x._get_current_object()) + __int__ = lambda x: int(x._get_current_object()) + __long__ = lambda x: long(x._get_current_object()) # type: ignore # noqa + __float__ = lambda x: float(x._get_current_object()) + __oct__ = lambda x: oct(x._get_current_object()) + __hex__ = lambda x: hex(x._get_current_object()) + __index__ = lambda x: x._get_current_object().__index__() + __coerce__ = lambda x, o: x._get_current_object().__coerce__(x, o) + __enter__ = lambda x: x._get_current_object().__enter__() + __exit__ = lambda x, *a, **kw: x._get_current_object().__exit__(*a, **kw) + __radd__ = lambda x, o: o + x._get_current_object() + __rsub__ = lambda x, o: o - x._get_current_object() + __rmul__ = lambda x, o: o * x._get_current_object() + __rdiv__ = lambda x, o: o / x._get_current_object() + __rtruediv__ = __rdiv__ + __rfloordiv__ = lambda x, o: o // x._get_current_object() + __rmod__ = lambda x, o: o % x._get_current_object() + 
__rdivmod__ = lambda x, o: x._get_current_object().__rdivmod__(o) + __copy__ = lambda x: copy.copy(x._get_current_object()) + __deepcopy__ = lambda x, memo: copy.deepcopy(x._get_current_object(), memo) + + +def redirect(): + """ + Enables the redirect for the current thread's output to a single cStringIO + object and returns the object. + + :return: The StringIO object. + :rtype: ``cStringIO.StringIO`` + """ + # Get the current thread's identity. + ident = threading.currentThread().ident + + # Enable the redirect and return the cStringIO object. + thread_proxies[ident] = StringIO() + return thread_proxies[ident] + + +def stop_redirect(): + """ + Enables the redirect for the current thread's output to a single cStringIO + object and returns the object. + + :return: The final string value. + :rtype: ``str`` + """ + # Get the current thread's identity. + ident = threading.currentThread().ident + + # Only act on proxied threads. + if ident not in thread_proxies: + return + + # Read the value, close/remove the buffer, and return the value. + retval = thread_proxies[ident].getvalue() + thread_proxies[ident].close() + del thread_proxies[ident] + return retval + + +def _get_stream(original): + """ + Returns the inner function for use in the LocalProxy object. + + :param original: The stream to be returned if thread is not proxied. + :type original: ``file`` + :return: The inner function for use in the LocalProxy object. + :rtype: ``function`` + """ + + def proxy(): + """ + Returns the original stream if the current thread is not proxied, + otherwise we return the proxied item. + + :return: The stream object for the current thread. + :rtype: ``file`` + """ + # Get the current thread's identity. + ident = threading.currentThread().ident + + # Return the proxy, otherwise return the original. + return thread_proxies.get(ident, original) + + # Return the inner function. 
+ return proxy + + +def enable_proxy(): + """ + Overwrites __stdout__, __stderr__, stdout, and stderr with the proxied + objects. + """ + sys.__stdout__ = LocalProxy(_get_stream(sys.__stdout__)) + sys.__stderr__ = LocalProxy(_get_stream(sys.__stderr__)) + sys.stdout = LocalProxy(_get_stream(sys.stdout)) + sys.stderr = LocalProxy(_get_stream(sys.stderr)) + + +def disable_proxy(): + """ + Overwrites __stdout__, __stderr__, stdout, and stderr with the original + objects. + """ + sys.__stdout__ = orig___stdout__ + sys.__stderr__ = orig___stderr__ + sys.stdout = orig_stdout + sys.stderr = orig_stderr From 8ffaef76add0c1c7583fc0bc842f603f46783bf2 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 30 Jan 2024 13:34:32 +0100 Subject: [PATCH 109/163] Add source for `stdout_redirects.py` --- looper/api/stdout_redirects.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/looper/api/stdout_redirects.py b/looper/api/stdout_redirects.py index 9daffe78b..f430b1b42 100644 --- a/looper/api/stdout_redirects.py +++ b/looper/api/stdout_redirects.py @@ -1,3 +1,5 @@ +# Copied from https://gitlab.com/yquemener/stdout-redirects +# # copied from https://stackoverflow.com/a/43667367/1193986 # # (c) umichscoots 2017 From d8ae6ec6a3293d834172b4c23d072e4ca36af4f3 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 30 Jan 2024 13:47:51 +0100 Subject: [PATCH 110/163] Add a comment about not calling `stdout_redirect.stop_redirect()` --- looper/api/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/looper/api/main.py b/looper/api/main.py index f34fb4c17..94462bd2c 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -43,6 +43,9 @@ def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: run_looper(argparse_namespace, None, True) + # Here, we should call `stdout_redirects.stop_redirect()`, but that fails for reasons discussed + # in the following issue: https://github.com/python/cpython/issues/80374 + # But this *seems* not to pose any problems. 
jobs[job_id].status = "completed" jobs[job_id].stdout = stdout_stream.getvalue() From db9f8f52ae1e19b9a5f25152e19b8b4d5f6e2bb9 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 30 Jan 2024 13:52:30 +0100 Subject: [PATCH 111/163] Remove superfluous import --- looper/api/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 94462bd2c..d44bec049 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,4 +1,3 @@ -import io from argparse import Namespace import secrets from typing import Dict, TypeAlias From ad621c6cb4b844b9edf39248ab9933c9b286b695 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 30 Jan 2024 13:52:49 +0100 Subject: [PATCH 112/163] Remove `progress` field from `Job` model --- looper/api/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index d44bec049..4e9f1aecf 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -24,7 +24,6 @@ class Job(pydantic.BaseModel): default="in_progress", description="The current status of the job. Can be either `in_progress` or `completed`." ) - progress: int = 0 stdout: str | None = pydantic.Field(default=None, description="Standard output produced by `looper` while performing the requested action" ) From 95278b3749cbf62c1a4bc8e5e457da55fe2309b4 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Tue, 30 Jan 2024 17:03:00 +0100 Subject: [PATCH 113/163] Capture subprocess output to `sys.stdout`/`sys.stderr` This allows us to capture the output of the Bash scripts or commands `looper` executes. 
--- looper/conductor.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/looper/conductor.py b/looper/conductor.py index e83616332..ddb36fd3a 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -4,6 +4,7 @@ import logging import os import subprocess +import sys import time import yaml from copy import copy, deepcopy @@ -387,7 +388,12 @@ def submit(self, force=False): # Capture submission command return value so that we can # intercept and report basic submission failures; #167 try: - subprocess.check_call(submission_command, shell=True) + # Using `subprocess.run()` instead of `subprocess.check()` allows us to capture + # stdout and stderr of the child process, and pass it to the `stdout` / `stderr` + # of `looper`'s Python process. + result = subprocess.run(submission_command, check=True, shell=True, capture_output=True) + print(result.stdout.decode()) + print(result.stderr.decode(), file=sys.stderr) except subprocess.CalledProcessError: fails = ( "" if self.collate else [s.sample_name for s in self._samples] From 6870bcd80e61c6219dbae111d48bba9b702b18c8 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Thu, 1 Feb 2024 10:32:19 +0100 Subject: [PATCH 114/163] Make CLI for HTTP API server Co-authored-by: Zhihan Zhang --- looper/api/main.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 4e9f1aecf..34fd8f03c 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,10 +1,11 @@ -from argparse import Namespace +from argparse import ArgumentParser, Namespace import secrets from typing import Dict, TypeAlias import fastapi from fastapi import FastAPI import pydantic +import uvicorn from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser @@ -91,3 +92,12 @@ async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fasta @app.get("/status/{job_id}") async def get_status(job_id: 
JobId): return jobs[job_id] + + +if __name__ == "__main__": + parser = ArgumentParser("looper-serve", description="Run looper HTTP API server") + parser.add_argument("--host", type=str, default="0.0.0.0", help="Host IP address to use (127.0.0.1 for local access only)") + parser.add_argument("--port", type=int, default=8000, help="Port the server listens on") + args = parser.parse_args() + + uvicorn.run(app, host=args.host, port=args.port) From 8ead693669d6146dfb6d9c5fec2accdd2a35dfbe Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Thu, 1 Feb 2024 10:44:05 +0100 Subject: [PATCH 115/163] Add entry point console script for HTTP API server --- looper/api/main.py | 19 +++++++++++++++---- setup.py | 1 + 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 34fd8f03c..3e43e70eb 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -10,7 +10,7 @@ from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser -import stdout_redirects +from looper.api import stdout_redirects stdout_redirects.enable_proxy() @@ -94,10 +94,21 @@ async def get_status(job_id: JobId): return jobs[job_id] -if __name__ == "__main__": +def main() -> None: parser = ArgumentParser("looper-serve", description="Run looper HTTP API server") - parser.add_argument("--host", type=str, default="0.0.0.0", help="Host IP address to use (127.0.0.1 for local access only)") - parser.add_argument("--port", type=int, default=8000, help="Port the server listens on") + parser.add_argument( + "--host", + type=str, + default="0.0.0.0", + help="Host IP address to use (127.0.0.1 for local access only)", + ) + parser.add_argument( + "--port", type=int, default=8000, help="Port the server listens on" + ) args = parser.parse_args() uvicorn.run(app, host=args.host, port=args.port) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index c05314fe6..57ead1bad 100644 --- a/setup.py +++ 
b/setup.py @@ -82,6 +82,7 @@ def get_static(name, condition=None): "looper = looper.__main__:main", "divvy = looper.__main__:divvy_main", "looper-pydantic-argparse = looper.cli_pydantic:main", + "looper-serve = looper.api.main:main" ], }, scripts=scripts, From 8b1b2cad5a2c87571b8488323d991780c894acfe Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Thu, 1 Feb 2024 17:01:58 +0100 Subject: [PATCH 116/163] Replace `stdout` / `stderr` job fields with `job_output` field The module we use to capture console output does not seem to distinguish easily between `stdout` and `stderr`. So we rather use a generic `console_output` field in the job model that subsumes both. --- looper/api/main.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 3e43e70eb..5120ec14a 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -25,11 +25,8 @@ class Job(pydantic.BaseModel): default="in_progress", description="The current status of the job. Can be either `in_progress` or `completed`." 
) - stdout: str | None = pydantic.Field(default=None, - description="Standard output produced by `looper` while performing the requested action" - ) - stderr: str | None = pydantic.Field(default=None, - description="Standard error output produced by `looper` while performing the requested action" + console_output: str | None = pydantic.Field(default=None, + description="Console output produced by `looper` while performing the requested action" ) app = FastAPI(validate_model=True) @@ -38,7 +35,7 @@ class Job(pydantic.BaseModel): def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: argparse_namespace = create_argparse_namespace(top_level_model) - stdout_stream = stdout_redirects.redirect() + output_stream = stdout_redirects.redirect() run_looper(argparse_namespace, None, True) @@ -46,7 +43,7 @@ def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: # in the following issue: https://github.com/python/cpython/issues/80374 # But this *seems* not to pose any problems. jobs[job_id].status = "completed" - jobs[job_id].stdout = stdout_stream.getvalue() + jobs[job_id].console_output = output_stream.getvalue() def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: From 0d6b0162f00b8c51354ba17d39e9469f24632447 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Thu, 1 Feb 2024 17:03:28 +0100 Subject: [PATCH 117/163] Add return type to `/status` endpoint This makes the Swagger documentation show the job schema for that endpoint. 
--- looper/api/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/api/main.py b/looper/api/main.py index 5120ec14a..fc7e1a1ce 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -87,7 +87,7 @@ async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fasta @app.get("/status/{job_id}") -async def get_status(job_id: JobId): +async def get_status(job_id: JobId) -> Job: return jobs[job_id] From 2b4a962a54f6ae779b8eb289deade7c2d9ca9003 Mon Sep 17 00:00:00 2001 From: Zhihan Zhang <32028117+zz1874@users.noreply.github.com> Date: Fri, 2 Feb 2024 00:12:34 +0800 Subject: [PATCH 118/163] Add HTTP API Documentation (#3) * Apply formatter * Add documentation for `POST` and `GET` requests * Update looper/api/main.py Co-authored-by: Simeon Carstens * Update looper/api/main.py Co-authored-by: Simeon Carstens * Add where to access the API documentation --------- Co-authored-by: Simeon Carstens --- looper/api/README.md | 8 ++++++++ looper/api/main.py | 34 ++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/looper/api/README.md b/looper/api/README.md index ffcf951e7..64980ee5a 100644 --- a/looper/api/README.md +++ b/looper/api/README.md @@ -19,3 +19,11 @@ curl -X POST -H "Content-Type: application/json" -d '{"run": {}, "looper_config" with the project files in the same `looper/api` folder. This example sends a JSON payload with the `run` and `looper_config` parameters to the `/` endpoint. + +## API Documentation +The API documentation is automatically generated and can be accessed in your web browser: + +Swagger UI: http://127.0.0.1:8000/docs +ReDoc: http://127.0.0.1:8000/redoc + +Explore the API documentation to understand available endpoints, request parameters, and response formats. 
diff --git a/looper/api/main.py b/looper/api/main.py index fc7e1a1ce..fb98746cc 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,12 +1,14 @@ from argparse import ArgumentParser, Namespace import secrets +import io from typing import Dict, TypeAlias import fastapi -from fastapi import FastAPI import pydantic import uvicorn +from fastapi import FastAPI + from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser @@ -16,19 +18,22 @@ JobId: TypeAlias = str + class Job(pydantic.BaseModel): id: JobId = pydantic.Field( default_factory=lambda: secrets.token_urlsafe(4), - description="The unique identifier of the job" + description="The unique identifier of the job", ) status: str = pydantic.Field( default="in_progress", - description="The current status of the job. Can be either `in_progress` or `completed`." + description="The current status of the job. Can be either `in_progress` or `completed`.", ) - console_output: str | None = pydantic.Field(default=None, - description="Console output produced by `looper` while performing the requested action" + console_output: str | None = pydantic.Field( + default=None, + description="Console output produced by `looper` while performing the requested action", ) + app = FastAPI(validate_model=True) jobs: Dict[str, Job] = {} @@ -78,15 +83,28 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: setattr(namespace, argname, command_namespace) return namespace -@app.post("/", status_code=202) -async def main_endpoint(top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks) -> Dict: + +@app.post( + "/", + status_code=202, + summary="Run Looper", + description="Start a `looper` command with arguments specified in " + "`top_level_model` in the background and return a job identifier.", +) +async def main_endpoint( + top_level_model: TopLevelParser, background_tasks: fastapi.BackgroundTasks +) -> Dict: job = Job() 
jobs[job.id] = job background_tasks.add_task(background_async, top_level_model, job.id) return {"job_id": job.id} -@app.get("/status/{job_id}") +@app.get( + "/status/{job_id}", + summary="Get job status", + description="Retrieve the status of a job based on its unique identifier.", +) async def get_status(job_id: JobId) -> Job: return jobs[job_id] From 6619440572fbd5c36e08fa37a07797d478e3edb5 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Thu, 1 Feb 2024 17:52:22 +0100 Subject: [PATCH 119/163] Run formatter --- looper/cli_pydantic.py | 7 +++++-- looper/conductor.py | 4 +++- setup.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index a7da330f7..c1846f3d4 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -47,7 +47,9 @@ ) -def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_api=False): +def run_looper( + args: Namespace | TopLevelParser, parser: ArgumentParser, http_api=False +): # here comes adapted `cli_looper.py` code global _LOGGER @@ -98,7 +100,8 @@ def run_looper(args: Namespace | TopLevelParser, parser: ArgumentParser, http_ap divcfg = ( select_divvy_config(filepath=subcommand_args.divvy) - if hasattr(subcommand_args, "divvy") else None + if hasattr(subcommand_args, "divvy") + else None ) args = enrich_args_via_cfg(args, parser, False, http_api) diff --git a/looper/conductor.py b/looper/conductor.py index ddb36fd3a..01fe2e102 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -391,7 +391,9 @@ def submit(self, force=False): # Using `subprocess.run()` instead of `subprocess.check()` allows us to capture # stdout and stderr of the child process, and pass it to the `stdout` / `stderr` # of `looper`'s Python process. 
- result = subprocess.run(submission_command, check=True, shell=True, capture_output=True) + result = subprocess.run( + submission_command, check=True, shell=True, capture_output=True + ) print(result.stdout.decode()) print(result.stderr.decode(), file=sys.stderr) except subprocess.CalledProcessError: diff --git a/setup.py b/setup.py index 57ead1bad..1e3857972 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,7 @@ def get_static(name, condition=None): "looper = looper.__main__:main", "divvy = looper.__main__:divvy_main", "looper-pydantic-argparse = looper.cli_pydantic:main", - "looper-serve = looper.api.main:main" + "looper-serve = looper.api.main:main", ], }, scripts=scripts, From b3aa4aa3c20edc8a24cb938c00544c0d7c1d0f4f Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 2 Feb 2024 10:44:40 +0100 Subject: [PATCH 120/163] Make HTTP API code Python 3.8 compatible --- looper/api/main.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index fb98746cc..963aa1cda 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,6 +1,5 @@ from argparse import ArgumentParser, Namespace import secrets -import io from typing import Dict, TypeAlias import fastapi @@ -16,11 +15,9 @@ stdout_redirects.enable_proxy() -JobId: TypeAlias = str - class Job(pydantic.BaseModel): - id: JobId = pydantic.Field( + id: str = pydantic.Field( default_factory=lambda: secrets.token_urlsafe(4), description="The unique identifier of the job", ) @@ -38,7 +35,7 @@ class Job(pydantic.BaseModel): jobs: Dict[str, Job] = {} -def background_async(top_level_model: TopLevelParser, job_id: JobId) -> None: +def background_async(top_level_model: TopLevelParser, job_id: str) -> None: argparse_namespace = create_argparse_namespace(top_level_model) output_stream = stdout_redirects.redirect() @@ -105,7 +102,7 @@ async def main_endpoint( summary="Get job status", description="Retrieve the status of a job based on its unique identifier.", ) 
-async def get_status(job_id: JobId) -> Job: +async def get_status(job_id: str) -> Job: return jobs[job_id] From d75942cbcfa555c5bb37a4757b129355c9433d7c Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 2 Feb 2024 11:48:56 +0100 Subject: [PATCH 121/163] Update README with more detailed usage instructions --- looper/api/README.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/looper/api/README.md b/looper/api/README.md index 64980ee5a..e0cc9359f 100644 --- a/looper/api/README.md +++ b/looper/api/README.md @@ -5,20 +5,25 @@ This API provides an HTTP interface for running the `looper` commands, allowing users to interact with Looper via HTTP requests. ## Usage -### Running the API -To run the API, execute the following command: +### Running the server +Run the app: ```bash -cd looper/api -uvicorn main:app --reload +looper-serve [--host ] [--port ] ``` -### Example API Usage -To run the `looper run` command through the HTTP API, you can use the following curl command: + +> [!NOTE] +This assumes that all files specified in the arguments are available on the file system of the machine that is running the HTTP API server. Best make sure you use absolute file paths in all `looper` YAML configuration files. + +### Sending requests +To test this, you can clone the [`hello_looper`](https://github.com/pepkit/hello_looper) repository and then run (for example) the following in a second terminal: ```bash -curl -X POST -H "Content-Type: application/json" -d '{"run": {}, "looper_config": ".looper.yaml"}' "http://127.0.0.1:8000" +curl -X POST -H "Content-Type: application/json" -d '{"run": {"time_delay": 5}, "looper_config": "/path/to/hello_looper/.looper.yaml"}' "http://127.0.0.1:8000" ``` -with the project files in the same `looper/api` folder. - -This example sends a JSON payload with the `run` and `looper_config` parameters to the `/` endpoint. +This will return a six-letter job ID, say `abc123`. 
Then get the result / output of the run with +```bash +curl -X GET -v localhost:8000/status/abc123 +``` +For better visualization / readability, you can post-process the output by piping it to `jq` (` | jq -r .console_output`). ## API Documentation The API documentation is automatically generated and can be accessed in your web browser: From 480c4dfb17269c1b736b48b00851395a990b9e91 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 22:03:30 +0000 Subject: [PATCH 122/163] Replace wildcard imports with explicit imports for mkdocstrings compatibility This commit addresses namespace pollution and improves tool compatibility by replacing all wildcard imports with explicit imports in the looper package. Changes: - looper/looper.py: Removed unused 'from peppy.const import *', replaced wildcard imports with explicit imports for exceptions and constants - looper/__init__.py: Removed unused 'from .const import *' - looper/utils.py: Replaced 'from typing import *', 'from peppy.const import *', and 'from .const import *' with explicit imports - looper/conductor.py: Replaced 'from typing import *' and 'from .const import *' with explicit imports, removed duplicate PipelineLevel import - looper/cli_pydantic.py: Replaced all wildcard imports with explicit imports, removed unused 'from .parser_types import *' - looper/project.py: Replaced 'from .exceptions import *' and 'from .utils import *' with explicit imports - looper/pipeline_interface.py: Replaced 'from .const import *' with explicit imports - looper/plugins.py: Replaced 'from .const import *' with explicit imports - looper/processed_project.py: Removed unused 'from eido.const import *' and 'from eido.exceptions import *' Benefits: - Enables mkdocstrings documentation generation - Improves code clarity by making dependencies explicit - Follows PEP 8 best practices - Reduces namespace pollution All changes are backwards compatible and maintain existing functionality. 
--- looper/__init__.py | 1 - looper/cli_pydantic.py | 29 +++++++++++++++++++++++++---- looper/conductor.py | 22 +++++++++++++++++++--- looper/looper.py | 16 +++++++++++++--- looper/pipeline_interface.py | 14 +++++++++++++- looper/plugins.py | 2 +- looper/processed_project.py | 2 -- looper/project.py | 34 +++++++++++++++++++++++++++++++--- looper/utils.py | 23 ++++++++++++++++++++--- 9 files changed, 122 insertions(+), 21 deletions(-) diff --git a/looper/__init__.py b/looper/__init__.py index fe751d02d..0be3b8cb8 100644 --- a/looper/__init__.py +++ b/looper/__init__.py @@ -26,7 +26,6 @@ write_sample_yaml_prj, write_custom_template, ) -from .const import * from .pipeline_interface import PipelineInterface from .project import Project diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 3ec094d0f..bb6dd0e27 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -35,11 +35,32 @@ TopLevelParser, add_short_arguments, ) -from .const import * +from .const import ( + CLI_KEY, + CLI_PROJ_ATTRS, + EXAMPLE_COMPUTE_SPEC_FMT, + PipelineLevel, + PROJECT_PL_ARG, + SAMPLE_EXCLUSION_OPTNAME, + SAMPLE_INCLUSION_OPTNAME, + SAMPLE_PL_ARG, +) from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config -from .exceptions import * -from .looper import * -from .parser_types import * +from .exceptions import ( + MisconfigurationException, + PipestatConfigurationException, + SampleFailedException, +) +from .looper import ( + Checker, + Cleaner, + Collator, + Destroyer, + Linker, + Reporter, + Runner, + Tabulator, +) from .project import Project, ProjectContext from .utils import ( dotfile_path, diff --git a/looper/conductor.py b/looper/conductor.py index 268db5432..cf4388451 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -12,7 +12,7 @@ from math import ceil from json import loads from subprocess import check_output -from typing import * +from typing import Optional from eido import read_schema, get_input_files_size from eido.const import 
INPUT_FILE_SIZE_KEY, MISSING_KEY @@ -25,7 +25,24 @@ from yaml import dump from yacman import FutureYAMLConfigManager as YAMLConfigManager -from .const import * +from .const import ( + EXTRA_PROJECT_CMD_TEMPLATE, + EXTRA_SAMPLE_CMD_TEMPLATE, + JOB_NAME_KEY, + NOT_SUB_MSG, + OUTDIR_KEY, + OUTPUT_SCHEMA_KEY, + PipelineLevel, + PRE_SUBMIT_CMD_KEY, + PRE_SUBMIT_HOOK_KEY, + PRE_SUBMIT_PY_FUN_KEY, + PROJECT_PL_KEY, + RESULTS_SUBDIR_KEY, + SAMPLE_CWL_YAML_PATH_KEY, + SAMPLE_PL_KEY, + SUBMISSION_SUBDIR_KEY, + VAR_TEMPL_KEY, +) from .exceptions import JobSubmissionException from .processed_project import populate_sample_paths from .utils import ( @@ -33,7 +50,6 @@ jinja_render_template_strictly, expand_nested_var_templates, ) -from .const import PipelineLevel _LOGGER = logging.getLogger(__name__) diff --git a/looper/looper.py b/looper/looper.py index cb3cb3014..fa0fd1249 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -27,7 +27,6 @@ from eido import validate_config, validate_sample from eido.exceptions import EidoValidationError from jsonschema import ValidationError -from peppy.const import * from peppy.exceptions import RemoteYAMLError from rich.color import Color from rich.console import Console @@ -37,8 +36,19 @@ from .conductor import SubmissionConductor -from .exceptions import * -from .const import * +from .exceptions import ( + JobSubmissionException, + LooperReportError, + MisconfigurationException, + SampleFailedException, +) +from .const import ( + DEBUG_COMMANDS, + DEBUG_EIDO_VALIDATION, + DEBUG_JOBS, + NOT_SUB_MSG, + SUBMISSION_FAILURE_MESSAGE, +) from .project import Project from .utils import ( desired_samples_range_skipped, diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index f7f0793ea..ee76f790c 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -12,7 +12,19 @@ from ubiquerg import expandpath, is_url from yacman import load_yaml, YAMLConfigManager -from .const import * +from .const import ( + 
COMPUTE_KEY, + DYN_VARS_KEY, + FILE_SIZE_COLNAME, + ID_COLNAME, + INPUT_SCHEMA_KEY, + LOOPER_KEY, + PIFACE_SCHEMA_SRC, + PIPELINE_INTERFACE_PIPELINE_NAME_KEY, + RESOURCES_KEY, + SIZE_DEP_VARS_KEY, + VAR_TEMPL_KEY, +) from .exceptions import ( InvalidResourceSpecificationException, PipelineInterfaceConfigError, diff --git a/looper/plugins.py b/looper/plugins.py index dc34283e0..b877f050c 100644 --- a/looper/plugins.py +++ b/looper/plugins.py @@ -1,6 +1,6 @@ import logging import os -from .const import * +from .const import SAMPLE_CWL_YAML_PATH_KEY, SAMPLE_YAML_PATH_KEY, SAMPLE_YAML_PRJ_PATH_KEY from .conductor import _get_yaml_path _LOGGER = logging.getLogger(__name__) diff --git a/looper/processed_project.py b/looper/processed_project.py index 39b87fa0d..93fb28044 100644 --- a/looper/processed_project.py +++ b/looper/processed_project.py @@ -128,8 +128,6 @@ import os from logging import getLogger -from eido.const import * -from eido.exceptions import * from peppy.project import Project from peppy.sample import Sample diff --git a/looper/project.py b/looper/project.py index 88de52e00..cc2414f0c 100644 --- a/looper/project.py +++ b/looper/project.py @@ -20,11 +20,39 @@ from .conductor import write_pipestat_config -from .exceptions import * +from .exceptions import MisconfigurationException, PipelineInterfaceConfigError from .pipeline_interface import PipelineInterface from .processed_project import populate_project_paths, populate_sample_paths -from .utils import * -from .const import PipelineLevel +from .utils import ( + expandpath, + fetch_sample_flags, + get_sample_status, + getLogger, + is_pephub_registry_path, +) +from .const import ( + ALL_SUBCMD_KEY, + CLI_KEY, + CLI_PROJ_ATTRS, + COMPUTE_PACKAGE_KEY, + CONFIG_KEY, + DRY_RUN_KEY, + EXTRA_KEY, + FILE_CHECKS_KEY, + INPUT_SCHEMA_KEY, + LOOPER_KEY, + OUTDIR_KEY, + OUTPUT_SCHEMA_KEY, + PEP_CONFIG_KEY, + PIFACE_KEY_SELECTOR, + PIPELINE_INTERFACE_PIPELINE_NAME_KEY, + PIPELINE_INTERFACES_KEY, + PipelineLevel, + 
PIPESTAT_KEY, + RESULTS_SUBDIR_KEY, + SAMPLE_PL_ARG, + SUBMISSION_SUBDIR_KEY, +) __all__ = ["Project"] diff --git a/looper/utils.py b/looper/utils.py index b5d904c52..78de020c6 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -6,20 +6,37 @@ import itertools from logging import getLogger import os -from typing import * +from typing import Iterable, List, Optional, Tuple, Union import re import jinja2 import yaml from peppy import Project as peppyProject -from peppy.const import * +from peppy.const import AMENDMENTS_KEY, CONFIG_KEY, NAME_KEY, SAMPLE_MODS_KEY from ubiquerg import convert_value, expandpath, parse_registry_path, deep_update from pephubclient.constants import RegistryPath from pydantic import ValidationError from yacman import load_yaml from yaml.parser import ParserError -from .const import * +from .const import ( + ALL_SUBCMD_KEY, + CLI_KEY, + FLAGS, + LOOPER_DOTFILE_NAME, + LOOPER_GENERIC_COUNT_LINES, + LOOPER_GENERIC_OUTPUT_SCHEMA, + LOOPER_GENERIC_PIPELINE, + LOOPER_KEY, + OUTDIR_KEY, + PEP_CONFIG_KEY, + PIPELINE_INTERFACES_KEY, + PIPESTAT_KEY, + POSITIONAL, + PROJECT_PL_ARG, + PipelineLevel, + SAMPLE_PL_ARG, +) from .command_models.commands import SUPPORTED_COMMANDS from .exceptions import MisconfigurationException, PipelineInterfaceConfigError from rich.console import Console From eac3306d2776024087f83f6c6faae2851dd3cb9e Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 22:13:36 +0000 Subject: [PATCH 123/163] Apply black formatting to plugins.py --- looper/plugins.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/looper/plugins.py b/looper/plugins.py index b877f050c..d4cc52651 100644 --- a/looper/plugins.py +++ b/looper/plugins.py @@ -1,6 +1,10 @@ import logging import os -from .const import SAMPLE_CWL_YAML_PATH_KEY, SAMPLE_YAML_PATH_KEY, SAMPLE_YAML_PRJ_PATH_KEY +from .const import ( + SAMPLE_CWL_YAML_PATH_KEY, + SAMPLE_YAML_PATH_KEY, + SAMPLE_YAML_PRJ_PATH_KEY, +) from .conductor import _get_yaml_path 
_LOGGER = logging.getLogger(__name__) From e8a7a2f7694125df75d9bda65a30a9b445edec39 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 22:20:46 +0000 Subject: [PATCH 124/163] Fix import errors in project.py - Add missing import of peppyProject from peppy - Add missing typing imports (Iterable, List, NoReturn, Union) - Move CONFIG_KEY import from looper.const to peppy.const where it's actually defined This fixes the ImportError that was preventing tests from running. --- looper/project.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/looper/project.py b/looper/project.py index cc2414f0c..da892204a 100644 --- a/looper/project.py +++ b/looper/project.py @@ -2,6 +2,7 @@ import itertools import os +from typing import Iterable, List, NoReturn, Union from yaml import safe_load @@ -15,7 +16,9 @@ from eido import PathAttrNotFoundError, read_schema from jsonschema import ValidationError from pandas.core.common import flatten +from peppy import Project as peppyProject from peppy.utils import make_abs_via_cfg +from peppy.const import CONFIG_KEY from pipestat import PipestatManager from .conductor import write_pipestat_config @@ -35,7 +38,6 @@ CLI_KEY, CLI_PROJ_ATTRS, COMPUTE_PACKAGE_KEY, - CONFIG_KEY, DRY_RUN_KEY, EXTRA_KEY, FILE_CHECKS_KEY, From acda347f8abb686ad98b6d959756bda7904aeecc Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 22:26:55 +0000 Subject: [PATCH 125/163] Fix missing imports causing test failures - Add missing 'import os' in cli_pydantic.py - Add missing eido imports in processed_project.py (PROP_KEY, EidoSchemaInvalidError) Fixes NameError exceptions that were causing pytest failures. --- looper/cli_pydantic.py | 1 + looper/processed_project.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index bb6dd0e27..fb06b687a 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -17,6 +17,7 @@ # with types. 
from __future__ import annotations +import os import sys import logmuse diff --git a/looper/processed_project.py b/looper/processed_project.py index 93fb28044..9222dadcc 100644 --- a/looper/processed_project.py +++ b/looper/processed_project.py @@ -128,6 +128,8 @@ import os from logging import getLogger +from eido.const import PROP_KEY +from eido.exceptions import EidoSchemaInvalidError from peppy.project import Project from peppy.sample import Sample From 4103a561a11b465adb1d2f19a885b9e04df4bb66 Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Wed, 5 Nov 2025 17:34:30 -0500 Subject: [PATCH 126/163] Update looper/project.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- looper/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/project.py b/looper/project.py index da892204a..4fb3fdb1e 100644 --- a/looper/project.py +++ b/looper/project.py @@ -2,7 +2,7 @@ import itertools import os -from typing import Iterable, List, NoReturn, Union +from typing import List, NoReturn, Union from yaml import safe_load From af05ddde26c530e4268383dc4c26681b9e172904 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 5 Nov 2025 17:38:16 -0500 Subject: [PATCH 127/163] remove unused imports --- looper/project.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/looper/project.py b/looper/project.py index 4fb3fdb1e..18fb1a4bd 100644 --- a/looper/project.py +++ b/looper/project.py @@ -34,8 +34,6 @@ is_pephub_registry_path, ) from .const import ( - ALL_SUBCMD_KEY, - CLI_KEY, CLI_PROJ_ATTRS, COMPUTE_PACKAGE_KEY, DRY_RUN_KEY, @@ -45,7 +43,6 @@ LOOPER_KEY, OUTDIR_KEY, OUTPUT_SCHEMA_KEY, - PEP_CONFIG_KEY, PIFACE_KEY_SELECTOR, PIPELINE_INTERFACE_PIPELINE_NAME_KEY, PIPELINE_INTERFACES_KEY, From 8b9722fe0fbb0f45259040e3e132377ad9c2a379 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 5 Nov 2025 17:43:11 -0500 Subject: [PATCH 128/163] update to newest yacman --- tests/divvytests/divvy_tests/test_divvy.py | 10 +++++----- 
tests/divvytests/test_divvy_simple.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/divvytests/divvy_tests/test_divvy.py b/tests/divvytests/divvy_tests/test_divvy.py index a67e489de..3a3a6fe98 100644 --- a/tests/divvytests/divvy_tests/test_divvy.py +++ b/tests/divvytests/divvy_tests/test_divvy.py @@ -1,7 +1,7 @@ """Assorted divvy tests""" import pytest -from yacman import YacAttMap, load_yaml +from yacman import YAMLConfigManager, load_yaml from looper.divvy import DEFAULT_COMPUTE_RESOURCES_NAME from tests.divvytests.conftest import DCC_ATTRIBUTES, FILES, mock_env_missing @@ -60,9 +60,9 @@ class TestGettingActivePackage: """Test for the get_active_package method""" def test_settings_nonempty(self, dcc): - """Test if get_active_package produces a nonempty YacAttMap object""" + """Test if get_active_package produces a nonempty YAMLConfigManager object""" settings = dcc.get_active_package() - assert settings != YacAttMap() + assert settings != YAMLConfigManager() class TestListingPackages: @@ -87,7 +87,7 @@ def test_reset_active_settings(self, dcc): def test_reset_active_settings_works(self, dcc): """Test if the settings are cleared""" dcc.reset_active_settings() - assert dcc.get_active_package() == YacAttMap({}) + assert dcc.get_active_package() == YAMLConfigManager({}) class UpdatingPackagesTests: @@ -98,4 +98,4 @@ def test_update_packages(self, dcc, config_file): """Test updating does not produce empty compute packages""" entries = load_yaml(config_file) dcc.update(entries) - assert dcc["compute_packages"] != YacAttMap() + assert dcc["compute_packages"] != YAMLConfigManager() diff --git a/tests/divvytests/test_divvy_simple.py b/tests/divvytests/test_divvy_simple.py index 5770661f7..bbdc1e44e 100644 --- a/tests/divvytests/test_divvy_simple.py +++ b/tests/divvytests/test_divvy_simple.py @@ -3,7 +3,7 @@ import pytest from collections import OrderedDict -from yacman import YacAttMap +from yacman import YAMLConfigManager from 
looper.divvy import select_divvy_config # For interactive debugging: @@ -49,7 +49,7 @@ def test_write_script(self): # "compute", # [ # dict({"mem": 1000, "test": 0}), -# YacAttMap({"mem": 1000, "test": 0}), +# YAMLConfigManager({"mem": 1000, "test": 0}), # OrderedDict({"mem": 1000, "test": 0}), # ], # ) @@ -68,7 +68,7 @@ def test_write_script(self): # def test_adapters_overwitten_by_others(self): # dcc = divvy.ComputingConfiguration() # dcc.activate_package("singularity_slurm") -# compute = YacAttMap({"mem": 1000}) +# compute = YAMLConfigManager({"mem": 1000}) # extra_vars = [{"compute": compute}, {"MEM": 333}] # dcc.write_script("test1.sub", extra_vars) # with open("test1.sub", "r") as f: From f3e43e69ba2c29b984056b3c371aac0b50e889ca Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 5 Nov 2025 17:45:10 -0500 Subject: [PATCH 129/163] update test to python 3.13 --- .github/workflows/run-pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index b518c4048..ceeed0d5c 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -9,7 +9,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.8", "3.13"] os: [ubuntu-latest] steps: From 7251e7ef6d66569a7720d22a3dbaa0ed4c5fe133 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 22:57:57 +0000 Subject: [PATCH 130/163] Convert docstrings to Google style in utils.py and conductor.py - Updated all RST-style docstrings to Google-style format - Converted :param/:return/:raise to Args:/Returns:/Raises: sections - utils.py: 44 parameter annotations converted - conductor.py: 35 parameter annotations converted --- looper/conductor.py | 287 +++++++++++++++++++--------------- looper/utils.py | 371 +++++++++++++++++++++++++++----------------- 2 files changed, 390 insertions(+), 268 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 
cf4388451..20fb71605 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -56,18 +56,20 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=None): - """ - Get a path to a YAML file for the sample. - - :param dict[dict]] namespaces: namespaces mapping - :param str template_key: the name of the key in 'var_templates' piface - section that points to a template to render to get the - user-provided target YAML path - :param str default_name_appendix: a string to append to insert in target - YAML file name: '{sample.sample_name}<>.yaml' - :param str filename: A filename without folders. If not provided, a - default name of sample_name.yaml will be used. - :return str: sample YAML file path + """Get a path to a YAML file for the sample. + + Args: + namespaces (dict[dict]): Namespaces mapping. + template_key (str): The name of the key in 'var_templates' piface + section that points to a template to render to get the + user-provided target YAML path. + default_name_appendix (str): A string to append to insert in target + YAML file name: '{sample.sample_name}<>.yaml'. + filename (str): A filename without folders. If not provided, a + default name of sample_name.yaml will be used. + + Returns: + str: Sample YAML file path. """ if ( VAR_TEMPL_KEY in namespaces["pipeline"] @@ -107,11 +109,16 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename= def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict): - """ - This writes a combined configuration file to be passed to a PipestatManager. - :param str looper_pipestat_config_path: path to the created pipestat configuration file - :param dict pipestat_config_dict: the dict containing key value pairs to be written to the pipestat configutation - return bool + """Write a combined configuration file to be passed to a PipestatManager. + + Args: + looper_pipestat_config_path (str): Path to the created pipestat + configuration file. 
+ pipestat_config_dict (dict): The dict containing key value pairs to be + written to the pipestat configuration. + + Returns: + bool: True if successful. """ if not os.path.exists(os.path.dirname(looper_pipestat_config_path)): @@ -130,11 +137,13 @@ def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict): def write_submission_yaml(namespaces): - """ - Save all namespaces to YAML. + """Save all namespaces to YAML. - :param dict namespaces: variable namespaces dict - :return dict: sample namespace dict + Args: + namespaces (dict): Variable namespaces dict. + + Returns: + dict: Sample namespace dict. """ path = _get_yaml_path(namespaces, SAMPLE_CWL_YAML_PATH_KEY, "_submission") my_namespaces = {} @@ -172,43 +181,43 @@ def __init__( automatic=True, collate=False, ): - """ - Create a job submission manager. + """Create a job submission manager. The most critical inputs are the pipeline interface and the pipeline key, which together determine which provide critical pipeline information like resource allocation packages and which pipeline will be overseen by this instance, respectively. - :param PipelineInterface pipeline_interface: Collection of important - data for one or more pipelines, like resource allocation packages - and option/argument specifications - :param prj: Project with which each sample being considered is - associated (what generated each sample) - :param float delay: Time (in seconds) to wait before submitting a job - once it's ready - :param str extra_args: string to pass to each job generated, - for example additional pipeline arguments - :param str extra_args_override: string to pass to each job generated, - for example additional pipeline arguments. This deactivates the - 'extra' functionality that appends strings defined in - Sample.command_extra and Project.looper.command_extra to the - command template. 
- :param bool ignore_flags: Whether to ignore flag files present in - the sample folder for each sample considered for submission - :param dict[str] compute_variables: A dict with variables that will be made - available to the compute package. For example, this should include - the name of the cluster partition to which job or jobs will be submitted - :param int | NoneType max_cmds: Upper bound on number of commands to - include in a single job script. - :param int | float | NoneType max_size: Upper bound on total file - size of inputs used by the commands lumped into single job script. - :param int | float | NoneType max_jobs: Upper bound on total number of jobs to - group samples for submission. - :param bool automatic: Whether the submission should be automatic once - the pool reaches capacity. - :param bool collate: Whether a collate job is to be submitted (runs on - the project level, rather that on the sample level) + Args: + pipeline_interface (PipelineInterface): Collection of important + data for one or more pipelines, like resource allocation packages + and option/argument specifications. + prj: Project with which each sample being considered is + associated (what generated each sample). + delay (float): Time (in seconds) to wait before submitting a job + once it's ready. + extra_args (str): String to pass to each job generated, + for example additional pipeline arguments. + extra_args_override (str): String to pass to each job generated, + for example additional pipeline arguments. This deactivates the + 'extra' functionality that appends strings defined in + Sample.command_extra and Project.looper.command_extra to the + command template. + ignore_flags (bool): Whether to ignore flag files present in + the sample folder for each sample considered for submission. + compute_variables (dict[str]): A dict with variables that will be made + available to the compute package. 
For example, this should include + the name of the cluster partition to which job or jobs will be submitted. + max_cmds (int | None): Upper bound on number of commands to + include in a single job script. + max_size (int | float | None): Upper bound on total file + size of inputs used by the commands lumped into single job script. + max_jobs (int | float | None): Upper bound on total number of jobs to + group samples for submission. + automatic (bool): Whether the submission should be automatic once + the pool reaches capacity. + collate (bool): Whether a collate job is to be submitted (runs on + the project level, rather that on the sample level). """ super(SubmissionConductor, self).__init__() @@ -279,27 +288,30 @@ def failed_samples(self): @property def num_cmd_submissions(self): - """ - Return the number of commands that this conductor has submitted. + """Return the number of commands that this conductor has submitted. - :return int: Number of commands submitted so far. + Returns: + int: Number of commands submitted so far. """ return self._num_cmds_submitted @property def num_job_submissions(self): - """ - Return the number of jobs that this conductor has submitted. + """Return the number of jobs that this conductor has submitted. - :return int: Number of jobs submitted so far. + Returns: + int: Number of jobs submitted so far. """ return self._num_good_job_submissions def is_project_submittable(self, force=False): - """ - Check whether the current project has been already submitted + """Check whether the current project has been already submitted. - :param bool frorce: whether to force the project submission (ignore status/flags) + Args: + force (bool): Whether to force the project submission (ignore status/flags). + + Returns: + bool: True if the project is submittable, False otherwise. 
""" psms = {} if self.prj.pipestat_configured_project: @@ -314,17 +326,20 @@ def is_project_submittable(self, force=False): return True def add_sample(self, sample, rerun=False): - """ - Add a sample for submission to this conductor. - - :param peppy.Sample sample: sample to be included with this conductor's - currently growing collection of command submissions - :param bool rerun: whether the given sample is being rerun rather than - run for the first time - :return bool: Indication of whether the given sample was added to - the current 'pool.' - :raise TypeError: If sample subtype is provided but does not extend - the base Sample class, raise a TypeError. + """Add a sample for submission to this conductor. + + Args: + sample (peppy.Sample): Sample to be included with this conductor's + currently growing collection of command submissions. + rerun (bool): Whether the given sample is being rerun rather than + run for the first time. + + Returns: + list: List of skip reasons if sample was not added. + + Raises: + TypeError: If sample subtype is provided but does not extend + the base Sample class. """ _LOGGER.debug( "Adding {} to conductor for {} to {}run".format( @@ -406,17 +421,19 @@ def add_sample(self, sample, rerun=False): return skip_reasons def submit(self, force=False): - """ - Submit one or more commands as a job. + """Submit one or more commands as a job. This call will submit the commands corresponding to the current pool of samples if and only if the argument to 'force' evaluates to a true value, or the pool of samples is full. - :param bool force: Whether submission should be done/simulated even - if this conductor's pool isn't full. - :return bool: Whether a job was submitted (or would've been if - not for dry run) + Args: + force (bool): Whether submission should be done/simulated even + if this conductor's pool isn't full. + + Returns: + bool: Whether a job was submitted (or would've been if + not for dry run). 
""" submitted = False @@ -479,25 +496,29 @@ def submit(self, force=False): return submitted def _is_full(self, pool, size): - """ - Determine whether it's time to submit a job for the pool of commands. + """Determine whether it's time to submit a job for the pool of commands. Instances of this class maintain a sort of 'pool' of commands that expands as each new command is added, until a time that it's deemed - 'full' and th + 'full'. - :return bool: Whether this conductor's pool of commands is 'full' and - ready for submission, as determined by its parameterization + Args: + pool: Collection of samples/commands. + size: Current total size. + + Returns: + bool: Whether this conductor's pool of commands is 'full' and + ready for submission, as determined by its parameterization. """ return self.max_cmds == len(pool) or size >= self.max_size @property def _samples(self): - """ - Return a collection of pooled samples. + """Return a collection of pooled samples. - :return Iterable[str]: collection of samples currently in the active - pool for this submission conductor + Returns: + Iterable[str]: Collection of samples currently in the active + pool for this submission conductor. """ return [s for s in self._pool] @@ -522,15 +543,20 @@ def _sample_lump_name(self, pool): return "lump{}".format(self._num_total_job_submissions + 1) def _signal_int_handler(self, signal, frame): - """ - For catching interrupt (Ctrl +C) signals. Fails gracefully. + """For catching interrupt (Ctrl +C) signals. Fails gracefully. + + Args: + signal: Signal received. + frame: Current stack frame. """ signal_type = "SIGINT" self._generic_signal_handler(signal_type) def _generic_signal_handler(self, signal_type): - """ - Function for handling both SIGTERM and SIGINT + """Function for handling both SIGTERM and SIGINT. + + Args: + signal_type (str): Type of signal received (SIGTERM or SIGINT). """ message = "Received " + signal_type + ". Failing gracefully..." 
_LOGGER.warning(msg=message) @@ -587,14 +613,17 @@ def pskill(proc_pid, sig=signal.SIGINT): _LOGGER.warning(msg=f"Child process {self.process_id} {note}.") def _attend_process(self, proc, sleeptime): - """ - Waits on a process for a given time to see if it finishes, returns True - if it's still running after the given time or False as soon as it - returns. + """Wait on a process for a given time to see if it finishes. - :param psutil.Process proc: Process object opened by psutil.Popen() - :param float sleeptime: Time to wait - :return bool: True if process is still running; otherwise false + Returns True if it's still running after the given time or False as + soon as it returns. + + Args: + proc (psutil.Process): Process object opened by psutil.Popen(). + sleeptime (float): Time to wait. + + Returns: + bool: True if process is still running; otherwise false. """ try: proc.wait(timeout=int(sleeptime)) @@ -607,14 +636,18 @@ def _jobname(self, pool): return "{}_{}".format(self.pl_iface.pipeline_name, self._sample_lump_name(pool)) def _build_looper_namespace(self, pool, size): - """ + """Compile a mapping of looper/submission related settings. + Compile a mapping of looper/submission related settings for use in the command templates and in submission script creation in divvy (via adapters). - :param Iterable[peppy.Sample] pool: collection of sample instances - :param float size: cumulative size of the given pool - :return yacman.YAMLConfigManager: looper/submission related settings + Args: + pool (Iterable[peppy.Sample]): Collection of sample instances. + size (float): Cumulative size of the given pool. + + Returns: + yacman.YAMLConfigManager: Looper/submission related settings. 
""" settings = YAMLConfigManager() settings["config_file"] = self.prj.config_file @@ -651,14 +684,18 @@ def _build_looper_namespace(self, pool, size): def _set_pipestat_namespace( self, sample_name: Optional[str] = None ) -> YAMLConfigManager: - """ + """Compile a mapping of pipestat-related settings. + Compile a mapping of pipestat-related settings for use in the command templates. Accessible via: {pipestat.attrname} - :param str sample_name: name of the sample to get the pipestat - namespace for. If not provided the pipestat namespace will - be determined based on the Project - :return yacman.YAMLConfigManager: pipestat namespace + Args: + sample_name (str): Name of the sample to get the pipestat + namespace for. If not provided the pipestat namespace will + be determined based on the Project. + + Returns: + yacman.YAMLConfigManager: Pipestat namespace. """ try: psm = self.pl_iface.psm @@ -684,12 +721,14 @@ def _set_pipestat_namespace( return YAMLConfigManager(filtered_namespace) def write_script(self, pool, size): - """ - Create the script for job submission. + """Create the script for job submission. + + Args: + pool (Iterable[peppy.Sample]): Collection of sample instances. + size (float): Cumulative size of the given pool. - :param Iterable[peppy.Sample] pool: collection of sample instances - :param float size: cumulative size of the given pool - :return str: Path to the job submission script created. + Returns: + str: Path to the job submission script created. """ # looper settings determination if self.collate: @@ -806,23 +845,27 @@ def _use_sample(flag, skips): def _exec_pre_submit(piface, namespaces): - """ - Execute pre submission hooks defined in the pipeline interface + """Execute pre submission hooks defined in the pipeline interface. + + Args: + piface (PipelineInterface): Piface, a source of pre_submit hooks to execute. + namespaces (dict[dict]): Namespaces mapping. 
- :param PipelineInterface piface: piface, a source of pre_submit hooks to execute - :param dict[dict[]] namespaces: namspaces mapping - :return dict[dict[]]: updated namspaces mapping + Returns: + dict[dict]: Updated namespaces mapping. """ def _update_namespaces(x, y, cmd=False): - """ + """Update namespaces mapping with new values. + Update namespaces mapping with a dictionary of the same structure, that includes just the values that need to be updated. - :param dict[dict] x: namespaces mapping - :param dict[dict] y: mapping to update namespaces with - :param bool cmd: whether the mapping to update with comes from the - command template, used for messaging + Args: + x (dict[dict]): Namespaces mapping. + y (dict[dict]): Mapping to update namespaces with. + cmd (bool): Whether the mapping to update with comes from the + command template, used for messaging. """ if not y: return diff --git a/looper/utils.py b/looper/utils.py index 78de020c6..9cc57ef61 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -46,22 +46,26 @@ def fetch_flag_files(prj=None, results_folder="", flags=FLAGS): - """ - Find all flag file paths for the given project. - - :param Project | AttributeDict prj: full Project or AttributeDict with - similar metadata and access/usage pattern - :param str results_folder: path to results folder, corresponding to the - 1:1 sample:folder notion that a looper Project has. That is, this - function uses the assumption that if results_folder rather than project - is provided, the structure of the file tree rooted at results_folder is - such that any flag files to be found are not directly within rootdir but - are directly within on of its first layer of subfolders. 
- :param Iterable[str] | str flags: Collection of flag names or single flag - name for which to fetch files - :return Mapping[str, list[str]]: collection of filepaths associated with - particular flag for samples within the given project - :raise TypeError: if neither or both of project and rootdir are given + """Find all flag file paths for the given project. + + Args: + prj (Project | AttributeDict): Full Project or AttributeDict with + similar metadata and access/usage pattern. + results_folder (str): Path to results folder, corresponding to the + 1:1 sample:folder notion that a looper Project has. That is, this + function uses the assumption that if results_folder rather than project + is provided, the structure of the file tree rooted at results_folder is + such that any flag files to be found are not directly within rootdir but + are directly within one of its first layer of subfolders. + flags (Iterable[str] | str): Collection of flag names or single flag + name for which to fetch files. + + Returns: + Mapping[str, list[str]]: Collection of filepaths associated with + particular flag for samples within the given project. + + Raises: + TypeError: If neither or both of project and rootdir are given. """ if not (prj or results_folder) or (prj and results_folder): @@ -94,14 +98,17 @@ def fetch_flag_files(prj=None, results_folder="", flags=FLAGS): def fetch_sample_flags(prj, sample, pl_name, flag_dir=None): - """ - Find any flag files present for a sample associated with a project + """Find any flag files present for a sample associated with a project. - :param looper.Project prj: project of interest - :param peppy.Sample sample: sample object of interest - :param str pl_name: name of the pipeline for which flag(s) should be found - :return Iterable[str]: collection of flag file path(s) associated with the - given sample for the given project + Args: + prj (looper.Project): Project of interest. + sample (peppy.Sample): Sample object of interest.
+ pl_name (str): Name of the pipeline for which flag(s) should be found. + flag_dir: Flag directory path. + + Returns: + Iterable[str]: Collection of flag file path(s) associated with the + given sample for the given project. """ sfolder = flag_dir or sample_folder(prj=prj, sample=sample) if not os.path.isdir(sfolder): @@ -122,9 +129,14 @@ def fetch_sample_flags(prj, sample, pl_name, flag_dir=None): def get_sample_status(sample, flags): - """ - get a sample status + """Get a sample status. + Args: + sample: Sample identifier. + flags: Collection of flag file paths. + + Returns: + str or None: Status string if found, None otherwise. """ statuses = [] @@ -145,8 +157,7 @@ def get_sample_status(sample, flags): def grab_project_data(prj): - """ - From the given Project, grab Sample-independent data. + """From the given Project, grab Sample-independent data. There are some aspects of a Project of which it's beneficial for a Sample to be aware, particularly for post-hoc analysis. Since Sample objects @@ -155,8 +166,11 @@ def grab_project_data(prj): so for each Sample knowledge of Project data is limited. This method facilitates adoption of that conceptual model. - :param Project prj: Project from which to grab data - :return Mapping: Sample-independent data sections from given Project + Args: + prj (Project): Project from which to grab data. + + Returns: + Mapping: Sample-independent data sections from given Project. """ if not prj: return {} @@ -168,30 +182,36 @@ def grab_project_data(prj): def sample_folder(prj, sample): - """ - Get the path to this Project's root folder for the given Sample. + """Get the path to this Project's root folder for the given Sample. - :param AttributeDict | Project prj: project with which sample is associated - :param Mapping sample: Sample or sample data for which to get root output - folder path. - :return str: this Project's root folder for the given Sample + Args: + prj (AttributeDict | Project): Project with which sample is associated. 
+ sample (Mapping): Sample or sample data for which to get root output + folder path. + + Returns: + str: This Project's root folder for the given Sample. """ return os.path.join(prj.results_folder, sample[prj.sample_table_index]) def get_file_for_project(prj, pipeline_name, appendix=None, directory=None): - """ - Create a path to the file for the current project. - Takes the possibility of amendment being activated at the time + """Create a path to the file for the current project. + + Takes the possibility of amendment being activated at the time. Format of the output path: {output_dir}/{directory}/{p.name}_{pipeline_name}_{active_amendments}_{appendix} - :param looper.Project prj: project object - :param str pipeline_name: name of the pipeline to get the file for - :param str appendix: the appendix of the file to create the path for, - like 'objs_summary.tsv' for objects summary file - :return str: path to the file + Args: + prj (looper.Project): Project object. + pipeline_name (str): Name of the pipeline to get the file for. + appendix (str): The appendix of the file to create the path for, + like 'objs_summary.tsv' for objects summary file. + directory (str): Directory path component. + + Returns: + str: Path to the file. """ fp = os.path.join( prj.output_dir, directory or "", f"{prj[NAME_KEY]}_{pipeline_name}" @@ -203,14 +223,17 @@ def get_file_for_project(prj, pipeline_name, appendix=None, directory=None): def get_file_for_project_old(prj, appendix): - """ - Create a path to the file for the current project. - Takes the possibility of amendment being activated at the time + """Create a path to the file for the current project. + + Takes the possibility of amendment being activated at the time. + + Args: + prj (looper.Project): Project object. + appendix (str): The appendix of the file to create the path for, + like 'objs_summary.tsv' for objects summary file. 
- :param looper.Project prj: project object - :param str appendix: the appendix of the file to create the path for, - like 'objs_summary.tsv' for objects summary file - :return str: path to the file + Returns: + str: Path to the file. """ fp = os.path.join(prj.output_dir, prj[NAME_KEY]) if hasattr(prj, AMENDMENTS_KEY) and getattr(prj, AMENDMENTS_KEY): @@ -220,18 +243,20 @@ def get_file_for_project_old(prj, appendix): def jinja_render_template_strictly(template, namespaces): - """ - Render a command string in the provided namespaces context. + """Render a command string in the provided namespaces context. Strictly, which means that all the requested attributes must be - available in the namespaces - - :param str template: command template do be filled in with the - variables in the provided namespaces. For example: - "prog.py --name {project.name} --len {sample.len}" - :param Mapping[Mapping[str] namespaces: context for command rendering. - Possible namespaces are: looper, project, sample, pipeline - :return str: rendered command + available in the namespaces. + + Args: + template (str): Command template to be filled in with the + variables in the provided namespaces. For example: + "prog.py --name {project.name} --len {sample.len}". + namespaces (Mapping[Mapping[str]]): Context for command rendering. + Possible namespaces are: looper, project, sample, pipeline. + + Returns: + str: Rendered command. """ def _finfun(x): @@ -260,11 +285,13 @@ def _finfun(x): def read_yaml_file(filepath): - """ - Read a YAML file + """Read a YAML file. - :param str filepath: path to the file to read - :return dict: read data + Args: + filepath (str): Path to the file to read. + + Returns: + dict: Read data. """ data = None if os.path.exists(filepath): @@ -280,18 +307,21 @@ def enrich_args_via_cfg( test_args=None, cli_modifiers=None, ): - """ - Read in a looper dotfile, pep config and set arguments. + """Read in a looper dotfile, pep config and set arguments. 
Priority order: CLI > dotfile/config > pep_config > parser default - :param subcommand name: the name of the command used - :param argparse.Namespace parser_args: parsed args by the original parser - :param argparse.Namespace aux_parser: parsed args by the argument parser - with defaults suppressed - :param dict test_args: dict of args used for pytesting - :param dict cli_modifiers: dict of args existing if user supplied cli args in looper config file - :return argparse.Namespace: selected argument values + Args: + subcommand_name: The name of the command used. + parser_args (argparse.Namespace): Parsed args by the original parser. + aux_parser (argparse.Namespace): Parsed args by the argument parser + with defaults suppressed. + test_args (dict): Dict of args used for pytesting. + cli_modifiers (dict): Dict of args existing if user supplied cli args + in looper config file. + + Returns: + argparse.Namespace: Selected argument values. """ # Did the user provide arguments in the PEP config? @@ -373,7 +403,8 @@ def set_single_arg(argname, default_source_namespace, result_namespace): def _get_subcommand_args(subcommand_name, parser_args): - """ + """Get the union of values for the subcommand arguments. + Get the union of values for the subcommand arguments from Project.looper, Project.looper.cli. and Project.looper.cli.all. If any are duplicated, the above is the selection priority order. @@ -382,8 +413,11 @@ def _get_subcommand_args(subcommand_name, parser_args): with '_'), which strongly relies on argument parser using default destinations. - :param argparser.Namespace parser_args: argument namespace - :return dict: mapping of argument destinations to their values + Args: + parser_args (argparser.Namespace): Argument namespace. + + Returns: + dict: Mapping of argument destinations to their values. 
""" args = dict() cfg = peppyProject( @@ -430,8 +464,13 @@ def _get_subcommand_args(subcommand_name, parser_args): def init_generic_pipeline(pipelinepath: Optional[str] = None): - """ - Create generic pipeline interface + """Create generic pipeline interface. + + Args: + pipelinepath (str, optional): Path to pipeline directory. + + Returns: + bool: True if successful. """ console = Console() @@ -541,11 +580,14 @@ def init_generic_pipeline(pipelinepath: Optional[str] = None): def read_looper_dotfile(): - """ - Read looper config file - :return str: path to the config file read from the dotfile - :raise MisconfigurationException: if the dotfile does not consist of the - required key pointing to the PEP + """Read looper config file. + + Returns: + str: Path to the config file read from the dotfile. + + Raises: + MisconfigurationException: If the dotfile does not consist of the + required key pointing to the PEP. """ dot_file_path = dotfile_path(must_exist=True) return read_looper_config_file(looper_config_path=dot_file_path) @@ -559,16 +601,20 @@ def initiate_looper_config( project_pipeline_interfaces: Union[List[str], str] = None, force=False, ): - """ - Initialize looper config file - - :param str looper_config_path: absolute path to the file to initialize - :param str pep_path: path to the PEP to be used in pipeline - :param str output_dir: path to the output directory - :param str|list sample_pipeline_interfaces: path or list of paths to sample pipeline interfaces - :param str|list project_pipeline_interfaces: path or list of paths to project pipeline interfaces - :param bool force: whether the existing file should be overwritten - :return bool: whether the file was initialized + """Initialize looper config file. + + Args: + looper_config_path (str): Absolute path to the file to initialize. + pep_path (str): Path to the PEP to be used in pipeline. + output_dir (str): Path to the output directory. 
+ sample_pipeline_interfaces (str | list): Path or list of paths to + sample pipeline interfaces. + project_pipeline_interfaces (str | list): Path or list of paths to + project pipeline interfaces. + force (bool): Whether the existing file should be overwritten. + + Returns: + bool: Whether the file was initialized. """ console = Console() console.clear() @@ -624,10 +670,10 @@ def initiate_looper_config( def looper_config_tutorial(): - """ - Prompt a user through configuring a .looper.yaml file for a new project. + """Prompt a user through configuring a .looper.yaml file for a new project. - :return bool: whether the file was initialized + Returns: + bool: Whether the file was initialized. """ console = Console() @@ -743,13 +789,18 @@ def looper_config_tutorial(): def determine_pipeline_type(piface_path: str, looper_config_path: str): - """ - Read pipeline interface from disk and determine if it contains "sample_interface", "project_interface" or both + """Read pipeline interface and determine its type. + Read pipeline interface from disk and determine if it contains + "sample_interface", "project_interface" or both. - :param str piface_path: path to pipeline_interface - :param str looper_config_path: path to looper config file - :return Tuple[Union[str,None],Union[str,None]] : (pipeline type, resolved path) or (None, None) + Args: + piface_path (str): Path to pipeline_interface. + looper_config_path (str): Path to looper config file. + + Returns: + Tuple[Union[str, None], Union[str, None]]: (pipeline type, resolved path) + or (None, None). """ if piface_path is None: @@ -788,15 +839,21 @@ def determine_pipeline_type(piface_path: str, looper_config_path: str): def read_looper_config_file(looper_config_path: str) -> dict: - """ + """Read Looper config file. 
+ Read Looper config file which includes: - PEP config (local path or pephub registry path) - looper output dir - looper pipeline interfaces - :param str looper_config_path: path to looper config path - :return dict: looper config file content - :raise MisconfigurationException: incorrect configuration. + Args: + looper_config_path (str): Path to looper config path. + + Returns: + dict: Looper config file content. + + Raises: + MisconfigurationException: Incorrect configuration. """ return_dict = {} @@ -891,16 +948,20 @@ def read_looper_config_file(looper_config_path: str) -> dict: def dotfile_path(directory=os.getcwd(), must_exist=False): - """ - Get the path to the looper dotfile + """Get the path to the looper dotfile. If file existence is forced this function will look for it in - the directory parents + the directory parents. + + Args: + directory (str): Directory path to start the search in. + must_exist (bool): Whether the file must exist. - :param str directory: directory path to start the search in - :param bool must_exist: whether the file must exist - :return str: path to the dotfile - :raise OSError: if the file does not exist + Returns: + str: Path to the dotfile. + + Raises: + OSError: If the file does not exist. """ cur_dir = directory if not must_exist: @@ -919,8 +980,13 @@ def dotfile_path(directory=os.getcwd(), must_exist=False): def is_PEP_file_type(input_string: str) -> bool: - """ - Determines if the provided path is actually a file type that Looper can use for loading PEP + """Determines if the provided path is a file type that Looper can use for loading PEP. + + Args: + input_string (str): Path to check. + + Returns: + bool: True if the path is a valid PEP file type. 
""" PEP_FILE_TYPES = ["yaml", "csv"] @@ -930,10 +996,13 @@ def is_PEP_file_type(input_string: str) -> bool: def is_pephub_registry_path(input_string: str) -> bool: - """ - Check if input is a registry path to pephub - :param str input_string: path to the PEP (or registry path) - :return bool: True if input is a registry path + """Check if input is a registry path to pephub. + + Args: + input_string (str): Path to the PEP (or registry path). + + Returns: + bool: True if input is a registry path. """ try: registry_path = RegistryPath(**parse_registry_path(input_string)) @@ -997,11 +1066,14 @@ def to_range(self) -> Iterable[int]: @classmethod def from_string(cls, s: str, upper_bound: int) -> "IntRange": - """ - Create an instance from a string, e.g. command-line argument. + """Create an instance from a string, e.g. command-line argument. + + Args: + s (str): The string to parse as an interval. + upper_bound (int): The default upper bound. - :param str s: The string to parse as an interval - :param int upper_bound: the default upper bound + Returns: + IntRange: New instance created from the string. """ if upper_bound < 1: raise NatIntervalException(f"Upper bound must be positive: {upper_bound}") @@ -1035,18 +1107,20 @@ def from_string(cls, s: str, upper_bound: int) -> "IntRange": def desired_samples_range_limited(arg: str, num_samples: int) -> Iterable[int]: - """ - Create a contiguous interval of natural numbers. Used for _positive_ selection of samples. + """Create a contiguous interval of natural numbers for positive selection of samples. Interpret given arg as upper bound (1-based) if it's a single value, but take the minimum of that and the given number of samples. If arg is parseable as a range, use that. 
- :param str arg: CLI specification of a range of samples to use, or as the greatest - 1-based index of a sample to include - :param int num_samples: what to use as the upper bound on the 1-based index interval - if the given arg isn't a range but rather a single value. - :return: an iterable of 1-based indices into samples to select + Args: + arg (str): CLI specification of a range of samples to use, or as the greatest + 1-based index of a sample to include. + num_samples (int): What to use as the upper bound on the 1-based index interval + if the given arg isn't a range but rather a single value. + + Returns: + Iterable[int]: An iterable of 1-based indices into samples to select. """ try: upper_bound = min(int(arg), num_samples) @@ -1059,13 +1133,15 @@ def desired_samples_range_limited(arg: str, num_samples: int) -> Iterable[int]: def desired_samples_range_skipped(arg: str, num_samples: int) -> Iterable[int]: - """ - Create a contiguous interval of natural numbers. Used for _negative_ selection of samples. + """Create a contiguous interval of natural numbers for negative selection of samples. - :param str arg: CLI specification of a range of samples to use, or as the lowest - 1-based index of a sample to skip - :param int num_samples: highest 1-based index of samples to include - :return: an iterable of 1-based indices into samples to select + Args: + arg (str): CLI specification of a range of samples to use, or as the lowest + 1-based index of a sample to skip. + num_samples (int): Highest 1-based index of samples to include. + + Returns: + Iterable[int]: An iterable of 1-based indices into samples to select. """ try: lower_bound = int(arg) @@ -1082,14 +1158,17 @@ def desired_samples_range_skipped(arg: str, num_samples: int) -> Iterable[int]: def write_submit_script(fp, content, data): - """ - Write a submission script for divvy by populating a template with data. - :param str fp: Path to the file to which to create/write submissions script. 
- :param str content: Template for submission script, defining keys that - will be filled by given data - :param Mapping data: a "pool" from which values are available to replace - keys in the template - :return str: Path to the submission script + """Write a submission script for divvy by populating a template with data. + + Args: + fp (str): Path to the file to which to create/write submissions script. + content (str): Template for submission script, defining keys that + will be filled by given data. + data (Mapping): A "pool" from which values are available to replace + keys in the template. + + Returns: + str: Path to the submission script. """ for k, v in data.items(): @@ -1117,10 +1196,10 @@ def write_submit_script(fp, content, data): def inspect_looper_config_file(looper_config_dict) -> None: - """ - Inspects looper config by printing it to terminal. - param dict looper_config_dict: dict representing looper_config + """Inspects looper config by printing it to terminal. + Args: + looper_config_dict (dict): Dict representing looper_config. 
""" # Simply print this to terminal print("LOOPER INSPECT") From 58d1dc591a3bfc499cfa3434872de0cdb62c964e Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 22:59:35 +0000 Subject: [PATCH 131/163] Convert docstrings to Google style in exceptions.py, const.py, and plugins.py - exceptions.py: 1 docstring converted - const.py: 1 docstring converted - plugins.py: 3 docstrings converted - All RST-style :param/:return annotations updated to Args:/Returns: format --- looper/const.py | 16 ++++++++++------ looper/exceptions.py | 6 +++--- looper/plugins.py | 30 ++++++++++++++++++------------ 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/looper/const.py b/looper/const.py index bfa51309b..d60d052ef 100644 --- a/looper/const.py +++ b/looper/const.py @@ -108,13 +108,17 @@ def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): - """ - Based on the type of the HTML element provided construct the appearence - mapping using the template + """Construct the appearance mapping using the template. + + Based on the type of the HTML element provided construct the appearance + mapping using the template. + + Args: + type (str): Type of HTML element to populate template with. + templ (dict): Appearance template to populate. - :param dict templ: appearance template to populate - :param str type: type of HTML element to populate template with - :return dict: populated appearance template + Returns: + dict: Populated appearance template. """ from copy import deepcopy diff --git a/looper/exceptions.py b/looper/exceptions.py index 62b9e041e..c3fd631be 100644 --- a/looper/exceptions.py +++ b/looper/exceptions.py @@ -90,10 +90,10 @@ class PipelineInterfaceConfigError(LooperError): """Error with PipelineInterface config data during construction.""" def __init__(self, context): - """ - For exception context, provide message or collection of missing sections. + """For exception context, provide message or collection of missing sections. 
- :param str | Iterable[str] context: + Args: + context (str | Iterable[str]): Message or collection of missing sections. """ if not isinstance(context, str) and isinstance(context, Iterable): context = "Missing section(s): {}".format(", ".join(context)) diff --git a/looper/plugins.py b/looper/plugins.py index d4cc52651..37859db27 100644 --- a/looper/plugins.py +++ b/looper/plugins.py @@ -11,16 +11,18 @@ def write_sample_yaml_prj(namespaces): - """ - Plugin: saves sample representation with project reference to YAML. + """Plugin: saves sample representation with project reference to YAML. This plugin can be parametrized by providing the path value/template in 'pipeline.var_templates.sample_yaml_prj_path'. This needs to be a complete and absolute path to the file where sample YAML representation is to be stored. - :param dict namespaces: variable namespaces dict - :return dict: sample namespace dict + Args: + namespaces (dict): Variable namespaces dict. + + Returns: + dict: Sample namespace dict. """ sample = namespaces["sample"] sample.to_yaml( @@ -68,8 +70,7 @@ def load_template(pipeline): def write_sample_yaml_cwl(namespaces): - """ - Plugin: Produce a cwl-compatible yaml representation of the sample + """Plugin: Produce a cwl-compatible yaml representation of the sample. Also adds the 'cwl_yaml' attribute to sample objects, which points to the file produced. @@ -79,8 +80,11 @@ def write_sample_yaml_cwl(namespaces): absolute path to the file where sample YAML representation is to be stored. - :param dict namespaces: variable namespaces dict - :return dict: updated variable namespaces dict + Args: + namespaces (dict): Variable namespaces dict. + + Returns: + dict: Updated variable namespaces dict. """ from eido import read_schema from ubiquerg import is_url @@ -145,16 +149,18 @@ def _get_schema_source( def write_sample_yaml(namespaces): - """ - Plugin: saves sample representation to YAML. + """Plugin: saves sample representation to YAML. 
This plugin can be parametrized by providing the path value/template in 'pipeline.var_templates.sample_yaml_path'. This needs to be a complete and absolute path to the file where sample YAML representation is to be stored. - :param dict namespaces: variable namespaces dict - :return dict: sample namespace dict + Args: + namespaces (dict): Variable namespaces dict. + + Returns: + dict: Sample namespace dict. """ sample = namespaces["sample"] sample["sample_yaml_path"] = _get_yaml_path( From 28713ef9deca933e7f2ef8461095b720deaae0b0 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 23:01:18 +0000 Subject: [PATCH 132/163] Convert docstrings to Google style in parser_types.py and command_models - parser_types.py: 2 docstrings converted - command_models/arguments.py: 1 docstring converted - command_models/commands.py: 2 docstrings converted - All RST-style annotations updated to Google-style format --- looper/command_models/arguments.py | 18 ++++++++--------- looper/command_models/commands.py | 28 ++++++++++++++++---------- looper/parser_types.py | 32 +++++++++++++++++------------- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 68c329772..04c11d5ca 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -11,8 +11,7 @@ class Argument(pydantic.fields.FieldInfo): - """ - CLI argument / flag definition + """CLI argument / flag definition. This class is designed to define CLI arguments or flags. It leverages Pydantic for data validation and serves as a source of truth for multiple @@ -24,13 +23,14 @@ class Argument(pydantic.fields.FieldInfo): so we instead subclass `FieldInfo` directly and validate it in the constructor. - :param str name: argument name, e.g. "ignore-args" - :param Any default: a tuple of the form (type, default_value). If the - default value is `...` (Ellipsis), then the argument is required. 
- :param str description: argument description, which will appear as the - help text for this argument - :param dict kwargs: additional keyword arguments supported by - `FieldInfo`. These are passed along as they are. + Args: + name (str): Argument name, e.g. "ignore-args". + default (Any): A tuple of the form (type, default_value). If the + default value is `...` (Ellipsis), then the argument is required. + description (str): Argument description, which will appear as the + help text for this argument. + kwargs (dict): Additional keyword arguments supported by + `FieldInfo`. These are passed along as they are. """ def __init__( diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 69312f0d6..d88c82328 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -14,12 +14,12 @@ @dataclass class Command: - """ - Representation of a command + """Representation of a command. - :param str name: command name - :param str description: command description - :param list[Argument] arguments: list of arguments supported by this command + Args: + name (str): Command name. + description (str): Command description. + arguments (list[Argument]): List of arguments supported by this command. """ name: str @@ -242,13 +242,19 @@ def create_model(self) -> Type[pydantic.BaseModel]: def add_short_arguments( parser: ArgumentParser, argument_enums: Type[ArgumentEnum] ) -> ArgumentParser: - """ - This function takes a parser object created under pydantic argparse and adds the short arguments AFTER the initial creation. - This is a workaround as pydantic-argparse does not currently support this during initial parser creation. + """Add short arguments to parser after initial creation. + + This function takes a parser object created under pydantic argparse and adds + the short arguments AFTER the initial creation. This is a workaround as + pydantic-argparse does not currently support this during initial parser creation. 
+ + Args: + parser (ArgumentParser): Parser before adding short arguments. + argument_enums (Type[ArgumentEnum]): Enumeration of arguments that contain + names and aliases. - :param ArgumentParser parser: parser before adding short arguments - :param Type[ArgumentEnum] argument_enums: enumeration of arguments that contain names and aliases - :return ArgumentParser parser: parser after short arguments have been added + Returns: + ArgumentParser: Parser after short arguments have been added. """ for cmd in parser._subcommands.choices.keys(): diff --git a/looper/parser_types.py b/looper/parser_types.py index 984049650..97c8b6f99 100644 --- a/looper/parser_types.py +++ b/looper/parser_types.py @@ -29,14 +29,16 @@ def fun(x=None, caravel_data=caravel_data, caravel=caravel): def html_checkbox(caravel=False, checked=False): - """ - Create argument for type parameter on argparse.ArgumentParser.add_argument. + """Create argument for type parameter on argparse.ArgumentParser.add_argument. + + Args: + caravel (bool): Whether this is being used in the caravel context. + checked (bool): Whether to add a particular key-value entry to a + collection used by caravel. - :param bool caravel: whether this is being used in the caravel context - :param bool checked: whether to add a particular key-value entry to a - collection used by caravel - :return callable: argument to the type parameter of an - argparse.ArgumentParser's add_argument method. + Returns: + callable: Argument to the type parameter of an + argparse.ArgumentParser's add_argument method. """ caravel_data = YAMLConfigManager({"element_type": "checkbox", "element_args": {}}) if checked: @@ -49,14 +51,16 @@ def fun(x=None, caravel_data=caravel_data, caravel=caravel): def html_select(choices, caravel=False): - """ - Create argument for type parameter on argparse.ArgumentParser.add_argument. + """Create argument for type parameter on argparse.ArgumentParser.add_argument. 
+ + Args: + choices (list[object]): Collection of valid argument provisions via + to a particular CLI option. + caravel (bool): Whether this is being used in the caravel context. - :param list[object] choices: collection of valid argument provisions via - to a particular CLI option - :param bool caravel: whether this is being used in the caravel context - :return callable: argument to the type parameter of an - argparse.ArgumentParser's add_argument method. + Returns: + callable: Argument to the type parameter of an + argparse.ArgumentParser's add_argument method. """ if not isinstance(choices, list): raise TypeError( From b843f3952275907677f8af81958c33367b98e3e5 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 5 Nov 2025 23:11:31 +0000 Subject: [PATCH 133/163] Convert docstrings to Google style in remaining core files - divvy.py: 16 docstrings converted - looper.py: 11 docstrings converted - pipeline_interface.py: 10 docstrings converted - project.py: 24 docstrings converted - processed_project.py: 5 docstrings converted - cli_pydantic.py: 1 docstring converted - cli_divvy.py: 1 docstring converted All RST-style docstrings (:param/:return/:raise) have been converted to Google-style format (Args:/Returns:/Raises:) with proper formatting and capitalization. --- looper/cli_divvy.py | 6 +- looper/cli_pydantic.py | 21 +-- looper/divvy.py | 149 ++++++++++--------- looper/looper.py | 94 ++++++------ looper/pipeline_interface.py | 116 ++++++++------- looper/processed_project.py | 88 +++++++----- looper/project.py | 267 ++++++++++++++++++----------------- 7 files changed, 395 insertions(+), 346 deletions(-) diff --git a/looper/cli_divvy.py b/looper/cli_divvy.py index 1fa98b69e..0305e40e8 100644 --- a/looper/cli_divvy.py +++ b/looper/cli_divvy.py @@ -12,10 +12,10 @@ def build_argparser(): - """ - Builds argument parser. + """Builds argument parser. - :return argparse.ArgumentParser + Returns: + argparse.ArgumentParser: The argument parser. 
""" banner = ( diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index fb06b687a..37452a26d 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -380,18 +380,23 @@ def main_cli() -> None: def _proc_resources_spec(args): - """ - Process CLI-sources compute setting specification. There are two sources - of compute settings in the CLI alone: + """Process CLI-sources compute setting specification. + + There are two sources of compute settings in the CLI alone: * YAML file (--settings argument) * itemized compute settings (--compute argument) - The itemized compute specification is given priority + The itemized compute specification is given priority. + + Args: + args (argparse.Namespace): Arguments namespace. + + Returns: + Mapping[str, str]: Binding between resource setting name and value. - :param argparse.Namespace: arguments namespace - :return Mapping[str, str]: binding between resource setting name and value - :raise ValueError: if interpretation of the given specification as encoding - of key-value pairs fails + Raises: + ValueError: If interpretation of the given specification as encoding + of key-value pairs fails. """ spec = getattr(args, "compute", None) settings = args.settings diff --git a/looper/divvy.py b/looper/divvy.py index 84e66ed71..031cae69c 100644 --- a/looper/divvy.py +++ b/looper/divvy.py @@ -26,8 +26,7 @@ class ComputingConfiguration(YAMLConfigManager): - """ - Represents computing configuration objects. + """Represents computing configuration objects. The ComputingConfiguration class provides a computing configuration object that is an *in memory* representation of a `divvy` computing configuration @@ -35,10 +34,11 @@ class ComputingConfiguration(YAMLConfigManager): and retrieve computing configuration files, and use these values to populate job submission script templates. - :param str | Iterable[(str, object)] | Mapping[str, object] entries: config - Collection of key-value pairs. 
- :param str filepath: YAML file specifying computing package data. (the - `DIVCFG` file) + Args: + entries (str | Iterable[(str, object)] | Mapping[str, object]): Config + collection of key-value pairs. + filepath (str): YAML file specifying computing package data (the + `DIVCFG` file). """ def __init__( @@ -75,20 +75,20 @@ def write(self, filename=None): @property def compute_env_var(self): - """ - Environment variable through which to access compute settings. + """Environment variable through which to access compute settings. - :return list[str]: names of candidate environment variables, for which - value may be path to compute settings file; first found is used. + Returns: + list[str]: Names of candidate environment variables, for which + value may be path to compute settings file; first found is used. """ return COMPUTE_SETTINGS_VARNAME @property def default_config_file(self): - """ - Path to default compute environment settings file. + """Path to default compute environment settings file. - :return str: Path to default compute settings file + Returns: + str: Path to default compute settings file. """ return DEFAULT_CONFIG_FILEPATH @@ -96,20 +96,20 @@ def default_config_file(self): # it will get treated as a PathExAttMap treats all properties, which # is that it will turn any double-slashes into single slashes. def template(self): - """ - Get the currently active submission template. + """Get the currently active submission template. - :return str: submission script content template for current state + Returns: + str: Submission script content template for current state. """ with open(self.compute["submission_template"], "r") as f: return f.read() @property def templates_folder(self): - """ - Path to folder with default submission templates. + """Path to folder with default submission templates. - :return str: path to folder with default submission templates + Returns: + str: Path to folder with default submission templates. 
""" if self.filepath: return os.path.join(os.path.dirname(self.filepath), "divvy_templates") @@ -119,16 +119,18 @@ def templates_folder(self): ) def activate_package(self, package_name): - """ - Activates a compute package. + """Activates a compute package. This copies the computing attributes from the configuration file into the `compute` attribute, where the class stores current compute settings. - :param str package_name: name for non-resource compute bundle, - the name of a subsection in an environment configuration file - :return bool: success flag for attempt to establish compute settings + Args: + package_name (str): Name for non-resource compute bundle, + the name of a subsection in an environment configuration file. + + Returns: + bool: Success flag for attempt to establish compute settings. """ # Hope that environment & environment compute are present. @@ -193,20 +195,22 @@ def activate_package(self, package_name): return False def clean_start(self, package_name): - """ - Clear current active settings and then activate the given package. + """Clear current active settings and then activate the given package. + + Args: + package_name (str): Name of the resource package to activate. - :param str package_name: name of the resource package to activate - :return bool: success flag + Returns: + bool: Success flag. """ self.reset_active_settings() return self.activate_package(package_name) def get_active_package(self) -> YAMLConfigManager: - """ - Returns settings for the currently active compute package + """Returns settings for the currently active compute package. - :return YAMLConfigManager: data defining the active compute package + Returns: + YAMLConfigManager: Data defining the active compute package. """ return self.compute @@ -216,46 +220,46 @@ def compute_packages(self): return self["compute_packages"] def list_compute_packages(self): - """ - Returns a list of available compute packages. + """Returns a list of available compute packages. 
- :return set[str]: names of available compute packages + Returns: + set[str]: Names of available compute packages. """ return set(self["compute_packages"].keys()) def reset_active_settings(self): - """ - Clear out current compute settings. + """Clear out current compute settings. - :return bool: success flag + Returns: + bool: Success flag. """ self.compute = YAMLConfigManager() return True def update_packages(self, config_file): - """ - Parse data from divvy configuration file. + """Parse data from divvy configuration file. Given a divvy configuration file, this function will update (not overwrite) existing compute packages with existing values. It does not affect any currently active settings. - :param str config_file: path to file with new divvy configuration data + Args: + config_file (str): Path to file with new divvy configuration data. """ entries = load_yaml(config_file) self.update(entries) return True def get_adapters(self) -> YAMLConfigManager: - """ - Get current adapters, if defined. + """Get current adapters, if defined. Adapters are sourced from the 'adapters' section in the root of the divvy configuration file and updated with an active compute package-specific set of adapters, if any defined in 'adapters' section under currently active compute package. - :return YAMLConfigManager: current adapters mapping + Returns: + YAMLConfigManager: Current adapters mapping. """ adapters = YAMLConfigManager() if "adapters" in self and self["adapters"] is not None: @@ -284,26 +288,31 @@ def submit(self, output_path, extra_vars=None): os.system(submission_command) def write_script(self, output_path, extra_vars=None): - """ - Given currently active settings, populate the active template to write a - submission script. 
Additionally use the current adapters to adjust - the select of the provided variables - - :param str output_path: Path to file to write as submission script - :param Iterable[Mapping] extra_vars: A list of Dict objects with - key-value pairs with which to populate template fields. These will - override any values in the currently active compute package. - :return str: Path to the submission script file + """Given currently active settings, populate the active template to write a submission script. + + Additionally use the current adapters to adjust the select of the + provided variables. + + Args: + output_path (str): Path to file to write as submission script. + extra_vars (Iterable[Mapping]): A list of Dict objects with + key-value pairs with which to populate template fields. These will + override any values in the currently active compute package. + + Returns: + str: Path to the submission script file. """ def _get_from_dict(map, attrs): - """ - Get value from a possibly mapping using a list of its attributes + """Get value from a possibly mapping using a list of its attributes. + + Args: + map (collections.Mapping): Mapping to retrieve values from. + attrs (Iterable[str]): A list of attributes. - :param collections.Mapping map: mapping to retrieve values from - :param Iterable[str] attrs: a list of attributes - :return: value found in the the requested attribute or - None if one of the keys does not exist + Returns: + Value found in the the requested attribute or None if one of the + keys does not exist. """ for a in attrs: try: @@ -372,16 +381,18 @@ def _handle_missing_env_attrs(self, config_file, when_missing): def select_divvy_config(filepath): - """ - Selects the divvy config file path to load. + """Selects the divvy config file path to load. This uses a priority ordering to first choose a config file path if it's given, but if not, then look in a priority list of environment variables and choose the first available file path to return. 
If none of these options succeed, the default config path will be returned. - :param str | NoneType filepath: direct file path specification - :return str: path to the config file to read + Args: + filepath (str | NoneType): Direct file path specification. + + Returns: + str: Path to the config file to read. """ divcfg = select_config( config_filepath=filepath, @@ -395,13 +406,13 @@ def select_divvy_config(filepath): def divvy_init(config_path, template_config_path): - """ - Initialize a genome config file. + """Initialize a genome config file. - :param str config_path: path to divvy configuration file to - create/initialize - :param str template_config_path: path to divvy configuration file to - copy FROM + Args: + config_path (str): Path to divvy configuration file to + create/initialize. + template_config_path (str): Path to divvy configuration file to + copy FROM. """ if not config_path: _LOGGER.error("You must specify a file path to initialize.") diff --git a/looper/looper.py b/looper/looper.py index fa0fd1249..de1015539 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -74,10 +74,10 @@ class Executor(object): __metaclass__ = abc.ABCMeta def __init__(self, prj): - """ - The Project defines the instance; establish an iteration counter. + """The Project defines the instance; establish an iteration counter. - :param Project prj: Project with which to work/operate on + Args: + prj (Project): Project with which to work/operate on. """ super(Executor, self).__init__() self.prj = prj @@ -91,10 +91,10 @@ def __call__(self, *args, **kwargs): class Checker(Executor): def __call__(self, args): - """ - Check Project status, using pipestat. + """Check Project status, using pipestat. - :param argparse.Namespace: arguments provided to the command + Args: + args (argparse.Namespace): Arguments provided to the command. 
""" # aggregate pipeline status data @@ -201,11 +201,11 @@ class Cleaner(Executor): """Remove all intermediate files (defined by pypiper clean scripts).""" def __call__(self, args, preview_flag=True): - """ - Execute the file cleaning process. + """Execute the file cleaning process. - :param argparse.Namespace args: command-line options and arguments - :param bool preview_flag: whether to halt before actually removing files + Args: + args (argparse.Namespace): Command-line options and arguments. + preview_flag (bool): Whether to halt before actually removing files. """ self.counter.show(name=self.prj.name, type="project") for sample in self.prj.samples: @@ -262,11 +262,11 @@ class Destroyer(Executor): """Destroyer of files and folders associated with Project's Samples""" def __call__(self, args, preview_flag=True): - """ - Completely remove all output produced by any pipelines. + """Completely remove all output produced by any pipelines. - :param argparse.Namespace args: command-line options and arguments - :param bool preview_flag: whether to halt before actually removing files + Args: + args (argparse.Namespace): Command-line options and arguments. + preview_flag (bool): Whether to halt before actually removing files. """ use_pipestat = ( @@ -328,21 +328,19 @@ class Collator(Executor): """Submitter for project-level pipelines""" def __init__(self, prj): - """ - Initializes an instance + """Initializes an instance. - :param Project prj: Project with which to work/operate on + Args: + prj (Project): Project with which to work/operate on. """ super(Executor, self).__init__() self.prj = prj def __call__(self, args, **compute_kwargs): - """ - Matches collators by protocols, creates submission scripts - and submits them + """Matches collators by protocols, creates submission scripts and submits them. 
- :param argparse.Namespace args: parsed command-line options and - arguments, recognized by looper + Args: + args (argparse.Namespace): Parsed command-line options and arguments, recognized by looper. """ jobs = 0 self.debug = {} @@ -389,15 +387,12 @@ class Runner(Executor): """The true submitter of pipelines""" def __call__(self, args, top_level_args=None, rerun=False, **compute_kwargs): - """ - Do the Sample submission. - - :param argparse.Namespace args: parsed command-line options and - arguments, recognized by looper - :param list remaining_args: command-line options and arguments not - recognized by looper, germane to samples/pipelines - :param bool rerun: whether the given sample is being rerun rather than - run for the first time + """Do the Sample submission. + + Args: + args (argparse.Namespace): Parsed command-line options and arguments, recognized by looper. + remaining_args (list): Command-line options and arguments not recognized by looper, germane to samples/pipelines. + rerun (bool): Whether the given sample is being rerun rather than run for the first time. """ self.debug = {} # initialize empty dict for return values max_cmds = sum(list(map(len, self.prj._samples_by_interface.values()))) @@ -649,9 +644,10 @@ def __call__(self, args): class Tabulator(Executor): - """Project/Sample statistics and table output generator + """Project/Sample statistics and table output generator. - :return list[str|any] results: list containing output file paths of stats and objects + Returns: + list[str|any]: List containing output file paths of stats and objects. """ def __call__(self, args): @@ -684,13 +680,11 @@ def _create_failure_message(reason, samples): def _remove_or_dry_run(paths, dry_run=False): - """ - Remove file or directory or just inform what would be removed in - case of dry run + """Remove file or directory or just inform what would be removed in case of dry run. 
- :param list|str paths: list of paths to files/dirs to be removed - :param bool dry_run: logical indicating whether the files should remain - untouched and message printed + Args: + paths (list|str): List of paths to files/dirs to be removed. + dry_run (bool): Logical indicating whether the files should remain untouched and message printed. """ paths = paths if isinstance(paths, list) else [paths] for path in paths: @@ -770,11 +764,10 @@ def destroy_summary(prj, dry_run=False, project_level=False): class LooperCounter(object): - """ - Count samples as you loop through them, and create text for the - subcommand logging status messages. + """Count samples as you loop through them, and create text for the subcommand logging status messages. - :param int total: number of jobs to process + Args: + total (int): Number of jobs to process. """ def __init__(self, total): @@ -782,17 +775,18 @@ def __init__(self, total): self.total = total def show(self, name, type="sample", pipeline_name=None): - """ - Display sample counts status for a particular protocol type. + """Display sample counts status for a particular protocol type. The counts are running vs. total for the protocol within the Project, and as a side-effect of the call, the running count is incremented. - :param str name: name of the sample - :param str type: the name of the level of entity being displayed, - either project or sample - :param str pipeline_name: name of the pipeline - :return str: message suitable for logging a status update + Args: + name (str): Name of the sample. + type (str): The name of the level of entity being displayed, either project or sample. + pipeline_name (str): Name of the pipeline. + + Returns: + str: Message suitable for logging a status update. 
""" self.count += 1 return _submission_status_text( diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index ee76f790c..bf023961a 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -40,15 +40,15 @@ @peputil.copy class PipelineInterface(YAMLConfigManager): """ - This class parses, holds, and returns information for a yaml file that - specifies how to interact with each individual pipeline. This - includes both resources to request for cluster job submission, as well as - arguments to be passed from the sample annotation metadata to the pipeline - - :param str | Mapping config: path to file from which to parse - configuration data, or pre-parsed configuration data. - :param str pipeline_type: type of the pipeline, - must be either 'sample' or 'project'. + This class parses, holds, and returns information for a yaml file that specifies how to interact with each individual pipeline. + + This includes both resources to request for cluster job submission, as well as + arguments to be passed from the sample annotation metadata to the pipeline. + + Args: + config (str | Mapping): Path to file from which to parse configuration data, + or pre-parsed configuration data. + pipeline_type (str): Type of the pipeline, must be either 'sample' or 'project'. """ def __init__(self, config, pipeline_type=None): @@ -78,7 +78,8 @@ def render_var_templates(self, namespaces): """ Render path templates under 'var_templates' in this pipeline interface. - :param dict namespaces: namespaces to use for rendering + Args: + namespaces (dict): Namespaces to use for rendering. """ try: curr_data = self[VAR_TEMPL_KEY] @@ -99,8 +100,11 @@ def get_pipeline_schemas(self, schema_key=INPUT_SCHEMA_KEY): """ Get path to the pipeline schema. - :param str schema_key: where to look for schemas in the pipeline iface - :return str: absolute path to the pipeline schema file + Args: + schema_key (str): Where to look for schemas in the pipeline iface. 
+ + Returns: + str: Absolute path to the pipeline schema file. """ schema_source = None if schema_key in self: @@ -119,15 +123,19 @@ def choose_resource_package(self, namespaces, file_size): """ Select resource bundle for given input file size to given pipeline. - :param float file_size: Size of input data (in gigabytes). - :param Mapping[Mapping[str]] namespaces: namespaced variables to pass - as a context for fluid attributes command rendering - :return MutableMapping: resource bundle appropriate for given pipeline, - for given input file size - :raises ValueError: if indicated file size is negative, or if the - file size value specified for any resource package is negative - :raises InvalidResourceSpecificationException: if no default - resource package specification is provided + Args: + file_size (float): Size of input data (in gigabytes). + namespaces (Mapping[Mapping[str]]): Namespaced variables to pass as a context + for fluid attributes command rendering. + + Returns: + MutableMapping: Resource bundle appropriate for given pipeline, for given input file size. + + Raises: + ValueError: If indicated file size is negative, or if the file size value + specified for any resource package is negative. + InvalidResourceSpecificationException: If no default resource package + specification is provided. """ def _file_size_ante(name, data): @@ -157,12 +165,13 @@ def _notify(msg): def _load_dynamic_vars(pipeline): """ - Render command string (jinja2 template), execute it in a subprocess - and return its result (JSON object) as a dict + Render command string (jinja2 template), execute it in a subprocess and return its result (JSON object) as a dict. - :param Mapping pipeline: pipeline dict - :return Mapping: a dict with attributes returned in the JSON - by called command + Args: + pipeline (Mapping): Pipeline dict. + + Returns: + Mapping: A dict with attributes returned in the JSON by called command. 
""" def _log_raise_latest(): @@ -209,11 +218,14 @@ def _log_raise_latest(): def _load_size_dep_vars(piface): """ - Read the resources from a TSV provided in the pipeline interface + Read the resources from a TSV provided in the pipeline interface. + + Args: + piface (looper.PipelineInterface): Currently processed piface. + section (str): Section of pipeline interface to process. - :param looper.PipelineInterface piface: currently processed piface - :param str section: section of pipeline interface to process - :return pandas.DataFrame: resources + Returns: + pandas.DataFrame: Resources. """ df = None if COMPUTE_KEY in piface and SIZE_DEP_VARS_KEY in piface[COMPUTE_KEY]: @@ -291,20 +303,23 @@ def _load_size_dep_vars(piface): def _expand_paths(self, keys): """ - Expand paths defined in the pipeline interface file + Expand paths defined in the pipeline interface file. - :param list keys: list of keys resembling the nested structure to get - to the pipeline interface attributre to expand + Args: + keys (list): List of keys resembling the nested structure to get to the + pipeline interface attribute to expand. """ def _get_from_dict(map, attrs): """ - Get value from a possibly nested mapping using a list of its attributes + Get value from a possibly nested mapping using a list of its attributes. - :param collections.Mapping map: mapping to retrieve values from - :param Iterable[str] attrs: a list of attributes - :return: value found in the the requested attribute or - None if one of the keys does not exist + Args: + map (collections.Mapping): Mapping to retrieve values from. + attrs (Iterable[str]): A list of attributes. + + Returns: + Value found in the requested attribute or None if one of the keys does not exist. """ for a in attrs: try: @@ -315,13 +330,15 @@ def _get_from_dict(map, attrs): def _set_in_dict(map, attrs, val): """ - Set value in a mapping, creating a possibly nested structure + Set value in a mapping, creating a possibly nested structure. 
+ + Args: + map (collections.Mapping): Mapping to retrieve values from. + attrs (Iterable[str]): A list of attributes. + val: Value to set. - :param collections.Mapping map: mapping to retrieve values from - :param Iterable[str] attrs: a list of attributes - :param val: value to set - :return: value found in the the requested attribute or - None if one of the keys does not exist + Returns: + Value found in the requested attribute or None if one of the keys does not exist. """ for a in attrs: if a == attrs[-1]: @@ -355,12 +372,13 @@ def _set_in_dict(map, attrs, val): def _validate(self, schema_src, exclude_case=False, flavor="generic"): """ - Generic function to validate the object against a schema + Generic function to validate the object against a schema. - :param str schema_src: schema source to validate against, URL or path - :param bool exclude_case: whether to exclude validated objects - from the error. Useful when used ith large projects - :param str flavor: type of the pipeline schema to use + Args: + schema_src (str): Schema source to validate against, URL or path. + exclude_case (bool): Whether to exclude validated objects from the error. + Useful when used with large projects. + flavor (str): Type of the pipeline schema to use. """ schema_source = schema_src.format(flavor) for schema in read_schema(schema_source): diff --git a/looper/processed_project.py b/looper/processed_project.py index 9222dadcc..16cc07750 100644 --- a/looper/processed_project.py +++ b/looper/processed_project.py @@ -140,26 +140,31 @@ def _get_path_sect_keys(mapping, keys=[PATH_KEY]): - """ - Get names of subsections in a mapping that contain collection of keys + """Get names of subsections in a mapping that contain collection of keys. + + Args: + mapping (Mapping): Schema subsection to search for paths. + keys (Iterable[str]): Collection of keys to check for. 
- :param Mapping mapping: schema subsection to search for paths - :param Iterable[str] keys: collection of keys to check for - :return Iterable[str]: collection of keys to path-like sections + Returns: + Iterable[str]: Collection of keys to path-like sections. """ return [k for k, v in mapping.items() if bool(set(keys) & set(mapping[k]))] def _populate_paths(object, schema, check_exist): - """ - Populate path-like object attributes with other object attributes - based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' - - :param Mapping object: object with attributes to populate path template with - :param dict schema: schema with path attributes defined, e.g. - output of read_schema function - :param bool check_exist: whether the paths should be check for existence - :return Mapping: object with path templates populated + """Populate path-like object attributes with other object attributes. + + Based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' + + Args: + object (Mapping): Object with attributes to populate path template with. + schema (dict): Schema with path attributes defined, e.g. + output of read_schema function. + check_exist (bool): Whether the paths should be check for existence. + + Returns: + Mapping: Object with path templates populated. """ if PROP_KEY not in schema: raise EidoSchemaInvalidError("Schema is missing properties section.") @@ -189,15 +194,18 @@ def _populate_paths(object, schema, check_exist): def populate_sample_paths(sample, schema, check_exist=False): - """ - Populate path-like Sample attributes with other object attributes - based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' - - :param peppy.Sample sample: sample to populate paths in - :param Iterable[dict] schema: schema with path attributes defined, e.g. 
- output of read_schema function - :param bool check_exist: whether the paths should be check for existence - :return Mapping: Sample with path templates populated + """Populate path-like Sample attributes with other object attributes. + + Based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' + + Args: + sample (peppy.Sample): Sample to populate paths in. + schema (Iterable[dict]): Schema with path attributes defined, e.g. + output of read_schema function. + check_exist (bool): Whether the paths should be check for existence. + + Returns: + Mapping: Sample with path templates populated. """ if not isinstance(sample, Sample): raise TypeError("Can only populate paths in peppy.Sample objects") @@ -207,15 +215,18 @@ def populate_sample_paths(sample, schema, check_exist=False): def populate_project_paths(project, schema, check_exist=False): - """ - Populate path-like Project attributes with other object attributes - based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' - - :param peppy.Project project: project to populate paths in - :param dict schema: schema with path attributes defined, e.g. - output of read_schema function - :param bool check_exist: whether the paths should be check for existence - :return Mapping: Project with path templates populated + """Populate path-like Project attributes with other object attributes. + + Based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' + + Args: + project (peppy.Project): Project to populate paths in. + schema (dict): Schema with path attributes defined, e.g. + output of read_schema function. + check_exist (bool): Whether the paths should be check for existence. + + Returns: + Mapping: Project with path templates populated. 
""" if not isinstance(project, Project): raise TypeError("Can only populate paths in peppy.Project objects") @@ -223,13 +234,14 @@ def populate_project_paths(project, schema, check_exist=False): def get_project_outputs(project, schema): - """ - Get project level outputs with path-like attributes populated with - project attributes + """Get project level outputs with path-like attributes populated with project attributes. + + Args: + project (peppy.Project): Project to get outputs for. + schema (Iterable[dict]): Schema to source the outputs from. - :param peppy.Project project: - :param Iterable[dict] schema: - :return yacman.YAMLConfigManager: mapping with populated path-like attributes + Returns: + yacman.YAMLConfigManager: Mapping with populated path-like attributes. """ from yacman import YAMLConfigManager diff --git a/looper/project.py b/looper/project.py index 18fb1a4bd..9a854370e 100644 --- a/looper/project.py +++ b/looper/project.py @@ -123,18 +123,18 @@ def __exit__(self, *args): class Project(peppyProject): - """ - Looper-specific Project. - - :param str cfg: path to configuration file with data from - which Project is to be built - :param Iterable[str] amendments: name indicating amendment to use, optional - :param str divcfg_path: path to an environment configuration YAML file - specifying compute settings. - :param bool permissive: Whether a error should be thrown if - a sample input file(s) do not exist or cannot be open. - :param str compute_env_file: Environment configuration YAML file specifying - compute settings. + """Looper-specific Project. + + Args: + cfg (str): Path to configuration file with data from which Project is + to be built. + amendments (Iterable[str]): Name indicating amendment to use, optional. + divcfg_path (str): Path to an environment configuration YAML file + specifying compute settings. + permissive (bool): Whether a error should be thrown if a sample input + file(s) do not exist or cannot be open. 
+ compute_env_file (str): Environment configuration YAML file specifying + compute settings. """ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): @@ -184,28 +184,28 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): @property def piface_key(self): - """ - Name of the pipeline interface attribute for this project + """Name of the pipeline interface attribute for this project. - :return str: name of the pipeline interface attribute + Returns: + str: Name of the pipeline interface attribute. """ return self._extra_cli_or_cfg(PIFACE_KEY_SELECTOR) or PIPELINE_INTERFACES_KEY @property def selected_compute_package(self): - """ - Compute package name specified in object constructor + """Compute package name specified in object constructor. - :return str: compute package name + Returns: + str: Compute package name. """ return self._extra_cli_or_cfg(COMPUTE_PACKAGE_KEY) @property def cli_pifaces(self): - """ - Collection of pipeline interface sources specified in object constructor + """Collection of pipeline interface sources specified in object constructor. - :return list[str]: collection of pipeline interface sources + Returns: + list[str]: Collection of pipeline interface sources. """ x = self._extra_cli_or_cfg(self.piface_key) return ( @@ -216,22 +216,22 @@ def cli_pifaces(self): @property def output_dir(self): - """ - Output directory for the project, specified in object constructor + """Output directory for the project, specified in object constructor. - :return str: path to the output directory + Returns: + str: Path to the output directory. """ return self._extra_cli_or_cfg(OUTDIR_KEY, strict=True) def _extra_cli_or_cfg(self, attr_name, strict=False): - """ - Get attribute value provided in kwargs in object constructor of from - looper section in the configuration file + """Get attribute value provided in kwargs in object constructor of from looper section in the configuration file. 
+ + Args: + attr_name (str): Name of the attribute to get value for. + strict (bool): Whether a non-existent attribute is exceptional. - :param str attr_name: name of the attribute to get value for - :param bool strict: whether a non-existent attribute is exceptional - :raise MisconfigurationException: in strict mode, when no attribute - found + Raises: + MisconfigurationException: In strict mode, when no attribute found. """ try: result = self[EXTRA_KEY][attr_name] @@ -257,31 +257,34 @@ def _extra_cli_or_cfg(self, attr_name, strict=False): @property def results_folder(self): - """ - Path to the results folder for the project + """Path to the results folder for the project. - :return str: path to the results folder in the output folder + Returns: + str: Path to the results folder in the output folder. """ return self._out_subdir_path(RESULTS_SUBDIR_KEY, default="results_pipeline") @property def submission_folder(self): - """ - Path to the submission folder for the project + """Path to the submission folder for the project. - :return str: path to the submission in the output folder + Returns: + str: Path to the submission in the output folder. """ return self._out_subdir_path(SUBMISSION_SUBDIR_KEY, default="submission") def _out_subdir_path(self, key: str, default: str) -> str: - """ - Create a system path relative to the project output directory. + """Create a system path relative to the project output directory. + The values for the names of the subdirectories are sourced from kwargs passed to the object constructor. - :param str key: name of the attribute mapped to the value of interest - :param str default: if key not specified, a default to use - :return str: path to the folder + Args: + key (str): Name of the attribute mapped to the value of interest. + default (str): If key not specified, a default to use. + + Returns: + str: Path to the folder. 
""" parent = getattr(self, OUTDIR_KEY) child = getattr(self[EXTRA_KEY], key, default) or default @@ -307,11 +310,12 @@ def make_project_dirs(self): @cached_property def project_pipeline_interface_sources(self): - """ - Get a list of all valid project-level pipeline interface sources - associated with this project. Sources that are file paths are expanded + """Get a list of all valid project-level pipeline interface sources associated with this project. - :return list[str]: collection of valid pipeline interface sources: + Sources that are file paths are expanded. + + Returns: + list[str]: Collection of valid pipeline interface sources. """ return ( [self._resolve_path_with_cfg(src) for src in self.cli_pifaces] @@ -321,15 +325,14 @@ def project_pipeline_interface_sources(self): @cached_property def project_pipeline_interfaces(self): - """ - Flat list of all valid project-level interface objects associated - with this Project + """Flat list of all valid project-level interface objects associated with this Project. Note that only valid pipeline interfaces will show up in the result (ones that exist on disk/remotely and validate successfully - against the schema) + against the schema). - :return list[looper.PipelineInterface]: list of pipeline interfaces + Returns: + list[looper.PipelineInterface]: List of pipeline interfaces. """ return [ PipelineInterface(pi, pipeline_type=PipelineLevel.PROJECT.value) @@ -338,59 +341,62 @@ def project_pipeline_interfaces(self): @cached_property def pipeline_interfaces(self): - """ - Flat list of all valid interface objects associated with this Project + """Flat list of all valid interface objects associated with this Project. Note that only valid pipeline interfaces will show up in the result (ones that exist on disk/remotely and validate successfully - against the schema) + against the schema). 
- :return list[looper.PipelineInterface]: list of pipeline interfaces + Returns: + list[looper.PipelineInterface]: List of pipeline interfaces. """ return [pi for ifaces in self._interfaces_by_sample.values() for pi in ifaces] @cached_property def pipeline_interface_sources(self): - """ - Get a list of all valid pipeline interface sources associated - with this project. Sources that are file paths are expanded + """Get a list of all valid pipeline interface sources associated with this project. - :return list[str]: collection of valid pipeline interface sources + Sources that are file paths are expanded. + + Returns: + list[str]: Collection of valid pipeline interface sources. """ return self._samples_by_interface.keys() @cached_property def pipestat_configured(self): - """ - Whether pipestat configuration is complete for all sample pipelines + """Whether pipestat configuration is complete for all sample pipelines. - :return bool: whether pipestat configuration is complete + Returns: + bool: Whether pipestat configuration is complete. """ return self._check_if_pipestat_configured() @cached_property def pipestat_configured_project(self): - """ - Whether pipestat configuration is complete for all project pipelines + """Whether pipestat configuration is complete for all project pipelines. - :return bool: whether pipestat configuration is complete + Returns: + bool: Whether pipestat configuration is complete. """ return self._check_if_pipestat_configured( pipeline_type=PipelineLevel.PROJECT.value ) def get_sample_piface(self, sample_name): - """ - Get a list of pipeline interfaces associated with the specified sample. + """Get a list of pipeline interfaces associated with the specified sample. Note that only valid pipeline interfaces will show up in the result (ones that exist on disk/remotely and validate successfully - against the schema) + against the schema). 
- :param str sample_name: name of the sample to retrieve list of - pipeline interfaces for - :return list[looper.PipelineInterface]: collection of valid - pipeline interfaces associated with selected sample + Args: + sample_name (str): Name of the sample to retrieve list of pipeline + interfaces for. + + Returns: + list[looper.PipelineInterface]: Collection of valid pipeline + interfaces associated with selected sample. """ try: return self._interfaces_by_sample[sample_name] @@ -399,13 +405,14 @@ def get_sample_piface(self, sample_name): @staticmethod def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY): - """ - Get the list of unique schema paths for a list of pipeline interfaces + """Get the list of unique schema paths for a list of pipeline interfaces. - :param str | Iterable[str] pifaces: pipeline interfaces to search - schemas for - :param str schema_key: where to look for schemas in the piface - :return Iterable[str]: unique list of schema file paths + Args: + pifaces (str | Iterable[str]): Pipeline interfaces to search schemas for. + schema_key (str): Where to look for schemas in the piface. + + Returns: + Iterable[str]: Unique list of schema file paths. """ if isinstance(pifaces, str): pifaces = [pifaces] @@ -625,16 +632,16 @@ def populate_pipeline_outputs(self): populate_project_paths(self, read_schema(schema)[0]) def _get_linked_pifaces(self): - """ - Get linked sample pipeline interfaces by project pipeline interface. + """Get linked sample pipeline interfaces by project pipeline interface. These are indicated in project pipeline interface by 'linked_pipeline_interfaces' key. If a project pipeline interface - does not have such key defined, an empty list is returned for that - pipeline interface. + does not have such key defined, an empty list is returned for that + pipeline interface. 
- :return dict[list[str]]: mapping of sample pipeline interfaces - by project pipeline interfaces + Returns: + dict[list[str]]: Mapping of sample pipeline interfaces by project + pipeline interfaces. """ def _process_linked_piface(p, piface, prj_piface): @@ -662,11 +669,11 @@ def _process_linked_piface(p, piface, prj_piface): return linked_pifaces def _piface_by_samples(self): - """ - Create a mapping of all defined interfaces in this Project by samples. + """Create a mapping of all defined interfaces in this Project by samples. - :return dict[str, list[PipelineInterface]]: a collection of pipeline - interfaces keyed by sample name + Returns: + dict[str, list[PipelineInterface]]: A collection of pipeline + interfaces keyed by sample name. """ pifaces_by_sample = {} for source, sample_names in self._samples_by_interface.items(): @@ -680,20 +687,22 @@ def _piface_by_samples(self): return pifaces_by_sample def _omit_from_repr(self, k, cls): - """ - Exclude the interfaces from representation. + """Exclude the interfaces from representation. - :param str k: key of item to consider for omission - :param type cls: placeholder to comply with superclass signature + Args: + k (str): Key of item to consider for omission. + cls (type): Placeholder to comply with superclass signature. """ return super(Project, self)._omit_from_repr(k, cls) or k == "interfaces" def _resolve_path_with_cfg(self, pth): - """ - Expand provided path and make it absolute using project config path + """Expand provided path and make it absolute using project config path. + + Args: + pth (str): Path, possibly including env vars and/or relative. - :param str pth: path, possibly including env vars and/or relative - :return str: absolute path + Returns: + str: Absolute path. 
""" if pth is None: return @@ -704,13 +713,13 @@ def _resolve_path_with_cfg(self, pth): return pth def _samples_by_piface(self, piface_key): - """ - Create a collection of all samples with valid pipeline interfaces + """Create a collection of all samples with valid pipeline interfaces. + + Args: + piface_key (str): Name of the attribute that holds pipeline interfaces. - :param str piface_key: name of the attribute that holds pipeline - interfaces - :return list[str]: a collection of samples keyed by pipeline interface - source + Returns: + list[str]: A collection of samples keyed by pipeline interface source. """ samples_by_piface = {} msgs = set() @@ -746,10 +755,10 @@ def _samples_by_piface(self, piface_key): return samples_by_piface def set_sample_piface(self, sample_piface: Union[List[str], str]) -> NoReturn: - """ - Add sample pipeline interfaces variable to object + """Add sample pipeline interfaces variable to object. - :param list | str sample_piface: sample pipeline interface + Args: + sample_piface (list | str): Sample pipeline interface. """ self.config.setdefault("sample_modifiers", {}) self.config["sample_modifiers"].setdefault("append", {}) @@ -766,37 +775,37 @@ def fetch_samples( selector_flag=None, exclusion_flag=None, ): - """ - Collect samples of particular protocol(s). + """Collect samples of particular protocol(s). Protocols can't be both positively selected for and negatively selected against. That is, it makes no sense and is not allowed to specify both selector_include and selector_exclude protocols. On the - other hand, if - neither is provided, all of the Project's Samples are returned. + other hand, if neither is provided, all of the Project's Samples are returned. If selector_include is specified, Samples without a protocol will be - excluded, - but if selector_exclude is specified, protocol-less Samples will be + excluded, but if selector_exclude is specified, protocol-less Samples will be included. 
- :param Project prj: the Project with Samples to fetch - :param str selector_attribute: name of attribute on which to base the - fetch - :param Iterable[str] | str selector_include: protocol(s) of interest; - if specified, a Sample must - :param Iterable[str] | str selector_exclude: protocol(s) to include - :param Iterable[str] | str selector_flag: flag to select on, e.g. FAILED, COMPLETED - :param Iterable[str] | str exclusion_flag: flag to exclude on, e.g. FAILED, COMPLETED - :return list[Sample]: Collection of this Project's samples with - protocol that either matches one of those in selector_include, - or either - lacks a protocol or does not match one of those in selector_exclude - :raise TypeError: if both selector_include and selector_exclude - protocols are - specified; TypeError since it's basically providing two arguments - when only one is accepted, so remain consistent with vanilla - Python2; - also possible if name of attribute for selection isn't a string + Args: + prj (Project): The Project with Samples to fetch. + selector_attribute (str): Name of attribute on which to base the fetch. + selector_include (Iterable[str] | str): Protocol(s) of interest; if + specified, a Sample must. + selector_exclude (Iterable[str] | str): Protocol(s) to include. + selector_flag (Iterable[str] | str): Flag to select on, e.g. FAILED, + COMPLETED. + exclusion_flag (Iterable[str] | str): Flag to exclude on, e.g. FAILED, + COMPLETED. + + Returns: + list[Sample]: Collection of this Project's samples with protocol that + either matches one of those in selector_include, or either lacks a + protocol or does not match one of those in selector_exclude. + + Raises: + TypeError: If both selector_include and selector_exclude protocols are + specified; TypeError since it's basically providing two arguments + when only one is accepted, so remain consistent with vanilla Python2; + also possible if name of attribute for selection isn't a string. 
""" kept_samples = prj.samples From 90a2e6234cc850942f125cd7a67392c130fcfad7 Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Wed, 5 Nov 2025 20:01:12 -0500 Subject: [PATCH 134/163] Update looper/utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- looper/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/utils.py b/looper/utils.py index 9cc57ef61..e119947db 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -56,7 +56,7 @@ def fetch_flag_files(prj=None, results_folder="", flags=FLAGS): function uses the assumption that if results_folder rather than project is provided, the structure of the file tree rooted at results_folder is such that any flag files to be found are not directly within rootdir but - are directly within on of its first layer of subfolders. + are directly within one of its first layer of subfolders. flags (Iterable[str] | str): Collection of flag names or single flag name for which to fetch files. From e518d64b66f76643ebd45479e8ba9ad6fa610ee3 Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Wed, 5 Nov 2025 20:01:39 -0500 Subject: [PATCH 135/163] Update looper/project.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- looper/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/project.py b/looper/project.py index 9a854370e..60c476c14 100644 --- a/looper/project.py +++ b/looper/project.py @@ -224,7 +224,7 @@ def output_dir(self): return self._extra_cli_or_cfg(OUTDIR_KEY, strict=True) def _extra_cli_or_cfg(self, attr_name, strict=False): - """Get attribute value provided in kwargs in object constructor of from looper section in the configuration file. + """Get attribute value provided in kwargs in object constructor or from looper section in the configuration file. Args: attr_name (str): Name of the attribute to get value for. 
From cf7f2f1c6b1d9f6aedddf42dbc37dfd68ea3f96e Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 5 Nov 2025 20:03:45 -0500 Subject: [PATCH 136/163] clarify docstring --- looper/conductor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/conductor.py b/looper/conductor.py index cf4388451..260c77585 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -484,7 +484,7 @@ def _is_full(self, pool, size): Instances of this class maintain a sort of 'pool' of commands that expands as each new command is added, until a time that it's deemed - 'full' and th + 'full'. :return bool: Whether this conductor's pool of commands is 'full' and ready for submission, as determined by its parameterization From 7faf2d6446eb3de69457bf8f53cd591d8f3237f6 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 10 Feb 2026 12:09:43 -0500 Subject: [PATCH 137/163] prep for yacman update --- looper/divvy.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/looper/divvy.py b/looper/divvy.py index 031cae69c..894356fcf 100644 --- a/looper/divvy.py +++ b/looper/divvy.py @@ -46,12 +46,8 @@ def __init__( entries=None, wait_max=None, strict_ro_locks=False, - schema_source=None, - validate_on_write=False, ): - super().__init__( - entries, wait_max, strict_ro_locks, schema_source, validate_on_write - ) + super().__init__(entries, wait_max, strict_ro_locks) if "compute_packages" not in self: self["compute_packages"] = {} From eebe47bd381ad3307108df09d9d0040704d8f202 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 10 Feb 2026 21:56:45 -0500 Subject: [PATCH 138/163] prep for yacman 0.9.5 --- looper/conductor.py | 2 +- looper/divvy.py | 5 ++--- looper/parser_types.py | 2 +- looper/pipeline_interface.py | 2 +- requirements/requirements-all.txt | 2 +- setup.cfg | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index 20fb71605..fcc49beab 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -23,7 
+23,7 @@ from pipestat import PipestatError from ubiquerg import expandpath from yaml import dump -from yacman import FutureYAMLConfigManager as YAMLConfigManager +from yacman import YAMLConfigManager from .const import ( EXTRA_PROJECT_CMD_TEMPLATE, diff --git a/looper/divvy.py b/looper/divvy.py index 894356fcf..5b4898891 100644 --- a/looper/divvy.py +++ b/looper/divvy.py @@ -6,8 +6,7 @@ from shutil import copytree -from yacman import FutureYAMLConfigManager as YAMLConfigManager -from yacman import write_lock, FILEPATH_KEY, load_yaml, select_config +from yacman import YAMLConfigManager, write_lock, load_yaml, select_config from .const import ( @@ -61,7 +60,7 @@ def write(self, filename=None): with write_lock(self) as locked_ym: locked_ym.rebase() locked_ym.write() - filename = filename or getattr(self, FILEPATH_KEY) + filename = filename or self.filepath filedir = os.path.dirname(filename) # For this object, we *also* have to write the template files for pkg_name, pkg in self["compute_packages"].items(): diff --git a/looper/parser_types.py b/looper/parser_types.py index 97c8b6f99..d7a201906 100644 --- a/looper/parser_types.py +++ b/looper/parser_types.py @@ -42,7 +42,7 @@ def html_checkbox(caravel=False, checked=False): """ caravel_data = YAMLConfigManager({"element_type": "checkbox", "element_args": {}}) if checked: - caravel_data.add_entries({"element_args": {"checked": True}}) + caravel_data.update({"element_args": {"checked": True}}) def fun(x=None, caravel_data=caravel_data, caravel=caravel): return caravel_data if caravel else eval(x) diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index bf023961a..50323ac1b 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -344,7 +344,7 @@ def _set_in_dict(map, attrs, val): if a == attrs[-1]: map[a] = val break - map.setdefault(a, PXAM()) + map.setdefault(a, {}) map = map[a] raw_path = _get_from_dict(self, keys) diff --git a/requirements/requirements-all.txt 
b/requirements/requirements-all.txt index 88af67d8d..cf60cd24f 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -9,6 +9,6 @@ peppy>=0.40.6 pyyaml>=3.12 rich>=9.10.0 ubiquerg>=0.8.1 -yacman==0.9.3 +yacman>=0.9.5 pydantic-argparse>=0.9.0 psutil \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index ec734d815..898fa9895 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [aliases] test = pytest -[pytest] +[tool:pytest] # Only request extra info from failures and errors. addopts = -rfE From f8b66ace5081596a56393aaffaf663a6109000c3 Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 10 Feb 2026 22:31:56 -0500 Subject: [PATCH 139/163] python modernization update --- .github/workflows/black.yml | 13 +- .github/workflows/python-publish.yml | 32 ++--- .github/workflows/run-pytest.yml | 29 ++-- .pre-commit-config.yaml | 19 +-- MANIFEST.in | 9 -- looper/__init__.py | 15 +- looper/__main__.py | 1 - looper/_version.py | 2 - looper/cli_divvy.py | 10 +- looper/cli_pydantic.py | 49 +++---- looper/command_models/arguments.py | 9 +- looper/command_models/commands.py | 15 +- looper/conductor.py | 107 +++++++------- looper/const.py | 6 +- looper/divvy.py | 45 +++--- looper/exceptions.py | 25 ++-- looper/looper.py | 99 +++++++------ looper/parser_types.py | 12 +- looper/pipeline_interface.py | 30 ++-- looper/plugins.py | 14 +- looper/processed_project.py | 21 ++- looper/project.py | 132 ++++++++++-------- looper/utils.py | 112 ++++++++------- pyproject.toml | 83 +++++++++++ requirements/requirements-all.txt | 14 -- requirements/requirements-doc.txt | 9 -- requirements/requirements-test.txt | 7 - setup.cfg | 6 - setup.py | 95 ------------- tests/conftest.py | 6 +- .../pipeline/count_lines_plot.py | 3 +- .../pipestat_example/pipeline/count_lines.py | 2 +- .../pipeline/count_lines_plot.py | 5 +- .../pipeline_pipestat/count_lines.py | 2 +- tests/divvytests/conftest.py | 8 +- tests/divvytests/divvy_tests/test_divvy.py | 3 +- 
tests/divvytests/helpers.py | 2 +- .../regression/test_write_script.py | 4 +- tests/divvytests/test_divvy_simple.py | 5 +- tests/smoketests/test_cli_validation.py | 9 +- tests/smoketests/test_other.py | 9 +- tests/smoketests/test_run.py | 6 +- tests/test_clean.py | 4 +- tests/test_comprehensive.py | 14 +- tests/test_desired_sample_range.py | 3 +- tests/test_natural_range.py | 11 +- 46 files changed, 534 insertions(+), 582 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 looper/_version.py create mode 100644 pyproject.toml delete mode 100644 requirements/requirements-all.txt delete mode 100644 requirements/requirements-doc.txt delete mode 100644 requirements/requirements-test.txt delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index aec1766ed..7a5062633 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -1,14 +1,15 @@ name: Lint -on: [pull_request] +on: [push, pull_request] jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: psf/black@stable + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: - options: "--check --diff --color --verbose" - jupyter: true + python-version: "3.12" + - run: pip install ruff + - run: ruff check . + - run: ruff format --check . 
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 365a8b1b1..a637870e8 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,6 +1,3 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - name: Upload Python Package on: @@ -12,19 +9,18 @@ jobs: runs-on: ubuntu-latest name: upload release to PyPI permissions: - id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + contents: read + id-token: write + steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.x" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - run: | - python setup.py sdist bdist_wheel - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install build dependencies + run: python -m pip install --upgrade pip build + - name: Build package + run: python -m build + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index ceeed0d5c..91d8774fc 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -1,30 +1,29 @@ name: Run pytests on: - pull_request: + push: branches: [master, dev] + pull_request: + branches: [master] jobs: pytest: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.13"] - os: [ubuntu-latest] + python-version: ["3.10", "3.14"] + os: [ubuntu-latest, macos-latest] steps: - - uses: 
actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} + - uses: actions/checkout@v4 - - name: Install test dependencies - run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - - name: Install package - run: python -m pip install . + - name: Install package with test dependencies + run: python -m pip install ".[test]" - - name: Run pytest tests - run: pytest tests -x -vv --remote-data + - name: Run pytest tests + run: pytest tests diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c91376149..a136b1232 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,20 +1,15 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: check-yaml - id: end-of-file-fixer - - id: requirements-txt-fixer - - id: trailing-whitespace - - - repo: https://github.com/PyCQA/isort - rev: 5.9.1 - hooks: - - id: isort - args: ["--profile", "black"] + - id: check-ast - - repo: https://github.com/psf/black - rev: 21.6b0 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.0 hooks: - - id: black + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 15473d351..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,9 +0,0 @@ -include requirements/* -include README.md -include logo_looper.svg -include looper/jinja_templates/* -include looper/default_config/* -include looper/default_config/divvy_templates/* -include looper/jinja_templates_old/* -include looper/schemas/* -include looper/command_models/* diff --git a/looper/__init__.py b/looper/__init__.py index 0be3b8cb8..19a05ad25 100644 --- a/looper/__init__.py +++ 
b/looper/__init__.py @@ -11,22 +11,27 @@ logmuse.init_logger("looper") -from .divvy import ComputingConfiguration, select_divvy_config -from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME +from importlib.metadata import version + +from .divvy import ( + DEFAULT_COMPUTE_RESOURCES_NAME, + ComputingConfiguration, + select_divvy_config, +) from .divvy import NEW_COMPUTE_KEY as COMPUTE_KEY -from ._version import __version__ +__version__ = version("looper") from .conductor import ( SubmissionConductor, write_submission_yaml, ) +from .pipeline_interface import PipelineInterface from .plugins import ( + write_custom_template, write_sample_yaml, write_sample_yaml_cwl, write_sample_yaml_prj, - write_custom_template, ) -from .pipeline_interface import PipelineInterface from .project import Project # Not used here, but make this the main import interface between peppy and diff --git a/looper/__main__.py b/looper/__main__.py index 3e9816554..ff3d95f85 100644 --- a/looper/__main__.py +++ b/looper/__main__.py @@ -1,7 +1,6 @@ import sys from .cli_pydantic import main -from .cli_divvy import main as divvy_main if __name__ == "__main__": try: diff --git a/looper/_version.py b/looper/_version.py deleted file mode 100644 index c9ded3fc2..000000000 --- a/looper/_version.py +++ /dev/null @@ -1,2 +0,0 @@ -__version__ = "2.0.3" -# You must change the version in parser = pydantic_argparse.ArgumentParser in cli_pydantic.py!!! 
diff --git a/looper/cli_divvy.py b/looper/cli_divvy.py index 0305e40e8..33b67947a 100644 --- a/looper/cli_divvy.py +++ b/looper/cli_divvy.py @@ -1,14 +1,16 @@ -import logmuse import os import sys + +import logmuse import yaml +from ubiquerg import VersionInHelpParser, is_writable from yaml import SafeLoader -from ubiquerg import is_writable, VersionInHelpParser + from .const import ( DEFAULT_COMPUTE_RESOURCES_NAME, DEFAULT_CONFIG_FILEPATH, ) -from .divvy import select_divvy_config, ComputingConfiguration, divvy_init +from .divvy import ComputingConfiguration, divvy_init, select_divvy_config def build_argparser(): @@ -99,7 +101,7 @@ def add_subparser(cmd, description): return parser -def main(): +def main() -> None: """Primary workflow for divvy CLI""" parser = logmuse.add_logging_options(build_argparser()) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 37452a26d..7d7037dfc 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -12,11 +12,6 @@ It is well possible that this script will be removed again. """ -# Note: The following import is used for forward annotations (Python 3.8) -# to prevent potential 'TypeError' related to the use of the '|' operator -# with types. -from __future__ import annotations - import os import sys @@ -26,11 +21,10 @@ from eido import inspect_project from pephubclient import PEPHubClient from pydantic_argparse.argparse.parser import ArgumentParser +from rich.console import Console from . 
import __version__ - from .command_models.arguments import ArgumentEnum - from .command_models.commands import ( SUPPORTED_COMMANDS, TopLevelParser, @@ -40,11 +34,11 @@ CLI_KEY, CLI_PROJ_ATTRS, EXAMPLE_COMPUTE_SPEC_FMT, - PipelineLevel, PROJECT_PL_ARG, SAMPLE_EXCLUSION_OPTNAME, SAMPLE_INCLUSION_OPTNAME, SAMPLE_PL_ARG, + PipelineLevel, ) from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config from .exceptions import ( @@ -66,27 +60,24 @@ from .utils import ( dotfile_path, enrich_args_via_cfg, - is_pephub_registry_path, - read_looper_config_file, - read_looper_dotfile, - initiate_looper_config, init_generic_pipeline, - read_yaml_file, + initiate_looper_config, inspect_looper_config_file, is_PEP_file_type, + is_pephub_registry_path, looper_config_tutorial, + read_looper_config_file, + read_looper_dotfile, + read_yaml_file, ) -from typing import List, Tuple -from rich.console import Console - -def opt_attr_pair(name: str) -> Tuple[str, str]: +def opt_attr_pair(name: str) -> tuple[str, str]: """Takes argument as attribute and returns as tuple of top-level or subcommand used.""" return f"--{name}", name.replace("-", "_") -def validate_post_parse(args: argparse.Namespace) -> List[str]: +def validate_post_parse(args) -> list[str]: """Checks if user is attempting to use mutually exclusive options.""" problems = [] used_exclusives = [ @@ -144,10 +135,9 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): sys.exit(1) if subcommand_name == "init": - console = Console() console.clear() - console.rule(f"\n[magenta]Looper initialization[/magenta]") + console.rule("\n[magenta]Looper initialization[/magenta]") selection = subcommand_args.generic if selection is True: console.clear() @@ -252,18 +242,19 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): ) else: raise MisconfigurationException( - f"`sample_pipeline_interface` is missing. Provide it in the parameters." + "`sample_pipeline_interface` is missing. 
Provide it in the parameters." ) else: raise MisconfigurationException( - f"Cannot load PEP. Check file path or registry path to pep." + "Cannot load PEP. Check file path or registry path to pep." ) selected_compute_pkg = p.selected_compute_package or DEFAULT_COMPUTE_RESOURCES_NAME if p.dcc is not None and not p.dcc.activate_package(selected_compute_pkg): _LOGGER.info( - "Failed to activate '{}' computing package. " - "Using the default one".format(selected_compute_pkg) + "Failed to activate '{}' computing package. Using the default one".format( + selected_compute_pkg + ) ) with ProjectContext( @@ -274,7 +265,6 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): selector_flag=subcommand_args.sel_flag, exclusion_flag=subcommand_args.exc_flag, ) as prj: - # Check at the beginning if user wants to use pipestat and pipestat is configurable is_pipestat_configured = ( prj._check_if_pipestat_configured(pipeline_type=PipelineLevel.PROJECT.value) @@ -362,7 +352,7 @@ def main(test_args=None) -> dict: prog="looper", description="Looper: A job submitter for Portable Encapsulated Projects", add_help=True, - version="2.0.3", + version=__version__, ) parser = add_short_arguments(parser, ArgumentEnum) @@ -379,7 +369,7 @@ def main_cli() -> None: main() -def _proc_resources_spec(args): +def _proc_resources_spec(args) -> dict[str, str]: """Process CLI-sources compute setting specification. 
There are two sources of compute settings in the CLI alone: @@ -404,8 +394,9 @@ def _proc_resources_spec(args): settings_data = read_yaml_file(settings) or {} except yaml.YAMLError: _LOGGER.warning( - "Settings file ({}) does not follow YAML format," - " disregarding".format(settings) + "Settings file ({}) does not follow YAML format, disregarding".format( + settings + ) ) settings_data = {} if not spec: diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 04c11d5ca..a821a1c84 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -34,7 +34,12 @@ class Argument(pydantic.fields.FieldInfo): """ def __init__( - self, name: str, default: Any, description: str, alias: str = None, **kwargs + self, + name: str, + default: Any, + description: str, + alias: str | None = None, + **kwargs, ) -> None: self._name = name super().__init__( @@ -43,7 +48,7 @@ def __init__( self._validate() @property - def name(self): + def name(self) -> str: """ Argument name as used in the CLI, e.g. 
"ignore-args" """ diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index d88c82328..176df38a0 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -3,13 +3,13 @@ """ from dataclasses import dataclass -from typing import List, Optional, Type, Union +from typing import Optional import pydantic.v1 as pydantic +from pydantic_argparse import ArgumentParser from ..const import MESSAGE_BY_SUBCOMMAND from .arguments import Argument, ArgumentEnum -from pydantic_argparse import ArgumentParser @dataclass @@ -24,9 +24,9 @@ class Command: name: str description: str - arguments: List[Argument] + arguments: list[Argument] - def create_model(self) -> Type[pydantic.BaseModel]: + def create_model(self) -> type[pydantic.BaseModel]: """ Creates a `pydantic` model for this command """ @@ -240,7 +240,7 @@ def create_model(self) -> Type[pydantic.BaseModel]: def add_short_arguments( - parser: ArgumentParser, argument_enums: Type[ArgumentEnum] + parser: ArgumentParser, argument_enums: type[ArgumentEnum] ) -> ArgumentParser: """Add short arguments to parser after initial creation. 
@@ -258,13 +258,12 @@ def add_short_arguments( """ for cmd in parser._subcommands.choices.keys(): - for argument_enum in list(argument_enums): # First check there is an alias for the argument otherwise skip if argument_enum.value.alias: short_key = argument_enum.value.alias - long_key = "--" + argument_enum.value.name.replace( - "_", "-" + long_key = ( + "--" + argument_enum.value.name.replace("_", "-") ) # We must do this because the ArgumentEnum names are transformed during parser creation if long_key in parser._subcommands.choices[cmd]._option_string_actions: argument = parser._subcommands.choices[cmd]._option_string_actions[ diff --git a/looper/conductor.py b/looper/conductor.py index fcc49beab..da0228e1f 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -3,27 +3,25 @@ import importlib import logging import os -import subprocess import signal -import psutil +import subprocess import sys import time -import yaml -from math import ceil from json import loads +from math import ceil from subprocess import check_output -from typing import Optional -from eido import read_schema, get_input_files_size +import psutil +import yaml +from eido import get_input_files_size, read_schema from eido.const import INPUT_FILE_SIZE_KEY, MISSING_KEY from jinja2.exceptions import UndefinedError - from peppy.const import CONFIG_KEY, SAMPLE_YAML_EXT from peppy.exceptions import RemoteYAMLError from pipestat import PipestatError from ubiquerg import expandpath -from yaml import dump from yacman import YAMLConfigManager +from yaml import dump from .const import ( EXTRA_PROJECT_CMD_TEMPLATE, @@ -32,7 +30,6 @@ NOT_SUB_MSG, OUTDIR_KEY, OUTPUT_SCHEMA_KEY, - PipelineLevel, PRE_SUBMIT_CMD_KEY, PRE_SUBMIT_HOOK_KEY, PRE_SUBMIT_PY_FUN_KEY, @@ -42,20 +39,25 @@ SAMPLE_PL_KEY, SUBMISSION_SUBDIR_KEY, VAR_TEMPL_KEY, + PipelineLevel, ) from .exceptions import JobSubmissionException from .processed_project import populate_sample_paths from .utils import ( + expand_nested_var_templates, 
fetch_sample_flags, jinja_render_template_strictly, - expand_nested_var_templates, ) - _LOGGER = logging.getLogger(__name__) -def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename=None): +def _get_yaml_path( + namespaces: dict, + template_key: str, + default_name_appendix: str = "", + filename: str | None = None, +) -> str: """Get a path to a YAML file for the sample. Args: @@ -108,7 +110,9 @@ def _get_yaml_path(namespaces, template_key, default_name_appendix="", filename= return final_path -def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict): +def write_pipestat_config( + looper_pipestat_config_path: str, pipestat_config_dict: dict +) -> bool: """Write a combined configuration file to be passed to a PipestatManager. Args: @@ -136,7 +140,7 @@ def write_pipestat_config(looper_pipestat_config_path, pipestat_config_dict): return True -def write_submission_yaml(namespaces): +def write_submission_yaml(namespaces: dict) -> dict: """Save all namespaces to YAML. Args: @@ -170,17 +174,17 @@ def __init__( self, pipeline_interface, prj, - delay=0, - extra_args=None, - extra_args_override=None, - ignore_flags=False, - compute_variables=None, - max_cmds=None, - max_size=None, - max_jobs=None, - automatic=True, - collate=False, - ): + delay: float = 0, + extra_args: str | None = None, + extra_args_override: str | None = None, + ignore_flags: bool = False, + compute_variables: dict | None = None, + max_cmds: int | None = None, + max_size: int | float | None = None, + max_jobs: int | float | None = None, + automatic: bool = True, + collate: bool = False, + ) -> None: """Create a job submission manager. 
The most critical inputs are the pipeline interface and the pipeline @@ -249,8 +253,9 @@ def __init__( if self.extra_pipe_args: _LOGGER.debug( - "String appended to every pipeline command: " - "{}".format(self.extra_pipe_args) + "String appended to every pipeline command: {}".format( + self.extra_pipe_args + ) ) if max_jobs: @@ -283,11 +288,11 @@ def __init__( self._skipped_sample_pools = [] @property - def failed_samples(self): + def failed_samples(self) -> list[str]: return self._failed_sample_names @property - def num_cmd_submissions(self): + def num_cmd_submissions(self) -> int: """Return the number of commands that this conductor has submitted. Returns: @@ -296,7 +301,7 @@ def num_cmd_submissions(self): return self._num_cmds_submitted @property - def num_job_submissions(self): + def num_job_submissions(self) -> int: """Return the number of jobs that this conductor has submitted. Returns: @@ -304,7 +309,7 @@ def num_job_submissions(self): """ return self._num_good_job_submissions - def is_project_submittable(self, force=False): + def is_project_submittable(self, force: bool = False) -> bool: """Check whether the current project has been already submitted. Args: @@ -325,7 +330,7 @@ def is_project_submittable(self, force=False): return False return True - def add_sample(self, sample, rerun=False): + def add_sample(self, sample, rerun: bool = False) -> list: """Add a sample for submission to this conductor. Args: @@ -362,7 +367,7 @@ def add_sample(self, sample, rerun=False): use_this_sample = True # default to running this sample msg = None if rerun and sample_statuses == []: - msg = f"> Skipping sample because rerun requested, but no failed or waiting flag found." + msg = "> Skipping sample because rerun requested, but no failed or waiting flag found." 
use_this_sample = False if sample_statuses: status_str = ", ".join(sample_statuses) @@ -420,7 +425,7 @@ def add_sample(self, sample, rerun=False): return skip_reasons - def submit(self, force=False): + def submit(self, force: bool = False) -> bool: """Submit one or more commands as a job. This call will submit the commands corresponding to the current pool @@ -495,7 +500,7 @@ def submit(self, force=False): return submitted - def _is_full(self, pool, size): + def _is_full(self, pool: list, size: float) -> bool: """Determine whether it's time to submit a job for the pool of commands. Instances of this class maintain a sort of 'pool' of commands that @@ -513,7 +518,7 @@ def _is_full(self, pool, size): return self.max_cmds == len(pool) or size >= self.max_size @property - def _samples(self): + def _samples(self) -> list: """Return a collection of pooled samples. Returns: @@ -522,7 +527,7 @@ def _samples(self): """ return [s for s in self._pool] - def _sample_lump_name(self, pool): + def _sample_lump_name(self, pool: list) -> str: """Determine how to refer to the 'sample' for this submission.""" if self.collate: return self.prj.name @@ -542,7 +547,7 @@ def _sample_lump_name(self, pool): # name concordant with 1-based, not 0-based indexing. return "lump{}".format(self._num_total_job_submissions + 1) - def _signal_int_handler(self, signal, frame): + def _signal_int_handler(self, signal, frame) -> None: """For catching interrupt (Ctrl +C) signals. Fails gracefully. Args: @@ -552,7 +557,7 @@ def _signal_int_handler(self, signal, frame): signal_type = "SIGINT" self._generic_signal_handler(signal_type) - def _generic_signal_handler(self, signal_type): + def _generic_signal_handler(self, signal_type: str) -> None: """Function for handling both SIGTERM and SIGINT. 
Args: @@ -565,7 +570,7 @@ def _generic_signal_handler(self, signal_type): sys.exit(1) - def _terminate_current_subprocess(self): + def _terminate_current_subprocess(self) -> None: """This terminates the current sub process associated with self.process_id""" def pskill(proc_pid, sig=signal.SIGINT): @@ -612,7 +617,7 @@ def pskill(proc_pid, sig=signal.SIGINT): note = "was already terminated" _LOGGER.warning(msg=f"Child process {self.process_id} {note}.") - def _attend_process(self, proc, sleeptime): + def _attend_process(self, proc, sleeptime: float) -> bool: """Wait on a process for a given time to see if it finishes. Returns True if it's still running after the given time or False as @@ -631,11 +636,11 @@ def _attend_process(self, proc, sleeptime): return True return False - def _jobname(self, pool): + def _jobname(self, pool: list) -> str: """Create the name for a job submission.""" return "{}_{}".format(self.pl_iface.pipeline_name, self._sample_lump_name(pool)) - def _build_looper_namespace(self, pool, size): + def _build_looper_namespace(self, pool: list, size: float) -> YAMLConfigManager: """Compile a mapping of looper/submission related settings. Compile a mapping of looper/submission related settings for use in @@ -672,7 +677,7 @@ def _build_looper_namespace(self, pool, size): if pl_config_file: if not os.path.isfile(pl_config_file): _LOGGER.error( - "Pipeline config file specified " "but not found: %s", + "Pipeline config file specified but not found: %s", pl_config_file, ) raise IOError(pl_config_file) @@ -682,7 +687,7 @@ def _build_looper_namespace(self, pool, size): return settings def _set_pipestat_namespace( - self, sample_name: Optional[str] = None + self, sample_name: str | None = None ) -> YAMLConfigManager: """Compile a mapping of pipestat-related settings. 
@@ -720,7 +725,7 @@ def _set_pipestat_namespace( filtered_namespace = {k: v for k, v in full_namespace.items() if v} return YAMLConfigManager(filtered_namespace) - def write_script(self, pool, size): + def write_script(self, pool: list, size: float) -> str: """Create the script for job submission. Args: @@ -782,7 +787,7 @@ def write_script(self, pool, size): pl_iface[VAR_TEMPL_KEY] = self.pl_iface.render_var_templates( namespaces=namespaces ) - _LOGGER.debug(f"namespace pipelines: { pl_iface }") + _LOGGER.debug(f"namespace pipelines: {pl_iface}") namespaces["pipeline"]["var_templates"] = pl_iface[VAR_TEMPL_KEY] or {} @@ -830,21 +835,21 @@ def write_script(self, pool, size): output_path=subm_base + ".sub", extra_vars=[{"looper": looper}] ) - def _reset_pool(self): + def _reset_pool(self) -> None: """Reset the state of the pool of samples""" self._pool = [] self._curr_size = 0 - def _reset_curr_skips(self): + def _reset_curr_skips(self) -> None: self._curr_skip_pool = [] self._curr_skip_size = 0 -def _use_sample(flag, skips): +def _use_sample(flag: bool, skips: list) -> bool: return flag and not skips -def _exec_pre_submit(piface, namespaces): +def _exec_pre_submit(piface, namespaces: dict) -> dict: """Execute pre submission hooks defined in the pipeline interface. Args: diff --git a/looper/const.py b/looper/const.py index d60d052ef..86f17ecb4 100644 --- a/looper/const.py +++ b/looper/const.py @@ -3,10 +3,6 @@ import os from enum import Enum -__author__ = "Databio lab" -__email__ = "nathan@code.databio.org" - - __all__ = [ "BUTTON_APPEARANCE_BY_FLAG", "TABLE_APPEARANCE_BY_FLAG", @@ -107,7 +103,7 @@ } -def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): +def _get_apperance_dict(type: str, templ: dict = APPEARANCE_BY_FLAG) -> dict: """Construct the appearance mapping using the template. 
Based on the type of the HTML element provided construct the appearance diff --git a/looper/divvy.py b/looper/divvy.py index 5b4898891..913f64517 100644 --- a/looper/divvy.py +++ b/looper/divvy.py @@ -3,22 +3,18 @@ import logging import os import shutil - - from shutil import copytree -from yacman import YAMLConfigManager, write_lock, load_yaml, select_config +from yacman import YAMLConfigManager, load_yaml, select_config, write_lock from .const import ( COMPUTE_SETTINGS_VARNAME, DEFAULT_COMPUTE_RESOURCES_NAME, - NEW_COMPUTE_KEY, DEFAULT_CONFIG_FILEPATH, - DEFAULT_CONFIG_SCHEMA, + NEW_COMPUTE_KEY, ) from .utils import write_submit_script - _LOGGER = logging.getLogger(__name__) # This is the divvy.py submodule from divvy @@ -44,8 +40,8 @@ def __init__( self, entries=None, wait_max=None, - strict_ro_locks=False, - ): + strict_ro_locks: bool = False, + ) -> None: super().__init__(entries, wait_max, strict_ro_locks) if "compute_packages" not in self: @@ -56,7 +52,7 @@ def __init__( self.setdefault("adapters", None) self.activate_package(DEFAULT_COMPUTE_RESOURCES_NAME) - def write(self, filename=None): + def write(self, filename: str | None = None) -> None: with write_lock(self) as locked_ym: locked_ym.rebase() locked_ym.write() @@ -69,7 +65,7 @@ def write(self, filename=None): shutil.copyfile(pkg.submission_template, destfile) @property - def compute_env_var(self): + def compute_env_var(self) -> list[str]: """Environment variable through which to access compute settings. Returns: @@ -79,7 +75,7 @@ def compute_env_var(self): return COMPUTE_SETTINGS_VARNAME @property - def default_config_file(self): + def default_config_file(self) -> str: """Path to default compute environment settings file. Returns: @@ -90,7 +86,7 @@ def default_config_file(self): # Warning: template cannot be a property, because otherwise # it will get treated as a PathExAttMap treats all properties, which # is that it will turn any double-slashes into single slashes. 
- def template(self): + def template(self) -> str: """Get the currently active submission template. Returns: @@ -100,7 +96,7 @@ def template(self): return f.read() @property - def templates_folder(self): + def templates_folder(self) -> str: """Path to folder with default submission templates. Returns: @@ -113,7 +109,7 @@ def templates_folder(self): os.path.dirname(__file__), "default_config", "divvy_templates" ) - def activate_package(self, package_name): + def activate_package(self, package_name: str) -> bool: """Activates a compute package. This copies the computing attributes from the configuration file into @@ -155,7 +151,6 @@ def activate_package(self, package_name): # but now, it makes more sense to do it here so we can piggyback on # the default update() method and not even have to do that. if not os.path.isabs(self.compute["submission_template"]): - try: if self.filepath: self.compute["submission_template"] = os.path.join( @@ -189,7 +184,7 @@ def activate_package(self, package_name): return False - def clean_start(self, package_name): + def clean_start(self, package_name: str) -> bool: """Clear current active settings and then activate the given package. Args: @@ -210,11 +205,11 @@ def get_active_package(self) -> YAMLConfigManager: return self.compute @property - def compute_packages(self): + def compute_packages(self) -> dict: return self["compute_packages"] - def list_compute_packages(self): + def list_compute_packages(self) -> set[str]: """Returns a list of available compute packages. Returns: @@ -222,7 +217,7 @@ def list_compute_packages(self): """ return set(self["compute_packages"].keys()) - def reset_active_settings(self): + def reset_active_settings(self) -> bool: """Clear out current compute settings. Returns: @@ -231,7 +226,7 @@ def reset_active_settings(self): self.compute = YAMLConfigManager() return True - def update_packages(self, config_file): + def update_packages(self, config_file: str) -> bool: """Parse data from divvy configuration file. 
Given a divvy configuration file, this function will update (not @@ -265,7 +260,7 @@ def get_adapters(self) -> YAMLConfigManager: _LOGGER.debug("No adapters determined in divvy configuration file.") return adapters - def submit(self, output_path, extra_vars=None): + def submit(self, output_path: str | None, extra_vars: list | None = None) -> None: if not output_path: import tempfile @@ -282,7 +277,7 @@ def submit(self, output_path, extra_vars=None): _LOGGER.info(submission_command) os.system(submission_command) - def write_script(self, output_path, extra_vars=None): + def write_script(self, output_path: str, extra_vars: list | None = None) -> str: """Given currently active settings, populate the active template to write a submission script. Additionally use the current adapters to adjust the select of the @@ -357,7 +352,7 @@ def _get_from_dict(map, attrs): return write_submit_script(output_path, self.template(), variables) - def _handle_missing_env_attrs(self, config_file, when_missing): + def _handle_missing_env_attrs(self, config_file: str, when_missing) -> None: """Default environment settings aren't required; warn, though.""" missing_env_attrs = [ attr @@ -375,7 +370,7 @@ def _handle_missing_env_attrs(self, config_file, when_missing): when_missing(message) -def select_divvy_config(filepath): +def select_divvy_config(filepath: str | None) -> str: """Selects the divvy config file path to load. This uses a priority ordering to first choose a config file path if @@ -400,7 +395,7 @@ def select_divvy_config(filepath): return divcfg -def divvy_init(config_path, template_config_path): +def divvy_init(config_path: str, template_config_path: str) -> None: """Initialize a genome config file. 
Args: diff --git a/looper/exceptions.py b/looper/exceptions.py index c3fd631be..3e6d52c4a 100644 --- a/looper/exceptions.py +++ b/looper/exceptions.py @@ -3,9 +3,6 @@ from abc import ABCMeta from collections.abc import Iterable -__author__ = "Vince Reuter" -__email__ = "vreuter@virginia.edu" - _all__ = [ "DuplicatePipelineKeyException", "InvalidResourceSpecificationException", @@ -34,35 +31,35 @@ class SampleFailedException(LooperError): class MisconfigurationException(LooperError): """Looper not properly configured""" - def __init__(self, key): + def __init__(self, key: str) -> None: super(MisconfigurationException, self).__init__(key) class RegistryPathException(LooperError): """Duplication of pipeline identifier precludes unique pipeline ref.""" - def __init__(self, msg): + def __init__(self, msg: str) -> None: super(RegistryPathException, self).__init__(msg) class DuplicatePipelineKeyException(LooperError): """Duplication of pipeline identifier precludes unique pipeline ref.""" - def __init__(self, key): + def __init__(self, key: str) -> None: super(DuplicatePipelineKeyException, self).__init__(key) class InvalidResourceSpecificationException(LooperError): """Pipeline interface resources--if present--needs default.""" - def __init__(self, reason): + def __init__(self, reason: str) -> None: super(InvalidResourceSpecificationException, self).__init__(reason) class JobSubmissionException(LooperError): """Error type for when job submission fails.""" - def __init__(self, sub_cmd, script): + def __init__(self, sub_cmd: str, script: str) -> None: self.script = script reason = "Error for command {} and script '{}'".format(sub_cmd, self.script) super(JobSubmissionException, self).__init__(reason) @@ -73,8 +70,8 @@ class PipestatConfigurationException(LooperError): def __init__( self, - sub_cmd, - ): + sub_cmd: str, + ) -> None: reason = "Pipestat must be configured for command {}".format(sub_cmd) super(PipestatConfigurationException, self).__init__(reason) @@ -82,14 
+79,14 @@ def __init__( class MissingPipelineConfigurationException(LooperError): """A selected pipeline needs configuration data.""" - def __init__(self, pipeline): + def __init__(self, pipeline: str) -> None: super(MissingPipelineConfigurationException, self).__init__(pipeline) class PipelineInterfaceConfigError(LooperError): """Error with PipelineInterface config data during construction.""" - def __init__(self, context): + def __init__(self, context: str | Iterable[str]) -> None: """For exception context, provide message or collection of missing sections. Args: @@ -103,7 +100,7 @@ def __init__(self, context): class PipelineInterfaceRequirementsError(LooperError): """Invalid specification of pipeline requirements in interface config.""" - def __init__(self, typename_by_requirement): + def __init__(self, typename_by_requirement: dict) -> None: super(PipelineInterfaceRequirementsError, self).__init__( "{} invalid requirements: {}".format( len(typename_by_requirement), typename_by_requirement @@ -115,5 +112,5 @@ def __init__(self, typename_by_requirement): class LooperReportError(LooperError): """Looper reporting errors""" - def __init__(self, reason): + def __init__(self, reason: str) -> None: super(LooperReportError, self).__init__(reason) diff --git a/looper/looper.py b/looper/looper.py index de1015539..f4537d721 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -5,13 +5,10 @@ import abc import argparse -import csv import glob import logging -import subprocess -import yaml import os -import pandas as _pd +import subprocess # Need specific sequence of actions for colorama imports? 
from colorama import init @@ -19,29 +16,22 @@ from .const import PipelineLevel init() -from shutil import rmtree - # from collections.abc import Mapping from collections import defaultdict +from shutil import rmtree + from colorama import Fore, Style from eido import validate_config, validate_sample from eido.exceptions import EidoValidationError -from jsonschema import ValidationError from peppy.exceptions import RemoteYAMLError +from pipestat.exceptions import PipestatSummarizeError +from pipestat.reports import get_file_for_table from rich.color import Color from rich.console import Console from rich.table import Table from ubiquerg.cli_tools import query_yes_no - from .conductor import SubmissionConductor - -from .exceptions import ( - JobSubmissionException, - LooperReportError, - MisconfigurationException, - SampleFailedException, -) from .const import ( DEBUG_COMMANDS, DEBUG_EIDO_VALIDATION, @@ -49,14 +39,18 @@ NOT_SUB_MSG, SUBMISSION_FAILURE_MESSAGE, ) +from .exceptions import ( + JobSubmissionException, + LooperReportError, + MisconfigurationException, + SampleFailedException, +) from .project import Project from .utils import ( - desired_samples_range_skipped, desired_samples_range_limited, + desired_samples_range_skipped, sample_folder, ) -from pipestat.reports import get_file_for_table -from pipestat.exceptions import PipestatSummarizeError _PKGNAME = "looper" _LOGGER = logging.getLogger(_PKGNAME) @@ -73,7 +67,7 @@ class Executor(object): __metaclass__ = abc.ABCMeta - def __init__(self, prj): + def __init__(self, prj: Project) -> None: """The Project defines the instance; establish an iteration counter. Args: @@ -90,7 +84,7 @@ def __call__(self, *args, **kwargs): class Checker(Executor): - def __call__(self, args): + def __call__(self, args: argparse.Namespace) -> dict: """Check Project status, using pipestat. 
Args: @@ -102,7 +96,6 @@ def __call__(self, args): psms = {} if getattr(args, "project", None): - for piface in self.prj.project_pipeline_interfaces: if piface.psm.pipeline_type == PipelineLevel.PROJECT.value: if piface.psm.pipeline_name not in psms: @@ -141,7 +134,7 @@ def __call__(self, args): title=table_title, width=len(table_title) + 10, ) - table.add_column(f"Status", justify="center") + table.add_column("Status", justify="center") table.add_column("Jobs count/total jobs", justify="center") for status_id in psms[pipeline_name].status_schema.keys(): status_list = list(pipeline_status.values()) @@ -181,7 +174,7 @@ def __call__(self, args): table = Table( show_header=True, header_style="bold magenta", - title=f"Status codes description", + title="Status codes description", width=len(psms[pipeline_name].status_schema_source) + 20, caption=f"Descriptions source: {psms[pipeline_name].status_schema_source}", ) @@ -200,7 +193,7 @@ def __call__(self, args): class Cleaner(Executor): """Remove all intermediate files (defined by pypiper clean scripts).""" - def __call__(self, args, preview_flag=True): + def __call__(self, args: argparse.Namespace, preview_flag: bool = True) -> int: """Execute the file cleaning process. Args: @@ -261,7 +254,9 @@ def select_samples(prj: Project, args: argparse.Namespace): class Destroyer(Executor): """Destroyer of files and folders associated with Project's Samples""" - def __call__(self, args, preview_flag=True): + def __call__( + self, args: argparse.Namespace, preview_flag: bool = True + ) -> int | None: """Completely remove all output produced by any pipelines. Args: @@ -327,7 +322,7 @@ def __call__(self, args, preview_flag=True): class Collator(Executor): """Submitter for project-level pipelines""" - def __init__(self, prj): + def __init__(self, prj: Project) -> None: """Initializes an instance. 
Args: @@ -336,7 +331,7 @@ def __init__(self, prj): super(Executor, self).__init__() self.prj = prj - def __call__(self, args, **compute_kwargs): + def __call__(self, args: argparse.Namespace, **compute_kwargs) -> dict: """Matches collators by protocols, creates submission scripts and submits them. Args: @@ -386,7 +381,13 @@ def __call__(self, args, **compute_kwargs): class Runner(Executor): """The true submitter of pipelines""" - def __call__(self, args, top_level_args=None, rerun=False, **compute_kwargs): + def __call__( + self, + args: argparse.Namespace, + top_level_args=None, + rerun: bool = False, + **compute_kwargs, + ) -> dict: """Do the Sample submission. Args: @@ -536,8 +537,9 @@ def __call__(self, args, top_level_args=None, rerun=False, **compute_kwargs): failed_sub_samples = samples_by_reason.get(SUBMISSION_FAILURE_MESSAGE) if failed_sub_samples: _LOGGER.info( - "\n{} samples with at least one failed job submission:" - " {}".format(len(failed_sub_samples), ", ".join(failed_sub_samples)) + "\n{} samples with at least one failed job submission: {}".format( + len(failed_sub_samples), ", ".join(failed_sub_samples) + ) ) # If failure keys are only added when there's at least one sample that @@ -565,10 +567,9 @@ def __call__(self, args, top_level_args=None, rerun=False, **compute_kwargs): class Reporter(Executor): """Combine project outputs into a browsable HTML report""" - def __call__(self, args): + def __call__(self, args: argparse.Namespace) -> dict: # initialize the report builder self.debug = {} - p = self.prj project_level = getattr(args, "project", None) portable = args.portable @@ -578,7 +579,6 @@ def __call__(self, args): psms = {} if project_level: - for piface in self.prj.project_pipeline_interfaces: if piface.psm.pipeline_type == PipelineLevel.PROJECT.value: if piface.psm.pipeline_name not in psms: @@ -621,9 +621,7 @@ def __call__(self, args): class Linker(Executor): """Create symlinks for reported results. 
Requires pipestat to be configured.""" - def __call__(self, args): - # initialize the report builder - p = self.prj + def __call__(self, args: argparse.Namespace) -> None: project_level = getattr(args, "project", None) link_dir = getattr(args, "output_dir", None) @@ -650,7 +648,7 @@ class Tabulator(Executor): list[str|any]: List containing output file paths of stats and objects. """ - def __call__(self, args): + def __call__(self, args: argparse.Namespace) -> list: # p = self.prj project_level = getattr(args, "project", None) report_dir = getattr(args, "report_dir", None) @@ -674,12 +672,12 @@ def __call__(self, args): return results -def _create_failure_message(reason, samples): +def _create_failure_message(reason: str, samples: set[str]) -> str: """Explain lack of submission for a single reason, 1 or more samples.""" return f"{Fore.LIGHTRED_EX + reason + Style.RESET_ALL}: {', '.join(samples)}" -def _remove_or_dry_run(paths, dry_run=False): +def _remove_or_dry_run(paths: list | str, dry_run: bool = False) -> None: """Remove file or directory or just inform what would be removed in case of dry run. Args: @@ -701,7 +699,9 @@ def _remove_or_dry_run(paths, dry_run=False): _LOGGER.info(path + " does not exist.") -def destroy_summary(prj, dry_run=False, project_level=False): +def destroy_summary( + prj: Project, dry_run: bool = False, project_level: bool = False +) -> None: """ Delete the summary files if not in dry run mode This function is for use with pipestat configured projects. @@ -770,11 +770,13 @@ class LooperCounter(object): total (int): Number of jobs to process. """ - def __init__(self, total): + def __init__(self, total: int) -> None: self.count = 0 self.total = total - def show(self, name, type="sample", pipeline_name=None): + def show( + self, name: str, type: str = "sample", pipeline_name: str | None = None + ) -> str: """Display sample counts status for a particular protocol type. The counts are running vs. 
total for the protocol within the Project, @@ -798,16 +800,21 @@ def show(self, name, type="sample", pipeline_name=None): color=Fore.CYAN, ) - def reset(self): + def reset(self) -> None: self.count = 0 - def __str__(self): + def __str__(self) -> str: return "LooperCounter of size {}".format(self.total) def _submission_status_text( - curr, total, name, pipeline_name=None, type="sample", color=Fore.CYAN -): + curr: int, + total: int, + name: str, + pipeline_name: str | None = None, + type: str = "sample", + color: str = Fore.CYAN, +) -> str: """Generate submission sample text for run or collate""" txt = color + f"## [{curr} of {total}] {type}: {name}" if pipeline_name: diff --git a/looper/parser_types.py b/looper/parser_types.py index d7a201906..723baa1bb 100644 --- a/looper/parser_types.py +++ b/looper/parser_types.py @@ -3,7 +3,13 @@ from yacman import YAMLConfigManager -def html_range(caravel=False, min_val=0, max_val=10, step=1, value=0): +def html_range( + caravel: bool = False, + min_val: int = 0, + max_val: int = 10, + step: int = 1, + value: int = 0, +) -> callable: caravel_data = YAMLConfigManager( { "element_type": "range", @@ -28,7 +34,7 @@ def fun(x=None, caravel_data=caravel_data, caravel=caravel): return fun -def html_checkbox(caravel=False, checked=False): +def html_checkbox(caravel: bool = False, checked: bool = False) -> callable: """Create argument for type parameter on argparse.ArgumentParser.add_argument. Args: @@ -50,7 +56,7 @@ def fun(x=None, caravel_data=caravel_data, caravel=caravel): return fun -def html_select(choices, caravel=False): +def html_select(choices: list, caravel: bool = False) -> callable: """Create argument for type parameter on argparse.ArgumentParser.add_argument. 
Args: diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index 50323ac1b..b962da0a7 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -3,14 +3,13 @@ import os from collections.abc import Mapping from logging import getLogger -from warnings import warn import jsonschema import pandas as pd from eido import read_schema from peppy import utils as peputil from ubiquerg import expandpath, is_url -from yacman import load_yaml, YAMLConfigManager +from yacman import YAMLConfigManager, load_yaml from .const import ( COMPUTE_KEY, @@ -31,9 +30,6 @@ ) from .utils import render_nested_var_templates -__author__ = "Michal Stolarczyk" -__email__ = "michal@virginia.edu" - _LOGGER = getLogger(__name__) @@ -51,7 +47,7 @@ class PipelineInterface(YAMLConfigManager): pipeline_type (str): Type of the pipeline, must be either 'sample' or 'project'. """ - def __init__(self, config, pipeline_type=None): + def __init__(self, config: str | Mapping, pipeline_type: str | None = None) -> None: super(PipelineInterface, self).__init__() if isinstance(config, Mapping): @@ -71,10 +67,10 @@ def __init__(self, config, pipeline_type=None): self._expand_paths(["compute", "dynamic_variables_script_path"]) @property - def pipeline_name(self): + def pipeline_name(self) -> str: return self[PIPELINE_INTERFACE_PIPELINE_NAME_KEY] - def render_var_templates(self, namespaces): + def render_var_templates(self, namespaces: dict) -> dict: """ Render path templates under 'var_templates' in this pipeline interface. @@ -96,7 +92,7 @@ def render_var_templates(self, namespaces): var_templates = render_nested_var_templates(var_templates, namespaces) return var_templates - def get_pipeline_schemas(self, schema_key=INPUT_SCHEMA_KEY): + def get_pipeline_schemas(self, schema_key: str = INPUT_SCHEMA_KEY) -> str | None: """ Get path to the pipeline schema. 
@@ -119,7 +115,7 @@ def get_pipeline_schemas(self, schema_key=INPUT_SCHEMA_KEY): ) return schema_source - def choose_resource_package(self, namespaces, file_size): + def choose_resource_package(self, namespaces: dict, file_size: float) -> dict: """ Select resource bundle for given input file size to given pipeline. @@ -152,7 +148,8 @@ def _file_size_ante(name, data): if fsize < 0: raise InvalidResourceSpecificationException( "Found negative value () in '{}' column; package '{}'".format( - fsize, FILE_SIZE_COLNAME, name + fsize, + FILE_SIZE_COLNAME, ) ) return fsize @@ -251,8 +248,9 @@ def _load_size_dep_vars(piface): # Ensure that we have a numeric value before attempting comparison. file_size = float(file_size) assert file_size >= 0, ValueError( - "Attempted selection of resource " - "package for negative file size: {}".format(file_size) + "Attempted selection of resource package for negative file size: {}".format( + file_size + ) ) fluid_resources = _load_dynamic_vars(self) @@ -301,7 +299,7 @@ def _load_size_dep_vars(piface): resources_data.update(project[LOOPER_KEY][COMPUTE_KEY][RESOURCES_KEY]) return resources_data - def _expand_paths(self, keys): + def _expand_paths(self, keys: list[str]) -> None: """ Expand paths defined in the pipeline interface file. @@ -370,7 +368,9 @@ def _set_in_dict(map, attrs, val): _LOGGER.debug("Expanded path: {}".format(pipe_path)) _set_in_dict(self, keys, pipe_path) - def _validate(self, schema_src, exclude_case=False, flavor="generic"): + def _validate( + self, schema_src: str, exclude_case: bool = False, flavor: str = "generic" + ) -> None: """ Generic function to validate the object against a schema. 
diff --git a/looper/plugins.py b/looper/plugins.py index 37859db27..3d628038e 100644 --- a/looper/plugins.py +++ b/looper/plugins.py @@ -1,16 +1,17 @@ import logging import os + +from .conductor import _get_yaml_path from .const import ( SAMPLE_CWL_YAML_PATH_KEY, SAMPLE_YAML_PATH_KEY, SAMPLE_YAML_PRJ_PATH_KEY, ) -from .conductor import _get_yaml_path _LOGGER = logging.getLogger(__name__) -def write_sample_yaml_prj(namespaces): +def write_sample_yaml_prj(namespaces: dict) -> dict: """Plugin: saves sample representation with project reference to YAML. This plugin can be parametrized by providing the path value/template in @@ -32,7 +33,7 @@ def write_sample_yaml_prj(namespaces): return {"sample": sample} -def write_custom_template(namespaces): +def write_custom_template(namespaces: dict) -> dict | None: """ Plugin: Populates a user-provided jinja template @@ -69,7 +70,7 @@ def load_template(pipeline): return {"sample": namespaces["sample"]} -def write_sample_yaml_cwl(namespaces): +def write_sample_yaml_cwl(namespaces: dict) -> dict: """Plugin: Produce a cwl-compatible yaml representation of the sample. Also adds the 'cwl_yaml' attribute to sample objects, which points @@ -140,15 +141,14 @@ def _get_schema_source( sample[dir_attr] = {"class": "Directory", "location": dir_attr_value} else: _LOGGER.warning( - "No 'input_schema' defined, producing a regular " - "sample YAML representation" + "No 'input_schema' defined, producing a regular sample YAML representation" ) _LOGGER.info("Writing sample yaml to {}".format(sample.sample_yaml_cwl)) sample.to_yaml(sample.sample_yaml_cwl) return {"sample": sample} -def write_sample_yaml(namespaces): +def write_sample_yaml(namespaces: dict) -> dict: """Plugin: saves sample representation to YAML. 
This plugin can be parametrized by providing the path value/template in diff --git a/looper/processed_project.py b/looper/processed_project.py index 16cc07750..b863f10ed 100644 --- a/looper/processed_project.py +++ b/looper/processed_project.py @@ -4,9 +4,6 @@ but the report generation approach has changed. """ -__author__ = "Michal Stolarczyk" -__email__ = "michal@virginia.edu" - # import os # from collections.abc import Mapping # from copy import copy @@ -139,7 +136,7 @@ PATH_LIKE = [PATH_KEY, THUMB_PATH_KEY] -def _get_path_sect_keys(mapping, keys=[PATH_KEY]): +def _get_path_sect_keys(mapping: dict, keys: list[str] = [PATH_KEY]) -> list[str]: """Get names of subsections in a mapping that contain collection of keys. Args: @@ -152,7 +149,7 @@ def _get_path_sect_keys(mapping, keys=[PATH_KEY]): return [k for k, v in mapping.items() if bool(set(keys) & set(mapping[k]))] -def _populate_paths(object, schema, check_exist): +def _populate_paths(object, schema: dict, check_exist: bool) -> None: """Populate path-like object attributes with other object attributes. Based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' @@ -177,8 +174,7 @@ def _populate_paths(object, schema, check_exist): populated = templ.format(**dict(object.items())) except Exception as e: _LOGGER.warning( - "Caught exception: {}.\n" - "Could not populate path: {}".format( + "Caught exception: {}.\nCould not populate path: {}".format( getattr(e, "message", repr(e)), templ ) ) @@ -193,7 +189,7 @@ def _populate_paths(object, schema, check_exist): ) -def populate_sample_paths(sample, schema, check_exist=False): +def populate_sample_paths(sample, schema: dict, check_exist: bool = False) -> None: """Populate path-like Sample attributes with other object attributes. Based on a defined template, e.g. 
'/Users/x/test_{name}/{genome}_file.txt' @@ -214,7 +210,7 @@ def populate_sample_paths(sample, schema, check_exist=False): _populate_paths(sample, schema, check_exist) -def populate_project_paths(project, schema, check_exist=False): +def populate_project_paths(project, schema: dict, check_exist: bool = False) -> None: """Populate path-like Project attributes with other object attributes. Based on a defined template, e.g. '/Users/x/test_{name}/{genome}_file.txt' @@ -233,7 +229,7 @@ def populate_project_paths(project, schema, check_exist=False): _populate_paths(project, schema, check_exist) -def get_project_outputs(project, schema): +def get_project_outputs(project, schema: list[dict]): """Get project level outputs with path-like attributes populated with project attributes. Args: @@ -262,7 +258,8 @@ def get_project_outputs(project, schema): res[ps][p] = s[ps][p].format(**dict(project.items())) except Exception as e: _LOGGER.debug( - "Caught exception: {}.\n Could not populate {} " - "path".format(p, str(e)) + "Caught exception: {}.\n Could not populate {} path".format( + p, str(e) + ) ) return YAMLConfigManager(res) diff --git a/looper/project.py b/looper/project.py index 60c476c14..dd40cb537 100644 --- a/looper/project.py +++ b/looper/project.py @@ -1,8 +1,7 @@ """Looper version of NGS project model.""" -import itertools import os -from typing import List, NoReturn, Union +from typing import NoReturn from yaml import safe_load @@ -12,27 +11,15 @@ # cached_property was introduced in python 3.8 cached_property = property -from .divvy import ComputingConfiguration -from eido import PathAttrNotFoundError, read_schema +from eido import read_schema from jsonschema import ValidationError from pandas.core.common import flatten from peppy import Project as peppyProject -from peppy.utils import make_abs_via_cfg from peppy.const import CONFIG_KEY +from peppy.utils import make_abs_via_cfg from pipestat import PipestatManager from .conductor import write_pipestat_config - -from 
.exceptions import MisconfigurationException, PipelineInterfaceConfigError -from .pipeline_interface import PipelineInterface -from .processed_project import populate_project_paths, populate_sample_paths -from .utils import ( - expandpath, - fetch_sample_flags, - get_sample_status, - getLogger, - is_pephub_registry_path, -) from .const import ( CLI_PROJ_ATTRS, COMPUTE_PACKAGE_KEY, @@ -46,11 +33,22 @@ PIFACE_KEY_SELECTOR, PIPELINE_INTERFACE_PIPELINE_NAME_KEY, PIPELINE_INTERFACES_KEY, - PipelineLevel, PIPESTAT_KEY, RESULTS_SUBDIR_KEY, SAMPLE_PL_ARG, SUBMISSION_SUBDIR_KEY, + PipelineLevel, +) +from .divvy import ComputingConfiguration +from .exceptions import MisconfigurationException, PipelineInterfaceConfigError +from .pipeline_interface import PipelineInterface +from .processed_project import populate_project_paths, populate_sample_paths +from .utils import ( + expandpath, + fetch_sample_flags, + get_sample_status, + getLogger, + is_pephub_registry_path, ) __all__ = ["Project"] @@ -64,17 +62,18 @@ class ProjectContext(object): def __init__( self, prj, - selector_attribute=None, - selector_include=None, - selector_exclude=None, - selector_flag=None, - exclusion_flag=None, - ): + selector_attribute: str | None = None, + selector_include: list | str | None = None, + selector_exclude: list | str | None = None, + selector_flag: list | str | None = None, + exclusion_flag: list | str | None = None, + ) -> None: """Project and what to include/exclude defines the context.""" if not isinstance(selector_attribute, str): raise TypeError( - "Name of attribute for sample selection isn't a string: {} " - "({})".format(selector_attribute, type(selector_attribute)) + "Name of attribute for sample selection isn't a string: {} ({})".format( + selector_attribute, type(selector_attribute) + ) ) self.prj = prj self.include = selector_include @@ -137,7 +136,13 @@ class Project(peppyProject): compute settings. 
""" - def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): + def __init__( + self, + cfg: str | None = None, + amendments=None, + divcfg_path: str | None = None, + **kwargs, + ) -> None: super(Project, self).__init__(cfg=cfg, amendments=amendments) prj_dict = kwargs.get("project_dict") pep_config = kwargs.get("pep_config", None) @@ -154,7 +159,7 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): try: # For loading PEPs via CSV, Peppy cannot infer project name. - name = self.name + self.name # noqa: B018 except NotImplementedError: self.name = None @@ -183,7 +188,7 @@ def __init__(self, cfg=None, amendments=None, divcfg_path=None, **kwargs): self.make_project_dirs() @property - def piface_key(self): + def piface_key(self) -> str: """Name of the pipeline interface attribute for this project. Returns: @@ -192,7 +197,7 @@ def piface_key(self): return self._extra_cli_or_cfg(PIFACE_KEY_SELECTOR) or PIPELINE_INTERFACES_KEY @property - def selected_compute_package(self): + def selected_compute_package(self) -> str | None: """Compute package name specified in object constructor. Returns: @@ -201,7 +206,7 @@ def selected_compute_package(self): return self._extra_cli_or_cfg(COMPUTE_PACKAGE_KEY) @property - def cli_pifaces(self): + def cli_pifaces(self) -> list[str] | None: """Collection of pipeline interface sources specified in object constructor. Returns: @@ -215,7 +220,7 @@ def cli_pifaces(self): ) @property - def output_dir(self): + def output_dir(self) -> str: """Output directory for the project, specified in object constructor. Returns: @@ -223,7 +228,7 @@ def output_dir(self): """ return self._extra_cli_or_cfg(OUTDIR_KEY, strict=True) - def _extra_cli_or_cfg(self, attr_name, strict=False): + def _extra_cli_or_cfg(self, attr_name: str, strict: bool = False): """Get attribute value provided in kwargs in object constructor or from looper section in the configuration file. 
Args: @@ -256,7 +261,7 @@ def _extra_cli_or_cfg(self, attr_name, strict=False): return @property - def results_folder(self): + def results_folder(self) -> str: """Path to the results folder for the project. Returns: @@ -265,7 +270,7 @@ def results_folder(self): return self._out_subdir_path(RESULTS_SUBDIR_KEY, default="results_pipeline") @property - def submission_folder(self): + def submission_folder(self) -> str: """Path to the submission folder for the project. Returns: @@ -290,7 +295,7 @@ def _out_subdir_path(self, key: str, default: str) -> str: child = getattr(self[EXTRA_KEY], key, default) or default return os.path.join(parent, child) - def make_project_dirs(self): + def make_project_dirs(self) -> None: """ Create project directory structure if it doesn't exist. """ @@ -309,7 +314,7 @@ def make_project_dirs(self): ) @cached_property - def project_pipeline_interface_sources(self): + def project_pipeline_interface_sources(self) -> list[str]: """Get a list of all valid project-level pipeline interface sources associated with this project. Sources that are file paths are expanded. @@ -324,7 +329,7 @@ def project_pipeline_interface_sources(self): ) @cached_property - def project_pipeline_interfaces(self): + def project_pipeline_interfaces(self) -> list: """Flat list of all valid project-level interface objects associated with this Project. Note that only valid pipeline interfaces will show up in the @@ -340,7 +345,7 @@ def project_pipeline_interfaces(self): ] @cached_property - def pipeline_interfaces(self): + def pipeline_interfaces(self) -> list: """Flat list of all valid interface objects associated with this Project. Note that only valid pipeline interfaces will show up in the @@ -364,7 +369,7 @@ def pipeline_interface_sources(self): return self._samples_by_interface.keys() @cached_property - def pipestat_configured(self): + def pipestat_configured(self) -> bool: """Whether pipestat configuration is complete for all sample pipelines. 
Returns: @@ -373,7 +378,7 @@ def pipestat_configured(self): return self._check_if_pipestat_configured() @cached_property - def pipestat_configured_project(self): + def pipestat_configured_project(self) -> bool: """Whether pipestat configuration is complete for all project pipelines. Returns: @@ -383,7 +388,7 @@ def pipestat_configured_project(self): pipeline_type=PipelineLevel.PROJECT.value ) - def get_sample_piface(self, sample_name): + def get_sample_piface(self, sample_name: str) -> list | None: """Get a list of pipeline interfaces associated with the specified sample. Note that only valid pipeline interfaces will show up in the @@ -404,7 +409,7 @@ def get_sample_piface(self, sample_name): return None @staticmethod - def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY): + def get_schemas(pifaces, schema_key: str = INPUT_SCHEMA_KEY) -> list[str]: """Get the list of unique schema paths for a list of pipeline interfaces. Args: @@ -423,7 +428,9 @@ def get_schemas(pifaces, schema_key=INPUT_SCHEMA_KEY): schema_set.update([schema_file]) return list(schema_set) - def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value): + def _check_if_pipestat_configured( + self, pipeline_type: str = PipelineLevel.SAMPLE.value + ) -> bool: # First check if pipestat key is in looper_config, if not return false @@ -437,13 +444,14 @@ def _check_if_pipestat_configured(self, pipeline_type=PipelineLevel.SAMPLE.value # This should return True OR raise an exception at this point. 
return self._get_pipestat_configuration(pipeline_type) - def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value): + def _get_pipestat_configuration( + self, pipeline_type: str = PipelineLevel.SAMPLE.value + ) -> bool: # First check if it already exists if pipeline_type == PipelineLevel.SAMPLE.value: for piface in self.pipeline_interfaces: - pipestat_config_path = self._check_for_existing_pipestat_config(piface) if not pipestat_config_path: @@ -476,7 +484,7 @@ def _get_pipestat_configuration(self, pipeline_type=PipelineLevel.SAMPLE.value): return True - def _check_for_existing_pipestat_config(self, piface): + def _check_for_existing_pipestat_config(self, piface) -> str | None: """ config files should be in looper output directory and named as: @@ -507,7 +515,7 @@ def _check_for_existing_pipestat_config(self, piface): else: return None - def _create_pipestat_config(self, piface, pipeline_type): + def _create_pipestat_config(self, piface, pipeline_type: str) -> None: """ Each piface needs its own config file and associated psm """ @@ -613,7 +621,7 @@ def _create_pipestat_config(self, piface, pipeline_type): return None - def populate_pipeline_outputs(self): + def populate_pipeline_outputs(self) -> None: """ Populate project and sample output attributes based on output schemas that pipeline interfaces point to. @@ -631,7 +639,7 @@ def populate_pipeline_outputs(self): for schema in schemas: populate_project_paths(self, read_schema(schema)[0]) - def _get_linked_pifaces(self): + def _get_linked_pifaces(self) -> dict[str, list[str]]: """Get linked sample pipeline interfaces by project pipeline interface. These are indicated in project pipeline interface by @@ -668,7 +676,7 @@ def _process_linked_piface(p, piface, prj_piface): ) return linked_pifaces - def _piface_by_samples(self): + def _piface_by_samples(self) -> dict: """Create a mapping of all defined interfaces in this Project by samples. 
Returns: @@ -686,7 +694,7 @@ def _piface_by_samples(self): pifaces_by_sample.setdefault(sample_name, []).append(pi) return pifaces_by_sample - def _omit_from_repr(self, k, cls): + def _omit_from_repr(self, k: str, cls: type) -> bool: """Exclude the interfaces from representation. Args: @@ -695,7 +703,7 @@ def _omit_from_repr(self, k, cls): """ return super(Project, self)._omit_from_repr(k, cls) or k == "interfaces" - def _resolve_path_with_cfg(self, pth): + def _resolve_path_with_cfg(self, pth: str | None) -> str | None: """Expand provided path and make it absolute using project config path. Args: @@ -712,7 +720,7 @@ def _resolve_path_with_cfg(self, pth): _LOGGER.debug("Relative path made absolute: {}".format(pth)) return pth - def _samples_by_piface(self, piface_key): + def _samples_by_piface(self, piface_key: str) -> dict[str, set[str]]: """Create a collection of all samples with valid pipeline interfaces. Args: @@ -754,7 +762,7 @@ def _samples_by_piface(self, piface_key): _LOGGER.warning(msg) return samples_by_piface - def set_sample_piface(self, sample_piface: Union[List[str], str]) -> NoReturn: + def set_sample_piface(self, sample_piface: list[str] | str) -> NoReturn: """Add sample pipeline interfaces variable to object. Args: @@ -769,12 +777,12 @@ def set_sample_piface(self, sample_piface: Union[List[str], str]) -> NoReturn: def fetch_samples( prj, - selector_attribute=None, - selector_include=None, - selector_exclude=None, - selector_flag=None, - exclusion_flag=None, -): + selector_attribute: str | None = None, + selector_include: list | str | None = None, + selector_exclude: list | str | None = None, + selector_flag: list | str | None = None, + exclusion_flag: list | str | None = None, +) -> list: """Collect samples of particular protocol(s). Protocols can't be both positively selected for and negatively @@ -832,7 +840,7 @@ def keep(s): # nonsense user error. 
if selector_include and selector_exclude: raise TypeError( - "Specify only selector_include or selector_exclude parameter, " "not both." + "Specify only selector_include or selector_exclude parameter, not both." ) if not isinstance(selector_attribute, str): @@ -935,7 +943,7 @@ def keep(s): return kept_samples -def make_set(items): +def make_set(items) -> list: if isinstance(items, str): items = [items] elif len(items) == 1: diff --git a/looper/utils.py b/looper/utils.py index e119947db..2aabcf7c0 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -1,24 +1,27 @@ """Helpers without an obvious logical home.""" import argparse -from collections import defaultdict import glob import itertools -from logging import getLogger import os -from typing import Iterable, List, Optional, Tuple, Union import re +from collections import defaultdict +from collections.abc import Iterable +from logging import getLogger import jinja2 import yaml +from pephubclient.constants import RegistryPath from peppy import Project as peppyProject from peppy.const import AMENDMENTS_KEY, CONFIG_KEY, NAME_KEY, SAMPLE_MODS_KEY -from ubiquerg import convert_value, expandpath, parse_registry_path, deep_update -from pephubclient.constants import RegistryPath from pydantic import ValidationError +from rich.console import Console +from rich.pretty import pprint +from ubiquerg import convert_value, deep_update, expandpath, parse_registry_path from yacman import load_yaml from yaml.parser import ParserError +from .command_models.commands import SUPPORTED_COMMANDS from .const import ( ALL_SUBCMD_KEY, CLI_KEY, @@ -34,18 +37,17 @@ PIPESTAT_KEY, POSITIONAL, PROJECT_PL_ARG, - PipelineLevel, SAMPLE_PL_ARG, + PipelineLevel, ) -from .command_models.commands import SUPPORTED_COMMANDS from .exceptions import MisconfigurationException, PipelineInterfaceConfigError -from rich.console import Console -from rich.pretty import pprint _LOGGER = getLogger(__name__) -def fetch_flag_files(prj=None, results_folder="", 
flags=FLAGS): +def fetch_flag_files( + prj=None, results_folder: str = "", flags: Iterable[str] | str = FLAGS +) -> dict[str, list[str]]: """Find all flag file paths for the given project. Args: @@ -97,7 +99,9 @@ def fetch_flag_files(prj=None, results_folder="", flags=FLAGS): return files_by_flag -def fetch_sample_flags(prj, sample, pl_name, flag_dir=None): +def fetch_sample_flags( + prj, sample, pl_name: str, flag_dir: str | None = None +) -> list[str]: """Find any flag files present for a sample associated with a project. Args: @@ -128,7 +132,7 @@ def fetch_sample_flags(prj, sample, pl_name, flag_dir=None): ] -def get_sample_status(sample, flags): +def get_sample_status(sample: str, flags: list[str]) -> str | None: """Get a sample status. Args: @@ -156,7 +160,7 @@ def get_sample_status(sample, flags): return statuses[0] -def grab_project_data(prj): +def grab_project_data(prj) -> dict: """From the given Project, grab Sample-independent data. There are some aspects of a Project of which it's beneficial for a Sample @@ -181,7 +185,7 @@ def grab_project_data(prj): _LOGGER.debug("Project lacks section '%s', skipping", CONFIG_KEY) -def sample_folder(prj, sample): +def sample_folder(prj, sample) -> str: """Get the path to this Project's root folder for the given Sample. Args: @@ -195,7 +199,9 @@ def sample_folder(prj, sample): return os.path.join(prj.results_folder, sample[prj.sample_table_index]) -def get_file_for_project(prj, pipeline_name, appendix=None, directory=None): +def get_file_for_project( + prj, pipeline_name: str, appendix: str | None = None, directory: str | None = None +) -> str: """Create a path to the file for the current project. Takes the possibility of amendment being activated at the time. @@ -222,7 +228,7 @@ def get_file_for_project(prj, pipeline_name, appendix=None, directory=None): return fp -def get_file_for_project_old(prj, appendix): +def get_file_for_project_old(prj, appendix: str) -> str: """Create a path to the file for the current project. 
Takes the possibility of amendment being activated at the time. @@ -242,7 +248,7 @@ def get_file_for_project_old(prj, appendix): return fp -def jinja_render_template_strictly(template, namespaces): +def jinja_render_template_strictly(template: str, namespaces: dict) -> str: """Render a command string in the provided namespaces context. Strictly, which means that all the requested attributes must be @@ -284,7 +290,7 @@ def _finfun(x): return rendered -def read_yaml_file(filepath): +def read_yaml_file(filepath: str) -> dict | None: """Read a YAML file. Args: @@ -301,12 +307,12 @@ def read_yaml_file(filepath): def enrich_args_via_cfg( - subcommand_name, + subcommand_name: str, parser_args, aux_parser, - test_args=None, - cli_modifiers=None, -): + test_args: dict | None = None, + cli_modifiers: dict | None = None, +) -> argparse.Namespace: """Read in a looper dotfile, pep config and set arguments. Priority order: CLI > dotfile/config > pep_config > parser default @@ -402,7 +408,7 @@ def set_single_arg(argname, default_source_namespace, result_namespace): return result -def _get_subcommand_args(subcommand_name, parser_args): +def _get_subcommand_args(subcommand_name: str, parser_args) -> dict | None: """Get the union of values for the subcommand arguments. Get the union of values for the subcommand arguments from @@ -463,7 +469,7 @@ def _get_subcommand_args(subcommand_name, parser_args): return args -def init_generic_pipeline(pipelinepath: Optional[str] = None): +def init_generic_pipeline(pipelinepath: str | None = None): """Create generic pipeline interface. 
Args: @@ -502,7 +508,7 @@ def init_generic_pipeline(pipelinepath: Optional[str] = None): }, } - console.rule(f"\n[magenta]Pipeline Interface[/magenta]") + console.rule("\n[magenta]Pipeline Interface[/magenta]") # Write file if not os.path.exists(dest_file): pprint(generic_pipeline_dict, expand_all=True) @@ -537,7 +543,7 @@ def init_generic_pipeline(pipelinepath: Optional[str] = None): }, } - console.rule(f"\n[magenta]Output Schema[/magenta]") + console.rule("\n[magenta]Output Schema[/magenta]") # Write file if not os.path.exists(dest_file): pprint(generic_output_schema_dict, expand_all=True) @@ -551,7 +557,7 @@ def init_generic_pipeline(pipelinepath: Optional[str] = None): f"Output schema file already exists [yellow]`{dest_file}`[/yellow]. Skipping creation.." ) - console.rule(f"\n[magenta]Example Pipeline Shell Script[/magenta]") + console.rule("\n[magenta]Example Pipeline Shell Script[/magenta]") # Create Generic countlines.sh if not pipelinepath: @@ -579,7 +585,7 @@ def init_generic_pipeline(pipelinepath: Optional[str] = None): return True -def read_looper_dotfile(): +def read_looper_dotfile() -> dict: """Read looper config file. Returns: @@ -597,10 +603,10 @@ def initiate_looper_config( looper_config_path: str, pep_path: str = None, output_dir: str = None, - sample_pipeline_interfaces: Union[List[str], str] = None, - project_pipeline_interfaces: Union[List[str], str] = None, - force=False, -): + sample_pipeline_interfaces: list[str] | str = None, + project_pipeline_interfaces: list[str] | str = None, + force: bool = False, +) -> bool: """Initialize looper config file. 
Args: @@ -618,7 +624,7 @@ def initiate_looper_config( """ console = Console() console.clear() - console.rule(f"\n[magenta]Looper initialization[/magenta]") + console.rule("\n[magenta]Looper initialization[/magenta]") if os.path.exists(looper_config_path) and not force: console.print( @@ -669,7 +675,7 @@ def initiate_looper_config( return True -def looper_config_tutorial(): +def looper_config_tutorial() -> bool: """Prompt a user through configuring a .looper.yaml file for a new project. Returns: @@ -678,7 +684,7 @@ def looper_config_tutorial(): console = Console() console.clear() - console.rule(f"\n[magenta]Looper initialization[/magenta]") + console.rule("\n[magenta]Looper initialization[/magenta]") looper_cfg_path = ".looper.yaml" # not changeable @@ -750,8 +756,8 @@ def looper_config_tutorial(): console.print( f"""\ -[yellow]pep_config:[/yellow] {cfg['pep_config']} -[yellow]output_dir:[/yellow] {cfg['output_dir']} +[yellow]pep_config:[/yellow] {cfg["pep_config"]} +[yellow]output_dir:[/yellow] {cfg["output_dir"]} [yellow]pipeline_interfaces:[/yellow] - {piface_paths} """ @@ -788,7 +794,9 @@ def looper_config_tutorial(): return True -def determine_pipeline_type(piface_path: str, looper_config_path: str): +def determine_pipeline_type( + piface_path: str, looper_config_path: str +) -> tuple[list[str] | None, str | None]: """Read pipeline interface and determine its type. Read pipeline interface from disk and determine if it contains @@ -832,7 +840,7 @@ def determine_pipeline_type(piface_path: str, looper_config_path: str): if pipeline_types == []: raise PipelineInterfaceConfigError( - f"sample_interface and/or project_interface must be defined in each pipeline interface." + "sample_interface and/or project_interface must be defined in each pipeline interface." 
) return pipeline_types, piface_path @@ -890,7 +898,6 @@ def read_looper_config_file(looper_config_path: str) -> dict: return_dict[CLI_KEY] = dp_data[CLI_KEY] if PIPELINE_INTERFACES_KEY in dp_data: - dp_data.setdefault(PIPELINE_INTERFACES_KEY, {}) all_pipeline_interfaces = dp_data.get(PIPELINE_INTERFACES_KEY) @@ -947,7 +954,7 @@ def read_looper_config_file(looper_config_path: str) -> dict: return return_dict -def dotfile_path(directory=os.getcwd(), must_exist=False): +def dotfile_path(directory: str = os.getcwd(), must_exist: bool = False) -> str: """Get the path to the looper dotfile. If file existence is forced this function will look for it in @@ -973,8 +980,9 @@ def dotfile_path(directory=os.getcwd(), must_exist=False): if cur_dir == parent_dir: # root, file does not exist raise OSError( - "Looper dotfile ({}) not found in '{}' and all " - "its parents".format(LOOPER_DOTFILE_NAME, directory) + "Looper dotfile ({}) not found in '{}' and all its parents".format( + LOOPER_DOTFILE_NAME, directory + ) ) cur_dir = parent_dir @@ -1005,7 +1013,7 @@ def is_pephub_registry_path(input_string: str) -> bool: bool: True if input is a registry path. 
""" try: - registry_path = RegistryPath(**parse_registry_path(input_string)) + RegistryPath(**parse_registry_path(input_string)) except (ValidationError, TypeError): return False return True @@ -1029,7 +1037,7 @@ def __init__(self, lo: int, hi: int): ) def __eq__(self, other) -> bool: - return type(other) == type(self) and self.to_tuple() == other.to_tuple() + return type(other) is type(self) and self.to_tuple() == other.to_tuple() def __hash__(self) -> int: return hash(self.to_tuple()) @@ -1040,7 +1048,7 @@ def __repr__(self) -> str: def __str__(self) -> str: return f"{self.__class__.__name__}: {self.to_tuple()}" - def to_tuple(self) -> Tuple[int, int]: + def to_tuple(self) -> tuple[int, int]: return self.lo, self.hi @property @@ -1051,7 +1059,7 @@ def lo(self) -> int: def hi(self) -> int: return self._hi - def _invalidations(self) -> Iterable[str]: + def _invalidations(self) -> list[str]: problems = [] if self.lo < 1: problems.append(f"Interval must be on natural numbers: {self.lo}") @@ -1065,7 +1073,7 @@ def to_range(self) -> Iterable[int]: return range(self.lo, self.hi + 1) @classmethod - def from_string(cls, s: str, upper_bound: int) -> "IntRange": + def from_string(cls, s: str, upper_bound: int) -> "NatIntervalInclusive": """Create an instance from a string, e.g. command-line argument. Args: @@ -1157,7 +1165,7 @@ def desired_samples_range_skipped(arg: str, num_samples: int) -> Iterable[int]: return intv.to_range() -def write_submit_script(fp, content, data): +def write_submit_script(fp: str, content: str, data: dict) -> str: """Write a submission script for divvy by populating a template with data. 
Args: @@ -1178,7 +1186,7 @@ def write_submit_script(fp, content, data): keys_left = re.findall(r"!$\{(.+?)\}", content) if len(keys_left) > 0: _LOGGER.warning( - "> Warning: %d submission template variables are not " "populated: '%s'", + "> Warning: %d submission template variables are not populated: '%s'", len(keys_left), str(keys_left), ) @@ -1207,7 +1215,7 @@ def inspect_looper_config_file(looper_config_dict) -> None: print(f"{key} {value}") -def expand_nested_var_templates(var_templates_dict, namespaces): +def expand_nested_var_templates(var_templates_dict: dict, namespaces: dict) -> dict: "Takes all var_templates as a dict and recursively expands any paths." result = {} @@ -1221,7 +1229,7 @@ def expand_nested_var_templates(var_templates_dict, namespaces): return result -def render_nested_var_templates(var_templates_dict, namespaces): +def render_nested_var_templates(var_templates_dict: dict, namespaces: dict) -> dict: "Takes all var_templates as a dict and recursively renders the jinja templates." result = {} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..e338067a1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,83 @@ +[project] +name = "looper" +version = "2.0.3" +description = "A pipeline submission engine that parses sample inputs and submits pipelines for each sample." 
+readme = "README.md" +license = "BSD-2-Clause" +requires-python = ">=3.10" +authors = [ + { name = "Nathan Sheffield" }, + { name = "Vince Reuter" }, + { name = "Michal Stolarczyk" }, + { name = "Johanna Klughammer" }, + { name = "Andre Rendeiro" }, +] +keywords = ["bioinformatics", "sequencing", "ngs"] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +dependencies = [ + "colorama>=0.3.9", + "eido>=0.2.4", + "jinja2", + "logmuse>=0.2.0", + "pandas>=2.0.2", + "pephubclient>=0.4.0", + "pipestat>=0.12.0a1", + "peppy>=0.40.6", + "pyyaml>=3.12", + "rich>=9.10.0", + "ubiquerg>=0.8.1", + "yacman>=0.9.5", + "pydantic-argparse>=0.9.0", + "psutil", +] + +[project.urls] +Homepage = "https://github.com/pepkit/looper" + +[project.scripts] +looper = "looper.cli_pydantic:main_cli" +divvy = "looper.__main__:divvy_main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.optional-dependencies] +test = [ + "hypothesis>=6.84.3", + "mock", + "pytest", + "pytest-cov", + "pytest-remotedata", + "GitPython", + "psutil", +] + +[tool.pytest.ini_options] +addopts = "-rfE" +testpaths = ["tests"] + +[tool.ruff] +line-length = 88 + +[tool.ruff.lint] +select = ["E", "F", "I"] +ignore = ["F403", "F405", "E501"] + +[tool.ruff.lint.per-file-ignores] +"looper/__init__.py" = ["E402", "F401"] +"looper/looper.py" = ["E402"] +"looper/processed_project.py" = ["E402", "F821"] +"tests/**" = ["F841", "E712", "E722"] + +[tool.ruff.lint.isort] +known-first-party = ["looper"] diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt deleted file mode 100644 index cf60cd24f..000000000 --- a/requirements/requirements-all.txt 
+++ /dev/null @@ -1,14 +0,0 @@ -colorama>=0.3.9 -eido>=0.2.4 -jinja2 -logmuse>=0.2.0 -pandas>=2.0.2 -pephubclient>=0.4.0 -pipestat>=0.12.0a1 -peppy>=0.40.6 -pyyaml>=3.12 -rich>=9.10.0 -ubiquerg>=0.8.1 -yacman>=0.9.5 -pydantic-argparse>=0.9.0 -psutil \ No newline at end of file diff --git a/requirements/requirements-doc.txt b/requirements/requirements-doc.txt deleted file mode 100644 index c5cb76cc1..000000000 --- a/requirements/requirements-doc.txt +++ /dev/null @@ -1,9 +0,0 @@ -https://github.com/databio/mkdocs-databio/archive/master.zip -markdown-include -looper -pephubclient -mkdocs>=1.0 -https://github.com/pepkit/pipestat/archive/refs/heads/master.zip -pydoc-markdown -# versioneer -# Cython diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt deleted file mode 100644 index e3ba5d423..000000000 --- a/requirements/requirements-test.txt +++ /dev/null @@ -1,7 +0,0 @@ -hypothesis >= 6.84.3 -mock -pytest -pytest-cov -pytest-remotedata -GitPython -psutil \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 898fa9895..000000000 --- a/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[aliases] -test = pytest - -[tool:pytest] -# Only request extra info from failures and errors. -addopts = -rfE diff --git a/setup.py b/setup.py deleted file mode 100644 index 08c455ba4..000000000 --- a/setup.py +++ /dev/null @@ -1,95 +0,0 @@ -#! /usr/bin/env python - -import os -import sys - -from setuptools import setup - -# Additional keyword arguments for setup(). -extra = {} - - -# Ordinary dependencies -DEPENDENCIES = [] -with open("requirements/requirements-all.txt", "r") as reqs_file: - for line in reqs_file: - if not line.strip(): - continue - # DEPENDENCIES.append(line.split("=")[0].rstrip("<>")) - DEPENDENCIES.append(line) - - -# numexpr for pandas -try: - import numexpr -except ImportError: - # No numexpr is OK for pandas. - pass -else: - # pandas 0.20.2 needs updated numexpr; the claim is 2.4.6, but that failed. 
- DEPENDENCIES.append("numexpr>=2.6.2") -extra["install_requires"] = DEPENDENCIES - - -# Additional files to include with package -def get_static(name, condition=None): - static = [ - os.path.join(name, f) - for f in os.listdir( - os.path.join(os.path.dirname(os.path.realpath(__file__)), name) - ) - ] - if condition is None: - return static - else: - return [i for i in filter(lambda x: eval(condition), static)] - - -# scripts to be added to the $PATH -# scripts = get_static("scripts", condition="'.' in x") -# scripts removed (TO remove this) -scripts = None - - -with open("looper/_version.py", "r") as versionfile: - version = versionfile.readline().split()[-1].strip("\"'\n") - -with open("README.md") as f: - long_description = f.read() - -setup( - name="looper", - packages=["looper"], - version=version, - description="A pipeline submission engine that parses sample inputs and submits pipelines for each sample.", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Development Status :: 4 - Beta", - "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Topic :: Scientific/Engineering :: Bio-Informatics", - ], - keywords="bioinformatics, sequencing, ngs", - url="https://github.com/pepkit/looper", - author="Nathan Sheffield, Vince Reuter, Michal Stolarczyk, Johanna Klughammer, Andre Rendeiro", - license="BSD2", - entry_points={ - "console_scripts": [ - "looper = looper.cli_pydantic:main_cli", - "divvy = looper.__main__:divvy_main", - ], - }, - scripts=scripts, - package_data={"looper": ["submit_templates/*"]}, - include_package_data=True, - test_suite="tests", - tests_require=(["mock", "pytest"]), - setup_requires=( - ["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else [] - ), - **extra -) diff --git a/tests/conftest.py b/tests/conftest.py 
index 960a98b44..61806a649 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,9 @@ -import shutil -from contextlib import contextmanager import os +import shutil import subprocess -from shutil import copyfile, rmtree, copytree import tempfile +from contextlib import contextmanager +from shutil import copyfile from typing import * import peppy diff --git a/tests/data/hello_looper-dev/pep_derived_attrs/pipeline/count_lines_plot.py b/tests/data/hello_looper-dev/pep_derived_attrs/pipeline/count_lines_plot.py index 398c1c02a..7f16be4cc 100644 --- a/tests/data/hello_looper-dev/pep_derived_attrs/pipeline/count_lines_plot.py +++ b/tests/data/hello_looper-dev/pep_derived_attrs/pipeline/count_lines_plot.py @@ -1,7 +1,8 @@ -import matplotlib.pyplot as plt import os import sys +import matplotlib.pyplot as plt + results_dir = sys.argv[ 1 ] # Obtain the looper results directory passed via the looper command template diff --git a/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py b/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py index 6f6a4ab8f..b1576bfa0 100755 --- a/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py +++ b/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py @@ -1,7 +1,7 @@ import os.path +import sys import pipestat -import sys # Very simple pipeline that calls pipestat # takes arguments invoked during looper submission via command templates diff --git a/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines_plot.py b/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines_plot.py index bc3a2bce3..6799a335a 100644 --- a/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines_plot.py +++ b/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines_plot.py @@ -1,8 +1,9 @@ -import matplotlib.pyplot as plt # be sure to `pip install matplotlib` import os -import pipestat import sys +import matplotlib.pyplot as plt # be sure to `pip install 
matplotlib` +import pipestat + # A pipeline that retrieves previously reported pipestat results # and plots them in a bar chart results_file = sys.argv[1] diff --git a/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py b/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py index 6f6a4ab8f..b1576bfa0 100755 --- a/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py +++ b/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py @@ -1,7 +1,7 @@ import os.path +import sys import pipestat -import sys # Very simple pipeline that calls pipestat # takes arguments invoked during looper submission via command templates diff --git a/tests/divvytests/conftest.py b/tests/divvytests/conftest.py index 2fa0c9049..d42a58724 100644 --- a/tests/divvytests/conftest.py +++ b/tests/divvytests/conftest.py @@ -1,10 +1,10 @@ -import os import glob -import looper.divvy as divvy -import pytest +import os -from looper.divvy import select_divvy_config, DEFAULT_CONFIG_SCHEMA +import pytest +import looper.divvy as divvy +from looper.divvy import select_divvy_config THIS_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join(THIS_DIR, "data/divcfg-master") diff --git a/tests/divvytests/divvy_tests/test_divvy.py b/tests/divvytests/divvy_tests/test_divvy.py index 3a3a6fe98..ce247455b 100644 --- a/tests/divvytests/divvy_tests/test_divvy.py +++ b/tests/divvytests/divvy_tests/test_divvy.py @@ -2,8 +2,9 @@ import pytest from yacman import YAMLConfigManager, load_yaml + from looper.divvy import DEFAULT_COMPUTE_RESOURCES_NAME -from tests.divvytests.conftest import DCC_ATTRIBUTES, FILES, mock_env_missing +from tests.divvytests.conftest import DCC_ATTRIBUTES, FILES class TestDefaultDCC: diff --git a/tests/divvytests/helpers.py b/tests/divvytests/helpers.py index be4b11044..1e9c344f9 100644 --- a/tests/divvytests/helpers.py +++ b/tests/divvytests/helpers.py @@ -12,7 
+12,7 @@ def get_random_key(n=10): :return str: Randomize text key """ if not isinstance(n, int): - raise TypeError("Non-integral key size".format(n)) + raise TypeError("Non-integral key size") if n < 1: raise ValueError("Non-positive key size: {}".format(n)) return "".join(random.choice(string.ascii_letters) for _ in range(n)) diff --git a/tests/divvytests/regression/test_write_script.py b/tests/divvytests/regression/test_write_script.py index 0a82753c1..4cc68331b 100644 --- a/tests/divvytests/regression/test_write_script.py +++ b/tests/divvytests/regression/test_write_script.py @@ -1,8 +1,10 @@ """Specific case tests for writing submission script""" -from copy import deepcopy import random +from copy import deepcopy + import pytest + from looper.divvy import ComputingConfiguration, select_divvy_config from tests.divvytests.helpers import get_random_key diff --git a/tests/divvytests/test_divvy_simple.py b/tests/divvytests/test_divvy_simple.py index bbdc1e44e..912716034 100644 --- a/tests/divvytests/test_divvy_simple.py +++ b/tests/divvytests/test_divvy_simple.py @@ -1,9 +1,6 @@ -import looper.divvy as divvy import os -import pytest -from collections import OrderedDict -from yacman import YAMLConfigManager +import looper.divvy as divvy from looper.divvy import select_divvy_config # For interactive debugging: diff --git a/tests/smoketests/test_cli_validation.py b/tests/smoketests/test_cli_validation.py index 82e6b4eb1..84872e4fc 100644 --- a/tests/smoketests/test_cli_validation.py +++ b/tests/smoketests/test_cli_validation.py @@ -1,17 +1,16 @@ """Tests for the validation of looper CLI use""" -import argparse from typing import * import pytest + +from looper.cli_pydantic import main from looper.const import ( - SAMPLE_SELECTION_ATTRIBUTE_OPTNAME, SAMPLE_EXCLUSION_OPTNAME, SAMPLE_INCLUSION_OPTNAME, + SAMPLE_SELECTION_ATTRIBUTE_OPTNAME, ) -from tests.conftest import print_standard_stream, subp_exec, test_args_expansion -from looper.cli_pydantic import main - +from 
tests.conftest import test_args_expansion SUBCOMMANDS_WHICH_SUPPORT_SKIP_XOR_LIMIT = ["run", "destroy"] diff --git a/tests/smoketests/test_other.py b/tests/smoketests/test_other.py index bc23bfb64..62669ca2f 100644 --- a/tests/smoketests/test_other.py +++ b/tests/smoketests/test_other.py @@ -1,16 +1,16 @@ import os.path +import pandas as pd import pytest from peppy import Project +from looper.cli_pydantic import main from looper.exceptions import ( - PipestatConfigurationException, - MisconfigurationException, LooperReportError, + MisconfigurationException, + PipestatConfigurationException, ) from tests.conftest import * -from looper.cli_pydantic import main -import pandas as pd def _make_flags_pipestat(cfg, type, pipeline_name): @@ -63,7 +63,6 @@ def _make_flags(cfg, type, pipeline_name): class TestLooperPipestat: - @pytest.mark.parametrize("cmd", ["report", "table", "check"]) def test_fail_no_pipestat_config(self, prep_temp_pep, cmd): "report, table, and check should fail if pipestat is NOT configured." diff --git a/tests/smoketests/test_run.py b/tests/smoketests/test_run.py index c35d59470..a9871dff5 100644 --- a/tests/smoketests/test_run.py +++ b/tests/smoketests/test_run.py @@ -4,11 +4,11 @@ from peppy.const import * from yaml import dump +from looper.cli_pydantic import main from looper.const import * from looper.project import Project -from tests.conftest import * from looper.utils import * -from looper.cli_pydantic import main +from tests.conftest import * CMD_STRS = ["string", " --string", " --sjhsjd 212", "7867#$@#$cc@@"] @@ -160,7 +160,6 @@ def test_looper_single_pipeline(self, prep_temp_pep): tp = prep_temp_pep with mod_yaml_data(tp) as config_data: - pifaces = config_data[PIPELINE_INTERFACES_KEY] config_data[PIPELINE_INTERFACES_KEY] = pifaces[0] @@ -593,7 +592,6 @@ def test_cli_compute_overwrites_yaml_settings_spec(self, prep_temp_pep, cmd): reason="This functionality requires input from the user. 
Causing pytest to error if run without -s flag" ) class TestLooperConfig: - def test_init_config_file(self, prep_temp_pep): tp = prep_temp_pep x = ["init", "--force-yes"] diff --git a/tests/test_clean.py b/tests/test_clean.py index 17a1fa9d0..be70e2c9d 100644 --- a/tests/test_clean.py +++ b/tests/test_clean.py @@ -1,9 +1,11 @@ """Tests for looper's cleaning functionality""" import argparse + import pytest -from looper.looper import Cleaner + from looper import Project +from looper.looper import Cleaner def build_namespace(**kwargs): diff --git a/tests/test_comprehensive.py b/tests/test_comprehensive.py index 41c73ea0c..d374d1fc1 100644 --- a/tests/test_comprehensive.py +++ b/tests/test_comprehensive.py @@ -2,19 +2,15 @@ import pytest from peppy.const import * -from yaml import dump +from pipestat import PipestatManager +from pipestat.exceptions import RecordNotFoundError +from yaml import dump, safe_load +from looper.cli_pydantic import main from looper.const import * -from looper.project import Project -from tests.conftest import * from looper.utils import * -from looper.cli_pydantic import main +from tests.conftest import * from tests.smoketests.test_run import is_connected -from tempfile import TemporaryDirectory -from pipestat import PipestatManager -from pipestat.exceptions import RecordNotFoundError - -from yaml import dump, safe_load CMD_STRS = ["string", " --string", " --sjhsjd 212", "7867#$@#$cc@@"] diff --git a/tests/test_desired_sample_range.py b/tests/test_desired_sample_range.py index 97c662561..bd1394596 100644 --- a/tests/test_desired_sample_range.py +++ b/tests/test_desired_sample_range.py @@ -1,8 +1,9 @@ """Tests for determination of desired sample range""" from itertools import chain + import pytest -from hypothesis import given, strategies as st + from looper.utils import ( NatIntervalException, desired_samples_range_limited, diff --git a/tests/test_natural_range.py b/tests/test_natural_range.py index 76f899539..d2c06b22c 100644 --- 
a/tests/test_natural_range.py +++ b/tests/test_natural_range.py @@ -1,10 +1,12 @@ """Tests for the natural numbers range data type""" from typing import * + import pytest -from hypothesis import given, strategies as st -from looper.utils import NatIntervalException, NatIntervalInclusive +from hypothesis import given +from hypothesis import strategies as st +from looper.utils import NatIntervalException, NatIntervalInclusive gen_pos_int = st.integers(min_value=1) gen_opt_int = st.one_of(st.integers(), st.none()) @@ -71,8 +73,9 @@ def test_from_string__just_delimiter__does_not_parse(legit_delim, upper_bound): @given( lo_hi_upper=st.tuples(gen_opt_int, gen_opt_int, st.integers()).filter( - lambda t: (t[0] is not None or t[1] is not None) - and any(is_non_pos(n) for n in t) + lambda t: ( + (t[0] is not None or t[1] is not None) and any(is_non_pos(n) for n in t) + ) ) ) def test_from_string__nonpositive_values__fail_with_expected_error( From cb6513d0134081a4c4389e170e7b6555f1f4c139 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 11 Feb 2026 11:26:39 -0500 Subject: [PATCH 140/163] merge --- looper/cli_pydantic.py | 14 +++++++++----- pyproject.toml | 5 +++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 7d7037dfc..1ad7b94b2 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -125,13 +125,17 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): cli_use_errors = validate_post_parse(subcommand_args) if cli_use_errors: - parser.print_help(sys.stderr) - parser.error( - f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}" - ) + if parser: + parser.print_help(sys.stderr) + parser.error( + f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}" + ) + else: + raise ValueError(f"CLI use problem(s): {', '.join(cli_use_errors)}") if subcommand_name is None: - parser.print_help(sys.stderr) + if parser: + parser.print_help(sys.stderr) 
sys.exit(1) if subcommand_name == "init": diff --git a/pyproject.toml b/pyproject.toml index e338067a1..2306d22bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ Homepage = "https://github.com/pepkit/looper" [project.scripts] looper = "looper.cli_pydantic:main_cli" +looper-serve = "looper.api.main:main" divvy = "looper.__main__:divvy_main" [build-system] @@ -52,6 +53,10 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project.optional-dependencies] +api = [ + "fastapi>=0.100.0", + "uvicorn>=0.22.0", +] test = [ "hypothesis>=6.84.3", "mock", From ae74628e05c877a33cc26a6740961734ae61f37e Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 11 Feb 2026 11:34:31 -0500 Subject: [PATCH 141/163] make serve alpha; add jobs endpoint and fix some api bugs --- looper/api/main.py | 45 +++++++++++++++++++++++++++++++-------------- pyproject.toml | 1 - 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 963aa1cda..315d38252 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -1,18 +1,16 @@ -from argparse import ArgumentParser, Namespace import secrets -from typing import Dict, TypeAlias +from argparse import ArgumentParser, Namespace +from typing import Dict import fastapi import pydantic import uvicorn +from fastapi import FastAPI, HTTPException -from fastapi import FastAPI - +from looper.api import stdout_redirects from looper.cli_pydantic import run_looper from looper.command_models.commands import SUPPORTED_COMMANDS, TopLevelParser -from looper.api import stdout_redirects - stdout_redirects.enable_proxy() @@ -23,12 +21,16 @@ class Job(pydantic.BaseModel): ) status: str = pydantic.Field( default="in_progress", - description="The current status of the job. Can be either `in_progress` or `completed`.", + description="The current status of the job. 
Can be `in_progress`, `completed`, or `failed`.", ) console_output: str | None = pydantic.Field( default=None, description="Console output produced by `looper` while performing the requested action", ) + error: str | None = pydantic.Field( + default=None, + description="Error message if the job failed", + ) app = FastAPI(validate_model=True) @@ -39,13 +41,17 @@ def background_async(top_level_model: TopLevelParser, job_id: str) -> None: argparse_namespace = create_argparse_namespace(top_level_model) output_stream = stdout_redirects.redirect() - run_looper(argparse_namespace, None, True) - - # Here, we should call `stdout_redirects.stop_redirect()`, but that fails for reasons discussed - # in the following issue: https://github.com/python/cpython/issues/80374 - # But this *seems* not to pose any problems. - jobs[job_id].status = "completed" - jobs[job_id].console_output = output_stream.getvalue() + try: + run_looper(argparse_namespace, parser=None) + jobs[job_id].status = "completed" + except Exception as e: + jobs[job_id].status = "failed" + jobs[job_id].error = str(e) + finally: + # Here, we should call `stdout_redirects.stop_redirect()`, but that fails for reasons discussed + # in the following issue: https://github.com/python/cpython/issues/80374 + # But this *seems* not to pose any problems. 
+ jobs[job_id].console_output = output_stream.getvalue() def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: @@ -103,9 +109,20 @@ async def main_endpoint( description="Retrieve the status of a job based on its unique identifier.", ) async def get_status(job_id: str) -> Job: + if job_id not in jobs: + raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found") return jobs[job_id] +@app.get( + "/jobs", + summary="List all jobs", + description="Retrieve a list of all submitted jobs with their IDs and statuses.", +) +async def list_jobs() -> Dict: + return {"jobs": [{"id": job.id, "status": job.status} for job in jobs.values()]} + + def main() -> None: parser = ArgumentParser("looper-serve", description="Run looper HTTP API server") parser.add_argument( diff --git a/pyproject.toml b/pyproject.toml index 2306d22bf..4edf41080 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,6 @@ Homepage = "https://github.com/pepkit/looper" [project.scripts] looper = "looper.cli_pydantic:main_cli" -looper-serve = "looper.api.main:main" divvy = "looper.__main__:divvy_main" [build-system] From f696255ce84b7d84dc515bc9ff7c48541894f64f Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 11 Feb 2026 11:38:37 -0500 Subject: [PATCH 142/163] format --- looper/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/looper/utils.py b/looper/utils.py index 2aabcf7c0..2b3a43eb8 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -754,14 +754,12 @@ def looper_config_tutorial() -> bool: console.print("\n") - console.print( - f"""\ + console.print(f"""\ [yellow]pep_config:[/yellow] {cfg["pep_config"]} [yellow]output_dir:[/yellow] {cfg["output_dir"]} [yellow]pipeline_interfaces:[/yellow] - {piface_paths} -""" - ) +""") for piface_path in piface_paths: if not os.path.exists(piface_path): From 0c0d91cf23d552dd9ca19583f3e2183913d2941b Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 11 Feb 2026 11:43:10 -0500 Subject: [PATCH 
143/163] cleanup stdout redirects --- looper/api/stdout_redirects.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/looper/api/stdout_redirects.py b/looper/api/stdout_redirects.py index f430b1b42..924c9c5a6 100644 --- a/looper/api/stdout_redirects.py +++ b/looper/api/stdout_redirects.py @@ -1,3 +1,4 @@ +# ruff: noqa: E731 # Copied from https://gitlab.com/yquemener/stdout-redirects # # copied from https://stackoverflow.com/a/43667367/1193986 @@ -12,6 +13,7 @@ # I guess that means the result is CC-by-SA +import copy import sys import threading from io import StringIO @@ -55,7 +57,7 @@ class LocalProxy: def __init__( self, - local: Union[Any, "LocalProxy", "LocalStack"], + local: Union[Any, "LocalProxy"], name: Optional[str] = None, ) -> None: object.__setattr__(self, "_LocalProxy__local", local) From 3c420a15d33d412fd3d63da9a3d4f51cd71ca5e3 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 11 Feb 2026 11:59:20 -0500 Subject: [PATCH 144/163] Use better shell inference for submission commands. See #282 --- looper/conductor.py | 18 ++++++++++++++++-- pyproject.toml | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/looper/conductor.py b/looper/conductor.py index da0228e1f..984447e4d 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -3,6 +3,7 @@ import importlib import logging import os +import shlex import signal import subprocess import sys @@ -470,10 +471,23 @@ def submit(self, force: bool = False) -> bool: _LOGGER.info("Dry run, not submitted") elif self._rendered_ok: sub_cmd = self.prj.dcc.compute["submission_command"] - submission_command = "{} {}".format(sub_cmd, script) + + # Detect shell metacharacters that require shell=True + shell_chars = set('|&;<>()$`\\"\' \t\n*?[#~') + needs_shell = any(c in sub_cmd for c in shell_chars) and sub_cmd != "." 
+ # Capture submission command return value so that we can # intercept and report basic submission failures; #167 - process = subprocess.Popen(submission_command, shell=True) + if sub_cmd == ".": + # Direct execution: run script through bash without a submission wrapper + _LOGGER.debug("Direct execution via bash: %s", script) + process = subprocess.Popen(["/bin/bash", script]) + elif needs_shell: + _LOGGER.debug("Shell execution (detected shell syntax): %s %s", sub_cmd, script) + process = subprocess.Popen(f"{sub_cmd} {script}", shell=True, executable="/bin/bash") + else: + _LOGGER.debug("Direct execution: %s %s", sub_cmd, script) + process = subprocess.Popen(shlex.split(sub_cmd) + [script]) self.process_id = process.pid process.wait() if process.returncode != 0: diff --git a/pyproject.toml b/pyproject.toml index 4edf41080..8e1ceec6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "pyyaml>=3.12", "rich>=9.10.0", "ubiquerg>=0.8.1", - "yacman>=0.9.5", + "yacman @ git+https://github.com/databio/yacman.git@dev", # TODO: revert to yacman>=0.9.5 after release "pydantic-argparse>=0.9.0", "psutil", ] From a2ee46bd6da826c90f3836db532adeb479d6b650 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 11 Feb 2026 12:04:28 -0500 Subject: [PATCH 145/163] update actions, etc --- .github/workflows/black.yml | 4 ++-- .github/workflows/python-publish.yml | 2 +- .github/workflows/run-pytest.yml | 2 +- looper/conductor.py | 12 +++++++++--- pyproject.toml | 3 +++ 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 7a5062633..46241541a 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -6,10 +6,10 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: actions/setup-python@v5 with: python-version: "3.12" - - run: pip install ruff + - run: pip install ruff==0.15.0 - run: ruff check . 
- run: ruff format --check . diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index a637870e8..692182e85 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -13,7 +13,7 @@ jobs: id-token: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v5 with: diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 91d8774fc..d497a22a7 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -15,7 +15,7 @@ jobs: os: [ubuntu-latest, macos-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 diff --git a/looper/conductor.py b/looper/conductor.py index 984447e4d..962351bda 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -473,7 +473,7 @@ def submit(self, force: bool = False) -> bool: sub_cmd = self.prj.dcc.compute["submission_command"] # Detect shell metacharacters that require shell=True - shell_chars = set('|&;<>()$`\\"\' \t\n*?[#~') + shell_chars = set("|&;<>()$`\\\"' \t\n*?[#~") needs_shell = any(c in sub_cmd for c in shell_chars) and sub_cmd != "." 
# Capture submission command return value so that we can @@ -483,8 +483,14 @@ def submit(self, force: bool = False) -> bool: _LOGGER.debug("Direct execution via bash: %s", script) process = subprocess.Popen(["/bin/bash", script]) elif needs_shell: - _LOGGER.debug("Shell execution (detected shell syntax): %s %s", sub_cmd, script) - process = subprocess.Popen(f"{sub_cmd} {script}", shell=True, executable="/bin/bash") + _LOGGER.debug( + "Shell execution (detected shell syntax): %s %s", + sub_cmd, + script, + ) + process = subprocess.Popen( + f"{sub_cmd} {script}", shell=True, executable="/bin/bash" + ) else: _LOGGER.debug("Direct execution: %s %s", sub_cmd, script) process = subprocess.Popen(shlex.split(sub_cmd) + [script]) diff --git a/pyproject.toml b/pyproject.toml index 8e1ceec6c..0864c1e03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,9 @@ divvy = "looper.__main__:divvy_main" requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.metadata] +allow-direct-references = true + [project.optional-dependencies] api = [ "fastapi>=0.100.0", From 8130304dd67d4d3d3ef5f0e6cd97e7b8ca584add Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 07:53:05 -0500 Subject: [PATCH 146/163] remove old docs --- docs/README.md | 57 -- docs/advanced.md | 85 -- docs/autodoc_build/.gitignore | 2 - docs/changelog.md | 463 ---------- docs/concentric-templates.md | 60 -- docs/config-files.md | 44 - docs/containers.md | 64 -- docs/contributing.md | 10 - docs/defining-a-project.md | 42 - docs/derived-columns.md | 74 -- docs/divvy/README.md | 66 -- docs/divvy/adapters.md | 18 - docs/divvy/configuration.md | 97 -- docs/divvy/containers.md | 76 -- docs/divvy/default-packages.md | 6 - docs/faq.md | 43 - docs/features.md | 49 - docs/grouping-jobs.md | 11 - docs/how-to-merge-inputs.md | 60 -- docs/img/HTML.svg | 526 ----------- docs/img/cli.svg | 379 -------- docs/img/collate.svg | 133 --- docs/img/computing.svg | 756 --------------- docs/img/divvy-connect.svg 
| 648 ------------- docs/img/divvy-merge.svg | 1066 ---------------------- docs/img/divvy_bug.svg | 103 --- docs/img/divvy_logo.svg | 153 ---- docs/img/divvy_logo_dark.svg | 153 ---- docs/img/favicon.ico | Bin 3186 -> 0 bytes docs/img/favicon_looper.ico | Bin 3186 -> 0 bytes docs/img/favicon_looper.svg | 72 -- docs/img/file_yaml.svg | 394 -------- docs/img/flexible_pipelines.svg | 270 ------ docs/img/job_monitoring.svg | 286 ------ docs/img/looper_bug.svg | 94 -- docs/img/looper_bug_dark.svg | 94 -- docs/img/looper_logo.svg | 130 --- docs/img/looper_logo_dark.svg | 122 --- docs/img/looper_logo_text.svg | 110 --- docs/img/modular.svg | 118 --- docs/img/nodivvy.svg | 646 ------------- docs/img/resources.svg | 635 ------------- docs/img/subprojects.svg | 293 ------ docs/implied-columns.md | 49 - docs/initialize.md | 21 - docs/looper-config.md | 36 - docs/looper-report.md | 13 - docs/multiple-pipelines.md | 22 - docs/parameterizing-pipelines.md | 69 -- docs/pipeline-interface-specification.md | 222 ----- docs/pipeline-tiers.md | 19 - docs/pipestat.md | 175 ---- docs/pre-submission-hooks.md | 282 ------ docs/running-a-pipeline.md | 19 - docs/running-on-a-cluster.md | 24 - docs/sample-annotation-sheet.md | 47 - docs/support.md | 5 - docs/usage.md | 669 -------------- docs/usage.template | 26 - docs/using-geofetch.md | 35 - docs/variable-namespaces.md | 120 --- docs/writing-a-pipeline-interface.md | 34 - docs_jupyter/build/.gitignore | 2 - docs_jupyter/cli_divvy.ipynb | 390 -------- docs_jupyter/debug_divvy.ipynb | 56 -- docs_jupyter/hello-world.ipynb | 524 ----------- docs_jupyter/tutorial_divvy.ipynb | 413 --------- mkdocs.yml | 59 -- 68 files changed, 11839 deletions(-) delete mode 100644 docs/README.md delete mode 100644 docs/advanced.md delete mode 100644 docs/autodoc_build/.gitignore delete mode 100644 docs/changelog.md delete mode 100644 docs/concentric-templates.md delete mode 100644 docs/config-files.md delete mode 100644 docs/containers.md delete mode 100644 
docs/contributing.md delete mode 100644 docs/defining-a-project.md delete mode 100644 docs/derived-columns.md delete mode 100644 docs/divvy/README.md delete mode 100644 docs/divvy/adapters.md delete mode 100644 docs/divvy/configuration.md delete mode 100644 docs/divvy/containers.md delete mode 100644 docs/divvy/default-packages.md delete mode 100644 docs/faq.md delete mode 100644 docs/features.md delete mode 100644 docs/grouping-jobs.md delete mode 100644 docs/how-to-merge-inputs.md delete mode 100644 docs/img/HTML.svg delete mode 100644 docs/img/cli.svg delete mode 100644 docs/img/collate.svg delete mode 100644 docs/img/computing.svg delete mode 100644 docs/img/divvy-connect.svg delete mode 100644 docs/img/divvy-merge.svg delete mode 100644 docs/img/divvy_bug.svg delete mode 100644 docs/img/divvy_logo.svg delete mode 100644 docs/img/divvy_logo_dark.svg delete mode 100644 docs/img/favicon.ico delete mode 100644 docs/img/favicon_looper.ico delete mode 100644 docs/img/favicon_looper.svg delete mode 100644 docs/img/file_yaml.svg delete mode 100644 docs/img/flexible_pipelines.svg delete mode 100644 docs/img/job_monitoring.svg delete mode 100644 docs/img/looper_bug.svg delete mode 100644 docs/img/looper_bug_dark.svg delete mode 100644 docs/img/looper_logo.svg delete mode 100644 docs/img/looper_logo_dark.svg delete mode 100644 docs/img/looper_logo_text.svg delete mode 100644 docs/img/modular.svg delete mode 100644 docs/img/nodivvy.svg delete mode 100644 docs/img/resources.svg delete mode 100644 docs/img/subprojects.svg delete mode 100644 docs/implied-columns.md delete mode 100644 docs/initialize.md delete mode 100644 docs/looper-config.md delete mode 100644 docs/looper-report.md delete mode 100644 docs/multiple-pipelines.md delete mode 100644 docs/parameterizing-pipelines.md delete mode 100644 docs/pipeline-interface-specification.md delete mode 100644 docs/pipeline-tiers.md delete mode 100644 docs/pipestat.md delete mode 100644 docs/pre-submission-hooks.md delete mode 
100644 docs/running-a-pipeline.md delete mode 100644 docs/running-on-a-cluster.md delete mode 100644 docs/sample-annotation-sheet.md delete mode 100644 docs/support.md delete mode 100644 docs/usage.md delete mode 100644 docs/usage.template delete mode 100644 docs/using-geofetch.md delete mode 100644 docs/variable-namespaces.md delete mode 100644 docs/writing-a-pipeline-interface.md delete mode 100644 docs_jupyter/build/.gitignore delete mode 100644 docs_jupyter/cli_divvy.ipynb delete mode 100644 docs_jupyter/debug_divvy.ipynb delete mode 100644 docs_jupyter/hello-world.ipynb delete mode 100644 docs_jupyter/tutorial_divvy.ipynb delete mode 100644 mkdocs.yml diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 026059840..000000000 --- a/docs/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# pipeline submitting engine - -[![PEP compatible](https://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io) - -## What is looper? - -Looper is a job submitting engine. Looper deploys arbitrary shell commands for each sample in a [standard PEP project](https://pepkit.github.io/docs/home/). You can think of looper as providing a single user interface to running, monitoring, and managing all of your sample-intensive research projects the same way, regardless of data type or pipeline used. - -## What makes looper better? - -Looper **decouples job handling from the pipeline process**. In a typical pipeline, job handling (managing how individual jobs are submitted to a cluster) is delicately intertwined with actual pipeline commands (running the actual code for a single compute job). In contrast, the looper approach is modular: looper *only* manages job submission. This approach leads to several advantages compared with the traditional integrated approach: - -1. pipelines do not need to independently re-implement job handling code, which is shared. -2. every project uses a universal structure, so datasets can move from one pipeline to another. -3. 
users must learn only a single interface that works with any project for any pipeline. -4. running just one or two samples/jobs is simpler, and does not require a distributed compute environment. - - - - -## Installing - -Releases are posted as [GitHub releases](https://github.com/pepkit/looper/releases), or you can install using `pip`: - - -```console -pip install --user looper -``` - -Update with: - -```console -pip install --user --upgrade looper -``` - -If the `looper` executable in not automatically in your `$PATH`, add the following line to your `.bashrc` or `.profile`: - -```console -export PATH=~/.local/bin:$PATH -``` - -## Quick start - -To test `looper`, follow the [Hello Looper example repository](https://github.com/databio/hello_looper) to run your first looper project: - - -```console -# download and unzip the hello_looper repository -wget https://github.com/databio/hello_looper/archive/master.zip -unzip master.zip - -# Run looper: -cd hello_looper-master -looper run --looper-config .looper.yaml project/project_config.yaml -``` - -Detailed explanation of results is in the [Hello world tutorial](hello-world.md). diff --git a/docs/advanced.md b/docs/advanced.md deleted file mode 100644 index e2d653bc1..000000000 --- a/docs/advanced.md +++ /dev/null @@ -1,85 +0,0 @@ -# Advanced features - -## Handling multiple input files -Sometimes you have multiple input files that you want to merge for one sample. -For example, a common use case is a single library that was spread across multiple sequencing lanes, -yielding multiple input files that need to be merged and then run through the pipeline as one unit. -Rather than putting multiple lines in your sample annotation sheet, which causes conceptual and analytical challenges, -we introduce **two ways to merge inputs**: - -1. Use *shell expansion characters* (`*` or `[]`) in your `data_source` definition or filename; -for relatively simple merge cases this works well. -2. 
Specify a *merge table*, which maps input files to samples for samples with more than one input file. -To accommodate complex merger use cases, this is infinitely customizable. - -To do the first option, simply change data source specification: - -```yaml -data_sources: - data_R1: "${DATA}/{id}_S{nexseq_num}_L00*_R1_001.fastq.gz" - data_R2: "${DATA}/{id}_S{nexseq_num}_L00*_R2_001.fastq.gz" -``` - -For the second option, provide *in the `metadata` section* of your project config file a path to merge table file: - -```yaml -metadata: - merge_table: mergetable.csv -``` - -Make sure the `sample_name` column of this table matches, and then include any columns needed to point to the data. -Looper will automatically include all of these files as input passed to the pipelines. - -***Warning***: do not use *both* of these options for the same sample at the same time; that will lead to multiple mergers. - -**Note**: mergers are *not* the way to handle different functional/conceptual *kinds* of input files (e.g., `read1` and `read2` for a sample sequenced with a paired-end protocol). -Such cases should be handled as *separate derived columns* in the main sample annotation sheet if they're different arguments to the pipeline. - - -## Connecting to multiple pipelines - -If you have a project that contains samples of different types, then you may need to specify multiple pipeline repositories to your project. -Starting in version 0.5, looper can handle a priority list of pipelines. -Starting with version 0.6, each path should be directly to a pipeline interface file. - -**Example**: - -```yaml -metadata: - pipeline_interfaces: [pipeline_iface1.yaml, pipeline_iface2.yaml] -``` - -In this case, for a given sample, `looper` will first look in `pipeline_iface1.yaml` -to see if an appropriate (i.e., protocol-matched) pipeline exists for this sample type. -If one is found, `looper` will use that pipeline (or set of pipelines, as specified in the `protocol_mapping`). 
-Once a pipeline is submitted any remaining interface files will be ignored. -Until an appropriate pipeline is found, each interface file will be considered in succession. -If no suitable pipeline is found in any interface, the sample will be skipped. -In other words, the `pipeline_interfaces` value specifies a *prioritized* search list. - -## Set up tab completion - -Source `bash_complete.sh` to your `~/.bashrc` to get basic tab completion for Looper. - -Then, simply type `looper ` to see a list of commands and `looper comma` to get autocompletion for specific commands. - -Source script to add to `~/.bashrc`: -```bash -# Begin looper bash autocomplete -_looper_autocomplete() -{ - local cur prev opts1 - cur=${COMP_WORDS[COMP_CWORD]} - prev=${COMP_WORDS[COMP_CWORD-1]} - opts1=$(looper --commands) - case ${COMP_CWORD} in - 1) - COMPREPLY=($(compgen -W "${opts1}" -- ${cur})) - ;; - 2) - COMPREPLY=() - ;; - esac -} && complete -o bashdefault -o default -F _looper_autocomplete looper -# end looper bash autocomplete -``` \ No newline at end of file diff --git a/docs/autodoc_build/.gitignore b/docs/autodoc_build/.gitignore deleted file mode 100644 index d6b7ef32c..000000000 --- a/docs/autodoc_build/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/docs/changelog.md b/docs/changelog.md deleted file mode 100644 index 268f8e0e9..000000000 --- a/docs/changelog.md +++ /dev/null @@ -1,463 +0,0 @@ -# Changelog - -This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. - -## [2.0.3] -- 2025-09-23 -### Fixed -- Fixed [#543](https://github.com/pepkit/looper/issues/543) -- Fixed [#547](https://github.com/pepkit/looper/issues/547) -- Fixed [#548](https://github.com/pepkit/looper/issues/548) - - -## [2.0.2] -- 2025-09-22 -### Changed -- Remove veracitools dependency from the requirements. 
- -## [2.0.1] -- 2025-03-05 - -### Changed -- update ubiquerg>=0.8.1 - -### Fixed -- [#541](https://github.com/pepkit/looper/issues/541) - -## [2.0.0] -- 2025-01-16 - -This release breaks backwards compatibility for Looper versions < 2.0.0 - -### Fixed -- divvy init [#520](https://github.com/pepkit/looper/issues/520) -- replaced deprecated PEPHubClient function, `_load_raw_pep` with `.load_raw_pep` -- looper cli parameters now take priority as originally intended [#518](https://github.com/pepkit/looper/issues/518) -- fix divvy inspect -- remove printed dictionary at looper finish [#511](https://github.com/pepkit/looper/issues/511) -- fix [#536](https://github.com/pepkit/looper/issues/536) -- fix [#522](https://github.com/pepkit/looper/issues/522) -- fix [#537](https://github.com/pepkit/looper/issues/537) -- fix [#534](https://github.com/pepkit/looper/issues/534) - -### Changed -- `--looper-config` is now `--config`, `-c`. [#455](https://github.com/pepkit/looper/issues/455) -- A pipeline interface now consolidates a `sample_interface` and a `project_interface` [#493](https://github.com/pepkit/looper/issues/493) -- Updated documentation for Looper 2.0.0, removing previous versions [pepspec PR #34](https://github.com/pepkit/pepspec/pull/34) -- remove position based argument for divvy config, must use --config or run as default config - -### Added -- `looper init` tutorial [#466](https://github.com/pepkit/looper/issues/466) -- looper config allows for `pephub_path` in pipestat config section of `.looper.yaml` [#519](https://github.com/pepkit/looper/issues/519) -- improve error messaging for bad/malformed looper configurations [#515](https://github.com/pepkit/looper/issues/515) -- add shortform argument for --package (alias is now -p) - - -## [1.9.1] -- 2024-07-18 - -### Changed -- ensure peppy requirement peppy>=0.40.0,<=0.40.2 - -## [1.9.0] -- 2024-06-26 - -### Added -- user can now add cli modifiers to looper config instead of PEP project 
[#270](https://github.com/pepkit/looper/issues/270) -- pipeline interfaces no longer must be nested under sample and project keys within looper config file [#465](https://github.com/pepkit/looper/issues/465) -- var_templates can now be hierarchical [#334](https://github.com/pepkit/looper/issues/334) -- looper can now gracefully halt spawned subprocesses when the user sends a keyboard interrupt [#37](https://github.com/pepkit/looper/issues/37) - -## [1.8.1] -- 2024-06-06 - -### Fixed -- added `-v` and `--version` to the CLI -- fixed running project level with `--project` argument - -## [1.8.0] -- 2024-06-04 - -### Added -- looper destroy now destroys individual results when pipestat is configured: https://github.com/pepkit/looper/issues/469 -- comprehensive smoketests: https://github.com/pepkit/looper/issues/464 -- allow rerun to work on both failed or waiting flags: https://github.com/pepkit/looper/issues/463 - -### Changed -- Migrated `argparse` CLI definition to a pydantic basis for all commands. 
See: https://github.com/pepkit/looper/issues/438 -- during project load, check if PEP file path is a file first, then check if it is a registry path: https://github.com/pepkit/looper/issues/456 -- Looper now uses FutureYamlConfigManager due to the yacman refactor v0.9.3: https://github.com/pepkit/looper/issues/452 - -### Fixed -- inferring project name when loading PEP from csv: https://github.com/pepkit/looper/issues/484 -- fix inconsistency resolving pipeline interface paths if multiple paths are supplied: https://github.com/pepkit/looper/issues/474 -- fix bug with checking for completed flags: https://github.com/pepkit/looper/issues/470 -- fix looper destroy not properly destroying all related files: https://github.com/pepkit/looper/issues/468 -- looper rerun now only runs failed jobs as intended: https://github.com/pepkit/looper/issues/467 -- looper inspect now inspects the looper config: https://github.com/pepkit/looper/issues/462 -- Load PEP from CSV: https://github.com/pepkit/looper/issues/456 -- looper now works with sample_table_index https://github.com/pepkit/looper/issues/458 - -## [1.7.1] -- 2024-05-28 - -### Fixed -- pin pipestat version to be between pipestat>=0.8.0,<0.9.0 https://github.com/pepkit/looper/issues/494 - -## [1.7.0] -- 2024-01-26 - -### Added -- `--portable` flag to `looper report` to create a portable version of the html report -- `--lump-j` allows grouping samples into a defined number of jobs - -### Changed -- `--lumpn` is now `--lump-n` -- `--lump` is now `--lump-s` - -## [1.6.0] -- 2023-12-22 - -### Added -- `looper link` creates symlinks for results grouped by record_identifier. It requires pipestat to be configured. [#72](https://github.com/pepkit/looper/issues/72) -- basic tab completion. - -### Changed -- looper now works with pipestat v0.6.0 and greater. -- `looper table`, `check` now use pipestat and therefore require pipestat configuration. 
[#390](https://github.com/pepkit/looper/issues/390) -- changed how looper configures pipestat [#411](https://github.com/pepkit/looper/issues/411) -- initializing pipeline interface also writes an example `output_schema.yaml` and `count_lines.sh` pipeline - -### Fixed -- filtering via attributes that are integers. - -## [1.5.1] -- 2023-08-14 - -### Fixed -- fix `looper table` failing without `sample.protocol` - -### Changed -- correct `--looper_conifg` to `--looper-config` - -## [1.5.0] -- 2023-08-09 - -### Added - -- ability to use PEPs from PEPhub without downloading project [#341](https://github.com/pepkit/looper/issues/341) -- ability to specify pipeline interfaces inside looper config [Looper Config](https://looper.databio.org/en/dev/how_to_define_looper_config/) -- divvy re-integrated in looper -- divvy inspect -p package -- Looper will now check that the command path provided in the pipeline interface is callable before submitting. - - -### Changed -- initialization of generic pipeline interface available using subcommand `init-piface` -- `looper report` will now use pipestat to generate browsable HTML reports if pipestat is configured. -- looper now works with pipestat v0.5.0. -- Removed --toggle-key functionality. -- Allow for user to input single integer value for --sel-incl or --sel-excl - -## [1.4.3] -- 2023-08-01 - -### Fixed -- Fix regression for var_templates expansion. - -## [1.4.2] -- 2023-07-31 - -### Fixed -- Fix for expanding paths properly. - -## [1.4.1] -- 2023-06-22 - - -## [1.4.0] -- 2023-04-24 - -### Added - -- preliminary support for [pipestat](http://pipestat.databio.org). -- ability to skip samples using `-k` or `--skip` [#367](https://github.com/pepkit/looper/pull/367) -- ability to input a range into `limit` and `skip`[#367](https://github.com/pepkit/looper/pull/367) -- `limit` and `skip` are now both usable with Destroy and Run. 
[#367](https://github.com/pepkit/looper/pull/367) -- ability to generate generic pipeline interface using `init -p` or `init --piface` [#368](https://github.com/pepkit/looper/pull/368) -- Fixed ability to use custom sample index -- Added `write_custom_template`, a built-in pre-submit plugin for writing templates - -### Changed -- looper now returns nonzero if any samples fail submission -- various other developer changes - -### Deprecated -- `path` variable will be deprecated in favor of `var_templates` [#322](https://github.com/pepkit/looper/issues/322) - -## [1.3.2] -- 2022-02-09 - -### Changed -- Fixed bug with use_2to3 for setuptools compatibility. - -## [1.3.1] -- 2021-06-18 - -### Changed -- If remote schemas are not accessible, the job submission doesn't fail anymore -- Fixed a bug where looper stated "No failed flag found" when a failed flag was found - -### Deprecated -- Fixed and deprecated `looper inspect`. Use `eido inspect` from now on. - - -## [1.3.0] -- 2020-10-07 - -### Added -- New plugin system for pre-submission hooks -- Included plugin functions: `write_sample_yaml`, `write_sample_yaml_prj`, `write_sample_yaml_cwl` and `write_submission_yaml` -- New `var_templates` section for defining variables in the pipeline interface - -### Changed -- Pipeline interface specification was updated to accommodate new `var_templates` section and pre-submission hooks - -### Deprecated -- pipeline interface sections: - - `dynamic_variables_command_template`, which can now be more simply accomplished with a pre-submission hook - - `path`, which is replaced by a more generic `var_templates` section - -## [1.2.1] - 2020-08-26 - -### Added -- Environment variables expansion in custom sample YAML paths; [Issue 273](https://github.com/pepkit/looper/issues/273) -- `dynamic_variables_script_path` key in the pipeline interface. 
Path, absolute or relative to the pipeline interface file; [Issue 276](https://github.com/pepkit/looper/issues/276) -### Changed -- Resolve project pipeline interface path by making it relative to the config not current directory; [Issue 268](https://github.com/pepkit/looper/issues/268) -### Fixed -- Unclear error when `output_dir` was not provided in a config `looper` section; [Issue 286](https://github.com/pepkit/looper/issues/286) - -## [1.2.0] - 2020-05-26 - -**This version introduced backwards-incompatible changes.** - -### Added -- Commands: - - `init`; initializes `.looper.yaml` file - - `inspect`; inspects `Project` or `Sample` objects - - `table`; writes summary stats table - - `runp`; runs project level pipelines -- Input schemas and output schemas -- `--settings` argument to specify compute resources as a YAML file -- Option to preset CLI options in a dotfile -- `--command-extra` and `--command-extra-override` arguments that append specified string to pipeline commands. These functions supercede the previous `pipeline_config` and `pipeline_args` sections, which are now deprecated. The new method is more universal, and can accomplish the same functionality but more simply, using the built-in PEP machinery to selectively apply commands to samples. -- Option to specify destination of sample YAML in pipeline interface -- `--pipeline_interfaces` argument that allows pipeline interface specification via CLI - -### Changed -- `looper summarize` to `looper report` -- Pipeline interface format changed drastically -- The PyPi name changed from 'loopercli' to 'looper' -- resources section in pipeline interface replaced with `size_dependent_attributes` or `dynamic_variables_command_template`. 
-- `--compute` can be used to specify arguments other than resources -- `all_input_files` and `required_input_files` keys in pipeline interface moved to the input schema and renamed to `files` and `required_files` -- pipeline interface specification - -## [0.12.6] -- 2020-02-21 - -### Added -- possibility to execute library module as a script: `python -m looper ...` - -### Changed -- in the summary page account for missing values when plotting; the value is disregarded in such a case and plot is still created -- show 50 rows in the summary table -- make links to the summary page relative -- long entries in the sample stats table are truncated with an option to see original value in a popover - -### Fixed -- inactive jQuery dependent components in the status page -- project objects layout in the summary index page -- inactivation of popovers after Bootstrap Table events -- non-homogeneous status flags appearance - -## [0.12.5] -- 2019-12-13 -### Changed -- reduce verbosity of missing options; [Issue 174](https://github.com/pepkit/looper/issues/174) -- switch to [Bootstrap Table](https://bootstrap-table.com/) in the summary index page table and sample status tables - -## [0.12.4] -- 2019-07-18 -### Added -- Ability to declare `required_executables` in a `PipelineInterface`, to trigger a naive "runnability" check for a sample submission -- A possibility to opt out of status page inclusion in the navbar - -### Changed -- The status tables now use DataTables jQuery plugin to make them interactive - -### Fixed -- Navbar links creation - -## [0.12.3] -- 2019-06-20 -### Fixed -- Bug in `Sample` YAML naming, whereby a base `Sample` was being suffixed as a subtype would be, leading to a pipeline argument based on `yaml_file` that did not exist on disk. - -## [0.12.2] -- 2019-06-06 - -### Fixed -- Fixed various bugs related to populating derived attributes, including using attributes like `sample_name` as keys. 
-- Fixed a bug related to singularity attributes not being passed from a pipeline interface file. -- Fixed several bugs with incorrect version requirements. - -## [0.12.1] -- 2019-05-20 - -### Added -- Made `looper.Sample` include more specific functionality from `peppy` - -### Changed -- Status table creation is possible outside of `looper`. -- In the summary index page the plottable columns list is now scrollable -- Status page relies on the `profile.tsv` file rather than `*.log`; [Issue 159](https://github.com/pepkit/looper/issues/159) - -### Fixed -- In HTML reporting module, do not ignore objects which are neither HTMLs nor images in the summary, e.g. CSVs -- Restore parsing and application of pipeline-level computing resource specification from a pipeline interface file; [Issue 184](https://github.com/pepkit/looper/issues/184) -- Allow `ignore_flags` to properly modulate submission messaging; [Issue 179](https://github.com/pepkit/looper/issues/179) -- Do not display time-like summary columns as the plottable ones; [Issue 182](https://github.com/pepkit/looper/issues/182) - -## [0.12.0] -- 2019-05-03 - -### Added -- First implementation of pipeline interface 'outputs', so pipeline authors can specify items of interest produced by the pipeline. -- Functions and attributes on `Project` to support "outputs" (`interfaces`, `get_interfaces`, `get_outputs`) - -### Changed -- Start "compute" --> "compute_packages" transition -- `get_logger` moved to `peppy` - -### Fixed -- Prevent CLI option duplication in pipeline commands generated -- Make functional CLI spec of particular attribute on which to base selection of a subset of a project's samples ([`peppy` 298](https://github.com/pepkit/peppy/issues/298)) - -## [0.11.1] -- 2019-04-17 - -### Changed -- Improved documentation -- Improved interaction with `peppy` and `divvy` dependencies - -## [0.11] -- 2019-04-17 - -### Added -- Implemented `looper rerun` command. 
-- Support use of custom `resources` in pipeline's `compute` section -- Listen for itemized compute resource specification on command-line with `--resources` -- Support pointing to `Project` config file with folder path rather than full filepath -- Add `selector-attribute` parameter for more generic sample selection. - -### Changed -- Switched to a Jinja-style templating system for summary output -- Made various UI changes to adapt to `caravel` use. -- Using `attmap` for "attribute-style key-vale store" implementation -- Removed Python 3.4 support. -- UI: change parameter names `in/exclude-samples` to `selector-in/exclude`. - -## [0.10.0] -- 2018-12-20 - -### Changed -- `PipelineInterface` now derives from `peppy.AttributeDict`. -- On `PipelineInterface`, iteration over pipelines now is with `iterpipes`. -- Rename `parse_arguments` to `build_parser`, which returns `argparse.ArgumentParser` object -- Integers in HTML reports are made more human-readable by including commas. -- Column headers in HTML reports are now stricly for sorting; there's a separate list for plottable columns. -- More informative error messages -- HTML samples list is fully populated. -- Existence of an object lacking an anchor image is no longer problematic for `summarize`. -- Basic package test in Python 3 now succeeds: `python3 setup.py test`. - -## [v0.9.2] -- 2018-11-12 - -### Changed -- Fixed bugs with `looper summarize` when no summarizers were present -- Added CLI flag to force `looper destroy` for programmatic access -- Fixed a bug for samples with duplicate names -- Added new display features (graphs, table display) for HTML summary output. - - -## [0.9.1] -- 2018-06-30 - -### Changed -- Fixed several bugs with `looper summarize` that caused failure on edge cases. 
- -## [0.9.0] -- 2018-06-25 - -### Added -- Support for custom summarizers -- Add `allow-duplicate-names` command-line options -- Allow any variables in environment config files or other `compute` sections to be used in submission templates. This allows looper to be used with containers. -- Add nice universal project-level HTML reporting - -## [0.8.1] -- 2018-04-02 - -### Changed -- Minor documentation and packaging updates for first Pypi release. -- Fix a bug that incorrectly mapped protocols due to case sensitive issues -- Fix a bug with `report_figure` that made it output pandas code - - -## [0.8.0] -- 2018-01-19 - -### Changed -- Use independent `peppy` package, replacing `models` module for core data types. -- Integrate `ProtocolInterface` functionality into `PipelineInterface`. - -## [0.7.2] -- 2017-11-16 -### Changed -- Correctly count successful command submissions when not using `--dry-run`. - -## [0.7.1] -- 2017-11-15 - -### Changed -- No longer falsely display that there's a submission failure. -- Allow non-string values to be unquoted in the `pipeline_args` section. - -## [0.7] -- 2017-11-15 -### Added -- Add `--lump` and `--lumpn` options -- Catch submission errors from cluster resource managers -- Implied columns can now be derived -- Now protocols can be specified on the command-line `--include-protocols` -- Add rudimentary figure summaries -- Simplifies command-line help display -- Allow wildcard protocol_mapping for catch-all pipeline assignment -- Improve user messages -- New sample_subtypes section in pipeline_interface - -### Changed -- Sample child classes are now defined explicitly in the pipeline interface. Previously, they were guessed based on presence of a class extending Sample in a pipeline script. 
-- Changed 'library' key sample attribute to 'protocol' - -## [0.6] -- 2017-07-21 -### Added - - Add support for implied_column section of the project config file - - Add support for Python 3 - - Merges pipeline interface and protocol mappings. This means we now allow direct pointers to `pipeline_interface.yaml` files, increasing flexibility, so this relaxes the specified folder structure that was previously used for `pipelines_dir` (with `config` subfolder). - - Allow URLs as paths to sample sheets. - - Allow tsv format for sample sheets. - - Checks that the path to a pipeline actually exists before writing the submission script. - -### Changed -- Changed LOOPERENV environment variable to PEPENV, generalizing it to generic models -- Changed name of `pipelines_dir` to `pipeline_interfaces` (but maintained backwards compatibility for now). -- Changed name of `run` column to `toggle`, since `run` can also refer to a sequencing run. -- Relaxes many constraints (like resources sections, pipelines_dir columns), making project configuration files useful outside looper. This moves us closer to dividing models from looper, and improves flexibility. -- Various small bug fixes and dev improvements. -- Require `setuptools` for installation, and `pandas 0.20.2`. If `numexpr` is installed, version `2.6.2` is required. 
-- Allows tilde in `pipeline_interfaces` - -## [0.5] -- 2017-03-01 -### Added -- Add new looper version tracking, with `--version` and `-V` options and printing version at runtime -- Add support for asterisks in file paths -- Add support for multiple pipeline directories in priority order -- Revamp of messages make more intuitive output -- Colorize output -- Complete rehaul of logging and test infrastructure, using logging and pytest packages - -### Changed -- Removes pipelines_dir requirement for models, making it useful outside looper -- Small bug fixes related to `all_input_files` and `required_input_files` attributes -- More robust installation and more explicit requirement of Python 2.7 - - -## [0.4] -- 2017-01-12 -### Added -- New command-line interface (CLI) based on sub-commands -- New subcommand (`looper summarize`) replacing the `summarizePipelineStats.R` script -- New subcommand (`looper check`) replacing the `flagCheck.sh` script -- New command (`looper destroy`) to remove all output of a project -- New command (`looper clean`) to remove intermediate files of a project flagged for deletion -- Support for portable and pipeline-independent allocation of computing resources with Looperenv. - -### Changed -- Removed requirement to have `pipelines` repository installed in order to extend base Sample objects -- Maintenance of sample attributes as provided by user by means of reading them in as strings (to be improved further) -- Improved serialization of Sample objects diff --git a/docs/concentric-templates.md b/docs/concentric-templates.md deleted file mode 100644 index 8ca3155a5..000000000 --- a/docs/concentric-templates.md +++ /dev/null @@ -1,60 +0,0 @@ -# Looper's concentric template system - -## Introduction - -To build job scripts, looper uses a 2-level template system consisting of an inner template wrapped by an outer template. The inner template is called a *command template*, which produces the individual commands to execute. 
The outer template is the *submission template*, which wraps the commands in environment handling code. This layered design allows us to decouple the computing environment from the pipeline, which improves portability. - -## The command template - -The command template is specified by a pipeline in the pipeline interface. A very basic command template could be something like this: - -```console -pipeline_command {sample.input_file} --arg -``` - -In the simplest case, looper can run the pipeline by simply running these commands. This example contains no information about computing environment, such as SLURM submission directives. - -## The submission template - -To extend to submitting the commands to a cluster, we simply need to add some more information around the command above, specifying things like memory use, job name, *etc.* It may be tempting to add these details directly to the command template, causing the jobs to be submitted to SLURM instead of run directly. This *would* work; however, this would restrict the pipeline to *only* running via SLURM, since the submission code would be tightly coupled to the command code. Instead, looper retains flexibility by introducing a second template layer, the *submission template*. While the *command template* is specified by the pipeline interface, the *submission template* is specified at the level of the computing environment. A submission template can also be as simple or complex as required. For a command to be run in a local computing environment, a basic template will suffice: - -```console -#! 
/usr/bin/bash - -{CODE} -``` - -A more complicated template could submit a job to a SLURM cluster: - -```console -#!/bin/bash -#SBATCH --job-name='{JOBNAME}' -#SBATCH --output='{LOGFILE}' -#SBATCH --mem='{MEM}' -#SBATCH --cpus-per-task='{CORES}' -#SBATCH --time='{TIME}' -echo 'Compute node:' `hostname` -echo 'Start time:' `date +'%Y-%m-%d %T'` - -srun {CODE} -``` - -In these templates, the `{CODE}` variable is populated with the result of the command template -- that's what makes these templates concentric. - -## The advantages of concentric templates - -Looper first populates the command template, and then provides the output as a variable that is used to populate the `{CODE}` variable in the submission template. This decoupling provides substantial advantages: - -1. The commands can be run on any computing environment by simply switching the submission template. -2. The submission template can be used for any computing environment parameters, such as containers. -3. The submission template only has to be defined once *per environment*, so many pipelines can use them. -4. We can [group multiple individual commands](grouping-jobs.md) into a single submission script. -5. The submission template is universal and can be handled by dedicated submission template software. - -## Looper and divvy - -The last point about the submission template being universal is exactly what looper does. Looper uses [divvy](http://divvy.databio.org) to handle submission templates. Besides being useful for looper, this means the divvy submission templates can be used for interactive submission of jobs, or used by other software. It also means to configure looper to work with your computing environment, you just have to configure divvy. - -## Populating templates - -The task of running jobs can be thought of as simply populating the templates with variables. To do this, Looper provides [variables from several sources](variable-namespaces.md). 
diff --git a/docs/config-files.md b/docs/config-files.md deleted file mode 100644 index bb6bfe5e0..000000000 --- a/docs/config-files.md +++ /dev/null @@ -1,44 +0,0 @@ -# Configuration files - -Looper uses [YAML](http://www.yaml.org/) configuration files for several purposes. -It's designed to be organized, modular, and very configurable, so there are several configuration files. -We've organized these files so that each handles a different level of infrastructure - -- Environment -- Project -- Pipeline - -This makes the system very adaptable and portable, but for a newcomer, it is not always easy to map each to its purpose. -So, here's an explanation of each for you to use as a reference until you are familiar with the whole ecosystem. -Which ones you need to know about will depend on whether you're a pipeline *user* (running pipelines on your project) -or a pipeline *developer* (building your own pipeline). - - -## Pipeline users - -Users (non-developers) of pipelines only need to be aware of one or two config files. - -### Project configuration - -[**project config**](defining-a-project.md) -- this file is specific to each project and contains information about the project's metadata, where the processed files should be saved, and other variables that allow you to configure the pipelines specifically for this project. It follows the standard Portable Encapsulated Project format, or PEP for short. - -### Environment configuration - -[**environment config**](http://divvy.databio.org/en/latest/configuration/) -- if you are planning to submit jobs to a cluster, then you need to be aware of environment configuration. This task is farmed out to [divvy](http://divvy.databio.org/en/latest/), a computing resource configuration manager. Follow the divvy documentation to learn about ways to tweak the computing environment settings according to your needs. - -That should be all you need to worry about as a pipeline user. 
If you need to adjust compute resources or want to develop a pipeline or have more advanced project-level control over pipelines, you'll need knowledge of the config files used by pipeline developers. - - -## Pipeline developers - -### Pipeline configuration - -If you want to make pipeline compatible with looper, tweak the way looper interacts with a pipeline for a given project, -or change the default cluster resources requested by a pipeline, you need to know about a configuration file that coordinates linking pipelines to a project. This happens via the [pipeline interface file](pipeline-interface-specification.md). - -Finally, if you're using [the pypiper framework](https://github.com/databio/pypiper) to develop pipelines, -it uses a pipeline-specific configuration file, which is detailed in the [pypiper documentation](http://pypiper.readthedocs.io/en/latest/advanced.html#pipeline-config-files). - -Essentially, each pipeline may provide a configuration file describing where software is, -and parameters to use for tasks within the pipeline. This configuration file is by default named like pipeline name, -with a `.yaml` extension instead of `.py`. For example, by default `rna_seq.py` looks for an accompanying `rna_seq.yaml` file. diff --git a/docs/containers.md b/docs/containers.md deleted file mode 100644 index fbe26eaad..000000000 --- a/docs/containers.md +++ /dev/null @@ -1,64 +0,0 @@ -# How to run jobs in a linux container - -Because `looper` uses `divvy` for computing configuration, running jobs in containers is easy! `Divvy` can use the same template system to do either cluster computing or to run jobs in linux containers (for example, using `docker` or `singularity`). You can even run jobs in a container *on a cluster*. - -All you need to do is follow the same instructions as in [running jobs on a cluster](running-on-a-cluster.md), but use templates that run those jobs in containers. 
To see examples of how to do this, refer to the [divvy docs on running containers](http://divvy.databio.org/en/latest/containers/). - - -## Overview - -Here is a quick guide to get you started using containers with `looper`: - -### 1. Get your container image. - -This could be a docker image (hosted on dockerhub), which you would download via `docker pull`, or it could be a `singularity` image you have saved in a local folder. This is pipeline-specific, and you'll need to download the image recommended by the authors of the pipeline or pipelines you want to run. - - -### 2. Specify the image in your `pipeline_interface` - -The `pipeline_interface.yaml` file will need a `compute` section for each pipeline that can be run in a container, specifying the image. For example: - - -```yaml -compute: - singularity_image: ${SIMAGES}myimage - docker_image: databio/myimage -``` - -For singularity images, you just need to make sure that the images indicated in the `pipeline_interface` are available in those locations on your system. For docker, make sure you have the docker images pulled. - - -### 3. Configure your `DIVCFG`. - -`Divvy` will need templates that work with the container. This just needs to be set up once for your compute environment, which would enable you to run any pipeline in a container (as long as you have an image). You should set up the DIVCFG compute environment configuration by following instructions in the [DIVCFG readme](https://github.com/pepkit/divcfg). If it's not already container-aware, you will just need to add a new container-aware "compute package" to your DIVCFG file. Here's an example of how to add one for using singularity in a SLURM environment: - -```yaml -singularity_slurm: - submission_template: templates/slurm_singularity_template.sub - submission_command: sbatch - singularity_args: -B /sfs/lustre:/sfs/lustre,/nm/t1:/nm/t1 -``` - -In `singularity_args` you'll need to pass any mounts or other settings to be passed to singularity. 
The actual `slurm_singularity_template.sub` file looks something like this: - -```bash -#!/bin/bash -#SBATCH --job-name='{JOBNAME}' -#SBATCH --output='{LOGFILE}' -#SBATCH --mem='{MEM}' -#SBATCH --cpus-per-task='{CORES}' -#SBATCH --time='{TIME}' -#SBATCH --partition='{PARTITION}' -#SBATCH -m block -#SBATCH --ntasks=1 - -echo 'Compute node:' `hostname` -echo 'Start time:' `date +'%Y-%m-%d %T'` - -singularity instance.start {SINGULARITY_ARGS} {SINGULARITY_IMAGE} {JOBNAME}_image -srun singularity exec instance://{JOBNAME}_image {CODE} - -singularity instance.stop {JOBNAME}_image -``` - -Notice how these values will be used to populate a template that will run the pipeline in a container. Now, to use singularity, you just need to activate this compute package in the usual way, which is using the `package` argument: ``looper run --package singularity_slurm``. diff --git a/docs/contributing.md b/docs/contributing.md deleted file mode 100644 index 412132b02..000000000 --- a/docs/contributing.md +++ /dev/null @@ -1,10 +0,0 @@ -# Contributing - -Pull requests or issues are welcome. - -- After adding tests in `tests` for a new feature or a bug fix, please run the test suite. -- To do so, the only additional dependencies needed beyond those for the package can be installed with: - - `pip install -r requirements/requirements-test.txt` - -- Once those are installed, the tests can be run with `pytest` or `python setup.py test`. diff --git a/docs/defining-a-project.md b/docs/defining-a-project.md deleted file mode 100644 index 14225969d..000000000 --- a/docs/defining-a-project.md +++ /dev/null @@ -1,42 +0,0 @@ -# How to define a project - -## 1. Start with a basic PEP - -To start, you need a project defined in the [standard Portable Encapsulated Project (PEP) format](http://pep.databio.org). Start by [creating a PEP](https://pep.databio.org/en/latest/simple_example/). - -## 2. Specify the Sample Annotation - -This information generally lives in a `project_config.yaml` file. 
- -Simplest example: -```yaml -pep_version: 2.0.0 -sample_table: sample_annotation.csv -``` - -A more complicated example taken from [PEPATAC](https://pepatac.databio.org/en/latest/): - -```yaml -pep_version: 2.0.0 -sample_table: tutorial.csv - -sample_modifiers: - derive: - attributes: [read1, read2] - sources: - # Obtain tutorial data from http://big.databio.org/pepatac/ then set - # path to your local saved files - R1: "${TUTORIAL}/tools/pepatac/examples/data/{sample_name}_r1.fastq.gz" - R2: "${TUTORIAL}/tools/pepatac/examples/data/{sample_name}_r2.fastq.gz" - imply: - - if: - organism: ["human", "Homo sapiens", "Human", "Homo_sapiens"] - then: - genome: hg38 - prealignment_names: ["rCRSd"] - deduplicator: samblaster # Default. [options: picard] - trimmer: skewer # Default. [options: pyadapt, trimmomatic] - peak_type: fixed # Default. [options: variable] - extend: "250" # Default. For fixed-width peaks, extend this distance up- and down-stream. - frip_ref_peaks: None # Default. Use an external reference set of peaks instead of the peaks called from this run -``` \ No newline at end of file diff --git a/docs/derived-columns.md b/docs/derived-columns.md deleted file mode 100644 index c13713f46..000000000 --- a/docs/derived-columns.md +++ /dev/null @@ -1,74 +0,0 @@ -# Derived columns - -On your sample sheet, you will need to point to the input file or files for each sample. -Of course, you could just add a column with the file path, like `/path/to/input/file.fastq.gz`. For example: - -A ***bad* example**: - -```CSV -sample_name,library,organism,time,file_path -pig_0h,RRBS,pig,0,/data/lab/project/pig_0h.fastq -pig_1h,RRBS,pig,1,/data/lab/project/pig_1h.fastq -frog_0h,RRBS,frog,0,/data/lab/project/frog_0h.fastq -frog_1h,RRBS,frog,1,/data/lab/project/frog_1h.fastq -``` - -This is common, and it works in a pinch with Looper, but what if the data get moved, or your filesystem changes, or you switch servers or move institutes? -Will this data still be there in 2 years? 
Do you want long file paths cluttering your annotation sheet? -What if you have 2 or 3 input files? Do you want to manually manage these unwieldy absolute paths? - -Looper makes it really easy to do better. You can make one or your annotation columns into a flexible *derived column* -that will be populated based on a source template you specify in the project configuration file. -What was originally `/long/path/to/sample.fastq.gz` would instead contain just a key, like `source1`. -Columns that use a key like this are called *derived columns*. -Here's an example of the same sheet using a derived column (`file_path`): - -A ***good* example**: -```CSV -sample_name,library,organism,time,file_path -pig_0h,RRBS,pig,0,source1 -pig_1h,RRBS,pig,1,source1 -frog_0h,RRBS,frog,0,source1 -frog_1h,RRBS,frog,1,source1 -``` - -For this to succeed, your project config file must specify two things: -- Which columns are to be derived (in this case, ``file_path``) -- A `data_sources` section mapping keys to strings that will construct your path, like this: - ```yaml - derived_columns: [file_path] - data_sources: - source1: /data/lab/project/{sample_name}.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq - ``` - -That's it! The source string can use other sample attributes (columns) using braces, as in `{sample_name}`. -The attributes will be automatically populated separately for each sample. -To take this a step further, you'd get the same result with this config file, -which substitutes `{sample_name}` for other sample attributes, `{organism}` and `{time}`: - -```yaml -derived_columns: [file_path] -data_sources: - source1: /data/lab/project/{organism}_{time}h.fastq - source2: /path/from/collaborator/weirdNamingScheme_{external_id}.fastq -``` - -As long as your file naming system is systematic, you can easily deal with any external naming scheme, no problem at all. -The idea is this: don't put *absolute* paths to files in your annotation sheet. 
-Instead, specify a data source and then provide a regex in the config file. - -Then if your data change locations (which happens more often than we would like), or you change servers, -or you want to share or publish the project, you just have to change the config file and not update paths in the annotation sheet. -This makes the annotation sheet universal across environments, users, publication, etc. The whole project is now portable. - -You can specify as many derived columns as you want. An expression including any sample attributes (using `{attribute}`) will be populated for each of those columns. - -Think of each sample as belonging to a certain type (for simple experiments, the type will be the same). -Then define the location of these samples in the project configuration file. -As a side bonus, you can easily include samples from different locations, and you can also use the same sample annotation sheet on different environments -(i.e. servers or users) by having multiple project config files (or, better yet, by defining a `subproject` for each environment). -The only thing you have to change is the project-level expression describing the location, not any sample attributes. -Plus, you get to eliminate those annoying `long/path/arguments/in/your/sample/annotation/sheet`. - -Check out the complete working example in the [`microtest` repository](https://github.com/databio/microtest/tree/master/config). diff --git a/docs/divvy/README.md b/docs/divvy/README.md deleted file mode 100644 index a691fda91..000000000 --- a/docs/divvy/README.md +++ /dev/null @@ -1,66 +0,0 @@ -![Logo](../img/divvy_logo.svg) - -## What is `divvy`? - -The submission configuration tool embedded in `looper` is called `divvy`. Divvy is useful independently from looper, but it ships with looper. Divvy allows you to populate job submission scripts by integrating job-specific settings with separately configured computing environment settings. 
Divvy *makes software portable*, so users may easily toggle among any computing resource (laptop, cluster, cloud). - -![Merge](../img/divvy-merge.svg) -## What makes `divvy` better? - -![NoDivvy](../img/nodivvy.svg) - -Tools require a particular compute resource setup. For example, one pipeline requires SLURM, another requires AWS, and yet another just runs directly on your laptop. This makes it difficult to transfer to different environments. For tools that can run in multiple environments, each one must be configured separately. - -
- - -Instead, `divvy`-compatible tools can run on any computing resource. **Users configure their computing environment once, and all divvy-compatible tools will use this same configuration.** - -![Connect](../img/divvy-connect.svg) - -Divvy reads a standard configuration file describing available compute resources and then uses a simple template system to write custom job submission scripts. Computing resources are organized as *compute packages*, which users select, populate with values, and build scripts for compute jobs. - -
- -Use the default compute packages or [configure your own](configuration.md). See what's available: - -```{console} -divvy list -``` - -```{console} -Divvy config: divvy_config.yaml - -docker -default -singularity_slurm -singularity -local -slurm -``` - - -Divvy will take variables from a file or the command line, merge these with environment settings to create a specific job script. Write a submission script from the command line: - -```{console} -divvy write --package slurm \ - --settings myjob.yaml \ - --compute sample=sample1 \ - --outfile submit_script.txt -``` - -### Python interface - -You can also use `divvy` via python interface, or you can use it to make your own python tools divvy-compatible: - -```{python} -import divvy -dcc = divvy.ComputingConfiguration() -dcc.activate_package("slurm") - -# write out a submission script -dcc.write_script("test_script.sub", - {"code": "bowtie2 input.bam output.bam"}) -``` - -For more details, check out the [tutorial](tutorial). diff --git a/docs/divvy/adapters.md b/docs/divvy/adapters.md deleted file mode 100644 index 161fd51e6..000000000 --- a/docs/divvy/adapters.md +++ /dev/null @@ -1,18 +0,0 @@ -# Adapters make template variables flexible - -Starting with `divvy v0.5.0` the configuration file can include an `adapters` section, which is used to provide a set of variable mappings that `divvy` uses to populate the submission templates. - -This makes the connection with `divvy` and client software more flexible and more elegant, since the source of the data does not need to follow any particular naming scheme, any mapping can be used and adapted to work with any `divvy` templates. - -## Example - -```yaml -adapters: - CODE: namespace.command - LOGFILE: namespace1.log_file - JOBNAME: user_settings.program.job_name - CORES: processors_number -... -``` - -As you can see in the example `adapters` section above, each adapter is a key-value pair that maps a `divvy` template variable to a target value. 
The target values can use namespaces (nested mapping). diff --git a/docs/divvy/configuration.md b/docs/divvy/configuration.md deleted file mode 100644 index ad5943e01..000000000 --- a/docs/divvy/configuration.md +++ /dev/null @@ -1,97 +0,0 @@ -# Installing divvy - -Divvy is automatically installed when you install looper. See if your install worked by calling `divvy -h` on the command line. If the `divvy` executable in not in your `$PATH`, append this to your `.bashrc` or `.profile` (or `.bash_profile` on macOS): - -```{console} -export PATH=~/.local/bin:$PATH -``` - -# Initial configuration - -On a fresh install, `divvy` comes pre-loaded with some built-in compute packages, which you can explore by typing `divvy list`. If you need to tweak these or create your own packages, you will need to configure divvy manually. Start by initializing an empty `divvy` config file: - -```{console} -export DIVCFG="divvy_config.yaml" -divvy init $DIVCFG -``` - -This `init` command will create a default config file, along with a folder of templates. - -The `divvy write` and `list` commands require knowing where this genome config file is. You can pass it on the command line all the time (using the -c parameter), but this gets old. An alternative is to set up the $DIVCFG environment variable. Divvy will automatically use the config file in this environmental variable if it exists. Add this line to your `.bashrc` or `.profile` if you want it to persist for future command-line sessions. You can always specify -c if you want to override the value in the $DIVCFG variable on an ad-hoc basis: - -```{console} -export DIVCFG=/path/to/divvy_config.yaml -``` - -# The divvy configuration file - -At the heart of `divvy` is a the *divvy configuration file*, or `DIVCFG` for short. This is a `yaml` file that specifies a user's available *compute packages*. 
Each compute package represents a computing resource; for example, by default we have a package called `local` that populates templates to simply run jobs in the local console, and another package called `slurm` with a generic template to submit jobs to a SLURM cluster resource manager. Users can customize compute packages as much as needed. - -## Configuration file priority lookup - -When `divvy` starts, it checks a few places for the `DIVCFG` file. First, the user may specify a `DIVCFG` file when invoking `divvy` either from the command line or from within python. If the file is not provided, `divvy` will next look for a file path in the `$DIVCFG` environment variable. If it cannot find one there, then it will load a default configuration file with a few basic compute packages. We recommend setting the `DIVCFG` environment variable as the most convenient use case. - -## Customizing your configuration file - -The easiest way to customize your computing configuration is to edit the default configuration file. To get a fresh copy of the default configuration, use `divvy init custom_divvy_config.yaml`. This will create for you a config file along with a folder containing all the default templates. - -Here is an example `divvy` configuration file: - -```{console} -compute_packages: - default: - submission_template: templates/local_template.sub - submission_command: sh - local: - submission_template: templates/local_template.sub - submission_command: sh - develop_package: - submission_template: templates/slurm_template.sub - submission_command: sbatch - partition: develop - big: - submission_template: templates/slurm_template.sub - submission_command: sbatch - partition: bigmem -``` - -The sub-sections below `compute_packages` each define a *compute package* that can be activated. `Divvy` uses these compute packages to determine how to submit your jobs. If you don't specify a package to activate, `divvy` uses the package named `default`. 
You can make your default whatever you like. You can activate any other compute package __on the fly__ by calling the `activate_package` function from python, or using the `--package` command-line option. - -You can make as many compute packages as you wish, and name them whatever you wish. You can also add whatever attributes you like to the compute package. There are only two required attributes: each compute package must specify the `submission_command` and `submission_template` attributes. - -### The `submission_command` attribute - -The `submission_command` attribute is the string your cluster resource manager uses to submit a job. For example, in our compute package named `develop_package`, we've set `submission_command` to `sbatch`. We are telling divvy that submitting this job should be done with: `sbatch submission_script.txt`. - -### The `submission_template` attribute - -Each compute package specifies a path to a template file (`submission_template`). The template file provides a skeleton that `divvy` will populate with job-specific attributes. These paths can be relative or absolute; relative paths are considered *relative to the DIVCFG file*. Let's explore what template files look like next. - -## Template files - -Each compute package must point to a template file with the `submission_template` attribute. These template files are typically stored relative to the `divvy` configuration file. Template files are taken by `divvy`, populated with job-specific information, and then run as scripts. 
Here's an example of a generic SLURM template file: - -```{bash} -#!/bin/bash -#SBATCH --job-name='{JOBNAME}' -#SBATCH --output='{LOGFILE}' -#SBATCH --mem='{MEM}' -#SBATCH --cpus-per-task='{CORES}' -#SBATCH --time='{TIME}' -#SBATCH --partition='{PARTITION}' -#SBATCH -m block -#SBATCH --ntasks=1 - -echo 'Compute node:' `hostname` -echo 'Start time:' `date +'%Y-%m-%d %T'` - -srun {CODE} -``` - -Template files use variables (*e.g.* `{VARIABLE}`), which will be populated independently for each job. If you want to make your own templates, you should check out the default templates (in the [submit_templates](https://github.com/pepkit/divcfg/tree/master/templates) folder). Many users will not need to tweak the template files, but if you need to, you can also create your own templates, giving `divvy` ultimate flexibility to work with any compute infrastructure in any environment. To create a custom template, just follow the examples. Then, point to your custom template in the `submission_template` attribute of a compute package in your `DIVCFG` config file. - - - -## Resources - -You may notice that the compute config file does not specify resources to request (like memory, CPUs, or time). Yet, these are required in order to submit a job to a cluster. **Resources are not handled by the divcfg file** because they not relative to a particular computing environment; instead they vary by pipeline and sample. As such, these items should be provided elsewhere. diff --git a/docs/divvy/containers.md b/docs/divvy/containers.md deleted file mode 100644 index a90d801c3..000000000 --- a/docs/divvy/containers.md +++ /dev/null @@ -1,76 +0,0 @@ - -# Configuring containers with divvy - -The divvy template framework is a natural way to run commands in a container, for example, using `docker` or `singularity`. All we need to do is 1) design a template that will run the job in the container, instead of natively; and 2) create a new compute package that will use that template. 
- -## A template for container runs - -If you start up divvy without giving it a DIVCFG file, it will come with a few default compute packages that include templates for containers. You can also find these in [the divcfg repository](http://github.com/pepkit/divcfg), which includes these scenarios: - -- singularity on SLURM -- singularity on localhost -- docker on localhost -- others - -If you need a different system, looking at those examples should get you started toward making your own. To take a quick example, using singularity on SLURM combines the basic SLURM script template with these lines to execute the run in container: - -``` -singularity instance.start {SINGULARITY_ARGS} {SINGULARITY_IMAGE} {JOBNAME}_image -srun singularity exec instance://{JOBNAME}_image {CODE} -singularity instance.stop {JOBNAME}_image -``` - -This particular template uses some variables provided by different sources: `{JOBNAME}`, `{CODE}`, `{SINGULARITY_ARGS}` and `{SINGULARITY_IMAGE}`. These arguments could be defined at different places. For example, the `{SINGULARITY_IMAGE}` variable should point to a singularity image that could vary by pipeline, so it makes most sense to define this variable individually for each pipeline. So, any pipeline that provides a container should probably include a `singularity_image` attribute providing a place to point to the appropriate container image. - -Of course, you will also need to make sure that you have access to `singularity` command from the compute nodes; on some clusters, you may need to add a `module load singularity` (or some variation) to enable it. - -The `{SINGULARITY_ARGS}` variable comes just right after the `instance.start` command, and can be used to pass any command-line arguments to singularity. We use these, for example, to bind host disk paths into the container. **It is critical that you explicitly bind any file systems with data necessary for the pipeline so the running container can see those files**. 
The [singularity documentation](https://singularity.lbl.gov/docs-mount#specifying-bind-paths) explains this, and you can find other arguments detailed there. Because this setting describes something about the computing environment (rather than an individual pipeline or sample), it makes most sense to put it in the `DIVCFG` file for a particular compute package. The next section includes examples of how to use `singularity_args`. - -If you're using [looper](http://looper.databio.org), the `{JOBNAME}` and `{CODE}` variables will be provided automatically by looper. - -## Adding compute packages for container templates - -To add a package for these templates to a `DIVCFG` file, we just add a new section. There are a few examples in this repository. A singularity example we use at UVA looks like this: - -``` -singularity_slurm: - submission_template: templates/slurm_singularity_template.sub - submission_command: sbatch - singularity_args: --bind /sfs/lustre:/sfs/lustre,/nm/t1:/nm/t1 -singularity_local: - submission_template: templates/localhost_singularity_template.sub - submission_command: sh - singularity_args: --bind /ext:/ext -``` - -These singularity compute packages look just like the typical ones, but just change the `submission_template` to point to the new containerized templates described in the previous section, and then they add the `singularity_args` variable, which is what will populate the `{SINGULARITY_ARGS}` variable in the template. Here we've used these to bind (mount) particular file systems the container will need. You can use these to pass along any environment-specific settings to your singularity container. - -With this setup, if you want to run a singularity container, just specify `--compute singularity_slurm` or `--compute singularity_local` and it will use the appropriate template. 
- -For another example, take a look at the basic `localhost_container.yaml` DIVCFG file, which describes a possible setup for running docker on a local computer: - -``` -compute: - default: - submission_template: templates/localhost_template.sub - submission_command: sh - singularity: - submission_template: templates/localhost_singularity_template.sub - submission_command: sh - singularity_args: --bind /ext:/ext - docker: - submission_template: templates/localhost_docker_template.sub - submission_command: sh - docker_args: | - --user=$(id -u) \ - --env="DISPLAY" \ - --volume ${HOME}:${HOME} \ - --volume="/etc/group:/etc/group:ro" \ - --volume="/etc/passwd:/etc/passwd:ro" \ - --volume="/etc/shadow:/etc/shadow:ro" \ - --volume="/etc/sudoers.d:/etc/sudoers.d:ro" \ - --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \ - --workdir="`pwd`" \ -``` - -Notice the `--volume` arguments, which mount disk volumes from the host into the container. This should work out of the box for most docker users. diff --git a/docs/divvy/default-packages.md b/docs/divvy/default-packages.md deleted file mode 100644 index eed0fa14b..000000000 --- a/docs/divvy/default-packages.md +++ /dev/null @@ -1,6 +0,0 @@ -# Default divvy compute packages - -Divvy comes with a built-in default configuration that provides a few packages and templates. You can configure your own with `divvy init` and then adding whatever you like. The defaults provided can be found at these links: - -- [list of available default packages](https://github.com/pepkit/divvy/blob/master/divvy/submit_templates/default_compute_settings.yaml) -- [default templates](https://github.com/pepkit/divvy/tree/master/divvy/submit_templates) \ No newline at end of file diff --git a/docs/faq.md b/docs/faq.md deleted file mode 100644 index 3ae9b30c1..000000000 --- a/docs/faq.md +++ /dev/null @@ -1,43 +0,0 @@ -# FAQ - - -## What kind of pipelines can `looper` run? - -`Looper` can run samples through *any pipeline that runs on the command line*. 
The flexible [pipeline interface](../pipeline-interface) file allows `looper` to execute arbitrary shell commands. A pipeline may consist of scripts in languages like Perl, Python, or bash, or it may be built with a particular framework. Typically, we use Python pipelines built using the [`pypiper` package](http://pypiper.readthedocs.io), which provides some additional power to `looper`, but that's optional. - - -## Why isn't the `looper` executable available on `PATH`? - -By default, Python packages are installed to `~/.local/bin`. -You can add that location to your path by appending it (`export PATH=$PATH:~/.local/bin`). - -## How can I run my jobs on a cluster? - -Looper uses the external package [divvy](http://code.databio.org/divvy) for cluster computing, making it flexible enough to use with any cluster resource environment. Please see the [tutorial on cluster computing with looper and divvy](running-on-a-cluster.md). - - -## What's the difference between `looper` and `pypiper`? - -[`pypiper`](http://pypiper.readthedocs.io) is a more traditional workflow-building framework; it helps you build pipelines to process individual samples. [`looper`](http://looper.readthedocs.io) is completely pipeline-agnostic, and has nothing to do with individual processing steps; it operates groups of samples (as in a project), submitting the appropriate pipeline(s) to a cluster or server (or running them locally). The two projects are independent and can be used separately, but they are most powerful when combined. They complement one another, together constituting a comprehensive pipeline management system. - -## Why isn't a sample being processed by a pipeline (`Not submitting, flag found: ['*_.flag']`)? - -When using the `run` subcommand, for each sample being processed `looper` first checks for *"flag" files* in the sample's designated output folder for flag files (which can be `_completed.flag`, or `_running.flag`, or `_failed.flag`). 
Typically, we don't want to resubmit a job that's already running or already finished, so by default, `looper` **will *not* submit a job when it finds a flag file**. This is what the message above is indicating. - -If you do in fact want to re-run a sample (maybe you've updated the pipeline, or you want to restart a failed attempt), you can do so by just passing to `looper` at startup the `--ignore-flags` option; this will skip the flag check **for *all* samples**. If you only want to re-run or restart a few samples, it's best to just delete the flag files for the samples you want to restart, then use `looper run` as normal. - -You may be interested in the [usage docs](../usage) for the `looper rerun` command, which runs any failed samples. - -## How can I resubmit a subset of jobs that failed? - -As of version `0.11`, you can use `looper rerun` to submit only jobs with a `failed` flag. By default, `looper` will *not* submit a job that has already run. If you want to restart a sample (maybe you've updated the pipeline, or you want to restart a failed attempt), you can either use `looper rerun` to restart only failed jobs, or you pass `--ignore-flags`, which will **resubmit *all* samples**. If you want more specificity, you can just manually delete the "flag" files for the samples you want to restart, then use `looper run` as normal. - -## Why are computing resources defined in the pipeline interface file instead of in the `divvy` computing configuration file? - -You may notice that the compute config file does not specify resources to request (like memory, CPUs, or time). Yet, these are required in order to submit a job to a cluster. **Resources are not handled by the divcfg file** because they are not specific to a particular computing environment; instead they vary by pipeline and sample. As such, these items should be defined at other stages. - -Resources are defined in the `pipeline_interface.yaml` file that connects looper to a pipeline. 
The reason for this is that pipeline developers are the most likely to know what sort of resources their pipeline requires, so they are in the best position to define the resources requested. For more information on how to adjust resources, see the `compute` section of the [pipeline interface page](pipeline-interface-specification.md). If all the different configuration files seem confusing, now is a good time to review [who's who in configuration files](config-files.md). - -## Which configuration file has which settings? - -There's a list on the [config files page](config-files.md). diff --git a/docs/features.md b/docs/features.md deleted file mode 100644 index c45ff71f9..000000000 --- a/docs/features.md +++ /dev/null @@ -1,49 +0,0 @@ -# Features and benefits - -[cli]: img/cli.svg -[computing]: img/computing.svg -[flexible_pipelines]: img/flexible_pipelines.svg -[job_monitoring]: img/job_monitoring.svg -[resources]: img/resources.svg -[subprojects]: img/subprojects.svg -[collate]: img/collate.svg -[file_yaml]: img/file_yaml.svg -[html]: img/HTML.svg -[modular]: img/modular.svg - - -![modular][modular] **Modular approach to job handling** - -Looper **completely divides job handling from pipeline processing**. This modular approach simplifies the pipeline-building process because pipelines no longer need to worry about sample metadata parsing. - -![file_yaml][file_yaml] **The power of standard PEP format** - -`Looper` inherits a bunch of advantages from [standard PEP format](http://pepkit.github.io): For example, **you only need to learn 1 way to format your project metadata, and it will work with any pipeline**. PEP format allows **subprojects**, which make it easy to define two very similar projects without duplicating project metadata. 
It also makes your project immediately compatible with other tools in pepkit; for example, you can import all your sample metadata (and pipeline results) in an R or python analysis environment with the [pepr](https://github.com/pepkit/pepr) R package or the [peppy](https://github.com/pepkit/peppy) python package. Using PEP's *derived attributes* feature makes projects portable, and can also be used to collate input files across file systems and naming conventions, making it easy to share projects across compute environments or individuals. - - -![computing][computing] **Universal parallelization implementation** - -Looper's sample-level parallelization applies to all pipelines, so individual pipelines do not need to reinvent the wheel. By default `looper` will simply run your jobs serially, but `looper` employs [divvy](http://code.databio.org/divvy) to let you process your pipelines on any cluster resource manager (SLURM, SGE, etc.). Looper also allows you to specify compute queue/partition on-the-fly, by passing the ``--compute`` parameter to your call to ``looper run``, making it flexible if you have complex resource needs. This provides a convenient interface for submitting pipelines either to local compute or to any cluster resource manager, so individual pipeline authors do not need to worry about cluster job submission. - -![flexible_pipelines][flexible_pipelines] **Flexible pipelines** - -Use looper with any pipeline, any library, in any domain. We designed it to work with [pypiper](http://code.databio.org/pypiper), but **looper has an infinitely flexible command-line argument system that will let you configure it to work with any script (pipeline) that accepts command-line arguments**. You can also configure looper to submit multiple pipelines per sample. 
- - -![job_monitoring][job_monitoring] **Job completion monitoring** - -Looper is job-aware and will not submit new jobs for samples that are already running or finished, making it easy to add new samples to existing projects, or re-run failed samples. - - -![resources][resources] **Flexible resources** - -Looper has an easy-to-use resource requesting scheme. With a few lines to define CPU, memory, clock time, or anything else, pipeline authors can specify different computational resources depending on the size of the input sample and pipeline to run. Or, just use a default if you don't want to mess with setup. - -![cli][cli] **Command line interface** - -Looper uses a command-line interface so you have total power at your fingertips. - -![html][html] **Beautiful linked result reports** - -Looper automatically creates an internally linked, portable HTML report highlighting all results for your pipeline, for every pipeline. -For an html report example see: [PEPATAC Gold Summary](https://pepatac.databio.org/en/latest/files/examples/gold/gold_summary.html) \ No newline at end of file diff --git a/docs/grouping-jobs.md b/docs/grouping-jobs.md deleted file mode 100644 index 9c247b4de..000000000 --- a/docs/grouping-jobs.md +++ /dev/null @@ -1,11 +0,0 @@ -# Grouping many jobs into one - -By default, `looper` will translate each row in your `sample_table` into a single job. But perhaps you are running a project with tens of thousands of rows, and each job only takes mere minutes to run; in this case, you'd rather just submit a single job to process many samples. `Looper` makes this easy with the `--lump` and `--lumpn` command line arguments. - -## Lumping jobs by job count: `--lumpn` - -It's quite simple: if you want to run 100 samples in a single job submission script, just tell looper `--lumpn 100`. - -## Lumping jobs by input file size: `--lump` - -But what if your samples are quite different in terms of input file size? 
For example, your project may include many small samples, which you'd like to lump together with 10 jobs to 1, but you also have a few control samples that are very large and should have their own dedicated job. If you just use `--lumpn` with 10 samples per job, you could end up lumping your control samples together, which would be terrible. To alleviate this problem, `looper` provides the `--lump` argument, which uses input file size to group samples together. By default, you specify an argument in number of gigabytes. Looper will go through your samples and accumulate them until the total input file size reaches your limit, at which point it finalizes and submits the job. This will keep larger files in independent runs and smaller files grouped together. diff --git a/docs/how-to-merge-inputs.md b/docs/how-to-merge-inputs.md deleted file mode 100644 index a1d983f9c..000000000 --- a/docs/how-to-merge-inputs.md +++ /dev/null @@ -1,60 +0,0 @@ -# How to handle multiple input files - -*Dealing with multiple input files is described in detail in the [PEP documentation](http://pep.databio.org/en/latest/specification/#project-attribute-subsample_table).* - -Briefly: - -Sometimes you have multiple input files that you want to merge for one sample. For example, a common use case is a single library that was spread across multiple sequencing lanes, yielding multiple input files that need to be merged, and then run through the pipeline as one. Rather than putting multiple lines in your sample annotation sheet, which causes conceptual and analytical challenges, PEP has two ways to merge these: - -1. Use shell expansion characters (like `*` or `[]`) in your file path definitions (good for simple merges) -2. Specify a *sample subannotation tables* which maps input files to samples for samples with more than one input file (infinitely customizable for more complicated merges). 
- - -## Multi-value sample attributes behavior in the pipeline interface command templates - -Both sample subannotation tables and shell expansion characters lead to sample attributes with multiple values, stored in a list of strings (`multi_attr1` and `multi_attr2`), as opposed to a standard scenario, where a single value is stored as a string (`single_attr`): - -``` -Sample -sample_name: sample1 -subsample_name: ['0', '1', '2'] -multi_attr1: ['one', 'two', 'three'] -multi_attr2: ['four', 'five', 'six'] -single_attr: test_val -``` - -### Access individual elements in lists - -A pipeline interface author can leverage that fact and access the individual elements, e.g. iterate over them and append to a string using the Jinja2 syntax: - -```bash -pipeline_name: test_iter -pipeline_type: sample -command_template: > - --input-iter {%- for x in sample.multi_attr1 -%} --test-individual {x} {% endfor %} # iterate over multiple values - --input-single {sample.single_attr} # use the single value as is - -``` - -This results in a submission script that includes the following command: -```bash ---input-iter --test-individual one --test-individual two --test-individual three ---input-single test_val -``` - -### Concatenate elements in lists - -The most common use case is just concatenating the multiple values and separating them with spaces -- **providing multiple input values to a single argument on the command line**. Therefore, all the multi-value sample attributes that have not been processed with Jinja2 logic are automatically concatenated. 
For instance, the following command template in a pipeline interface will result in the submission script presented below: - -Pipeline interface: -```bash -pipeline_name: test_concat -pipeline_type: sample -command_template: > - --input-concat {sample.multi_attr1} # concatenate all the values -``` - -Command in the submission script: -```bash ---input-concat one two three -``` diff --git a/docs/img/HTML.svg b/docs/img/HTML.svg deleted file mode 100644 index 3282c9982..000000000 --- a/docs/img/HTML.svg +++ /dev/null @@ -1,526 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - Openclipart - - - - - - - - - - - - diff --git a/docs/img/cli.svg b/docs/img/cli.svg deleted file mode 100644 index 803ad3b99..000000000 --- a/docs/img/cli.svg +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/collate.svg b/docs/img/collate.svg deleted file mode 100644 index c536fff2e..000000000 --- a/docs/img/collate.svg +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - diff --git a/docs/img/computing.svg b/docs/img/computing.svg deleted file mode 100644 index eb3fb2f8d..000000000 --- a/docs/img/computing.svg +++ /dev/null @@ -1,756 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/divvy-connect.svg b/docs/img/divvy-connect.svg deleted file mode 
100644 index 9bf7c637a..000000000 --- a/docs/img/divvy-connect.svg +++ /dev/null @@ -1,648 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - Tool 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - SLURMcluster - - Tool 2 - - Tool 3 - Cloud - - - - - - - - Laptop - - - - - Tool 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - SLURMcluster - - Tool 2 - - Tool 3 - Cloud - - - - - - - - Laptop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/divvy-merge.svg b/docs/img/divvy-merge.svg deleted file mode 100644 index fefe9cd7d..000000000 --- a/docs/img/divvy-merge.svg +++ /dev/null @@ -1,1066 +0,0 @@ - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Jobsettings - - - - - - Environmentsettings - - - - - - - - Submissionscript - - - - - - - - - - - - - - - - SUB - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - template - command - - - - - - - - - - Command-lineadjustments - - - - diff --git a/docs/img/divvy_bug.svg b/docs/img/divvy_bug.svg deleted file mode 100644 index c9f1472c8..000000000 --- a/docs/img/divvy_bug.svg +++ /dev/null @@ -1,103 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/divvy_logo.svg b/docs/img/divvy_logo.svg deleted file mode 100644 index 0ca13923e..000000000 --- a/docs/img/divvy_logo.svg +++ /dev/null @@ -1,153 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - divvy - - diff --git a/docs/img/divvy_logo_dark.svg b/docs/img/divvy_logo_dark.svg deleted file mode 100644 index b7b6dfc66..000000000 --- a/docs/img/divvy_logo_dark.svg +++ /dev/null @@ -1,153 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - divvy - - diff --git a/docs/img/favicon.ico b/docs/img/favicon.ico deleted file mode 100644 index 
d118e47544f4b640f354e4acba8f78e6e0db2dce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3186 zcmV-&42|=NP)`htcE z5D5xM=&@R9SK9aQ^pD@}vJb7$f?z!zegBbu_jm98o%6l-o_p@OXQ4!>>O56_TR-^t zrK)-_q>q%S{A^$k&=?9Q=oyG}s3S<1KSsjRQn?&gR5*p!CJ)z6uco2aiQ8!a8~~Py z$WXC%V zZ9V+Ykw8v)BAGGF^~KtjLO@YW@wu7W=pK)*E*kYU&Wma{E(B!ZPr$R|+~h2=} z28IhSOs*u}KQlbq>pLl;>xBqduc`|~48?59QSR%)Qg8a+{gwJ?{a<{G*6`K~>iRUo%9u_-dUlU#*i+B+0M;*fXN? zGbVZ1vauCk-tFy_26q7glj|#(cx~CKOKctF|6~lP4tDBfs_TjPgP3 zo8z%$SiA`wulAU1xVectZz}Y4PWMLm=JO}%8HnY#dk~m~?UUdS#t6rfIeKNI7y*r7 zGyU`{o%Dy|U?&-TN>y8c-vbMR;RHL{y4ks}Q2ALcm25pf@`J!S5xGD0|Ap`!U@35@ z%58GzjZJ)H1QO2lMfuiqC$neXwLmv81-K`XRQ~WzfOsO4DB;WRqZk1*!OpNa(HZ7+ zZzNs*8EoYj2E$2q?CIg9{e^3EX0wMa8>Z1W@ll3H7@9!g;uyXXuMlk~B0Yu^!NlSU=)h-5x5&$j0)xmIr zU+g}|%Oe=5^vLHazo!)C(-4tfU)&3I1T5U$*rC(9xA~ljr5PD@MQ~fSxR? z{*%|c2*)gb_QpAWcd$_T`BSUdx^Y@MSH};mDe)jx6e8jWRs(yh+$O6RHE{3J!kN%B z7~_fU9r%NB4xb!iI5mJB-4P-&3!qk2XJxl3*alNZZeb#+>^pYeUY+P1=JD+voaih( zbS#)w#l!2TruW}IU=46kRX+i|AR=8CtGue3Ddl@p{k~Y`Hg8^Yy${n6e)h(>ociYi zF~0uOcB1jjx~IJ>%upnmJ{zngwS&RPGhihkgTo2lJ(ZhcTU_~R*~+gLkptKU^C{r8 zs_svs$npKO-Apl`uJJ1R+98J<6O7NS#J?wgS^!(-at}vl+GJV$A1nh#m z!;q?e7kKDwUzE@P;Mk}gW@B*o<^1HjtGMl&da#$`RU(qi z$f~N9z*(T~rTu+ud$n`S9fQr}^kxs6RX&NLgJ!+WpWnh`xWvp7e*vG;4;Fz<` zw`|Cq^kTpRz?urDu|`lHTsfIdD_V%fEy6L2Kq$_^_8^DaL+NwNh&a^~Avl~&6Ywb@ zhk!=_dIw|qCoBM_HMv=JT?4ZxS5oCRsqr{)Iz|nR?Gh17RW}1~d8?c)pIkSUFKs(< zLFI3rS4Zn4cX}y3Dk5#F>UB8`TqBxL4~u5kFgR@QT<7|uymF|YcTR>#=KYlC^~3$# zx2ze{kh@fMh5)GQp8;<~;uin;!vAvMguSj>>MK}#gO3H%s;KdpBgNQ`00)8Nz%k$i zwioCJ{2~%h_5UF7qe#r+$FFqq#ykB*m3KOXuY7zi&30U1FK|nWNWnhNHehe1t-u^? 
zw|yaSBUwg08n+0Blbq>`@Z!Ecj+_diK!wxbtA90z##$G^W9bH;0|+_GGZ?_iAP`U<>N4n$R-0zQ#QDu$7B@v#SZ5_mQh z1-fLY>i2-Z?iq;jg&!U#k~a$;T5}c4udPRfFfhA>8Fr!Xt^oc4xCcAPOFXGKO~Jqp zA>l9u!(Ox7fW;!RL_~f(UdpHNRUqN3b+X}>rkod5Zj)=LSEqxQ|21C91Bl40sc6&; z>?^W>sa>=LvEg*k*`u^ZYtoj`N=QpD%4r#}U#o$IJ2k zvm)|&YyZ$A|OqH9+gDoh-Y?{$;Ca2p^h}8U62z$eGJ2fQW=d zuTpa0QFT4W;T0ReZ3Et zz3cozMBXlwOji!7YO3lfRn_BXhcpn1r+@MN7v+}iia}Mstg0G{B+|+URCUA)y~~OJ Y12Fe2qw4bamjD0&07*qoM6N<$f_(cL^Z)<= diff --git a/docs/img/favicon_looper.ico b/docs/img/favicon_looper.ico deleted file mode 100644 index d118e47544f4b640f354e4acba8f78e6e0db2dce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3186 zcmV-&42|=NP)`htcE z5D5xM=&@R9SK9aQ^pD@}vJb7$f?z!zegBbu_jm98o%6l-o_p@OXQ4!>>O56_TR-^t zrK)-_q>q%S{A^$k&=?9Q=oyG}s3S<1KSsjRQn?&gR5*p!CJ)z6uco2aiQ8!a8~~Py z$WXC%V zZ9V+Ykw8v)BAGGF^~KtjLO@YW@wu7W=pK)*E*kYU&Wma{E(B!ZPr$R|+~h2=} z28IhSOs*u}KQlbq>pLl;>xBqduc`|~48?59QSR%)Qg8a+{gwJ?{a<{G*6`K~>iRUo%9u_-dUlU#*i+B+0M;*fXN? zGbVZ1vauCk-tFy_26q7glj|#(cx~CKOKctF|6~lP4tDBfs_TjPgP3 zo8z%$SiA`wulAU1xVectZz}Y4PWMLm=JO}%8HnY#dk~m~?UUdS#t6rfIeKNI7y*r7 zGyU`{o%Dy|U?&-TN>y8c-vbMR;RHL{y4ks}Q2ALcm25pf@`J!S5xGD0|Ap`!U@35@ z%58GzjZJ)H1QO2lMfuiqC$neXwLmv81-K`XRQ~WzfOsO4DB;WRqZk1*!OpNa(HZ7+ zZzNs*8EoYj2E$2q?CIg9{e^3EX0wMa8>Z1W@ll3H7@9!g;uyXXuMlk~B0Yu^!NlSU=)h-5x5&$j0)xmIr zU+g}|%Oe=5^vLHazo!)C(-4tfU)&3I1T5U$*rC(9xA~ljr5PD@MQ~fSxR? 
z{*%|c2*)gb_QpAWcd$_T`BSUdx^Y@MSH};mDe)jx6e8jWRs(yh+$O6RHE{3J!kN%B z7~_fU9r%NB4xb!iI5mJB-4P-&3!qk2XJxl3*alNZZeb#+>^pYeUY+P1=JD+voaih( zbS#)w#l!2TruW}IU=46kRX+i|AR=8CtGue3Ddl@p{k~Y`Hg8^Yy${n6e)h(>ociYi zF~0uOcB1jjx~IJ>%upnmJ{zngwS&RPGhihkgTo2lJ(ZhcTU_~R*~+gLkptKU^C{r8 zs_svs$npKO-Apl`uJJ1R+98J<6O7NS#J?wgS^!(-at}vl+GJV$A1nh#m z!;q?e7kKDwUzE@P;Mk}gW@B*o<^1HjtGMl&da#$`RU(qi z$f~N9z*(T~rTu+ud$n`S9fQr}^kxs6RX&NLgJ!+WpWnh`xWvp7e*vG;4;Fz<` zw`|Cq^kTpRz?urDu|`lHTsfIdD_V%fEy6L2Kq$_^_8^DaL+NwNh&a^~Avl~&6Ywb@ zhk!=_dIw|qCoBM_HMv=JT?4ZxS5oCRsqr{)Iz|nR?Gh17RW}1~d8?c)pIkSUFKs(< zLFI3rS4Zn4cX}y3Dk5#F>UB8`TqBxL4~u5kFgR@QT<7|uymF|YcTR>#=KYlC^~3$# zx2ze{kh@fMh5)GQp8;<~;uin;!vAvMguSj>>MK}#gO3H%s;KdpBgNQ`00)8Nz%k$i zwioCJ{2~%h_5UF7qe#r+$FFqq#ykB*m3KOXuY7zi&30U1FK|nWNWnhNHehe1t-u^? zw|yaSBUwg08n+0Blbq>`@Z!Ecj+_diK!wxbtA90z##$G^W9bH;0|+_GGZ?_iAP`U<>N4n$R-0zQ#QDu$7B@v#SZ5_mQh z1-fLY>i2-Z?iq;jg&!U#k~a$;T5}c4udPRfFfhA>8Fr!Xt^oc4xCcAPOFXGKO~Jqp zA>l9u!(Ox7fW;!RL_~f(UdpHNRUqN3b+X}>rkod5Zj)=LSEqxQ|21C91Bl40sc6&; z>?^W>sa>=LvEg*k*`u^ZYtoj`N=QpD%4r#}U#o$IJ2k zvm)|&YyZ$A|OqH9+gDoh-Y?{$;Ca2p^h}8U62z$eGJ2fQW=d zuTpa0QFT4W;T0ReZ3Et zz3cozMBXlwOji!7YO3lfRn_BXhcpn1r+@MN7v+}iia}Mstg0G{B+|+URCUA)y~~OJ Y12Fe2qw4bamjD0&07*qoM6N<$f_(cL^Z)<= diff --git a/docs/img/favicon_looper.svg b/docs/img/favicon_looper.svg deleted file mode 100644 index 8b16d8fee..000000000 --- a/docs/img/favicon_looper.svg +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - diff --git a/docs/img/file_yaml.svg b/docs/img/file_yaml.svg deleted file mode 100644 index 2aaa54142..000000000 --- a/docs/img/file_yaml.svg +++ /dev/null @@ -1,394 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - .SH - - - - - - - - - - - .yaml - - - - - - - - diff --git a/docs/img/flexible_pipelines.svg b/docs/img/flexible_pipelines.svg deleted file mode 100644 index 5a331625c..000000000 --- a/docs/img/flexible_pipelines.svg +++ 
/dev/null @@ -1,270 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - diff --git a/docs/img/job_monitoring.svg b/docs/img/job_monitoring.svg deleted file mode 100644 index 3f09da534..000000000 --- a/docs/img/job_monitoring.svg +++ /dev/null @@ -1,286 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - diff --git a/docs/img/looper_bug.svg b/docs/img/looper_bug.svg deleted file mode 100644 index 27e97ac2d..000000000 --- a/docs/img/looper_bug.svg +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - diff --git a/docs/img/looper_bug_dark.svg b/docs/img/looper_bug_dark.svg deleted file mode 100644 index eb0129501..000000000 --- a/docs/img/looper_bug_dark.svg +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - diff --git a/docs/img/looper_logo.svg b/docs/img/looper_logo.svg deleted file mode 100644 index d60979f69..000000000 --- a/docs/img/looper_logo.svg +++ /dev/null @@ -1,130 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/looper_logo_dark.svg b/docs/img/looper_logo_dark.svg deleted file mode 100644 index 6b7d25ab5..000000000 --- a/docs/img/looper_logo_dark.svg +++ /dev/null @@ -1,122 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/looper_logo_text.svg b/docs/img/looper_logo_text.svg deleted file mode 100644 index 86ce44636..000000000 --- a/docs/img/looper_logo_text.svg +++ /dev/null @@ -1,110 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - looper - - - diff --git a/docs/img/modular.svg b/docs/img/modular.svg deleted file mode 100644 index 10e1edf81..000000000 --- a/docs/img/modular.svg +++ /dev/null @@ -1,118 +0,0 
@@ - - - - - - - - - - - - - - - - image/svg+xml - - - - - Openclipart - - - ftnetwork connected - 2011-01-31T02:06:32 - Originally uploaded by Danny Allen for OCAL 0.18 this icon is part of the flat theme - https://openclipart.org/detail/113647/ftnetwork-connected-by-anonymous - - - Anonymous - - - - - flat - icon - theme - - - - - - - - - - - diff --git a/docs/img/nodivvy.svg b/docs/img/nodivvy.svg deleted file mode 100644 index 50316a87f..000000000 --- a/docs/img/nodivvy.svg +++ /dev/null @@ -1,646 +0,0 @@ - - - - - - - - - - image/svg+xml - - - - - - - - Tool 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - SLURMcluster - - Tool 2 - - Tool 3 - Cloud - - - - - - - - Laptop - - - - - Tool 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - SLURMcluster - - Tool 2 - - Tool 3 - Cloud - - - - - - - - Laptop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/resources.svg b/docs/img/resources.svg deleted file mode 100644 index 944f83f2e..000000000 --- a/docs/img/resources.svg +++ /dev/null @@ -1,635 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - diff --git a/docs/img/subprojects.svg b/docs/img/subprojects.svg deleted file mode 100644 index e35e1db46..000000000 --- a/docs/img/subprojects.svg +++ /dev/null @@ -1,293 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - diff --git a/docs/implied-columns.md b/docs/implied-columns.md deleted file mode 100644 index 7879956ca..000000000 --- a/docs/implied-columns.md +++ /dev/null @@ -1,49 +0,0 @@ -# Implied columns - -At some point, you may have a situation where you need a single sample attribute (or column) -to 
populate several different pipeline arguments with different values. -In other words, the value of a given attribute may *imply* values for other attributes. -It would be nice if you didn't have to enumerate all of these secondary, implied attributes, -and could instead just infer them from the value of the original attribute. - -For example, if my `organism` attribute is `human`, this implies a few other secondary attributes -(which may be project-specific): For one project, I want to set `genome` to `hg38` and `macs_genome_size` to `hs`. -Of course, I could just define columns called `genome` and `macs_genome_size`, but these would be constant across samples, so it feels inefficient and unwieldy. -Plus, changing the aligned genome would require changing the sample annotation sheet (every sample, in fact). -You can certainly do this with `looper`, but a better way is to handle these things at the project level. - -As a more elegant alternative, in a project config file `looper` will recognize a section called `implied_columns`. -Instead of hard-coding `genome` and `macs_genome_size` in the sample annotation sheet, -you can simply specify that the attribute `organism` *implies* additional attribute-value pairs -(which may vary by sample based on the value of the `organism` attribute). -This lets you specify assemblies, genome size, and other similar variables all in your project config file. - -To do this, just add an `implied_columns` section to your project_config.yaml file. Example: - -```yaml -implied_columns: - organism: - human: - genome: "hg38" - macs_genome_size: "hs" - mouse: - genome: "mm10" - macs_genome_size: "mm" -``` - -There are 3 levels in the `implied_columns` hierarchy. -The first (directly under `implied_columns`; here, `organism`), are primary columns from which new attributes will be inferred. -The second layer (here, `human` or `mouse`) are possible values your samples may take in the primary column. 
-The third layer (`genome` and `macs_genome_size`) are the key-value pair of new, implied columns -for any samples with the required value for that primary column. - -In this example, any samples with organism set to `"human"` will automatically also have attributes for `genome` (`"hg38"`) and for `macs_genome_size` (`"hs"`). -Any samples with `organism` set to `"mouse"` will have the corresponding values. -A sample with `organism` set to `"frog"` would lack attributes for `genome` and `macs_genome_size`, since those columns are not implied by `"frog"`. - -This system essentially lets you set global, species-level attributes at the project level instead of duplicating -that information for every sample that belongs to a species. -Even better, it's generic, so you can do this for any partition of samples (just replace `organism` with whatever you like). - -This makes your project more portable and does a better job conceptually with separating sample attributes from project attributes. -After all, a reference assembly is not a property of a sample, but is part of the broader project context. diff --git a/docs/initialize.md b/docs/initialize.md deleted file mode 100644 index 0a2c71537..000000000 --- a/docs/initialize.md +++ /dev/null @@ -1,21 +0,0 @@ -# How to initialize a looper repository - -*This is considered a beta feature and may change in future releases*. - -Looper provides a command `looper init` that allows you to initialize folders as looper repositories. This enables you to use `looper` without passing your PEP every time. - -```bash -looper init pep.yaml -``` - -Now, as long as you are operating from within this directory or any of the subdirectories, you can run any looper command without passing `pep.yaml`: - -```bash -looper run -``` - -The `looper init` command creates a dotfile called `.looper.yaml` in the current directory. 
This file simply points looper to the config file passed as positional argument to `looper init`: - -```yaml -config_file_path: relative/path/to/pep.yaml -``` diff --git a/docs/looper-config.md b/docs/looper-config.md deleted file mode 100644 index 3c2d095ce..000000000 --- a/docs/looper-config.md +++ /dev/null @@ -1,36 +0,0 @@ -# How to use the looper config file - -Starting with `looper` version `>=1.5.0`, you should specify a pipeline interface in the looper config file, rather than in the PEP. - -Example looper config file using local PEP: - -```yaml -pep_config: $HOME/hello_looper-master/project/project_config.yaml -output_dir: "$HOME/hello_looper-master/output" -pipeline_interfaces: - sample: ["$HOME/hello_looper-master/pipeline/pipeline_interface"] - project: "some/project/pipeline" -``` - -In addition, looper>=1.5.0 supports projects from [PEPhub](https://pephub.databio.org/). -Using a PEP from PEPhub allows a user to run a pipeline without downloading the PEP. This allows you to keep the sample table in a centralized, shared location. You need only specify all necessary -environment variables used by the PEP. - -Example looper config file using PEPhub project: - -```yaml -pep_config: pephub::databio/looper:default -output_dir: "$HOME/hello_looper-master/output" -pipeline_interfaces: - sample: ["$HOME/hello_looper-master/pipeline/pipeline_interface"] - project: "$HOME/hello_looper-master/project/pipeline" -``` - -Where: -- `output_dir` is the pipeline output directory, where results will be saved. -- `pep_config` is a local config file or PEPhub registry path. (registry path should be specified in -one of the supported ways: `namespace/name`, `pephub::namespace/name`, `namespace/name:tag`, or `pephub::namespace/name:tag`) -- `pipeline interfaces` is a local path to project or sample pipelines. 
- -To run a pipeline, go to the directory of the .looper.config file and execute the command in your terminal: -`looper run --looper-config {looper_config_path}` or `looper runp --looper-config {looper_config_path}`. diff --git a/docs/looper-report.md b/docs/looper-report.md deleted file mode 100644 index 6cd4a79ea..000000000 --- a/docs/looper-report.md +++ /dev/null @@ -1,13 +0,0 @@ -# Create a Browsable HTML Report - -Looper can create a browsable html report of all project results using the command: - -```terminal -looper report --looper-config .your_looper_config.yaml -``` - -Beginning in Looper 1.7.0, the ``--portable`` flag can be used to create a shareable, zipped version of the html report. - -An example html report output can be found here: [PEPATAC Gold Summary](https://pepatac.databio.org/en/latest/files/examples/gold/gold_summary.html) - -Note: pipestat must be configured by looper to perform this operation. Please see the pipestat section for more information: [Using pipestat](pipestat.md) \ No newline at end of file diff --git a/docs/multiple-pipelines.md b/docs/multiple-pipelines.md deleted file mode 100644 index adc296006..000000000 --- a/docs/multiple-pipelines.md +++ /dev/null @@ -1,22 +0,0 @@ -# A project with multiple pipelines - -In earlier versions of looper (v < 1.0), we used a `protocol_mappings` section to map samples with different `protocol` attributes to different pipelines. In the current pipeline interface (looper v > 1.0), we eliminated the `protocol_mappings`, because this can now be handled using sample modifiers, simplifying the pipeline interface. Now, each pipeline has exactly 1 pipeline interface. You link to the pipeline interface with a sample attribute. 
If you want the same pipeline to run on all samples, it's as easy as using an `append` modifier like this: - -``` -sample_modifiers: - append: - pipeline_interfaces: "test.yaml" -``` - -But if you want to submit different samples to different pipelines, depending on a sample attribute, like `protocol`, you can use an implied attribute: - -``` -sample_modifiers: - imply: - - if: - protocol: [PRO-seq, pro-seq, GRO-seq, gro-seq] # OR - then: - pipeline_interfaces: ["peppro.yaml"] -``` - -This approach uses only functionality of PEPs to handle the connection to pipelines as sample attributes, which provides full control and power using the familiar sample modifiers. It completely eliminates the need for re-inventing this complexity within looper, which eliminated the protocol mapping section to simplify the looper pipeline interface files. You can read more about the rationale of this change in [issue 244](https://github.com/pepkit/looper/issues/244#issuecomment-611154594). diff --git a/docs/parameterizing-pipelines.md b/docs/parameterizing-pipelines.md deleted file mode 100644 index e1c6f3a62..000000000 --- a/docs/parameterizing-pipelines.md +++ /dev/null @@ -1,69 +0,0 @@ -# How to pass extra command-line arguments - -Occasionally, a particular project needs to run a particular flavor of a pipeline. How can you adjust pipeline arguments for just this project? You can use looper *command extras* to solve this problem. Command extras let you pass any string on to the pipeline, which will be appended to the command. - -There are 2 ways to use command extras: for sample pipelines, or for project pipelines: - -## 1. Sample pipeline command extras - -### Adding sample command extras via sample attributes - -Looper uses a reserved sample attribute called `command_extras`, which you can set using general PEP sample modifiers however you wish. 
This will add `--flavor-flag` to the end of the command looper constructs.

Pipeline interface specification

-- `schema_path` (RECOMMENDED | REQUIRED FOR PIPESTAT) - A schema describing the *outputs* of the pipeline.
-- `var_templates` (RECOMMENDED) - A mapping of [Jinja2](https://jinja.palletsprojects.com/en/2.11.x/) templates and corresponding names, typically used to encode paths or other values that can be submission-specific
Within the `command_template`, you have access to variables from several sources. These variables are divided into namespaces depending on the variable source. You can access the values of these variables in the command template using the single-brace jinja2 template language syntax: `{namespace.variable}`. For example, looper automatically creates a variable called `job_name`, which you may want to pass as an argument to your pipeline. You can access this variable with `{looper.job_name}`. The available namespaces are described in detail in [looper variable namespaces](variable-namespaces.md). - -Because it's based on Jinja2, command templates are extremely flexible. For example, optional arguments can be accommodated using Jinja2 syntax, like this: - -``` -command_template: > - {pipeline.path} - --sample-name {sample.sample_name} - --genome {sample.genome} - --input {sample.read1} - --single-or-paired {sample.read_type} - {% if sample.read2 is defined %} --input2 {sample.read2} {% endif %} - {% if sample.peak_caller is defined %} --peak-caller {sample.peak_caller} {% endif %} - {% if sample.FRIP_ref is defined %} --frip-ref-peaks {sample.FRIP_ref} {% endif %} -``` - -Arguments wrapped in Jinja2 conditionals will only be added *if the specified attribute exists for the sample*. - -### input_schema - -The input schema formally specifies the *input processed by this pipeline*. The input schema serves 2 related purposes: - -1. **Validation**. Looper uses the input schema to ensure that the project fulfills all pipeline requirements before submitting any jobs. Looper uses the PEP validation tool, [eido](http://eido.databio.org), to validate input data by ensuring that input samples have the attributes and input files required by the pipeline. Looper will only submit a sample pipeline if the sample validates against the pipeline's input schema. - -2. **Description**. 
Details for how to write a schema can be found in [writing a schema](http://eido.databio.org/en/latest/writing-a-schema/).
- -Here is an example output schema: - -```yaml -title: An example output schema -description: An example description -type: object -properties: - pipeline_name: "default_pipeline_name" - samples: - type: object - properties: - number_of_things: - type: integer - description: "Number of things" - percentage_of_things: - type: number - description: "Percentage of things" - name_of_something: - type: string - description: "Name of something" - switch_value: - type: boolean - description: "Is the switch on or off" - output_file: - $ref: "#/$defs/file" - description: "This a path to the output file" - output_image: - $ref: "#/$defs/image" - description: "This a path to the output image" - md5sum: - type: string - description: "MD5SUM of an object" - highlight: true -$defs: - image: - type: object - object_type: image - properties: - path: - type: string - thumbnail_path: - type: string - title: - type: string - required: - - path - - thumbnail_path - - title - file: - type: object - object_type: file - properties: - path: - type: string - title: - type: string - required: - - path - - title -``` -Looper uses the output schema in its `report` function, which produces a browsable HTML report summarizing the pipeline results. The output schema provides the relative locations to sample-level and project-level outputs produced by the pipeline, which looper can then integrate into the output results. If the output schema is not included, the `looper report` will be unable to locate and integrate the files produced by the pipeline and will therefore be limited to simple statistics. - -### compute - -The compute section of the pipeline interface provides a way to set compute settings at the pipeline level. These variables can then be accessed in the command template. They can also be overridden by values in the PEP config, or on the command line. See the [looper variable namespaces](variable-namespaces.md) for details. 
This example will add 3 variables: `cores`, `mem`, and `time`, which can be accessed via `{compute.cores}`, `{compute.mem}`, and `{compute.time}`. Each row defines a "package" of variable values. Think of it like a group of steps of increasing size.
This section can consist of two subsections: `python_functions` and/or `command_templates`, which specify the pre-submission tasks to be run before the main pipeline command is submitted.
The typical use case is sample-level pipelines. These are run with `looper run`. A pipeline interface defining a sample pipeline must include the `pipeline_type: "sample"` statement.
diff --git a/docs/pipestat.md b/docs/pipestat.md deleted file mode 100644 index d7ced7ef3..000000000 --- a/docs/pipestat.md +++ /dev/null @@ -1,175 +0,0 @@ -# Pipestat - -Starting with version 1.4.0, looper supports additional functionality for [pipestat](http://pipestat.databio.org/)-compatible pipelines. Pipestat-compatible pipelines will allow you to use looper to do 2 things: - -1. monitor the status of pipeline runs -2. summarize the results of pipelines - -For non-pipestat-compatible pipelines, you can still use looper to run pipelines, but you won't be able to use `looper report` or `looper check` to manage their output. - -## Pipestat configuration overview -Starting with version 1.6.0 configuring looper to work with pipestat has changed. - -Now, Looper will obtain pipestat configurations data from two sources: -1. pipeline interface -2. looper_config file - -Looper will combine the necessary configuration data and write a new pipestat configuration file named `looper_pipestat_config.yaml` which looper will place in its output directory. Pipestat then uses this configuration file to create the required PipestatManager objects. See [Hello_Looper](https://github.com/pepkit/hello_looper) for a specific example. - -Briefly, the Looper config file must contain a pipestat field. A project name must be supplied if running a project level pipeline. The user must also supply a file path for a results file if using a local file backend or database credentials if using a postgresql database backend. 
The `PipestatManager` constructor attributes mentioned in the previous section are sourced from either sample attributes (for `looper run`) or project attributes (for `looper runp`).
One of the attributes can be used to specify the [pipestat configuration file](http://pipestat.databio.org/en/latest/config/), which is the other way of configuring pipestat. - -The *names* of the attributes can be adjusted in the PEP configuration file. Let's take a pipestat namespace as an example: by default the value for the namespace is taken from `Sample.sample_name` but can be changed with `looper.pipestat.sample.namespace_attribute` in the PEP configuration file, like so: - -```yaml -looper: - pipestat: - sample: - namespace_attribute: custom_attribute -``` - -Now the value for the pipestat namespace will be sourced from `Sample.custom_attribute` rather than `Sample.sample_name`. - -Similarly, a project-level pipestat namespace can be configured with `looper.pipestat.project.namespace_attribute`: - -```yaml -looper: - pipestat: - project: - namespace_attribute: custom_attribute -``` - -Now the value for the pipestat namespace will be sourced from `Project.custom_attribute` rather than `Project.name`. - -Naturally, this configuration procedure can be applied to other pipestat options. The only exception is pipestat results schema, which is never specified here, since it's sourced from the `output_schema` attribute of the pipeline interface. - -```yaml -looper: - pipestat: - sample: - results_file_attribute: pipestat_results_file - config_attribute: pipestat_config - namespace_attribute: sample_name - project: - results_file_attribute: pipestat_results_file - config_attribute: pipestat_config - namespace_attribute: name -``` - -Again, the values above are defaults -- not needed, but configurable. - -## Examples - -To make the pipestat configuration rules more clear let's consider the following pipestat configuration setups. - -### **Example 1:** All configuration as sample attributes - -In this case the pipestat configuration options are sourced only from the sample attributes. Namely, `pipestat_results_file` and `custom_namespace`. 
For this purpose looper provides **pre-submission hooks**, which allow users to run arbitrary shell commands or Python functions before submitting the actual pipeline.
These hooks have access to all of the job submission settings looper uses to populate the primary command template. They can be used in two ways: 1) to simply run required tasks, producing required output before the pipeline is run; and 2) to modify the job submission settings, which can then be used in the actual submission template. - - -## How to specify pre-submission tasks in the pipeline interface - -The pre-submission tasks to be executed are listed in the [pipeline interface](pipeline-interface-specification.md) file under the top-level `pre_submit` key. The `pre_submit` section is divided into two subsections corresponding to two types of hooks: `python_functions` and `command_templates`. The `python_functions` key specifies a list of strings corresponding to Python functions to run. The `command_templates` key is more generic, specifying shell command templates to be executed in a subprocess. Here is an example: - -```yaml -pre_submit: - python_functions: - - "package_name.function_name" - - "package_name1.function_name" - command_templates: - - "tool.sh --param {sample.attribute}" - - "tool1.sh --param {sample.attribute1}" -``` - -Because the looper variables are the input to each task, and are also potentially modified by each task, the order of execution is critical. Execution order follows two rules: First, `python_functions` are *always* executed before `command_templates`; and second, the user-specified order in the pipeline interface is preserved within each subsection. - -## Built-in pre-submission functions - -Looper ships with several included plugins that you can use as pre-submission functions without installing additional software. These plugins produce various representations of the sample metadata, which can be useful for different types of pipelines. The included plugins are described below: - - -### Included plugin: `looper.write_sample_yaml` - -Saves all sample metadata as a YAML file. 
The output file path can be customized using `var_templates.sample_yaml_path`. If this parameter is not provided, the file will be saved as `{looper.output_dir}/submission/{sample.sample_name}_sample.yaml`. - -**Parameters:** - - - `pipeline.var_templates.sample_yaml_path` (optional): absolute path to file where YAML is to be stored. - -**Usage:** - -```yaml -pipeline_type: sample -var_templates: - main: "{looper.piface_dir}/pipelines/pipeline1.py" - sample_yaml_path: "{looper.output_dir}/custom_sample_yamls" -pre_submit: - python_functions: - - looper.write_sample_yaml -command_template: > - {pipeline.var_templates.main} {sample.sample_yaml_path} ... -``` - -### Included plugin: `looper.write_sample_yaml_cwl` - -This plugin writes a sample yaml file compatible as a job input file for a CWL pipeline. This plugin allows looper to be used as a scatterer to run an independent CWL workflow for each sample in your PEP sample table. You can parametrize the plugin with a custom output file name using `sample_yaml_cwl_path`. If the parameter is not provided, the file will be saved in `{looper.output_dir}/submission/{sample.sample_name}_sample_cwl.yaml`. - -**Parameters:** - - - `pipeline.var_templates.sample_yaml_path` (optional): absolute path to file where YAML is to be stored. - -**Usage:** - -```yaml -pipeline_type: sample -var_templates: - main: "{looper.piface_dir}/pipelines/pipeline1.py" - sample_yaml_cwl_path: "{looper.output_dir}/custom_sample_yamls/custom_{sample.name}.yaml" -pre_submit: - python_functions: - - looper.write_sample_yaml_cwl -command_template: > - {pipeline.var_templates.main} {sample.sample_yaml_cwl} ... -``` - - -### Included plugin: `looper.write_sample_yaml_prj` - -Saves the sample to YAML file with project reference. This plugin can be parametrized with a custom YAML directory (see "parameters" below). If the parameter is not provided, the file will be saved in `{looper.output_dir}/submission/{sample.sample_name}_sample_prj.yaml`. 
- -**Parameters:** - - - `pipeline.var_templates.sample_yaml_prj_path` (optional): absolute path to file where YAML is to be stored. - -**Usage:** - -```yaml -pipeline_type: sample -var_templates: - main: "{looper.piface_dir}/pipelines/pipeline1.py" - sample_yaml_prj_path: "{looper.output_dir}/custom_sample_yamls" -pre_submit: - python_functions: - - looper.write_sample_yaml_prj -command_template: > - {pipeline.var_templates.main} ... -``` - -### Included plugin: `looper.write_submission_yaml` - -Saves all five namespaces of pre-submission to YAML file. This plugin can be parametrized with a custom YAML directory (see "parameters" below). If the parameter is not provided, the file will be saved in `{looper.output_dir}/submission/{sample.sample_name}_submission.yaml`. - -**Parameters:** - - - `pipeline.var_templates.submission_yaml_path` (optional): a complete and absolute path to the *directory* where submission YAML representation is to be stored. - -**Example usage:** - -```yaml -pipeline_type: sample -var_templates: - main: "{looper.piface_dir}/pipelines/pipeline1.py" - submission_yaml_path: "{looper.output_dir}/custom_path" -pre_submit: - python_functions: - - looper.write_submission_yaml -command_template: > - {pipeline.var_templates.main} ... -``` - -### Included plugin: `looper.write_custom_template` - -Populates an independent jinja template with values from all the available looper namespaces. - -**Parameters:** -- `pipeline.var_templates.custom_template` (required): a jinja template to be populated for each job. -- `pipeline.var_templates.custom_template_output` (optional): path to which the populated template file will be saved. 
If not provided, the populated file will be saved in `{looper.output_dir}/submission/{sample.sample_name}_config.yaml`
This works, but we recommend keeping things clean by putting all required pipeline parameters into the [`pipeline.var_templates`](pipeline-interface-specification.md#var_templates) section. This not only keeps things tidy in a particular section, but also adds the additional functionality of making these values templates that can themselves refer to namespace variables, which can be very convenient. For example, a better approach would be: - -```{yaml} -pipeline_name: my_pipeline -pipeline_type: sample -var_templates: - extra_time: 3 - plugin_path: "{looper.piface_dir}/plugin_results" -``` - -In this example you'd use `namespaces["pipeline"]["var_templates"]["extra_time"]` to access the user-provided parameter. Notice we included another example, `plugin_path`, which can refer to the `{looper.piface_dir}` variable. Because this variable is included under `var_templates`, it will be populated with any namespace variables. - -The plugins need to handle incomplete parametrization, either by providing defaults or by raising exceptions. - -#### Function output: updating submission metadata via return value - -One of the features of the pre-submission hooks is that they can be used to update the [looper variable namespaces](variable-namespaces.md) so that you can use modified variables in your primary command template. This is effectively a way for a plugin function to provide output that can be used by looper. The way this works is that after every successful pre-submission hook execution, the input namespaces are updated with the return value of the hook execution. Existing values are overwritten with the returned ones, whereas omitted values are not changed. Therefore, you must simply write your function to return any updated variables in the same format as the function's input. 
That is, your return value should be a Python [`dict`](https://docs.python.org/3/tutorial/datastructures.html#dictionaries) of [looper variable namespaces](variable-namespaces.md). - - -For example, given this input (which represents the looper variable namespaces): - -Input: -```yaml -sample: - name: test - size: 30 - genome: hg38 -looper: - log_file: /home/michal/my_log.txt - job_name: test_pepatac -compute: - submission_template: /home/michal/divvy_templates/localhost_template.sub - submission_command: sh -... -``` - -Say your function returned this data: -```yaml -sample: - size: 1000 -looper: - log_file: /home/michal/Desktop/new_log.txt -``` - -Then looper would have this object available for populating the primary command template (input + returned data): -```yaml -sample: - name: test - size: 1000 - genome: hg38 -looper: - log_file: /home/michal/Desktop/new_log.txt - job_name: test_pepatac -compute: - submission_template: /home/michal/divvy_templates/localhost_template.sub - submission_command: sh -... -``` - -### Shell command plugins - -In case you need more flexibility than a Python function, you can also execute arbitrary commands as a pre-submission task. You define exactly what command you want to run, like this: - -```yaml -var_templates: - compute_script: "{looper.piface_dir}/hooks/script.py" -pre_submit: - command_templates: - - "{pipeline.var_templates.compute_script} --genome {sample.genome} --log-file {looper.output_dir}/log.txt" -``` - -This `command_templates` section specifies a list with one or more entries. Each entry specifies a command. The commands are themselves templates, just like the primary `command_template`, so you have access to the looper variable namespaces to put together the appropriate command. In fact, the only real difference between these `pre_submit.command_templates` and the primary `command_template` is that the latter has access to the changes introduced in the variables by the `pre_submit` commands. 
The inputs to the script are completely user-defined -- you choose what information and how you want to pass it to your script. - -**Output:** The output of your command should be a JSON-formatted string (`str`) that is processed with [json.loads](https://docs.python.org/3/library/json.html#json.loads) and [subprocess.check_output](https://docs.python.org/3/library/subprocess.html#subprocess.check_output) as follows: `json.loads(subprocess.check_output(str))`. This JSON object will be used to update the looper variable namespaces. - -#### Example: Dynamic compute parameters - -In the `compute` section of the pipeline interface, looper allows you to specify a `size_dependent_variables` section, which lets you specify variables with values that are modulated based on the total input file size for the run. This is typically used to add variables for memory, CPU, and clock time to request, if they depend on the input file size. This is a good example of modulating computing variables based on file size, but it is not flexible enough to allow modulated compute variables on the basis of other sample attributes. For a more flexible version, you can use a pre-submission hook. - -The `pre_submit.command_templates` specifies a list of Jinja2 templates to construct a system command run in a subprocess. This command template has access to all of the namespaces available in the primary command template. The command should return a JSON object, which is then used to populate the namespaces. This allows you to specify computing variables that depend on any attributes of a project, sample, or pipeline, which can be used for ultimate flexibility in computing. 
- -**Usage**: - -```yaml -pipeline_type: sample -var_templates: - pipeline_path: "{looper.piface_dir}/pipelines/pepatac.py" - compute_script: "{looper.piface_dir}/hooks/script.py" -pre_submit: - command_templates: - - "{pipeline.var_templates.compute_script} --genome {sample.genome} --log-file {looper.output_dir}/log.txt" -command_template: > - {pipeline.var_templates.pipeline_path} ... -``` - -**Script example:** - -```python -#!/usr/bin/env python3 - -import json -from argparse import ArgumentParser - -parser = ArgumentParser(description="Test script") - -parser.add_argument("-s", "--sample-size", help="Sample size", required=False) -parser.add_argument("-g", "--genome", type=str, help="Genome", required=True) -parser.add_argument("-m", "--log-file", type=str, help="Log file path", required=True) -parser.add_argument("-c", "--custom-cores", type=str, help="Force number of cores to use", required=False) -args = parser.parse_args() - -y = json.dumps({ - "cores": args.custom_cores or "4", - "mem": "10000" if args.genome == "hg38" else "20000", - "time": "00-11:00:00", - "logfile": args.log_file -}) - -print(y) -``` diff --git a/docs/running-a-pipeline.md b/docs/running-a-pipeline.md deleted file mode 100644 index c6aad0f72..000000000 --- a/docs/running-a-pipeline.md +++ /dev/null @@ -1,19 +0,0 @@ -# How to run a pipeline - -You first have to [define your project](defining-a-project.md) and a [config file](looper-config.md). This will give you a PEP linked to a pipeline. Next, we'll run the pipeline. - -The basic command is `looper run`. To run your pipeline, just: - -```console -looper run --looper-config .your_looper_config.yaml -``` - -This will submit a job for each sample. That's basically all there is to it; after this, there's a lot of powerful options and tweaks you can do to control your jobs. Here we'll just mention a few of them. - -- **Dry runs**. You can use `-d, --dry-run` to create the job submission scripts, but not actually run them. 
This is really useful for testing that everything is set up correctly before you commit to submitting hundreds of jobs. -- **Limiting the number of jobs**. You can use `-l, --limit` to test a few before running all samples. You can also use the `--selector-*` arguments to select certain samples to include or exclude. -- **Grouping jobs**. You can use `-u, --lump` or `-n, --lumpn` to group jobs. [More details on grouping jobs](grouping-jobs.md). -- **Changing compute settings**. You can use `-p, --package`, `-s, --settings`, or `-c, --compute` to change the compute templates. Read more in [running on a cluster](running-on-a-cluster.md). -- **Time delay**. You can stagger submissions to not overload a submission engine using `--time-delay`. -- **Use rerun to resubmit jobs**. To run only jobs that previously failed, try `looper rerun`. -- **Tweak the command on-the-fly**. The `--command-extra` arguments allow you to pass extra arguments to every command straight through from looper. See [parameterizing pipelines](parameterizing-pipelines.md). diff --git a/docs/running-on-a-cluster.md b/docs/running-on-a-cluster.md deleted file mode 100644 index 76fe54ae5..000000000 --- a/docs/running-on-a-cluster.md +++ /dev/null @@ -1,24 +0,0 @@ -# How to submit looper jobs to a cluster - -By default, `looper` will build a shell script for each sample and then run it sequentially on the local computer. This is convenient for simple cases, but when it comes time to scale up, this is where `looper` really excels. Looper uses a powerful [concentric template system](concentric-templates.md) that enables looper to run jobs on any cluster resource manager (like SLURM, SGE, LSF, etc.) by simply setting up a template for it. The environment templates are managed by [divvy](http://code.databio.org/divvy). - -## Overview and basic example of cluster computing - -To configure `looper` for cluster computing, you just configure divvy. Divvy is automatically installed when you install looper. 
Briefly, first create a `divvy` computing configuration file using `divvy init`: - -```bash -export DIVCFG="divvy_config.yaml" -divvy init -c $DIVCFG -``` - -Looper will now have access to your computing configuration. You can run `divvy list` to see what compute packages are available in this file. For example, you'll start with a package called 'slurm', which you can use with looper by calling `looper --package slurm`. For many systems (SLURM, SGE, LSF, etc.), the default divvy configuration will work out of the box. If you need to tweak things, the template system is flexible and you can configure it to run in any compute environment. That's all there is to it. - -Complete details on how to configure divvy are described in the [divvy documentation](http://divvy.databio.org). - -## Divvy config file locations - -By default, looper will look for the divvy configuration file in `$DIVCFG`, but you can override this by specifying a path to another file with the `--divvy` argument, like this: - -```bash -looper --divvy /path/to/env_cfg.yaml ... -``` diff --git a/docs/sample-annotation-sheet.md b/docs/sample-annotation-sheet.md deleted file mode 100644 index 0c8a3884f..000000000 --- a/docs/sample-annotation-sheet.md +++ /dev/null @@ -1,47 +0,0 @@ -# Sample annotation sheet - -The *sample annotation sheet* is a CSV file containing information about all samples in a project. -This should be regarded as static and a project's most important metadata. -**One row corresponds to one pipeline run** (if there's just one pipeline run per sample, there's 1:1 correspondence between rows and samples as well.) - -A sample annotation sheet may contain any number of columns you need for your project. -You can think of these columns as *sample attributes*, and you may use these columns later in your pipelines or analysis. -For example, you could define a column called `organism` and use the resulting attribute on a sample to adjust the assembly used by a pipeline through which it's run. 
- -## Special columns - -Certain keyword columns are required or provide `looper`-specific features. -Any additional columns become attributes of your sample and will be part of the project's metadata for the samples. -Mostly, you have control over any other column names you want to add, but there are a few reserved column names: - -- `sample_name` - a **unique** string1 identifying each sample. This is **required** for `Sample` construction, -but it's the *only required column*. -- `organism` - a string identifying the organism ("human", "mouse", "mixed"). ***Recommended** but not required*. -- `library` - While not needed to build a `Sample`, this column is required for submission of job(s). -It specifies the source of data for the sample (e.g. ATAC-seq, RNA-seq, RRBS). -`looper` uses this information to determine which pipelines are relevant for the `Sample`. -- `data_source` - This column is used by default to specify the location of the input data file. -Usually you want your annotation sheet to specify the locations of files corresponding to each sample. -You can use this to simplify pointing to file locations with a neat string-replacement method that keeps things clean and portable. -For more details, see the [derived columns page](derived-columns.md). -Really, you just need any column specifying at least 1 data file for input. This is **required** for `looper` to submit job(s) for a `Sample`. -- `toggle` - If the value of this column is not 1, `looper` will not submit the pipeline for that sample. -This enables you to submit a subset of samples. 
- -Here's an **example** annotation sheet: - -```CSV -sample_name, library, organism, flowcell, lane, BSF_name, data_source -"albt_0h", "RRBS", "albatross", "BSFX0190", "1", "albt_0h", "bsf_sample" -"albt_1h", "RRBS", "albatross", "BSFX0190", "1", "albt_1h", "bsf_sample" -"albt_2h", "RRBS", "albatross", "BSFX0190", "1", "albt_2h", "bsf_sample" -"albt_3h", "RRBS", "albatross", "BSFX0190", "1", "albt_3h", "bsf_sample" -"frog_0h", "RRBS", "frog", "", "", "", "frog_data" -"frog_1h", "RRBS", "frog", "", "", "", "frog_data" -"frog_2h", "RRBS", "frog", "", "", "", "frog_data" -"frog_3h", "RRBS", "frog", "", "", "", "frog_data" - -``` - -1 The sample name should contain no whitespace. If it does, an error will be thrown. -Similarly, `looper` will not allow any duplicate entries under sample_name. diff --git a/docs/support.md b/docs/support.md deleted file mode 100644 index f844c3557..000000000 --- a/docs/support.md +++ /dev/null @@ -1,5 +0,0 @@ -# Support - -Please use the [issue tracker at GitHub](https://github.com/pepkit/looper/issues) to file bug reports or feature requests. - -Looper supports Python 2.7 and Python 3, and has been tested in Linux. If you clone this repository and then an attempt at local installation, e.g. with `pip install --upgrade ./`, fails, this may be due to an issue with `setuptools` and `six`. A `FileNotFoundError` (Python 3) or an `IOError` (Python2), with a message/traceback about a nonexistent `METADATA` file means that this is even more likely the cause. To get around this, you can first manually `pip install --upgrade six` or `pip install six==1.11.0`, as upgrading from `six` from 1.10.0 to 1.11.0 resolves this issue, then retry the `looper` installation. 
diff --git a/docs/usage.md b/docs/usage.md deleted file mode 100644 index c8c58a5fe..000000000 --- a/docs/usage.md +++ /dev/null @@ -1,669 +0,0 @@ -# Usage reference - -Looper doesn't just run pipelines; it can also check and summarize the progress of your jobs, as well as remove all files created by them. - -Each task is controlled by one of the following commands: `run`, `rerun`, `runp` , `table`,`report`, `destroy`, `check`, `clean`, `inspect`, `init` - -- `looper run`: Runs pipelines for each sample, for each pipeline. This will use your `compute` settings to build and submit scripts to your specified compute environment, or run them sequentially on your local computer. - -- `looper runp`: Runs pipelines for each pipeline for project. - -- `looper rerun`: Exactly the same as `looper run`, but only runs jobs with a failed flag. - -- `looper report`: Summarize your project results in a form of browsable HTML pages. - -- `looper table`: This command parses all key-value results reported in the each sample `stats.tsv` and collates them into a large summary matrix, which it saves in the project output directory. This creates such a matrix for each pipeline type run on the project, and a combined master summary table - -- `looper check`: Checks the run progress of the current project. This will display a summary of job status; which pipelines are currently running on which samples, which have completed, which have failed, etc. - -- `looper destroy`: Deletes all output results for this project. - -- `looper inspect`: Display the Project or Sample information - -- `looper init`: Initialize a looper dotfile (`.looper.yaml`) in the current directory - - -Here you can see the command-line usage instructions for the main looper command and for each subcommand: -## `looper --help` -```console -usage: looper [-h] [-v] [--silent] [--verbosity VERBOSITY] [--logdev] - {run,rerun,runp,table,report,destroy,check,clean,init,init_piface,link,inspect} - ... 
- -Looper Pydantic Argument Parser - -commands: - {run,rerun,runp,table,report,destroy,check,clean,init,init_piface,link,inspect} - run Run or submit sample jobs. - rerun Resubmit sample jobs with failed flags. - runp Run or submit project jobs. - table Write summary stats table for project samples. - report Create browsable HTML report of project results. - destroy Remove output files of the project. - check Check flag status of current runs. - clean Run clean scripts of already processed jobs. - init Initialize looper config file. - init_piface Initialize generic pipeline interface. - link Create directory of symlinks for reported results. - inspect Print information about a project. - -optional arguments: - --silent Whether to silence logging (default: False) - --verbosity VERBOSITY - Alternate mode of expression for logging level that - better accords with intuition about how to convey - this. (default: None) - --logdev Whether to log in development mode; possibly among - other behavioral changes to logs handling, use a more - information-rich message format template. 
(default: - False) - -help: - -h, --help show this help message and exit - -v, --version show program's version number and exit -``` - -## `looper run --help` -```console -usage: looper run [-h] [-i] [-t TIME_DELAY] [-d] [-x COMMAND_EXTRA] - [-y COMMAND_EXTRA_OVERRIDE] [-u LUMP] [-n LUMP_N] - [-j LUMP_J] [--divvy DIVVY] [-f] [-c COMPUTE [COMPUTE ...]] - [--package PACKAGE] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL] - [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG] - [-o OUTPUT_DIR] [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - -i, --ignore-flags Ignore run status flags (default: False) - -t TIME_DELAY, --time-delay TIME_DELAY - Time delay in seconds between job submissions (min: 0, - max: 30) (default: 0) - -d, --dry-run Don't actually submit jobs (default: False) - -x COMMAND_EXTRA, --command-extra COMMAND_EXTRA - String to append to every command (default: ) - -y COMMAND_EXTRA_OVERRIDE, --command-extra-override COMMAND_EXTRA_OVERRIDE - Same as command-extra, but overrides values in PEP - (default: ) - -u LUMP, --lump LUMP Total input file size (GB) to batch into one job - (default: None) - -n LUMP_N, --lump-n LUMP_N - Number of commands to batch into one job (default: - None) - -j LUMP_J, --lump-j LUMP_J - Lump samples into number of jobs. (default: None) - --divvy DIVVY Path to divvy configuration file. Default=$DIVCFG env - variable. Currently: not set (default: None) - -f, --skip-file-checks - Do not perform input file checks (default: False) - -c COMPUTE [COMPUTE ...], --compute COMPUTE [COMPUTE ...] 
- List of key-value pairs (k1=v1) (default: []) - --package PACKAGE Name of computing resource package to use (default: - None) - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] - Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? 
(default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper runp --help` -```console -usage: looper runp [-h] [-i] [-t TIME_DELAY] [-d] [-x COMMAND_EXTRA] - [-y COMMAND_EXTRA_OVERRIDE] [-u LUMP] [-n LUMP_N] - [--divvy DIVVY] [-f] [-c COMPUTE [COMPUTE ...]] - [--package PACKAGE] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL] - [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG] - [-o OUTPUT_DIR] [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - -i, --ignore-flags Ignore run status flags (default: False) - -t TIME_DELAY, --time-delay TIME_DELAY - Time delay in seconds between job submissions (min: 0, - max: 30) (default: 0) - -d, --dry-run Don't actually submit jobs (default: False) - -x COMMAND_EXTRA, --command-extra COMMAND_EXTRA - String to append to every command (default: ) - -y COMMAND_EXTRA_OVERRIDE, --command-extra-override COMMAND_EXTRA_OVERRIDE - Same as command-extra, but overrides values in PEP - (default: ) - -u LUMP, --lump LUMP Total input file size (GB) to batch into one job - (default: None) - -n LUMP_N, --lump-n LUMP_N - Number of commands to batch into one job (default: - None) - --divvy DIVVY Path to divvy configuration file. Default=$DIVCFG env - variable. Currently: not set (default: None) - -f, --skip-file-checks - Do not perform input file checks (default: False) - -c COMPUTE [COMPUTE ...], --compute COMPUTE [COMPUTE ...] 
- List of key-value pairs (k1=v1) (default: []) - --package PACKAGE Name of computing resource package to use (default: - None) - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] - Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? 
(default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper rerun --help` -```console -usage: looper rerun [-h] [-i] [-t TIME_DELAY] [-d] [-x COMMAND_EXTRA] - [-y COMMAND_EXTRA_OVERRIDE] [-u LUMP] [-n LUMP_N] - [-j LUMP_J] [--divvy DIVVY] [-f] - [-c COMPUTE [COMPUTE ...]] [--package PACKAGE] - [--settings SETTINGS] [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL] - [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG] - [-o OUTPUT_DIR] [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - -i, --ignore-flags Ignore run status flags (default: False) - -t TIME_DELAY, --time-delay TIME_DELAY - Time delay in seconds between job submissions (min: 0, - max: 30) (default: 0) - -d, --dry-run Don't actually submit jobs (default: False) - -x COMMAND_EXTRA, --command-extra COMMAND_EXTRA - String to append to every command (default: ) - -y COMMAND_EXTRA_OVERRIDE, --command-extra-override COMMAND_EXTRA_OVERRIDE - Same as command-extra, but overrides values in PEP - (default: ) - -u LUMP, --lump LUMP Total input file size (GB) to batch into one job - (default: None) - -n LUMP_N, --lump-n LUMP_N - Number of commands to batch into one job (default: - None) - -j LUMP_J, --lump-j LUMP_J - Lump samples into number of jobs. (default: None) - --divvy DIVVY Path to divvy configuration file. Default=$DIVCFG env - variable. Currently: not set (default: None) - -f, --skip-file-checks - Do not perform input file checks (default: False) - -c COMPUTE [COMPUTE ...], --compute COMPUTE [COMPUTE ...] 
- List of key-value pairs (k1=v1) (default: []) - --package PACKAGE Name of computing resource package to use (default: - None) - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] - Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? 
(default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper report --help` -```console -usage: looper report [-h] [--portable] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] - [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] - [--sel-excl SEL_EXCL] [-l LIMIT] [-k SKIP] - [--pep-config PEP_CONFIG] [-o OUTPUT_DIR] - [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - --portable Makes html report portable. (default: False) - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] - Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] 
- Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? (default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper table --help` -```console -usage: looper table [-h] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL] - [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG] - [-o OUTPUT_DIR] [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] 
- Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? (default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper inspect --help` -```console -usage: looper inspect [-h] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] - [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] - [--sel-excl SEL_EXCL] [-l LIMIT] [-k SKIP] - [--pep-config PEP_CONFIG] [-o OUTPUT_DIR] - [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] 
- Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] - Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? (default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper init --help` -```console -usage: looper init [-h] [-f] [-o OUTPUT_DIR] [--pep-config PEP_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - -optional arguments: - -f, --force-yes Provide upfront confirmation of destruction intent, to - skip console query. 
Default=False (default: False) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - -help: - -h, --help show this help message and exit -``` - -## `looper destroy --help` -```console -usage: looper destroy [-h] [-d] [-f] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] - [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] - [--sel-excl SEL_EXCL] [-l LIMIT] [-k SKIP] - [--pep-config PEP_CONFIG] [-o OUTPUT_DIR] - [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - -d, --dry-run Don't actually submit jobs (default: False) - -f, --force-yes Provide upfront confirmation of destruction intent, to - skip console query. Default=False (default: False) - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] 
- Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? (default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper check --help` -```console -usage: looper check [-h] [--describe-codes] [--itemized] - [-f FLAGS [FLAGS ...]] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL] - [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG] - [-o OUTPUT_DIR] [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - --describe-codes Show status codes description. 
Default=False (default: - False) - --itemized Show detailed overview of sample statuses. - Default=False (default: False) - -f FLAGS [FLAGS ...], --flags FLAGS [FLAGS ...] - Only check samples based on these status flags. - (default: []) - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] - Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] - Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? 
(default: - False) - -help: - -h, --help show this help message and exit -``` - -## `looper clean --help` -```console -usage: looper clean [-h] [-d] [-f] [--settings SETTINGS] - [--exc-flag EXC_FLAG [EXC_FLAG ...]] - [--sel-flag SEL_FLAG [SEL_FLAG ...]] [--sel-attr SEL_ATTR] - [--sel-incl SEL_INCL [SEL_INCL ...]] [--sel-excl SEL_EXCL] - [-l LIMIT] [-k SKIP] [--pep-config PEP_CONFIG] - [-o OUTPUT_DIR] [--config-file CONFIG_FILE] - [--looper-config LOOPER_CONFIG] - [-S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...]] - [-P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...]] - [--pipestat PIPESTAT] [--amend AMEND [AMEND ...]] - [--project] - -optional arguments: - -d, --dry-run Don't actually submit jobs (default: False) - -f, --force-yes Provide upfront confirmation of destruction intent, to - skip console query. Default=False (default: False) - --settings SETTINGS Path to a YAML settings file with compute settings - (default: ) - --exc-flag EXC_FLAG [EXC_FLAG ...] - Sample exclusion flag (default: []) - --sel-flag SEL_FLAG [SEL_FLAG ...] - Sample selection flag (default: []) - --sel-attr SEL_ATTR Attribute for sample exclusion OR inclusion (default: - toggle) - --sel-incl SEL_INCL [SEL_INCL ...] - Include only samples with these values (default: []) - --sel-excl SEL_EXCL Exclude samples with these values (default: ) - -l LIMIT, --limit LIMIT - Limit to n samples (default: None) - -k SKIP, --skip SKIP Skip samples by numerical index (default: None) - --pep-config PEP_CONFIG - PEP configuration file (default: None) - -o OUTPUT_DIR, --output-dir OUTPUT_DIR - Output directory (default: None) - --config-file CONFIG_FILE - Project configuration file (default: None) - --looper-config LOOPER_CONFIG - Looper configuration file (YAML) (default: None) - -S SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...], --sample-pipeline-interfaces SAMPLE_PIPELINE_INTERFACES [SAMPLE_PIPELINE_INTERFACES ...] 
- Paths to looper sample pipeline interfaces (default: - []) - -P PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...], --project-pipeline-interfaces PROJECT_PIPELINE_INTERFACES [PROJECT_PIPELINE_INTERFACES ...] - Paths to looper project pipeline interfaces (default: - []) - --pipestat PIPESTAT Path to pipestat files. (default: None) - --amend AMEND [AMEND ...] - List of amendments to activate (default: []) - --project Is this command executed for project-level? (default: - False) - -help: - -h, --help show this help message and exit -``` - diff --git a/docs/usage.template b/docs/usage.template deleted file mode 100644 index 59ba47b50..000000000 --- a/docs/usage.template +++ /dev/null @@ -1,26 +0,0 @@ -# Usage reference - -Looper doesn't just run pipelines; it can also check and summarize the progress of your jobs, as well as remove all files created by them. - -Each task is controlled by one of the following commands: `run`, `rerun`, `runp` , `table`,`report`, `destroy`, `check`, `clean`, `inspect`, `init` - -- `looper run`: Runs pipelines for each sample, for each pipeline. This will use your `compute` settings to build and submit scripts to your specified compute environment, or run them sequentially on your local computer. - -- `looper runp`: Runs pipelines for each pipeline for project. - -- `looper rerun`: Exactly the same as `looper run`, but only runs jobs with a failed flag. - -- `looper report`: Summarize your project results in a form of browsable HTML pages. - -- `looper table`: This command parses all key-value results reported in the each sample `stats.tsv` and collates them into a large summary matrix, which it saves in the project output directory. This creates such a matrix for each pipeline type run on the project, and a combined master summary table - -- `looper check`: Checks the run progress of the current project. 
This will display a summary of job status; which pipelines are currently running on which samples, which have completed, which have failed, etc. - -- `looper destroy`: Deletes all output results for this project. - -- `looper inspect`: Display the Project or Sample information - -- `looper init`: Initialize a looper dotfile (`.looper.yaml`) in the current directory - - -Here you can see the command-line usage instructions for the main looper command and for each subcommand: diff --git a/docs/using-geofetch.md b/docs/using-geofetch.md deleted file mode 100644 index 113b1252d..000000000 --- a/docs/using-geofetch.md +++ /dev/null @@ -1,35 +0,0 @@ -# How to set up a new GEO project - -You can use [geofetch](http://geofetch.databio.org) to quickly set up a project to run with looper. - -## Download data - -``` -geofetch -i GSE69993 --just-metadata -m metadata -``` - -## Initialize looper - -Make it easier to run looper without specifying the config - -``` -looper init metadata/*.yaml -``` - -## Convert to fastq - -Now, you can convert the files from sra into fastq format: - -``` -looper run --amend sra_convert -``` - -## Run pipeline - -Add a pipeline interface to link to a project - -(Experimental) - -``` -looper mod "pipeline_interfaces: /path/to/piface.yaml" -``` diff --git a/docs/variable-namespaces.md b/docs/variable-namespaces.md deleted file mode 100644 index b3e2b2a8a..000000000 --- a/docs/variable-namespaces.md +++ /dev/null @@ -1,120 +0,0 @@ -# Looper variable namespaces - -## Populating the templates - -Looper creates job scripts using [concentric templates](concentric-templates.md) consisting of a *command template* and a *submission template*. This layered design allows us to decouple the computing environment from the pipeline, which improves portability. The task of running jobs can be thought of as simply populating the templates with variables. These variables are pooled from several sources: - -1. 
the command line, where the user provides any on-the-fly variables for a particular run. -2. the PEP, which provides information on the project and samples. -3. the pipeline interface, which provides information on the pipeline to run. -4. the divvy config file, which provides information on the computing environment. - -Variables from these sources are used to populate the templates to construct the commands to run. To keep things organized, looper groups the variables into namespaces. These namespaces are used first to populate the command template, which produces a built command. This command is then treated as a variable in itself, which is pooled with the other variables to populate the submission template. Looper provides 6 variable namespaces for populating the templates: - -## 1. project - -The `project` namespace contains all PEP config attributes. For example, if you have a config file like this: - -```yaml -pep_version: 2.0.0 -my_variable: 123 -``` - -Then `project.my_variable` would have value `123`. You can use the project namespace to refer to any information in the project. You can use `project.looper` to refer to any attributes in the `looper` section of the PEP. - -## 2. sample or samples - -For sample-level pipelines, the `sample` namespace contains all PEP post-processing sample attributes for the given sample. For project-level pipelines, looper constructs a single job for an entire project, so there is no `sample` namespace; instead, there is a `samples` (plural) namespace, which is a list of all the samples in the project. This can be useful if you need to iterate through all the samples in your command template. - -## 3. pipeline - -Everything under `pipeline` in the pipeline interface for this pipeline. This simply provides a convenient way to annotate pipeline-level variables for use in templates. - -## 4. 
looper - -The `looper` namespace consists of automatic variables created by looper: - -**paths:** - -- `output_dir` -- parent output directory provided in `project.looper.output_dir` in the project configuration file -- `results_subdir` -- the path to the results directory. It is a sub directory of `output_dir` called `project.looper.results_subdir` or "results_pipeline" by default -- `sample_output_folder` -- a sample-specific or project-specific output folder (`results_subdir`/`sample.sample_name`) -- `piface_dir` -- directory the pipeline interface has been read from -- `pep_config` -- path to the project configuration file used for this looper run -- `log_file` -- an automatically created log file path, to be stored in the looper submission subdirectory - -**others:** - -- `total_input_size` -- the sum of file sizes for all files marked as input files in the input schema -- `command` -- the result of populating the command template -- `job_name` -- job name made by concatenating the pipeline identifier and unique sample name - -The `looper.command` value is what enables the two-layer template system, whereby the output of the command template is used as input to the submission template. - -## 5. compute - -The `compute` namespace consists of a group of variables relevant for computing resources. The `compute` namespace has a unique behavior: it aggregates variables from several sources in a priority order, overriding values with more specific ones as priority increases. The list of variable sources in priority order is: - -1. Looper CLI (`--compute` or `--settings` for on-the-fly settings) -2. PEP config, `project.looper.compute` section -3. Pipeline interface, `compute` section -4. Activated divvy compute package (`--package` CLI argument) - -So, the compute namespace is first populated with any variables from the selected divvy compute package. It then updates this with settings given in the `compute` section of the pipeline interface. 
It then updates from the PEP `project.looper.compute`, and then finally anything passed to `--compute` on the looper CLI. This provides a way to modulate looper behavior at the level of a computing environment, a pipeline, a project, or a run, in that order. - -## 6. pipestat - -The `pipestat` namespace conists of a group of variables that reflect the [pipestat](http://pipestat.databio.org) configuration for a submission. - -1. results_file (`pipestat.file`) -2. record_id (`pipestat.record_identifier`) -3. config (`pipestat.config_path`) - -## Mapping variables to submission templates using divvy adapters - -One remaining issue is how to map variables from the looper variable namespaces onto the variables used in divvy templates. Divvy is decoupled from looper, and its templates are completely customizable, so they do not necessarily understand how to connect to looper variables into divvy templates. The default divvy templates use variables like `{CODE}`, `{JOBNAME}`, and `{LOGFILE}`, among others. A user may customize rename these or add custom variables names in divvy templates. How do we map the looper variables onto these arbitrary divvy template variables? Through divvy adapters. - -These variables are linked to looper namespaces via *divvy adapters*. Here are the default divvy adapters: - -```yaml -adapters: - CODE: looper.command - JOBNAME: looper.job_name - CORES: compute.cores - LOGFILE: looper.log_file - TIME: compute.time - MEM: compute.mem - DOCKER_ARGS: compute.docker_args - DOCKER_IMAGE: compute.docker_image - SINGULARITY_IMAGE: compute.singularity_image - SINGULARITY_ARGS: compute.singularity_args -``` - -The divvy adapters is a section in the divvy configuration file that links the divvy template variable (left side) to any other arbitrary variable names (right side). This example, we've populated the adapters with links to the namespaced input variables provided by looper (right side). 
You can adjust this section in your configuration file to map any variables into your submission template. - -## Best practices on storing compute variables - -Since compute variables can be stored in several places, it can be confusing to know where you should put things. Here are some guidelines: - -### Partition or queue name - -Because the partition or queue name is relative to your environment, we don't usually specify this in the `resources` section, but rather, in the `pepenv` config. - -### DIVCFG config file - -Variables that describes settings of a **compute environment** should go in the `DIVCFG` file. Any attributes in the activated compute package will be available to populate template variables. For example, the `partition` attribute is specified in many of our default `DIVCFG` files; that attribute is used to populate a template `{PARTITION}` variable. This is what enables pipelines to work in any compute environment, since we have no control over what your partitions are named. You can also use this to change SLURM queues on-the-fly. - -### Pipeline interface - -Variables that are **specific to a pipeline** can be defined in the `pipeline interface` file, `compute` section.As an example of a variable pulled from the `compute` section, we defined in our `pipeline_interface.yaml` a variable pointing to the singularity or docker image that can be used to run the pipeline, like this: - -```yaml -compute: - singularity_image: /absolute/path/to/images/image -``` - -Now, this variable will be available for use in a template as `{SINGULARITY_IMAGE}`. This makes sense to put in the pipeline interface because it is specific to this pipeline. This path should probably be absolute, because a relative path will be interpreted as relative to the working directory where your job is executed (*not* relative to the pipeline interface). This section is also useful for adjusting the amount of resources we need to request from a resource manager like SLURM. 
For example: `{MEM}`, `{CORES}`, and `{TIME}` are all defined frequently in this section, and they vary for different input file sizes. - -### Project config - -Finally, project-level variables can also be populated from the `compute` section of a project config file. This would enable you to make project-specific compute changes (such as billing a particular project to a particular SLURM resource account). diff --git a/docs/writing-a-pipeline-interface.md b/docs/writing-a-pipeline-interface.md deleted file mode 100644 index 7a9585eb4..000000000 --- a/docs/writing-a-pipeline-interface.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: Pipeline interface specification ---- - -# Writing a pipeline interface - -## Introduction - -If you want to use looper to run samples in a PEP through an arbitrary shell command, you will need to write a pipeline interface. Here is a basic walkthrough to write a simple interface file. Once you've been through this, you can consult the formal [pipeline interface format specification](pipeline-interface-specification.md) for further details and reference. - -## Example - -Let's start with a simple example from the [hello_looper repository](https://github.com/pepkit/hello_looper): - -```yaml -pipeline_name: count_lines -pipeline_type: sample -var_templates: - pipeline: {looper.piface_dir}/count_lines.sh -command_template: {pipeline.var_templates.pipeline} {sample.file} -``` - -You can edit this to start your own interface. - -First, think of a unique name for your pipeline and put it in `pipeline_name`. This will be used for messaging and identification. - -Next, choose a `pipeline_type`, which can be either "sample" or "project". Most likely, you're writing a sample pipeline, but you can read more about [sample and project pipelines](pipeline-tiers.md) if you like. - -Next, we need to set the `pipeline` path to our script. 
This path is relative to the pipeline interface file, so you need to put the pipeline interface somewhere specific relative to the pipeline; perhaps in the same folder or in a parent folder. -Note: previous versions used the `path` variable instead of `var_templates: pipeline:`. However, path functionality will be deprecated in the future. - -Finally, populate the `command_template`. You can use the full power of Jinja2 Python templates here, but most likely you'll just need to use a few variables using curly braces. In this case, we refer to the `count_lines.sh` script with `{pipeline.var_templates.pipeline}`, which points directly to the `pipeline` variable defined above. Then, we use `{sample.file}` to refer to the `file` column in the sample table specified in the PEP. This pipeline thus takes a single positional command-line argument. You can make the command template much more complicated and refer to any sample or project attributes, as well as a bunch of [other variables made available by looper](variable-namespaces.md). - -Now, you have a basic functional pipeline interface. There are many more advanced features you can use to make your pipeline more powerful, such as providing a schema to specify inputs or outputs, making input-size-dependent compute settings, and more. For complete details, consult the formal [pipeline interface format specification](pipeline-interface-specification.md). 
diff --git a/docs_jupyter/build/.gitignore b/docs_jupyter/build/.gitignore deleted file mode 100644 index d6b7ef32c..000000000 --- a/docs_jupyter/build/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/docs_jupyter/cli_divvy.ipynb b/docs_jupyter/cli_divvy.ipynb deleted file mode 100644 index 5b027bf62..000000000 --- a/docs_jupyter/cli_divvy.ipynb +++ /dev/null @@ -1,390 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# command-line tutorial\n", - "\n", - "`Divvy` also provides a command-line interface that gives you the same power as the python API. You can use `--help` to get a list of the command-line options:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "version: 0.5.0\n", - "usage: divvy [-h] [--version] [--verbosity V] [--silent] [--logdev]\n", - " {write,init,list,submit} ...\n", - "\n", - "divvy - write compute job scripts that can be submitted to any computing\n", - "resource\n", - "\n", - "positional arguments:\n", - " {write,init,list,submit}\n", - " write Write a job script\n", - " init Initialize a new divvy config file\n", - " list List available compute packages\n", - " submit Write and then submit a job script\n", - "\n", - "optional arguments:\n", - " -h, --help show this help message and exit\n", - " --version show program's version number and exit\n", - " --verbosity V Set logging level (1-5 or logging module level name)\n", - " --silent Silence logging. 
Overrides verbosity.\n", - " --logdev Expand content of logging message format.\n", - "\n", - "https://divvy.databio.org\n" - ] - } - ], - "source": [ - "divvy --help" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The `list` command\n", - "\n", - "Let's first use `divvy list` to show us our available computing packages:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using default config. No config found in env var: ['DIVCFG', 'PEPENV']\n", - "Using divvy config: /home/nsheff/.local/lib/python2.7/site-packages/divvy/default_config/divvy_config.yaml\n", - "Available compute packages:\n", - "\n", - "default\n", - "slurm\n", - "singularity_slurm\n", - "singularity\n", - "local\n", - "docker\n" - ] - }, - { - "ename": "", - "evalue": "1", - "output_type": "error", - "traceback": [] - } - ], - "source": [ - "divvy list" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The `write` command\n", - "\n", - "Use `divvy write` to actually write a new script using a template. To do this, you'll need to provide 3 things: a template (which comes from your compute package), a settings file with variables, and an outfile.\n", - "\n", - "\n", - "## The settings file\n", - "\n", - "The settings argument is where you can pass an existing `yaml` file with key-value pairs. 
Here's a simple example:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 4-0-0\n", - "logfile: results.log\n", - "cores: 6\n", - "partition: large_mem\n", - "mem: 16G\n" - ] - } - ], - "source": [ - "cat settings.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's take a look at the template we are going to use by activating the `slurm` package" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "#SBATCH --job-name='{JOBNAME}'\n", - "#SBATCH --output='{LOGFILE}'\n", - "#SBATCH --mem='{MEM}'\n", - "#SBATCH --cpus-per-task='{CORES}'\n", - "#SBATCH --time='{TIME}'\n", - "#SBATCH --partition='{PARTITION}'\n", - "#SBATCH -m block\n", - "#SBATCH --ntasks=1\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "{CODE}\n" - ] - } - ], - "source": [ - "cat ../divvy/default_config/divvy_templates/slurm_template.sub" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use `divvy` to populate that template with our list of variables above, like this:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using default config. 
No config found in env var: ['DIVCFG', 'PEPENV']\n", - "Using divvy config: /home/nsheff/.local/lib/python2.7/site-packages/divvy/default_config/divvy_config.yaml\n", - "Activating compute package 'slurm'\n", - "Loading settings file: settings.yaml\n", - "Writing script to /home/nsheff/code/divvy/docs_jupyter/test.sub\n" - ] - } - ], - "source": [ - "divvy write -p slurm -s settings.yaml -o test.sub" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can take a look at what our sbumission scripts looks like." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "#SBATCH --job-name='{JOBNAME}'\n", - "#SBATCH --output='results.log'\n", - "#SBATCH --mem='16G'\n", - "#SBATCH --cpus-per-task='6'\n", - "#SBATCH --time='4-0-0'\n", - "#SBATCH --partition='large_mem'\n", - "#SBATCH -m block\n", - "#SBATCH --ntasks=1\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "{CODE}\n" - ] - } - ], - "source": [ - "cat test.sub" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We populated several variables, like `{LOGFILE}` and `{TIME}`, from the `settings.yaml` file. However, the `{CODE}` and `{JOBNAME}` variables are still unpopulated, so this submission script is incomplete. To remedy this, we'll use `divvy`'s command-line variable passing: any non-interpreted arguments passed to `divvy` are assumed to be variables to populate the template. These command-line variables are considered highest priority and so will override any values in the more distant locations. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using default config. 
No config found in env var: ['DIVCFG', 'PEPENV']\n", - "Using divvy config: /home/nsheff/.local/lib/python2.7/site-packages/divvy/default_config/divvy_config.yaml\n", - "Activating compute package 'slurm'\n", - "Loading settings file: settings.yaml\n", - "Writing script to /home/nsheff/code/divvy/docs_jupyter/test.sub\n" - ] - } - ], - "source": [ - "divvy write -p slurm -s settings.yaml -o test.sub -c code=run-this-cmd jobname=12345 time=6-0-0" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "#SBATCH --job-name='12345'\n", - "#SBATCH --output='results.log'\n", - "#SBATCH --mem='16G'\n", - "#SBATCH --cpus-per-task='6'\n", - "#SBATCH --time='6-0-0'\n", - "#SBATCH --partition='large_mem'\n", - "#SBATCH -m block\n", - "#SBATCH --ntasks=1\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "run-this-cmd\n" - ] - } - ], - "source": [ - "cat test.sub" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we have a complete script, which we can run with `sbatch test.sub`. Notice also that the `time` variable uses the one provided on the CLI rather than the one provided in the `settings.yaml` file, because the CLI has a higher priority.\n", - "\n", - "Variables can come from these 3 sources, in order of increasing priority: 1) compute package (defined in the `divvy` configuration file and selected with the `-p` or `--package` argument); 2) `settings.yaml` file, passed with `-s` or `--settings`; 3) any additional variables passed on the command line as key-value pairs to `-c`." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Submitting jobs\n", - "\n", - "Let's try actually submitting these jobs with `divvy submit`:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using default config. No config found in env var: ['DIVCFG', 'PEPENV']\n", - "Using divvy config: /home/nsheff/.local/lib/python2.7/site-packages/divvy/default_config/divvy_config.yaml\n", - "Activating compute package 'slurm'\n", - "Loading settings file: settings.yaml\n", - "Writing script to /home/nsheff/code/divvy/docs_jupyter/test.sub\n", - "sbatch test.sub\n", - "sh: 1: sbatch: not found\n" - ] - } - ], - "source": [ - "divvy submit -p slurm -s settings.yaml -o test.sub -c code=run-this-cmd jobname=12345 time=6-0-0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The *slurm* package uses `sbatch` as its `submission_command`, but since I'm running this locally, it won't run as I have no `sbatch` command available. Let's try `local` instead:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using default config. 
No config found in env var: ['DIVCFG', 'PEPENV']\n", - "Using divvy config: /home/nsheff/.local/lib/python2.7/site-packages/divvy/default_config/divvy_config.yaml\n", - "Activating compute package 'local'\n", - "Loading settings file: settings.yaml\n", - "Writing script to /home/nsheff/code/divvy/docs_jupyter/test.sub\n", - "sh test.sub\n", - "Compute node: zither\n", - "Start time: 2020-05-19 07:46:03\n", - "build\n", - "cli.ipynb\n", - "debug.ipynb\n", - "results.log\n", - "settings.yaml\n", - "test_local.sub\n", - "test_script.sub\n", - "test.sub\n", - "tutorial.ipynb\n" - ] - } - ], - "source": [ - "divvy submit -p local -s settings.yaml -o test.sub -c code=ls" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There I switched the command to `ls`, which shows you a result of everything on this computer." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs_jupyter/debug_divvy.ipynb b/docs_jupyter/debug_divvy.ipynb deleted file mode 100644 index 050581e69..000000000 --- a/docs_jupyter/debug_divvy.ipynb +++ /dev/null @@ -1,56 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you want to explore `divvy` with more output, you can turn on debug mode mode like this:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import divvy\n", - "\n", - "divvy.setup_divvy_logger(\"DEBUG\", devmode=True)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { 
- "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs_jupyter/hello-world.ipynb b/docs_jupyter/hello-world.ipynb deleted file mode 100644 index e6119f62e..000000000 --- a/docs_jupyter/hello-world.ipynb +++ /dev/null @@ -1,524 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hello World! example for looper\n", - "\n", - "This tutorial demonstrates how to install `looper` and use it to run a pipeline on a PEP project. \n", - "\n", - "## 1. Install the latest version of looper:\n", - "\n", - "```console\n", - "pip install --user --upgrade looper\n", - "```\n", - "\n", - "## 2. Download and unzip the hello_looper repository\n", - "\n", - "The [hello looper repository](http://github.com/pepkit/hello_looper) contains a basic functional example project (in `/project`) and a looper-compatible pipeline (in `/pipeline`) that can run on that project. Let's download and unzip it:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-11-08 17:27:01-- https://github.com/pepkit/hello_looper/archive/refs/heads/master.zip\n", - "Resolving github.com (github.com)... 140.82.114.3\n", - "Connecting to github.com (github.com)|140.82.114.3|:443... connected.\n", - "HTTP request sent, awaiting response... 302 Found\n", - "Location: https://codeload.github.com/pepkit/hello_looper/zip/refs/heads/master [following]\n", - "--2023-11-08 17:27:01-- https://codeload.github.com/pepkit/hello_looper/zip/refs/heads/master\n", - "Resolving codeload.github.com (codeload.github.com)... 140.82.113.10\n", - "Connecting to codeload.github.com (codeload.github.com)|140.82.113.10|:443... 
connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: unspecified [application/zip]\n", - "Saving to: ‘master.zip’\n", - "\n", - "master.zip [ <=> ] 13.37K --.-KB/s in 0.03s \n", - "\n", - "2023-11-08 17:27:01 (472 KB/s) - ‘master.zip’ saved [13693]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://github.com/pepkit/hello_looper/archive/refs/heads/master.zip" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Archive: master.zip\r\n", - "73ef08e38d3e17fd3d4f940282c80e3ee4dbb91f\r\n", - " creating: hello_looper-master/\r\n", - " inflating: hello_looper-master/.gitignore \r\n", - " inflating: hello_looper-master/.looper.yaml \r\n", - " inflating: hello_looper-master/.looper_pephub.yaml \r\n", - " inflating: hello_looper-master/.looper_pipestat.yaml \r\n", - " inflating: hello_looper-master/.looper_pipestat_shell.yaml \r\n", - " inflating: hello_looper-master/README.md \r\n", - " creating: hello_looper-master/data/\r\n", - " inflating: hello_looper-master/data/frog1_data.txt \r\n", - " inflating: hello_looper-master/data/frog2_data.txt \r\n", - " inflating: hello_looper-master/looper_pipelines.md \r\n", - " creating: hello_looper-master/old_specification/\r\n", - " inflating: hello_looper-master/old_specification/README.md \r\n", - " creating: hello_looper-master/old_specification/data/\r\n", - " inflating: hello_looper-master/old_specification/data/frog1_data.txt \r\n", - " inflating: hello_looper-master/old_specification/data/frog2_data.txt \r\n", - " creating: hello_looper-master/old_specification/pipeline/\r\n", - " inflating: hello_looper-master/old_specification/pipeline/count_lines.sh \r\n", - " inflating: hello_looper-master/old_specification/pipeline/pipeline_interface.yaml \r\n", - " creating: hello_looper-master/old_specification/project/\r\n", - " inflating: 
hello_looper-master/old_specification/project/project_config.yaml \r\n", - " inflating: hello_looper-master/old_specification/project/sample_annotation.csv \r\n", - " creating: hello_looper-master/pipeline/\r\n", - " inflating: hello_looper-master/pipeline/count_lines.sh \r\n", - " inflating: hello_looper-master/pipeline/pipeline_interface.yaml \r\n", - " inflating: hello_looper-master/pipeline/pipeline_interface_project.yaml \r\n", - " creating: hello_looper-master/pipeline_pipestat/\r\n", - " inflating: hello_looper-master/pipeline_pipestat/count_lines.py \r\n", - " inflating: hello_looper-master/pipeline_pipestat/count_lines_pipestat.sh \r\n", - " inflating: hello_looper-master/pipeline_pipestat/pipeline_interface.yaml \r\n", - " inflating: hello_looper-master/pipeline_pipestat/pipeline_interface_shell.yaml \r\n", - " inflating: hello_looper-master/pipeline_pipestat/pipestat_output_schema.yaml \r\n", - " creating: hello_looper-master/project/\r\n", - " inflating: hello_looper-master/project/project_config.yaml \r\n", - " inflating: hello_looper-master/project/sample_annotation.csv \r\n" - ] - } - ], - "source": [ - "!unzip master.zip" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Run it\n", - "\n", - "Run it by changing to the directory and then invoking `looper run` on the project configuration file." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looper version: 1.5.2-dev\r\n", - "Command: run\r\n", - "Using default divvy config. 
You may specify in env var: ['DIVCFG']\r\n", - "Pipestat compatible: False\r\n", - "\u001b[36m## [1 of 2] sample: frog_1; pipeline: count_lines\u001b[0m\r\n", - "/home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/pipeline/count_lines.sh data/frog1_data.txt\r\n", - "Writing script to /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_1.sub\r\n", - "Job script (n=1; 0.00Gb): /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_1.sub\r\n", - "Compute node: databio\r\n", - "Start time: 2023-11-08 17:29:45\r\n", - "wc: data/frog1_data.txt: No such file or directory\r\n", - "Number of lines: \r\n", - "\u001b[36m## [2 of 2] sample: frog_2; pipeline: count_lines\u001b[0m\r\n", - "/home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/pipeline/count_lines.sh data/frog2_data.txt\r\n", - "Writing script to /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_2.sub\r\n", - "Job script (n=1; 0.00Gb): /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_2.sub\r\n", - "Compute node: databio\r\n", - "Start time: 2023-11-08 17:29:45\r\n", - "wc: data/frog2_data.txt: No such file or directory\r\n", - "Number of lines: \r\n", - "\r\n", - "Looper finished\r\n", - "Samples valid for job generation: 2 of 2\r\n", - "Commands submitted: 2 of 2\r\n", - "Jobs submitted: 2\r\n", - "{'Pipestat compatible': False, 'Commands submitted': '2 of 2', 'Jobs submitted': 2}\r\n", - "\u001b[0m" - ] - } - ], - "source": [ - "!looper run --looper-config hello_looper-master/.looper.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Voila! You've run your very first pipeline across multiple samples using `looper`!" 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Exploring the results\n", - "\n", - "Now, let's inspect the `hello_looper` repository you downloaded. It has 3 components, each in a subfolder:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "hello_looper-master/data/\r\n", - "├── frog1_data.txt\r\n", - "└── frog2_data.txt\r\n", - "hello_looper-master/pipeline/\r\n", - "├── count_lines.sh\r\n", - "└── pipeline_interface.yaml\r\n", - "hello_looper-master/project/\r\n", - "├── project_config.yaml\r\n", - "└── sample_annotation.csv\r\n", - "\r\n", - "0 directories, 6 files\r\n" - ] - } - ], - "source": [ - "!tree hello_looper-master/*/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "These are:\n", - "\n", - " * `/data` -- contains 2 data files for 2 samples. These input files were each passed to the pipeline.\n", - " * `/pipeline` -- contains the script we want to run on each sample in our project. Our pipeline is a very simple shell script named `count_lines.sh`, which (duh!) counts the number of lines in an input file.\n", - " * `/project` -- contains 2 files that describe metadata for the project (`project_config.yaml`) and the samples (`sample_annotation.csv`). This particular project describes just two samples listed in the annotation file. These files together make up a [PEP](http://pep.databio.org)-formatted project, and can therefore be read by any PEP-compatible tool, including `looper`.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "When we invoke `looper` from the command line we told it to `run project/project_config.yaml`. 
`looper` reads the [project/project_config.yaml](https://github.com/pepkit/hello_looper/blob/master/project/project_config.yaml) file, which points to a few things:\n", - "\n", - " * the [project/sample_annotation.csv](https://github.com/pepkit/hello_looper/blob/master/project/sample_annotation.csv) file, which specifies a few samples, their type, and path to data file\n", - " * the `output_dir`, which is where looper results are saved. Results will be saved in `$HOME/hello_looper_results`.\n", - " * the `pipeline_interface.yaml` file, ([pipeline/pipeline_interface.yaml](https://github.com/pepkit/hello_looper/blob/master/pipeline/pipeline_interface.yaml)), which tells looper how to connect to the pipeline ([pipeline/count_lines.sh](https://github.com/pepkit/hello_looper/blob/master/pipeline/)).\n", - "\n", - "The 3 folders (`data`, `project`, and `pipeline`) are modular; there is no need for these to live in any predetermined folder structure. For this example, the data and pipeline are included locally, but in practice, they are usually in a separate folder; you can point to anything (so data, pipelines, and projects may reside in distinct spaces on disk). You may also include more than one pipeline interface in your `project_config.yaml`, so in a looper project, many-to-many relationships are possible." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Looper config\n", - "\n", - "The [looper config](looper-config.md) contains paths to the project config, the output_dir as well as any dfine pipeline interfaces. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pep_config: project/project_config.yaml # local path to pep config\r\n", - "# pep_config: pepkit/hello_looper:default # you can also use a pephub registry path\r\n", - "output_dir: \"results\"\r\n", - "pipeline_interfaces:\r\n", - " sample: pipeline/pipeline_interface.yaml\r\n" - ] - } - ], - "source": [ - "!cat hello_looper-master/.looper.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## Project Config\n", - "\n", - "The project config file contains the PEP version and sample annotation sheet. (see [defining a project](defining-a-project.md)).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pep_version: 2.0.0\r\n", - "sample_table: sample_annotation.csv\r\n" - ] - } - ], - "source": [ - "!cat hello_looper-master/project/project_config.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Pipeline Interface\n", - "\n", - "The [pipeline interface](pipeline-interface-specification.md) shows the pipeline_name, pipeline_type, as well as the var_templates and command_templates used for this pipeline.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pipeline_name: count_lines\r\n", - "pipeline_type: sample\r\n", - "var_templates:\r\n", - " pipeline: '{looper.piface_dir}/count_lines.sh'\r\n", - "command_template: >\r\n", - " {pipeline.var_templates.pipeline} {sample.file}\r\n" - ] - } - ], - "source": [ - "!cat hello_looper-master/pipeline/pipeline_interface.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alright, next let's explore what this pipeline stuck into our 
`output_dir`:\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/nsheff/hello_looper_results\r\n", - "├── results_pipeline\r\n", - "└── submission\r\n", - " ├── count_lines.sh_frog_1.log\r\n", - " ├── count_lines.sh_frog_1.sub\r\n", - " ├── count_lines.sh_frog_2.log\r\n", - " ├── count_lines.sh_frog_2.sub\r\n", - " ├── frog_1.yaml\r\n", - " └── frog_2.yaml\r\n", - "\r\n", - "2 directories, 6 files\r\n" - ] - } - ], - "source": [ - "!tree $HOME/hello_looper_results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "Inside of an `output_dir` there will be two directories:\n", - "\n", - "- `results_pipeline` - a directory with output of the pipeline(s), for each sample/pipeline combination (often one per sample)\n", - "- `submissions` - which holds a YAML representation of each sample and a log file for each submitted job\n", - "\n", - "From here to running hundreds of samples of various sample types is virtually the same effort!\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Running PEPs from PEPHub\n", - "\n", - "Looper also supports running a PEP from [PEPHub](https://pephub.databio.org/)!" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pep_config: pepkit/hello_looper:default # pephub registry path or local path\r\n", - "output_dir: results\r\n", - "pipeline_interfaces:\r\n", - " sample: pipeline/pipeline_interface.yaml\r\n" - ] - } - ], - "source": [ - "!cat hello_looper-master/.looper_pephub.yaml" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looper version: 1.5.2-dev\n", - "Command: run\n", - "Using default divvy config. 
You may specify in env var: ['DIVCFG']\n", - "No config key in Project, or reading project from dict\n", - "Processing project from dictionary...\n", - "Pipestat compatible: False\n", - "\u001b[36m## [1 of 2] sample: frog_1; pipeline: count_lines\u001b[0m\n", - "/home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/pipeline/count_lines.sh data/frog1_data.txt\n", - "Writing script to /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_1.sub\n", - "Job script (n=1; 0.00Gb): /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_1.sub\n", - "Compute node: databio\n", - "Start time: 2023-11-09 15:39:28\n", - "wc: data/frog1_data.txt: No such file or directory\n", - "Number of lines: \n", - "\u001b[36m## [2 of 2] sample: frog_2; pipeline: count_lines\u001b[0m\n", - "/home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/pipeline/count_lines.sh data/frog2_data.txt\n", - "Writing script to /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_2.sub\n", - "Job script (n=1; 0.00Gb): /home/drc/GITHUB/looper/master/looper/docs_jupyter/hello_looper-master/results/submission/count_lines_frog_2.sub\n", - "Compute node: databio\n", - "Start time: 2023-11-09 15:39:28\n", - "wc: data/frog2_data.txt: No such file or directory\n", - "Number of lines: \n", - "\n", - "Looper finished\n", - "Samples valid for job generation: 2 of 2\n", - "Commands submitted: 2 of 2\n", - "Jobs submitted: 2\n", - "{'Pipestat compatible': False, 'Commands submitted': '2 of 2', 'Jobs submitted': 2}\n", - "\u001b[0m" - ] - } - ], - "source": [ - "!looper run --looper-config hello_looper-master/.looper_pephub.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Pipestat compatible configurations\n", - "\n", - "Looper can also be used in tandem with 
[pipestat](https://pipestat.databio.org/en/latest/) to report pipeline results." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pep_config: ./project/project_config.yaml # pephub registry path or local path\r\n", - "output_dir: ./results\r\n", - "pipeline_interfaces:\r\n", - " sample: ./pipeline_pipestat/pipeline_interface.yaml\r\n", - "pipestat:\r\n", - " results_file_path: results.yaml" - ] - } - ], - "source": [ - "!cat hello_looper-master/.looper_pipestat.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## A few more basic looper options\n", - "\n", - "Looper also provides a few other simple arguments that let you adjust what it does. You can find a [complete reference of usage](usage.md) in the docs. Here are a few of the more common options:\n", - "\n", - "For `looper run`:\n", - "\n", - "- `-d`: Dry run mode (creates submission scripts, but does not execute them) \n", - "- `--limit`: Only run a few samples \n", - "- `--lumpn`: Run several commands together as a single job. This is useful when you have a quick pipeline to run on many samples and want to group them.\n", - "\n", - "There are also other commands:\n", - "\n", - "- `looper check`: checks on the status (running, failed, completed) of your jobs\n", - "- `looper summarize`: produces an output file that summarizes your project results\n", - "- `looper destroy`: completely erases all results so you can restart\n", - "- `looper rerun`: rerun only jobs that have failed.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## On your own\n", - "\n", - "To use `looper` on your own, you will need to prepare 2 things: a **project** (metadata that define *what* you want to process), and **pipelines** (*how* to process data). To link your project to `looper`, you will need to [define a project](defining-a-project.md). 
You will want to either use pre-made `looper`-compatible pipelines or link your own custom-built pipelines. These docs will also show you how to connect your pipeline to your project.\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs_jupyter/tutorial_divvy.ipynb b/docs_jupyter/tutorial_divvy.ipynb deleted file mode 100644 index a9a3c044d..000000000 --- a/docs_jupyter/tutorial_divvy.ipynb +++ /dev/null @@ -1,413 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# python tutorial\n", - "\n", - "## Compute packages\n", - "\n", - "When you start `divvy`, you may provide a configuration file that specifies one or more *compute packages*. A compute package is just a set of a variables that contains information needed to run a job, such as a job submission template, the command that you use to submit a job (*e.g.* `sbatch` or `qsub`), and any other variables needed to fill the template (*e.g.* `partition` or `account`). You can find out [how to write your own divvy config file](../configuration), but for this tutorial, we'll just use the default.\n", - "\n", - "Start by importing `divvy`, and then create a new `ComputingConfiguration` object. 
If you provide no arguments, you'll just get a few default packages:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import divvy\n", - "\n", - "dcc = divvy.ComputingConfiguration()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This loads up the default compute package, and we see that there are a few other packages available. We can explore the compute settings in the loaded (`default`) package like this: " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "submission_template: /home/nsheff/.local/lib/python3.5/site-packages/divvy/default_config/submit_templates/localhost_template.sub\n", - "submission_command: sh" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dcc.compute" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here you can see that a *compute package* is really a simple thing. In this case, it's just 2 key-value pairs. 
The `submission_template` key is a path to a template file, with these contents: \n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "{CODE} | tee {LOGFILE}\n", - "\n" - ] - } - ], - "source": [ - "with open(dcc.compute.submission_template) as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can populate this simple template by passing values for the `{VARIABLE}` text in the template:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Writing script to /home/nsheff/code/divvy/docs_jupyter/test_local.sub\n" - ] - }, - { - "data": { - "text/plain": [ - "'test_local.sub'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dcc.write_script(\n", - " \"test_local.sub\", {\"code\": \"run-this-command\", \"logfile\": \"logfile.txt\"}\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's look at the contents of our populated template:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "run-this-command | tee logfile.txt\n", - "\n" - ] - } - ], - "source": [ - "with open(\"test_local.sub\") as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function opens the template specified by the `submission_template` variable in the compute package, and then populates any template variables with values 
from the compute package. The original `{CODE}` and `{LOGFILE}` has been replaced by the variables we passed to `write_script()`.\n", - "\n", - "The other variable in the compute package is `submission_command`, which contains the shell instruction that would be used to submit this populated template; in this case, it's simply `sh` to run this script in the console. We can activate a different *compute_package* like this: " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Activating compute package 'slurm'\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dcc.activate_package(\"slurm\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It returns 'True' to indicate that the activation has been successful. This will change our settings. Let's inspect the new package:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "submission_template: /home/nsheff/.local/lib/python3.5/site-packages/divvy/default_config/submit_templates/slurm_template.sub\n", - "submission_command: sbatch" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dcc.compute" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now that we've activated the package of interest, let's take a peek at the now-active `submission_template`:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "#SBATCH --job-name='{JOBNAME}'\n", - "#SBATCH --output='{LOGFILE}'\n", - "#SBATCH --mem='{MEM}'\n", - "#SBATCH --cpus-per-task='{CORES}'\n", - "#SBATCH 
--time='{TIME}'\n", - "#SBATCH --partition='{PARTITION}'\n", - "#SBATCH -m block\n", - "#SBATCH --ntasks=1\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "{CODE}\n", - "\n" - ] - } - ], - "source": [ - "with open(dcc.compute.submission_template) as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this template there are a lot more variables to populate. If we don't populate them all, they will just be left in the template. Let's pass a value for the `code` variable and see how this changes the submission script output:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Writing script to /home/nsheff/code/divvy/docs_jupyter/test_script.sub\n" - ] - } - ], - "source": [ - "s = dcc.write_script(\"test_script.sub\", {\"code\": \"yellow\"})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here's the output. 
Notice that the `{CODE}` variable has been replaced with the word `yellow`:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "#SBATCH --job-name='{JOBNAME}'\n", - "#SBATCH --output='{LOGFILE}'\n", - "#SBATCH --mem='{MEM}'\n", - "#SBATCH --cpus-per-task='{CORES}'\n", - "#SBATCH --time='{TIME}'\n", - "#SBATCH --partition='{PARTITION}'\n", - "#SBATCH -m block\n", - "#SBATCH --ntasks=1\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "yellow\n", - "\n" - ] - } - ], - "source": [ - "with open(\"test_script.sub\") as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using a priority list of variables\n", - "\n", - "Now, you can also pass more than one `Dict` object, in priority order, by just passing a list. Here, we'll pass 2 dicts, and any values in the 1st will override values in the 2nd:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Writing script to /home/nsheff/code/divvy/docs_jupyter/test_script.sub\n" - ] - } - ], - "source": [ - "s = dcc.write_script(\n", - " \"test_script.sub\", [{\"code\": \"red\"}, {\"code\": \"yellow\", \"time\": \"now\"}]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#!/bin/bash\n", - "#SBATCH --job-name='{JOBNAME}'\n", - "#SBATCH --output='{LOGFILE}'\n", - "#SBATCH --mem='{MEM}'\n", - "#SBATCH --cpus-per-task='{CORES}'\n", - "#SBATCH --time='now'\n", - "#SBATCH --partition='{PARTITION}'\n", - "#SBATCH -m block\n", - "#SBATCH --ntasks=1\n", - "\n", - "echo 'Compute node:' `hostname`\n", - "echo 'Start time:' `date +'%Y-%m-%d %T'`\n", - "\n", - "red\n", - "\n" - ] - 
} - ], - "source": [ - "with open(\"test_script.sub\") as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this case the value `red` took priority for the `code` variable, because it came first; but `time` was not overwritten in the first entry, so it is maintained. This allows for a cascading cumulative priority variable replacement." - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/mkdocs.yml b/mkdocs.yml deleted file mode 100644 index 8e5700de1..000000000 --- a/mkdocs.yml +++ /dev/null @@ -1,59 +0,0 @@ -site_name: Looper -site_logo: img/looper_logo_dark.svg -site_url: http://looper.databio.org/ -repo_url: http://github.com/pepkit/looper -pypi_name: loopercli - -nav: - - Getting Started: - - Introduction: README.md - - Features at-a-glance: features.md - - Hello world: hello-world.md - - How-to guides: - - Defining a project: defining-a-project.md - - Running a pipeline: running-a-pipeline.md - - Initializing a repository: initialize.md - - Using pipestat: pipestat.md - - Parameterizing pipelines: parameterizing-pipelines.md - - Running on a cluster: running-on-a-cluster.md - - Grouping many jobs into one: grouping-jobs.md - - Running jobs in containers: containers.md - - Handling multiple input files: how-to-merge-inputs.md - - Running multiple pipelines: multiple-pipelines.md - - Writing a pipeline interface: writing-a-pipeline-interface.md - - Using looper config: looper-config.md - - Using geofetch: using-geofetch.md - - Browsable HTML Reports: looper-report.md - - Using divvy: - - Introduction: divvy/README.md - - 
Configuring divvy: divvy/configuration.md - - "Tutorial: divvy in python": tutorial_divvy.md - - "Tutorial: divvy on the command line": cli_divvy.md - - Configuring containers: divvy/containers.md - - Configuring connection with client software: divvy/adapters.md - - Default packages: divvy/default-packages.md - - DIVCFG examples: http://github.com/pepkit/divcfg - - Reference: - - Pipeline interface specification: pipeline-interface-specification.md - - Pipeline tiers: pipeline-tiers.md - - Concentric templates: concentric-templates.md - - Pre-submission hooks system: pre-submission-hooks.md - - Looper variable namespaces: variable-namespaces.md - - Usage: usage.md - - Configuration files: config-files.md - - API: autodoc_build/looper.md - - FAQ: faq.md - - Support: support.md - - Contributing: contributing.md - - Changelog: changelog.md - -theme: databio - -plugins: - - databio: - autodoc_build: "docs/autodoc_build" - jupyter_source: "docs_jupyter" - jupyter_build: "docs_jupyter/build" - autodoc_package: "looper" - no_top_level: true - - search From 6f018f3baaa8f8be22a84e219e7c36647a2a3769 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 07:52:40 -0500 Subject: [PATCH 147/163] Complete pydantic-settings migration --- looper/api/main.py | 41 +++-- looper/cli_pydantic.py | 285 +++++++++++++++++------------ looper/command_models/arguments.py | 144 ++++++++------- looper/command_models/commands.py | 178 +++++++++--------- looper/utils.py | 68 ++++++- pyproject.toml | 2 +- tests/conftest.py | 3 +- tests/smoketests/test_other.py | 9 +- 8 files changed, 427 insertions(+), 303 deletions(-) diff --git a/looper/api/main.py b/looper/api/main.py index 315d38252..36f56473c 100644 --- a/looper/api/main.py +++ b/looper/api/main.py @@ -42,7 +42,7 @@ def background_async(top_level_model: TopLevelParser, job_id: str) -> None: output_stream = stdout_redirects.redirect() try: - run_looper(argparse_namespace, parser=None) + run_looper(argparse_namespace) 
jobs[job_id].status = "completed" except Exception as e: jobs[job_id].status = "failed" @@ -59,31 +59,32 @@ def create_argparse_namespace(top_level_model: TopLevelParser) -> Namespace: Converts a TopLevelParser instance into an argparse.Namespace object. This function takes a TopLevelParser instance, and converts it into an - argparse.Namespace object. It includes handling for supported commands - specified in SUPPORTED_COMMANDS. + argparse.Namespace object compatible with run_looper(). - :param TopLevelParser top_level_model: An instance of the TopLevelParser - model + :param TopLevelParser top_level_model: An instance of the TopLevelParser model :return argparse.Namespace: An argparse.Namespace object representing the parsed command-line arguments. """ namespace = Namespace() - for argname, value in vars(top_level_model).items(): - if argname not in [cmd.name for cmd in SUPPORTED_COMMANDS]: - setattr(namespace, argname, value) - else: - command_namespace = Namespace() - command_namespace_args = value - for command_argname, command_arg_value in vars( - command_namespace_args - ).items(): - setattr( - command_namespace, - command_argname, - command_arg_value, - ) - setattr(namespace, argname, command_namespace) + # Find which command was specified and set it + command_name = None + for cmd in SUPPORTED_COMMANDS: + cmd_value = getattr(top_level_model, cmd.name, None) + if cmd_value is not None: + command_name = cmd.name + # Add all command arguments to the namespace + for argname, value in vars(cmd_value).items(): + setattr(namespace, argname, value) + break + + namespace.command = command_name + + # Add top-level arguments + namespace.silent = top_level_model.silent + namespace.verbosity = top_level_model.verbosity + namespace.logdev = top_level_model.logdev + return namespace diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 1ad7b94b2..9670fec8d 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -1,35 +1,22 @@ """ -CLI script 
using `pydantic-argparse` for parsing of arguments +CLI script using pydantic-settings for CLI parsing. -Arguments / commands are defined in `command_models/` and are given, eventually, as -`pydantic` models, allowing for type-checking and validation of arguments. - -Note: this is only a test script so far, and coexists next to the current CLI -(`cli_looper.py`), which uses `argparse` directly. The goal is to eventually -replace the current CLI with a CLI based on above-mentioned `pydantic` models, -but whether this will happen with `pydantic-argparse` or another, possibly self- -written library is not yet clear. -It is well possible that this script will be removed again. +Arguments / commands are defined in `command_models/` as pydantic models, +allowing for type-checking and validation of arguments. """ import os import sys import logmuse -import pydantic_argparse import yaml from eido import inspect_project from pephubclient import PEPHubClient -from pydantic_argparse.argparse.parser import ArgumentParser +from pydantic_settings import get_subcommand from rich.console import Console from . import __version__ -from .command_models.arguments import ArgumentEnum -from .command_models.commands import ( - SUPPORTED_COMMANDS, - TopLevelParser, - add_short_arguments, -) +from .command_models.commands import TopLevelParser from .const import ( CLI_KEY, CLI_PROJ_ATTRS, @@ -71,6 +58,95 @@ read_yaml_file, ) +SUBCOMMAND_NAMES = { + "run": "run", + "rerun": "rerun", + "runp": "runp", + "table": "table", + "report": "report", + "destroy": "destroy", + "check": "check", + "clean": "clean", + "init": "init", + "init_piface": "init_piface", + "link": "link", + "inspect": "inspect", +} + + +class FlatArgs: + """Adapter that presents pydantic model args in a flat namespace-like structure. + + Converts from pydantic-settings structure (top-level + subcommand model) + to flat namespace expected by run_looper and other functions. 
+ + Implements __dict__ property so vars() works correctly. + """ + + def __init__(self, top_level: TopLevelParser, command: str | None, subcmd_args): + # Use object.__setattr__ to avoid triggering our custom __setattr__ + object.__setattr__(self, "_top_level", top_level) + object.__setattr__(self, "_subcmd_args", subcmd_args) + object.__setattr__(self, "_extra", {}) # For storing extra attributes + object.__setattr__(self, "command", command) + # Copy top-level logging args + object.__setattr__(self, "silent", top_level.silent) + object.__setattr__(self, "verbosity", top_level.verbosity) + object.__setattr__(self, "logdev", top_level.logdev) + + def __getattr__(self, name: str): + # Check _extra first (for attributes set via setattr) + extra = object.__getattribute__(self, "_extra") + if name in extra: + return extra[name] + # Then check subcommand args + subcmd_args = object.__getattribute__(self, "_subcmd_args") + if subcmd_args is not None and hasattr(subcmd_args, name): + return getattr(subcmd_args, name) + # Fall back to top-level (for non-subcommand fields) + top_level = object.__getattribute__(self, "_top_level") + # Only check for non-subcommand attributes on top_level + if name in ("silent", "verbosity", "logdev"): + return getattr(top_level, name) + raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'") + + def __setattr__(self, name: str, value): + if name.startswith("_") or name in ("command", "silent", "verbosity", "logdev"): + object.__setattr__(self, name, value) + else: + # Store in _extra dict for later retrieval + extra = object.__getattribute__(self, "_extra") + extra[name] = value + + @property + def __dict__(self): + """Return all attributes as a dict for vars() compatibility.""" + result = {} + # Add command and logging args + result["command"] = self.command + result["silent"] = self.silent + result["verbosity"] = self.verbosity + result["logdev"] = self.logdev + # Add subcommand args + if self._subcmd_args is not None: + 
for name, value in self._subcmd_args.model_dump().items(): + result[name] = value + # Add extra attributes (set via setattr) + result.update(self._extra) + return result + + +def flatten_args(args: TopLevelParser) -> FlatArgs: + """Convert pydantic-settings args to flat namespace for compatibility.""" + subcmd_args = get_subcommand(args, is_required=True) + # Determine command name from the subcommand model type + command = None + for name in SUBCOMMAND_NAMES: + if getattr(args, name, None) is subcmd_args: + command = name + break + return FlatArgs(args, command, subcmd_args) + def opt_attr_pair(name: str) -> tuple[str, str]: """Takes argument as attribute and returns as tuple of top-level or subcommand used.""" @@ -91,12 +167,7 @@ def validate_post_parse(args) -> list[str]: SAMPLE_INCLUSION_OPTNAME, ], ) - # Depending on the subcommand used, the above options might either be in - # the top-level namespace or in the subcommand namespace (the latter due - # to a `modify_args_namespace()`) - if getattr( - args, attr, None - ) # or (getattr(args.run, attr, None) if hasattr(args, "run") else False) + if getattr(args, attr, None) ] if len(used_exclusives) > 1: problems.append( @@ -105,54 +176,43 @@ def validate_post_parse(args) -> list[str]: return problems -# TODO rename to run_looper_via_cli for running lloper as a python library: -# https://github.com/pepkit/looper/pull/472#discussion_r1521970763 -def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): - # here comes adapted `cli_looper.py` code +def run_looper(args: FlatArgs, test_args=None): + """Run looper with parsed arguments. 
+ + Args: + args: Flattened arguments from pydantic-settings + test_args: Optional test arguments for testing purposes + """ global _LOGGER _LOGGER = logmuse.logger_via_cli(args, make_root=True) - # Find out which subcommand was used - supported_command_names = [cmd.name for cmd in SUPPORTED_COMMANDS] - subcommand_valued_args = [ - (arg, value) - for arg, value in vars(args).items() - if arg and arg in supported_command_names and value is not None - ] - # Only one subcommand argument will be not `None`, else we found a bug in `pydantic-argparse` - [(subcommand_name, subcommand_args)] = subcommand_valued_args - - cli_use_errors = validate_post_parse(subcommand_args) - if cli_use_errors: - if parser: - parser.print_help(sys.stderr) - parser.error( - f"{len(cli_use_errors)} CLI use problem(s): {', '.join(cli_use_errors)}" - ) - else: - raise ValueError(f"CLI use problem(s): {', '.join(cli_use_errors)}") + subcommand_name = args.command if subcommand_name is None: - if parser: - parser.print_help(sys.stderr) + print("No command specified. 
Use --help for usage.", file=sys.stderr) + sys.exit(1) + + cli_use_errors = validate_post_parse(args) + if cli_use_errors: + print(f"CLI use problem(s): {', '.join(cli_use_errors)}", file=sys.stderr) sys.exit(1) if subcommand_name == "init": console = Console() console.clear() console.rule("\n[magenta]Looper initialization[/magenta]") - selection = subcommand_args.generic + selection = args.generic if selection is True: console.clear() return int( not initiate_looper_config( dotfile_path(), - subcommand_args.pep_config, - subcommand_args.output_dir, - subcommand_args.sample_pipeline_interfaces, - subcommand_args.project_pipeline_interfaces, - subcommand_args.force_yes, + args.pep_config, + args.output_dir, + args.sample_pipeline_interfaces, + args.project_pipeline_interfaces, + args.force_yes, ) ) else: @@ -166,8 +226,8 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): looper_cfg_path = os.path.relpath(dotfile_path(), start=os.curdir) try: - if subcommand_args.config: - looper_config_dict = read_looper_config_file(subcommand_args.config) + if args.config: + looper_config_dict = read_looper_config_file(args.config) else: looper_config_dict = read_looper_dotfile() _LOGGER.info(f"Using looper config ({looper_cfg_path}).") @@ -177,71 +237,65 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): if looper_config_key == CLI_KEY: cli_modifiers_dict = looper_config_item else: - setattr(subcommand_args, looper_config_key, looper_config_item) + setattr(args, looper_config_key, looper_config_item) except OSError as e: - if subcommand_args.config: + if args.config: _LOGGER.warning( - f"\nLooper config file does not exist at given path {subcommand_args.config}. Use looper init to create one at {looper_cfg_path}." + f"\nLooper config file does not exist at given path {args.config}. Use looper init to create one at {looper_cfg_path}." 
) else: _LOGGER.warning(e) sys.exit(1) - subcommand_args = enrich_args_via_cfg( + args = enrich_args_via_cfg( subcommand_name, - subcommand_args, - parser, + args, + None, # No parser in pydantic-settings mode test_args=test_args, cli_modifiers=cli_modifiers_dict, ) # If project pipeline interface defined in the cli, change name to: "pipeline_interface" - if vars(subcommand_args)[PROJECT_PL_ARG]: - subcommand_args.pipeline_interfaces = vars(subcommand_args)[PROJECT_PL_ARG] + if getattr(args, PROJECT_PL_ARG, None): + args.pipeline_interfaces = getattr(args, PROJECT_PL_ARG) divcfg = ( - select_divvy_config(filepath=subcommand_args.divvy) - if hasattr(subcommand_args, "divvy") - else None + select_divvy_config(filepath=args.divvy) if hasattr(args, "divvy") else None ) # Ignore flags if user is selecting or excluding on flags: - if subcommand_args.sel_flag or subcommand_args.exc_flag: - subcommand_args.ignore_flags = True + if args.sel_flag or args.exc_flag: + args.ignore_flags = True # Initialize project - if is_PEP_file_type(subcommand_args.pep_config) and os.path.exists( - subcommand_args.pep_config - ): + if is_PEP_file_type(args.pep_config) and os.path.exists(args.pep_config): try: p = Project( - cfg=subcommand_args.pep_config, - amendments=subcommand_args.amend, + cfg=args.pep_config, + amendments=args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", **{ - attr: getattr(subcommand_args, attr) + attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS - if attr in subcommand_args + if hasattr(args, attr) }, ) except yaml.parser.ParserError as e: _LOGGER.error(f"Project config parse failed -- {e}") sys.exit(1) - elif is_pephub_registry_path(subcommand_args.pep_config): - if vars(subcommand_args)[SAMPLE_PL_ARG]: + elif is_pephub_registry_path(args.pep_config): + if getattr(args, SAMPLE_PL_ARG, None): p = Project( - amendments=subcommand_args.amend, + amendments=args.amend, divcfg_path=divcfg, runp=subcommand_name == "runp", - 
project_dict=PEPHubClient().load_raw_pep( - registry_path=subcommand_args.pep_config - ), + project_dict=PEPHubClient().load_raw_pep(registry_path=args.pep_config), **{ - attr: getattr(subcommand_args, attr) + attr: getattr(args, attr) for attr in CLI_PROJ_ATTRS - if attr in subcommand_args + if hasattr(args, attr) }, ) else: @@ -263,34 +317,30 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): with ProjectContext( prj=p, - selector_attribute=subcommand_args.sel_attr, - selector_include=subcommand_args.sel_incl, - selector_exclude=subcommand_args.sel_excl, - selector_flag=subcommand_args.sel_flag, - exclusion_flag=subcommand_args.exc_flag, + selector_attribute=args.sel_attr, + selector_include=args.sel_incl, + selector_exclude=args.sel_excl, + selector_flag=args.sel_flag, + exclusion_flag=args.exc_flag, ) as prj: # Check at the beginning if user wants to use pipestat and pipestat is configurable is_pipestat_configured = ( prj._check_if_pipestat_configured(pipeline_type=PipelineLevel.PROJECT.value) - if getattr(subcommand_args, "project", None) or subcommand_name == "runp" + if getattr(args, "project", None) or subcommand_name == "runp" else prj._check_if_pipestat_configured() ) if subcommand_name in ["run", "rerun"]: - if getattr(subcommand_args, "project", None): + if getattr(args, "project", None): _LOGGER.warning( "Project flag set but 'run' command was used. Please use 'runp' to run at project-level." ) rerun = subcommand_name == "rerun" run = Runner(prj) try: - # compute_kwargs = _proc_resources_spec(args) - compute_kwargs = _proc_resources_spec(subcommand_args) + compute_kwargs = _proc_resources_spec(args) - # TODO Shouldn't top level args and subcommand args be accessible on the same object? 
- return run( - subcommand_args, top_level_args=args, rerun=rerun, **compute_kwargs - ) + return run(args, rerun=rerun, **compute_kwargs) except SampleFailedException: sys.exit(1) except IOError: @@ -302,40 +352,40 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): raise if subcommand_name == "runp": - compute_kwargs = _proc_resources_spec(subcommand_args) + compute_kwargs = _proc_resources_spec(args) collate = Collator(prj) - collate(subcommand_args, **compute_kwargs) + collate(args, **compute_kwargs) return collate.debug if subcommand_name == "destroy": - return Destroyer(prj)(subcommand_args) + return Destroyer(prj)(args) if subcommand_name == "table": if is_pipestat_configured: - return Tabulator(prj)(subcommand_args) + return Tabulator(prj)(args) else: raise PipestatConfigurationException("table") if subcommand_name == "report": if is_pipestat_configured: - return Reporter(prj)(subcommand_args) + return Reporter(prj)(args) else: raise PipestatConfigurationException("report") if subcommand_name == "link": if is_pipestat_configured: - Linker(prj)(subcommand_args) + Linker(prj)(args) else: raise PipestatConfigurationException("link") if subcommand_name == "check": if is_pipestat_configured: - return Checker(prj)(subcommand_args) + return Checker(prj)(args) else: raise PipestatConfigurationException("check") if subcommand_name == "clean": - return Cleaner(prj)(subcommand_args) + return Cleaner(prj)(args) if subcommand_name == "inspect": # Inspect PEP from Eido @@ -351,22 +401,23 @@ def run_looper(args: TopLevelParser, parser: ArgumentParser, test_args=None): def main(test_args=None) -> dict: - parser = pydantic_argparse.ArgumentParser( - model=TopLevelParser, - prog="looper", - description="Looper: A job submitter for Portable Encapsulated Projects", - add_help=True, - version=__version__, - ) + """Main entry point for looper CLI. + + Uses pydantic-settings for CLI parsing. 
- parser = add_short_arguments(parser, ArgumentEnum) + Args: + test_args: Optional list of arguments for testing + Returns: + Result from run_looper + """ if test_args: - args = parser.parse_typed_args(args=test_args) + args = TopLevelParser(_cli_parse_args=test_args) else: - args = parser.parse_typed_args() + args = TopLevelParser() - return run_looper(args, parser, test_args=test_args) + flat_args = flatten_args(args) + return run_looper(flat_args, test_args=test_args) def main_cli() -> None: diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index a821a1c84..747395461 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -1,27 +1,24 @@ """ -Argument definitions via a thin wrapper around `pydantic.fields.FieldInfo` +Argument definitions for CLI arguments/flags. + +Stores CLI argument metadata (name, type, default, description, alias) +for use in both pydantic-settings CLI and FastAPI interfaces. """ import enum import os -from copy import copy -from typing import Any, List +from typing import Any -import pydantic.v1 as pydantic +import pydantic +from pydantic import AliasChoices -class Argument(pydantic.fields.FieldInfo): +class Argument: """CLI argument / flag definition. - This class is designed to define CLI arguments or flags. It leverages - Pydantic for data validation and serves as a source of truth for multiple - interfaces, including a CLI. - - Naively, one would think one could just subclass `pydantic.Field`, - but actually `pydantic.Field` is a function, and not a class. - `pydantic.Field()` returns a validated `FieldInfo` instance, - so we instead subclass `FieldInfo` directly and validate it in the - constructor. + This class stores CLI argument metadata for use in multiple interfaces: + - pydantic-settings CLI (via CliSubCommand) + - FastAPI HTTP API (via pydantic models) Args: name (str): Argument name, e.g. "ignore-args". 
@@ -29,8 +26,7 @@ class Argument(pydantic.fields.FieldInfo): default value is `...` (Ellipsis), then the argument is required. description (str): Argument description, which will appear as the help text for this argument. - kwargs (dict): Additional keyword arguments supported by - `FieldInfo`. These are passed along as they are. + alias (str | None): Short argument alias, e.g. "-i". """ def __init__( @@ -39,40 +35,57 @@ def __init__( default: Any, description: str, alias: str | None = None, - **kwargs, ) -> None: self._name = name - super().__init__( - default=default, description=description, alias=alias, **kwargs - ) - self._validate() + self._default = default # tuple: (type, default_value) + self._description = description + self._alias = alias @property def name(self) -> str: - """ - Argument name as used in the CLI, e.g. "ignore-args" - """ + """Argument name as used in the CLI, e.g. "ignore-args".""" return self._name + @property + def default(self) -> Any: + """Default value tuple (type, default_value).""" + return self._default + + @property + def description(self) -> str: + """Argument description / help text.""" + return self._description + + @property + def alias(self) -> str | None: + """Short argument alias, e.g. "-i".""" + return self._alias + def with_reduced_default(self) -> pydantic.fields.FieldInfo: """ - Convert to a `FieldInfo` instance with reduced default value - - Returns a copy of an instance, but with the `default` attribute - replaced by only the default value, without the type information. - This is required when using an instance in a direct `pydantic` - model definition, instead of creating a model dynamically using - `pydantic.create_model`. + Create a FieldInfo instance with the default value (not the type tuple). - TODO: this is due to this issue: - https://github.com/pydantic/pydantic/issues/2248#issuecomment-757448447 - and it's a bit tedious. 
+ This is used when defining pydantic model fields directly, + where only the default value (not the type) is needed. + Uses AliasChoices to support kebab-case CLI flags (--dry-run) while + keeping underscore field names in Python (dry_run). """ - c = copy(self) - _, default_value = self.default - c.default = default_value - return c + _, default_value = self._default + # kebab-case version of the name for --dry-run style + long_name = self._name.replace("_", "-") + if self._alias: + return pydantic.Field( + default=default_value, + description=self._description, + validation_alias=AliasChoices(self._alias, long_name), + ) + # Even without alias, include kebab-case for CLI compatibility + return pydantic.Field( + default=default_value, + description=self._description, + validation_alias=AliasChoices(long_name), + ) class ArgumentEnum(enum.Enum): @@ -86,13 +99,13 @@ class ArgumentEnum(enum.Enum): IGNORE_FLAGS = Argument( name="ignore_flags", - alias="-i", + alias="i", default=(bool, False), description="Ignore run status flags", ) FORCE_YES = Argument( name="force_yes", - alias="-f", + alias="f", default=(bool, False), description="Provide upfront confirmation of destruction intent, to skip console query. 
Default=False", ) @@ -111,65 +124,65 @@ class ArgumentEnum(enum.Enum): FLAGS = Argument( name="flags", - alias="-f", - default=(List, []), + alias="f", + default=(list, []), description="Only check samples based on these status flags.", ) TIME_DELAY = Argument( name="time_delay", - alias="-t", + alias="t", default=(int, 0), description="Time delay in seconds between job submissions (min: 0, max: 30)", ) DRY_RUN = Argument( name="dry_run", - alias="-d", + alias="d", default=(bool, False), description="Don't actually submit jobs", ) COMMAND_EXTRA = Argument( name="command_extra", - alias="-x", + alias="x", default=(str, ""), description="String to append to every command", ) COMMAND_EXTRA_OVERRIDE = Argument( name="command_extra_override", - alias="-y", + alias="y", default=(str, ""), description="Same as command-extra, but overrides values in PEP", ) LUMP = Argument( name="lump", - alias="-u", + alias="u", default=(float, None), description="Total input file size (GB) to batch into one job", ) LUMPN = Argument( name="lump_n", - alias="-n", + alias="n", default=(int, None), description="Number of commands to batch into one job", ) LUMPJ = Argument( name="lump_j", - alias="-j", + alias="j", default=(int, None), description="Lump samples into number of jobs.", ) LIMIT = Argument( - name="limit", alias="-l", default=(int, None), description="Limit to n samples" + name="limit", alias="l", default=(int, None), description="Limit to n samples" ) SKIP = Argument( name="skip", - alias="-k", + alias="k", default=(int, None), description="Skip samples by numerical index", ) CONFIG = Argument( name="config", - alias="-c", + alias="c", default=(str, None), description="Looper configuration file (YAML)", ) @@ -185,38 +198,38 @@ class ArgumentEnum(enum.Enum): ) OUTPUT_DIR = Argument( name="output_dir", - alias="-o", + alias="o", default=(str, None), description="Output directory", ) REPORT_OUTPUT_DIR = Argument( name="report_dir", - alias="-r", + alias="r", default=(str, None), 
description="Set location for looper report and looper table outputs", ) GENERIC = Argument( name="generic", - alias="-g", + alias="g", default=(bool, False), description="Use generic looper config?", ) SAMPLE_PIPELINE_INTERFACES = Argument( name="sample_pipeline_interfaces", - alias="-S", - default=(List, []), + alias="spi", + default=(list, []), description="Paths to looper sample pipeline interfaces", ) PROJECT_PIPELINE_INTERFACES = Argument( name="project_pipeline_interfaces", - alias="-P", - default=(List, []), + alias="ppi", + default=(list, []), description="Paths to looper project pipeline interfaces", ) AMEND = Argument( - name="amend", default=(List, []), description="List of amendments to activate" + name="amend", default=(list, []), description="List of amendments to activate" ) SEL_ATTR = Argument( name="sel_attr", @@ -225,7 +238,7 @@ class ArgumentEnum(enum.Enum): ) SEL_INCL = Argument( name="sel_incl", - default=(List, []), + default=(list, []), description="Include only samples with these values", ) SEL_EXCL = Argument( @@ -234,26 +247,25 @@ class ArgumentEnum(enum.Enum): description="Exclude samples with these values", ) SEL_FLAG = Argument( - name="sel_flag", default=(List, []), description="Sample selection flag" + name="sel_flag", default=(list, []), description="Sample selection flag" ) EXC_FLAG = Argument( - name="exc_flag", default=(List, []), description="Sample exclusion flag" + name="exc_flag", default=(list, []), description="Sample exclusion flag" ) SKIP_FILE_CHECKS = Argument( name="skip_file_checks", - alias="-f", default=(bool, False), description="Do not perform input file checks", ) PACKAGE = Argument( name="package", - alias="-p", + alias="p", default=(str, None), description="Name of computing resource package to use", ) COMPUTE = Argument( name="compute", - default=(List, []), + default=(list, []), description="List of key-value pairs (k1=v1)", ) DIVVY = Argument( diff --git a/looper/command_models/commands.py 
b/looper/command_models/commands.py index 176df38a0..894d5fe5e 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -1,17 +1,42 @@ """ `pydantic` models for `looper` commands and a wrapper class. + +Uses native pydantic v2 for model definitions. The CLI is built from +these models using argparse in cli_pydantic.py. """ +import json from dataclasses import dataclass -from typing import Optional +from typing import Annotated -import pydantic.v1 as pydantic -from pydantic_argparse import ArgumentParser +import pydantic +from pydantic import AliasChoices, BeforeValidator, Field +from pydantic_settings import BaseSettings, CliSubCommand, SettingsConfigDict from ..const import MESSAGE_BY_SUBCOMMAND from .arguments import Argument, ArgumentEnum +def _parse_cli_list(v): + """Parse list values from CLI. + + pydantic-settings with AliasChoices serializes list values to JSON strings. + This validator deserializes them back to lists. + """ + if isinstance(v, str): + try: + parsed = json.loads(v) + if isinstance(parsed, list): + return parsed + except json.JSONDecodeError: + pass + return v + + +# Annotated type for list fields that handles CLI JSON serialization +CliList = Annotated[list, BeforeValidator(_parse_cli_list)] + + @dataclass class Command: """Representation of a command. @@ -28,17 +53,33 @@ class Command: def create_model(self) -> type[pydantic.BaseModel]: """ - Creates a `pydantic` model for this command + Creates a `pydantic` model for this command. + + Uses AliasChoices to support kebab-case CLI flags (--dry-run) while + keeping underscore field names in Python (dry_run). 
""" - arguments = dict() + arguments = {} for arg in self.arguments: - # These gymnastics are necessary because of - # https://github.com/pydantic/pydantic/issues/2248#issuecomment-757448447 arg_type, arg_default_value = arg.default - arguments[arg.name] = ( - arg_type, - pydantic.Field(arg_default_value, description=arg.description), - ) + # Use CliList for list fields to handle pydantic-settings JSON serialization + if arg_type is list: + arg_type = CliList + # kebab-case version of the name for --dry-run style + long_name = arg.name.replace("_", "-") + if arg.alias: + field = pydantic.Field( + arg_default_value, + description=arg.description, + validation_alias=AliasChoices(arg.alias, long_name), + ) + else: + # Even without alias, include kebab-case for CLI compatibility + field = pydantic.Field( + arg_default_value, + description=arg.description, + validation_alias=AliasChoices(long_name), + ) + arguments[arg.name] = (arg_type, field) return pydantic.create_model(self.name, **arguments) @@ -57,7 +98,6 @@ def create_model(self) -> type[pydantic.BaseModel]: ArgumentEnum.SAMPLE_PIPELINE_INTERFACES.value, ArgumentEnum.PROJECT_PIPELINE_INTERFACES.value, ArgumentEnum.PIPESTAT.value, - ArgumentEnum.SETTINGS.value, ArgumentEnum.AMEND.value, ArgumentEnum.PROJECT_LEVEL.value, ] @@ -224,7 +264,7 @@ def create_model(self) -> type[pydantic.BaseModel]: LinkParser.arguments.append(arg) InspectParser.arguments.append(arg) -# Create all Models +# Create all Models (for use with FastAPI) RunParserModel = RunParser.create_model() RerunParserModel = RerunParser.create_model() RunProjectParserModel = RunProjectParser.create_model() @@ -239,44 +279,6 @@ def create_model(self) -> type[pydantic.BaseModel]: InitPifaceParserModel = InitPifaceParser.create_model() -def add_short_arguments( - parser: ArgumentParser, argument_enums: type[ArgumentEnum] -) -> ArgumentParser: - """Add short arguments to parser after initial creation. 
- - This function takes a parser object created under pydantic argparse and adds - the short arguments AFTER the initial creation. This is a workaround as - pydantic-argparse does not currently support this during initial parser creation. - - Args: - parser (ArgumentParser): Parser before adding short arguments. - argument_enums (Type[ArgumentEnum]): Enumeration of arguments that contain - names and aliases. - - Returns: - ArgumentParser: Parser after short arguments have been added. - """ - - for cmd in parser._subcommands.choices.keys(): - for argument_enum in list(argument_enums): - # First check there is an alias for the argument otherwise skip - if argument_enum.value.alias: - short_key = argument_enum.value.alias - long_key = ( - "--" + argument_enum.value.name.replace("_", "-") - ) # We must do this because the ArgumentEnum names are transformed during parser creation - if long_key in parser._subcommands.choices[cmd]._option_string_actions: - argument = parser._subcommands.choices[cmd]._option_string_actions[ - long_key - ] - argument.option_strings = (short_key, long_key) - parser._subcommands.choices[cmd]._option_string_actions[ - short_key - ] = argument - - return parser - - SUPPORTED_COMMANDS = [ RunParser, RerunParser, @@ -293,48 +295,54 @@ def add_short_arguments( ] -class TopLevelParser(pydantic.BaseModel): - """ - Top level parser that takes - - commands (run, runp, check...) 
- - arguments that are required no matter the subcommand - """ +class TopLevelParser(BaseSettings): + """A pipeline submission engine for PEP-formatted projects.""" - # commands - run: Optional[RunParserModel] = pydantic.Field(description=RunParser.description) - rerun: Optional[RerunParserModel] = pydantic.Field( - description=RerunParser.description + model_config = SettingsConfigDict( + cli_parse_args=True, + cli_prog_name="looper", + cli_kebab_case=True, # Use --dry-run not --dry_run + cli_implicit_flags=True, # Allow --dry-run without value (instead of --dry-run true) + cli_hide_none_type=True, # Hide {bool,null} type hints in help ) - runp: Optional[RunProjectParserModel] = pydantic.Field( - description=RunProjectParser.description + + # commands (CliSubCommand creates argparse subparsers - only one is used at a time) + run: CliSubCommand[RunParserModel] = Field(description=MESSAGE_BY_SUBCOMMAND["run"]) + rerun: CliSubCommand[RerunParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["rerun"] ) - table: Optional[TableParserModel] = pydantic.Field( - description=TableParser.description + runp: CliSubCommand[RunProjectParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["runp"] ) - report: Optional[ReportParserModel] = pydantic.Field( - description=ReportParser.description + table: CliSubCommand[TableParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["table"] ) - destroy: Optional[DestroyParserModel] = pydantic.Field( - description=DestroyParser.description + report: CliSubCommand[ReportParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["report"] ) - check: Optional[CheckParserModel] = pydantic.Field( - description=CheckParser.description + destroy: CliSubCommand[DestroyParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["destroy"] ) - clean: Optional[CleanParserModel] = pydantic.Field( - description=CleanParser.description + check: CliSubCommand[CheckParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["check"] ) - init: 
Optional[InitParserModel] = pydantic.Field(description=InitParser.description) - init_piface: Optional[InitPifaceParserModel] = pydantic.Field( - description=InitPifaceParser.description + clean: CliSubCommand[CleanParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["clean"] ) - link: Optional[LinkParserModel] = pydantic.Field(description=LinkParser.description) - - inspect: Optional[InspectParserModel] = pydantic.Field( - description=InspectParser.description + init: CliSubCommand[InitParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["init"] + ) + init_piface: CliSubCommand[InitPifaceParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["init-piface"] + ) + link: CliSubCommand[LinkParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["link"] + ) + inspect: CliSubCommand[InspectParserModel] = Field( + description=MESSAGE_BY_SUBCOMMAND["inspect"] ) - # Additional arguments for logging, added to ALL commands - # These must be used before the command - silent: Optional[bool] = ArgumentEnum.SILENT.value.with_reduced_default() - verbosity: Optional[int] = ArgumentEnum.VERBOSITY.value.with_reduced_default() - logdev: Optional[bool] = ArgumentEnum.LOGDEV.value.with_reduced_default() + # Additional arguments for logging + silent: bool | None = ArgumentEnum.SILENT.value.with_reduced_default() + verbosity: int | None = ArgumentEnum.VERBOSITY.value.with_reduced_default() + logdev: bool | None = ArgumentEnum.LOGDEV.value.with_reduced_default() diff --git a/looper/utils.py b/looper/utils.py index 2b3a43eb8..ecf5b0aba 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -358,22 +358,78 @@ def enrich_args_via_cfg( _LOGGER.debug(msg=f"Merged CLI modifiers: {cfg_args_all}") result = argparse.Namespace() + + # Check if we have the old nested structure or new flat structure + # New structure: parser_args has 'command' attribute with subcommand name + # Old structure: parser_args has subcommand as attribute name + is_flat_structure = hasattr(parser_args, 
"command") + + if is_flat_structure: + # New flat argparse structure - arguments are directly on the namespace + cli_args = parser_args # Use parser_args directly, already parsed + + def set_single_arg(argname, default_source_namespace, result_namespace): + # Priority: CLI > cfg_args_all (PEP config) > parser default + cli_value = getattr(cli_args, argname, None) + cfg_value = cfg_args_all.get(argname) if cfg_args_all else None + default_value = getattr(default_source_namespace, argname, None) + + if cli_value is not None and cli_value != default_value: + # CLI provided a non-default value - use it + r = ( + convert_value(cli_value) + if isinstance(cli_value, str) + else cli_value + ) + elif cfg_value is not None: + # PEP config provided a value + if isinstance(cfg_value, list): + r = [convert_value(i) for i in cfg_value] + elif isinstance(cfg_value, dict): + r = cfg_value + else: + r = convert_value(cfg_value) + else: + # Use default + r = default_value + setattr(result_namespace, argname, r) + + # Copy all arguments from parser_args to result + for argname in vars(parser_args): + set_single_arg(argname, parser_args, result) + + # Also add any cfg_args that weren't in parser_args + if cfg_args_all: + for argname in cfg_args_all: + if not hasattr(result, argname): + cfg_value = cfg_args_all[argname] + if isinstance(cfg_value, list): + r = [convert_value(i) for i in cfg_value] + elif isinstance(cfg_value, dict): + r = cfg_value + else: + r = convert_value(cfg_value) + setattr(result, argname, r) + + return result + + # Old nested structure (pydantic-argparse) - kept for backwards compatibility if test_args: cli_args, _ = aux_parser.parse_known_args(args=test_args) - else: cli_args, _ = aux_parser.parse_known_args() # If any CLI args were provided, make sure they take priority if cli_args: - r = getattr(cli_args, subcommand_name) - for k, v in cfg_args_all.items(): - if k in r: - cfg_args_all[k] = getattr(r, k) + r = getattr(cli_args, subcommand_name, None) + if r: + 
for k, v in cfg_args_all.items(): + if hasattr(r, k): + cfg_args_all[k] = getattr(r, k) def set_single_arg(argname, default_source_namespace, result_namespace): if argname not in POSITIONAL or not hasattr(result, argname): - if argname in cli_args: + if hasattr(cli_args, argname): cli_provided_value = getattr(cli_args, argname) r = ( convert_value(cli_provided_value) diff --git a/pyproject.toml b/pyproject.toml index 0864c1e03..9868cc0bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "rich>=9.10.0", "ubiquerg>=0.8.1", "yacman @ git+https://github.com/databio/yacman.git@dev", # TODO: revert to yacman>=0.9.5 after release - "pydantic-argparse>=0.9.0", + "pydantic-settings>=2.0.0", "psutil", ] diff --git a/tests/conftest.py b/tests/conftest.py index 61806a649..47e3cc20e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -135,8 +135,7 @@ def test_args_expansion(pth=None, cmd=None, appendix=list(), dry=True) -> List[s :param bool dry: whether to append dry run flag :return list of strings to pass to looper.main for testing """ - # --looper-config .looper.yaml run --dry-run - # x = [cmd, "-d" if dry else ""] + # --config .looper.yaml run --dry-run x = [] if cmd: x.append(cmd) diff --git a/tests/smoketests/test_other.py b/tests/smoketests/test_other.py index 62669ca2f..9535208ef 100644 --- a/tests/smoketests/test_other.py +++ b/tests/smoketests/test_other.py @@ -330,8 +330,7 @@ def test_excluding_multi_flags_works( "--config", tp, "--exc-flag", - "completed", - "running", + "completed,running", # pydantic-settings uses comma-separated for lists "--dry-run", ] @@ -380,8 +379,7 @@ def test_selecting_multi_flags_works( "--config", tp, "--sel-flag", - "completed", - "running", + "completed,running", ] try: @@ -488,8 +486,7 @@ def test_excluding_attr_and_flags_works( "--sel-attr", "protocol", "--sel-incl", - "PROTO1", - "PROTO2", + "PROTO1,PROTO2", ] try: From cbdcf4655b119581e63d203b214a28559e29e34e Mon Sep 17 00:00:00 2001 From: 
nsheff Date: Thu, 12 Feb 2026 08:45:23 -0500 Subject: [PATCH 148/163] improve cli startup time by moving deps to module level --- looper/__init__.py | 60 +++---- looper/cli_pydantic.py | 241 ++++++++++++----------------- looper/command_models/arguments.py | 7 + looper/command_models/commands.py | 16 +- looper/command_models/messages.py | 20 +++ looper/const.py | 18 +-- tests/test_cli_startup.py | 32 ++++ 7 files changed, 206 insertions(+), 188 deletions(-) create mode 100644 looper/command_models/messages.py create mode 100644 tests/test_cli_startup.py diff --git a/looper/__init__.py b/looper/__init__.py index 19a05ad25..015da2c00 100644 --- a/looper/__init__.py +++ b/looper/__init__.py @@ -7,37 +7,45 @@ """ +from importlib.metadata import version + import logmuse logmuse.init_logger("looper") +__version__ = version("looper") -from importlib.metadata import version - -from .divvy import ( - DEFAULT_COMPUTE_RESOURCES_NAME, - ComputingConfiguration, - select_divvy_config, -) -from .divvy import NEW_COMPUTE_KEY as COMPUTE_KEY +# Lazy imports - only loaded when accessed +_lazy_imports = { + "DEFAULT_COMPUTE_RESOURCES_NAME": ".divvy", + "ComputingConfiguration": ".divvy", + "select_divvy_config": ".divvy", + "COMPUTE_KEY": ".divvy", # NEW_COMPUTE_KEY + "SubmissionConductor": ".conductor", + "write_submission_yaml": ".conductor", + "PipelineInterface": ".pipeline_interface", + "write_custom_template": ".plugins", + "write_sample_yaml": ".plugins", + "write_sample_yaml_cwl": ".plugins", + "write_sample_yaml_prj": ".plugins", + "Project": ".project", +} + + +def __getattr__(name): + if name in _lazy_imports: + module_path = _lazy_imports[name] + import importlib + + module = importlib.import_module(module_path, __package__) + value = getattr(module, name if name != "COMPUTE_KEY" else "NEW_COMPUTE_KEY") + globals()[name] = value # Cache for subsequent access + return value + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return 
list(_lazy_imports.keys()) + ["__version__"] -__version__ = version("looper") -from .conductor import ( - SubmissionConductor, - write_submission_yaml, -) -from .pipeline_interface import PipelineInterface -from .plugins import ( - write_custom_template, - write_sample_yaml, - write_sample_yaml_cwl, - write_sample_yaml_prj, -) -from .project import Project - -# Not used here, but make this the main import interface between peppy and -# looper, so that other modules within this package need not worry about -# the locations of some of the peppy declarations. Effectively, concentrate -# the connection between peppy and looper here, to the extent possible. __all__ = [ "Project", diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 9670fec8d..562a4be28 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -7,145 +7,43 @@ import os import sys +from argparse import Namespace -import logmuse -import yaml -from eido import inspect_project -from pephubclient import PEPHubClient from pydantic_settings import get_subcommand -from rich.console import Console - -from . 
import __version__ -from .command_models.commands import TopLevelParser -from .const import ( - CLI_KEY, - CLI_PROJ_ATTRS, - EXAMPLE_COMPUTE_SPEC_FMT, - PROJECT_PL_ARG, - SAMPLE_EXCLUSION_OPTNAME, - SAMPLE_INCLUSION_OPTNAME, - SAMPLE_PL_ARG, - PipelineLevel, -) -from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config -from .exceptions import ( - MisconfigurationException, - PipestatConfigurationException, - SampleFailedException, -) -from .looper import ( - Checker, - Cleaner, - Collator, - Destroyer, - Linker, - Reporter, - Runner, - Tabulator, -) -from .project import Project, ProjectContext -from .utils import ( - dotfile_path, - enrich_args_via_cfg, - init_generic_pipeline, - initiate_looper_config, - inspect_looper_config_file, - is_PEP_file_type, - is_pephub_registry_path, - looper_config_tutorial, - read_looper_config_file, - read_looper_dotfile, - read_yaml_file, -) - -SUBCOMMAND_NAMES = { - "run": "run", - "rerun": "rerun", - "runp": "runp", - "table": "table", - "report": "report", - "destroy": "destroy", - "check": "check", - "clean": "clean", - "init": "init", - "init_piface": "init_piface", - "link": "link", - "inspect": "inspect", -} - - -class FlatArgs: - """Adapter that presents pydantic model args in a flat namespace-like structure. - - Converts from pydantic-settings structure (top-level + subcommand model) - to flat namespace expected by run_looper and other functions. - - Implements __dict__ property so vars() works correctly. 
- """ - def __init__(self, top_level: TopLevelParser, command: str | None, subcmd_args): - # Use object.__setattr__ to avoid triggering our custom __setattr__ - object.__setattr__(self, "_top_level", top_level) - object.__setattr__(self, "_subcmd_args", subcmd_args) - object.__setattr__(self, "_extra", {}) # For storing extra attributes - object.__setattr__(self, "command", command) - # Copy top-level logging args - object.__setattr__(self, "silent", top_level.silent) - object.__setattr__(self, "verbosity", top_level.verbosity) - object.__setattr__(self, "logdev", top_level.logdev) - - def __getattr__(self, name: str): - # Check _extra first (for attributes set via setattr) - extra = object.__getattribute__(self, "_extra") - if name in extra: - return extra[name] - # Then check subcommand args - subcmd_args = object.__getattribute__(self, "_subcmd_args") - if subcmd_args is not None and hasattr(subcmd_args, name): - return getattr(subcmd_args, name) - # Fall back to top-level (for non-subcommand fields) - top_level = object.__getattribute__(self, "_top_level") - # Only check for non-subcommand attributes on top_level - if name in ("silent", "verbosity", "logdev"): - return getattr(top_level, name) - raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'") - - def __setattr__(self, name: str, value): - if name.startswith("_") or name in ("command", "silent", "verbosity", "logdev"): - object.__setattr__(self, name, value) - else: - # Store in _extra dict for later retrieval - extra = object.__getattribute__(self, "_extra") - extra[name] = value - - @property - def __dict__(self): - """Return all attributes as a dict for vars() compatibility.""" - result = {} - # Add command and logging args - result["command"] = self.command - result["silent"] = self.silent - result["verbosity"] = self.verbosity - result["logdev"] = self.logdev - # Add subcommand args - if self._subcmd_args is not None: - for name, value in self._subcmd_args.model_dump().items(): - 
result[name] = value - # Add extra attributes (set via setattr) - result.update(self._extra) - return result - - -def flatten_args(args: TopLevelParser) -> FlatArgs: - """Convert pydantic-settings args to flat namespace for compatibility.""" +from .command_models.commands import SUPPORTED_COMMANDS, TopLevelParser + + +def flatten_args(args: TopLevelParser) -> Namespace: + """Convert pydantic-settings args to argparse.Namespace for compatibility. + + pydantic-settings produces a nested structure where subcommand args are + accessed via args.run.dry_run, args.check.flags, etc. The rest of looper + expects flat access (args.dry_run, args.flags). This function flattens + the active subcommand's arguments into a standard Namespace. + + Only one subcommand is ever active at a time, so there are no conflicts + between arguments with the same name on different subcommands. + """ subcmd_args = get_subcommand(args, is_required=True) - # Determine command name from the subcommand model type + + # Determine command name from the subcommand model command = None - for name in SUBCOMMAND_NAMES: - if getattr(args, name, None) is subcmd_args: - command = name + for cmd in SUPPORTED_COMMANDS: + if getattr(args, cmd.name, None) is subcmd_args: + command = cmd.name break - return FlatArgs(args, command, subcmd_args) + + ns = Namespace( + command=command, + silent=args.silent, + verbosity=args.verbosity, + logdev=args.logdev, + ) + if subcmd_args is not None: + for k, v in subcmd_args.model_dump().items(): + setattr(ns, k, v) + return ns def opt_attr_pair(name: str) -> tuple[str, str]: @@ -153,7 +51,7 @@ def opt_attr_pair(name: str) -> tuple[str, str]: return f"--{name}", name.replace("-", "_") -def validate_post_parse(args) -> list[str]: +def validate_post_parse(args, sample_exclusion_optname: str, sample_inclusion_optname: str) -> list[str]: """Checks if user is attempting to use mutually exclusive options.""" problems = [] used_exclusives = [ @@ -163,8 +61,8 @@ def 
validate_post_parse(args) -> list[str]: [ "skip", "limit", - SAMPLE_EXCLUSION_OPTNAME, - SAMPLE_INCLUSION_OPTNAME, + sample_exclusion_optname, + sample_inclusion_optname, ], ) if getattr(args, attr, None) @@ -176,13 +74,62 @@ def validate_post_parse(args) -> list[str]: return problems -def run_looper(args: FlatArgs, test_args=None): +def run_looper(args: Namespace, test_args=None): """Run looper with parsed arguments. Args: args: Flattened arguments from pydantic-settings test_args: Optional test arguments for testing purposes """ + # Lazy imports - only load when actually running commands + import logmuse + import yaml + from eido import inspect_project + from pephubclient import PEPHubClient + from rich.console import Console + + from . import __version__ + from .const import ( + CLI_KEY, + CLI_PROJ_ATTRS, + EXAMPLE_COMPUTE_SPEC_FMT, + PROJECT_PL_ARG, + SAMPLE_EXCLUSION_OPTNAME, + SAMPLE_INCLUSION_OPTNAME, + SAMPLE_PL_ARG, + PipelineLevel, + ) + from .divvy import DEFAULT_COMPUTE_RESOURCES_NAME, select_divvy_config + from .exceptions import ( + MisconfigurationException, + PipestatConfigurationException, + SampleFailedException, + ) + from .looper import ( + Checker, + Cleaner, + Collator, + Destroyer, + Linker, + Reporter, + Runner, + Tabulator, + ) + from .project import Project, ProjectContext + from .utils import ( + dotfile_path, + enrich_args_via_cfg, + init_generic_pipeline, + initiate_looper_config, + inspect_looper_config_file, + is_PEP_file_type, + is_pephub_registry_path, + looper_config_tutorial, + read_looper_config_file, + read_looper_dotfile, + read_yaml_file, + ) + global _LOGGER _LOGGER = logmuse.logger_via_cli(args, make_root=True) @@ -193,9 +140,10 @@ def run_looper(args: FlatArgs, test_args=None): print("No command specified. 
Use --help for usage.", file=sys.stderr) sys.exit(1) - cli_use_errors = validate_post_parse(args) + cli_use_errors = validate_post_parse(args, SAMPLE_EXCLUSION_OPTNAME, SAMPLE_INCLUSION_OPTNAME) if cli_use_errors: - print(f"CLI use problem(s): {', '.join(cli_use_errors)}", file=sys.stderr) + print(f"Error: {', '.join(cli_use_errors)}", file=sys.stderr) + print("Run 'looper --help' for usage information.", file=sys.stderr) sys.exit(1) if subcommand_name == "init": @@ -338,7 +286,7 @@ def run_looper(args: FlatArgs, test_args=None): rerun = subcommand_name == "rerun" run = Runner(prj) try: - compute_kwargs = _proc_resources_spec(args) + compute_kwargs = _proc_resources_spec(args, read_yaml_file, EXAMPLE_COMPUTE_SPEC_FMT, _LOGGER) return run(args, rerun=rerun, **compute_kwargs) except SampleFailedException: @@ -352,7 +300,7 @@ def run_looper(args: FlatArgs, test_args=None): raise if subcommand_name == "runp": - compute_kwargs = _proc_resources_spec(args) + compute_kwargs = _proc_resources_spec(args, read_yaml_file, EXAMPLE_COMPUTE_SPEC_FMT, _LOGGER) collate = Collator(prj) collate(args, **compute_kwargs) return collate.debug @@ -424,7 +372,7 @@ def main_cli() -> None: main() -def _proc_resources_spec(args) -> dict[str, str]: +def _proc_resources_spec(args, read_yaml_file, example_compute_spec_fmt, logger) -> dict[str, str]: """Process CLI-sources compute setting specification. There are two sources of compute settings in the CLI alone: @@ -435,6 +383,9 @@ def _proc_resources_spec(args) -> dict[str, str]: Args: args (argparse.Namespace): Arguments namespace. + read_yaml_file: Function to read YAML files. + example_compute_spec_fmt: Example format string for error messages. + logger: Logger instance. Returns: Mapping[str, str]: Binding between resource setting name and value. @@ -443,12 +394,14 @@ def _proc_resources_spec(args) -> dict[str, str]: ValueError: If interpretation of the given specification as encoding of key-value pairs fails. 
""" + import yaml + spec = getattr(args, "compute", None) settings = args.settings try: settings_data = read_yaml_file(settings) or {} except yaml.YAMLError: - _LOGGER.warning( + logger.warning( "Settings file ({}) does not follow YAML format, disregarding".format( settings ) @@ -473,7 +426,7 @@ def _proc_resources_spec(args) -> dict[str, str]: if bads: raise ValueError( "Could not correctly parse itemized compute specification. " - "Correct format: " + EXAMPLE_COMPUTE_SPEC_FMT + "Correct format: " + example_compute_spec_fmt ) elif isinstance(spec, dict): for key, value in spec.items(): diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 747395461..9ab28cf8a 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -218,12 +218,18 @@ class ArgumentEnum(enum.Enum): SAMPLE_PIPELINE_INTERFACES = Argument( name="sample_pipeline_interfaces", + # Backwards compatibility note: Changed from -S to spi with pydantic-settings + # migration. Single-letter aliases are case-insensitive in pydantic-settings, + # causing conflicts with other arguments. alias="spi", default=(list, []), description="Paths to looper sample pipeline interfaces", ) PROJECT_PIPELINE_INTERFACES = Argument( name="project_pipeline_interfaces", + # Backwards compatibility note: Changed from -P to ppi with pydantic-settings + # migration. Single-letter aliases are case-insensitive in pydantic-settings, + # causing conflicts with other arguments. 
alias="ppi", default=(list, []), description="Paths to looper project pipeline interfaces", @@ -254,6 +260,7 @@ class ArgumentEnum(enum.Enum): ) SKIP_FILE_CHECKS = Argument( name="skip_file_checks", + alias="f", # Restored: no conflict since run/rerun/runp don't use FORCE_YES default=(bool, False), description="Do not perform input file checks", ) diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 894d5fe5e..2d147ad36 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -13,23 +13,33 @@ from pydantic import AliasChoices, BeforeValidator, Field from pydantic_settings import BaseSettings, CliSubCommand, SettingsConfigDict -from ..const import MESSAGE_BY_SUBCOMMAND from .arguments import Argument, ArgumentEnum +from .messages import MESSAGE_BY_SUBCOMMAND # Local import, no looper/__init__.py def _parse_cli_list(v): """Parse list values from CLI. - pydantic-settings with AliasChoices serializes list values to JSON strings. - This validator deserializes them back to lists. + Handles: + - JSON arrays from pydantic-settings serialization: '["a","b"]' + - Comma-separated strings from user input: 'a,b' + - Single values: 'a' """ if isinstance(v, str): + # Try JSON first (pydantic-settings serialization) try: parsed = json.loads(v) if isinstance(parsed, list): return parsed except json.JSONDecodeError: pass + # Fall back to comma-separated + if "," in v: + return [item.strip() for item in v.split(",")] + # Single non-empty value as single-item list + if v.strip(): + return [v.strip()] + return [] return v diff --git a/looper/command_models/messages.py b/looper/command_models/messages.py new file mode 100644 index 000000000..71aa82dd5 --- /dev/null +++ b/looper/command_models/messages.py @@ -0,0 +1,20 @@ +"""Subcommand help messages for CLI. + +Extracted to avoid importing looper.const (which triggers heavy package imports) +during CLI startup for --help. 
+""" + +MESSAGE_BY_SUBCOMMAND = { + "run": "Run or submit sample jobs.", + "rerun": "Resubmit sample jobs with failed flags.", + "runp": "Run or submit project jobs.", + "table": "Write summary stats table for project samples.", + "report": "Create browsable HTML report of project results.", + "destroy": "Remove output files of the project.", + "check": "Check flag status of current runs.", + "clean": "Run clean scripts of already processed jobs.", + "inspect": "Print information about a project.", + "init": "Initialize looper config file.", + "init-piface": "Initialize generic pipeline interface.", + "link": "Create directory of symlinks for reported results.", +} diff --git a/looper/const.py b/looper/const.py index 86f17ecb4..e36a768de 100644 --- a/looper/const.py +++ b/looper/const.py @@ -3,6 +3,9 @@ import os from enum import Enum +# Re-exported from command_models.messages for backwards compatibility +from .command_models.messages import MESSAGE_BY_SUBCOMMAND + __all__ = [ "BUTTON_APPEARANCE_BY_FLAG", "TABLE_APPEARANCE_BY_FLAG", @@ -256,21 +259,6 @@ def _get_apperance_dict(type: str, templ: dict = APPEARANCE_BY_FLAG) -> dict: SAMPLE_SELECTION_FLAG_OPTNAME = "sel-flag" SAMPLE_EXCLUSION_FLAG_OPTNAME = "exc-flag" -MESSAGE_BY_SUBCOMMAND = { - "run": "Run or submit sample jobs.", - "rerun": "Resubmit sample jobs with failed flags.", - "runp": "Run or submit project jobs.", - "table": "Write summary stats table for project samples.", - "report": "Create browsable HTML report of project results.", - "destroy": "Remove output files of the project.", - "check": "Check flag status of current runs.", - "clean": "Run clean scripts of already processed jobs.", - "inspect": "Print information about a project.", - "init": "Initialize looper config file.", - "init-piface": "Initialize generic pipeline interface.", - "link": "Create directory of symlinks for reported results.", -} - # Add project/sample enum diff --git a/tests/test_cli_startup.py b/tests/test_cli_startup.py 
new file mode 100644 index 000000000..bc15bc797 --- /dev/null +++ b/tests/test_cli_startup.py @@ -0,0 +1,32 @@ +"""Tests for CLI startup performance.""" + +import subprocess +import time + + +def test_cli_help_startup_time(): + """Ensure --help responds quickly without loading heavy dependencies.""" + start = time.time() + result = subprocess.run( + ["python", "-m", "looper.cli_pydantic", "--help"], + capture_output=True, + text=True, + ) + elapsed = time.time() - start + + assert result.returncode == 0, f"--help failed: {result.stderr}" + assert elapsed < 0.5, f"CLI --help took {elapsed:.2f}s, should be < 0.5s" + + +def test_subcommand_help_startup_time(): + """Ensure subcommand --help also responds quickly.""" + start = time.time() + result = subprocess.run( + ["python", "-m", "looper.cli_pydantic", "run", "--help"], + capture_output=True, + text=True, + ) + elapsed = time.time() - start + + assert result.returncode == 0, f"run --help failed: {result.stderr}" + assert elapsed < 0.5, f"CLI run --help took {elapsed:.2f}s, should be < 0.5s" From 1858f524881b811c90e52baeb8dd9ecc66358667 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 09:14:02 -0500 Subject: [PATCH 149/163] format --- looper/cli_pydantic.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 562a4be28..0a41338b5 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -51,7 +51,9 @@ def opt_attr_pair(name: str) -> tuple[str, str]: return f"--{name}", name.replace("-", "_") -def validate_post_parse(args, sample_exclusion_optname: str, sample_inclusion_optname: str) -> list[str]: +def validate_post_parse( + args, sample_exclusion_optname: str, sample_inclusion_optname: str +) -> list[str]: """Checks if user is attempting to use mutually exclusive options.""" problems = [] used_exclusives = [ @@ -140,7 +142,9 @@ def run_looper(args: Namespace, test_args=None): print("No command specified. 
Use --help for usage.", file=sys.stderr) sys.exit(1) - cli_use_errors = validate_post_parse(args, SAMPLE_EXCLUSION_OPTNAME, SAMPLE_INCLUSION_OPTNAME) + cli_use_errors = validate_post_parse( + args, SAMPLE_EXCLUSION_OPTNAME, SAMPLE_INCLUSION_OPTNAME + ) if cli_use_errors: print(f"Error: {', '.join(cli_use_errors)}", file=sys.stderr) print("Run 'looper --help' for usage information.", file=sys.stderr) @@ -286,7 +290,9 @@ def run_looper(args: Namespace, test_args=None): rerun = subcommand_name == "rerun" run = Runner(prj) try: - compute_kwargs = _proc_resources_spec(args, read_yaml_file, EXAMPLE_COMPUTE_SPEC_FMT, _LOGGER) + compute_kwargs = _proc_resources_spec( + args, read_yaml_file, EXAMPLE_COMPUTE_SPEC_FMT, _LOGGER + ) return run(args, rerun=rerun, **compute_kwargs) except SampleFailedException: @@ -300,7 +306,9 @@ def run_looper(args: Namespace, test_args=None): raise if subcommand_name == "runp": - compute_kwargs = _proc_resources_spec(args, read_yaml_file, EXAMPLE_COMPUTE_SPEC_FMT, _LOGGER) + compute_kwargs = _proc_resources_spec( + args, read_yaml_file, EXAMPLE_COMPUTE_SPEC_FMT, _LOGGER + ) collate = Collator(prj) collate(args, **compute_kwargs) return collate.debug @@ -372,7 +380,9 @@ def main_cli() -> None: main() -def _proc_resources_spec(args, read_yaml_file, example_compute_spec_fmt, logger) -> dict[str, str]: +def _proc_resources_spec( + args, read_yaml_file, example_compute_spec_fmt, logger +) -> dict[str, str]: """Process CLI-sources compute setting specification. 
There are two sources of compute settings in the CLI alone: From 3ebcf6c5e47201c592b2a67841a46a4f1b739743 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 09:33:02 -0500 Subject: [PATCH 150/163] clean up cruft --- looper/cli_pydantic.py | 1 - looper/command_models/DEVELOPER.md | 85 ---------------- looper/command_models/README.md | 38 ++++++- looper/command_models/__init__.py | 5 +- looper/const.py | 1 - looper/utils.py | 155 +++++++---------------------- 6 files changed, 71 insertions(+), 214 deletions(-) delete mode 100644 looper/command_models/DEVELOPER.md diff --git a/looper/cli_pydantic.py b/looper/cli_pydantic.py index 0a41338b5..5da08a66b 100644 --- a/looper/cli_pydantic.py +++ b/looper/cli_pydantic.py @@ -204,7 +204,6 @@ def run_looper(args: Namespace, test_args=None): args = enrich_args_via_cfg( subcommand_name, args, - None, # No parser in pydantic-settings mode test_args=test_args, cli_modifiers=cli_modifiers_dict, ) diff --git a/looper/command_models/DEVELOPER.md b/looper/command_models/DEVELOPER.md deleted file mode 100644 index d71f7bf65..000000000 --- a/looper/command_models/DEVELOPER.md +++ /dev/null @@ -1,85 +0,0 @@ -# Developer documentation - -## Adding new command models - -To add a new model (command) to the project, follow these steps: - -1. Add new arguments in `looper/command_models/arguments.py` if necessary. - -- Add a new entry for the `ArgumentEnum` class. -- For example: - -```python -# arguments.py - -class ArgumentEnum(enum.Enum): - ... - - NEW_ARGUMENT = Argument( - name="new_argument", - default=(new_argument_type, "default_value"), - description="Description of the new argument", - ) - -``` - -2. Create a new command in the existing command creation logic in `looper/command_models/commands.py`. - -- Create a new `Command` instance. -- Create a `pydantic` model for this new command. -- Add the new `Command` instance to `SUPPORTED_COMMANDS`. 
-- For example: - -```python -NewCommandParser = Command( - "new_command", - MESSAGE_BY_SUBCOMMAND["new_command"], - [ - ... - ArgumentEnum.NEW_ARGUMENT.value, - # Add more arguments as needed for the new command - ], -) -NewCommandParserModel = NewCommandParser.create_model() - -SUPPORTED_COMMANDS = [..., NewCommandParser] -``` - -3. Update the new argument(s) and command in `TopLevelParser` from `looper/command_models/commands.py`. - -- Add a new field for the new command. -- Add a new field for the new argument(s). -- For example: - -```python -class TopLevelParser(pydantic.BaseModel): - - # commands - ... - new_command: Optional[NewCommandParserModel] = pydantic.Field(description=NewCommandParser.description) - - # arguments - ... - new_argument: Optional[new_argument_type] = ArgumentEnum.NEW_ARGUMENT.value.with_reduced_default() -``` - -## Special treatment for the `run` command - -The `run` command in our project requires special treatment to accommodate hierarchical namespaces -and properly handle its unique characteristics. Several functions have been adapted to ensure the -correct behavior of the run command, and similar adaptations may be necessary for other commands. - -For developers looking to understand the details of the special treatment given to the `run` -command and its associated changes, we recommend to inspect the following functions / part of the -code: -- `looper/cli_looper.py`: - - `make_hierarchical_if_needed()` - - assignment of the `divcfg` variable - - assignment of the `project_args` variable - - `_proc_resources_spec()` - - `validate_post_parse()` -- `looper/utils.py`: - - `enrich_args_via_cfg()` - -If you are adding new commands to the project / migrate existing commands to a `pydantic` model-based definition, adapt these parts of the codes with equivalent behavior for your new command. 
-Likewise, adapt argument accessions in the corresponding executor in `looper/looper.py` to take into account the hierarchical organization of the command's arguments. diff --git a/looper/command_models/README.md b/looper/command_models/README.md index dea00d8bd..dc4eb4a67 100644 --- a/looper/command_models/README.md +++ b/looper/command_models/README.md @@ -1,4 +1,36 @@ -# `pydantic`-based definitions of `looper` commands and their arguments +# pydantic-based definitions of looper commands and their arguments -With the goal of writing an HTTP API that is in sync with the `looper` CLI, this module defines `looper` commands as `pydantic` models and arguments as fields in there. -These can then be used by the [`pydantic-argparse`](https://pydantic-argparse.supimdos.com/) library to create a type-validated CLI (see `../cli_pydantic.py`), and by the future HTTP API for validating `POST`ed JSON data. Eventually, the `pydantic-argparse`-based CLI will replace the existing `argparse`-based CLI defined in `../cli_looper.py`. +This module defines looper commands as pydantic models for use with: +- `pydantic-settings` for CLI parsing (see `../cli_pydantic.py`) +- HTTP API for validating POST data (see `../api/`) + +## Key files + +- `commands.py` - Command definitions and `TopLevelParser` (pydantic-settings entry point) +- `arguments.py` - Argument definitions (`ArgumentEnum`) +- `messages.py` - Subcommand help text + +## Adding a new command + +1. Add arguments to `ArgumentEnum` in `arguments.py`: + ```python + NEW_ARGUMENT = Argument( + name="new_argument", + default=(str, "default_value"), + description="Description", + ) + ``` + +2. Create the command in `commands.py`: + ```python + NewCommandParser = Command("new_command", MESSAGE_BY_SUBCOMMAND["new_command"], [...]) + NewCommandParserModel = NewCommandParser.create_model() + SUPPORTED_COMMANDS.append(NewCommandParser) + ``` + +3. 
Add to `TopLevelParser`: + ```python + new_command: CliSubCommand[NewCommandParserModel] = Field(description=...) + ``` + +4. Handle the command in `../cli_pydantic.py` `run_looper()`. diff --git a/looper/command_models/__init__.py b/looper/command_models/__init__.py index 46d1c396b..989ba827e 100644 --- a/looper/command_models/__init__.py +++ b/looper/command_models/__init__.py @@ -1,6 +1,5 @@ """ -This package holds `pydantic` models that describe commands and their arguments. +This package holds pydantic models that describe commands and their arguments. -These can be used either by an HTTP API or with the `pydantic-argparse` -library to build a CLI. +These are used by pydantic-settings for CLI parsing and by the HTTP API. """ diff --git a/looper/const.py b/looper/const.py index e36a768de..5b5de37bf 100644 --- a/looper/const.py +++ b/looper/const.py @@ -136,7 +136,6 @@ def _get_apperance_dict(type: str, templ: dict = APPEARANCE_BY_FLAG) -> dict: # Compute-related (for divvy) COMPUTE_SETTINGS_VARNAME = ["DIVCFG"] DEFAULT_COMPUTE_RESOURCES_NAME = "default" -OLD_COMPUTE_KEY = "compute" NEW_COMPUTE_KEY = "compute_packages" DEFAULT_CONFIG_FILEPATH = os.path.join( os.path.dirname(__file__), "default_config", "divvy_config.yaml" diff --git a/looper/utils.py b/looper/utils.py index ecf5b0aba..833566af7 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -13,7 +13,7 @@ import yaml from pephubclient.constants import RegistryPath from peppy import Project as peppyProject -from peppy.const import AMENDMENTS_KEY, CONFIG_KEY, NAME_KEY, SAMPLE_MODS_KEY +from peppy.const import CONFIG_KEY, NAME_KEY, SAMPLE_MODS_KEY from pydantic import ValidationError from rich.console import Console from rich.pretty import pprint @@ -21,7 +21,6 @@ from yacman import load_yaml from yaml.parser import ParserError -from .command_models.commands import SUPPORTED_COMMANDS from .const import ( ALL_SUBCMD_KEY, CLI_KEY, @@ -35,7 +34,6 @@ PEP_CONFIG_KEY, PIPELINE_INTERFACES_KEY, PIPESTAT_KEY, - 
POSITIONAL, PROJECT_PL_ARG, SAMPLE_PL_ARG, PipelineLevel, @@ -228,26 +226,6 @@ def get_file_for_project( return fp -def get_file_for_project_old(prj, appendix: str) -> str: - """Create a path to the file for the current project. - - Takes the possibility of amendment being activated at the time. - - Args: - prj (looper.Project): Project object. - appendix (str): The appendix of the file to create the path for, - like 'objs_summary.tsv' for objects summary file. - - Returns: - str: Path to the file. - """ - fp = os.path.join(prj.output_dir, prj[NAME_KEY]) - if hasattr(prj, AMENDMENTS_KEY) and getattr(prj, AMENDMENTS_KEY): - fp += "_" + "_".join(getattr(prj, AMENDMENTS_KEY)) - fp += "_" + appendix - return fp - - def jinja_render_template_strictly(template: str, namespaces: dict) -> str: """Render a command string in the provided namespaces context. @@ -309,7 +287,6 @@ def read_yaml_file(filepath: str) -> dict | None: def enrich_args_via_cfg( subcommand_name: str, parser_args, - aux_parser, test_args: dict | None = None, cli_modifiers: dict | None = None, ) -> argparse.Namespace: @@ -320,8 +297,6 @@ def enrich_args_via_cfg( Args: subcommand_name: The name of the command used. parser_args (argparse.Namespace): Parsed args by the original parser. - aux_parser (argparse.Namespace): Parsed args by the argument parser - with defaults suppressed. test_args (dict): Dict of args used for pytesting. cli_modifiers (dict): Dict of args existing if user supplied cli args in looper config file. 
@@ -358,109 +333,47 @@ def enrich_args_via_cfg( _LOGGER.debug(msg=f"Merged CLI modifiers: {cfg_args_all}") result = argparse.Namespace() + cli_args = parser_args # Use parser_args directly, already parsed - # Check if we have the old nested structure or new flat structure - # New structure: parser_args has 'command' attribute with subcommand name - # Old structure: parser_args has subcommand as attribute name - is_flat_structure = hasattr(parser_args, "command") - - if is_flat_structure: - # New flat argparse structure - arguments are directly on the namespace - cli_args = parser_args # Use parser_args directly, already parsed - - def set_single_arg(argname, default_source_namespace, result_namespace): - # Priority: CLI > cfg_args_all (PEP config) > parser default - cli_value = getattr(cli_args, argname, None) - cfg_value = cfg_args_all.get(argname) if cfg_args_all else None - default_value = getattr(default_source_namespace, argname, None) - - if cli_value is not None and cli_value != default_value: - # CLI provided a non-default value - use it - r = ( - convert_value(cli_value) - if isinstance(cli_value, str) - else cli_value - ) - elif cfg_value is not None: - # PEP config provided a value + def set_single_arg(argname, default_source_namespace, result_namespace): + # Priority: CLI > cfg_args_all (PEP config) > parser default + cli_value = getattr(cli_args, argname, None) + cfg_value = cfg_args_all.get(argname) if cfg_args_all else None + default_value = getattr(default_source_namespace, argname, None) + + if cli_value is not None and cli_value != default_value: + # CLI provided a non-default value - use it + r = convert_value(cli_value) if isinstance(cli_value, str) else cli_value + elif cfg_value is not None: + # PEP config provided a value + if isinstance(cfg_value, list): + r = [convert_value(i) for i in cfg_value] + elif isinstance(cfg_value, dict): + r = cfg_value + else: + r = convert_value(cfg_value) + else: + # Use default + r = default_value + 
setattr(result_namespace, argname, r) + + # Copy all arguments from parser_args to result + for argname in vars(parser_args): + set_single_arg(argname, parser_args, result) + + # Also add any cfg_args that weren't in parser_args + if cfg_args_all: + for argname in cfg_args_all: + if not hasattr(result, argname): + cfg_value = cfg_args_all[argname] if isinstance(cfg_value, list): r = [convert_value(i) for i in cfg_value] elif isinstance(cfg_value, dict): r = cfg_value else: r = convert_value(cfg_value) - else: - # Use default - r = default_value - setattr(result_namespace, argname, r) - - # Copy all arguments from parser_args to result - for argname in vars(parser_args): - set_single_arg(argname, parser_args, result) - - # Also add any cfg_args that weren't in parser_args - if cfg_args_all: - for argname in cfg_args_all: - if not hasattr(result, argname): - cfg_value = cfg_args_all[argname] - if isinstance(cfg_value, list): - r = [convert_value(i) for i in cfg_value] - elif isinstance(cfg_value, dict): - r = cfg_value - else: - r = convert_value(cfg_value) - setattr(result, argname, r) - - return result - - # Old nested structure (pydantic-argparse) - kept for backwards compatibility - if test_args: - cli_args, _ = aux_parser.parse_known_args(args=test_args) - else: - cli_args, _ = aux_parser.parse_known_args() + setattr(result, argname, r) - # If any CLI args were provided, make sure they take priority - if cli_args: - r = getattr(cli_args, subcommand_name, None) - if r: - for k, v in cfg_args_all.items(): - if hasattr(r, k): - cfg_args_all[k] = getattr(r, k) - - def set_single_arg(argname, default_source_namespace, result_namespace): - if argname not in POSITIONAL or not hasattr(result, argname): - if hasattr(cli_args, argname): - cli_provided_value = getattr(cli_args, argname) - r = ( - convert_value(cli_provided_value) - if isinstance(cli_provided_value, str) - else cli_provided_value - ) - elif cfg_args_all is not None and argname in cfg_args_all: - if 
isinstance(cfg_args_all[argname], list): - r = [convert_value(i) for i in cfg_args_all[argname]] - elif isinstance(cfg_args_all[argname], dict): - r = cfg_args_all[argname] - else: - r = convert_value(cfg_args_all[argname]) - else: - r = getattr(default_source_namespace, argname) - setattr(result_namespace, argname, r) - - for top_level_argname in vars(parser_args): - if top_level_argname not in [cmd.name for cmd in SUPPORTED_COMMANDS]: - # this argument is a top-level argument - set_single_arg(top_level_argname, parser_args, result) - else: - # this argument actually is a subcommand - enriched_command_namespace = argparse.Namespace() - command_namespace = getattr(parser_args, top_level_argname) - if command_namespace: - for argname in vars(command_namespace): - set_single_arg( - argname, command_namespace, enriched_command_namespace - ) - setattr(result, top_level_argname, enriched_command_namespace) return result From eaa799d162b5ca88075f85099bab8640e1c00e2d Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 10:09:17 -0500 Subject: [PATCH 151/163] clean up json parsing of pydantic parsed cli --- looper/command_models/arguments.py | 24 ++++++++++++------------ looper/command_models/commands.py | 30 +++++++++++++----------------- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index 9ab28cf8a..e0c98622f 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -156,34 +156,34 @@ class ArgumentEnum(enum.Enum): LUMP = Argument( name="lump", alias="u", - default=(float, None), + default=(float | None, None), description="Total input file size (GB) to batch into one job", ) LUMPN = Argument( name="lump_n", alias="n", - default=(int, None), + default=(int | None, None), description="Number of commands to batch into one job", ) LUMPJ = Argument( name="lump_j", alias="j", - default=(int, None), + default=(int | None, None), description="Lump 
samples into number of jobs.", ) LIMIT = Argument( - name="limit", alias="l", default=(int, None), description="Limit to n samples" + name="limit", alias="l", default=(int | None, None), description="Limit to n samples" ) SKIP = Argument( name="skip", alias="k", - default=(int, None), + default=(int | None, None), description="Skip samples by numerical index", ) CONFIG = Argument( name="config", alias="c", - default=(str, None), + default=(str | None, None), description="Looper configuration file (YAML)", ) SETTINGS = Argument( @@ -193,19 +193,19 @@ class ArgumentEnum(enum.Enum): ) PEP_CONFIG = Argument( name="pep_config", - default=(str, None), + default=(str | None, None), description="PEP configuration file", ) OUTPUT_DIR = Argument( name="output_dir", alias="o", - default=(str, None), + default=(str | None, None), description="Output directory", ) REPORT_OUTPUT_DIR = Argument( name="report_dir", alias="r", - default=(str, None), + default=(str | None, None), description="Set location for looper report and looper table outputs", ) @@ -267,7 +267,7 @@ class ArgumentEnum(enum.Enum): PACKAGE = Argument( name="package", alias="p", - default=(str, None), + default=(str | None, None), description="Name of computing resource package to use", ) COMPUTE = Argument( @@ -289,7 +289,7 @@ class ArgumentEnum(enum.Enum): ) VERBOSITY = Argument( name="verbosity", - default=(int, None), + default=(int | None, None), description="Alternate mode of expression for logging level that better " "accords with intuition about how to convey this.", ) @@ -302,7 +302,7 @@ class ArgumentEnum(enum.Enum): ) PIPESTAT = Argument( name="pipestat", - default=(str, None), + default=(str | None, None), description="Path to pipestat files.", ) PORTABLE = Argument( diff --git a/looper/command_models/commands.py b/looper/command_models/commands.py index 2d147ad36..2e1021415 100644 --- a/looper/command_models/commands.py +++ b/looper/command_models/commands.py @@ -17,34 +17,31 @@ from .messages import 
MESSAGE_BY_SUBCOMMAND # Local import, no looper/__init__.py -def _parse_cli_list(v): - """Parse list values from CLI. +def deserialize_cli_list(v): + """Deserialize list values from pydantic-settings CLI parsing. - Handles: - - JSON arrays from pydantic-settings serialization: '["a","b"]' - - Comma-separated strings from user input: 'a,b' - - Single values: 'a' + pydantic-settings internally serializes all list values as JSON strings + (e.g., ["a"] becomes '["a"]') before passing to CliSubCommand models. + Since subcommands are instantiated directly (not through settings sources), + the automatic JSON deserialization doesn't happen. + + This is a pydantic-settings limitation, not a bug in our code. + See: https://github.com/pydantic/pydantic-settings/issues/335 """ + if isinstance(v, list): + return v if isinstance(v, str): - # Try JSON first (pydantic-settings serialization) try: parsed = json.loads(v) if isinstance(parsed, list): return parsed except json.JSONDecodeError: pass - # Fall back to comma-separated - if "," in v: - return [item.strip() for item in v.split(",")] - # Single non-empty value as single-item list - if v.strip(): - return [v.strip()] - return [] + return [x.strip() for x in v.split(",") if x.strip()] return v -# Annotated type for list fields that handles CLI JSON serialization -CliList = Annotated[list, BeforeValidator(_parse_cli_list)] +CliList = Annotated[list, BeforeValidator(deserialize_cli_list)] @dataclass @@ -71,7 +68,6 @@ def create_model(self) -> type[pydantic.BaseModel]: arguments = {} for arg in self.arguments: arg_type, arg_default_value = arg.default - # Use CliList for list fields to handle pydantic-settings JSON serialization if arg_type is list: arg_type = CliList # kebab-case version of the name for --dry-run style From c7b771bdc5fd43491cd55243a3f1a70f35bab17d Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 10:10:42 -0500 Subject: [PATCH 152/163] bring back logo --- README.md | 2 +- looper_logo.svg | 130 
++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 looper_logo.svg diff --git a/README.md b/README.md index 631f686c4..9e6bdefd0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# looper logo +# looper logo ![Run pytests](https://github.com/pepkit/looper/workflows/Run%20pytests/badge.svg) [![PEP compatible](http://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io) diff --git a/looper_logo.svg b/looper_logo.svg new file mode 100644 index 000000000..d60979f69 --- /dev/null +++ b/looper_logo.svg @@ -0,0 +1,130 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + From 1cb2fcdf7bac8419b72b2fc8ba88be3cbe97d684 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 10:25:39 -0500 Subject: [PATCH 153/163] add new stricter looper-pipestat interface --- looper/conductor.py | 15 +- looper/pipeline_interface.py | 43 ++++ .../pipeline_interface_schema_project.yaml | 9 + .../pipeline_interface_schema_sample.yaml | 9 + looper/utils.py | 16 ++ .../pipeline/pipeline_interface1_project.yaml | 1 + .../pipeline/pipeline_interface1_sample.yaml | 1 + .../pipeline/pipeline_interface2_project.yaml | 1 + .../pipeline/pipeline_interface2_sample.yaml | 1 + .../pipestat_pipeline_interface1_sample.yaml | 1 + .../pipestat_pipeline_interface2_sample.yaml | 1 + tests/data/pipeline_interface1_project.yaml | 1 + .../pipeline_interface1_project_pipestat.yaml | 1 + tests/data/pipeline_interface1_sample.yaml | 1 + .../pipeline_interface1_sample_pipestat.yaml | 1 + tests/data/pipeline_interface2_project.yaml | 1 + tests/data/pipeline_interface2_sample.yaml | 1 + tests/test_pipestat_handoff.py | 213 ++++++++++++++++++ 18 files changed, 316 insertions(+), 1 deletion(-) create mode 100644 tests/test_pipestat_handoff.py diff --git a/looper/conductor.py b/looper/conductor.py index 962351bda..0e9759f27 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -48,6 +48,7 @@ 
expand_nested_var_templates, fetch_sample_flags, jinja_render_template_strictly, + render_inject_env_vars, ) _LOGGER = logging.getLogger(__name__) @@ -834,7 +835,19 @@ def write_script(self, pool: list, size: float) -> str: self._num_good_job_submissions += 1 self._num_total_job_submissions += 1 - looper["command"] = "\n".join(commands) + # Render inject_env_vars and prepend export statements to command + inject_env_vars = self.pl_iface.get("inject_env_vars", {}) + env_exports = [] + if inject_env_vars: + rendered_env_vars = render_inject_env_vars(inject_env_vars, namespaces) + for var_name, var_value in rendered_env_vars.items(): + env_exports.append(f"export {var_name}={shlex.quote(var_value)}") + _LOGGER.debug("Injected env vars:\n{}".format("\n".join(env_exports))) + + # Build final command with env exports prepended + all_lines = env_exports + commands + looper["command"] = "\n".join(all_lines) + if self.collate: _LOGGER.debug("samples namespace:\n{}".format(self.prj.samples)) else: diff --git a/looper/pipeline_interface.py b/looper/pipeline_interface.py index b962da0a7..87b453171 100644 --- a/looper/pipeline_interface.py +++ b/looper/pipeline_interface.py @@ -18,6 +18,7 @@ ID_COLNAME, INPUT_SCHEMA_KEY, LOOPER_KEY, + OUTPUT_SCHEMA_KEY, PIFACE_SCHEMA_SRC, PIPELINE_INTERFACE_PIPELINE_NAME_KEY, RESOURCES_KEY, @@ -65,11 +66,53 @@ def __init__(self, config: str | Mapping, pipeline_type: str | None = None) -> N self.update(config) self._validate(schema_src=PIFACE_SCHEMA_SRC) self._expand_paths(["compute", "dynamic_variables_script_path"]) + self._validate_pipestat_handoff() @property def pipeline_name(self) -> str: return self[PIPELINE_INTERFACE_PIPELINE_NAME_KEY] + def _validate_pipestat_handoff(self) -> None: + """Validate that pipestat-enabled interfaces pass config to pipeline. + + Raises: + PipelineInterfaceConfigError: If output_schema present but no handoff mechanism. 
+ """ + if OUTPUT_SCHEMA_KEY not in self: + return # Not pipestat-enabled, nothing to validate + + if self.get("pipestat_config_required") is False: + return # Explicitly disabled + + # Check for CLI handoff: {pipestat.config_file} or {pipestat.*} in command_template + cmd_template = self.get("command_template", "") + # Also check sample_interface and project_interface sections + sample_iface = self.get("sample_interface", {}) + project_iface = self.get("project_interface", {}) + sample_cmd = sample_iface.get("command_template", "") if sample_iface else "" + project_cmd = project_iface.get("command_template", "") if project_iface else "" + + has_cli_handoff = ( + "{pipestat." in cmd_template + or "{pipestat." in sample_cmd + or "{pipestat." in project_cmd + ) + + # Check for env var handoff: PIPESTAT_CONFIG in inject_env_vars + inject_env_vars = self.get("inject_env_vars", {}) + has_env_handoff = "PIPESTAT_CONFIG" in inject_env_vars + + if not has_cli_handoff and not has_env_handoff: + raise PipelineInterfaceConfigError( + f"Pipeline '{self.pipeline_name}' has output_schema but no pipestat config handoff.\n\n" + f"Add one of:\n" + f" 1. In command_template: --pipestat-config {{pipestat.config_file}}\n" + f" 2. In inject_env_vars:\n" + f" inject_env_vars:\n" + f' PIPESTAT_CONFIG: "{{pipestat.config_file}}"\n\n' + f"Or set 'pipestat_config_required: false' to disable this check." + ) + def render_var_templates(self, namespaces: dict) -> dict: """ Render path templates under 'var_templates' in this pipeline interface. 
diff --git a/looper/schemas/pipeline_interface_schema_project.yaml b/looper/schemas/pipeline_interface_schema_project.yaml index 294e17aea..7f0a2aaa6 100644 --- a/looper/schemas/pipeline_interface_schema_project.yaml +++ b/looper/schemas/pipeline_interface_schema_project.yaml @@ -45,4 +45,13 @@ properties: singularity_image: type: string description: "Singularity image identifier" + inject_env_vars: + type: object + description: "Environment variables to inject into submission scripts. Keys are variable names, values are Jinja2 templates." + additionalProperties: + type: string + pipestat_config_required: + type: boolean + description: "If false, disables validation that pipestat config is passed to pipeline. Default true." + default: true required: [pipeline_name, pipeline_type, command_template] diff --git a/looper/schemas/pipeline_interface_schema_sample.yaml b/looper/schemas/pipeline_interface_schema_sample.yaml index a69a2ac7a..63b9d8b0c 100644 --- a/looper/schemas/pipeline_interface_schema_sample.yaml +++ b/looper/schemas/pipeline_interface_schema_sample.yaml @@ -45,4 +45,13 @@ properties: singularity_image: type: string description: "Singularity image identifier" + inject_env_vars: + type: object + description: "Environment variables to inject into submission scripts. Keys are variable names, values are Jinja2 templates." + additionalProperties: + type: string + pipestat_config_required: + type: boolean + description: "If false, disables validation that pipestat config is passed to pipeline. Default true." + default: true required: [pipeline_name, pipeline_type, command_template] diff --git a/looper/utils.py b/looper/utils.py index 2b3a43eb8..3d03809a4 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -290,6 +290,22 @@ def _finfun(x): return rendered +def render_inject_env_vars(inject_env_vars: dict, namespaces: dict) -> dict[str, str]: + """Render inject_env_vars templates to concrete values. 
+ + Args: + inject_env_vars (dict): Mapping of variable names to Jinja2 templates. + namespaces (dict): Namespaces to use for rendering. + + Returns: + dict[str, str]: Rendered environment variable name-value pairs. + """ + rendered = {} + for var_name, template in inject_env_vars.items(): + rendered[var_name] = jinja_render_template_strictly(template, namespaces) + return rendered + + def read_yaml_file(filepath: str) -> dict | None: """Read a YAML file. diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml index 534905cad..43d0077a0 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml @@ -1,5 +1,6 @@ pipeline_name: PIPELINE1 output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/col_pipeline1.py" project_interface: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml index d0d608498..f22c6485c 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml @@ -1,6 +1,7 @@ pipeline_name: PIPELINE1 input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipeline1.py" pre_submit: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml index df557d820..5b7893368 100644 --- 
a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml @@ -1,5 +1,6 @@ pipeline_name: OTHER_PIPELINE2 output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/col_pipeline2.py" project_interface: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml index 0329d33a2..69b0e1f0e 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml @@ -1,5 +1,6 @@ pipeline_name: OTHER_PIPELINE2 output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/other_pipeline2.py" pre_submit: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml index 4bdbab1fc..52d70d962 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml @@ -1,6 +1,7 @@ pipeline_name: example_pipestat_pipeline input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: pipestat_output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipeline1.py" pre_submit: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml index 3fa6829c5..5f5f5cab7 100644 --- 
a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml @@ -1,6 +1,7 @@ pipeline_name: example_pipestat_pipeline input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: pipestat_output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/other_pipeline2.py" pre_submit: diff --git a/tests/data/pipeline_interface1_project.yaml b/tests/data/pipeline_interface1_project.yaml index cddc14b76..d3467997f 100644 --- a/tests/data/pipeline_interface1_project.yaml +++ b/tests/data/pipeline_interface1_project.yaml @@ -1,6 +1,7 @@ pipeline_name: PIPELINE1 pipeline_type: project output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/col_pipeline1.py" command_template: > diff --git a/tests/data/pipeline_interface1_project_pipestat.yaml b/tests/data/pipeline_interface1_project_pipestat.yaml index fc341ac2d..51f3859a9 100644 --- a/tests/data/pipeline_interface1_project_pipestat.yaml +++ b/tests/data/pipeline_interface1_project_pipestat.yaml @@ -1,6 +1,7 @@ pipeline_name: PIPELINE1 pipeline_type: project output_schema: pipestat_output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/col_pipeline1.py" command_template: > diff --git a/tests/data/pipeline_interface1_sample.yaml b/tests/data/pipeline_interface1_sample.yaml index 43638d923..a7baea76d 100644 --- a/tests/data/pipeline_interface1_sample.yaml +++ b/tests/data/pipeline_interface1_sample.yaml @@ -2,6 +2,7 @@ pipeline_name: PIPELINE1 pipeline_type: sample input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/pipeline1.py" pre_submit: diff --git a/tests/data/pipeline_interface1_sample_pipestat.yaml 
b/tests/data/pipeline_interface1_sample_pipestat.yaml index d4e5418a2..67b12ec88 100644 --- a/tests/data/pipeline_interface1_sample_pipestat.yaml +++ b/tests/data/pipeline_interface1_sample_pipestat.yaml @@ -2,6 +2,7 @@ pipeline_name: PIPELINE1 pipeline_type: sample input_schema: https://schema.databio.org/pep/2.0.0.yaml output_schema: pipestat_output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/pipeline1.py" pre_submit: diff --git a/tests/data/pipeline_interface2_project.yaml b/tests/data/pipeline_interface2_project.yaml index 7c4a42238..cb1849b7d 100644 --- a/tests/data/pipeline_interface2_project.yaml +++ b/tests/data/pipeline_interface2_project.yaml @@ -1,6 +1,7 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: project output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/col_pipeline2.py" command_template: > diff --git a/tests/data/pipeline_interface2_sample.yaml b/tests/data/pipeline_interface2_sample.yaml index 987f7873d..fbe8cf32d 100644 --- a/tests/data/pipeline_interface2_sample.yaml +++ b/tests/data/pipeline_interface2_sample.yaml @@ -1,6 +1,7 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: sample output_schema: output_schema.yaml +pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/other_pipeline2.py" pre_submit: diff --git a/tests/test_pipestat_handoff.py b/tests/test_pipestat_handoff.py new file mode 100644 index 000000000..db9da3c15 --- /dev/null +++ b/tests/test_pipestat_handoff.py @@ -0,0 +1,213 @@ +"""Tests for pipestat config handoff validation.""" + +import pytest + +from looper.exceptions import PipelineInterfaceConfigError +from looper.pipeline_interface import PipelineInterface + + +class TestPipestatHandoffValidation: + """Tests for pipestat config handoff validation in PipelineInterface.""" + + def test_cli_handoff_with_config_file(self, tmp_path): + """Interface with {pipestat.config_file} in 
command_template passes validation.""" + piface_content = """ +pipeline_name: test_pipeline +pipeline_type: sample +output_schema: schema.yaml +command_template: > + python pipeline.py --pipestat-config {pipestat.config_file} +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should not raise + pi = PipelineInterface(str(piface_path)) + assert pi.pipeline_name == "test_pipeline" + + def test_cli_handoff_with_other_pipestat_var(self, tmp_path): + """Interface with any {pipestat.*} in command_template passes validation.""" + piface_content = """ +pipeline_name: test_pipeline +pipeline_type: sample +output_schema: schema.yaml +command_template: > + python pipeline.py {pipestat.results_file} {pipestat.output_schema} +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should not raise + pi = PipelineInterface(str(piface_path)) + assert pi.pipeline_name == "test_pipeline" + + def test_cli_handoff_in_sample_interface(self, tmp_path): + """Interface with {pipestat.*} in sample_interface.command_template passes.""" + piface_content = """ +pipeline_name: test_pipeline +output_schema: schema.yaml +sample_interface: + pipeline_type: sample + command_template: > + python pipeline.py --config {pipestat.config_file} +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should not raise + pi = PipelineInterface(str(piface_path)) + assert pi.pipeline_name == "test_pipeline" + + def test_cli_handoff_in_project_interface(self, tmp_path): + """Interface with {pipestat.*} in project_interface.command_template passes.""" + piface_content = """ +pipeline_name: test_pipeline +output_schema: schema.yaml +project_interface: + pipeline_type: project + command_template: > + python pipeline.py --config {pipestat.config_file} +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should not raise + pi = 
PipelineInterface(str(piface_path)) + assert pi.pipeline_name == "test_pipeline" + + def test_env_var_handoff(self, tmp_path): + """Interface with PIPESTAT_CONFIG in inject_env_vars passes validation.""" + piface_content = """ +pipeline_name: test_pipeline +pipeline_type: sample +output_schema: schema.yaml +inject_env_vars: + PIPESTAT_CONFIG: "{pipestat.config_file}" +command_template: > + python pipeline.py +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should not raise + pi = PipelineInterface(str(piface_path)) + assert pi.pipeline_name == "test_pipeline" + + def test_missing_handoff_raises_error(self, tmp_path): + """Interface with output_schema but no handoff mechanism raises error.""" + piface_content = """ +pipeline_name: test_pipeline +pipeline_type: sample +output_schema: schema.yaml +command_template: > + python pipeline.py --no-pipestat-handoff +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + with pytest.raises(PipelineInterfaceConfigError) as exc_info: + PipelineInterface(str(piface_path)) + + error_msg = str(exc_info.value) + assert "test_pipeline" in error_msg + assert "output_schema" in error_msg + assert "pipestat" in error_msg.lower() + + def test_no_output_schema_skips_validation(self, tmp_path): + """Interface without output_schema skips pipestat validation entirely.""" + piface_content = """ +pipeline_name: test_pipeline +pipeline_type: sample +command_template: > + python pipeline.py --regular-pipeline +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should not raise - no pipestat, no validation + pi = PipelineInterface(str(piface_path)) + assert pi.pipeline_name == "test_pipeline" + + def test_pipestat_config_required_false_skips_validation(self, tmp_path): + """Setting pipestat_config_required: false disables validation.""" + piface_content = """ +pipeline_name: test_pipeline +pipeline_type: sample +output_schema: 
schema.yaml +pipestat_config_required: false +command_template: > + python pipeline.py --custom-pipestat-handling +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should not raise due to pipestat_config_required: false + pi = PipelineInterface(str(piface_path)) + assert pi.pipeline_name == "test_pipeline" + + def test_error_message_includes_guidance(self, tmp_path): + """Error message includes clear guidance on how to fix the issue.""" + piface_content = """ +pipeline_name: my_pipeline +pipeline_type: sample +output_schema: schema.yaml +command_template: > + python pipeline.py +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + with pytest.raises(PipelineInterfaceConfigError) as exc_info: + PipelineInterface(str(piface_path)) + + error_msg = str(exc_info.value) + # Should mention both options + assert "command_template" in error_msg + assert "inject_env_vars" in error_msg + assert "PIPESTAT_CONFIG" in error_msg + # Should mention override option + assert "pipestat_config_required: false" in error_msg + + +class TestInjectEnvVars: + """Tests for inject_env_vars rendering in submission scripts.""" + + def test_inject_env_vars_renders_templates(self, tmp_path): + """inject_env_vars templates are rendered with namespaces.""" + from looper.utils import render_inject_env_vars + + inject_env_vars = { + "PIPESTAT_CONFIG": "{pipestat.config_file}", + "OUTPUT_DIR": "{looper.output_dir}", + } + namespaces = { + "pipestat": {"config_file": "/path/to/pipestat_config.yaml"}, + "looper": {"output_dir": "/path/to/output"}, + } + + result = render_inject_env_vars(inject_env_vars, namespaces) + + assert result["PIPESTAT_CONFIG"] == "/path/to/pipestat_config.yaml" + assert result["OUTPUT_DIR"] == "/path/to/output" + + def test_inject_env_vars_schema_valid(self, tmp_path): + """inject_env_vars passes schema validation.""" + piface_content = """ +pipeline_name: test_pipeline +pipeline_type: sample 
+output_schema: schema.yaml +inject_env_vars: + PIPESTAT_CONFIG: "{pipestat.config_file}" + CUSTOM_VAR: "static_value" + DYNAMIC_VAR: "{looper.output_dir}/subdir" +command_template: > + python pipeline.py +""" + piface_path = tmp_path / "piface.yaml" + piface_path.write_text(piface_content) + + # Should pass schema validation + pi = PipelineInterface(str(piface_path)) + assert pi.get("inject_env_vars") is not None + assert pi["inject_env_vars"]["PIPESTAT_CONFIG"] == "{pipestat.config_file}" + assert pi["inject_env_vars"]["CUSTOM_VAR"] == "static_value" From 1aa22941757cf5cddde69b1e50ed3a2969b6b8f4 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 10:31:25 -0500 Subject: [PATCH 154/163] simplify tests --- .../advanced_test/pipeline/pipeline_interface1_project.yaml | 2 -- .../advanced_test/pipeline/pipeline_interface1_sample.yaml | 2 -- .../advanced_test/pipeline/pipeline_interface2_project.yaml | 2 -- .../advanced_test/pipeline/pipeline_interface2_sample.yaml | 2 -- .../pipeline/pipestat_pipeline_interface1_sample.yaml | 2 -- .../pipeline/pipestat_pipeline_interface2_sample.yaml | 2 -- tests/data/pipeline_interface1_project.yaml | 2 -- tests/data/pipeline_interface1_project_pipestat.yaml | 2 -- tests/data/pipeline_interface1_sample.yaml | 2 -- tests/data/pipeline_interface1_sample_pipestat.yaml | 2 -- tests/data/pipeline_interface2_project.yaml | 2 -- tests/data/pipeline_interface2_sample.yaml | 2 -- 12 files changed, 24 deletions(-) diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml index 43d0077a0..711d7b70f 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_project.yaml @@ -1,6 +1,4 @@ pipeline_name: PIPELINE1 -output_schema: output_schema.yaml 
-pipestat_config_required: false var_templates: path: "{looper.piface_dir}/col_pipeline1.py" project_interface: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml index f22c6485c..e47f597fd 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface1_sample.yaml @@ -1,7 +1,5 @@ pipeline_name: PIPELINE1 input_schema: https://schema.databio.org/pep/2.0.0.yaml -output_schema: output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipeline1.py" pre_submit: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml index 5b7893368..c3600c3c2 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_project.yaml @@ -1,6 +1,4 @@ pipeline_name: OTHER_PIPELINE2 -output_schema: output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/col_pipeline2.py" project_interface: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml index 69b0e1f0e..6689e76a0 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipeline_interface2_sample.yaml @@ -1,6 +1,4 @@ pipeline_name: OTHER_PIPELINE2 -output_schema: output_schema.yaml -pipestat_config_required: false var_templates: path: 
"{looper.piface_dir}/other_pipeline2.py" pre_submit: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml index 52d70d962..183ecf191 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface1_sample.yaml @@ -1,7 +1,5 @@ pipeline_name: example_pipestat_pipeline input_schema: https://schema.databio.org/pep/2.0.0.yaml -output_schema: pipestat_output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipeline1.py" pre_submit: diff --git a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml index 5f5f5cab7..8bcf67a3f 100644 --- a/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml +++ b/tests/data/hello_looper-dev/pytesting/advanced_test/pipeline/pipestat_pipeline_interface2_sample.yaml @@ -1,7 +1,5 @@ pipeline_name: example_pipestat_pipeline input_schema: https://schema.databio.org/pep/2.0.0.yaml -output_schema: pipestat_output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/other_pipeline2.py" pre_submit: diff --git a/tests/data/pipeline_interface1_project.yaml b/tests/data/pipeline_interface1_project.yaml index d3467997f..2861c20d5 100644 --- a/tests/data/pipeline_interface1_project.yaml +++ b/tests/data/pipeline_interface1_project.yaml @@ -1,7 +1,5 @@ pipeline_name: PIPELINE1 pipeline_type: project -output_schema: output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/col_pipeline1.py" command_template: > diff --git 
a/tests/data/pipeline_interface1_project_pipestat.yaml b/tests/data/pipeline_interface1_project_pipestat.yaml index 51f3859a9..2861c20d5 100644 --- a/tests/data/pipeline_interface1_project_pipestat.yaml +++ b/tests/data/pipeline_interface1_project_pipestat.yaml @@ -1,7 +1,5 @@ pipeline_name: PIPELINE1 pipeline_type: project -output_schema: pipestat_output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/col_pipeline1.py" command_template: > diff --git a/tests/data/pipeline_interface1_sample.yaml b/tests/data/pipeline_interface1_sample.yaml index a7baea76d..f455d8171 100644 --- a/tests/data/pipeline_interface1_sample.yaml +++ b/tests/data/pipeline_interface1_sample.yaml @@ -1,8 +1,6 @@ pipeline_name: PIPELINE1 pipeline_type: sample input_schema: https://schema.databio.org/pep/2.0.0.yaml -output_schema: output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/pipeline1.py" pre_submit: diff --git a/tests/data/pipeline_interface1_sample_pipestat.yaml b/tests/data/pipeline_interface1_sample_pipestat.yaml index 67b12ec88..f455d8171 100644 --- a/tests/data/pipeline_interface1_sample_pipestat.yaml +++ b/tests/data/pipeline_interface1_sample_pipestat.yaml @@ -1,8 +1,6 @@ pipeline_name: PIPELINE1 pipeline_type: sample input_schema: https://schema.databio.org/pep/2.0.0.yaml -output_schema: pipestat_output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/pipeline1.py" pre_submit: diff --git a/tests/data/pipeline_interface2_project.yaml b/tests/data/pipeline_interface2_project.yaml index cb1849b7d..d589db752 100644 --- a/tests/data/pipeline_interface2_project.yaml +++ b/tests/data/pipeline_interface2_project.yaml @@ -1,7 +1,5 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: project -output_schema: output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/col_pipeline2.py" command_template: > 
diff --git a/tests/data/pipeline_interface2_sample.yaml b/tests/data/pipeline_interface2_sample.yaml index fbe8cf32d..094214722 100644 --- a/tests/data/pipeline_interface2_sample.yaml +++ b/tests/data/pipeline_interface2_sample.yaml @@ -1,7 +1,5 @@ pipeline_name: OTHER_PIPELINE2 pipeline_type: sample -output_schema: output_schema.yaml -pipestat_config_required: false var_templates: path: "{looper.piface_dir}/pipelines/other_pipeline2.py" pre_submit: From c663224393c3ac177ec11c89b52b240a1b4b8b1b Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 12 Feb 2026 12:37:28 -0500 Subject: [PATCH 155/163] format --- looper/command_models/arguments.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/looper/command_models/arguments.py b/looper/command_models/arguments.py index e0c98622f..360cea32c 100644 --- a/looper/command_models/arguments.py +++ b/looper/command_models/arguments.py @@ -172,7 +172,10 @@ class ArgumentEnum(enum.Enum): description="Lump samples into number of jobs.", ) LIMIT = Argument( - name="limit", alias="l", default=(int | None, None), description="Limit to n samples" + name="limit", + alias="l", + default=(int | None, None), + description="Limit to n samples", ) SKIP = Argument( name="skip", From 0989370dd283bfb4ad0f397a2ad5049037ed4e15 Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 13 Feb 2026 16:36:31 -0500 Subject: [PATCH 156/163] Separate fast unit tests from slow CLI integration tests. 
version bump to 2.1.0 --- .github/workflows/run-pytest.yml | 4 +- pyproject.toml | 8 +- pytest.ini | 6 - tests/conftest.py | 322 +----------------- tests/integration/__init__.py | 1 + tests/integration/conftest.py | 241 +++++++++++++ tests/{ => integration}/test_clean.py | 0 .../test_cli_commands.py} | 5 +- .../test_cli_run.py} | 42 +-- tests/{ => integration}/test_cli_startup.py | 5 + .../test_cli_validation.py | 2 +- tests/{ => integration}/test_comprehensive.py | 5 +- tests/scripts/test-integration.sh | 15 + tests/smoketests/.looper.yaml | 5 - tests/smoketests/__init__.py | 0 tests/unit/__init__.py | 1 + tests/unit/conftest.py | 24 ++ tests/{ => unit}/test_desired_sample_range.py | 0 tests/{ => unit}/test_natural_range.py | 0 .../test_pipeline_interface.py} | 0 20 files changed, 335 insertions(+), 351 deletions(-) delete mode 100644 pytest.ini create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/conftest.py rename tests/{ => integration}/test_clean.py (100%) rename tests/{smoketests/test_other.py => integration/test_cli_commands.py} (99%) rename tests/{smoketests/test_run.py => integration/test_cli_run.py} (96%) rename tests/{ => integration}/test_cli_startup.py (87%) rename tests/{smoketests => integration}/test_cli_validation.py (96%) rename tests/{ => integration}/test_comprehensive.py (98%) create mode 100755 tests/scripts/test-integration.sh delete mode 100644 tests/smoketests/.looper.yaml delete mode 100644 tests/smoketests/__init__.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/conftest.py rename tests/{ => unit}/test_desired_sample_range.py (100%) rename tests/{ => unit}/test_natural_range.py (100%) rename tests/{test_pipestat_handoff.py => unit/test_pipeline_interface.py} (100%) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index d497a22a7..8fd3c126a 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -1,10 +1,8 @@ name: 
Run pytests on: - push: - branches: [master, dev] pull_request: - branches: [master] + branches: [master, dev] jobs: pytest: diff --git a/pyproject.toml b/pyproject.toml index 9868cc0bc..26e42c4df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "looper" -version = "2.0.3" +version = "2.1.0" description = "A pipeline submission engine that parses sample inputs and submits pipelines for each sample." readme = "README.md" license = "BSD-2-Clause" @@ -71,7 +71,11 @@ test = [ [tool.pytest.ini_options] addopts = "-rfE" -testpaths = ["tests"] +testpaths = ["tests/unit", "tests/divvytests"] # Fast tests only by default +# Integration tests: RUN_INTEGRATION_TESTS=true pytest tests/integration +python_files = ["test_*.py"] +python_classes = ["Test*", "*Test", "*Tests", "*Tester"] +python_functions = ["test_*"] [tool.ruff] line-length = 88 diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index fe4c5cc58..000000000 --- a/pytest.ini +++ /dev/null @@ -1,6 +0,0 @@ -[pytest] -; Test discovery process, matching tests directory -; Also restrict test discovery to patterned modules, classes, and functions. 
-python_files = test_*.py -python_classes = Test* *Test *Tests *Tester -python_functions = test_* test[A-Z]* diff --git a/tests/conftest.py b/tests/conftest.py index 47e3cc20e..dcba16693 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,313 +1,21 @@ -import os -import shutil -import subprocess -import tempfile -from contextlib import contextmanager -from shutil import copyfile -from typing import * - -import peppy -import pytest -from peppy.const import * -from yaml import dump, safe_load - -from looper.const import * - -REPO_URL = "https://github.com/pepkit/hello_looper.git" - -CFG = "project_config.yaml" -PIPESTAT_CONFIG = "global_pipestat_config.yaml" -PROJECT_CFG_PIPESTAT = "project_config_pipestat.yaml" -LOOPER_CFG = "looper_config_pipestat.yaml" -PIPESTAT_OS = "pipestat_output_schema.yaml" -PIPESTAT_PI = "pipeline_interface1_sample_pipestat.yaml" -PIPESTAT_PI_PRJ = "pipeline_interface1_project_pipestat.yaml" -ST = "annotation_sheet.csv" -PIP = "pipeline_interface{}_project.yaml" -PIS = "pipeline_interface{}_sample.yaml" -OS = "output_schema.yaml" -RES = "resources-{}.tsv" - - -@pytest.fixture(scope="function") -def dotfile_path(): - path = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) - yield path - if os.path.isfile(path): - os.remove(path) - - -def get_outdir(pth): - """ - Get output directory from a config file - - :param str pth: - :return str: output directory - """ - with open(pth, "r") as conf_file: - config_data = safe_load(conf_file) - - output_path = config_data[OUTDIR_KEY] - dirname = os.path.dirname(pth) - - return os.path.join(dirname, output_path) - - -def get_project_config_path(looper_config_pth): - """ - Get project config file path from a looper config file path, since they are relative - - :param str pth: - :return str: output directory - """ - dirname = os.path.dirname(looper_config_pth) - - return os.path.join(dirname, "project/project_config.yaml") - - -def _assert_content_in_files(fs: Union[str, Iterable[str]], query: str, 
negate: bool): - if isinstance(fs, str): - fs = [fs] - check = (lambda doc: query not in doc) if negate else (lambda doc: query in doc) - for f in fs: - with open(f, "r") as fh: - contents = fh.read() - assert check(contents) - - -def assert_content_in_all_files(fs: Union[str, Iterable[str]], query: str): - """ - Verify that string is in files content. - - :param str | Iterable[str] fs: list of files - :param str query: string to look for - """ - _assert_content_in_files(fs, query, negate=False) +"""Root test configuration. +Test organization: +- tests/unit/ - Fast unit tests with no file I/O +- tests/integration/ - CLI integration tests (set RUN_INTEGRATION_TESTS=true to run) +- tests/divvytests/ - Divvy compute configuration tests -def assert_content_not_in_any_files(fs: Union[str, Iterable[str]], query: str): - """ - Verify that string is not in files' content. - - :param str | Iterable[str] fs: list of files - :param str query: string to look for - """ - _assert_content_in_files(fs, query, negate=True) - - -def print_standard_stream(text: Union[str, bytes]) -> None: - if isinstance(text, bytes): - text = text.decode("utf-8") - if not isinstance(text, str): - raise TypeError(f"Stream to print is neither str nor bytes, but {type(text)}") - for line in text.split("\n"): - print(line) - - -def subp_exec( - pth=None, cmd=None, appendix=list(), dry=True -) -> Tuple[bytes, bytes, int]: - """ - - :param str pth: config path - :param str cmd: looper subcommand - :param Iterable[str] appendix: other args to pass to the cmd - :param bool dry: whether to append dry run flag - :return stdout, stderr, and return code - """ - x = ["looper", cmd, "-d" if dry else ""] - if pth: - x.append(pth) - x.extend(appendix) - proc = subprocess.Popen(x, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - stdout, stderr = proc.communicate() - return stdout, stderr, proc.returncode - - -def test_args_expansion(pth=None, cmd=None, appendix=list(), dry=True) -> List[str]: - """ - This function 
takes a path, command, extra argument list and creates a list of - strings to pass to looper.main() as test_args. - - :param str pth: config path - :param str cmd: looper subcommand - :param Iterable[str] appendix: other args to pass to the cmd - :param bool dry: whether to append dry run flag - :return list of strings to pass to looper.main for testing - """ - # --config .looper.yaml run --dry-run - x = [] - if cmd: - x.append(cmd) - if pth: - x.append("--config") - x.append(pth) - if dry: - x.append("--dry-run") - x.extend(appendix) - return x +Run commands: +- pytest tests/unit tests/divvytests # Fast tests (default) +- RUN_INTEGRATION_TESTS=true pytest tests/integration # Integration tests +- ./tests/scripts/test-integration.sh # Integration tests via script +""" +import pytest -def verify_filecount_in_dir(dirpath, pattern, count): - """ - Check if the expected number of files matching specified pattern - exist in a directory - :param str dirpath: path to the directory to investigate - :param str pattern: string pattern, used in str.endswith - :param int count: expected number of files - :raise IOError: when the number of files does not meet the expectations - """ - assert os.path.isdir(dirpath) - subm_err = IOError( - f"Expected {count} files mathing '{pattern}' pattern in " - f"'{dirpath}'. 
Listdir: \n{os.listdir(dirpath)}" +# Register custom markers +def pytest_configure(config): + config.addinivalue_line( + "markers", "integration: marks tests as integration tests (skipped by default)" ) - assert sum([f.endswith(pattern) for f in os.listdir(dirpath)]) == count, subm_err - - -@contextmanager -def mod_yaml_data(path): - """ - Context manager used to modify YAML formatted data - - :param str path: path to the file to modify - """ - # TODO: use everywhere - with open(path, "r") as f: - yaml_data = safe_load(f) - print(f"\nInitial YAML data: \n{yaml_data}\n") - yield yaml_data - print(f"\nModified YAML data: \n{yaml_data}\n") - with open(path, "w") as f: - dump(yaml_data, f) - - -@pytest.fixture -def example_pep_piface_path(): - return os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - -@pytest.fixture -def example_pep_piface_path_cfg(example_pep_piface_path): - return os.path.join(example_pep_piface_path, CFG) - - -@pytest.fixture -def prep_temp_pep(example_pep_piface_path): - - # Get Path to local copy of hello_looper - hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - - # Make local temp copy of hello_looper - d = tempfile.mkdtemp() - shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - - advanced_dir = os.path.join(d, "pytesting/advanced_test") - path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") - - return path_to_looper_config - - -@pytest.fixture -def prep_temp_pep_basic(example_pep_piface_path): - - # Get Path to local copy of hello_looper - hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - - # Make local temp copy of hello_looper - d = tempfile.mkdtemp() - shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - - advanced_dir = os.path.join(d, "pytesting/intermediate_test") - path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") - - return path_to_looper_config - - -@pytest.fixture -def 
prep_temp_pep_csv(example_pep_piface_path): - - # Get Path to local copy of hello_looper - hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - - # Make local temp copy of hello_looper - d = tempfile.mkdtemp() - shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - - advanced_dir = os.path.join(d, "looper_csv_example") - path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") - - return path_to_looper_config - - -@pytest.fixture -def prep_temp_config_with_pep(example_pep_piface_path): - # temp dir - td = tempfile.mkdtemp() - out_td = os.path.join(td, "output") - # ori paths - cfg_path = os.path.join(example_pep_piface_path, CFG) - sample_table_path = os.path.join(example_pep_piface_path, ST) - piface1s_path = os.path.join(example_pep_piface_path, PIS.format("1")) - temp_path_cfg = os.path.join(td, CFG) - temp_path_sample_table = os.path.join(td, ST) - temp_path_piface1s = os.path.join(td, PIS.format("1")) - - # copying - copyfile(cfg_path, temp_path_cfg) - copyfile(sample_table_path, temp_path_sample_table) - copyfile(piface1s_path, temp_path_piface1s) - - return peppy.Project(temp_path_cfg).to_dict(extended=True), temp_path_piface1s - - -@pytest.fixture -def prep_temp_pep_pipestat(example_pep_piface_path): - - # Get Path to local copy of hello_looper - - hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - - # Make local temp copy of hello_looper - d = tempfile.mkdtemp() - shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - - advanced_dir = os.path.join(d, "pytesting/pipestat_test") - path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") - - return path_to_looper_config - - -@pytest.fixture -def prep_temp_pep_pipestat_advanced(example_pep_piface_path): - - # Get Path to local copy of hello_looper - - hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - - # Make local temp copy of hello_looper - d = tempfile.mkdtemp() - 
shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - - advanced_dir = os.path.join(d, "pytesting/advanced_test") - path_to_looper_config = os.path.join(advanced_dir, ".looper_advanced_pipestat.yaml") - - return path_to_looper_config - - -@pytest.fixture -def prep_temp_pep_pephub(example_pep_piface_path): - - # Get Path to local copy of hello_looper - - hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") - - # Make local temp copy of hello_looper - d = tempfile.mkdtemp() - shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) - - advanced_dir = os.path.join(d, "pephub") - path_to_looper_config = os.path.join(advanced_dir, ".looper.yaml") - - return path_to_looper_config diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 000000000..5e73c37d2 --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1 @@ +# Integration tests - CLI tests requiring temp directories and file I/O diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 000000000..f2eea1e49 --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,241 @@ +"""Integration test configuration with environment variable gating.""" + +import os +import shutil +import socket +from contextlib import contextmanager +from typing import Iterable + +import peppy +import pytest +from shutil import copyfile +from yaml import dump, safe_load + +from looper.const import LOOPER_DOTFILE_NAME, OUTDIR_KEY, PIPESTAT_KEY + +# Skip all integration tests unless explicitly enabled +def pytest_collection_modifyitems(config, items): + """Skip integration tests unless RUN_INTEGRATION_TESTS=true.""" + if os.getenv("RUN_INTEGRATION_TESTS") == "true": + return + skip_marker = pytest.mark.skip( + reason="Integration tests disabled. Set RUN_INTEGRATION_TESTS=true to run." 
+ ) + for item in items: + # Only skip tests in the integration directory that aren't marked as fast + if "integration" in str(item.fspath): + if not any(mark.name == "integration_fast" for mark in item.iter_markers()): + item.add_marker(skip_marker) + +# File constants +CFG = "project_config.yaml" +PIPESTAT_CONFIG = "global_pipestat_config.yaml" +PROJECT_CFG_PIPESTAT = "project_config_pipestat.yaml" +LOOPER_CFG = "looper_config_pipestat.yaml" +PIPESTAT_OS = "pipestat_output_schema.yaml" +PIPESTAT_PI = "pipeline_interface1_sample_pipestat.yaml" +PIPESTAT_PI_PRJ = "pipeline_interface1_project_pipestat.yaml" +ST = "annotation_sheet.csv" +PIP = "pipeline_interface{}_project.yaml" +PIS = "pipeline_interface{}_sample.yaml" +OS = "output_schema.yaml" +RES = "resources-{}.tsv" + + +@pytest.fixture(scope="function") +def dotfile_path(): + """Fixture for looper dotfile path with cleanup.""" + path = os.path.join(os.getcwd(), LOOPER_DOTFILE_NAME) + yield path + if os.path.isfile(path): + os.remove(path) + + +def get_outdir(pth): + """Get output directory from a config file.""" + with open(pth, "r") as conf_file: + config_data = safe_load(conf_file) + output_path = config_data[OUTDIR_KEY] + dirname = os.path.dirname(pth) + return os.path.join(dirname, output_path) + + +def get_project_config_path(looper_config_pth): + """Get project config file path from a looper config file path.""" + dirname = os.path.dirname(looper_config_pth) + return os.path.join(dirname, "project/project_config.yaml") + + +def _assert_content_in_files(fs: str | Iterable[str], query: str, negate: bool): + """Check file content for presence or absence of query string.""" + if isinstance(fs, str): + fs = [fs] + check = (lambda doc: query not in doc) if negate else (lambda doc: query in doc) + for f in fs: + with open(f, "r") as fh: + contents = fh.read() + assert check(contents) + + +def assert_content_in_all_files(fs: str | Iterable[str], query: str): + """Verify that string is in files content.""" + 
_assert_content_in_files(fs, query, negate=False) + + +def assert_content_not_in_any_files(fs: str | Iterable[str], query: str): + """Verify that string is not in files' content.""" + _assert_content_in_files(fs, query, negate=True) + + +def print_standard_stream(text: str | bytes) -> None: + """Print bytes or str to stdout.""" + if isinstance(text, bytes): + text = text.decode("utf-8") + if not isinstance(text, str): + raise TypeError(f"Stream to print is neither str nor bytes, but {type(text)}") + for line in text.split("\n"): + print(line) + + +def test_args_expansion(pth=None, cmd=None, appendix=None, dry=True): + """Create list of strings to pass to looper.main() as test_args.""" + if appendix is None: + appendix = [] + x = [] + if cmd: + x.append(cmd) + if pth: + x.append("--config") + x.append(pth) + if dry: + x.append("--dry-run") + x.extend(appendix) + return x + + +def verify_filecount_in_dir(dirpath, pattern, count): + """Check if expected number of files matching pattern exist in directory.""" + assert os.path.isdir(dirpath) + subm_err = IOError( + f"Expected {count} files matching '{pattern}' pattern in " + f"'{dirpath}'. 
Listdir: \n{os.listdir(dirpath)}" + ) + assert sum([f.endswith(pattern) for f in os.listdir(dirpath)]) == count, subm_err + + +def is_connected(): + """Determines if local machine can connect to the internet.""" + try: + host = socket.gethostbyname("www.databio.org") + socket.create_connection((host, 80), 2) + return True + except Exception: + pass + return False + + +@contextmanager +def mod_yaml_data(path): + """Context manager to modify YAML formatted data.""" + with open(path, "r") as f: + yaml_data = safe_load(f) + print(f"\nInitial YAML data: \n{yaml_data}\n") + yield yaml_data + print(f"\nModified YAML data: \n{yaml_data}\n") + with open(path, "w") as f: + dump(yaml_data, f) + + +@pytest.fixture +def example_pep_piface_path(): + """Path to test data directory.""" + return os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data") + + +@pytest.fixture +def example_pep_piface_path_cfg(example_pep_piface_path): + """Path to test project config.""" + return os.path.join(example_pep_piface_path, CFG) + + +@pytest.fixture +def prep_temp_pep(example_pep_piface_path, tmp_path): + """Prepare a temporary PEP for testing.""" + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + d = tmp_path / "pep" + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + advanced_dir = d / "pytesting" / "advanced_test" + path_to_looper_config = str(advanced_dir / ".looper.yaml") + return path_to_looper_config + + +@pytest.fixture +def prep_temp_pep_basic(example_pep_piface_path, tmp_path): + """Prepare a basic temporary PEP for testing.""" + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + d = tmp_path / "pep" + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + advanced_dir = d / "pytesting" / "intermediate_test" + path_to_looper_config = str(advanced_dir / ".looper.yaml") + return path_to_looper_config + + +@pytest.fixture +def prep_temp_pep_csv(example_pep_piface_path, 
tmp_path): + """Prepare a CSV-based PEP for testing.""" + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + d = tmp_path / "pep" + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + advanced_dir = d / "looper_csv_example" + path_to_looper_config = str(advanced_dir / ".looper.yaml") + return path_to_looper_config + + +@pytest.fixture +def prep_temp_config_with_pep(example_pep_piface_path, tmp_path): + """Prepare temp config with PEP project dict.""" + td = tmp_path / "cfg" + td.mkdir() + cfg_path = os.path.join(example_pep_piface_path, CFG) + sample_table_path = os.path.join(example_pep_piface_path, ST) + piface1s_path = os.path.join(example_pep_piface_path, PIS.format("1")) + temp_path_cfg = str(td / CFG) + temp_path_sample_table = str(td / ST) + temp_path_piface1s = str(td / PIS.format("1")) + copyfile(cfg_path, temp_path_cfg) + copyfile(sample_table_path, temp_path_sample_table) + copyfile(piface1s_path, temp_path_piface1s) + return peppy.Project(temp_path_cfg).to_dict(extended=True), temp_path_piface1s + + +@pytest.fixture +def prep_temp_pep_pipestat(example_pep_piface_path, tmp_path): + """Prepare a pipestat-enabled PEP for testing.""" + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + d = tmp_path / "pep" + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + advanced_dir = d / "pytesting" / "pipestat_test" + path_to_looper_config = str(advanced_dir / ".looper.yaml") + return path_to_looper_config + + +@pytest.fixture +def prep_temp_pep_pipestat_advanced(example_pep_piface_path, tmp_path): + """Prepare an advanced pipestat-enabled PEP for testing.""" + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + d = tmp_path / "pep" + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + advanced_dir = d / "pytesting" / "advanced_test" + path_to_looper_config = str(advanced_dir / ".looper_advanced_pipestat.yaml") + return 
path_to_looper_config + + +@pytest.fixture +def prep_temp_pep_pephub(example_pep_piface_path, tmp_path): + """Prepare a PEPhub PEP for testing.""" + hello_looper_dir_path = os.path.join(example_pep_piface_path, "hello_looper-dev") + d = tmp_path / "pep" + shutil.copytree(hello_looper_dir_path, d, dirs_exist_ok=True) + advanced_dir = d / "pephub" + path_to_looper_config = str(advanced_dir / ".looper.yaml") + return path_to_looper_config diff --git a/tests/test_clean.py b/tests/integration/test_clean.py similarity index 100% rename from tests/test_clean.py rename to tests/integration/test_clean.py diff --git a/tests/smoketests/test_other.py b/tests/integration/test_cli_commands.py similarity index 99% rename from tests/smoketests/test_other.py rename to tests/integration/test_cli_commands.py index 9535208ef..5af0e0c24 100644 --- a/tests/smoketests/test_other.py +++ b/tests/integration/test_cli_commands.py @@ -3,14 +3,17 @@ import pandas as pd import pytest from peppy import Project +from yaml import dump, safe_load from looper.cli_pydantic import main +from looper.const import FLAGS, OUTDIR_KEY, PIPESTAT_KEY from looper.exceptions import ( LooperReportError, MisconfigurationException, PipestatConfigurationException, ) -from tests.conftest import * + +from tests.integration.conftest import get_outdir, get_project_config_path def _make_flags_pipestat(cfg, type, pipeline_name): diff --git a/tests/smoketests/test_run.py b/tests/integration/test_cli_run.py similarity index 96% rename from tests/smoketests/test_run.py rename to tests/integration/test_cli_run.py index a9871dff5..e34269426 100644 --- a/tests/smoketests/test_run.py +++ b/tests/integration/test_cli_run.py @@ -6,9 +6,19 @@ from looper.cli_pydantic import main from looper.const import * +from looper.exceptions import MisconfigurationException from looper.project import Project -from looper.utils import * -from tests.conftest import * +from looper.utils import is_PEP_file_type, is_pephub_registry_path + +from 
tests.integration.conftest import ( + assert_content_in_all_files, + assert_content_not_in_any_files, + get_outdir, + get_project_config_path, + mod_yaml_data, + test_args_expansion, + verify_filecount_in_dir, +) CMD_STRS = ["string", " --string", " --sjhsjd 212", "7867#$@#$cc@@"] @@ -65,19 +75,6 @@ def test_is_PEP_file_type(path): assert result == True -def is_connected(): - """Determines if local machine can connect to the internet.""" - import socket - - try: - host = socket.gethostbyname("www.databio.org") - socket.create_connection((host, 80), 2) - return True - except: - pass - return False - - class TestLooperBothRuns: @pytest.mark.parametrize("cmd", ["run", "runp"]) def test_looper_cfg_invalid(self, cmd): @@ -519,10 +516,9 @@ def test_looper_uses_cli_compute_options_spec(self, prep_temp_pep, cmd): assert_content_in_all_files(subs_list, "#SBATCH --mem='12345'") @pytest.mark.parametrize("cmd", ["run", "runp"]) - def test_cli_yaml_settings_general(self, prep_temp_pep, cmd): + def test_cli_yaml_settings_general(self, prep_temp_pep, cmd, tmp_path): tp = prep_temp_pep - td = tempfile.mkdtemp() - settings_file_path = os.path.join(td, "settings.yaml") + settings_file_path = str(tmp_path / "settings.yaml") with open(settings_file_path, "w") as sf: dump({"mem": "testin_mem"}, sf) x = test_args_expansion(tp, cmd, ["--settings", settings_file_path]) @@ -541,10 +537,9 @@ def test_nonexistent_yaml_settings_disregarded(self, prep_temp_pep, cmd): raise pytest.fail("DID RAISE {0}".format(Exception)) @pytest.mark.parametrize("cmd", ["run", "runp"]) - def test_cli_yaml_settings_passes_settings(self, prep_temp_pep, cmd): + def test_cli_yaml_settings_passes_settings(self, prep_temp_pep, cmd, tmp_path): tp = prep_temp_pep - td = tempfile.mkdtemp() - settings_file_path = os.path.join(td, "settings.yaml") + settings_file_path = str(tmp_path / "settings.yaml") with open(settings_file_path, "w") as sf: dump({"mem": "testin_mem"}, sf) @@ -560,10 +555,9 @@ def 
test_cli_yaml_settings_passes_settings(self, prep_temp_pep, cmd): assert_content_in_all_files(subs_list, "testin_mem") @pytest.mark.parametrize("cmd", ["run", "runp"]) - def test_cli_compute_overwrites_yaml_settings_spec(self, prep_temp_pep, cmd): + def test_cli_compute_overwrites_yaml_settings_spec(self, prep_temp_pep, cmd, tmp_path): tp = prep_temp_pep - td = tempfile.mkdtemp() - settings_file_path = os.path.join(td, "settings.yaml") + settings_file_path = str(tmp_path / "settings.yaml") with open(settings_file_path, "w") as sf: dump({"mem": "testin_mem"}, sf) x = test_args_expansion( diff --git a/tests/test_cli_startup.py b/tests/integration/test_cli_startup.py similarity index 87% rename from tests/test_cli_startup.py rename to tests/integration/test_cli_startup.py index bc15bc797..640b377c5 100644 --- a/tests/test_cli_startup.py +++ b/tests/integration/test_cli_startup.py @@ -3,6 +3,11 @@ import subprocess import time +import pytest + +# These tests are fast and should run by default (not require RUN_INTEGRATION_TESTS) +pytestmark = pytest.mark.integration_fast + def test_cli_help_startup_time(): """Ensure --help responds quickly without loading heavy dependencies.""" diff --git a/tests/smoketests/test_cli_validation.py b/tests/integration/test_cli_validation.py similarity index 96% rename from tests/smoketests/test_cli_validation.py rename to tests/integration/test_cli_validation.py index 84872e4fc..64c8f9d24 100644 --- a/tests/smoketests/test_cli_validation.py +++ b/tests/integration/test_cli_validation.py @@ -10,7 +10,7 @@ SAMPLE_INCLUSION_OPTNAME, SAMPLE_SELECTION_ATTRIBUTE_OPTNAME, ) -from tests.conftest import test_args_expansion +from tests.integration.conftest import test_args_expansion SUBCOMMANDS_WHICH_SUPPORT_SKIP_XOR_LIMIT = ["run", "destroy"] diff --git a/tests/test_comprehensive.py b/tests/integration/test_comprehensive.py similarity index 98% rename from tests/test_comprehensive.py rename to tests/integration/test_comprehensive.py index 
d374d1fc1..0325fb817 100644 --- a/tests/test_comprehensive.py +++ b/tests/integration/test_comprehensive.py @@ -9,8 +9,9 @@ from looper.cli_pydantic import main from looper.const import * from looper.utils import * -from tests.conftest import * -from tests.smoketests.test_run import is_connected + +from tests.integration.conftest import get_project_config_path, is_connected + CMD_STRS = ["string", " --string", " --sjhsjd 212", "7867#$@#$cc@@"] diff --git a/tests/scripts/test-integration.sh b/tests/scripts/test-integration.sh new file mode 100755 index 000000000..dea470688 --- /dev/null +++ b/tests/scripts/test-integration.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Integration Test Runner for Looper +# Runs full CLI integration tests that require temp directories and file I/O + +set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$SCRIPT_DIR/../.." +cd "$PROJECT_ROOT" + +export RUN_INTEGRATION_TESTS=true + +echo "=== Running Looper Integration Tests ===" +python3 -m pytest tests/integration/ -v "$@" + +echo "=== Integration tests completed successfully! ===" diff --git a/tests/smoketests/.looper.yaml b/tests/smoketests/.looper.yaml deleted file mode 100644 index d4cfc108f..000000000 --- a/tests/smoketests/.looper.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pep_config: example/pep/path -output_dir: . 
-pipeline_interfaces: - sample: [] - project: [] diff --git a/tests/smoketests/__init__.py b/tests/smoketests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 000000000..fd6ca98d1 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1 @@ +# Unit tests - fast tests with no file I/O diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 000000000..680aeb119 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,24 @@ +"""Unit test configuration with minimal, fast fixtures (no file I/O).""" + +import pytest + + +@pytest.fixture +def sample_piface_dict(): + """Sample pipeline interface dictionary for unit tests.""" + return { + "pipeline_name": "test_pipeline", + "pipeline_type": "sample", + "command_template": "python pipeline.py {sample.sample_name}", + } + + +@pytest.fixture +def sample_piface_with_output_schema(): + """Pipeline interface dict with output_schema for pipestat tests.""" + return { + "pipeline_name": "test_pipeline", + "pipeline_type": "sample", + "output_schema": "schema.yaml", + "command_template": "python pipeline.py --pipestat-config {pipestat.config_file}", + } diff --git a/tests/test_desired_sample_range.py b/tests/unit/test_desired_sample_range.py similarity index 100% rename from tests/test_desired_sample_range.py rename to tests/unit/test_desired_sample_range.py diff --git a/tests/test_natural_range.py b/tests/unit/test_natural_range.py similarity index 100% rename from tests/test_natural_range.py rename to tests/unit/test_natural_range.py diff --git a/tests/test_pipestat_handoff.py b/tests/unit/test_pipeline_interface.py similarity index 100% rename from tests/test_pipestat_handoff.py rename to tests/unit/test_pipeline_interface.py From aca80f0c3ae225e3e20d68888171400639440018 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 19 Feb 2026 08:37:03 -0500 Subject: [PATCH 157/163] format --- 
tests/integration/conftest.py | 6 +++++- tests/integration/test_cli_run.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index f2eea1e49..e636d5361 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -13,6 +13,7 @@ from looper.const import LOOPER_DOTFILE_NAME, OUTDIR_KEY, PIPESTAT_KEY + # Skip all integration tests unless explicitly enabled def pytest_collection_modifyitems(config, items): """Skip integration tests unless RUN_INTEGRATION_TESTS=true.""" @@ -27,6 +28,7 @@ def pytest_collection_modifyitems(config, items): if not any(mark.name == "integration_fast" for mark in item.iter_markers()): item.add_marker(skip_marker) + # File constants CFG = "project_config.yaml" PIPESTAT_CONFIG = "global_pipestat_config.yaml" @@ -149,7 +151,9 @@ def mod_yaml_data(path): @pytest.fixture def example_pep_piface_path(): """Path to test data directory.""" - return os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data") + return os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data" + ) @pytest.fixture diff --git a/tests/integration/test_cli_run.py b/tests/integration/test_cli_run.py index e34269426..c163f90f8 100644 --- a/tests/integration/test_cli_run.py +++ b/tests/integration/test_cli_run.py @@ -555,7 +555,9 @@ def test_cli_yaml_settings_passes_settings(self, prep_temp_pep, cmd, tmp_path): assert_content_in_all_files(subs_list, "testin_mem") @pytest.mark.parametrize("cmd", ["run", "runp"]) - def test_cli_compute_overwrites_yaml_settings_spec(self, prep_temp_pep, cmd, tmp_path): + def test_cli_compute_overwrites_yaml_settings_spec( + self, prep_temp_pep, cmd, tmp_path + ): tp = prep_temp_pep settings_file_path = str(tmp_path / "settings.yaml") with open(settings_file_path, "w") as sf: From 1c55211b7fdb18fa4aa46293b8d0ad055011c75b Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 20 Feb 2026 15:49:00 -0500 
Subject: [PATCH 158/163] make signal handling thread-safe --- looper/conductor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/looper/conductor.py b/looper/conductor.py index 0e9759f27..d3a4760b1 100644 --- a/looper/conductor.py +++ b/looper/conductor.py @@ -7,6 +7,7 @@ import signal import subprocess import sys +import threading import time from json import loads from math import ceil @@ -445,7 +446,8 @@ def submit(self, force: bool = False) -> bool: submitted = False # Override signal handler so that Ctrl+C can be used to gracefully terminate child process - signal.signal(signal.SIGINT, self._signal_int_handler) + if threading.current_thread() is threading.main_thread(): + signal.signal(signal.SIGINT, self._signal_int_handler) if not self._pool: _LOGGER.debug("No submission (no pooled samples): %s", self.pl_name) From 41d644fa70c93e12077b7b0c1147458320027715 Mon Sep 17 00:00:00 2001 From: nsheff Date: Fri, 20 Feb 2026 21:34:10 -0500 Subject: [PATCH 159/163] update pipestat constructor format --- looper/project.py | 12 ++++++------ .../pipestat_example/pipeline/count_lines.py | 4 ++-- .../pipestat_test/pipeline_pipestat/count_lines.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/looper/project.py b/looper/project.py index dd40cb537..7b58a4e54 100644 --- a/looper/project.py +++ b/looper/project.py @@ -457,8 +457,8 @@ def _get_pipestat_configuration( if not pipestat_config_path: self._create_pipestat_config(piface, pipeline_type) else: - piface.psm = PipestatManager( - config_file=pipestat_config_path, + piface.psm = PipestatManager.from_config( + config=pipestat_config_path, multi_pipelines=True, pipeline_type="sample", ) @@ -472,8 +472,8 @@ def _get_pipestat_configuration( if not pipestat_config_path: self._create_pipestat_config(prj_piface, pipeline_type) else: - prj_piface.psm = PipestatManager( - config_file=pipestat_config_path, + prj_piface.psm = PipestatManager.from_config( + config=pipestat_config_path, 
multi_pipelines=True, pipeline_type="project", ) @@ -615,8 +615,8 @@ def _create_pipestat_config(self, piface, pipeline_type: str) -> None: # Two end goals, create a config file write_pipestat_config(pipestat_config_path, pipestat_config_dict) - piface.psm = PipestatManager( - config_file=pipestat_config_path, multi_pipelines=True + piface.psm = PipestatManager.from_config( + config=pipestat_config_path, multi_pipelines=True ) return None diff --git a/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py b/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py index b1576bfa0..8efabf415 100755 --- a/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py +++ b/tests/data/hello_looper-dev/pipestat_example/pipeline/count_lines.py @@ -13,9 +13,9 @@ schema_path = sys.argv[4] # Create pipestat manager and then report values -psm = pipestat.PipestatManager( - schema_path=schema_path, +psm = pipestat.PipestatManager.from_file_backend( results_file_path=results_file, + schema_path=schema_path, record_identifier=sample_name, ) diff --git a/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py b/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py index b1576bfa0..8efabf415 100755 --- a/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py +++ b/tests/data/hello_looper-dev/pytesting/pipestat_test/pipeline_pipestat/count_lines.py @@ -13,9 +13,9 @@ schema_path = sys.argv[4] # Create pipestat manager and then report values -psm = pipestat.PipestatManager( - schema_path=schema_path, +psm = pipestat.PipestatManager.from_file_backend( results_file_path=results_file, + schema_path=schema_path, record_identifier=sample_name, ) From a94abb5a22b2bd935ce9b07207a94c03d7785b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cdonaldcampbelljr=E2=80=9D?= <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 24 Feb 2026 17:51:23 -0600 Subject: 
[PATCH 160/163] Add Donald Campbell to author list --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 26e42c4df..346903d36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ authors = [ { name = "Michal Stolarczyk" }, { name = "Johanna Klughammer" }, { name = "Andre Rendeiro" }, + { name = "Donald Campbell" }, ] keywords = ["bioinformatics", "sequencing", "ngs"] classifiers = [ From 1299a0d9ebd67e6dc9bd0febc4fe5ef9a6a423db Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 07:38:37 -0500 Subject: [PATCH 161/163] format, changelog --- changelog.md | 482 ++++++++++++++++++++++++ tests/conftest.py | 1 - tests/integration/conftest.py | 4 +- tests/integration/test_cli_commands.py | 1 - tests/integration/test_cli_run.py | 1 - tests/integration/test_comprehensive.py | 2 - 6 files changed, 484 insertions(+), 7 deletions(-) create mode 100644 changelog.md diff --git a/changelog.md b/changelog.md new file mode 100644 index 000000000..82e558f0d --- /dev/null +++ b/changelog.md @@ -0,0 +1,482 @@ +# Changelog + +This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. 
+ +## [2.1.0] -- 2026-02-25 +### Changed +- Migrated to yacman v1 API (`YAMLConfigManager.from_yaml_file()`, `write_lock`/`read_lock` context managers) +- Migrated CLI configuration to pydantic-settings +- Improved CLI startup time by deferring heavy imports to module level +- Made signal handling thread-safe +- Replaced wildcard imports with explicit imports +- Converted docstrings to Google style +- Better shell inference for submission commands (#282) +- Updated pipestat constructor usage to classmethod format +- Separated fast unit tests from slow CLI integration tests + +### Added +- HTTP API server (experimental/alpha) with FastAPI: `looper serve` +- Stricter looper-pipestat interface validation + +### Removed +- Old documentation files (moved to separate docs site) + +## [2.0.3] -- 2025-09-23 +### Fixed +- Fixed [#543](https://github.com/pepkit/looper/issues/543) +- Fixed [#547](https://github.com/pepkit/looper/issues/547) +- Fixed [#548](https://github.com/pepkit/looper/issues/548) + + +## [2.0.2] -- 2025-09-22 +### Changed +- Remove veracitools dependency from the requirements. 
+ +## [2.0.1] -- 2025-03-05 + +### Changed +- update ubiquerg>=0.8.1 + +### Fixed +- [#541](https://github.com/pepkit/looper/issues/541) + +## [2.0.0] -- 2025-01-16 + +This release breaks backwards compatibility for Looper versions < 2.0.0 + +### Fixed +- divvy init [#520](https://github.com/pepkit/looper/issues/520) +- replaced deprecated PEPHubClient function, `_load_raw_pep` with `.load_raw_pep` +- looper cli parameters now take priority as originally intended [#518](https://github.com/pepkit/looper/issues/518) +- fix divvy inspect +- remove printed dictionary at looper finish [#511](https://github.com/pepkit/looper/issues/511) +- fix [#536](https://github.com/pepkit/looper/issues/536) +- fix [#522](https://github.com/pepkit/looper/issues/522) +- fix [#537](https://github.com/pepkit/looper/issues/537) +- fix [#534](https://github.com/pepkit/looper/issues/534) + +### Changed +- `--looper-config` is now `--config`, `-c`. [#455](https://github.com/pepkit/looper/issues/455) +- A pipeline interface now consolidates a `sample_interface` and a `project_interface` [#493](https://github.com/pepkit/looper/issues/493) +- Updated documentation for Looper 2.0.0, removing previous versions [pepspec PR #34](https://github.com/pepkit/pepspec/pull/34) +- remove position based argument for divvy config, must use --config or run as default config + +### Added +- `looper init` tutorial [#466](https://github.com/pepkit/looper/issues/466) +- looper config allows for `pephub_path` in pipestat config section of `.looper.yaml` [#519](https://github.com/pepkit/looper/issues/519) +- improve error messaging for bad/malformed looper configurations [#515](https://github.com/pepkit/looper/issues/515) +- add shortform argument for --package (alias is now -p) + + +## [1.9.1] -- 2024-07-18 + +### Changed +- ensure peppy requirement peppy>=0.40.0,<=0.40.2 + +## [1.9.0] -- 2024-06-26 + +### Added +- user can now add cli modifiers to looper config instead of PEP project 
[#270](https://github.com/pepkit/looper/issues/270) +- pipeline interfaces no longer must be nested under sample and project keys within looper config file [#465](https://github.com/pepkit/looper/issues/465) +- var_templates can now be hierarchical [#334](https://github.com/pepkit/looper/issues/334) +- looper can now gracefully halt spawned subprocesses when the user sends a keyboard interrupt [#37](https://github.com/pepkit/looper/issues/37) + +## [1.8.1] -- 2024-06-06 + +### Fixed +- added `-v` and `--version` to the CLI +- fixed running project level with `--project` argument + +## [1.8.0] -- 2024-06-04 + +### Added +- looper destroy now destroys individual results when pipestat is configured: https://github.com/pepkit/looper/issues/469 +- comprehensive smoketests: https://github.com/pepkit/looper/issues/464 +- allow rerun to work on both failed or waiting flags: https://github.com/pepkit/looper/issues/463 + +### Changed +- Migrated `argparse` CLI definition to a pydantic basis for all commands. 
See: https://github.com/pepkit/looper/issues/438 +- during project load, check if PEP file path is a file first, then check if it is a registry path: https://github.com/pepkit/looper/issues/456 +- Looper now uses FutureYamlConfigManager due to the yacman refactor v0.9.3: https://github.com/pepkit/looper/issues/452 + +### Fixed +- inferring project name when loading PEP from csv: https://github.com/pepkit/looper/issues/484 +- fix inconsistency resolving pipeline interface paths if multiple paths are supplied: https://github.com/pepkit/looper/issues/474 +- fix bug with checking for completed flags: https://github.com/pepkit/looper/issues/470 +- fix looper destroy not properly destroying all related files: https://github.com/pepkit/looper/issues/468 +- looper rerun now only runs failed jobs as intended: https://github.com/pepkit/looper/issues/467 +- looper inspect now inspects the looper config: https://github.com/pepkit/looper/issues/462 +- Load PEP from CSV: https://github.com/pepkit/looper/issues/456 +- looper now works with sample_table_index https://github.com/pepkit/looper/issues/458 + +## [1.7.1] -- 2024-05-28 + +### Fixed +- pin pipestat version to be between pipestat>=0.8.0,<0.9.0 https://github.com/pepkit/looper/issues/494 + +## [1.7.0] -- 2024-01-26 + +### Added +- `--portable` flag to `looper report` to create a portable version of the html report +- `--lump-j` allows grouping samples into a defined number of jobs + +### Changed +- `--lumpn` is now `--lump-n` +- `--lump` is now `--lump-s` + +## [1.6.0] -- 2023-12-22 + +### Added +- `looper link` creates symlinks for results grouped by record_identifier. It requires pipestat to be configured. [#72](https://github.com/pepkit/looper/issues/72) +- basic tab completion. + +### Changed +- looper now works with pipestat v0.6.0 and greater. +- `looper table`, `check` now use pipestat and therefore require pipestat configuration. 
[#390](https://github.com/pepkit/looper/issues/390) +- changed how looper configures pipestat [#411](https://github.com/pepkit/looper/issues/411) +- initializing pipeline interface also writes an example `output_schema.yaml` and `count_lines.sh` pipeline + +### Fixed +- filtering via attributes that are integers. + +## [1.5.1] -- 2023-08-14 + +### Fixed +- fix `looper table` failing without `sample.protocol` + +### Changed +- correct `--looper_conifg` to `--looper-config` + +## [1.5.0] -- 2023-08-09 + +### Added + +- ability to use PEPs from PEPhub without downloading project [#341](https://github.com/pepkit/looper/issues/341) +- ability to specify pipeline interfaces inside looper config [Looper Config](https://looper.databio.org/en/dev/how_to_define_looper_config/) +- divvy re-integrated in looper +- divvy inspect -p package +- Looper will now check that the command path provided in the pipeline interface is callable before submitting. + + +### Changed +- initialization of generic pipeline interface available using subcommand `init-piface` +- `looper report` will now use pipestat to generate browsable HTML reports if pipestat is configured. +- looper now works with pipestat v0.5.0. +- Removed --toggle-key functionality. +- Allow for user to input single integer value for --sel-incl or --sel-excl + +## [1.4.3] -- 2023-08-01 + +### Fixed +- Fix regression for var_templates expansion. + +## [1.4.2] -- 2023-07-31 + +### Fixed +- Fix for expanding paths properly. + +## [1.4.1] -- 2023-06-22 + + +## [1.4.0] -- 2023-04-24 + +### Added + +- preliminary support for [pipestat](http://pipestat.databio.org). +- ability to skip samples using `-k` or `--skip` [#367](https://github.com/pepkit/looper/pull/367) +- ability to input a range into `limit` and `skip`[#367](https://github.com/pepkit/looper/pull/367) +- `limit` and `skip` are now both usable with Destroy and Run. 
[#367](https://github.com/pepkit/looper/pull/367) +- ability to generate generic pipeline interface using `init -p` or `init --piface` [#368](https://github.com/pepkit/looper/pull/368) +- Fixed ability to use custom sample index +- Added `write_custom_template`, a built-in pre-submit plugin for writing templates + +### Changed +- looper now returns nonzero if any samples fail submission +- various other developer changes + +### Deprecated +- `path` variable will be deprecated in favor of `var_templates` [#322](https://github.com/pepkit/looper/issues/322) + +## [1.3.2] -- 2022-02-09 + +### Changed +- Fixed bug with use_2to3 for setuptools compatibility. + +## [1.3.1] -- 2021-06-18 + +### Changed +- If remote schemas are not accessible, the job submission doesn't fail anymore +- Fixed a bug where looper stated "No failed flag found" when a failed flag was found + +### Deprecated +- Fixed and deprecated `looper inspect`. Use `eido inspect` from now on. + + +## [1.3.0] -- 2020-10-07 + +### Added +- New plugin system for pre-submission hooks +- Included plugin functions: `write_sample_yaml`, `write_sample_yaml_prj`, `write_sample_yaml_cwl` and `write_submission_yaml` +- New `var_templates` section for defining variables in the pipeline interface + +### Changed +- Pipeline interface specification was updated to accommodate new `var_templates` section and pre-submission hooks + +### Deprecated +- pipeline interface sections: + - `dynamic_variables_command_template`, which can now be more simply accomplished with a pre-submission hook + - `path`, which is replaced by a more generic `var_templates` section + +## [1.2.1] - 2020-08-26 + +### Added +- Environment variables expansion in custom sample YAML paths; [Issue 273](https://github.com/pepkit/looper/issues/273) +- `dynamic_variables_script_path` key in the pipeline interface. 
Path, absolute or relative to the pipeline interface file; [Issue 276](https://github.com/pepkit/looper/issues/276)
+### Changed
+- Resolve project pipeline interface path by making it relative to the config not current directory; [Issue 268](https://github.com/pepkit/looper/issues/268)
+### Fixed
+- Unclear error when `output_dir` was not provided in a config `looper` section; [Issue 286](https://github.com/pepkit/looper/issues/286)
+
+## [1.2.0] - 2020-05-26
+
+**This version introduced backwards-incompatible changes.**
+
+### Added
+- Commands:
+  - `init`; initializes `.looper.yaml` file
+  - `inspect`; inspects `Project` or `Sample` objects
+  - `table`; writes summary stats table
+  - `runp`; runs project level pipelines
+- Input schemas and output schemas
+- `--settings` argument to specify compute resources as a YAML file
+- Option to preset CLI options in a dotfile
+- `--command-extra` and `--command-extra-override` arguments that append specified string to pipeline commands. These functions supersede the previous `pipeline_config` and `pipeline_args` sections, which are now deprecated. The new method is more universal, and can accomplish the same functionality but more simply, using the built-in PEP machinery to selectively apply commands to samples.
+- Option to specify destination of sample YAML in pipeline interface
+- `--pipeline_interfaces` argument that allows pipeline interface specification via CLI
+
+### Changed
+- `looper summarize` to `looper report`
+- Pipeline interface format changed drastically
+- The PyPi name changed from 'loopercli' to 'looper'
+- resources section in pipeline interface replaced with `size_dependent_attributes` or `dynamic_variables_command_template`.
+- `--compute` can be used to specify arguments other than resources +- `all_input_files` and `required_input_files` keys in pipeline interface moved to the input schema and renamed to `files` and `required_files` +- pipeline interface specification + +## [0.12.6] -- 2020-02-21 + +### Added +- possibility to execute library module as a script: `python -m looper ...` + +### Changed +- in the summary page account for missing values when plotting; the value is disregarded in such a case and plot is still created +- show 50 rows in the summary table +- make links to the summary page relative +- long entries in the sample stats table are truncated with an option to see original value in a popover + +### Fixed +- inactive jQuery dependent components in the status page +- project objects layout in the summary index page +- inactivation of popovers after Bootstrap Table events +- non-homogeneous status flags appearance + +## [0.12.5] -- 2019-12-13 +### Changed +- reduce verbosity of missing options; [Issue 174](https://github.com/pepkit/looper/issues/174) +- switch to [Bootstrap Table](https://bootstrap-table.com/) in the summary index page table and sample status tables + +## [0.12.4] -- 2019-07-18 +### Added +- Ability to declare `required_executables` in a `PipelineInterface`, to trigger a naive "runnability" check for a sample submission +- A possibility to opt out of status page inclusion in the navbar + +### Changed +- The status tables now use DataTables jQuery plugin to make them interactive + +### Fixed +- Navbar links creation + +## [0.12.3] -- 2019-06-20 +### Fixed +- Bug in `Sample` YAML naming, whereby a base `Sample` was being suffixed as a subtype would be, leading to a pipeline argument based on `yaml_file` that did not exist on disk. + +## [0.12.2] -- 2019-06-06 + +### Fixed +- Fixed various bugs related to populating derived attributes, including using attributes like `sample_name` as keys. 
+- Fixed a bug related to singularity attributes not being passed from a pipeline interface file. +- Fixed several bugs with incorrect version requirements. + +## [0.12.1] -- 2019-05-20 + +### Added +- Made `looper.Sample` include more specific functionality from `peppy` + +### Changed +- Status table creation is possible outside of `looper`. +- In the summary index page the plottable columns list is now scrollable +- Status page relies on the `profile.tsv` file rather than `*.log`; [Issue 159](https://github.com/pepkit/looper/issues/159) + +### Fixed +- In HTML reporting module, do not ignore objects which are neither HTMLs nor images in the summary, e.g. CSVs +- Restore parsing and application of pipeline-level computing resource specification from a pipeline interface file; [Issue 184](https://github.com/pepkit/looper/issues/184) +- Allow `ignore_flags` to properly modulate submission messaging; [Issue 179](https://github.com/pepkit/looper/issues/179) +- Do not display time-like summary columns as the plottable ones; [Issue 182](https://github.com/pepkit/looper/issues/182) + +## [0.12.0] -- 2019-05-03 + +### Added +- First implementation of pipeline interface 'outputs', so pipeline authors can specify items of interest produced by the pipeline. +- Functions and attributes on `Project` to support "outputs" (`interfaces`, `get_interfaces`, `get_outputs`) + +### Changed +- Start "compute" --> "compute_packages" transition +- `get_logger` moved to `peppy` + +### Fixed +- Prevent CLI option duplication in pipeline commands generated +- Make functional CLI spec of particular attribute on which to base selection of a subset of a project's samples ([`peppy` 298](https://github.com/pepkit/peppy/issues/298)) + +## [0.11.1] -- 2019-04-17 + +### Changed +- Improved documentation +- Improved interaction with `peppy` and `divvy` dependencies + +## [0.11] -- 2019-04-17 + +### Added +- Implemented `looper rerun` command. 
+- Support use of custom `resources` in pipeline's `compute` section
+- Listen for itemized compute resource specification on command-line with `--resources`
+- Support pointing to `Project` config file with folder path rather than full filepath
+- Add `selector-attribute` parameter for more generic sample selection.
+
+### Changed
+- Switched to a Jinja-style templating system for summary output
+- Made various UI changes to adapt to `caravel` use.
+- Using `attmap` for "attribute-style key-value store" implementation
+- Removed Python 3.4 support.
+- UI: change parameter names `in/exclude-samples` to `selector-in/exclude`.
+
+## [0.10.0] -- 2018-12-20
+
+### Changed
+- `PipelineInterface` now derives from `peppy.AttributeDict`.
+- On `PipelineInterface`, iteration over pipelines now is with `iterpipes`.
+- Rename `parse_arguments` to `build_parser`, which returns `argparse.ArgumentParser` object
+- Integers in HTML reports are made more human-readable by including commas.
+- Column headers in HTML reports are now strictly for sorting; there's a separate list for plottable columns.
+- More informative error messages
+- HTML samples list is fully populated.
+- Existence of an object lacking an anchor image is no longer problematic for `summarize`.
+- Basic package test in Python 3 now succeeds: `python3 setup.py test`.
+
+## [v0.9.2] -- 2018-11-12
+
+### Changed
+- Fixed bugs with `looper summarize` when no summarizers were present
+- Added CLI flag to force `looper destroy` for programmatic access
+- Fixed a bug for samples with duplicate names
+- Added new display features (graphs, table display) for HTML summary output.
+
+
+## [0.9.1] -- 2018-06-30
+
+### Changed
+- Fixed several bugs with `looper summarize` that caused failure on edge cases. 
+ +## [0.9.0] -- 2018-06-25 + +### Added +- Support for custom summarizers +- Add `allow-duplicate-names` command-line options +- Allow any variables in environment config files or other `compute` sections to be used in submission templates. This allows looper to be used with containers. +- Add nice universal project-level HTML reporting + +## [0.8.1] -- 2018-04-02 + +### Changed +- Minor documentation and packaging updates for first Pypi release. +- Fix a bug that incorrectly mapped protocols due to case sensitive issues +- Fix a bug with `report_figure` that made it output pandas code + + +## [0.8.0] -- 2018-01-19 + +### Changed +- Use independent `peppy` package, replacing `models` module for core data types. +- Integrate `ProtocolInterface` functionality into `PipelineInterface`. + +## [0.7.2] -- 2017-11-16 +### Changed +- Correctly count successful command submissions when not using `--dry-run`. + +## [0.7.1] -- 2017-11-15 + +### Changed +- No longer falsely display that there's a submission failure. +- Allow non-string values to be unquoted in the `pipeline_args` section. + +## [0.7] -- 2017-11-15 +### Added +- Add `--lump` and `--lumpn` options +- Catch submission errors from cluster resource managers +- Implied columns can now be derived +- Now protocols can be specified on the command-line `--include-protocols` +- Add rudimentary figure summaries +- Simplifies command-line help display +- Allow wildcard protocol_mapping for catch-all pipeline assignment +- Improve user messages +- New sample_subtypes section in pipeline_interface + +### Changed +- Sample child classes are now defined explicitly in the pipeline interface. Previously, they were guessed based on presence of a class extending Sample in a pipeline script. 
+- Changed 'library' key sample attribute to 'protocol' + +## [0.6] -- 2017-07-21 +### Added + - Add support for implied_column section of the project config file + - Add support for Python 3 + - Merges pipeline interface and protocol mappings. This means we now allow direct pointers to `pipeline_interface.yaml` files, increasing flexibility, so this relaxes the specified folder structure that was previously used for `pipelines_dir` (with `config` subfolder). + - Allow URLs as paths to sample sheets. + - Allow tsv format for sample sheets. + - Checks that the path to a pipeline actually exists before writing the submission script. + +### Changed +- Changed LOOPERENV environment variable to PEPENV, generalizing it to generic models +- Changed name of `pipelines_dir` to `pipeline_interfaces` (but maintained backwards compatibility for now). +- Changed name of `run` column to `toggle`, since `run` can also refer to a sequencing run. +- Relaxes many constraints (like resources sections, pipelines_dir columns), making project configuration files useful outside looper. This moves us closer to dividing models from looper, and improves flexibility. +- Various small bug fixes and dev improvements. +- Require `setuptools` for installation, and `pandas 0.20.2`. If `numexpr` is installed, version `2.6.2` is required. 
+- Allows tilde in `pipeline_interfaces`
+
+## [0.5] -- 2017-03-01
+### Added
+- Add new looper version tracking, with `--version` and `-V` options and printing version at runtime
+- Add support for asterisks in file paths
+- Add support for multiple pipeline directories in priority order
+- Revamp of messages makes for more intuitive output
+- Colorize output
+- Complete overhaul of logging and test infrastructure, using logging and pytest packages
+
+### Changed
+- Removes pipelines_dir requirement for models, making it useful outside looper
+- Small bug fixes related to `all_input_files` and `required_input_files` attributes
+- More robust installation and more explicit requirement of Python 2.7
+
+
+## [0.4] -- 2017-01-12
+### Added
+- New command-line interface (CLI) based on sub-commands
+- New subcommand (`looper summarize`) replacing the `summarizePipelineStats.R` script
+- New subcommand (`looper check`) replacing the `flagCheck.sh` script
+- New command (`looper destroy`) to remove all output of a project
+- New command (`looper clean`) to remove intermediate files of a project flagged for deletion
+- Support for portable and pipeline-independent allocation of computing resources with Looperenv. 
+ +### Changed +- Removed requirement to have `pipelines` repository installed in order to extend base Sample objects +- Maintenance of sample attributes as provided by user by means of reading them in as strings (to be improved further) +- Improved serialization of Sample objects diff --git a/tests/conftest.py b/tests/conftest.py index dcba16693..a64446e45 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,6 @@ - ./tests/scripts/test-integration.sh # Integration tests via script """ -import pytest # Register custom markers diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index e636d5361..d0eb7b315 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -4,14 +4,14 @@ import shutil import socket from contextlib import contextmanager +from shutil import copyfile from typing import Iterable import peppy import pytest -from shutil import copyfile from yaml import dump, safe_load -from looper.const import LOOPER_DOTFILE_NAME, OUTDIR_KEY, PIPESTAT_KEY +from looper.const import LOOPER_DOTFILE_NAME, OUTDIR_KEY # Skip all integration tests unless explicitly enabled diff --git a/tests/integration/test_cli_commands.py b/tests/integration/test_cli_commands.py index 5af0e0c24..5da535bcf 100644 --- a/tests/integration/test_cli_commands.py +++ b/tests/integration/test_cli_commands.py @@ -12,7 +12,6 @@ MisconfigurationException, PipestatConfigurationException, ) - from tests.integration.conftest import get_outdir, get_project_config_path diff --git a/tests/integration/test_cli_run.py b/tests/integration/test_cli_run.py index c163f90f8..9ee2c8ff8 100644 --- a/tests/integration/test_cli_run.py +++ b/tests/integration/test_cli_run.py @@ -9,7 +9,6 @@ from looper.exceptions import MisconfigurationException from looper.project import Project from looper.utils import is_PEP_file_type, is_pephub_registry_path - from tests.integration.conftest import ( assert_content_in_all_files, assert_content_not_in_any_files, diff 
--git a/tests/integration/test_comprehensive.py b/tests/integration/test_comprehensive.py index 0325fb817..4cdfc9bd9 100644 --- a/tests/integration/test_comprehensive.py +++ b/tests/integration/test_comprehensive.py @@ -9,10 +9,8 @@ from looper.cli_pydantic import main from looper.const import * from looper.utils import * - from tests.integration.conftest import get_project_config_path, is_connected - CMD_STRS = ["string", " --string", " --sjhsjd 212", "7867#$@#$cc@@"] From 42452f7cb2754e449d6f45977d6f0262553a895e Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 07:39:30 -0500 Subject: [PATCH 162/163] ruff --- tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index a64446e45..f22f26dae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,6 @@ """ - # Register custom markers def pytest_configure(config): config.addinivalue_line( From e09daa5708856242df09ec2b8363d219ba2e9de3 Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 25 Feb 2026 07:40:53 -0500 Subject: [PATCH 163/163] update changelog --- changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 82e558f0d..c77934a9b 100644 --- a/changelog.md +++ b/changelog.md @@ -4,7 +4,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [2.1.0] -- 2026-02-25 ### Changed -- Migrated to yacman v1 API (`YAMLConfigManager.from_yaml_file()`, `write_lock`/`read_lock` context managers) +- Migrated to new yacman API (`YAMLConfigManager.from_yaml_file()`, `write_lock`/`read_lock` context managers); requires yacman >=0.9.5 - Migrated CLI configuration to pydantic-settings - Improved CLI startup time by deferring heavy imports to module level - Made signal handling thread-safe