diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..ba3e475b --- /dev/null +++ b/.travis.yml @@ -0,0 +1,55 @@ +language: python +python: + - "3.6" + +install: + - sudo apt-get update +# - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then +# wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; +# else +# wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; +# fi +# - bash miniconda.sh -b -p $HOME/miniconda +# - export PATH="$HOME/miniconda/bin:$PATH" +# - hash -r +# - conda config --set always_yes yes --set changeps1 no --set auto_activate_base False +# - conda update -q conda +# - conda init +# - source activate +# - conda info -a +# +# # - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION +# # - source activate test-environment +# - conda config --add channels conda-forge +# - conda config --add channels bioconda +# - conda config --add channels anaconda +# - conda config --add channels workflowconversion +# +# - conda install python=$TRAVIS_PYTHON_VERSION +# # TODO could be replaced by conda package once it is merged and released +# - git clone -b topic/no-1-2x https://github.com/bernt-matthias/CTDopts $HOME/CTDopts +# - cd $HOME/CTDopts +# - python setup.py install +# - conda install -c conda-forge lxml +# - conda install -c conda-forge ruamel.yaml +# - conda install libxml2 +# - cd $TRAVIS_BUILD_DIR +# - python setup.py install +# - conda install coverage green codecov + +# - virtualenv .venv +# - . 
.venv/bin/activate + - pip install git+https://github.com/WorkflowConversion/CTDopts + - pip install lxml ruamel.yaml planemo + - cd $TRAVIS_BUILD_DIR + - python setup.py install +script: + - python setup.py test + - planemo l tests/test-data/ + + # planemo test content of tests/test-data (this is OK, because the previous + # tests ensure equality of the xmls that are generated and those in the package) + - export PATH=$(pwd)/tests/test-data:$PATH && planemo t tests/test-data/ + +# after_script: +# - python -m codecov diff --git a/README.md b/README.md index d1852944..8b3c5db5 100644 --- a/README.md +++ b/README.md @@ -123,17 +123,71 @@ Several inputs are given. The output is the already existent folder, `/data/wrap Please note that the output file name is **not** taken from the name of the input file, rather from the name of the tool, that is, from the `name` attribute in the `` element in its corresponding CTD. By convention, the name of the CTD file and the name of the tool match. -### Blacklisting Parameters -* Purpose: Some parameters present in the CTD are not to be exposed on the output files. Think of parameters such as `--help`, `--debug` that might won't make much sense to be exposed to final users in a workflow management system. -* Short/long version: `-b` / `--blacklist-parameters` +### Exclusion, hardcoding, and modification of Parameters +* Purpose: Some parameters present in the CTD are not to be exposed on the output files (e.g. parameters such as `--help`, `--debug` that might not make much sense to be exposed to users in a workflow management system), other parameters should be hardcoded (i.e. parameters that should not be exposed to the user but still set to a fixed value on the generated command line), and for other parameters it might be necessary to modify attributes of the input CTD or the generated output. +* Short/long version: `-p` / `--hardcoded-parameters` * Required: no. -* Taken values: A list of parameters to be blacklisted.
+* Taken values: A json file defining: exclusion, hardcoding, and modifications of parameters Example: - $ pythonconvert.py [FORMAT] ... -b h help quiet - -In this case, `CTDConverter` will not process any of the parameters named `h`, `help`, or `quiet`, that is, they will not appear in the generated output files. + $ python convert.py [FORMAT] ... -p JSON_FILE + +The json defines a mapping from parameter names to a list of modifications: + +``` +{ + "parameter1": [MODIFICATION1, ...], + "parameter2": [MODIFICATION1, ...] + ... +} +``` + +where each modification is a mapping as defined below. + + +#### Hardcoding parameters + +If a parameter should always be set on the command line using a fixed value, +i.e. the user cannot choose the value, this can be done as follows: + +`"parameter": [{"value":"HARDCODED_VALUE"}]` + +#### Excluding parameters + +In order to exclude a parameter, so that it will not appear in the generated tool or +the generated command line, the same syntax as for hardcoding is used but a +special reserved value is used: + +`"parameter": [{"value": "@"}]` + +#### Modifying parameters + +It's possible to modify attributes of the input CTD definition of a parameter +as well as attributes of the generated Galaxy XML tags. + +``` + "test": [{ + "CTD:type": "text", + "XML:type": "hidden" + }], +``` + +### Restricting modifications to a subset of the tools + +It's possible to specify modifications to a parameter for only a subset of the tools +by specifying a list of tools as follows: + +``` + "output_files": [{ + "CTD:required": true, + "tools": ["OpenSwathDIAPreScoring"] + }, { + "CTD:restrictions": "txt,tsv,pep.xml,pepXML,html", + "tools": ["SpectraSTSearchAdapter"] + + }] +``` ### Schema Validation * Purpose: Provide validation of input CTDs against a schema file (i.e, a XSD file). @@ -147,26 +201,6 @@ If a schema is provided, all input CTDs will be validated against it.
**NOTE:** Please make sure to read the [section on issues with schema validation](#issues-with-libxml2-and-schema-validation) if you require validation of CTDs against a schema. -### Hardcoding Parameters -* Purpose: Fixing the value of a parameter and hide it from the end user. -* Short/long version: `-p` / `--hardcoded-parameters` -* Required: no. -* Taken values: The path of a file containing the mapping between parameter names and hardcoded values to use. - -It is sometimes required that parameters are hidden from the end user in workflow systems and that they take a predetermined, fixed value. Allowing end users to control parameters similar to `--verbosity`, `--threads`, etc., might create more problems than solving them. For this purpose, the parameter `-p`/`--hardcoded-parameters` takes the path of a file that contains up to three columns separated by whitespace that map parameter names to the hardcoded value. The first column contains the name of the parameter and the second one the hardcoded value. Only the first two columns are mandatory. - -If the parameter is to be hardcoded only for certain tools, a third column containing a comma separated list of tool names for which the hardcoding will apply can be added. - -Lines starting with `#` will be ignored. The following is an example of a valid file: - - # Parameter name # Value # Tool(s) - threads 8 - mode quiet - xtandem_executable xtandem XTandemAdapter - verbosity high Foo, Bar - -The parameters `threads` and `mode` will be set to `8` and `quiet`, respectively, for all parsed CTDs. However, the `xtandem_executable` parameter will be set to `xtandem` only for the `XTandemAdapter` tool. Similarly, the parameter `verbosity` will be set to `high` for the `Foo` and `Bar` tools only. - ### Providing a default executable Path * Purpose: Help workflow engines locate tools by providing a path. 
* Short/long version: `-x` / `--default-executable-path` @@ -178,7 +212,33 @@ CTDs can contain an `` element that will be used when executing The following invocation of the converter will use `/opt/suite/bin` as a prefix when providing the executable path in the output files for any input CTD that lacks the `` section: $ python convert.py [FORMAT] -x /opt/suite/bin ... - + +### Bump wrapper versions + +There are two ways to bump tool versions. + +- Definition of a `@GALAXY_VERSION@` token in the macros file. This can be used to bump all tools at once. Tool versions will be `@TOOL_VERSION@+galaxy@GALAXY_VERSION@`. +- Use the `--bump-file` parameter to specify the wrapper version of a subset of the tools in a json file that maps tool names/ids to a wrapper version. Tool version will be set to `@TOOL_VERSION@+galaxyX`, where `X` is the version found in the json file or `0` if not found. + +In case of an update of the tool version, i.e. `@TOOL_VERSION@`, in the first case `@GALAXY_VERSION@` should be reset to 0 and the dictionary in the bump file should be emptied otherwise. + +Rationale: the auto-generation of the tool xml files would overwrite the +wrapper version when regenerated. Hence it needs to be specified externally, +e.g. in the macros.xml or in the bump file. 
+ +### Tests + +Tests for Galaxy tools are generated with: + +``` +PATH=$(pwd)/tests/test-data/:$PATH +for i in tests/test-data/*ctd +do +b=$(basename $i .ctd) +python convert.py galaxy -i tests/test-data/$b.ctd -o tests/test-data/$b.xml -m tests/test-data/macros.xml -f tests/test-data/filetypes.txt --test-test -p tests/test-data/hardcoded_params.json --tool-version 5.0.011 +done +``` + [CTDopts]: https://github.com/genericworkflownodes/CTDopts [CTDSchema]: https://github.com/WorkflowConversion/CTDSchema diff --git a/common/exceptions.py b/common/exceptions.py index ef398d30..315b4a33 100644 --- a/common/exceptions.py +++ b/common/exceptions.py @@ -42,4 +42,4 @@ def __str__(self): return self.msg def __unicode__(self): - return self.msg \ No newline at end of file + return self.msg diff --git a/common/utils.py b/common/utils.py index b5b0b1f3..481d30b2 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,14 +1,16 @@ #!/usr/bin/env python # encoding: utf-8 +from functools import reduce # forward compatibility for Python 3 +import json import ntpath +import operator import os from lxml import etree -from string import strip -from logger import info, error, warning +from common import logger from common.exceptions import ApplicationException -from CTDopts.CTDopts import CTDModel, ParameterGroup +from CTDopts.CTDopts import _InFile, _OutFile, CTDModel, ParameterGroup, Parameters, Parameter, ModelTypeError MESSAGE_INDENTATION_INCREMENT = 2 @@ -34,17 +36,55 @@ def __init__(self): # XtandemAdapter#adapter -> xtandem.exe # adapter -> adapter.exe self.separator = "!" 
+ + # hard coded values self.parameter_map = {} + # ctd/xml attributes to overwrite + self.attribute_map = {'CTD': {}, 'XML': {}} + + # blacklisted parameters + self.blacklist = set() + + def register_blacklist(self, parameter_name, tool_name): + k = self.build_key(parameter_name, tool_name) + self.blacklist.add(k) + # the most specific value will be returned in case of overlap - def get_hardcoded_value(self, parameter_name, tool_name): + def get_blacklist(self, parameter_name, tool_name): # look for the value that would apply for all tools - generic_value = self.parameter_map.get(parameter_name, None) - specific_value = self.parameter_map.get(self.build_key(parameter_name, tool_name), None) - if specific_value is not None: - return specific_value + if self.build_key(parameter_name, tool_name) in self.blacklist: + return True + elif parameter_name in self.blacklist: + return True + else: + return False - return generic_value + def register_attribute(self, parameter_name, attribute, value, tool_name): + tpe, attribute = attribute.split(':') + if tpe not in ['CTD', 'XML']: + raise Exception('Attribute hardcoder not in CTD/XML') + + k = self.build_key(parameter_name, tool_name) + if k not in self.attribute_map[tpe]: + self.attribute_map[tpe][k] = {} + self.attribute_map[tpe][k][attribute] = value + + # the most specific value will be returned in case of overlap + def get_hardcoded_attributes(self, parameter_name, tool_name, tpe): + # look for the value that would apply for all tools + try: + return self.attribute_map[tpe][self.build_key(parameter_name, tool_name)] + except KeyError: + return self.attribute_map[tpe].get(parameter_name, None) + + # the most specific value will be returned in case of overlap + def get_hardcoded_value(self, parameter_name, tool_name): + # look for the value that would apply for all tools + try: + return self.parameter_map[self.build_key(parameter_name, tool_name)] + except KeyError: + return self.parameter_map.get(parameter_name, None) 
def register_parameter(self, parameter_name, parameter_value, tool_name=None): self.parameter_map[self.build_key(parameter_name, tool_name)] = parameter_value @@ -73,12 +113,15 @@ def validate_argument_is_valid_path(args, argument_name): if member_value is not None: if isinstance(member_value, list): for file_name in member_value: - paths_to_check.append(strip(str(file_name))) + paths_to_check.append(str(file_name).strip()) else: - paths_to_check.append(strip(str(member_value))) + paths_to_check.append(str(member_value).strip()) for path_to_check in paths_to_check: - validate_path_exists(path_to_check) + try: + validate_path_exists(path_to_check) + except ApplicationException: + raise ApplicationException("Argument %s: The provided output file name (%s) points to a directory." % (argument_name, path_to_check)) # taken from @@ -99,12 +142,12 @@ def parse_input_ctds(xsd_location, input_ctds, output_destination, output_file_e schema = None if xsd_location is not None: try: - info("Loading validation schema from %s" % xsd_location, 0) + logger.info("Loading validation schema from %s" % xsd_location, 0) schema = etree.XMLSchema(etree.parse(xsd_location)) - except Exception, e: - error("Could not load validation schema %s. Reason: %s" % (xsd_location, str(e)), 0) + except Exception as e: + logger.error("Could not load validation schema %s. Reason: %s" % (xsd_location, str(e)), 0) else: - warning("Validation against a schema has not been enabled.", 0) + logger.warning("Validation against a schema has not been enabled.", 0) for input_ctd in input_ctds: if schema is not None: @@ -114,8 +157,20 @@ def parse_input_ctds(xsd_location, input_ctds, output_destination, output_file_e # if multiple inputs are being converted, we need to generate a different output_file for each input if is_converting_multiple_ctds: output_file = os.path.join(output_file, get_filename_without_suffix(input_ctd) + "." 
+ output_file_extension) - info("Parsing %s" % input_ctd) - parsed_ctds.append(ParsedCTD(CTDModel(from_file=input_ctd), input_ctd, output_file)) + logger.info("Parsing %s" % input_ctd) + + model = None + try: + model = CTDModel(from_file=input_ctd) + except ModelTypeError: + pass + try: + model = Parameters(from_file=input_ctd) + except ModelTypeError: + pass + assert model is not None, "Could not parse %s, seems to be no CTD/PARAMS" % (input_ctd) + + parsed_ctds.append(ParsedCTD(model, input_ctd, output_file)) return parsed_ctds @@ -128,7 +183,7 @@ def validate_against_schema(ctd_file, schema): try: parser = etree.XMLParser(schema=schema) etree.parse(ctd_file, parser=parser) - except etree.XMLSyntaxError, e: + except etree.XMLSyntaxError as e: raise ApplicationException("Invalid CTD file %s. Reason: %s" % (ctd_file, str(e))) @@ -143,10 +198,6 @@ def add_common_parameters(parser, version, last_updated): parser.add_argument("-x", "--default-executable-path", dest="default_executable_path", help="Use this executable path when is not present in the CTD", default=None, required=False) - parser.add_argument("-b", "--blacklist-parameters", dest="blacklisted_parameters", default=[], nargs="+", - action="append", - help="List of parameters that will be ignored and won't appear on the galaxy stub", - required=False) parser.add_argument("-p", "--hardcoded-parameters", dest="hardcoded_parameters", default=None, required=False, help="File containing hardcoded values for the given parameters. 
Run with '-h' or '--help' " "to see a brief example on the format of this file.") @@ -163,32 +214,39 @@ def add_common_parameters(parser, version, last_updated): def parse_hardcoded_parameters(hardcoded_parameters_file): parameter_hardcoder = ParameterHardcoder() - if hardcoded_parameters_file is not None: - line_number = 0 - with open(hardcoded_parameters_file) as f: - for line in f: - line_number += 1 - if line is None or not line.strip() or line.strip().startswith("#"): - pass - else: - # the third column must not be obtained as a whole, and not split - parsed_hardcoded_parameter = [ _ for _ in line.strip().split("\t") if _ != ""] - # valid lines contain two or three columns - if not (2 <= len(parsed_hardcoded_parameter) <= 3): - warning("Invalid line at line number %d of the given hardcoded parameters file. Line will be" - "ignored:\n%s" % (line_number, line), 0) - continue - - parameter_name = parsed_hardcoded_parameter[0] - hardcoded_value = parsed_hardcoded_parameter[1] - tool_names = None - if len(parsed_hardcoded_parameter) == 3: - tool_names = parsed_hardcoded_parameter[2].split(',') - if tool_names: - for tool_name in tool_names: - parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name.strip()) + if hardcoded_parameters_file is None: + return parameter_hardcoder + with open(hardcoded_parameters_file) as fp: + data = json.load(fp) + + for parameter_name in data: + if parameter_name == "#": + continue + for el in data[parameter_name]: + hardcoded_value = el.get("value", None) + tool_names = el.get("tools", [None]) + for tool_name in tool_names: + if tool_name is not None: + tool_name = tool_name.strip() + + # hardcoded / blacklisted: + # - blacklisted: if value is @ + # - hardcoded: otherwise + if hardcoded_value is not None: + if hardcoded_value == '@': + parameter_hardcoder.register_blacklist(parameter_name, tool_name) else: - parameter_hardcoder.register_parameter(parameter_name, hardcoded_value) + 
parameter_hardcoder.register_parameter(parameter_name, hardcoded_value, tool_name) + else: + for a in el: + if a in ["tools", "value"]: + continue + if el[a] == "output-file": + el[a] = _OutFile + if el[a] == "input-file": + el[a] = _InFile + + parameter_hardcoder.register_attribute(parameter_name, a, el[a], tool_name) return parameter_hardcoder @@ -205,7 +263,9 @@ def extract_tool_help_text(ctd_model): if manual is not None: help_text = manual if doc_url is not None: - help_text = ("" if manual is None else manual) + "\nFor more information, visit %s" % doc_url + help_text = ("" if manual is None else manual) + if doc_url != "": + help_text += "\nFor more information, visit %s" % doc_url return help_text @@ -246,73 +306,141 @@ def extract_tool_executable_path(model, default_executable_path): return command -def extract_and_flatten_parameters(ctd_model): - parameters = [] - if len(ctd_model.parameters.parameters) > 0: - # use this to put parameters that are to be processed - # we know that CTDModel has one parent ParameterGroup - pending = [ctd_model.parameters] - while len(pending) > 0: - # take one element from 'pending' - parameter = pending.pop() - if type(parameter) is not ParameterGroup: - parameters.append(parameter) - else: - # append the first-level children of this ParameterGroup - pending.extend(parameter.parameters.values()) - # returned the reversed list of parameters (as it is now, - # we have the last parameter in the CTD as first in the list) - return reversed(parameters) +def _extract_and_flatten_parameters(parameter_group, nodes=False): + """ + get the parameters of a OptionGroup as generator + """ + for parameter in parameter_group.values(): + if type(parameter) is Parameter: + yield parameter + else: + if nodes: + yield parameter + for p in _extract_and_flatten_parameters(parameter.parameters, nodes): + yield p + + +def extract_and_flatten_parameters(ctd_model, nodes=False): + """ + get the parameters of a CTD as generator + """ + if 
type(ctd_model) is CTDModel: + return _extract_and_flatten_parameters(ctd_model.parameters.parameters, nodes) + else: + return _extract_and_flatten_parameters(ctd_model.parameters, nodes) + +# names = [_.name for _ in ctd_model.parameters.values()] +# if names == ["version", "1"]: +# return _extract_and_flatten_parameters(ctd_model.parameters.parameters["1"], nodes) +# else: +# return _extract_and_flatten_parameters(ctd_model, nodes) + +# for parameter in ctd_model.parameters.parameters: +# if type(parameter) is not ParameterGroup: +# yield parameter +# else: +# for p in extract_and_flatten_parameters(parameter): +# yield p + +# parameters = [] +# if len(ctd_model.parameters.parameters) > 0: +# # use this to put parameters that are to be processed +# # we know that CTDModel has one parent ParameterGroup +# pending = [ctd_model.parameters] +# while len(pending) > 0: +# # take one element from 'pending' +# parameter = pending.pop() +# if type(parameter) is not ParameterGroup: +# parameters.append(parameter) +# else: +# # append the first-level children of this ParameterGroup +# pending.extend(parameter.parameters.values()) +# # returned the reversed list of parameters (as it is now, +# # we have the last parameter in the CTD as first in the list) +# return reversed(parameters) # some parameters are mapped to command line options, this method helps resolve those mappings, if any -def resolve_param_mapping(param, ctd_model): +def resolve_param_mapping(param, ctd_model, fix_underscore=False): # go through all mappings and find if the given param appears as a reference name in a mapping element param_mapping = None - for cli_element in ctd_model.cli: + ctd_model_cli = [] + if hasattr(ctd_model, "cli"): + ctd_model_cli = ctd_model.cli + + for cli_element in ctd_model_cli: for mapping_element in cli_element.mappings: if mapping_element.reference_name == param.name: if param_mapping is not None: - warning("The parameter %s has more than one mapping in the section. 
" - "The first found mapping, %s, will be used." % (param.name, param_mapping), 1) + logger.warning("The parameter %s has more than one mapping in the section. " + "The first found mapping, %s, will be used." % (param.name, param_mapping), 1) else: param_mapping = cli_element.option_identifier - - return param_mapping if param_mapping is not None else param.name - - -def _extract_param_cli_name(param, ctd_model): - # we generate parameters with colons for subgroups, but not for the two topmost parents (OpenMS legacy) - if type(param.parent) == ParameterGroup: - if not hasattr(param.parent.parent, 'parent'): - return resolve_param_mapping(param, ctd_model) - elif not hasattr(param.parent.parent.parent, 'parent'): - return resolve_param_mapping(param, ctd_model) - else: - if ctd_model.cli: - warning("Using nested parameter sections (NODE elements) is not compatible with ", 1) - return extract_param_name(param.parent) + ":" + resolve_param_mapping(param, ctd_model) + if param_mapping is not None: + ret = param_mapping + else: + ret = param.name + if fix_underscore and ret.startswith("_"): + return ret[1:] else: - return resolve_param_mapping(param, ctd_model) + return ret -def extract_param_name(param): +def _extract_param_cli_name(param, ctd_model, fix_underscore=False): # we generate parameters with colons for subgroups, but not for the two topmost parents (OpenMS legacy) if type(param.parent) == ParameterGroup: - if not hasattr(param.parent.parent, "parent"): - return param.name - elif not hasattr(param.parent.parent.parent, "parent"): - return param.name - else: - return extract_param_name(param.parent) + ":" + param.name + if hasattr(ctd_model, "cli") and ctd_model.cli: + logger.warning("Using nested parameter sections (NODE elements) is not compatible with ", 1) + return ":".join(extract_param_path(param, fix_underscore)[:-1]) + ":" + resolve_param_mapping(param, ctd_model, fix_underscore) else: - return param.name + return resolve_param_mapping(param, 
ctd_model, fix_underscore) + + +def extract_param_path(param, fix_underscore=False): + pl = param.get_lineage(name_only=True) + if fix_underscore: + for i, p in enumerate(pl): + if p.startswith("_"): + pl[i] = pl[i][1:] + return pl +# if type(param.parent) == ParameterGroup or type(param.parent) == Parameters: +# if not hasattr(param.parent.parent, "parent"): +# return [param.name] +# elif not hasattr(param.parent.parent.parent, "parent"): +# return [param.name] +# else: +# return extract_param_path(param.parent) + [param.name] +# else: +# return [param.name] + + +def extract_param_name(param, fix_underscore=False): + # we generate parameters with colons for subgroups, but not for the two topmost parents (OpenMS legacy) + return ":".join(extract_param_path(param, fix_underscore)) def extract_command_line_prefix(param, ctd_model): - param_name = extract_param_name(param) - param_cli_name = _extract_param_cli_name(param, ctd_model) + param_name = extract_param_name(param, True) + param_cli_name = _extract_param_cli_name(param, ctd_model, True) if param_name == param_cli_name: # there was no mapping, so for the cli name we will use a '-' in the prefix param_cli_name = "-" + param_name return param_cli_name + + +def indent(s, indentation=" "): + """ + helper function to indent text + @param s the text (a string) + @param indentation the desired indentation + @return indented text + """ + return [indentation + _ for _ in s] + + +def getFromDict(dataDict, mapList): + return reduce(operator.getitem, mapList, dataDict) + + +def setInDict(dataDict, mapList, value): + getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value diff --git a/convert.py b/convert.py index 4442a44b..84ed1204 100644 --- a/convert.py +++ b/convert.py @@ -16,7 +16,7 @@ program_build_date = str(__updated__) program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date) program_short_description = "CTDConverter - A project from the WorkflowConversion family " \ - 
"(https://github.com/WorkflowConversion/CTDConverter)" + "(https://github.com/WorkflowConversion/CTDConverter)" program_usage = ''' USAGE: @@ -38,109 +38,7 @@ $ python converter.py [FORMAT] -i [INPUT_FILES] -o [OUTPUT_DIRECTORY] -III - Hardcoding parameters - - It is possible to hardcode parameters. This makes sense if you want to set a tool in 'quiet' mode or if your tools - support multi-threading and accept the number of threads via a parameter, without giving end users the chance to - change the values for these parameters. - - In order to generate hardcoded parameters, you need to provide a simple file. Each line of this file contains - two or three columns separated by tabs. Any line starting with a '#' will be ignored. The first column contains - the name of the parameter, the second column contains the value that will always be set for this parameter. Only the - first two columns are mandatory. - - If the parameter is to be hardcoded only for a set of tools, then a third column can be added. This column contains - a comma-separated list of tool names for which the parameter will be hardcoded. If a third column is not present, - then all processed tools containing the given parameter will get a hardcoded value for it. - - The following is an example of a valid file: - - ##################################### HARDCODED PARAMETERS example ##################################### - # Every line starting with a # will be handled as a comment and will not be parsed. - # The first column is the name of the parameter and the second column is the value that will be used. - - # Parameter name # Value # Tool(s) - threads 8 - mode quiet - xtandem_executable xtandem XTandemAdapter - verbosity high Foo, Bar - - ######################################################################################################### - - Using the above file will produce a command-line similar to: - - [TOOL] ... -threads 8 -mode quiet ... - - for all tools. 
For XTandemAdapter, however, the command-line will look like: - - XtandemAdapter ... -threads 8 -mode quiet -xtandem_executable xtandem ... - - And for tools Foo and Bar, the command-line will be similar to: - - Foo -threads 8 -mode quiet -verbosity high ... - - - IV - Engine-specific parameters - - i - Galaxy - - a. Providing file formats, mimetypes - - Galaxy supports the concept of file format in order to connect compatible ports, that is, input ports of a - certain data format will be able to receive data from a port from the same format. This converter allows you - to provide a personalized file in which you can relate the CTD data formats with supported Galaxy data formats. - The layout of this file consists of lines, each of either one or four columns separated by any amount of - whitespace. The content of each column is as follows: - - * 1st column: file extension - * 2nd column: data type, as listed in Galaxy - * 3rd column: full-named Galaxy data type, as it will appear on datatypes_conf.xml - * 4th column: mimetype (optional) - - The following is an example of a valid "file formats" file: - - ########################################## FILE FORMATS example ########################################## - # Every line starting with a # will be handled as a comment and will not be parsed. - # The first column is the file format as given in the CTD and second column is the Galaxy data format. The - # second, third, fourth and fifth columns can be left empty if the data type has already been registered - # in Galaxy, otherwise, all but the mimetype must be provided. 
- - # CTD type # Galaxy type # Long Galaxy data type # Mimetype - csv tabular galaxy.datatypes.data:Text - fasta - ini txt galaxy.datatypes.data:Text - txt - idxml txt galaxy.datatypes.xml:GenericXml application/xml - options txt galaxy.datatypes.data:Text - grid grid galaxy.datatypes.data:Grid - ########################################################################################################## - - Note that each line consists precisely of either one, three or four columns. In the case of data types already - registered in Galaxy (such as fasta and txt in the above example), only the first column is needed. In the - case of data types that haven't been yet registered in Galaxy, the first three columns are needed - (mimetype is optional). - - For information about Galaxy data types and subclasses, see the following page: - https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes - - - b. Finer control over which tools will be converted - - Sometimes only a subset of CTDs needs to be converted. It is possible to either explicitly specify which tools - will be converted or which tools will not be converted. - - The value of the -s/--skip-tools parameter is a file in which each line will be interpreted as the name of a - tool that will not be converted. Conversely, the value of the -r/--required-tools is a file in which each line - will be interpreted as a tool that is required. Only one of these parameters can be specified at a given time. - - The format of both files is exactly the same. As stated before, each line will be interpreted as the name of a - tool. Any line starting with a '#' will be ignored. - - - ii - CWL - - There are, for now, no CWL-specific parameters or options. - +For more detailed help see README.md in the root folder as well as `galaxy/README.md` or `cwl/README.md`. 
''' program_license = '''%(short_description)s @@ -173,10 +71,10 @@ def main(argv=None): # at this point we cannot parse the arguments, because each converter takes different arguments, meaning each # converter will register its own parameters after we've registered the basic ones... we have to do it old school if len(argv) < 2: - utils.error("Not enough arguments provided") - print("\nUsage: $ python convert.py [TARGET] [ARGUMENTS]\n\n" + - "Where:\n" + - " target: one of 'cwl' or 'galaxy'\n\n" + + utils.logger.error("Not enough arguments provided") + print("\nUsage: $ python convert.py [TARGET] [ARGUMENTS]\n\n" + "Where:\n" + " target: one of 'cwl' or 'galaxy'\n\n" "Run again using the -h/--help option to print more detailed help.\n") return 1 @@ -188,14 +86,14 @@ def main(argv=None): from cwl import converter elif target == 'galaxy': from galaxy import converter - elif target == '-h' or target == '--help' or target == '--h' or target == 'help': - print(program_license) - return 0 +# elif target == '-h' or target == '--help' or target == '--h' or target == 'help': +# print(program_license) +# return 0 else: - utils.error("Unrecognized target engine. Supported targets are 'cwl' and 'galaxy'.") + utils.logger.error("Unrecognized target engine. 
Supported targets are 'cwl' and 'galaxy'.") return 1 - utils.info("Using %s converter" % target) + utils.logger.info("Using %s converter" % target) try: # Setup argument parser @@ -222,28 +120,28 @@ def main(argv=None): print("Interrupted...") return 0 - except ApplicationException, e: + except ApplicationException as e: traceback.print_exc() - utils.error("CTDConverter could not complete the requested operation.", 0) - utils.error("Reason: " + e.msg, 0) + utils.logger.error("CTDConverter could not complete the requested operation.", 0) + utils.logger.error("Reason: " + e.msg, 0) return 1 - except ModelError, e: + except ModelError as e: traceback.print_exc() - utils.error("There seems to be a problem with one of your input CTDs.", 0) - utils.error("Reason: " + e.msg, 0) + utils.logger.error("There seems to be a problem with one of your input CTDs.", 0) + utils.logger.error("Reason: " + e.msg, 0) return 1 - except Exception, e: + except Exception as e: traceback.print_exc() - utils.error("CTDConverter could not complete the requested operation.", 0) - utils.error("Reason: " + e.msg, 0) + utils.logger.error("CTDConverter could not complete the requested operation.", 0) + utils.logger.error("Reason: " + e.msg, 0) return 2 def validate_and_prepare_common_arguments(args): # flatten lists of lists to a list containing elements - lists_to_flatten = ["input_files", "blacklisted_parameters"] + lists_to_flatten = ["input_files"] for list_to_flatten in lists_to_flatten: utils.flatten_list_of_lists(args, list_to_flatten) diff --git a/create_galaxy_tests.sh b/create_galaxy_tests.sh new file mode 100755 index 00000000..0d9279cc --- /dev/null +++ b/create_galaxy_tests.sh @@ -0,0 +1,3 @@ +conda create -y --quiet --override-channels --channel iuc --channel conda-forge --channel bioconda --channel defaults --name ctdopts-1.3 ctdopts=1.3 lxml + +python convert.py galaxy -i tests/test-data/*.ctd -o tests/test-data/ -m tests/test-data/macros.xml -f tests/test-data/filetypes.txt 
--test-test -p tests/test-data/hardcoded_params.json --tool-version 3.8 diff --git a/cwl/converter.py b/cwl/converter.py index 08fad015..587ccebd 100755 --- a/cwl/converter.py +++ b/cwl/converter.py @@ -11,7 +11,7 @@ import ruamel.yaml as yaml -from CTDopts.CTDopts import _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, _FileFormat, ModelError, _Null +from CTDopts.CTDopts import _InFile, _OutFile, _Choices, _Null from common import utils, logger # all cwl-related properties are defined here @@ -72,7 +72,7 @@ def convert_models(args, parsed_ctds): logger.info("Writing to %s" % utils.get_filename(output_file), 1) - stream = file(output_file, 'w') + stream = open(output_file, 'w') stream.write(CWL_SHEBANG + '\n\n') stream.write("# This CWL file was automatically generated using CTDConverter.\n") stream.write("# Visit https://github.com/WorkflowConversion/CTDConverter for more information.\n\n") diff --git a/galaxy/README.md b/galaxy/README.md index fa728103..56ab0998 100644 --- a/galaxy/README.md +++ b/galaxy/README.md @@ -77,6 +77,16 @@ There are some macros that are required, namely `stdio`, `requirements` and `adv Please note that the used macros files **must** be copied to your Galaxy installation on the same location in which you place the generated *ToolConfig* files, otherwise Galaxy will not be able to parse the generated *ToolConfig* files! +## Including additional Test Macros files +* Purpose: Include macros containing tests +* `--test-macros` and `--test-macros-prefix` +* Required: no. +* Taken values: List of paths and corresponding prefixes + +This allows to specify macro file(s) containing tests. The macros should be +named `` where `PREFIX` is the value specified +with `--test-macros-prefix`. + ## Generating a `datatypes_conf.xml` File * Purpose: Specify the destination of a generated `datatypes_conf.xml` file. 
* Short/long version: `-d` / `--datatypes-destination` @@ -113,27 +123,6 @@ Note that each line consists of either one, three or four columns. In the case o For information about Galaxy data types and subclasses, consult the following page: https://wiki.galaxyproject.org/Admin/Datatypes/Adding%20Datatypes -## Remarks about some of the *OpenMS* Tools -* Most of the tools can be generated automatically. However, some of the tools need some extra work (for now). -* The following adapters need to be changed, such that you provide the path to the executable: - * FidoAdapter (add `-exe fido` in the command tag, delete the `$param_exe` in the command tag, delete the parameter from the input list). - * MSGFPlusAdapter (add `-executable msgfplus.jar` in the command tag, delete the `$param_executable` in the command tag, delete the parameter from the input list). - * MyriMatchAdapter (add `-myrimatch_executable myrimatch` in the command tag, delete the `$param_myrimatch_executable` in the command tag, delete the parameter from the input list). - * OMSSAAdapter (add `-omssa_executable omssa` in the command tag, delete the `$param_omssa_executable` in the command tag, delete the parameter from the input list). - * PepNovoAdapter (add `-pepnovo_executable pepnovo` in the command tag, delete the `$param_pepnovo_executable` in the command tag, delete the parameter from the input list). - * XTandemAdapter (add `-xtandem_executable xtandem` in the command tag, delete the $param_xtandem_executable in the command tag, delete the parameter from the input list). 
- * To avoid the deletion in the inputs you can also add these parameters to the blacklist - - $ python convert.py galaxy -b exe executable myrimatch_excutable omssa_executable pepnovo_executable xtandem_executable - -* The following tools have multiple outputs (number of inputs = number of outputs) which is not yet supported in Galaxy-stable: - * SeedListGenerator - * SpecLibSearcher - * MapAlignerIdentification - * MapAlignerPoseClustering - * MapAlignerSpectrum - * MapAlignerRTTransformer - [CTDopts]: https://github.com/genericworkflownodes/CTDopts [macros.xml]: https://github.com/WorkflowConversion/CTDConverter/blob/master/galaxy/macros.xml [CTDSchema]: https://github.com/genericworkflownodes/CTDSchema \ No newline at end of file diff --git a/galaxy/converter.py b/galaxy/converter.py index 08c2f422..be0136cb 100755 --- a/galaxy/converter.py +++ b/galaxy/converter.py @@ -1,110 +1,185 @@ #!/usr/bin/env python # encoding: utf-8 +import json import os -import string +import os.path +import re +import sys from collections import OrderedDict -from string import strip +import copy from lxml import etree -from lxml.etree import SubElement, Element, ElementTree, ParseError, parse +from lxml.etree import CDATA, SubElement, Element, ElementTree, ParseError, parse, strip_elements from common import utils, logger from common.exceptions import ApplicationException, InvalidModelException -from CTDopts.CTDopts import _InFile, _OutFile, ParameterGroup, _Choices, _NumericRange, _FileFormat, ModelError, _Null +from CTDopts.CTDopts import _InFile, _OutFile, _OutPrefix, ParameterGroup, _Choices, _NumericRange, _FileFormat, ModelError, _Null -TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'data', - _OutFile: 'data', _Choices: 'select'} +# mapping to CTD types to Galaxy types +TYPE_TO_GALAXY_TYPE = {int: 'integer', float: 'float', str: 'text', bool: 'boolean', _InFile: 'txt', + _OutFile: 'txt', _Choices: 'select', _OutPrefix: 
'output-prefix'} +GALAXY_TYPE_TO_TYPE = dict() +for k in TYPE_TO_GALAXY_TYPE: + GALAXY_TYPE_TO_TYPE[TYPE_TO_GALAXY_TYPE[k]] = k + STDIO_MACRO_NAME = "stdio" REQUIREMENTS_MACRO_NAME = "requirements" -ADVANCED_OPTIONS_MACRO_NAME = "advanced_options" +ADVANCED_OPTIONS_NAME = "adv_opts_" -REQUIRED_MACROS = [REQUIREMENTS_MACRO_NAME, STDIO_MACRO_NAME, ADVANCED_OPTIONS_MACRO_NAME] +REQUIRED_MACROS = [REQUIREMENTS_MACRO_NAME, STDIO_MACRO_NAME, ADVANCED_OPTIONS_NAME + "macro"] class ExitCode: def __init__(self, code_range="", level="", description=None): self.range = code_range self.level = level - self.description = description + self.description = description class DataType: - def __init__(self, extension, galaxy_extension=None, galaxy_type=None, mimetype=None): + def __init__(self, extension, galaxy_extension, composite=None): self.extension = extension self.galaxy_extension = galaxy_extension - self.galaxy_type = galaxy_type - self.mimetype = mimetype + self.composite = composite def add_specific_args(parser): + """ + add command line arguments specific for galaxy tool generation + @param parser an instance of ArgumentParser + """ parser.add_argument("-f", "--formats-file", dest="formats_file", help="File containing the supported file formats. Run with '-h' or '--help' to see a " "brief example on the layout of this file.", default=None, required=False) parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line", - help="Adds content to the command line", default="", required=False) + help="Adds content to the command line", default="", required=False) parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination", - help="Specify the location of a datatypes_conf.xml to modify and add the registered " - "data types. If the provided destination does not exist, a new file will be created.", - default=None, required=False) + help="Specify the location of a datatypes_conf.xml to modify and add the registered " + "data types. 
If the provided destination does not exist, a new file will be created.", + default=None, required=False) parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False, - help="Default category to use for tools lacking a category when generating tool_conf.xml") + help="Default category to use for tools lacking a category when generating tool_conf.xml") parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False, - help="Specify the location of an existing tool_conf.xml that will be modified to include " - "the converted tools. If the provided destination does not exist, a new file will" - "be created.") + help="Specify the location of an existing tool_conf.xml that will be modified to include " + "the converted tools. If the provided destination does not exist, a new file will" + "be created.") parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False, - help="The path that will be prepended to the file names when generating tool_conf.xml") + help="The path that will be prepended to the file names when generating tool_conf.xml") parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False, - help="Each line of the file will be interpreted as a tool name that needs translation. " - "Run with '-h' or '--help' to see a brief example on the format of this file.") + help="Each line of the file will be interpreted as a tool name that needs translation. " + "Run with '-h' or '--help' to see a brief example on the format of this file.") parser.add_argument("-s", "--skip-tools", dest="skip_tools_file", default=None, required=False, - help="File containing a list of tools for which a Galaxy stub will not be generated. " - "Run with '-h' or '--help' to see a brief example on the format of this file.") + help="File containing a list of tools for which a Galaxy stub will not be generated. 
" + "Run with '-h' or '--help' to see a brief example on the format of this file.") parser.add_argument("-m", "--macros", dest="macros_files", default=[], nargs="*", - action="append", required=None, help="Import the additional given file(s) as macros. " - "The macros stdio, requirements and advanced_options are " - "required. Please see galaxy/macros.xml for an example of a " - "valid macros file. All defined macros will be imported.") + action="append", required=None, help="Import the additional given file(s) as macros. " + "The macros stdio, requirements and advanced_options are " + "required. Please see galaxy/macros.xml for an example of a " + "valid macros file. All defined macros will be imported.") + parser.add_argument("--test-macros", dest="test_macros_files", default=[], nargs="*", + action="append", required=None, + help="Import tests from the files given file(s) as macros. " + "The macro names must end with the id of the tools") + parser.add_argument("--test-macros-prefix", dest="test_macros_prefix", default=[], nargs="*", + action="append", required=None, help="The prefix of the macro name in the corresponding trest macros file") + parser.add_argument("--test-test", dest="test_test", action='store_true', default=False, required=False, + help="Generate a simple test for the internal unit tests.") + + parser.add_argument("--test-only", dest="test_only", action='store_true', default=False, required=False, + help="Generate only the test section.") + parser.add_argument("--test-unsniffable", dest="test_unsniffable", nargs="+", default=[], required=False, + help="File extensions that can't be sniffed in Galaxy." + "Needs to be the OpenMS extensions (1st column in --formats-file)." 
+ "For testdata with such extensions ftype will be set in the tes according to the file extension") + + parser.add_argument("--tool-version", dest="tool_version", required=False, default=None, + help="Tool version to use (if not given its extracted from the CTD)") + parser.add_argument("--tool-profile", dest="tool_profile", required=False, default=None, + help="Tool profile version to use (if not given its not set)") + parser.add_argument("--bump-file", dest="bump_file", required=False, + default=None, help="json file defining tool versions." + "tools not listed in the file default to 0." + "if not given @GALAXY_VERSION@ is used") + + +def modify_param_for_galaxy(param): + """ + some parameters need galaxy specific modifications + """ + if param.type is _InFile: + # if file default is given (happens for external applications and + # files for which the default is taken from share/OpenMS) set the + # parm to not required and remove the default (external applications + # need to be taken care by hardcoded values and the other cases + # are chosen automatically if not specified on the command line) + if param.required and not (param.default is None or type(param.default) is _Null): + logger.warning("Data parameter %s with default (%s)" % (param.name, param.default), 1) + param.required = False + param.default = _Null() + return param def convert_models(args, parsed_ctds): - # validate and prepare the passed arguments - validate_and_prepare_args(args) - - # extract the names of the macros and check that we have found the ones we need - macros_to_expand = parse_macros_files(args.macros_files) - - # parse the given supported file-formats file - supported_file_formats = parse_file_formats(args.formats_file) - - # parse the skip/required tools files - skip_tools = parse_tools_list_file(args.skip_tools_file) - required_tools = parse_tools_list_file(args.required_tools_file) - - _convert_internal(parsed_ctds, - supported_file_formats=supported_file_formats, - 
default_executable_path=args.default_executable_path, - add_to_command_line=args.add_to_command_line, - blacklisted_parameters=args.blacklisted_parameters, - required_tools=required_tools, - skip_tools=skip_tools, - macros_file_names=args.macros_files, - macros_to_expand=macros_to_expand, - parameter_hardcoder=args.parameter_hardcoder) - - # generation of galaxy stubs is ready... now, let's see if we need to generate a tool_conf.xml - if args.tool_conf_destination is not None: - generate_tool_conf(parsed_ctds, args.tool_conf_destination, - args.galaxy_tool_path, args.default_category) - - # generate datatypes_conf.xml - if args.data_types_destination is not None: - generate_data_type_conf(supported_file_formats, args.data_types_destination) + """ + main conversion function + + @param args command line arguments + @param parsed_ctds the ctds + """ + + # validate and prepare the passed arguments + validate_and_prepare_args(args, parsed_ctds[0].ctd_model) + + # parse the given supported file-formats file + supported_file_formats = parse_file_formats(args.formats_file) + + # extract the names of the macros and check that we have found the ones we need + macros_to_expand = parse_macros_files(args.macros_files, + tool_version=args.tool_version, + supported_file_types=supported_file_formats, + required_macros=REQUIRED_MACROS, + dont_expand=[ADVANCED_OPTIONS_NAME + "macro", "references", + "list_string_val", "list_string_san", + "list_float_valsan", "list_integer_valsan"]) + + bump = parse_bump_file(args.bump_file) + + check_test_macros(args.test_macros_files, args.test_macros_prefix, parsed_ctds) + + # parse the skip/required tools files + skip_tools = parse_tools_list_file(args.skip_tools_file) + required_tools = parse_tools_list_file(args.required_tools_file) + _convert_internal(parsed_ctds, + supported_file_formats=supported_file_formats, + default_executable_path=args.default_executable_path, + add_to_command_line=args.add_to_command_line, + 
required_tools=required_tools, + skip_tools=skip_tools, + macros_file_names=args.macros_files, + macros_to_expand=macros_to_expand, + parameter_hardcoder=args.parameter_hardcoder, + test_test=args.test_test, + test_only=args.test_only, + test_unsniffable=args.test_unsniffable, + test_macros_file_names=args.test_macros_files, + test_macros_prefix=args.test_macros_prefix, + tool_version=args.tool_version, + tool_profile=args.tool_profile, + bump=bump) + + +def parse_bump_file(bump_file): + if bump_file is None: + return None + with open(bump_file) as fp: + return json.load(fp) def parse_tools_list_file(tools_list_file): + """ + """ tools_list = None if tools_list_file is not None: tools_list = [] @@ -118,9 +193,10 @@ def parse_tools_list_file(tools_list_file): return tools_list -def parse_macros_files(macros_file_names): - macros_to_expand = list() - +def parse_macros_files(macros_file_names, tool_version, supported_file_types, required_macros=[], dont_expand=[]): + """ + """ + macros_to_expand = [] for macros_file_name in macros_file_names: try: macros_file = open(macros_file_name) @@ -130,20 +206,59 @@ def parse_macros_files(macros_file_names): name = xml_element.attrib["name"] if name in macros_to_expand: logger.warning("Macro %s has already been found. Duplicate found in file %s." % - (name, macros_file_name), 0) - else: - logger.info("Macro %s found" % name, 1) - macros_to_expand.append(name) - except ParseError, e: - raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " + - str(e)) - except IOError, e: - raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " + - str(e)) + (name, macros_file_name), 0) + continue + logger.info("Macro %s found" % name, 1) + macros_to_expand.append(name) + except ParseError as e: + raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. 
Cause: " + str(e)) + + except IOError as e: + raise ApplicationException("The macros file " + macros_file_name + " could not be opened. Cause: " + str(e)) + else: + macros_file.close() + + tool_ver_tk = root.find("token[@name='@TOOL_VERSION@']") + galaxy_ver_tk = root.find("token[@name='@GALAXY_VERSION@']") + if tool_ver_tk is None: + tool_ver_tk = add_child_node(root, "token", OrderedDict([("name", "@TOOL_VERSION@")])) + tool_ver_tk.text = tool_version + if galaxy_ver_tk is not None: + if tool_version == tool_ver_tk.text: + galaxy_ver_tk.text = str(int(galaxy_ver_tk.text)) + else: + tool_ver_tk.text = tool_version + galaxy_ver_tk.text = "0" + + ext_foo = root.find("token[@name='@EXT_FOO@']") + if ext_foo is None: + ext_foo = add_child_node(root, "token", OrderedDict([("name", "@EXT_FOO@")])) + + g2o, o2g = get_fileformat_maps(supported_file_types) + + # make sure that the backup data type is in the map + if 'txt' not in g2o: + g2o['txt'] = 'txt' + + ext_foo.text = CDATA("""#def oms2gxyext(o) + #set m=%s + #return m[o] +#end def +#def gxy2omsext(g) + #set m=%s + #return m[g] +#end def +""" % (str(o2g), str(g2o))) + + tree = ElementTree(root) + tree.write(macros_file_name, encoding="UTF-8", xml_declaration=True, pretty_print=True) +# with open(macros_file_name, "w") as macros_file: +# tree = ElementTree(root) +# tree.write(macros_file, encoding="UTF-8", xml_declaration=True, pretty_print=True) # we depend on "stdio", "requirements" and "advanced_options" to exist on all the given macros files missing_needed_macros = [] - for required_macro in REQUIRED_MACROS: + for required_macro in required_macros: if required_macro not in macros_to_expand: missing_needed_macros.append(required_macro) @@ -153,13 +268,55 @@ def parse_macros_files(macros_file_names): "see galaxy/macros.xml for an example of a valid macros file." 
% ", ".join(missing_needed_macros)) - # we do not need to "expand" the advanced_options macro - macros_to_expand.remove(ADVANCED_OPTIONS_MACRO_NAME) + # remove macros that should not be expanded + for m in dont_expand: + try: + idx = macros_to_expand.index(m) + del macros_to_expand[idx] + except ValueError: + pass + return macros_to_expand +def check_test_macros(test_macros_files, test_macros_prefix, parsed_ctds): + + tool_ids = set() + for parsed_ctd in parsed_ctds: + model = parsed_ctd.ctd_model + tool_ids.add(model.name.replace(" ", "_")) + + for mf, mp in zip(test_macros_files, test_macros_prefix): + macro_ids = set() + try: + with open(mf) as macros_file: + root = parse(macros_file).getroot() + for xml_element in root.findall("xml"): + name = xml_element.attrib["name"] + if not name.startswith(mp): + logger.warning("Testmacro with invalid prefix %s." % (mp), 0) + continue + name = name[len(mp):] + macro_ids.add(name) + + except ParseError as e: + raise ApplicationException("The macros file " + mf + " could not be parsed. Cause: " + str(e)) + except IOError as e: + raise ApplicationException("The macros file " + mf + " could not be opened. Cause: " + str(e)) + for t in tool_ids - macro_ids: + logger.error("missing %s" % t) + add_child_node(root, "xml", OrderedDict([("name", mp + t)])) + + if len(macro_ids - tool_ids): + logger.warning("Unnecessary macros in %s: %s" % (mf, macro_ids - tool_ids)) + tree = ElementTree(root) + tree.write(mf, encoding="UTF-8", xml_declaration=True, pretty_print=True) + + def parse_file_formats(formats_file): - supported_formats = {} + """ + """ + supported_formats = [] if formats_file is not None: line_number = 0 with open(formats_file) as f: @@ -168,31 +325,45 @@ def parse_file_formats(formats_file): if line is None or not line.strip() or line.strip().startswith("#"): # ignore (it'd be weird to have something like: # if line is not None and not (not line.strip()) ... 
- pass + continue + parsed_formats = line.strip().split() + # valid lines contain either one or two columns + if len(parsed_formats) == 1: + supported_formats.append(DataType(parsed_formats[0], parsed_formats[0])) + elif len(parsed_formats) == 2: + supported_formats.append(DataType(parsed_formats[0], parsed_formats[1])) + elif len(parsed_formats) == 3: + composite = [tuple(x.split(":")) for x in parsed_formats[2].split(",")] + + supported_formats.append(DataType(parsed_formats[0], + parsed_formats[1], + composite)) else: - # not an empty line, no comment - # strip the line and split by whitespace - parsed_formats = line.strip().split() - # valid lines contain either one or four columns - if not (len(parsed_formats) == 1 or len(parsed_formats) == 3 or len(parsed_formats) == 4): - logger.warning( - "Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" % - (line_number, line), 0) - # ignore the line - continue - elif len(parsed_formats) == 1: - supported_formats[parsed_formats[0]] = DataType(parsed_formats[0], parsed_formats[0]) - else: - mimetype = None - # check if mimetype was provided - if len(parsed_formats) == 4: - mimetype = parsed_formats[3] - supported_formats[parsed_formats[0]] = DataType(parsed_formats[0], parsed_formats[1], - parsed_formats[2], mimetype) + logger.warning("Invalid line at line number %d of the given formats file. 
Line will be ignored:\n%s" % (line_number, line), 0) return supported_formats -def validate_and_prepare_args(args): +def get_fileformat_maps(supported_formats): + """ + convenience functions to compute dictionaries mapping + Galaxy data types <-> CTD formats + """ + o2g = {} + g2o = {} + for s in supported_formats: + if s.extension not in o2g: + o2g[s.extension] = s.galaxy_extension + if s.galaxy_extension not in g2o: + g2o[s.galaxy_extension] = s.extension + return g2o, o2g + + +def validate_and_prepare_args(args, model): + """ + check command line arguments + @param args command line arguments + @return None + """ # check that only one of skip_tools_file and required_tools_file has been provided if args.skip_tools_file is not None and args.required_tools_file is not None: raise ApplicationException( @@ -201,6 +372,8 @@ def validate_and_prepare_args(args): # flatten macros_files to make sure that we have a list containing file names and not a list of lists utils.flatten_list_of_lists(args, "macros_files") + utils.flatten_list_of_lists(args, "test_macros_files") + utils.flatten_list_of_lists(args, "test_macros_prefix") # check that the arguments point to a valid, existing path input_variables_to_check = ["skip_tools_file", "required_tools_file", "macros_files", "formats_file"] @@ -217,20 +390,33 @@ def validate_and_prepare_args(args): if not args.macros_files: # list is empty, provide the default value logger.warning("Using default macros from galaxy/macros.xml", 0) - args.macros_files = ["galaxy/macros.xml"] + args.macros_files = [os.path.dirname(os.path.abspath(__file__)) + "/macros.xml"] + + if args.tool_version is None: + args.tool_version = model.version def get_preferred_file_extension(): + """ + get the file extension for the output files + @return "xml" + """ return "xml" def _convert_internal(parsed_ctds, **kwargs): - # parse all input files into models using CTDopts (via utils) - # the output is a tuple containing the model, output destination, origin 
file + """ + parse all input files into models using CTDopts (via utils) + + @param parsed_ctds the ctds + @param kwargs skip_tools, required_tools, and additional parameters for + expand_macros, create_command, create_inputs, create_outputs + @return a tuple containing the model, output destination, origin file + """ + + parameter_hardcoder = kwargs["parameter_hardcoder"] for parsed_ctd in parsed_ctds: model = parsed_ctd.ctd_model - origin_file = parsed_ctd.input_file - output_file = parsed_ctd.suggested_output_file if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]: logger.info("Skipping tool %s" % model.name, 0) @@ -238,351 +424,802 @@ def _convert_internal(parsed_ctds, **kwargs): elif kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]: logger.info("Tool %s is not required, skipping it" % model.name, 0) continue - else: - logger.info("Converting %s (source %s)" % (model.name, utils.get_filename(origin_file)), 0) - tool = create_tool(model) - write_header(tool, model) - create_description(tool, model) - expand_macros(tool, model, **kwargs) - create_command(tool, model, **kwargs) - create_inputs(tool, model, **kwargs) - create_outputs(tool, model, **kwargs) - create_help(tool, model) - - # wrap our tool element into a tree to be able to serialize it - tree = ElementTree(tool) + + origin_file = parsed_ctd.input_file + output_file = parsed_ctd.suggested_output_file + + # overwrite attributes of the parsed ctd parameters as specified in hardcoded parameterd json + for param in utils.extract_and_flatten_parameters(model): + hardcoded_attributes = parameter_hardcoder.get_hardcoded_attributes(utils.extract_param_name(param), model.name, 'CTD') + if hardcoded_attributes is not None: + for a in hardcoded_attributes: + if not hasattr(param, a): + continue + if a == "type": + try: + t = GALAXY_TYPE_TO_TYPE[hardcoded_attributes[a]] + except KeyError: + logger.error("Could not set hardcoded attribute %s=%s for %s" % (a, 
hardcoded_attributes[a], param.name)) + sys.exit(1) + setattr(param, a, t) + elif type(getattr(param, a)) is _FileFormat or (param.type in [_InFile, _OutFile, _OutPrefix] and a == "restrictions"): + setattr(param, a, _FileFormat(str(hardcoded_attributes[a]))) + elif type(getattr(param, a)) is _Choices: + setattr(param, a, _Choices(str(hardcoded_attributes[a]))) + elif type(getattr(param, a)) is _NumericRange: + raise Exception("Overwriting of Numeric Range not implemented") + else: + setattr(param, a, hardcoded_attributes[a]) + + if "test_only" in kwargs and kwargs["test_only"]: + test = create_test_only(parsed_ctd.ctd_model, **kwargs) + tree = ElementTree(test) + output_file = parsed_ctd.suggested_output_file logger.info("Writing to %s" % utils.get_filename(output_file), 1) - tree.write(open(output_file, 'w'), encoding="UTF-8", xml_declaration=True, pretty_print=True) + tree.write(output_file, encoding="UTF-8", xml_declaration=False, pretty_print=True) + continue + + logger.info("Converting %s (source %s)" % (model.name, utils.get_filename(origin_file)), 0) + tool = create_tool(model, + kwargs.get("tool_profile", None), + kwargs.get("bump", None)) + write_header(tool, model) + create_description(tool, model) + import_macros(tool, model, **kwargs) + expand_macros(tool, kwargs["macros_to_expand"]) +# command, inputs, outputs = create_cio(tool, model, **kwargs) + create_command(tool, model, **kwargs) + create_configfiles(tool, model, **kwargs) + inputs = create_inputs(tool, model, **kwargs) + outputs = create_outputs(tool, model, **kwargs) + if kwargs["test_test"]: + create_tests(tool, inputs=copy.deepcopy(inputs), outputs=copy.deepcopy(outputs)) + if kwargs["test_macros_prefix"]: + create_tests(tool, test_macros_prefix=kwargs['test_macros_prefix'], name=model.name) + + create_help(tool, model) + # citations are required to be at the end + expand_macro(tool, "references") + + # wrap our tool element into a tree to be able to serialize it + tree = ElementTree(tool) + 
logger.info("Writing to %s" % utils.get_filename(output_file), 1) + tree.write(output_file, encoding="UTF-8", xml_declaration=True, pretty_print=True) def write_header(tool, model): + """ + add comments to the tool header + @param tool the tool xml + @param model the ctd model + """ tool.addprevious(etree.Comment( "This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). " "This file was automatically generated using CTDConverter.")) tool.addprevious(etree.Comment('Proposed Tool Section: [%s]' % model.opt_attribs.get("category", ""))) -def generate_tool_conf(parsed_ctds, tool_conf_destination, galaxy_tool_path, default_category): - # for each category, we keep a list of models corresponding to it - categories_to_tools = dict() - for parsed_ctd in parsed_ctds: - category = strip(parsed_ctd.ctd_model.opt_attribs.get("category", "")) - if not category.strip(): - category = default_category - if category not in categories_to_tools: - categories_to_tools[category] = [] - categories_to_tools[category].append(utils.get_filename(parsed_ctd.suggested_output_file)) - - # at this point, we should have a map for all categories->tools - toolbox_node = Element("toolbox") - - if galaxy_tool_path is not None and not galaxy_tool_path.strip().endswith("/"): - galaxy_tool_path = galaxy_tool_path.strip() + "/" - if galaxy_tool_path is None: - galaxy_tool_path = "" - - for category, file_names in categories_to_tools.iteritems(): - section_node = add_child_node(toolbox_node, "section") - section_node.attrib["id"] = "section-id-" + "".join(category.split()) - section_node.attrib["name"] = category - - for filename in file_names: - tool_node = add_child_node(section_node, "tool") - tool_node.attrib["file"] = galaxy_tool_path + filename - - toolconf_tree = ElementTree(toolbox_node) - toolconf_tree.write(open(tool_conf_destination,'w'), encoding="UTF-8", xml_declaration=True, pretty_print=True) - logger.info("Generated Galaxy tool_conf.xml in %s" 
% tool_conf_destination, 0) - - -def generate_data_type_conf(supported_file_formats, data_types_destination): - data_types_node = Element("datatypes") - registration_node = add_child_node(data_types_node, "registration") - registration_node.attrib["converters_path"] = "lib/galaxy/datatypes/converters" - registration_node.attrib["display_path"] = "display_applications" - - for format_name in supported_file_formats: - data_type = supported_file_formats[format_name] - # add only if it's a data type that does not exist in Galaxy - if data_type.galaxy_type is not None: - data_type_node = add_child_node(registration_node, "datatype") - # we know galaxy_extension is not None - data_type_node.attrib["extension"] = data_type.galaxy_extension - data_type_node.attrib["type"] = data_type.galaxy_type - if data_type.mimetype is not None: - data_type_node.attrib["mimetype"] = data_type.mimetype - - data_types_tree = ElementTree(data_types_node) - data_types_tree.write(open(data_types_destination,'w'), encoding="UTF-8", xml_declaration=True, pretty_print=True) - logger.info("Generated Galaxy datatypes_conf.xml in %s" % data_types_destination, 0) - - -def create_tool(model): - return Element("tool", OrderedDict([("id", model.name), ("name", model.name), ("version", model.version)])) +def create_tool(model, profile, bump): + """ + initialize the tool + @param model the ctd model + """ + + tool_id = model.name.replace(" ", "_") + + if bump is None: + gxy_version = "@GALAXY_VERSION@" + elif model.name in bump: + gxy_version = str(bump[model.name]) + elif tool_id in bump: + gxy_version = str(bump[tool_id]) + else: + gxy_version = "@GALAXY_VERSION@" + + attrib = OrderedDict([("id", tool_id), + ("name", model.name), + ("version", "@TOOL_VERSION@+galaxy" + gxy_version)]) + if profile is not None: + attrib["profile"] = profile + return Element("tool", attrib) def create_description(tool, model): + """ + add description to the tool + @param tool the Galaxy tool + @param model the ctd model 
+ """ if "description" in model.opt_attribs.keys() and model.opt_attribs["description"] is not None: - description = SubElement(tool,"description") + description = SubElement(tool, "description") description.text = model.opt_attribs["description"] +def create_configfiles(tool, model, **kwargs): + """ + create + - + - + + The former will create a json file containing the tool parameter values + that can be accessed in cheetah with $args_json. Note that + data_style="paths" (i.e. input data sets are included in the json) is set + even if input files are given on the CLI. Reason is that in this way + default values in the CTD can be restored for optional input files. + + The latter will contain hardcoded parameters. + """ + + configfiles_node = add_child_node(tool, "configfiles") + add_child_node(configfiles_node, "inputs", + OrderedDict([("name", "args_json"), ("data_style", "paths")])) + + parameter_hardcoder = kwargs.get("parameter_hardcoder") + hc_dict = dict() + for param in utils.extract_and_flatten_parameters(model): + hardcoded_value = parameter_hardcoder.get_hardcoded_value(utils.extract_param_name(param), model.name) + if hardcoded_value is None: + continue + path = utils.extract_param_path(param) + for i, v in enumerate(path[:-1]): + try: + utils.getFromDict(hc_dict, path[:i + 1]) + except KeyError: + utils.setInDict(hc_dict, path[:i + 1], {}) + utils.setInDict(hc_dict, path, hardcoded_value) + hc_node = add_child_node(configfiles_node, "configfile", + OrderedDict([("name", "hardcoded_json")])) + hc_node.text = CDATA(json.dumps(hc_dict).replace('$', r'\$')) + # print(json.dumps(hc_dict)) + + def create_command(tool, model, **kwargs): - final_command = utils.extract_tool_executable_path(model, kwargs["default_executable_path"]) + '\n' - final_command += kwargs["add_to_command_line"] + '\n' - advanced_command_start = "#if $adv_opts.adv_opts_selector=='advanced':\n" + """ + @param tool the Galaxy tool + @param model the ctd model + @param kwargs + """ + + # 
main command + final_cmd = OrderedDict([('preprocessing', []), ('command', []), ('postprocessing', [])]) + advanced_cmd = {'preprocessing': [], 'command': [], 'postprocessing': []} + + final_cmd['preprocessing'].extend(["@QUOTE_FOO@", "@EXT_FOO@", "#import re", "", "## Preprocessing"]) + + # - call the executable with -write_ctd to write the ctd file (with defaults) + # - use fill_ctd.py to overwrite the defaults in the ctd file with the + # Galaxy parameters in the JSON file (from inputs config file) + # - feed the ctd file to the executable (with -ini) + # note: input and output file parameters are still given on the command line + # - output file parameters are not included in the JSON file + # - input and output files are accessed through links / files that have the correct extension + final_cmd['command'].extend(["", "## Main program call"]) + final_cmd['command'].append(""" +set -o pipefail && +@EXECUTABLE@ -write_ctd ./ && +python3 '$__tool_directory__/fill_ctd.py' '@EXECUTABLE@.ctd' '$args_json' '$hardcoded_json' && +@EXECUTABLE@ -ini @EXECUTABLE@.ctd""") + final_cmd['command'].extend(kwargs["add_to_command_line"]) + final_cmd['postprocessing'].extend(["", "## Postprocessing"]) + + advanced_command_start = "#if ${aon}cond.{aon}selector=='advanced':".format(aon=ADVANCED_OPTIONS_NAME) advanced_command_end = "#end if" - advanced_command = "" + parameter_hardcoder = kwargs["parameter_hardcoder"] + supported_file_formats = kwargs["supported_file_formats"] + g2o, o2g = get_fileformat_maps(supported_file_formats) - found_output_parameter = False for param in utils.extract_and_flatten_parameters(model): - if param.type is _OutFile: - found_output_parameter = True - command = "" - param_name = utils.extract_param_name(param) + param = modify_param_for_galaxy(param) + + param_cmd = {'preprocessing': [], 'command': [], 'postprocessing': []} command_line_prefix = utils.extract_command_line_prefix(param, model) - if param.name in kwargs["blacklisted_parameters"]: + # 
TODO use utils.extract_param_name(param).replace(":", "_")? Then hardcoding ctd variables (with :) and tool variables (with _) can be distinguished + if parameter_hardcoder.get_blacklist(utils.extract_param_name(param), model.name): continue - - hardcoded_value = parameter_hardcoder.get_hardcoded_value(param_name, model.name) - if hardcoded_value: - command += "%s %s\n" % (command_line_prefix, hardcoded_value) + hardcoded_value = parameter_hardcoder.get_hardcoded_value(utils.extract_param_name(param), model.name) + if hardcoded_value is not None: + pass # TODO hardcoded values should go to + # param_cmd['command'].append("%s %s" % (command_line_prefix, hardcoded_value)) else: - # parameter is neither blacklisted nor hardcoded... - galaxy_parameter_name = get_galaxy_parameter_name(param) - repeat_galaxy_parameter_name = get_repeat_galaxy_parameter_name(param) - - # logic for ITEMLISTs - if param.is_list: - if param.type is _InFile: - command += command_line_prefix + "\n" - command += " #for token in $" + galaxy_parameter_name + ":\n" - command += " $token\n" - command += " #end for\n" + # in the else branch the parameter is neither blacklisted nor hardcoded... + + _actual_parameter = get_galaxy_parameter_path(param) + actual_parameter = get_galaxy_parameter_path(param, fix_underscore=True) + # all but bool params need the command line argument (bools have it already in the true/false value) + if param.type is _OutFile or param.type is _OutPrefix or param.type is _InFile: + param_cmd['command'].append(command_line_prefix) + + # preprocessing for file inputs: + # - create a dir with name param.name + # - create a link to id.ext in this directory + # rationale: in the autogenerated tests the same file was used as input to multiple parameters + # this leads to conflicts while linking... 
might also be better in general + if param.type is _InFile: + param_cmd['preprocessing'].append("mkdir %s &&" % actual_parameter) + if param.is_list: + param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + _actual_parameter + ") if f])} && ") + param_cmd['preprocessing'].append("${' '.join([\"ln -s '%s' '" + actual_parameter + "/%s/%s.%s' && \" % (f, i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($" + _actual_parameter + ") if f])}") + param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($" + _actual_parameter + ") if f])}") else: - command += "\n#if $" + repeat_galaxy_parameter_name + ":\n" - command += command_line_prefix + "\n" - command += " #for token in $" + repeat_galaxy_parameter_name + ":\n" - command += " #if \" \" in str(token):\n" - command += " \"$token." + galaxy_parameter_name + "\"\n" - command += " #else\n" - command += " $token." 
+ galaxy_parameter_name + "\n" - command += " #end if\n" - command += " #end for\n" - command += "#end if\n" - # logic for other ITEMs - else: - if param.advanced and param.type is not _OutFile: - actual_parameter = "$adv_opts.%s" % galaxy_parameter_name + param_cmd['preprocessing'].append("ln -s '$" + _actual_parameter + "' '" + actual_parameter + "/${re.sub(\"[^\w\-_]\", \"_\", $" + _actual_parameter + ".element_identifier)}.$gxy2omsext($" + _actual_parameter + ".ext)' &&") + param_cmd['command'].append("'" + actual_parameter + "/${re.sub(\"[^\w\-_]\", \"_\", $" + _actual_parameter + ".element_identifier)}.$gxy2omsext($" + _actual_parameter + ".ext)'") + elif param.type is _OutPrefix: + param_cmd['preprocessing'].append("mkdir %s &&" % actual_parameter) + param_cmd['command'].append(actual_parameter + "/") + elif param.type is _OutFile: + _actual_parameter = get_galaxy_parameter_path(param, separator="_") + actual_parameter = get_galaxy_parameter_path(param, separator="_", fix_underscore=True) + # check if there is a parameter that sets the format + # if so we add an extension to the generated files which will be used to + # determine the format in the output tag + # in all other cases (corresponding input / there is only one allowed format) + # the format will be set in the output tag + formats = get_galaxy_formats(param, model, o2g, TYPE_TO_GALAXY_TYPE[param.type]) + type_param = get_out_type_param(param, model, parameter_hardcoder) + corresponding_input, fmt_from_corresponding = get_corresponding_input(param, model) + # print("ci %s ffc %s" % (corresponding_input.name, fmt_from_corresponding)) + # print("formats %s" % (formats)) + if corresponding_input is not None: + actual_input_parameter = get_galaxy_parameter_path(corresponding_input) else: - actual_parameter = "$%s" % galaxy_parameter_name - # TODO only useful for text fields, integers or floats - # not useful for choices, input fields ... 
- - if not is_boolean_parameter(param) and type(param.restrictions) is _Choices : - command += "#if " + actual_parameter + ":\n" - command += " %s\n" % command_line_prefix - command += " #if \" \" in str(" + actual_parameter + "):\n" - command += " \"" + actual_parameter + "\"\n" - command += " #else\n" - command += " " + actual_parameter + "\n" - command += " #end if\n" - command += "#end if\n" - elif is_boolean_parameter(param): - command += "#if " + actual_parameter + ":\n" - command += " %s\n" % command_line_prefix - command += "#end if\n" - elif TYPE_TO_GALAXY_TYPE[param.type] is 'text': - command += "#if str(" + actual_parameter + "):\n" - command += " %s " % command_line_prefix - command += " \"" + actual_parameter + "\"\n" - command += "#end if\n" + actual_input_parameter = None + # print(len(formats) > 1, (corresponding_input is None or not + # fmt_from_corresponding)) + if type_param is not None: + type_param_name = get_galaxy_parameter_path(type_param) + elif len(formats) > 1 and (corresponding_input is None or not + fmt_from_corresponding): # and not param.is_list: + type_param_name = get_galaxy_parameter_path(param, suffix="type") else: - command += "#if str(" + actual_parameter + "):\n" - command += " %s " % command_line_prefix - command += actual_parameter + "\n" - command += "#end if\n" - - if param.advanced and param.type is not _OutFile: - advanced_command += " %s" % command - else: - final_command += command - - if advanced_command: - final_command += "%s%s%s\n" % (advanced_command_start, advanced_command, advanced_command_end) + type_param_name = None + # print("tp %s" % type_param_name) + + param_cmd['preprocessing'].append("mkdir " + actual_parameter + " &&") + + # if there is only one format (the outoput node sets format using the format attribute of the data/discover node) + # - single file: write to temp file with oms extension and move this to the actual result file + # - lists: write to files with the oms extension and remove the 
extension afterwards (discovery with __name__) + if len(formats) == 1: + fmt = formats.pop() + if param.is_list: + logger.info("1 fmt + list %s -> %s" % (param.name, actual_input_parameter), 1) + param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") + param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $gxy2omsext(\"" + fmt + "\")) for i, f in enumerate($" + actual_input_parameter + ") if f])}") + param_cmd['postprocessing'].append("${' '.join([\"&& mv -n '" + actual_parameter + "/%(bn)s/%(id)s.%(gext)s' '" + _actual_parameter + "/%(bn)s/%(id)s'\"%{\"bn\": i, \"id\": re.sub('[^\w\-_]', '_', f.element_identifier), \"gext\": $gxy2omsext(\"" + fmt + "\")} for i, f in enumerate($" + actual_input_parameter + ") if f])}") + else: + logger.info("1 fmt + dataset %s" % param.name, 1) + param_cmd['command'].append("'" + actual_parameter + "/output.${gxy2omsext(\"" + fmt + "\")}'") + param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${gxy2omsext(\"" + fmt + "\")}' '$" + _actual_parameter + "'") + + # if there is a type parameter then we use the type selected by the user + # - single: write to temp file with the oms extension and mv it to the actual file output which is treated via change_format + # - list: let the command create output files with the oms extensions, postprocessing renames them to the galaxy extensions, output is then discover + __name_and_ext__ + elif type_param_name is not None: + if param.is_list: + logger.info("type + list %s" % param.name, 1) + param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") + param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), $" + 
type_param_name + ") for i, f in enumerate($" + actual_input_parameter + ") if f])}") + param_cmd['postprocessing'].append("${' '.join([\"&& mv -n '" + actual_parameter + "/%(bn)s/%(id)s.%(omsext)s' '" + actual_parameter + "/%(bn)s/%(id)s.%(gext)s'\"%{\"bn\": i, \"id\": re.sub('[^\w\-_]', '_', f.element_identifier), \"omsext\":$" + type_param_name + ", \"gext\": $oms2gxyext(str($" + type_param_name + "))} for i, f in enumerate($" + actual_input_parameter + ") if f])}") + else: + logger.info("type + dataset %s" % param.name, 1) + # 1st create file with openms extension (often required by openms) + # then move it to the actual place specified by the parameter + # the format is then set by the tag using + param_cmd['command'].append("'" + actual_parameter + "/output.${" + type_param_name + "}'") + param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${" + type_param_name + "}' '$" + actual_parameter + "'") + elif actual_input_parameter is not None: + if param.is_list: + logger.info("actual + list %s" % param.name, 1) + param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") + param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\w\-_]', '_', f.element_identifier), f.ext) for i, f in enumerate($" + actual_input_parameter + ") if f])}") + else: + logger.info("actual + dataset %s %s %s" % (param.name, actual_input_parameter, corresponding_input.is_list), 1) + if corresponding_input.is_list: + param_cmd['command'].append("'" + actual_parameter + "/output.${" + actual_input_parameter + "[0].ext}'") + param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${" + actual_input_parameter + "[0].ext}' '$" + _actual_parameter + "'") + else: + param_cmd['command'].append("'" + actual_parameter + "/output.${" + actual_input_parameter + ".ext}'") + param_cmd['postprocessing'].append("&& mv '" + 
actual_parameter + "/output.${" + actual_input_parameter + ".ext}' '$" + _actual_parameter + "'") + else: + if param.is_list: + raise Exception("output parameter itemlist %s without corresponding input") + else: + logger.info("else + dataset %s" % param.name, 1) + param_cmd['command'].append("'$" + _actual_parameter + "'") + +# # select with multiple = true +# elif is_selection_parameter(param) and param.is_list: +# param_cmd['command'].append("${' '.join(['\"%s\"'%str(_) for _ in str($" + actual_parameter + ").split(',')])}") +# elif param.is_list: +# param_cmd['command'].append("$quote($%s" % actual_parameter + ")") +# #command += "${' '.join([\"'%s'\"%str(_) for _ in $" + actual_parameter + "])}\n" +# elif is_boolean_parameter(param): +# param_cmd['command'].append("$%s" % actual_parameter + "") +# else: +# param_cmd['command'].append('"$' + actual_parameter + '"') + + # add if statement for optional parameters and preprocessing + # - for optional outputs (param_out_x) the presence of the parameter + # depends on the additional input (param_x) -> need no if + # - real string parameters (i.e. 
ctd type string wo restrictions) also + # need no if (otherwise the empty string could not be provided) + if not (param.required or is_boolean_parameter(param) or (param.type is str and param.restrictions is None)): + # and not(param.type is _InFile and param.is_list): + actual_parameter = get_galaxy_parameter_path(param, suffix="FLAG", fix_underscore=True) + _actual_parameter = get_galaxy_parameter_path(param, suffix="FLAG") + for stage in param_cmd: + if len(param_cmd[stage]) == 0: + continue + # special case for optional itemlists: for those if no option is selected only the parameter must be specified + if is_selection_parameter(param) and param.is_list and param.required is False: + param_cmd[stage] = [param_cmd[stage][0]] + ["#if $" + _actual_parameter + ":"] + utils.indent(param_cmd[stage][1:]) + ["#end if"] + elif is_selection_parameter(param) or param.type is _InFile: + param_cmd[stage] = ["#if $" + _actual_parameter + ":"] + utils.indent(param_cmd[stage]) + ["#end if"] + elif param.type is _OutFile or param.type is _OutPrefix: + param_cmd[stage] = ["#if \"" + param.name + "_FLAG\" in str($OPTIONAL_OUTPUTS).split(',')"] + utils.indent(param_cmd[stage]) + ["#end if"] + else: + param_cmd[stage] = ["#if str($" + _actual_parameter + "):"] + utils.indent(param_cmd[stage]) + ["#end if"] - if not found_output_parameter: - final_command += "> $param_stdout\n" + for stage in param_cmd: + if len(param_cmd[stage]) == 0: + continue + if param.advanced and hardcoded_value is None and not (param.type is _OutFile or param.type is _OutPrefix): + advanced_cmd[stage].extend(param_cmd[stage]) + else: + final_cmd[stage].extend(param_cmd[stage]) + for stage in advanced_cmd: + if len(advanced_cmd[stage]) == 0: + continue + advanced_cmd[stage] = [advanced_command_start] + utils.indent(advanced_cmd[stage]) + [advanced_command_end] + final_cmd[stage].extend(advanced_cmd[stage]) + + out, optout = all_outputs(model, parameter_hardcoder) + if len(optout) > 0 or len(out) + len(optout) 
== 0: + stdout = ["| tee '$stdout'"] + if len(optout) > 0: + stdout = ["#if len(str($OPTIONAL_OUTPUTS).split(',')) == 0"] + utils.indent(stdout) + ["#end if"] + final_cmd['command'].extend(stdout) + + ctd_out = ["#if \"ctd_out_FLAG\" in $OPTIONAL_OUTPUTS"] + utils.indent(["&& mv '@EXECUTABLE@.ctd' '$ctd_out'"]) + ["#end if"] + final_cmd['postprocessing'].extend(ctd_out) command_node = add_child_node(tool, "command") - command_node.text = final_command + command_node.attrib["detect_errors"] = "exit_code" + command_node.text = CDATA("\n".join(sum(final_cmd.values(), []))) -# creates the xml elements needed to import the needed macros files -# and to "expand" the macros -def expand_macros(tool, model, **kwargs): + +def import_macros(tool, model, **kwargs): + """ + creates the xml elements needed to import the needed macros files + @param tool the Galaxy tool + @param model the ctd model + @param kwargs + """ macros_node = add_child_node(tool, "macros") token_node = add_child_node(macros_node, "token") token_node.attrib["name"] = "@EXECUTABLE@" token_node.text = utils.extract_tool_executable_path(model, kwargs["default_executable_path"]) # add nodes - for macro_file_name in kwargs["macros_file_names"]: + for macro_file_name in kwargs["macros_file_names"] + kwargs["test_macros_file_names"]: macro_file = open(macro_file_name) import_node = add_child_node(macros_node, "import") # do not add the path of the file, rather, just its basename import_node.text = os.path.basename(macro_file.name) + + +def expand_macro(node, macro): + expand_node = add_child_node(node, "expand") + expand_node.attrib["macro"] = macro + return expand_node + + +# and to "expand" the macros in a node +def expand_macros(node, macros_to_expand): # add nodes - for expand_macro in kwargs["macros_to_expand"]: - expand_node = add_child_node(tool, "expand") + for expand_macro in macros_to_expand: + expand_node = add_child_node(node, "expand") expand_node.attrib["macro"] = expand_macro -def 
get_galaxy_parameter_name(param): - return "param_%s" % utils.extract_param_name(param).replace(":", "_").replace("-", "_") +def get_galaxy_parameter_path(param, separator=".", suffix=None, fix_underscore=False): + """ + Get the complete path for a parameter as a string where the path + components are joined by the given separator. A given suffix can + be appended. + """ + p = get_galaxy_parameter_name(param, suffix, fix_underscore) + path = utils.extract_param_path(param, fix_underscore) + if len(path) > 1: + return (separator.join(path[:-1]) + separator + p).replace("-", "_") + elif param.advanced and (param.type is not _OutFile or suffix): + return ADVANCED_OPTIONS_NAME + "cond." + p + else: + return p + + +def get_galaxy_parameter_name(param, suffix=None, fix_underscore=False): + """ + get the name of the parameter used in the galaxy tool + - replace : and - by _ + - add suffix for output parameters if not None + the idea of suffix is to be used for optional outputs (out_x) for + which an additional boolean input (out_x_FLAG) exists + + @param param the parameter + @param suffix suffix to append + @return the name used for the parameter in the tool form + """ + p = param.name.replace("-", "_") + if fix_underscore and p.startswith("_"): + p = p[1:] + if param.type is _OutFile and suffix is not None: + return "%s_%s" % (p, suffix) + else: + return "%s" % p + + +def get_out_type_param(out_param, model, parameter_hardcoder): + """ + check if there is a parameter that has the same name with appended _type + and return it if present, otherwise return None + """ + if parameter_hardcoder.get_blacklist(out_param.name + "_type", model.name): + return None -def get_input_with_same_restrictions(out_param, model, supported_file_formats): for param in utils.extract_and_flatten_parameters(model): - if param.type is _InFile: - if param.restrictions is not None: - in_param_formats = get_supported_file_types(param.restrictions.formats, supported_file_formats) - out_param_formats 
= get_supported_file_types(out_param.restrictions.formats, supported_file_formats) - if in_param_formats == out_param_formats: - return param - + if param.name == out_param.name + "_type": + return param + return None + + +def is_in_type_param(param, model): + return is_type_param(param, model, [_InFile]) + + +def is_out_type_param(param, model): + """ + check if the parameter is output_type parameter + - the name ends with _type and there is an output parameter without this suffix + and return True iff this is the case + """ + return is_type_param(param, model, [_OutFile, _OutPrefix]) + + +def is_type_param(param, model, tpe): + """ + check if the parameter is _type parameter of an in/output + - the name ends with _type and there is an output parameter without this suffix + and return True iff this is the case + """ + if not param.name.endswith("_type"): + return False + for out_param in utils.extract_and_flatten_parameters(model): + if out_param.type not in tpe: + continue + if param.name == out_param.name + "_type": + return True + return False + + +def get_corresponding_input(out_param, model): + """ + get the input parameter corresponding to the given output + + 1st try to get the input with the type (single file/list) and same format restrictions + if this fails get the input that has the same type + in both cases there must be only one such input + + return the found input parameter and True iff the 1st case applied + """ + c = get_input_with_same_restrictions(out_param, model, True) + if c is None: + return (get_input_with_same_restrictions(out_param, model, False), False) + else: + return (c, True) + + +def get_input_with_same_restrictions(out_param, model, check_formats): + """ + get the input parameter that has the same restrictions (ctd file_formats) + - input and output must both be lists of both be simple parameters + """ + + matching = [] + + for allow_different_type in [False, True]: + for param in utils.extract_and_flatten_parameters(model): + if 
param.type is not _InFile: + continue +# logger.error("%s %s %s %s %s %s" %(out_param.name, param.name, param.is_list, out_param.is_list, param.restrictions, out_param.restrictions)) + if allow_different_type or param.is_list == out_param.is_list: + if check_formats: + if param.restrictions is None and out_param.restrictions is None: + matching.append(param) + elif param.restrictions is not None and out_param.restrictions is not None and param.restrictions.formats == out_param.restrictions.formats: + matching.append(param) + else: + matching.append(param) +# logger.error("match %s "%([_.name for _ in matching])) + if len(matching) > 0: + break + if len(matching) == 1: + return matching[0] + else: + return None + def create_inputs(tool, model, **kwargs): + """ + create input section of the Galaxy tool + @param tool the Galaxy tool + @param model the ctd model + @param kwargs + @return inputs node + """ inputs_node = SubElement(tool, "inputs") + section_nodes = dict() + section_params = dict() # some suites (such as OpenMS) need some advanced options when handling inputs - expand_advanced_node = None + advanced_node = Element("expand", OrderedDict([("macro", ADVANCED_OPTIONS_NAME + "macro")])) parameter_hardcoder = kwargs["parameter_hardcoder"] + supported_file_formats = kwargs["supported_file_formats"] + g2o, o2g = get_fileformat_maps(supported_file_formats) + + # treat all non output-file/advanced/blacklisted/hardcoded parameters as inputs + for param in utils.extract_and_flatten_parameters(model, True): + if type(param) is ParameterGroup: + title, help_text = generate_label_and_help(param.description) + section_params[utils.extract_param_name(param)] = param + section_nodes[utils.extract_param_name(param)] = Element("section", OrderedDict([("name", param.name), ("title", title), ("help", help_text), ("expanded", "false")])) + continue - # treat all non output-file parameters as inputs - for param in utils.extract_and_flatten_parameters(model): + param = 
modify_param_for_galaxy(param) # no need to show hardcoded parameters - hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name) - if param.name in kwargs["blacklisted_parameters"] or hardcoded_value: - # let's not use an extra level of indentation and use NOP + hardcoded_value = parameter_hardcoder.get_hardcoded_value(utils.extract_param_name(param), model.name) + if hardcoded_value is not None: continue - if param.type is _OutFile: + if parameter_hardcoder.get_blacklist(utils.extract_param_name(param), model.name): continue - if param.advanced: + # do not output file type parameters for inputs since file types are + # known by Galaxy and set automatically by extension (which comes from + # the Galaxy data type which is translated to OpenMS datatype as defined + # in filetypes.txt ) + if is_in_type_param(param, model): continue - parent_node = inputs_node - - # for lists we need a repeat tag - if param.is_list and param.type is not _InFile: - rep_node = add_child_node(parent_node, "repeat") - create_repeat_attribute_list(rep_node, param) - parent_node = rep_node + if utils.extract_param_name(param.parent) in section_nodes: + parent_node = section_nodes[utils.extract_param_name(param.parent)] + elif param.advanced: + parent_node = advanced_node + else: + parent_node = inputs_node + + # sometimes special inputs are needed for outfiles: + if param.type is _OutFile or param.type is _OutPrefix: + # if there are multiple possible output formats, but no parameter to choose the type or a + # corresponding input then add a selection parameter + formats = get_galaxy_formats(param, model, o2g, TYPE_TO_GALAXY_TYPE[_OutFile]) + type_param = get_out_type_param(param, model, parameter_hardcoder) + corresponding_input, fmt_from_corresponding = get_corresponding_input(param, model) + if len(formats) > 1 and type_param is None and (corresponding_input is None or not + fmt_from_corresponding): # and not param.is_list: + fmt_select = 
add_child_node(parent_node, "param", OrderedDict([("name", param.name + "_type"), ("type", "select"), ("optional", "false"), ("label", "File type of output %s (%s)" % (param.name, param.description))])) + g2o, o2g = get_fileformat_maps(kwargs["supported_file_formats"]) +# for f in formats: +# option_node = add_child_node(fmt_select, "option", OrderedDict([("value", g2o[f])]), f) + for choice in param.restrictions.formats: + option_node = add_child_node(fmt_select, "option", OrderedDict([("value", str(choice))])) + option_node.text = o2g[str(choice)] + if choice.lower() != o2g[str(choice)]: + option_node.text += " (%s)" % choice + continue + # create the actual param node and fill the attributes param_node = add_child_node(parent_node, "param") - create_param_attribute_list(param_node, param, kwargs["supported_file_formats"]) + create_param_attribute_list(param_node, param, model, kwargs["supported_file_formats"]) - for param in utils.extract_and_flatten_parameters(model): - # no need to show hardcoded parameters - hardcoded_value = parameter_hardcoder.get_hardcoded_value(param.name, model.name) - if param.name in kwargs["blacklisted_parameters"] or hardcoded_value: - # let's not use an extra level of indentation and use NOP - continue - if param.type is _OutFile: - continue - if not param.advanced: + hardcoded_attributes = parameter_hardcoder.get_hardcoded_attributes(param.name, model.name, 'XML') + if hardcoded_attributes is not None: + for a in hardcoded_attributes: + param_node.attrib[a] = str(hardcoded_attributes[a]) + + section_parents = [utils.extract_param_name(section_params[sn].parent) for sn in section_nodes] + for sn in section_nodes: + if len(section_nodes[sn]) == 0 and sn not in section_parents: continue - if expand_advanced_node is None: - expand_advanced_node = add_child_node(inputs_node, "expand", OrderedDict([("macro", ADVANCED_OPTIONS_MACRO_NAME)])) - parent_node = expand_advanced_node + if utils.extract_param_name(section_params[sn].parent) in 
section_nodes: + section_nodes[utils.extract_param_name(section_params[sn].parent)].append(section_nodes[sn]) + else: + inputs_node.append(section_nodes[sn]) + # if there is an advanced section then append it at the end of the inputs + inputs_node.append(advanced_node) + + # Add select for optional outputs + out, optout = all_outputs(model, parameter_hardcoder) + attrib = OrderedDict([("name", "OPTIONAL_OUTPUTS"), + ("type", "select"), + ("optional", "true"), + ("multiple", "true"), + ("label", "Optional outputs")]) +# if len(out) == 0 and len(out) + len(optout) > 0: +# attrib["optional"] = "false" +# else: +# attrib["optional"] = "true" + param_node = add_child_node(inputs_node, "param", attrib) + for o in optout: + title, help_text = generate_label_and_help(o.description) + option_node = add_child_node(param_node, "option", + OrderedDict([("value", o.name + "_FLAG")]), + text="%s (%s)" % (o.name, title)) + option_node = add_child_node(param_node, "option", + OrderedDict([("value", "ctd_out_FLAG")]), + text="Output used ctd (ini) configuration file") + + return inputs_node + + +def is_default(value, param): + """ + check if the value is the default of the param or if the value is in the defaults of param + """ + return param.default == value or (type(param.default) is list and value in param.default) - # for lists we need a repeat tag - if param.is_list and param.type is not _InFile: - rep_node = add_child_node(parent_node, "repeat") - create_repeat_attribute_list(rep_node, param) - parent_node = rep_node - param_node = add_child_node(parent_node, "param") - create_param_attribute_list(param_node, param, kwargs["supported_file_formats"]) +def get_formats(param, model, o2g): + """ + determine format attribute from the CTD restictions (i.e. 
the OpenMS extensions) + - also check if all listed possible formats are supported in Galaxy and warn if necessary + """ + if param.restrictions is None: + return [] + elif type(param.restrictions) is _FileFormat: + choices = param.restrictions.formats + elif is_out_type_param(param, model): + choices = param.restrictions.choices + else: + raise InvalidModelException("Unrecognized restriction type [%(type)s] " + "for [%(name)s]" % {"type": type(param.restrictions), + "name": param.name}) + + # check if there are formats that have not been registered yet... + formats = set() + for format_name in choices: + if format_name not in o2g: + logger.warning("Ignoring unknown format %s for parameter %s" % (format_name, param.name), 1) + else: + formats.add(format_name) + return sorted(formats) -def get_repeat_galaxy_parameter_name(param): - return "rep_" + get_galaxy_parameter_name(param) +def get_galaxy_formats(param, model, o2g, default=None): + """ + determine galaxy formats for a parm (i.e. list of allowed Galaxy extensions) + from the CTD restictions (i.e. 
the OpenMS extensions) + - if there is a single one, then take this + - if there is none than use given default + """ + formats = get_formats(param, model, o2g) + gxy_formats = set([o2g[_] for _ in formats if _ in o2g]) + if len(gxy_formats) == 0: + if default is not None: + gxy_formats.add(default) + else: + raise InvalidModelException("No supported formats [%(type)s] " + "for [%(name)s]" % {"type": type(param.restrictions), + "name": param.name}) + return sorted(gxy_formats) -def create_repeat_attribute_list(rep_node, param): - rep_node.attrib["name"] = get_repeat_galaxy_parameter_name(param) - if param.required: - rep_node.attrib["min"] = "1" - else: - rep_node.attrib["min"] = "0" - # for the ITEMLISTs which have LISTITEM children we only - # need one parameter as it is given as a string - if param.default is not None and param.default is not _Null: - rep_node.attrib["max"] = "1" - rep_node.attrib["title"] = get_galaxy_parameter_name(param) +def create_param_attribute_list(param_node, param, model, supported_file_formats): + """ + get the attributes of input parameters + @param param_node the galaxy tool param node + @param param the ctd parameter + @param supported_file_formats + """ -def create_param_attribute_list(param_node, param, supported_file_formats): - param_node.attrib["name"] = get_galaxy_parameter_name(param) + g2o, o2g = get_fileformat_maps(supported_file_formats) + # set the name, argument and a first guess for the type (which will be over written + # in some cases .. see below) + # even if the conversion relies on the fact that the param names are identical + # to the ctd ITEM names we replace dashes by underscores because input and output + # parameters need to be treated in cheetah. variable names are currently fixed back + # to dashes in fill_ctd.py. 
currently there seems to be only a single tool + # requiring this https://github.com/OpenMS/OpenMS/pull/4529 + param_node.attrib["name"] = get_galaxy_parameter_name(param) + param_node.attrib["argument"] = "-%s" % utils.extract_param_name(param) param_type = TYPE_TO_GALAXY_TYPE[param.type] if param_type is None: raise ModelError("Unrecognized parameter type %(type)s for parameter %(name)s" % {"type": param.type, "name": param.name}) - + # ITEMLIST is rendered as text field (even if its integers or floats), an + # exception is files which are treated a bit below if param.is_list: param_type = "text" if is_selection_parameter(param): param_type = "select" - if len(param.restrictions.choices) < 5: + if len(param.restrictions.choices) < 5 and not param.is_list: param_node.attrib["display"] = "radio" - + if param.is_list: + param_node.attrib["multiple"] = "true" + if is_boolean_parameter(param): param_type = "boolean" - + if param.type is _InFile: # assume it's just text unless restrictions are provided - param_format = "txt" - if param.restrictions is not None: - # join all formats of the file, take mapping from supported_file if available for an entry - if type(param.restrictions) is _FileFormat: - param_format = ",".join([get_supported_file_type(i, supported_file_formats) if - get_supported_file_type(i, supported_file_formats) - else i for i in param.restrictions.formats]) - else: - raise InvalidModelException("Expected 'file type' restrictions for input file [%(name)s], " - "but instead got [%(type)s]" - % {"name": param.name, "type": type(param.restrictions)}) - param_node.attrib["type"] = "data" - param_node.attrib["format"] = param_format + param_node.attrib["format"] = ",".join(get_galaxy_formats(param, model, o2g, TYPE_TO_GALAXY_TYPE[_InFile])) # in the case of multiple input set multiple flag if param.is_list: param_node.attrib["multiple"] = "true" - else: param_node.attrib["type"] = param_type + if param_type == "select" and param.default in 
param.restrictions.choices: + param_node.attrib["optional"] = "false" + else: + param_node.attrib["optional"] = str(not param.required).lower() + # check for parameters with restricted values (which will correspond to a "select" in galaxy) - if param.restrictions is not None: + if param.restrictions is not None or param_type == "boolean": # it could be either _Choices or _NumericRange, with special case for boolean types if param_type == "boolean": create_boolean_parameter(param_node, param) elif type(param.restrictions) is _Choices: - # create as many