From a4fd3c2339f48aae574d6891646babaff567d160 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 19 Feb 2026 10:15:32 -0500 Subject: [PATCH 1/2] best practices guide --- .vitepress/config.mts | 1 + getting-started/best-practices.md | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 getting-started/best-practices.md diff --git a/.vitepress/config.mts b/.vitepress/config.mts index 0118c83..b0bd67d 100644 --- a/.vitepress/config.mts +++ b/.vitepress/config.mts @@ -81,6 +81,7 @@ export default defineConfig({ text: "Getting Started", items: [ { text: "Quickstart", link: "/getting-started/quickstart" }, + { text: "Best Practices", link: "/getting-started/best-practices" }, { text: "Ecosystem", link: "/getting-started/ecosystem" }, { text: "Getting help", link: "/getting-started/getting-help" }, { text: "Contributing", link: "/getting-started/contributing" }, diff --git a/getting-started/best-practices.md b/getting-started/best-practices.md new file mode 100644 index 0000000..38d6cb0 --- /dev/null +++ b/getting-started/best-practices.md @@ -0,0 +1,30 @@ +# WDL Best Practices + +- All tasks with multiple commands (including any pipes (`|`)) should have `set -euo pipefail` before any other commands. + - Tasks without multiple commands or pipes can omit this. + - These options will cause common classes of bugs in Bash scripts to fail immediately and loudly, instead of causing silent or subtle bugs in your task behavior. +- All tasks should run in a persistently versioned container. + - e.g. do not use `latest` tags for Docker images. + - This helps ensure reproducibility across time and environments. +- Check all assumptions made about workflow inputs before beginning long running executions. 
+ - Common examples of assumptions that should be checked: + - valid `String` choice (for WDL 1.3 and later, an `enum` should be used in place of `String`s with a fixed set of valid options) + - mutually exclusive parameters + - missing optional file for selected parameters + - filename extensions + - Use `after` clauses in workflows to ensure that all these assumptions are valid before beginning tasks with heavy computation. +- If the _contents_ of a `File` are not read or do not need to be localized for a task, try to coerce the `File` variable to a `Boolean` (with `defined()`) or a `String` (with `basename()`) to avoid unnecessary disk space usage and networking. +- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations. + - Often, tasks have a dynamic calculation for resource requirements based on input sizes. Users of a WDL should have an easy way to fine tune this calculation. + - This may mean incorporating an `Int` or `Float` in the inputs of the task that is applied to the dynamic calculation. + - For WDL 1.3 and later, WDL authors can change resource requirements between retry attempts. This enables mitigation of errors relating to resources limits, but users may inadvertantly disable these mitigations by introducing runtime overrides. WDL authors should expose resource fine tuning via the input section and incorporate those user values in any dynamic calculations to prevent runtime locking. +- Tasks which assume a file and any accessory files (e.g. a BAM and a BAI) have specific extensions and/or are in the same directory should *always* create symlinks from the mounted inputs to the work directory of the task + - This is because individual `File` types are not guarenteed to be in the same mounted directory. + - The `command` may include something like: `ln -s "~{}" "./"` +- Tasks should `rm` any temporary or intermediate files created in the work directory (including symlinks). 
+ - This helps reduce disk bloat from keeping unnecessary files around. + - This is especially important for any large or uncompressed files, such as reference FASTAs or databases. +- Most tasks should have a default `maxRetries` of 1. + - This is because many WDL backends are prone to intermittent failures that can be recovered from with a retry. + - Certain tasks are especially prone to intermittent failure (often if any networking is involved) and can have a higher default `maxRetries`. + - Certain tasks with potentially high compute costs in cloud environments may default to `0`. This should be used in combination with call caching to aid rerunning while minimizing costs. From 7bd6f39ebff9d02bfca1aac931004cca6fac5ad9 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 23 Feb 2026 07:15:54 -0500 Subject: [PATCH 2/2] review feedback --- getting-started/best-practices.md | 45 ++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/getting-started/best-practices.md b/getting-started/best-practices.md index 38d6cb0..8f1ce8d 100644 --- a/getting-started/best-practices.md +++ b/getting-started/best-practices.md @@ -1,30 +1,45 @@ # WDL Best Practices +This guide includes best practices that should be followed while authoring WDL, regardless of WDL version, backend, or engine. + +## Shell Safety + - All tasks with multiple commands (including any pipes (`|`)) should have `set -euo pipefail` before any other commands. - Tasks without multiple commands or pipes can omit this. - These options will cause common classes of bugs in Bash scripts to fail immediately and loudly, instead of causing silent or subtle bugs in your task behavior. -- All tasks should run in a persistently versioned container. - - e.g. do not use `latest` tags for Docker images. - - This helps ensure reproducibility across time and environments. -- Check all assumptions made about workflow inputs before beginning long running executions. 
+ +## Resource Management + +- If the _contents_ of a `File` are not read or do not need to be localized for a task, try to coerce the `File` variable to a `Boolean` (with `defined()`) or a `String` (with `basename()`) to avoid unnecessary disk space usage and networking. +- Most tasks should have a default `maxRetries` of 1. + - This is because many WDL backends are prone to intermittent failures that can be recovered from with a retry. + - Certain tasks are especially prone to intermittent failure (often if any networking is involved) and can have a higher default `maxRetries`. + - Certain tasks with potentially high compute costs in cloud environments may default to `0`. This should be used in combination with call caching to aid rerunning while minimizing costs. +- Tasks should have easily configurable memory and disk space allocations. + - Often, tasks have a dynamic calculation for resource requirements based on input sizes. Users of a WDL should have an easy way to fine-tune this calculation. + - This may mean incorporating an `Int` or `Float` in the inputs of the task that is applied to the dynamic calculation. +- For WDL 1.3 and later, WDL authors can change resource requirements between retry attempts. This enables mitigation of errors relating to resource limits, but users may inadvertently disable these mitigations by introducing runtime overrides. + - WDL authors should expose resource fine-tuning via the input section and incorporate those user values in any dynamic calculations as an alternative to overriding the requirements with static values. +- Check all assumptions made about workflow inputs before beginning long-running or expensive executions. 
- Common examples of assumptions that should be checked: - - valid `String` choice (for WDL 1.3 and later, an `enum` should be used in place of `String`s with a fixed set of valid options) - mutually exclusive parameters - missing optional file for selected parameters - filename extensions + - valid `String` choice + - For WDL 1.3 and later, an `enum` should be used in place of `String`s with a fixed set of valid options - Use `after` clauses in workflows to ensure that all these assumptions are valid before beginning tasks with heavy computation. -- If the _contents_ of a `File` are not read or do not need to be localized for a task, try to coerce the `File` variable to a `Boolean` (with `defined()`) or a `String` (with `basename()`) to avoid unnecessary disk space usage and networking. -- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations. - - Often, tasks have a dynamic calculation for resource requirements based on input sizes. Users of a WDL should have an easy way to fine tune this calculation. - - This may mean incorporating an `Int` or `Float` in the inputs of the task that is applied to the dynamic calculation. - - For WDL 1.3 and later, WDL authors can change resource requirements between retry attempts. This enables mitigation of errors relating to resources limits, but users may inadvertantly disable these mitigations by introducing runtime overrides. WDL authors should expose resource fine tuning via the input section and incorporate those user values in any dynamic calculations to prevent runtime locking. + +## Reproducibility + +- All tasks should run in a persistently versioned container. + - e.g. do not use `latest` tags for Docker images. + - This helps ensure reproducibility across time and environments. + +## File Management + - Tasks which assume a file and any accessory files (e.g. 
a BAM and a BAI) have specific extensions and/or are in the same directory should *always* create symlinks from the mounted inputs to the work directory of the task - - This is because individual `File` types are not guarenteed to be in the same mounted directory. + - This is because individual `File` types are not guaranteed to be in the same mounted directory. - The `command` may include something like: `ln -s "~{}" "./"` - Tasks should `rm` any temporary or intermediate files created in the work directory (including symlinks). - This helps reduce disk bloat from keeping unnecessary files around. - This is especially important for any large or uncompressed files, such as reference FASTAs or databases. -- Most tasks should have a default `maxRetries` of 1. - - This is because many WDL backends are prone to intermittent failures that can be recovered from with a retry. - - Certain tasks are especially prone to intermittent failure (often if any networking is involved) and can have a higher default `maxRetries`. - - Certain tasks with potentially high compute costs in cloud environments may default to `0`. This should be used in combination with call caching to aid rerunning while minimizing costs.