From f911ef46814a3deb7f92c1e80bb603d9d2178464 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Wed, 3 Dec 2025 10:17:35 -0500
Subject: [PATCH 01/29] feat: minimap2 tool wrapper

---
 tools/minimap2.wdl | 113 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 tools/minimap2.wdl

diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
new file mode 100644
index 000000000..e9ef020d9
--- /dev/null
+++ b/tools/minimap2.wdl
@@ -0,0 +1,113 @@
+version 1.2
+
+task align {
+    meta {
+        description: "Align DNA or mRNA sequences against a large reference database"
+        outputs: {
+            alignments: "The output alignment file in SAM or PAF format"
+        }
+    }
+
+    parameter_meta {
+        reads: "The input reads file in FASTQ format"
+        reference_index: "The minimap2 index file for the reference genome"
+        read_group: "The read group string to be included in the SAM header. Format: '@RG\\tID:foo\\tSM:bar'"
+        output_name: "The name of the output alignment file"
+        output_paf: "If true, output in PAF format instead of SAM"
+        cigar_in_paf: "If true and outputting PAF, include CIGAR strings in the PAF output"
+        ignore_base_quality: "If true, ignore base quality scores during alignment"
+        output_md_tag: "If true, include MD tags in the SAM output"
+        eqx: "If true, use =/X CIGAR operators instead of M"
+        soft_clip: "If true, use soft clipping for supplementary alignments in SAM format"
+        secondary_alignments: "If true, report secondary alignments"
+        seed: "Seed value for the minimap2 aligner"
+        threads: "Number of threads to use for alignment"
+    }
+
+    input {
+        File reads
+        File reference_index
+        String read_group
+        String output_name = "aligned.sam"
+        Boolean output_paf = false
+        Boolean cigar_in_paf = true
+        Boolean ignore_base_quality = false
+        Boolean output_md_tag = true
+        Boolean eqx = false
+        Boolean soft_clip = true
+        Boolean secondary_alignments = true
+        Int seed = 11
+        Int threads = 3
+    }
+
+    command <<<
+        minimap2 \
+            ~{if output_paf then "" else "-a"} \
+            ~{if output_paf && cigar_in_paf then "-c" else ""} \
+            ~{if ignore_base_quality then "-Q" else ""} \
+            ~{if output_md_tag then "--MD" else ""} \
+            ~{if eqx then "--eqx" else ""} \
+            ~{if soft_clip then "-Y" else ""} \
+            ~{if secondary_alignments then "--secondary=yes" else "--secondary=no"} \
+            -t ~{threads} \
+            --seed ~{seed} \
+            -R "~{read_group}" \
+            "~{reference_index}" \
+            "~{reads}" \
+            > "~{output_name}"
+    >>>
+
+    output {
+        File alignments = output_name
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/minimap2:2.30--h577a1d6_0"
+        cpu: threads
+        memory: "4 GB"
+    }
+}
+
+task index {
+    meta {
+        description: "Create a minimap2 index for a reference genome"
+        outputs: {
+            reference_index: "The generated minimap2 index file"
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "The reference genome in FASTA format to be indexed"
+        alt_contigs: "Optional file containing a list of alternative contigs"
+        index_name: "The name of the output index file"
+        minimizer_kmer_size: "K-mer size for minimizer indexing"
+        minimizer_window_size: "Window size for minimizer indexing"
+    }
+
+    input {
+        File reference_fasta
+        File? alt_contigs
+        String index_name = "reference.mmi"
+        Int minimizer_kmer_size = 15
+        Int minimizer_window_size = 10
+    }
+
+    command <<<
+        minimap2 \
+            -k ~{minimizer_kmer_size} \
+            -w ~{minimizer_window_size} \
+            ~{if defined(alt_contigs) then "--alt \"~{alt_contigs}\"" else ""} \
+            -d "~{index_name}" \
+            "~{reference_fasta}"
+    >>>
+
+    output {
+        File reference_index = index_name
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/minimap2:2.30--h577a1d6_0"
+        cpu: 1
+        memory: "4 GB"
+    }
+}

From fd2bca4ef925630aa02c5b853f45981d4b917814 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Wed, 3 Dec 2025 10:46:34 -0500
Subject: [PATCH 02/29] refactor: handle optionally gzipped reference

---
 tools/minimap2.wdl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index e9ef020d9..8912c42c0 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -93,12 +93,18 @@ task index {
     }
 
     command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
         minimap2 \
             -k ~{minimizer_kmer_size} \
             -w ~{minimizer_window_size} \
             ~{if defined(alt_contigs) then "--alt \"~{alt_contigs}\"" else ""} \
             -d "~{index_name}" \
-            "~{reference_fasta}"
+            "$ref_fasta"
     >>>
 
     output {

From 1ebe2a064a19ea3cba37f1f914835e797a5eb49d Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Wed, 3 Dec 2025 11:20:08 -0500
Subject: [PATCH 03/29] chore: fill in options

---
 tools/minimap2.wdl | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index 8912c42c0..262e4e324 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -9,10 +9,31 @@ task align {
     }
 
     parameter_meta {
-        reads: "The input reads file in FASTQ format"
+        read_one_fastq_gz: "Input gzipped FASTQ read one file to align with minimap2"
+        read_two_fastq_gz: "Input gzipped FASTQ read two file to align with minimap2"
         reference_index: "The minimap2 index file for the reference genome"
         read_group: "The read group string to be included in the SAM header. Format: '@RG\\tID:foo\\tSM:bar'"
         output_name: "The name of the output alignment file"
+        preset: {
+            description: "Minimap2 preset for alignment",
+            external_help: "https://lh3.github.io/minimap2/minimap2.html#8",
+            options: [
+                "sr",
+                "map-ont",
+                "lr:hq",
+                "map-hifi",
+                "map-pb",
+                "map-iclr",
+                "asm5",
+                "asm10",
+                "asm20",
+                "splice",
+                "splice:hq",
+                "splice:sr",
+                "ava-pb",
+                "ava-ont"
+            ],
+        }
         output_paf: "If true, output in PAF format instead of SAM"
         cigar_in_paf: "If true and outputting PAF, include CIGAR strings in the PAF output"
         ignore_base_quality: "If true, ignore base quality scores during alignment"
@@ -25,9 +46,11 @@ task align {
     }
 
     input {
-        File reads
+        File read_one_fastq_gz
         File reference_index
         String read_group
+        File? read_two_fastq_gz
+        String? preset = "sr"
         String output_name = "aligned.sam"
         Boolean output_paf = false
         Boolean cigar_in_paf = true
@@ -42,6 +65,7 @@ task align {
 
     command <<<
         minimap2 \
+            ~{if defined(preset) then "-x \"~{preset}\"" else ""} \
             ~{if output_paf then "" else "-a"} \
             ~{if output_paf && cigar_in_paf then "-c" else ""} \
             ~{if ignore_base_quality then "-Q" else ""} \
@@ -53,7 +77,8 @@ task align {
             --seed ~{seed} \
             -R "~{read_group}" \
             "~{reference_index}" \
-            "~{reads}" \
+            "~{read_one_fastq_gz}" \
+            ~{if defined(read_two_fastq_gz) then "\"~{read_two_fastq_gz}\"" else ""} \
             > "~{output_name}"
     >>>
 
@@ -64,7 +89,7 @@ task align {
     requirements {
         container: "quay.io/biocontainers/minimap2:2.30--h577a1d6_0"
         cpu: threads
-        memory: "4 GB"
+        memory: "16 GB"
     }
 }
 
@@ -82,6 +107,7 @@ task index {
         index_name: "The name of the output index file"
         minimizer_kmer_size: "K-mer size for minimizer indexing"
         minimizer_window_size: "Window size for minimizer indexing"
+        threads: "Number of threads to use for indexing"
     }
 
     input {
@@ -90,6 +116,7 @@ task index {
         String index_name = "reference.mmi"
         Int minimizer_kmer_size = 15
         Int minimizer_window_size = 10
+        Int threads = 3
     }
 
     command <<<
@@ -103,6 +130,7 @@ task index {
             -k ~{minimizer_kmer_size} \
             -w ~{minimizer_window_size} \
             ~{if defined(alt_contigs) then "--alt \"~{alt_contigs}\"" else ""} \
+            -t ~{threads} \
             -d "~{index_name}" \
             "$ref_fasta"
     >>>
@@ -113,7 +141,7 @@ task index {
 
     requirements {
         container: "quay.io/biocontainers/minimap2:2.30--h577a1d6_0"
-        cpu: 1
-        memory: "4 GB"
+        cpu: threads
+        memory: "16 GB"
     }
 }

From 9533656d9e4741e3f6a6133ecb8a521939b36051 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Wed, 3 Dec 2025 14:28:48 -0500
Subject: [PATCH 04/29] chore: add samtools to minimap2 image and convert to
 BAM

---
 docker/minimap2/Dockerfile   |  8 ++++++++
 docker/minimap2/package.json |  5 +++++
 tools/minimap2.wdl           | 16 +++++++++++-----
 3 files changed, 24 insertions(+), 5 deletions(-)
 create mode 100644 docker/minimap2/Dockerfile
 create mode 100644 docker/minimap2/package.json

diff --git a/docker/minimap2/Dockerfile b/docker/minimap2/Dockerfile
new file mode 100644
index 000000000..7fb04869b
--- /dev/null
+++ b/docker/minimap2/Dockerfile
@@ -0,0 +1,8 @@
+FROM quay.io/biocontainers/samtools:1.17--h00cdaf9_0 AS samtools
+FROM quay.io/biocontainers/minimap2:2.30--h577a1d6_0
+
+COPY --from=samtools /usr/local/bin/ /usr/local/bin/
+COPY --from=samtools /usr/local/lib/ /usr/local/lib/
+COPY --from=samtools /usr/local/libexec/ /usr/local/libexec/
+
+ENTRYPOINT [ "minimap2" ]
\ No newline at end of file
diff --git a/docker/minimap2/package.json b/docker/minimap2/package.json
new file mode 100644
index 000000000..a78b7377c
--- /dev/null
+++ b/docker/minimap2/package.json
@@ -0,0 +1,5 @@
+{
+    "name": "minimap2",
+    "version": "2.30",
+    "revision": "0"
+}
\ No newline at end of file
diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index 262e4e324..5a300509f 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -34,7 +34,7 @@ task align {
                 "ava-ont"
             ],
         }
-        output_paf: "If true, output in PAF format instead of SAM"
+        output_paf: "If true, output in PAF format instead of BAM"
         cigar_in_paf: "If true and outputting PAF, include CIGAR strings in the PAF output"
         ignore_base_quality: "If true, ignore base quality scores during alignment"
         output_md_tag: "If true, include MD tags in the SAM output"
@@ -51,7 +51,7 @@ task align {
         String read_group
         File? read_two_fastq_gz
         String? preset = "sr"
-        String output_name = "aligned.sam"
+        String output_name = "aligned.bam"
         Boolean output_paf = false
         Boolean cigar_in_paf = true
         Boolean ignore_base_quality = false
@@ -79,7 +79,13 @@ task align {
             "~{reference_index}" \
             "~{read_one_fastq_gz}" \
             ~{if defined(read_two_fastq_gz) then "\"~{read_two_fastq_gz}\"" else ""} \
-            > "~{output_name}"
+            > output
+
+            if ~{output_paf}; then
+                mv output "~{output_name}"
+            else
+                samtools view -b output > "~{output_name}"
+            fi
     >>>
 
     output {
@@ -87,7 +93,7 @@ task align {
     }
 
     requirements {
-        container: "quay.io/biocontainers/minimap2:2.30--h577a1d6_0"
+        container: "ghcr.io/stjudecloud/minimap2:branch-minimap2-2.30-0"
         cpu: threads
         memory: "16 GB"
     }
@@ -140,7 +146,7 @@ task index {
     }
 
     requirements {
-        container: "quay.io/biocontainers/minimap2:2.30--h577a1d6_0"
+        container: "ghcr.io/stjudecloud/minimap2:branch-minimap2-2.30-0"
         cpu: threads
         memory: "16 GB"
     }

From 1b28075c8c8ebecf2eb5cbea2fc837d645eb6b9a Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Wed, 3 Dec 2025 14:29:48 -0500
Subject: [PATCH 05/29] chore: lint

---
 tools/minimap2.wdl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index 5a300509f..58b0cf15b 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -10,10 +10,9 @@ task align {
 
     parameter_meta {
         read_one_fastq_gz: "Input gzipped FASTQ read one file to align with minimap2"
-        read_two_fastq_gz: "Input gzipped FASTQ read two file to align with minimap2"
         reference_index: "The minimap2 index file for the reference genome"
         read_group: "The read group string to be included in the SAM header. Format: '@RG\\tID:foo\\tSM:bar'"
-        output_name: "The name of the output alignment file"
+        read_two_fastq_gz: "Input gzipped FASTQ read two file to align with minimap2"
         preset: {
             description: "Minimap2 preset for alignment",
             external_help: "https://lh3.github.io/minimap2/minimap2.html#8",
@@ -31,9 +30,10 @@ task align {
                 "splice:hq",
                 "splice:sr",
                 "ava-pb",
-                "ava-ont"
+                "ava-ont",
             ],
         }
+        output_name: "The name of the output alignment file"
         output_paf: "If true, output in PAF format instead of BAM"
         cigar_in_paf: "If true and outputting PAF, include CIGAR strings in the PAF output"
         ignore_base_quality: "If true, ignore base quality scores during alignment"

From 0ac46dbd6183ecd1ac2d986c2e112fd4ee1bd04a Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Wed, 3 Dec 2025 17:00:24 -0500
Subject: [PATCH 06/29] chore: add disk specification

---
 tools/minimap2.wdl | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index 58b0cf15b..d953a5e39 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -43,6 +43,7 @@ task align {
         secondary_alignments: "If true, report secondary alignments"
         seed: "Seed value for the minimap2 aligner"
         threads: "Number of threads to use for alignment"
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
     }
 
     input {
@@ -61,8 +62,17 @@ task align {
         Boolean secondary_alignments = true
         Int seed = 11
         Int threads = 3
+        Int modify_disk_size_gb = 0
     }
 
+    Int disk_size_gb = ceil(
+        (
+            size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")
+        ) * 2)
+        + ceil(size(reference_index, "GiB"))
+        + 10
+        + modify_disk_size_gb
+
     command <<<
         minimap2 \
             ~{if defined(preset) then "-x \"~{preset}\"" else ""} \
@@ -96,6 +106,7 @@ task align {
         container: "ghcr.io/stjudecloud/minimap2:branch-minimap2-2.30-0"
         cpu: threads
         memory: "16 GB"
+        disks: "~{disk_size_gb} GB"
     }
 }
 
@@ -114,6 +125,7 @@ task index {
         minimizer_kmer_size: "K-mer size for minimizer indexing"
         minimizer_window_size: "Window size for minimizer indexing"
         threads: "Number of threads to use for indexing"
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
     }
 
     input {
@@ -123,8 +135,11 @@ task index {
         Int minimizer_kmer_size = 15
         Int minimizer_window_size = 10
         Int threads = 3
+        Int modify_disk_size_gb = 0
     }
 
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB")) + 10 + modify_disk_size_gb
+
     command <<<
         set -euo pipefail
 
@@ -149,5 +164,6 @@ task index {
         container: "ghcr.io/stjudecloud/minimap2:branch-minimap2-2.30-0"
         cpu: threads
         memory: "16 GB"
+        disks: "~{disk_size_gb} GB"
     }
 }

From 5161669f78a05a84231edb352bdaf07f2900039e Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Wed, 3 Dec 2025 17:02:24 -0500
Subject: [PATCH 07/29] feat: add bwa-mem2 task

---
 docker/bwamem2/Dockerfile   |   8 +++
 docker/bwamem2/package.json |   5 ++
 tools/bwamem2.wdl           | 127 ++++++++++++++++++++++++++++++++++++
 3 files changed, 140 insertions(+)
 create mode 100644 docker/bwamem2/Dockerfile
 create mode 100644 docker/bwamem2/package.json
 create mode 100644 tools/bwamem2.wdl

diff --git a/docker/bwamem2/Dockerfile b/docker/bwamem2/Dockerfile
new file mode 100644
index 000000000..a79bab258
--- /dev/null
+++ b/docker/bwamem2/Dockerfile
@@ -0,0 +1,8 @@
+FROM quay.io/biocontainers/samtools:1.17--h00cdaf9_0 AS samtools
+FROM quay.io/biocontainers/bwa-mem2:2.3--he70b90d_0
+
+COPY --from=samtools /usr/local/bin/ /usr/local/bin/
+COPY --from=samtools /usr/local/lib/ /usr/local/lib/
+COPY --from=samtools /usr/local/libexec/ /usr/local/libexec/
+
+ENTRYPOINT [ "bwa-mem2" ]
\ No newline at end of file
diff --git a/docker/bwamem2/package.json b/docker/bwamem2/package.json
new file mode 100644
index 000000000..8a483d363
--- /dev/null
+++ b/docker/bwamem2/package.json
@@ -0,0 +1,5 @@
+{
+    "name": "bwamem2",
+    "version": "2.3",
+    "revision": "0"
+}
\ No newline at end of file
diff --git a/tools/bwamem2.wdl b/tools/bwamem2.wdl
new file mode 100644
index 000000000..bdba1e258
--- /dev/null
+++ b/tools/bwamem2.wdl
@@ -0,0 +1,127 @@
+version 1.2
+
+task align {
+    meta {
+        description: "Align DNA sequences against a large reference database using BWA-MEM2"
+        outputs: {
+            alignments: "The output alignment file in BAM format"
+        }
+    }
+
+    parameter_meta {
+        read_one_fastq_gz: "Input gzipped FASTQ read one file to align with BWA-MEM2"
+        reference_index: "The BWA-MEM2 index file for the reference genome"
+        read_group: "The read group string to be included in the SAM header. Format: '@RG\\tID:foo\\tSM:bar'"
+        read_two_fastq_gz: "Input gzipped FASTQ read two file to align with BWA-MEM2"
+        prefix: "Prefix for the BAM file. The extension `.bam` will be added."
+        seed: "Seed value for the BWA-MEM2 aligner"
+        threads: "Number of threads to use for alignment"
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
+    }
+
+    input {
+        File read_one_fastq_gz
+        File reference_index
+        String read_group
+        File? read_two_fastq_gz
+        String prefix = sub(
+            basename(read_one_fastq_gz),
+            "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
+            ""
+        )
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+        Int seed_length = 19
+        Int min_score = 30
+        Boolean smart_pairing = false
+        Boolean skip_mate_rescue = false
+    }
+
+    String output_name = prefix + ".bam"
+    Int disk_size_gb = ceil((
+            size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")
+        ) * 2)
+        + ceil(size(reference_index, "GiB"))
+        + 10
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        mkdir bwa_db
+        tar -C bwa_db -xzf "~{reference_index}" --no-same-owner
+        PREFIX=$(basename bwa_db/*.ann ".ann")
+
+        bwa-mem2 mem \
+            -t ~{threads} \
+            -R "~{read_group}" \
+            -k ~{seed_length} \
+            -T ~{min_score} \
+            ~{if smart_pairing then "-p" else ""} \
+            ~{if skip_mate_rescue then "-S" else ""} \
+            bwa_db/"$PREFIX" \
+            "~{read_one_fastq_gz}" \
+            ~{if defined(read_two_fastq_gz) then "~{read_two_fastq_gz}" else ""} |
+        samtools view -b -o "~{output_name}" -
+    >>>
+
+    output {
+        File alignments = output_name
+    }
+
+    requirements {
+        container: "ghcr.io/stjudecloud/bwamem2:branch-minimap2-2.3-0"
+        cpu: threads
+        memory: "~{disk_size_gb * 2} GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}
+
+task index {
+    meta {
+        description: "Index a reference genome for alignment with BWA-MEM2"
+        outputs: {
+            reference_index: "The BWA-MEM2 index files for the reference genome, packaged as a gzipped tarball"
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "The reference genome in FASTA format to be indexed"
+        db_name: "The base name for the output index files"
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
+    }
+
+    input {
+        File reference_fasta
+        String db_name = "reference"
+        Int modify_disk_size_gb = 0
+    }
+
+    Float input_fasta_size = size(reference_fasta, "GiB")
+    Int disk_size_gb = ceil(input_fasta_size * 2) + 10 + modify_disk_size_gb
+    String bwa_db_out_name = db_name + ".tar.gz"
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        bwa-mem2 index \
+            "$ref_fasta"
+
+        tar -czf "~{bwa_db_out_name}" "$ref_fasta"*
+    >>>
+
+    output {
+        File reference_index = bwa_db_out_name
+    }
+
+    requirements {
+        container: "ghcr.io/stjudecloud/bwamem2:branch-minimap2-2.3-0"
+        cpu: 1
+        memory: "16 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}

From af994e2029101b4c977aaf7d26f31cf02fe8a1ae Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 4 Dec 2025 12:55:40 -0500
Subject: [PATCH 08/29] chore: lint

---
 tools/bwamem2.wdl | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/bwamem2.wdl b/tools/bwamem2.wdl
index bdba1e258..15a499fd2 100644
--- a/tools/bwamem2.wdl
+++ b/tools/bwamem2.wdl
@@ -14,9 +14,12 @@ task align {
         read_group: "The read group string to be included in the SAM header. Format: '@RG\\tID:foo\\tSM:bar'"
         read_two_fastq_gz: "Input gzipped FASTQ read two file to align with BWA-MEM2"
         prefix: "Prefix for the BAM file. The extension `.bam` will be added."
-        seed: "Seed value for the BWA-MEM2 aligner"
+        smart_pairing: "If true, enable smart pairing mode for paired-end reads"
+        skip_mate_rescue: "If true, skip mate rescue for paired-end reads"
         threads: "Number of threads to use for alignment"
         modify_disk_size_gb: "Additional disk space to allocate (in GB)"
+        seed_length: "Minimum seed length (`-k`) for the BWA-MEM2 aligner"
+        min_score: "Minimum score threshold for reporting alignments"
     }
 
     input {
@@ -29,12 +32,12 @@ task align {
             "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
             ""
         )
+        Boolean smart_pairing = false
+        Boolean skip_mate_rescue = false
         Int threads = 4
         Int modify_disk_size_gb = 0
         Int seed_length = 19
         Int min_score = 30
-        Boolean smart_pairing = false
-        Boolean skip_mate_rescue = false
     }
 
     String output_name = prefix + ".bam"
@@ -61,7 +64,7 @@ task align {
             ~{if skip_mate_rescue then "-S" else ""} \
             bwa_db/"$PREFIX" \
             "~{read_one_fastq_gz}" \
-            ~{if defined(read_two_fastq_gz) then "~{read_two_fastq_gz}" else ""} |
+            ~{if defined(read_two_fastq_gz) then "\"~{read_two_fastq_gz}\"" else ""} |
         samtools view -b -o "~{output_name}" -
     >>>
 
@@ -121,7 +124,7 @@ task index {
     requirements {
         container: "ghcr.io/stjudecloud/bwamem2:branch-minimap2-2.3-0"
         cpu: 1
-        memory: "16 GB"
+        memory: "120 GB"
         disks: "~{disk_size_gb} GB"
     }
 }

From 00deed43a5564a4b7c1c1231a73e017df46a376c Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 4 Dec 2025 12:56:02 -0500
Subject: [PATCH 09/29] feat: add hisat2 task

---
 docker/hisat2/Dockerfile   |   8 ++
 docker/hisat2/package.json |   5 ++
 tools/hisat2.wdl           | 169 +++++++++++++++++++++++++++++++++++++
 3 files changed, 182 insertions(+)
 create mode 100644 docker/hisat2/Dockerfile
 create mode 100644 docker/hisat2/package.json
 create mode 100644 tools/hisat2.wdl

diff --git a/docker/hisat2/Dockerfile b/docker/hisat2/Dockerfile
new file mode 100644
index 000000000..a57d0c75a
--- /dev/null
+++ b/docker/hisat2/Dockerfile
@@ -0,0 +1,8 @@
+FROM quay.io/biocontainers/samtools:1.17--h00cdaf9_0 AS samtools
+FROM quay.io/biocontainers/hisat2:2.2.1--h503566f_8
+
+COPY --from=samtools /usr/local/bin/ /usr/local/bin/
+COPY --from=samtools /usr/local/lib/ /usr/local/lib/
+COPY --from=samtools /usr/local/libexec/ /usr/local/libexec/
+
+ENTRYPOINT [ "hisat2" ]
\ No newline at end of file
diff --git a/docker/hisat2/package.json b/docker/hisat2/package.json
new file mode 100644
index 000000000..e17679260
--- /dev/null
+++ b/docker/hisat2/package.json
@@ -0,0 +1,5 @@
+{
+    "name": "hisat2",
+    "version": "2.2.1",
+    "revision": "0"
+}
\ No newline at end of file
diff --git a/tools/hisat2.wdl b/tools/hisat2.wdl
new file mode 100644
index 000000000..bd06d2eaa
--- /dev/null
+++ b/tools/hisat2.wdl
@@ -0,0 +1,169 @@
+version 1.2
+
+task align {
+    meta {
+        description: "Align RNA-seq reads against a reference genome using HISAT2"
+        outputs: {
+            alignments: "The output alignment file in SAM format"
+        }
+    }
+
+    parameter_meta {
+        read_one_fastq_gz: "Input gzipped FASTQ read one file to align with HISAT2"
+        reference_index: "The HISAT2 index files for the reference genome"
+        read_two_fastq_gz: "Input gzipped FASTQ read two file to align with HISAT2"
+        output_name: "The name of the output alignment file"
+        threads: "Number of threads to use for alignment"
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
+    }
+
+    input {
+        File read_one_fastq_gz
+        File reference_index
+        File? read_two_fastq_gz
+        String output_name = "aligned.sam"
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil((
+            size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")
+        ) * 2)
+        + ceil(size(reference_index, "GiB"))
+        + 10
+        + modify_disk_size_gb
+
+    command <<<
+        hisat2 \
+            -q \
+            -p ~{threads} \
+            -x "~{reference_index}" \
+            ~{if defined(read_two_fastq_gz) then "-1" else "-U"} "~{read_one_fastq_gz}" \
+            ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""} \
+            -S "~{output_name}"
+    >>>
+
+    output {
+        File alignments = "~{output_name}"
+    }
+
+    requirements {
+        cpu: threads
+        memory: "16 GB"
+        disks: "~{disk_size_gb} GB"
+        container: "ghcr.io/stjudecloud/hisat2:branch-minimap2-2.2.1-0"
+    }
+}
+
+task index {
+    meta {
+        description: "Index a reference genome for alignment with HISAT2"
+        outputs: {
+            reference_index: "The HISAT2 index files for the reference genome"
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "The reference genome in FASTA format to be indexed"
+        snp: "List of SNPs"
+        haplotype: "List of haplotypes"
+        splice_site: "List of splice sites. Use with `exon`."
+        exon: "List of exons. Use with `splice_site`."
+        repeat_ref: "Repeat reference file"
+        repeat_info: "Repeat information file"
+        repeat_snp: "Repeat SNP file"
+        repeat_haplotype: "Repeat haplotype file"
+        bmax: "Maximum number of suffixes allowed in a block"
+        seed: "Seed for pseudo-random number generator"
+        bmaxdivn: "Maximum number of suffixes allowed in a block, expressed as a fraction of the length of the reference"
+        index_base_name: "The base name for the output index files"
+        force_large_index: "Force creation of a large index"
+        disable_auto_fitting: "Disable automatic fitting of index parameters"
+        nodc: "Disable difference-cover sample"
+        no_ref: "Do not build bitpacked version of reference sequence for paired-end alignment"
+        just_ref: "Build only the bitpacked version of reference sequence for paired-end alignment"
+        threads: "Number of threads to use for indexing"
+        dcv: "Period for the difference-cover sample. A larger period uses less memory, but may be slower. Must be a power of 2, no greater than 4096."
+        offrate: "The off-rate for the FM index"
+        ftabchars: "The lookup table to calculate initial BW range with respect to the first N characters of the query"
+        localoffrate: "The off-rate for the local FM index"
+        localftabchars: "The lookup table to calculate initial BW range for the local FM"
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
+    }
+
+    input {
+        File reference_fasta
+        File? snp
+        File? haplotype
+        File? splice_site
+        File? exon
+        File? repeat_ref
+        File? repeat_info
+        File? repeat_snp
+        File? repeat_haplotype
+        Int? bmax
+        Int? seed
+        Int? bmaxdivn = 4
+        String index_base_name = "hisat2_index"
+        Boolean force_large_index = false
+        Boolean disable_auto_fitting = false
+        Boolean nodc = false
+        Boolean no_ref = false
+        Boolean just_ref = false
+        Int threads = 1
+        Int dcv = 1024
+        Int offrate = 5
+        Int ftabchars = 10
+        Int localoffrate = 3
+        Int localftabchars = 6
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2) + 10 + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        hisat2-build \
+            ~{if force_large_index then "--large-index" else ""} \
+            ~{if disable_auto_fitting then "--disable-auto-fitting" else ""} \
+            -p ~{threads} \
+            ~{if defined(bmax) then "--bmax \"~{bmax}\"" else ""} \
+            ~{if defined(bmaxdivn) then "--bmaxdivn \"~{bmaxdivn}\"" else ""} \
+            ~{if !nodc then "--dcv \"~{dcv}\"" else ""} \
+            ~{if no_ref then "--no-ref" else ""} \
+            ~{if just_ref then "--just-ref" else ""} \
+            --offrate "~{offrate}" \
+            --ftabchars "~{ftabchars}" \
+            --localoffrate "~{localoffrate}" \
+            --localftabchars "~{localftabchars}" \
+            ~{if defined(snp) then "--snp \"~{snp}\"" else ""} \
+            ~{if defined(haplotype) then "--haplotype \"~{haplotype}\"" else ""} \
+            ~{if defined(splice_site) then "--ss \"~{splice_site}\"" else ""} \
+            ~{if defined(exon) then "--exon \"~{exon}\"" else ""} \
+            ~{if defined(repeat_ref) then "--repeat-ref \"~{repeat_ref}\"" else ""} \
+            ~{if defined(repeat_info) then "--repeat-info \"~{repeat_info}\"" else ""} \
+            ~{if defined(repeat_snp) then "--repeat-snp \"~{repeat_snp}\"" else ""} \
+            ~{if defined(repeat_haplotype) then "--repeat-haplotype \"~{repeat_haplotype}\"" else ""} \
+            ~{if defined(seed) then "--seed \"~{seed}\"" else ""} \
+            "$ref_fasta" \
+            "~{index_base_name}"
+
+            tar -czf "~{index_base_name}.tar.gz" "~{index_base_name}"*
+    >>>
+
+    output {
+        File reference_index = "~{index_base_name}.tar.gz"
+    }
+
+    requirements {
+        cpu: threads
+        memory: "16 GB"
+        disks: "~{disk_size_gb} GB"
+        container: "ghcr.io/stjudecloud/hisat2:branch-minimap2-2.2.1-0"
+    }
+}

From 443466617858f7359ba329a618133a1513d74125 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 4 Dec 2025 13:56:15 -0500
Subject: [PATCH 10/29] chore: change base image as other segfaults

---
 docker/hisat2/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/hisat2/Dockerfile b/docker/hisat2/Dockerfile
index a57d0c75a..85b3898dd 100644
--- a/docker/hisat2/Dockerfile
+++ b/docker/hisat2/Dockerfile
@@ -1,5 +1,5 @@
 FROM quay.io/biocontainers/samtools:1.17--h00cdaf9_0 AS samtools
-FROM quay.io/biocontainers/hisat2:2.2.1--h503566f_8
+FROM quay.io/biocontainers/hisat2:2.2.1--hdbdd923_7
 
 COPY --from=samtools /usr/local/bin/ /usr/local/bin/
 COPY --from=samtools /usr/local/lib/ /usr/local/lib/

From b07d7690d6f2de6903dc92b96ed520c824d77bb6 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 5 Dec 2025 11:47:35 -0500
Subject: [PATCH 11/29] feat: add `vg` indexing

---
 tools/vg.wdl | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 tools/vg.wdl

diff --git a/tools/vg.wdl b/tools/vg.wdl
new file mode 100644
index 000000000..f002533ce
--- /dev/null
+++ b/tools/vg.wdl
@@ -0,0 +1,75 @@
+version 1.2
+
+task index {
+    meta {
+        description: "Index a reference genome for alignment with vg giraffe"
+        outputs: {
+            reference_index: "The vg giraffe index file for the reference genome"
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "The reference genome in FASTA format to be indexed"
+        vcf_files: "VCF(s) containing variants to augment the graph"
+        transcript_gff: "GFF(s) containing transcript annotations"
+        db_prefix: "The base name for the output index files"
+        gff_feature: "The feature type in the GFF to use for transcripts"
+        gff_id_tag: "The attribute tag in the GFF to use as transcript ID"
+        workflow: {
+            description: "The vg autoindex workflow to use",
+            choices: [
+                "map",
+                "mpmap",
+                "rpvg",
+                "giraffe",
+                "sr-giraffe",
+                "lr-giraffe",
+            ],
+        }
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
+        threads: "Number of threads to use for indexing"
+    }
+
+    input {
+        File reference_fasta
+        Array[File] vcf_files = []
+        Array[File] transcript_gff = []
+        String db_prefix = "reference"
+        String gff_feature = "exon"
+        String gff_id_tag = "transcript_id"
+        String workflow = "giraffe"
+        Int modify_disk_size_gb = 0
+        Int threads = 4
+    }
+
+    Float input_fasta_size = size(reference_fasta, "GiB")
+    Float vcf_size = size(vcf_files, "GiB")
+    Float transcript_gff_size = size(transcript_gff, "GiB")
+    Int disk_size_gb = ceil(input_fasta_size * 2)
+        + ceil(vcf_size * 2)
+        + ceil(transcript_gff_size * 2)
+        + 10 + modify_disk_size_gb
+
+    command <<<
+        vg autoindex \
+            --workflow "~{workflow}" \
+            -r "~{reference_fasta}" \
+            -p "~{db_prefix}" \
+            ~{sep(" ", prefix("-v ", quote(vcf_files)))} \
+            ~{sep(" ", prefix("-x ", quote(transcript_gff)))} \
+            -t ~{threads} \
+            --gff-feature "~{gff_feature}" \
+            --gff-tx-tag "~{gff_id_tag}"
+    >>>
+
+    output {
+        Array[File] reference_index = glob("~{db_prefix}*")
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/vg:1.70.0--h9ee0642_0"
+        cpu: threads
+        memory: "120 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}

From cb47772356a169eebcfbc278948428cf1c6ececf Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 5 Dec 2025 12:41:18 -0500
Subject: [PATCH 12/29] chore: localize fasta for indexing

---
 tools/vg.wdl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/vg.wdl b/tools/vg.wdl
index f002533ce..6407a79a9 100644
--- a/tools/vg.wdl
+++ b/tools/vg.wdl
@@ -51,9 +51,15 @@ task index {
         + 10 + modify_disk_size_gb
 
     command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
         vg autoindex \
             --workflow "~{workflow}" \
-            -r "~{reference_fasta}" \
+            -r "$ref_fasta" \
             -p "~{db_prefix}" \
             ~{sep(" ", prefix("-v ", quote(vcf_files)))} \
             ~{sep(" ", prefix("-x ", quote(transcript_gff)))} \

From 39ca901c99e728c4da2ffdf60029dbdd220aecfb Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 5 Dec 2025 13:54:52 -0500
Subject: [PATCH 13/29] feat: add vg giraffe task

---
 tools/vg.wdl | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/tools/vg.wdl b/tools/vg.wdl
index 6407a79a9..86f2a95f2 100644
--- a/tools/vg.wdl
+++ b/tools/vg.wdl
@@ -1,5 +1,110 @@
 version 1.2
 
+task giraffe {
+    meta {
+        description: "Align DNA sequences against a variation graph using vg giraffe"
+        outputs: {
+            alignments: "The output alignment file, written in the format selected by `output_format` (BAM by default)"
+        }
+    }
+
+    parameter_meta {
+        read_one_fastq_gz: "Input gzipped FASTQ read one file to align with vg giraffe"
+        gbz_graph: "The vg GBZ graph file for the reference genome"
+        minimizer_index: "The vg minimizer index file for the reference genome"
+        zipcode_name: "The vg zipcode name file for the reference genome"
+        distance_index: "The vg distance index file for the reference genome"
+        read_two_fastq_gz: "Input gzipped FASTQ read two file to align with vg giraffe"
+        haploytype: "The haplotype information file"
+        kff: "The KFF file containing kmer counts"
+        sample_name: "The sample name to include"
+        read_group: "The read group"
+        output_name: "The name of the output alignment file"
+        output_format: {
+            description: "The output format for alignments",
+            choices: [
+                "gam",
+                "gaf",
+                "json",
+                "tsv",
+                "SAM",
+                "BAM",
+                "CRAM",
+            ],
+        }
+        preset: {
+            description: "vg giraffe preset for alignment",
+            choices: [
+                "chaining-sr",
+                "default",
+                "fast",
+                "hifi",
+                "r10",
+                "srold",
+            ],
+        }
+        threads: "Number of threads to use for alignment"
+        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
+    }
+
+    input {
+        File read_one_fastq_gz
+        File gbz_graph
+        File minimizer_index
+        File zipcode_name
+        File distance_index
+        File? read_two_fastq_gz
+        File? haploytype
+        File? kff
+        String? sample_name
+        String? read_group
+        String output_name = "aligned.bam"
+        String output_format = "BAM"
+        String preset = "default"
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil((
+            size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")
+        ) * 2)
+        + ceil(size(gbz_graph, "GiB"))
+        + ceil(size(minimizer_index, "GiB"))
+        + ceil(size(distance_index, "GiB"))
+        + ceil(size(zipcode_name, "GiB"))
+        + 10
+        + modify_disk_size_gb
+
+    command <<<
+        vg giraffe \
+            -t ~{threads} \
+            -Z "~{gbz_graph}" \
+            -m "~{minimizer_index}" \
+            -d "~{distance_index}" \
+            -z "~{zipcode_name}" \
+            -f "~{read_one_fastq_gz}" \
+            ~{if defined(read_two_fastq_gz) then "-f \"~{read_two_fastq_gz}\"" else ""} \
+            -o "~{output_format}" \
+            ~{if defined(sample_name) then "--sample \"~{sample_name}\"" else ""} \
+            ~{if defined(read_group) then "--read-group \"~{read_group}\"" else ""} \
+            ~{if defined(haploytype) then "--haplotype-name \"~{haploytype}\"" else ""} \
+            ~{if defined(kff) then "--kff-name \"~{kff}\"" else ""} \
+            --parameter-preset "~{preset}" \
+            > "~{output_name}"
+    >>>
+
+    output {
+        File alignments = "~{output_name}"
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/vg:1.70.0--h9ee0642_0"
+        cpu: threads
+        memory: "120 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}
+
 task index {
     meta {
         description: "Index a reference genome for alignment with vg giraffe"

From a921e17f160ec24c8bbb432cf082c2532fb1b914 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Tue, 9 Dec 2025 14:23:38 -0600
Subject: [PATCH 14/29] chore: avoid writing intermediate SAM to disk

---
 tools/minimap2.wdl | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index d953a5e39..3a8dd364d 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -89,12 +89,10 @@ task align {
             "~{reference_index}" \
             "~{read_one_fastq_gz}" \
             ~{if defined(read_two_fastq_gz) then "\"~{read_two_fastq_gz}\"" else ""} \
-            > output
-
-            if ~{output_paf}; then
-                mv output "~{output_name}"
+            | if ~{output_paf}; then
+                cat - > "~{output_name}"
             else
-                samtools view -b output > "~{output_name}"
+                samtools view -b - > "~{output_name}"
             fi
     >>>
 

From 2dc478a40cc8b05a350767768c6d555ba57d4db0 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 12 Dec 2025 09:33:30 -0500
Subject: [PATCH 15/29] chore: use database prefix

---
 tools/hisat2.wdl | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tools/hisat2.wdl b/tools/hisat2.wdl
index bd06d2eaa..e61c75439 100644
--- a/tools/hisat2.wdl
+++ b/tools/hisat2.wdl
@@ -34,13 +34,21 @@ task align {
         + modify_disk_size_gb
 
     command <<<
+        set -euo pipefail
+
+        mkdir hisat2_db
+        tar -C hisat2_db -xzf "~{hisat2_db_tar_gz}" --no-same-owner
+        PREFIX=$(basename hisat2_db/*.1.ht2 ".1.ht2")
+
         hisat2 \
             -q \
             -p ~{threads} \
-            -x "~{reference_index}" \
+            -S "~{output_name}" \
+            -x "$PREFIX" \
             -1 "~{read_one_fastq_gz}" \
-            ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""} \
-            -S "~{output_name}"
+            ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""}
+
+        rm -r hisat2_db
     >>>
 
     output {

From 1c832fcc43fd2b078ca6d949c8c92d5eed0d4f7c Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 12 Dec 2025 13:23:10 -0600
Subject: [PATCH 16/29] chore: bump resources for azure

---
 tools/hisat2.wdl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/hisat2.wdl b/tools/hisat2.wdl
index e61c75439..56d143556 100644
--- a/tools/hisat2.wdl
+++ b/tools/hisat2.wdl
@@ -29,7 +29,7 @@ task align {
     Int disk_size_gb = ceil((
             size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")
         ) * 2)
-        + ceil(size(reference_index, "GiB"))
+        + ceil(size(reference_index, "GiB") * 5)
         + 10
         + modify_disk_size_gb
 
@@ -37,14 +37,14 @@ task align {
         set -euo pipefail
 
         mkdir hisat2_db
-        tar -C hisat2_db -xzf "~{hisat2_db_tar_gz}" --no-same-owner
+        tar -C hisat2_db -xzf "~{reference_index}" --no-same-owner
         PREFIX=$(basename hisat2_db/*.1.ht2 ".1.ht2")
 
         hisat2 \
             -q \
             -p ~{threads} \
             -S "~{output_name}" \
-            -x "$PREFIX" \
+            -x "hisat2_db/$PREFIX" \
             -1 "~{read_one_fastq_gz}" \
             ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""}
 
@@ -57,7 +57,7 @@ task align {
 
     requirements {
         cpu: threads
-        memory: "16 GB"
+        memory: "64 GB"
         disks: "~{disk_size_gb} GB"
         container: "ghcr.io/stjudecloud/hisat2:branch-minimap2-2.2.1-0"
     }

From bd841324406b61c7fd5ddf842c8228496a17d943 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 12 Dec 2025 14:28:50 -0500
Subject: [PATCH 17/29] chore: format+lint

---
 tools/hisat2.wdl | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/hisat2.wdl b/tools/hisat2.wdl
index 56d143556..a049ec07e 100644
--- a/tools/hisat2.wdl
+++ b/tools/hisat2.wdl
@@ -4,7 +4,7 @@ task align {
     meta {
         description: "Align RNA-seq reads against a reference genome using HISAT2"
         outputs: {
-            alignments: "The output alignment file in SAM format"
+            alignments: "The output alignment file in SAM format",
         }
     }
 
@@ -67,7 +67,7 @@ task index {
     meta {
         description: "Index a reference genome for alignment with HISAT2"
         outputs: {
-            reference_index: "The HISAT2 index files for the reference genome"
+            reference_index: "The HISAT2 index files for the reference genome",
         }
     }
 
@@ -156,7 +156,11 @@ task index {
             ~{if defined(repeat_ref) then "--repeat-ref \"~{repeat_ref}\"" else ""} \
             ~{if defined(repeat_info) then "--repeat-info \"~{repeat_info}\"" else ""} \
             ~{if defined(repeat_snp) then "--repeat-snp \"~{repeat_snp}\"" else ""} \
-            ~{if defined(repeat_haplotype) then "--repeat-haplotype \"~{repeat_haplotype}\"" else ""} \
+            ~{(
+                if defined(repeat_haplotype)
+                then "--repeat-haplotype \"~{repeat_haplotype}\""
+                else ""
+            )} \
             ~{if defined(seed) then "--seed \"~{seed}\"" else ""} \
             "$ref_fasta" \
             "~{index_base_name}"

From 8abaf9c35b7d2a81537ef0e411d35fd5ea4d9a52 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 18 Dec 2025 08:09:43 -0600
Subject: [PATCH 18/29] chore: remove memory oversubscribe

---
 tools/bwamem2.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bwamem2.wdl b/tools/bwamem2.wdl
index 15a499fd2..c34634a73 100644
--- a/tools/bwamem2.wdl
+++ b/tools/bwamem2.wdl
@@ -75,7 +75,7 @@ task align {
     requirements {
         container: "ghcr.io/stjudecloud/bwamem2:branch-minimap2-2.3-0"
         cpu: threads
-        memory: "~{disk_size_gb * 2} GB"
+        memory: "64 GB"
         disks: "~{disk_size_gb} GB"
     }
 }

From d42fbfb0e14867b77939912ba268ddcfb10ea8d3 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 18 Dec 2025 13:52:47 -0500
Subject: [PATCH 19/29] feat: add strelka and manta wrappers

---
 tools/manta.wdl   | 125 ++++++++++++++++++++++++++++++++++++++
 tools/strelka.wdl | 151 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100644 tools/manta.wdl
 create mode 100644 tools/strelka.wdl

diff --git a/tools/manta.wdl b/tools/manta.wdl
new file mode 100644
index 000000000..29946b749
--- /dev/null
+++ b/tools/manta.wdl
@@ -0,0 +1,125 @@
+version 1.2
+
+task manta_germline {
+    meta {
+        description: "Run Manta structural variant and indel caller"
+        outputs: {
+            manta_output: "Directory containing Manta variant calls",
+            log_file: "Log file from the Manta workflow execution",
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format"
+        bam: "Input BAM file with aligned reads"
+        output_dir: "Directory to store Manta output"
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File bam
+        String output_dir = "manta_output"
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(bam, "GiB"))
+        + 20
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        configManta.py \
+            --bam "~{bam}" \
+            --referenceFasta "$ref_fasta" \
+            --runDir "~{output_dir}"
+
+        "~{output_dir}/runWorkflow.py" -j "~{threads}"
+
+        rm -rf "$ref_fasta"
+    >>>
+
+    output {
+        Directory manta_output = output_dir
+        File log_file = "~{output_dir}/workspace/pyflow.data/logs/pyflow_log.txt"
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/manta:1.6.0--py27h9948957_6"
+        cpu: threads
+        memory: "25 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}
+
+task manta_somatic {
+    meta {
+        description: "Run Manta structural variant and indel caller in somatic mode"
+        outputs: {
+            manta_output: "Directory containing Manta variant calls",
+            log_file: "Log file from the Manta workflow execution",
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format"
+        tumor_bam: "Input BAM file with aligned reads from tumor sample"
+        normal_bam: "Input BAM file with aligned reads from normal sample"
+        output_dir: "Directory to store Manta output"
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File tumor_bam
+        File normal_bam
+        String output_dir = "manta_output"
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(tumor_bam, "GiB"))
+        + ceil(size(normal_bam, "GiB"))
+        + 20
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        configManta.py \
+            --normalBam "~{normal_bam}" \
+            --tumorBam "~{tumor_bam}" \
+            --referenceFasta "$ref_fasta" \
+            --runDir "~{output_dir}"
+
+        "~{output_dir}/runWorkflow.py" -j "~{threads}"
+
+        rm -rf "$ref_fasta"
+    >>>
+
+    output {
+        Directory manta_output = output_dir
+        File log_file = "~{output_dir}/workspace/pyflow.data/logs/pyflow_log.txt"
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/manta:1.6.0--py27h9948957_6"
+        cpu: threads
+        memory: "25 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}
diff --git a/tools/strelka.wdl b/tools/strelka.wdl
new file mode 100644
index 000000000..b986693a4
--- /dev/null
+++ b/tools/strelka.wdl
@@ -0,0 +1,151 @@
+version 1.2
+
+task somatic {
+    meta {
+        description: "Run Strelka somatic variant calling workflow"
+        outputs: {
+            strelka_output: "Directory containing Strelka somatic variant calls",
+            log_file: "Log file from the Strelka workflow execution",
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format"
+        normal_bam: "Input BAM file with aligned reads for normal sample"
+        tumor_bam: "Input BAM file with aligned reads for tumor sample"
+        indel_candidates: "Optional VCF file with candidate indels, recommended to be generated by Manta"
+        output_dir: "Directory to store Strelka output"
+        exome: "Boolean indicating if the data is exome sequencing"
+        rna: "Boolean indicating if the data is RNA sequencing"
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File normal_bam
+        File tumor_bam
+        File? indel_candidates
+        String output_dir = "strelka_somatic_output"
+        Boolean exome = false
+        Boolean rna = false
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(normal_bam, "GiB"))
+        + ceil(size(tumor_bam, "GiB"))
+        + (
+            if defined(indel_candidates)
+            then ceil(size(indel_candidates, "GiB"))
+            else 0
+        )
+        + 20
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        configureStrelkaSomaticWorkflow.py \
+            --referenceFasta "$ref_fasta" \
+            --runDir "~{output_dir}" \
+            --tumorBam "~{tumor_bam}" \
+            --normalBam "~{normal_bam}" \
+            ~{if (exome) then "--exome" else ""} \
+            ~{if (rna) then "--rna" else ""} \
+            ~{(
+                if (defined(indel_candidates))
+                then "--indelCandidates '~{indel_candidates}'"
+                else ""
+            )}
+
+        
+        "~{output_dir}/runWorkflow.py" -m local -j ~{threads}
+
+        rm -rf "$ref_fasta"
+    >>>
+
+    output {
+        Directory strelka_output = output_dir
+        File log_file = "~{output_dir}/workspace/pyflow.data/logs/pyflow_log.txt"
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/strelka:2.9.10--hdfd78af_2"
+        cpu: threads
+        memory: "25 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}
+
+task germline {
+    meta {
+        description: "Run Strelka germline variant calling workflow"
+        outputs: {
+            strelka_output: "Directory containing Strelka germline variant calls",
+            log_file: "Log file from the Strelka workflow execution",
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format"
+        bam: "Input BAM file with aligned reads"
+        output_dir: "Directory to store Strelka output"
+        exome: "Boolean indicating if the data is exome sequencing"
+        rna: "Boolean indicating if the data is RNA sequencing"
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File bam
+        String output_dir = "strelka_germline_output"
+        Boolean exome = false
+        Boolean rna = false
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(bam, "GiB"))
+        + 20
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        configureStrelkaGermlineWorkflow.py \
+            --referenceFasta "$ref_fasta" \
+            --runDir "~{output_dir}" \
+            --bam "~{bam}" \
+            ~{if (exome) then "--exome" else ""} \
+            ~{if (rna) then "--rna" else ""}
+
+        
+        "~{output_dir}/runWorkflow.py" -m local -j ~{threads}
+
+        rm -rf "$ref_fasta"
+    >>>
+
+    output {
+        Directory strelka_output = output_dir
+        File log_file = "~{output_dir}/workspace/pyflow.data/logs/pyflow_log.txt"
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/strelka:2.9.10--hdfd78af_2"
+        cpu: threads
+        memory: "25 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}

From 589190dc9061fa72bedf9aa90b5832904038ce52 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 18 Dec 2025 14:25:59 -0500
Subject: [PATCH 20/29] feat: add clair3 wrapper

---
 tools/clair3.wdl | 90 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 tools/clair3.wdl

diff --git a/tools/clair3.wdl b/tools/clair3.wdl
new file mode 100644
index 000000000..6b23f1de7
--- /dev/null
+++ b/tools/clair3.wdl
@@ -0,0 +1,90 @@
+version 1.2
+
+task clair3 {
+    meta {
+        description: "Run Clair3 variant caller for small variants using deep neural networks"
+        outputs: {
+            pileup_vcf: "VCF file with variants called using pileup model",
+            full_alignment_vcf: "VCF file with variants called using full-alignment model",
+            merged_vcf: "Final merged VCF file with variants from both models",
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format"
+        bam: "Input BAM file with aligned reads"
+        model: "Pre-trained Clair3 model to use for variant calling"
+        bed_regions: "Optional BED file specifying regions to call variants in"
+        vcf_candidates: "Optional VCF file with candidate variants to consider"
+        output_dir: "Directory to store Clair3 output"
+        platform: {
+            description: "Sequencing platform used to generate the reads",
+            choices: [
+                "ont",
+                "hifi",
+                "ilmn",
+            ],
+        }
+        all_contigs: "Boolean indicating whether to include all contigs in variant calling. If false only chr{1..22,X,Y} are called."
+        print_ref_calls: "Boolean indicating whether to print reference calls in the output VCF"
+        gvcf: "Boolean indicating whether to output gVCF format"
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File bam
+        File model
+        File? bed_regions
+        File? vcf_candidates
+        String output_dir = "clair3_output"
+        String platform = "ilmn"
+        Boolean all_contigs = false
+        Boolean print_ref_calls = false
+        Boolean gvcf = false
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(bam, "GiB"))
+        + 20
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        ./run_clair3.sh \
+            --bam_fn="~{bam}" \
+            --ref_fn="$ref_fasta" \
+            --threads="~{threads}" \
+            --platform="~{platform}" \
+            --model_path="~{model}" \
+            --output="~{output_dir}" \
+            ~{if all_contigs then "--include_all_ctgs" else ""} \
+            ~{if print_ref_calls then "--print_ref_calls" else ""} \
+            ~{if defined(bed_regions) then "--bed_fn='~{bed_regions}'" else ""} \
+            ~{if defined(vcf_candidates) then "--vcf_fn='~{vcf_candidates}'" else ""} \
+            ~{if gvcf then "--gvcf" else ""}
+
+        rm -rf "$ref_fasta"
+    >>>
+
+    output {
+        File pileup_vcf = "~{output_dir}/pileup.vcf.gz"
+        File full_alignment_vcf = "~{output_dir}/full_alignment.vcf.gz"
+        File merged_vcf = "~{output_dir}/merge_output.vcf.gz"
+    }
+
+    requirements {
+        container: "quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0"
+        cpu: threads
+        memory: "16 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}

From efb1d50cc9d9542d309795e79b7d471ef51f5dfc Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 18 Dec 2025 14:51:13 -0500
Subject: [PATCH 21/29] chore: fix invocation

---
 tools/{clair3.wdl => clair.wdl} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tools/{clair3.wdl => clair.wdl} (99%)

diff --git a/tools/clair3.wdl b/tools/clair.wdl
similarity index 99%
rename from tools/clair3.wdl
rename to tools/clair.wdl
index 6b23f1de7..20225f61c 100644
--- a/tools/clair3.wdl
+++ b/tools/clair.wdl
@@ -59,7 +59,7 @@ task clair3 {
         gunzip -c "~{reference_fasta}" > "$ref_fasta" \
             || ln -sf "~{reference_fasta}" "$ref_fasta"
 
-        ./run_clair3.sh \
+        run_clair3.sh \
             --bam_fn="~{bam}" \
             --ref_fn="$ref_fasta" \
             --threads="~{threads}" \

From 25681c72a9546488bed2848dedb3a8178f2126c6 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 18 Dec 2025 15:48:41 -0500
Subject: [PATCH 22/29] feat: add NGSEP wrapper

---
 docker/ngsep/Dockerfile   |  5 ++++
 docker/ngsep/package.json |  5 ++++
 tools/ngsep.wdl           | 59 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 docker/ngsep/Dockerfile
 create mode 100644 docker/ngsep/package.json
 create mode 100644 tools/ngsep.wdl

diff --git a/docker/ngsep/Dockerfile b/docker/ngsep/Dockerfile
new file mode 100644
index 000000000..d7c3ada65
--- /dev/null
+++ b/docker/ngsep/Dockerfile
@@ -0,0 +1,5 @@
+FROM eclipse-temurin:8
+
+RUN wget https://github.com/NGSEP/NGSEPcore/releases/download/v5.1.0/NGSEPcore_5.1.0.jar -O /usr/local/bin/NGSEPcore.jar
+
+ENTRYPOINT [ "java", "-jar", "/usr/local/bin/NGSEPcore.jar" ]
\ No newline at end of file
diff --git a/docker/ngsep/package.json b/docker/ngsep/package.json
new file mode 100644
index 000000000..19a575139
--- /dev/null
+++ b/docker/ngsep/package.json
@@ -0,0 +1,5 @@
+{
+    "name": "ngsep",
+    "version": "5.1.0",
+    "revision": "0"
+}
diff --git a/tools/ngsep.wdl b/tools/ngsep.wdl
new file mode 100644
index 000000000..bf93f9472
--- /dev/null
+++ b/tools/ngsep.wdl
@@ -0,0 +1,59 @@
+version 1.2
+
+task germline_variant {
+    meta {
+        description: "Call germline variants using NGSEP"
+        outputs: {
+            vcf_output: "VCF file containing called germline variants"
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format"
+        bam: "Input BAM file with aligned reads"
+        output_prefix: "Prefix for the output file with called variants"
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File bam
+        String output_prefix = "ngsep_germline_output"
+        Int threads = 4
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(bam, "GiB"))
+        + 20
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        ref_fasta=~{basename(reference_fasta, ".gz")}
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+
+        java -Xmx16g -jar /usr/local/bin/NGSEPcore.jar \
+            SingleSampleVariantsDetector \
+            -r "$ref_fasta" \
+            -i "~{bam}" \
+            -o "~{output_prefix}" \
+            -t "~{threads}"
+
+        rm -rf "$ref_fasta"
+    >>>
+
+    output {
+        Array[File] vcf_output = glob("~{output_prefix}*")
+    }
+
+    requirements {
+        container: "ghcr.io/stjude/ngsep:5.1.0-0"
+        cpu: threads
+        memory: "20 GB"
+        disks: "~{disk_size_gb} GB"
+    }
+}

From 1083326853229804e8d2675c0f353a0800c412de Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 18 Dec 2025 17:27:15 -0500
Subject: [PATCH 23/29] feat: add deepsomatic and deepvariant wrappers with GPU
 support

---
 tools/deepvariant.wdl | 216 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 216 insertions(+)
 create mode 100644 tools/deepvariant.wdl

diff --git a/tools/deepvariant.wdl b/tools/deepvariant.wdl
new file mode 100644
index 000000000..c5c1a3ad9
--- /dev/null
+++ b/tools/deepvariant.wdl
@@ -0,0 +1,216 @@
+version 1.2
+
+task deepsomatic {
+    meta {
+        description: "Call somatic variants from a tumor/normal BAM pair using DeepSomatic"
+        outputs: {
+            vcf_output: "VCF file containing called somatic variants",
+            gvcf_output: "gVCF file containing called somatic variants",
+            # NOTE(review): report paths below follow the run_deepsomatic script — confirm against the pinned image.
+            runtime: "Optional HTML report of runtime metrics; only present when `runtime_report` is true",
+            vcf_stats: "Optional HTML report of VCF statistics; only present when `vcf_stats_report` is true",
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format (optionally gzip-compressed)"
+        reference_fasta_index: "Index file for the reference genome FASTA"
+        tumor_bam: "Input BAM file with aligned reads for tumor sample"
+        normal_bam: "Input BAM file with aligned reads for normal sample"
+        output_prefix: "Prefix for output VCF and gVCF files"
+        tumor_sample_name: "Sample name for the tumor sample"
+        normal_sample_name: "Sample name for the normal sample"
+        model_type: {
+            description: "Type of model to use for variant calling",
+            choices: [
+                "WGS",
+                "WES",
+                "PACBIO",
+                "ONT",
+                "FFPE_WGS",
+                "FFPE_WES",
+                "FFPE_WGS_TUMOR_ONLY",
+                "FFPE_WES_TUMOR_ONLY",
+                "WGS_TUMOR_ONLY",
+                "WES_TUMOR_ONLY",
+                "PACBIO_TUMOR_ONLY",
+                "ONT_TUMOR_ONLY",
+            ],
+        }
+        runtime_report: "Output make_examples_somatic runtime metrics and create a visual runtime report using runtime_by_region_vis."
+        vcf_stats_report: "Output a visual report (HTML) of statistics about the output VCF."
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File reference_fasta_index
+        File tumor_bam
+        File normal_bam
+        String output_prefix = "deepsomatic_output"
+        String tumor_sample_name = "tumor"
+        String normal_sample_name = "normal"
+        String model_type = "WGS"
+        Boolean runtime_report = false
+        Boolean vcf_stats_report = false
+        Int threads = 8
+        Int modify_disk_size_gb = 0
+    }
+
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(tumor_bam, "GiB"))
+        + ceil(size(normal_bam, "GiB"))
+        + 50
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        # Stage the reference locally without `.gz`: decompress it, or symlink it when gunzip fails (not gzipped).
+        ref_fasta="~{basename(reference_fasta, ".gz")}"
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+        ln -sf "~{reference_fasta_index}" "$ref_fasta.fai"
+
+        run_deepsomatic \
+            --model_type="~{model_type}" \
+            --ref="$ref_fasta" \
+            --tumor_bam="~{tumor_bam}" \
+            --normal_bam="~{normal_bam}" \
+            --output_vcf="~{output_prefix}.vcf.gz" \
+            --output_gvcf="~{output_prefix}.g.vcf.gz" \
+            --tumor_sample_name="~{tumor_sample_name}" \
+            --normal_sample_name="~{normal_sample_name}" \
+            --num_shards="~{threads}" \
+            --logging_dir="logs" \
+            --intermediate_results_dir="intermediate_results" \
+            ~{if runtime_report then "--runtime_report" else ""} \
+            ~{if vcf_stats_report then "--vcf_stats_report" else ""}
+
+        rm -rf "$ref_fasta"
+    >>>
+
+    output {
+        File vcf_output = "~{output_prefix}.vcf.gz"
+        File gvcf_output = "~{output_prefix}.g.vcf.gz"
+        File? runtime = "logs/make_examples_somatic_runtime_by_region_report.html"
+        File? vcf_stats = "~{output_prefix}.visual_report.html"
+    }
+
+    requirements {
+        container: "google/deepsomatic:1.9.0-gpu"
+        cpu: threads
+        memory: "32 GB"
+        disks: "~{disk_size_gb} GB"
+        gpu: true
+    }
+
+    hints {
+        gpu: 1
+    }
+}
+
+task deepvariant {
+    meta {
+        description: "Call germline variants from a single BAM using DeepVariant"
+        outputs: {
+            vcf_output: "VCF file containing called variants",
+            gvcf_output: "gVCF file containing called variants",
+            # NOTE(review): report paths below follow the run_deepvariant script — confirm against the pinned image.
+            runtime: "Optional HTML report of runtime metrics; only present when `runtime_report` is true",
+            vcf_stats: "Optional HTML report of VCF statistics; only present when `vcf_stats_report` is true",
+        }
+    }
+
+    parameter_meta {
+        reference_fasta: "Reference genome in FASTA format (optionally gzip-compressed)"
+        reference_fasta_index: "Index file for the reference genome FASTA"
+        bam: "Input BAM file with aligned reads for sample"
+        haploid_chromosomes: "List of chromosomes to be treated as haploid during variant calling"
+        output_prefix: "Prefix for output VCF and gVCF files"
+        # NOTE(review): choices follow run_deepvariant's --model_type flag; FFPE_* and *_TUMOR_ONLY are DeepSomatic models.
+        model_type: {
+            description: "Type of model to use for variant calling",
+            choices: [
+                "WGS",
+                "WES",
+                "PACBIO",
+                "ONT_R104",
+                "HYBRID_PACBIO_ILLUMINA",
+                "MASSEQ",
+            ],
+        }
+        runtime_report: "Output make_examples runtime metrics and create a visual runtime report using runtime_by_region_vis."
+        vcf_stats_report: "Output a visual report (HTML) of statistics about the output VCF."
+        threads: "Number of threads to use"
+        modify_disk_size_gb: "Additional disk size in GB to allocate"
+    }
+
+    input {
+        File reference_fasta
+        File reference_fasta_index
+        File bam
+        Array[String] haploid_chromosomes = ["chrX", "chrY"]
+        String output_prefix = "deepvariant_output"
+        String model_type = "WGS"
+        Boolean runtime_report = false
+        Boolean vcf_stats_report = false
+        Int threads = 8
+        Int modify_disk_size_gb = 0
+    }
+
+    # Disk request: twice the reference size plus the BAM size, a fixed 50 GB allowance, and the caller's adjustment.
+    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
+        + ceil(size(bam, "GiB"))
+        + 50
+        + modify_disk_size_gb
+
+    command <<<
+        set -euo pipefail
+
+        # Stage the reference in the working directory under its name without `.gz`: try to decompress it and,
+        # if gunzip fails (e.g. the input is not gzipped), symlink the original. The index is linked next to it.
+        ref_fasta="~{basename(reference_fasta, ".gz")}"
+        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
+            || ln -sf "~{reference_fasta}" "$ref_fasta"
+        ln -sf "~{reference_fasta_index}" "$ref_fasta.fai"
+
+        # One shard per requested thread; logs and intermediate files are directed to local directories.
+        run_deepvariant \
+            --model_type="~{model_type}" \
+            --ref="$ref_fasta" \
+            --reads="~{bam}" \
+            --output_vcf="~{output_prefix}.vcf.gz" \
+            --output_gvcf="~{output_prefix}.g.vcf.gz" \
+            --num_shards="~{threads}" \
+            --logging_dir="logs" \
+            --intermediate_results_dir="intermediate_results" \
+            ~{if runtime_report then "--runtime_report" else ""} \
+            ~{if vcf_stats_report then "--vcf_stats_report" else ""} \
+            --haploid_contigs="~{sep(",", haploid_chromosomes)}"
+
+
+        rm -rf "$ref_fasta"
+
+    >>>
+
+    output {
+        File vcf_output = "~{output_prefix}.vcf.gz"
+        File gvcf_output = "~{output_prefix}.g.vcf.gz"
+        File? runtime = "logs/make_examples_runtime_by_region_report.html"
+        File? vcf_stats = "~{output_prefix}.visual_report.html"
+    }
+
+    requirements {
+        container: "google/deepvariant:1.9.0-gpu"
+        cpu: threads
+        memory: "32 GB"
+        disks: "~{disk_size_gb} GB"
+        gpu: true
+    }
+
+    hints {
+        gpu: 1
+    }
+}

From 30d11593c4ecf571ce4fb9b07bf03b1367e4cd76 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 19 Dec 2025 12:45:46 -0500
Subject: [PATCH 24/29] chore: change hisat2 output to BAM

---
 tools/hisat2.wdl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/hisat2.wdl b/tools/hisat2.wdl
index a049ec07e..9e9dcb250 100644
--- a/tools/hisat2.wdl
+++ b/tools/hisat2.wdl
@@ -21,7 +21,7 @@ task align {
         File read_one_fastq_gz
         File reference_index
         File? read_two_fastq_gz
-        String output_name = "aligned.sam"
+        String output_name = "aligned.bam"
         Int threads = 4
         Int modify_disk_size_gb = 0
     }
@@ -40,13 +40,15 @@ task align {
         tar -C hisat2_db -xzf "~{reference_index}" --no-same-owner
         PREFIX=$(basename hisat2_db/*.1.ht2 ".1.ht2")
 
+        mkfifo hisat2_stdout_pipe
         hisat2 \
             -q \
             -p ~{threads} \
-            -S "~{output_name}" \
+            -S hisat2_stdout_pipe \
             -x "hisat2_db/$PREFIX" \
             -1 "~{read_one_fastq_gz}" \
-            ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""}
+            ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""} &
+        samtools view -bS hisat2_stdout_pipe > "~{output_name}"
 
         rm -r hisat2_db
     >>>

From e58c2edecbe0c6b65cca1ced3ff650a32d4cf39d Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Fri, 19 Dec 2025 12:47:43 -0500
Subject: [PATCH 25/29] chore: write to stdout instead of fifo

---
 tools/hisat2.wdl | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/hisat2.wdl b/tools/hisat2.wdl
index 9e9dcb250..aa3077859 100644
--- a/tools/hisat2.wdl
+++ b/tools/hisat2.wdl
@@ -40,15 +40,13 @@ task align {
         tar -C hisat2_db -xzf "~{reference_index}" --no-same-owner
         PREFIX=$(basename hisat2_db/*.1.ht2 ".1.ht2")
 
-        mkfifo hisat2_stdout_pipe
         hisat2 \
             -q \
             -p ~{threads} \
-            -S hisat2_stdout_pipe \
             -x "hisat2_db/$PREFIX" \
             -1 "~{read_one_fastq_gz}" \
-            ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""} &
-        samtools view -bS hisat2_stdout_pipe > "~{output_name}"
+            ~{if defined(read_two_fastq_gz) then "-2 \"~{read_two_fastq_gz}\"" else ""} \
+            | samtools view -bS - > "~{output_name}"
 
         rm -r hisat2_db
     >>>

From e680a5b4248e69ecfe9d58145d0dbef7489df3d0 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Mon, 22 Dec 2025 16:07:33 -0500
Subject: [PATCH 26/29] chore: add undocumented FAI requirement

---
 tools/strelka.wdl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/strelka.wdl b/tools/strelka.wdl
index b986693a4..73b0236b0 100644
--- a/tools/strelka.wdl
+++ b/tools/strelka.wdl
@@ -11,6 +11,7 @@ task somatic {
 
     parameter_meta {
         reference_fasta: "Reference genome in FASTA format"
+        reference_fasta_index: "Index file for the reference genome FASTA"
         normal_bam: "Input BAM file with aligned reads for normal sample"
         tumor_bam: "Input BAM file with aligned reads for tumor sample"
         indel_candidates: "Optional VCF file with candidate indels, recommended to be generated by Manta"
@@ -23,6 +24,7 @@ task somatic {
 
     input {
         File reference_fasta
+        File reference_fasta_index
         File normal_bam
         File tumor_bam
         File? indel_candidates
@@ -50,6 +52,7 @@ task somatic {
         ref_fasta=~{basename(reference_fasta, ".gz")}
         gunzip -c "~{reference_fasta}" > "$ref_fasta" \
             || ln -sf "~{reference_fasta}" "$ref_fasta"
+        ln -sf "~{reference_fasta_index}" "$ref_fasta.fai"
 
         configureStrelkaSomaticWorkflow.py \
             --referenceFasta "$ref_fasta" \
@@ -94,6 +97,7 @@ task germline {
 
     parameter_meta {
         reference_fasta: "Reference genome in FASTA format"
+        reference_fasta_index: "Index file for the reference genome FASTA"
         bam: "Input BAM file with aligned reads"
         output_dir: "Directory to store Strelka output"
         exome: "Boolean indicating if the data is exome sequencing"
@@ -104,6 +108,7 @@ task germline {
 
     input {
         File reference_fasta
+        File reference_fasta_index
         File bam
         String output_dir = "strelka_germline_output"
         Boolean exome = false
@@ -123,6 +128,7 @@ task germline {
         ref_fasta=~{basename(reference_fasta, ".gz")}
         gunzip -c "~{reference_fasta}" > "$ref_fasta" \
             || ln -sf "~{reference_fasta}" "$ref_fasta"
+        ln -sf "~{reference_fasta_index}" "$ref_fasta.fai"
 
         configureStrelkaGermlineWorkflow.py \
             --referenceFasta "$ref_fasta" \

From 60dce466b9861c5cfb5851852a4b0281af1d28c6 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Tue, 23 Dec 2025 09:33:09 -0500
Subject: [PATCH 27/29] chore: add error checking to minimap2

---
 tools/minimap2.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index 3a8dd364d..d2118a630 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -74,6 +74,8 @@ task align {
         + modify_disk_size_gb
 
     command <<<
+        set -euo pipefail
+
         minimap2 \
             ~{if defined(preset) then "-x \"~{preset}\"" else ""} \
             ~{if output_paf then "" else "-a"} \

From f614485d9a24a43467fddc588d9a25d680427262 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Thu, 15 Jan 2026 11:55:48 -0500
Subject: [PATCH 28/29] chore: cleanup reference files

---
 tools/bwamem2.wdl     | 4 ++++
 tools/deepvariant.wdl | 1 -
 tools/hisat2.wdl      | 2 ++
 tools/minimap2.wdl    | 4 +++-
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/bwamem2.wdl b/tools/bwamem2.wdl
index c34634a73..765fc44a5 100644
--- a/tools/bwamem2.wdl
+++ b/tools/bwamem2.wdl
@@ -66,6 +66,8 @@ task align {
             "~{read_one_fastq_gz}" \
             ~{if defined(read_two_fastq_gz) then "\"~{read_two_fastq_gz}\"" else ""} |
         samtools view -b -o "~{output_name}" -
+
+        rm -r bwa_db
     >>>
 
     output {
@@ -115,6 +117,8 @@ task index {
             "$ref_fasta"
 
         tar -czf "~{bwa_db_out_name}" "$ref_fasta"*
+
+        rm -r "$ref_fasta"
     >>>
 
     output {
diff --git a/tools/deepvariant.wdl b/tools/deepvariant.wdl
index c5c1a3ad9..24d789fb6 100644
--- a/tools/deepvariant.wdl
+++ b/tools/deepvariant.wdl
@@ -192,7 +192,6 @@ task deepvariant {
 
 
         rm -rf "$ref_fasta"
-
     >>>
 
     output {
diff --git a/tools/hisat2.wdl b/tools/hisat2.wdl
index aa3077859..f63dd49b9 100644
--- a/tools/hisat2.wdl
+++ b/tools/hisat2.wdl
@@ -166,6 +166,8 @@ task index {
             "~{index_base_name}"
 
             tar -czf "~{index_base_name}.tar.gz" "~{index_base_name}"*
+
+            rm -r "$ref_fasta"
     >>>
 
     output {
diff --git a/tools/minimap2.wdl b/tools/minimap2.wdl
index d2118a630..41975e1ed 100644
--- a/tools/minimap2.wdl
+++ b/tools/minimap2.wdl
@@ -68,7 +68,7 @@ task align {
     Int disk_size_gb = ceil(
         (
             size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")
-        ) * 2)
+        ) * 3)
         + ceil(size(reference_index, "GiB"))
         + 10
         + modify_disk_size_gb
@@ -154,6 +154,8 @@ task index {
             -t ~{threads} \
             -d "~{index_name}" \
             "$ref_fasta"
+
+        rm -r "$ref_fasta"
     >>>
 
     output {

From 352fa6f11ef27a9856c63807eb396bfc2d1fcd7c Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Tue, 20 Jan 2026 11:44:08 -0500
Subject: [PATCH 29/29] chore: minimum cores to 1

---
 tools/bwa.wdl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/bwa.wdl b/tools/bwa.wdl
index dbba3f2e7..3c407a326 100644
--- a/tools/bwa.wdl
+++ b/tools/bwa.wdl
@@ -62,7 +62,7 @@ task bwa_aln {
             n_cores=$(nproc)
         fi
         # -1 because samtools uses one more core than `--threads` specifies
-        (( samtools_cores = n_cores - 1 ))
+        (( samtools_cores = n_cores > 1 ? n_cores - 1 : 1 ))  # n_cores - 1, but never below 1
 
         mkdir bwa_db
         tar -C bwa_db -xzf "~{bwa_db_tar_gz}" --no-same-owner
@@ -160,7 +160,7 @@ task bwa_aln_pe {
             n_cores=$(nproc)
         fi
         # -1 because samtools uses one more core than `--threads` specifies
-        (( samtools_cores = n_cores - 1 ))
+        (( samtools_cores = n_cores > 1 ? n_cores - 1 : 1 ))  # n_cores - 1, but never below 1
 
         mkdir bwa_db
         tar -C bwa_db -xzf "~{bwa_db_tar_gz}" --no-same-owner
@@ -257,7 +257,7 @@ task bwa_mem {
             n_cores=$(nproc)
         fi
         # -1 because samtools uses one more core than `--threads` specifies
-        (( samtools_cores = n_cores - 1 ))
+        (( samtools_cores = n_cores > 1 ? n_cores - 1 : 1 ))  # n_cores - 1, but never below 1
 
         mkdir bwa_db
         tar -C bwa_db -xzf "~{bwa_db_tar_gz}" --no-same-owner