From becb1f75345dede71d0de7bbac4a7800982fb043 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 13 Dec 2024 15:53:19 -0800 Subject: [PATCH 01/59] Support mean.var.plot for HVGs --- .../singlecell/pipeline/singlecell/NormalizeAndScale.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java index caa498999..2093d1338 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java @@ -22,7 +22,7 @@ public Provider() { super("NormalizeAndScale", "Normalize/Scale", "CellMembrane/Seurat", "This will run standard Seurat processing steps to normalize and scale the data.", Arrays.asList( SeuratToolParameter.create("variableFeatureSelectionMethod", "Variable Feature Selection Method", "The value, passed directly to Seurat's FindVariableFeatures, variableFeatureSelectionMethod", "ldk-simplecombo", new JSONObject(){{ - put("storeValues", "vst"); + put("storeValues", "vst;mean.var.plot"); put("initialValues", "vst"); put("allowBlank", false); }}, "vst"), From 0f2f806a71a07ed03d000bec2e98701139153497 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 16 Dec 2024 10:59:49 -0800 Subject: [PATCH 02/59] Add gt to install script --- .../pipeline_code/extra_tools_install.sh | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 33686252b..983d326fc 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -228,4 +228,22 @@ then python3 -m pip install --user multiqc else echo "Already installed" -fi \ No newline at end of file +fi + + +if [[ ! -e ${LKTOOLS_DIR}/gt || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf gt* + rm -Rf $LKTOOLS_DIR/gt* + + wget https://github.com/genometools/genometools/releases/download/v1.6.5/gt-1.6.5-Linux_x86_64-64bit-complete.tar.gz + tar -xf gt-1.6.5-Linux_x86_64-64bit-complete.tar.gz + + install ./gt-1.6.5-Linux_x86_64-64bit-complete/bin/gt $LKTOOLS_DIR/ + mv ./gt-1.6.5-Linux_x86_64-64bit-complete/gtdata $LKTOOLS_DIR/ +else + echo "Already installed" +fi + + From 8d262ce52b69f0ea9191d03fc2c52b12aba92b70 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 10:49:23 -0800 Subject: [PATCH 03/59] Support king-cutoff on Plink/PCA --- .../sequenceanalysis/run/variant/PlinkPcaStep.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java index 0a2ab89f6..6e4c96dab 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java @@ -80,6 +80,10 @@ public Provider() put("valueField", "application"); put("sortField", "application"); }}, null), + ToolParameterDescriptor.create("kingCutoff", "KING Cutoff", "Can be used to prune close relations, identified using KING", "ldk_numberfield", new JSONObject(){{ + put("minValue", 0); + put("maxValue", 1); + }}, 0.25), ToolParameterDescriptor.create("allowMissingSamples", "Allow Missing Samples", "When using split by application, this controls whether or not the job should fail if a matching readset cannot be found for specific samples.", "checkbox", null, false), ToolParameterDescriptor.create("fileSets", "File Set(s) For Readset Query", "If Split By Application is used, the system needs to query each sample name in the VCF and find the corresponding readset. 
If this is provided, this query will be limited to redsets where a gVCF exists and is tagged as one of these file sets", "sequenceanalysis-trimmingtextarea", null, null), ToolParameterDescriptor.create(SelectSamplesStep.SAMPLE_INCLUDE, "Sample(s) Include", "Only the following samples will be included in the analysis.", "sequenceanalysis-trimmingtextarea", null, null), @@ -178,6 +182,13 @@ private void runBatch(File inputVCF, File outputDirectory, VariantProcessingStep String samplesToExclude = getProvider().getParameterByName(SelectSamplesStep.SAMPLE_EXCLUDE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); addSubjectSelectOptions(samplesToExclude, args, "--exclude", new File(outputDirectory, "samplesToExclude.txt"), output); + Double kingCutoff = getProvider().getParameterByName("kingCutoff").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); + if (kingCutoff != null) + { + args.add("--king-cutoff"); + args.add(kingCutoff.toString()); + } + args.add("--vcf"); args.add(inputVCF.getPath()); From 23865c4ba1cb8055c21da94c8de101adc9797b9c Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 12:28:13 -0800 Subject: [PATCH 04/59] Fix typo --- .../org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java index 6e4c96dab..01ce28d43 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java @@ -80,7 +80,7 @@ public Provider() put("valueField", "application"); put("sortField", "application"); }}, null), - ToolParameterDescriptor.create("kingCutoff", "KING Cutoff", "Can be used to prune close relations, identified using KING", "ldk_numberfield", new 
JSONObject(){{ + ToolParameterDescriptor.create("kingCutoff", "KING Cutoff", "Can be used to prune close relations, identified using KING", "ldk-numberfield", new JSONObject(){{ put("minValue", 0); put("maxValue", 1); }}, 0.25), From c693c9186c5f9cb9ec390b0b0d1c634d90942a17 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 18:51:07 -0800 Subject: [PATCH 05/59] Allow KingInferenceStep to optionally add a pedigree --- .../PedigreeToolParameterDescriptor.java | 19 ++++++++++-- .../pipeline/ToolParameterDescriptor.java | 2 +- .../pipeline/VariantProcessingStep.java | 29 +++++++++++++++++-- .../pipeline/ProcessVariantsHandler.java | 8 +++-- .../run/variant/KingInferenceStep.java | 21 ++++++++++++-- 5 files changed, 68 insertions(+), 11 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java index 3bb26661b..c68087c10 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java @@ -6,11 +6,24 @@ public class PedigreeToolParameterDescriptor extends ToolParameterDescriptor { public static String NAME = "pedigreeSource"; + private final boolean _isRequired; + public PedigreeToolParameterDescriptor() { - super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject(){{ - put("allowBlank", false); - }}); + this(true); + } + + public PedigreeToolParameterDescriptor(boolean isRequired) + { + super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject()); + + getAdditionalExtConfig().put("allowBlank", isRequired); + _isRequired = isRequired; + } + 
+ public boolean isRequired() + { + return _isRequired; } public static String getClientDependencyPath() diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java index dc46f0fc8..12a054d57 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java @@ -41,7 +41,7 @@ public class ToolParameterDescriptor private String _label; private String _description; private final String _fieldXtype; - private final JSONObject _additionalExtConfig; + protected JSONObject _additionalExtConfig; private final Object _defaultValue; public ToolParameterDescriptor(CommandLineParam ca, String name, String label, String description, String fieldXtype, @Nullable Object defaultValue, @Nullable JSONObject additionalExtConfig) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java index 4b77e5dd7..67ba4ab47 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java @@ -72,7 +72,16 @@ interface Output extends PipelineStepOutput File getVCF(); } - interface RequiresPedigree + interface RequiresPedigree extends SupportsPedigree + { + @Override + default boolean isRequired() + { + return true; + } + } + + interface SupportsPedigree { default String getDemographicsProviderName(PipelineStepProvider provider, PipelineJob job, int stepIdx) { @@ -86,7 +95,23 @@ default DemographicsProvider getDemographicsProvider(PipelineStepProvider pro throw new IllegalStateException("getDemographicsProvider() can only be run from the 
webserver"); } - return LaboratoryService.get().getDemographicsProviderByName(job.getContainer(), job.getUser(), getDemographicsProviderName(provider, job, stepIdx)); + String dpn = getDemographicsProviderName(provider, job, stepIdx); + if (dpn == null) + { + if (isRequired()) + { + throw new IllegalArgumentException("The DemographicsProvider name cannot be null"); + } + + return null; + } + + return LaboratoryService.get().getDemographicsProviderByName(job.getContainer(), job.getUser(), dpn); + } + + default boolean isRequired() + { + return false; } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java index c29fd8e05..1d28158d9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java @@ -259,9 +259,13 @@ public static void initVariantProcessing(PipelineJob job, SequenceAnalysisJobSup } } - if (stepCtx.getProvider() instanceof VariantProcessingStep.RequiresPedigree rp) + if (stepCtx.getProvider() instanceof VariantProcessingStep.SupportsPedigree rp) { - demographicsProviders.add(rp.getDemographicsProvider(stepCtx.getProvider(), job, stepCtx.getStepIdx())); + DemographicsProvider dp = rp.getDemographicsProvider(stepCtx.getProvider(), job, stepCtx.getStepIdx()); + if (dp != null) + { + demographicsProviders.add(dp); + } } if (useScatterGather) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 62c4a3c0e..aa357509d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -13,7 +13,7 @@ import 
org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; -import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; +import org.labkey.api.sequenceanalysis.pipeline.PedigreeToolParameterDescriptor; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; @@ -23,13 +23,14 @@ import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; -public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep +public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep, VariantProcessingStep.RequiresPedigree { public KingInferenceStep(PipelineStepProvider provider, PipelineContext ctx) { @@ -47,7 +48,8 @@ public Provider() }}, true), ToolParameterDescriptor.create("excludedContigs", "Excluded Contigs", "A comma separated list of contigs to exclude, such as X,Y,MT.", "textfield", new JSONObject(){{ - }}, "X,Y,MT") + }}, "X,Y,MT"), + new PedigreeToolParameterDescriptor(false) ), null, "https://www.kingrelatedness.com/manual.shtml"); } @@ -118,6 +120,19 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno plinkArgs.add("--max-alleles"); plinkArgs.add("2"); + String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); + if (demographicsProviderName != 
null) + { + File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); + if (!pedFile.exists()) + { + throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); + } + + plinkArgs.add("--ped"); + plinkArgs.add(pedFile.getPath()); + } + plinkArgs.add("--out"); plinkArgs.add(plinkOut.getPath()); From 8d03557afbd7a3e1dd40fa1d8c8796a8f1ea2fc2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 20:58:18 -0800 Subject: [PATCH 06/59] Bugfix to bwamem2 index args --- .../sequenceanalysis/run/alignment/BWAMem2Wrapper.java | 2 ++ .../sequenceanalysis/run/alignment/BWAWrapper.java | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index d7dc80fd1..dc144206f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -42,6 +42,8 @@ public static class BWAMem2AlignmentStep extends BWAMemAlignmentStep public BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { super(provider, ctx, new BWAMem2Wrapper(ctx.getLogger())); + + _addBtwswArg = false; } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java index d9662dea8..abfb1ba3d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java @@ -41,7 +41,6 @@ public BWAWrapper(@Nullable Logger logger) super(logger); } - public static class Provider extends AbstractAlignmentStepProvider { public Provider() @@ -83,6 +82,8 @@ public AlignmentStep 
create(PipelineContext context) public static class BWAAlignmentStep extends AbstractAlignmentPipelineStep implements AlignmentStep { + protected boolean _addBtwswArg = true; + public BWAAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, WrapperType wrapper) { super(provider, ctx, wrapper); @@ -116,8 +117,11 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) args.add("index"); //necessary for DBs larger than 2gb - args.add("-a"); - args.add("bwtsw"); + if (_addBtwswArg) + { + args.add("-a"); + args.add("bwtsw"); + } args.add("-p"); From 652348ab8108a43c498f8e07dbb695aec54aaf61 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 09:32:11 -0800 Subject: [PATCH 07/59] More separation between bwa and bwamem2 indexing --- .../sequenceanalysis/run/alignment/BWAMem2Wrapper.java | 6 ++++++ .../labkey/sequenceanalysis/run/alignment/BWAWrapper.java | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index dc144206f..95aefa2a8 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -45,6 +45,12 @@ public BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext c _addBtwswArg = false; } + + @Override + public String getIndexCachedDirName(PipelineJob job) + { + return "bwamem2"; + } } public static class Provider extends AbstractAlignmentStepProvider diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java index abfb1ba3d..794f4c0e9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java @@ -76,7 +76,7 @@ public String getDescription() @Override public AlignmentStep create(PipelineContext context) { - return new BWAAlignmentStep(this, context, new BWAWrapper(context.getLogger())); + return new BWAAlignmentStep<>(this, context, new BWAWrapper(context.getLogger())); } } @@ -84,7 +84,7 @@ public static class BWAAlignmentStep extends Abs { protected boolean _addBtwswArg = true; - public BWAAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, WrapperType wrapper) + public BWAAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, WrapperType wrapper) { super(provider, ctx, wrapper); } @@ -104,7 +104,7 @@ public String getIndexCachedDirName(PipelineJob job) @Override public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException { - getPipelineCtx().getLogger().info("Creating BWA index"); + getPipelineCtx().getLogger().info("Creating " + getProvider().getName() + " index"); IndexOutputImpl output = new IndexOutputImpl(referenceGenome); File indexDir = new File(outputDir, getIndexCachedDirName(getPipelineCtx().getJob())); @@ -139,7 +139,7 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) output.appendOutputs(referenceGenome.getWorkingFastaFile(), indexDir); //recache if not already - AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, "bwa", referenceGenome); + AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome); return output; } From e1c8606bebd54c2c913c654cf3a5b8108f27e5b9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 09:46:41 -0800 Subject: [PATCH 08/59] Include reference for king install --- .../pipeline_code/extra_tools_install.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git 
a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 983d326fc..3ad144bc5 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -246,4 +246,18 @@ else echo "Already installed" fi +if [[ ! -e ${LKTOOLS_DIR}/king || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf king* + rm -Rf Linux-king* + rm -Rf $LKTOOLS_DIR/king* + + wget https://www.kingrelatedness.com/Linux-king.tar.gz + tar -xf Linux-king.tar.gz + + install king $LKTOOLS_DIR/ +else + echo "Already installed" +fi From 405f1b27779e025cc0c957049c5129fcff8033a6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 10:15:40 -0800 Subject: [PATCH 09/59] Add debugging for celltypist in docker --- singlecell/resources/chunks/RunCelltypist.R | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/singlecell/resources/chunks/RunCelltypist.R b/singlecell/resources/chunks/RunCelltypist.R index 571d3028e..1f98079a3 100644 --- a/singlecell/resources/chunks/RunCelltypist.R +++ b/singlecell/resources/chunks/RunCelltypist.R @@ -1,3 +1,31 @@ +if (!reticulate::py_module_available(module = 'celltypist')) { + logger::log_warn('python celltypist not found!') + logger::log_warn(paste0('Python available: ', reticulate::py_available())) + logger::log_warn('Python config') + pyConfig <- reticulate::py_config() + for (pn in names(pyConfig)) { + logger::log_warn(paste0(pn, ': ', paste0(pyConfig[[pn]]), collapse = ',')) + } + + logger::log_warn(paste0('pythonpath: ', reticulate::py_config()$pythonpath)) + + logger::log_warn('Python packages:') + for (pn in reticulate::py_list_packages()$package) { + logger::log_warn(pn) + } + + if ('conga' %in% reticulate::py_list_packages()$package) { + tryCatch({ + logger::log_warn(reticulate::import('celltypist')) + }, error = function(e){ + logger::log_warn("Error with reticulate::import('celltypist')") + 
logger::log_warn(reticulate::py_last_error()) + logger::log_warn(conditionMessage(e)) + traceback() + }) + } +} + for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) From db2b560c87887da0d2d6ed4373c8fa8d0a671353 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 10:27:40 -0800 Subject: [PATCH 10/59] Compress KING output --- .../sequenceanalysis/run/variant/KingInferenceStep.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index aa357509d..9f4895fde 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -23,6 +23,7 @@ import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.util.Compress; import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; import java.io.File; @@ -180,7 +181,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno throw new PipelineJobException("Unable to find file: " + kinshipOutput.getPath()); } - File kinshipOutputTxt = new File(kinshipOutput.getPath() + ".txt"); + File kinshipOutputTxt = new File(kinshipOutput.getPath() + ".txt.gz"); if (kinshipOutputTxt.exists()) { kinshipOutputTxt.delete(); @@ -188,6 +189,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno try { + kinshipOutput = Compress.compressGzip(kinshipOutput); FileUtils.moveFile(kinshipOutput, kinshipOutputTxt); } catch (IOException e) From ccc8554858fbe2ca8761396b5b9847797bf2f0c0 Mon Sep 17 00:00:00 2001 From: bbimber Date: 
Wed, 18 Dec 2024 13:22:20 -0800 Subject: [PATCH 11/59] Another bugfix to bwa/bwa2 index folders --- .../sequenceanalysis/pipeline/ReferenceGenome.java | 2 +- .../run/alignment/BWAMem2Wrapper.java | 6 ++++++ .../run/alignment/BWAMemWrapper.java | 2 +- .../sequenceanalysis/run/alignment/BWAWrapper.java | 13 +++++++++---- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java index eeb5ec226..f0e943867 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java @@ -79,7 +79,7 @@ public interface ReferenceGenome extends Serializable /** * @param name The name used by the aligner to identify its cached directory - * @return The folder expected containing the cached index, which is not guarenteed to exist. See AlignerIndexUtil for related methods. + * @return The folder expected containing the cached index, which is not guaranteed to exist. See AlignerIndexUtil for related methods. 
*/ File getAlignerIndexDir(String name); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index 95aefa2a8..6d3bac2ce 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -53,6 +53,12 @@ public String getIndexCachedDirName(PipelineJob job) } } + @Override + protected String getIndexDirName() + { + return("bwamem2"); + } + public static class Provider extends AbstractAlignmentStepProvider { public Provider() diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java index 68599ecdd..3b2962329 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java @@ -116,7 +116,7 @@ public void performMemAlignment(PipelineJob job, AlignmentOutputImpl output, Fil } appendThreads(job, bwaArgs); - bwaArgs.add("'" + new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath() + "'"); + bwaArgs.add("'" + new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath() + "'"); bwaArgs.add("'" + inputFastq1.getPath() + "'"); if (inputFastq2 != null) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java index 794f4c0e9..70e240a8c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java @@ -151,7 +151,7 @@ public final AlignmentOutput performAlignment(Readset rs, List inputFastqs File inputFastq2 = assertSingleFile(inputFastqs2); AlignmentOutputImpl output = new AlignmentOutputImpl(); - AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, "bwa", referenceGenome); + AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome); doPerformAlignment(output, inputFastq1, inputFastq2, outputDirectory, referenceGenome, basename, rs, readGroupId, platformUnit); output.addCommandsExecuted(getWrapper().getCommandsExecuted()); @@ -197,7 +197,7 @@ private File runBWAAln(PipelineJob job, ReferenceGenome referenceGenome, File in args.add("aln"); appendThreads(job, args); args.addAll(bwaAlnArgs); - args.add(new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); + args.add(new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); args.add(inputFile.getPath()); File output = new File(getOutputDir(inputFile), inputFile.getName() + ".sai"); @@ -230,12 +230,12 @@ protected void performBwaAlignment(PipelineJob job, AlignmentOutputImpl output, throw new PipelineJobException("Reference FASTA does not exist: " + referenceGenome.getWorkingFastaFile().getPath()); } - File expectedIndex = new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile()) + ".bwa.index.sa"); + File expectedIndex = new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile()) + ".bwa.index.sa"); if (!expectedIndex.exists()) { throw new PipelineJobException("Expected index does not exist: " + expectedIndex); } - args.add(new 
File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); + args.add(new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); //add SAI args.add(sai1.getPath()); @@ -283,4 +283,9 @@ public File getExe() { return SequencePipelineService.get().getExeForPackage("BWAPATH", "bwa"); } + + protected String getIndexDirName() + { + return("bwa"); + } } From f75fee6095af86e5d2d9d16b5bfc64e910a658a5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 13:24:11 -0800 Subject: [PATCH 12/59] Cache bwamem2 indexes by default --- .../sequenceanalysis/pipeline/CacheAlignerIndexesTask.java | 2 +- .../labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java index 9663f4570..08fcaca50 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java @@ -63,7 +63,7 @@ public List getProtocolActionNames() } @Override - public PipelineJob.Task createTask(PipelineJob job) + public PipelineJob.Task createTask(PipelineJob job) { return new CacheAlignerIndexesTask(this, job); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index 6d3bac2ce..73d1e6412 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -74,6 +74,8 @@ public Provider() }}, null) ), null, 
"https://github.com/bwa-mem2/bwa-mem2", true, true); + + setAlwaysCacheIndex(true); } @Override From 8e95311c27abf1ab6b16f22cc6c97ff1f7850cee Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 15:48:26 -0800 Subject: [PATCH 13/59] Improve logged message --- .../labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java index ddbaba6ed..1e55f7e1a 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java @@ -40,7 +40,7 @@ public static boolean copyIndexIfExists(PipelineContext ctx, AlignmentOutputImpl public static boolean copyIndexIfExists(PipelineContext ctx, AlignmentOutputImpl output, String localName, String webserverName, ReferenceGenome genome, boolean forceCopyLocal) throws PipelineJobException { - ctx.getLogger().debug("copying index to shared dir if exists: " + localName); + ctx.getLogger().debug("checking if index exists: " + localName + ". 
copy local: " + forceCopyLocal); if (ctx.getWorkDir() == null) { throw new PipelineJobException("PipelineContext.getWorkDir() is null"); From bc29d9ffa78ff0be658b30e159d11d2e94d6c5cd Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 15:53:02 -0800 Subject: [PATCH 14/59] Bugfix to PedigreeToolParameterDescriptor --- .../pipeline/PedigreeToolParameterDescriptor.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java index c68087c10..01e0cd580 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java @@ -13,11 +13,12 @@ public PedigreeToolParameterDescriptor() this(true); } - public PedigreeToolParameterDescriptor(boolean isRequired) + public PedigreeToolParameterDescriptor(final boolean isRequired) { - super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject()); + super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject(){{ + put("allowBlank", !isRequired); + }}); - getAdditionalExtConfig().put("allowBlank", isRequired); _isRequired = isRequired; } From e33f944d6ed5cc21a25a60244c27b4c4fd307968 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 19 Dec 2024 16:51:40 -0800 Subject: [PATCH 15/59] Bugfix to KingInferenceStep and pedigree --- .../api/sequenceanalysis/pipeline/VariantProcessingStep.java | 2 +- .../sequenceanalysis/run/variant/KingInferenceStep.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java index 67ba4ab47..545815b0b 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java @@ -88,7 +88,7 @@ default String getDemographicsProviderName(PipelineStepProvider provider, Pip return provider.getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(job, provider, stepIdx, String.class); } - default DemographicsProvider getDemographicsProvider(PipelineStepProvider provider, PipelineJob job, int stepIdx) + default @Nullable DemographicsProvider getDemographicsProvider(PipelineStepProvider provider, PipelineJob job, int stepIdx) { if (PipelineJobService.get().getLocationType() != PipelineJobService.LocationType.WebServer) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 9f4895fde..275b527a9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -31,14 +31,14 @@ import java.util.ArrayList; import java.util.List; -public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep, VariantProcessingStep.RequiresPedigree +public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep { public KingInferenceStep(PipelineStepProvider provider, PipelineContext ctx) { super(provider, ctx, new KingInferenceStep.KingWrapper(ctx.getLogger())); } - public static class Provider extends AbstractVariantProcessingStepProvider + public static class Provider extends 
AbstractVariantProcessingStepProvider implements VariantProcessingStep.SupportsPedigree { public Provider() { From de3d5f45a4358029140141bf221fd5fcc5ea5a8e Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 20 Dec 2024 06:19:22 -0800 Subject: [PATCH 16/59] Manually create ped file for KING --- .../run/variant/KingInferenceStep.java | 89 ++++++++++++++++--- 1 file changed, 76 insertions(+), 13 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 275b527a9..0fdb38865 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -11,6 +11,7 @@ import org.jetbrains.annotations.Nullable; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.reader.Readers; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; import org.labkey.api.sequenceanalysis.pipeline.PedigreeToolParameterDescriptor; @@ -24,12 +25,18 @@ import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.util.Compress; +import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep { @@ -121,19 +128,6 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno plinkArgs.add("--max-alleles"); 
plinkArgs.add("2"); - String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); - if (demographicsProviderName != null) - { - File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); - if (!pedFile.exists()) - { - throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); - } - - plinkArgs.add("--ped"); - plinkArgs.add(pedFile.getPath()); - } - plinkArgs.add("--out"); plinkArgs.add(plinkOut.getPath()); @@ -166,6 +160,23 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno kingArgs.add("--prefix"); kingArgs.add(SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName())); + // Update the pedigree / fam file: + String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); + if (demographicsProviderName != null) + { + File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); + if (!pedFile.exists()) + { + throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); + } + + File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam"), kingArgs); + kingArgs.add("--ped"); + kingArgs.add(kingFam.getPath()); + + output.addIntermediateFile(kingFam); + } + if (threads != null) { kingArgs.add("--cpus"); @@ -202,6 +213,58 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno return output; } + private File createFamFile(File pedFile, File famFile, List kingArgs) throws PipelineJobException + { + File newFamFile = new File(famFile.getParentFile(), "king.fam"); + + Map pedMap = new HashMap<>(); + try (BufferedReader reader = Readers.getReader(pedFile)) + { + String line; + 
while ((line = reader.readLine()) != null) + { + String[] tokens = line.split(" "); + if (tokens.length != 6) + { + throw new PipelineJobException("Improper ped line length: " + tokens.length); + } + + pedMap.put(tokens[1], StringUtils.join(Arrays.asList("0", tokens[1], tokens[2], tokens[3], tokens[4], "-9"), "\t")); + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + try (BufferedReader reader = Readers.getReader(famFile);PrintWriter writer = PrintWriters.getPrintWriter(newFamFile)) + { + String line; + while ((line = reader.readLine()) != null) + { + String[] tokens = line.split("\t"); + if (tokens.length != 6) + { + throw new PipelineJobException("Improper ped line length: " + tokens.length); + } + + String newRow = pedMap.get(tokens[1]); + if (newRow == null) + { + throw new PipelineJobException("Unable to find pedigree entry for: " + tokens[1]); + } + + writer.println(newRow); + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + return newFamFile; + } + public static class KingWrapper extends AbstractCommandWrapper { public KingWrapper(@Nullable Logger logger) From 93508f2133b882a01c75d6005dc5804cec842d72 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 20 Dec 2024 09:35:32 -0800 Subject: [PATCH 17/59] Allow missing pedigree data --- .../run/variant/KingInferenceStep.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 0fdb38865..f67b412ae 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; +import 
org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.reader.Readers; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; @@ -170,7 +171,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); } - File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam"), kingArgs); + File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam")); kingArgs.add("--ped"); kingArgs.add(kingFam.getPath()); @@ -213,11 +214,11 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno return output; } - private File createFamFile(File pedFile, File famFile, List kingArgs) throws PipelineJobException + private File createFamFile(File pedFile, File famFile) throws PipelineJobException { File newFamFile = new File(famFile.getParentFile(), "king.fam"); - Map pedMap = new HashMap<>(); + Map pedMap = new CaseInsensitiveHashMap<>(); try (BufferedReader reader = Readers.getReader(pedFile)) { String line; @@ -251,10 +252,13 @@ private File createFamFile(File pedFile, File famFile, List kingArgs) th String newRow = pedMap.get(tokens[1]); if (newRow == null) { - throw new PipelineJobException("Unable to find pedigree entry for: " + tokens[1]); + getPipelineCtx().getLogger().warn("Unable to find pedigree entry for: " + tokens[1] + ", reusing original"); + writer.println(line); + } + else + { + writer.println(newRow); } - - writer.println(newRow); } } catch (IOException e) From aa223ba4168c6d1241087adb556eec069d8d8b63 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 21 Dec 2024 07:16:56 -0800 Subject: [PATCH 18/59] Update to nimble para parsing --- .../labkey/singlecell/pipeline/singlecell/AppendNimble.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index b023676bb..2fade274d 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -1,5 +1,6 @@ package org.labkey.singlecell.pipeline.singlecell; +import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; @@ -101,10 +102,10 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Sat, 21 Dec 2024 17:02:36 -0800 Subject: [PATCH 19/59] Support decoupleR --- singlecell/resources/chunks/RunDecoupler.R | 12 ++++++ .../labkey/singlecell/SingleCellModule.java | 2 + .../pipeline/singlecell/RunDecoupler.java | 37 +++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 singlecell/resources/chunks/RunDecoupler.R create mode 100644 singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java diff --git a/singlecell/resources/chunks/RunDecoupler.R b/singlecell/resources/chunks/RunDecoupler.R new file mode 100644 index 000000000..4be28bbdb --- /dev/null +++ b/singlecell/resources/chunks/RunDecoupler.R @@ -0,0 +1,12 @@ +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + seuratObj <- CellMembrane::RunDecoupler(seuratObj) + + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 922c93ee9..5588d1677 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -82,6 +82,7 @@ import 
org.labkey.singlecell.pipeline.singlecell.RunCelltypistCustomModel; import org.labkey.singlecell.pipeline.singlecell.RunConga; import org.labkey.singlecell.pipeline.singlecell.RunCsCore; +import org.labkey.singlecell.pipeline.singlecell.RunDecoupler; import org.labkey.singlecell.pipeline.singlecell.RunEscape; import org.labkey.singlecell.pipeline.singlecell.RunLDA; import org.labkey.singlecell.pipeline.singlecell.RunPCA; @@ -291,6 +292,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new CustomGSEA.Provider()); SequencePipelineService.get().registerPipelineStep(new StudyMetadata.Provider()); SequencePipelineService.get().registerPipelineStep(new UpdateSeuratPrototype.Provider()); + SequencePipelineService.get().registerPipelineStep(new RunDecoupler.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java new file mode 100644 index 000000000..211d8d935 --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java @@ -0,0 +1,37 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SingleCellStep; + +import java.util.Arrays; + +public class RunDecoupler extends AbstractCellMembraneStep +{ + public RunDecoupler(PipelineContext ctx, RunDecoupler.Provider provider) + { + super(provider, ctx); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public Provider() + { + super("RunDecoupler", "Run decoupleR", "decoupleR", "This will run decoupleR to score transcription factor enrichment.", Arrays.asList( + + ), null, null); + } + + @Override + public RunDecoupler 
create(PipelineContext ctx) + { + return new RunDecoupler(ctx, this); + } + } + + @Override + public String getFileSuffix() + { + return "decoupler"; + } +} \ No newline at end of file From a901f3ee190e6a0ac8d49c4a289d9d4a11177150 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 24 Dec 2024 08:54:34 -0800 Subject: [PATCH 20/59] Fix typo in decoupleR --- .../sequenceanalysis/SequenceAnalysisMaintenanceTask.java | 8 ++++---- singlecell/resources/chunks/RunDecoupler.R | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java index e6b593698..57ebce8cb 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java @@ -63,7 +63,7 @@ public SequenceAnalysisMaintenanceTask() @Override public String getDescription() { - return "Delete SequenceAnalysis Artifacts"; + return "SequenceAnalysis File Maintenance"; } @Override @@ -293,12 +293,12 @@ else if (sf.getFilePath() == null) private void processContainer(Container c, Logger log) throws IOException, PipelineJobException { + if (!c.isWorkbook()) + log.info("processing container: " + c.getPath()); + PipeRoot root = PipelineService.get().getPipelineRootSetting(c); if (root != null && !root.isCloudRoot()) { - if (!c.isWorkbook()) - log.info("processing container: " + c.getPath()); - //first sequences File sequenceDir = new File(root.getRootPath(), ".sequences"); TableInfo tableRefNtSequences = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES); diff --git a/singlecell/resources/chunks/RunDecoupler.R b/singlecell/resources/chunks/RunDecoupler.R index 4be28bbdb..6e4656164 100644 --- a/singlecell/resources/chunks/RunDecoupler.R +++ b/singlecell/resources/chunks/RunDecoupler.R @@ -2,7 +2,7 @@ 
for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- CellMembrane::RunDecoupler(seuratObj) + seuratObj <- CellMembrane::RunDecoupleR(seuratObj) saveData(seuratObj, datasetId) From ae174608b7959b69e49fabfdb81a9f68d687b5ff Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 27 Dec 2024 13:49:11 -0800 Subject: [PATCH 21/59] Improve error message --- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index fd6fef01d..8dffb88ae 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -339,7 +339,7 @@ private void updateNimbleConfigFile(File configFile, NimbleGenome genome) throws } catch (IOException e) { - throw new PipelineJobException(e); + throw new PipelineJobException("Unable to parse JSON: " + configFile.getPath(), e); } JSONObject config = json.getJSONObject(0); From 2f856341103040715eb2b5f0b2553bd35d31a238 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 2 Jan 2025 11:43:37 -0800 Subject: [PATCH 22/59] Download the uncompressed SRA files to the temp folder, not work dir --- .../pipeline/SequenceAlignmentTask.java | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index b254cb86d..7c35dfda5 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1933,8 +1933,22 @@ else if (sraIDs.contains(rd.getSra_accession())) continue; } + File unzippedOutDir = new 
File(SequencePipelineService.get().getJavaTempDir(), "cachedReadDataGz"); + getTaskFileManagerImpl().addIntermediateFile(unzippedOutDir); + if (unzippedOutDir.exists()) + { + try + { + FileUtils.deleteDirectory(unzippedOutDir); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); - getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); + getTaskFileManagerImpl().addIntermediateFile(outDir); File doneFile = new File(outDir, rd.getSra_accession() + ".done"); sraIDs.add(rd.getSra_accession()); @@ -1949,18 +1963,42 @@ else if (sraIDs.contains(rd.getSra_accession())) } else { - if (!outDir.exists()) + try { + if (outDir.exists()) + { + getHelper().getLogger().debug("Deleting existing folder: " + outDir.getPath()); + FileUtils.deleteDirectory(outDir); + } + outDir.mkdirs(); - } - Pair downloaded = sra.downloadSra(rd.getSra_accession(), outDir, rd.isPairedEnd()); - rdi.setFile(downloaded.first, 1); - rdi.setFile(downloaded.second, 2); + Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd()); + File moved1 = new File(outDir, downloaded.first.getName()); + if (moved1.exists()) + { + moved1.delete(); + } + FileUtils.moveFile(downloaded.first, moved1); + rdi.setFile(moved1, 1); + + if (downloaded.second != null) + { + File moved2 = new File(outDir, downloaded.second.getName()); + if (moved2.exists()) + { + moved2.delete(); + } + FileUtils.moveFile(downloaded.second, moved2); + rdi.setFile(moved2, 2); + } - try - { Files.touch(doneFile); + + if (unzippedOutDir.exists()) + { + FileUtils.deleteDirectory(unzippedOutDir); + } } catch (IOException e) { From 7aade9e0a3c611379ceb5c71d394387db8218908 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 2 Jan 2025 17:03:54 -0800 Subject: [PATCH 23/59] Improve logging --- .../labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java | 1 + 1 file changed, 1 insertion(+) diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 7c35dfda5..9c58d3bb8 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1955,6 +1955,7 @@ else if (sraIDs.contains(rd.getSra_accession())) RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger()); if (doneFile.exists()) { + getPipelineJob().getLogger().debug("Re-using existing SRA download: " + rd.getSra_accession()); rdi.setFile(new File(outDir, rd.getSra_accession() + (rd.isPairedEnd() ? "_1" : "") + ".fastq.gz"), 1); if (rd.getFileId2() != null) { From 470319be51a86b11f1d8f49f99a6e346458bf234 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 2 Jan 2025 17:09:26 -0800 Subject: [PATCH 24/59] SRA download needs to use addDeferredIntermediateFile() --- .../labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 9c58d3bb8..4d31296d1 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1948,7 +1948,7 @@ else if (sraIDs.contains(rd.getSra_accession())) } File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); - getTaskFileManagerImpl().addIntermediateFile(outDir); + getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); // NOTE: this must be deferred so it remains until the end File doneFile = new File(outDir, rd.getSra_accession() + ".done"); sraIDs.add(rd.getSra_accession()); From 
696619e44d14d49a949f2edd4e08aabd6652ab36 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 5 Jan 2025 17:53:45 -0800 Subject: [PATCH 25/59] Bugfix to SRA download with multiple file pairs --- .../pipeline/SequenceAlignmentTask.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 4d31296d1..b8edd0a16 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1968,8 +1968,14 @@ else if (sraIDs.contains(rd.getSra_accession())) { if (outDir.exists()) { - getHelper().getLogger().debug("Deleting existing folder: " + outDir.getPath()); - FileUtils.deleteDirectory(outDir); + getHelper().getLogger().debug("Inspecting existing folder: " + outDir.getPath()); + Arrays.stream(outDir.listFiles()).forEach(f -> { + if (f.getName().startsWith(rd.getSra_accession())) + { + getPipelineJob().getLogger().debug("Deleting existing file: " + f.getPath()); + f.delete(); + } + }); } outDir.mkdirs(); @@ -1980,6 +1986,8 @@ else if (sraIDs.contains(rd.getSra_accession())) { moved1.delete(); } + + getPipelineJob().getLogger().debug("Moving gzipped file to: " + moved1.getPath()); FileUtils.moveFile(downloaded.first, moved1); rdi.setFile(moved1, 1); @@ -1990,6 +1998,7 @@ else if (sraIDs.contains(rd.getSra_accession())) { moved2.delete(); } + getPipelineJob().getLogger().debug("Moving gzipped file to: " + moved2.getPath()); FileUtils.moveFile(downloaded.second, moved2); rdi.setFile(moved2, 2); } From 31629fd54944672714d0dc4e935e90fd934b22dc Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 5 Jan 2025 17:54:25 -0800 Subject: [PATCH 26/59] Bugfix to SRA download with multiple file pairs --- .../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 6 ++++-- 1 
file changed, 4 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index b8edd0a16..4f4a0c8f8 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1977,8 +1977,10 @@ else if (sraIDs.contains(rd.getSra_accession())) } }); } - - outDir.mkdirs(); + else + { + outDir.mkdirs(); + } Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd()); File moved1 = new File(outDir, downloaded.first.getName()); From ebbf8c70b8ae764d3f1aaadda6e8026af5529f6a Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 7 Jan 2025 12:30:44 -0800 Subject: [PATCH 27/59] Allow docker jobs to use a local container cache --- .../pipeline/SequencePipelineService.java | 2 + .../sequenceanalysis/run/DockerWrapper.java | 77 +++++++++++++++++-- .../SequencePipelineServiceImpl.java | 12 +++ 3 files changed, 86 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java index dd19bba68..b57555a1c 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java @@ -100,6 +100,8 @@ static public void setInstance(SequencePipelineService instance) */ abstract public String getDockerCommand(); + abstract public boolean useLocalDockerContainerStorage(); + abstract public Collection getDockerVolumes(Container c); /** diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java 
b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index a91cea5c7..d0a3fd26f 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -1,5 +1,6 @@ package org.labkey.api.sequenceanalysis.run; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; @@ -7,6 +8,7 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; import java.io.File; @@ -28,7 +30,8 @@ public class DockerWrapper extends AbstractCommandWrapper private final PipelineContext _ctx; private File _tmpDir = null; private String _entryPoint = null; - private boolean _runPrune = true; + private boolean _runPrune = false; + private boolean _useLocalContainerStorage; private String _alternateUserHome = null; private final Map _dockerEnvironment = new HashMap<>(); @@ -37,6 +40,8 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx) super(log); _containerName = containerName; _ctx = ctx; + + _useLocalContainerStorage = SequencePipelineService.get().useLocalDockerContainerStorage(); } public void setAlternateUserHome(String alternateUserHome) @@ -59,6 +64,11 @@ public void setRunPrune(boolean runPrune) _runPrune = runPrune; } + public void setUseLocalContainerStorage(boolean useLocalContainerStorage) + { + _useLocalContainerStorage = useLocalContainerStorage; + } + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException { executeWithDocker(containerArgs, workDir, tracker, null); @@ -79,14 +89,19 @@ public void 
executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("set -e"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull " + _containerName); + writer.println("$DOCKER pull " + getLocalStorageArgs() + getEffectiveContainerName()); if (_runPrune) { - writer.println("$DOCKER image prune -f"); + writer.println("$DOCKER image prune " + getLocalStorageArgs() + "-f"); } writer.println("$DOCKER run --rm=true \\"); - writer.println("\t--group-add keep-groups \\"); + if (_useLocalContainerStorage) + { + getLogger().debug("Using local container storage: " + getLocalContainerDir().getPath()); + prepareLocalStorage(); + writer.println("\t" + getLocalStorageArgs() + "\\"); + } // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: File homeDir = new File(System.getProperty("user.home")); @@ -149,7 +164,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline { writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\"); } - writer.println("\t" + _containerName + " \\"); + writer.println("\t" + getEffectiveContainerName() + " \\"); writer.println("\t" + dockerBashScript.getPath()); writer.println("DOCKER_EXIT_CODE=$?"); writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE"); @@ -170,6 +185,23 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline localBashScript.setExecutable(true); dockerBashScript.setExecutable(true); execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + + if (_useLocalContainerStorage) + { + try + { + FileUtils.deleteDirectory(getLocalContainerDir()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + } + + private String getEffectiveContainerName() + { + return _containerName; } public void addToDockerEnvironment(String key, String value) @@ -203,4 +235,39 @@ private Collection inspectInputFiles(Collection 
inputFiles) return Collections.emptySet(); } + + private File getLocalContainerDir() + { + return new File(SequencePipelineService.get().getJavaTempDir(), "containers"); + } + + private File prepareLocalStorage() throws PipelineJobException + { + try + { + if (getLocalContainerDir().exists()) + { + getLogger().debug("Deleting existing container dir: " + getLocalContainerDir()); + FileUtils.deleteDirectory(getLocalContainerDir()); + } + + FileUtil.createDirectory(getLocalContainerDir().toPath()); + + return getLocalContainerDir(); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private String getLocalStorageArgs() + { + if (!_useLocalContainerStorage) + { + return ""; + } + + return "--root=" + getLocalContainerDir().getPath() + " "; + } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java index bfaaaba78..60f02bd15 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java @@ -459,6 +459,18 @@ public String getDockerCommand() return "docker"; } + @Override + public boolean useLocalDockerContainerStorage() + { + String value = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("USE_LOCAL_DOCKER_STORAGE"); + if (StringUtils.trimToNull(value) == null) + { + return false; + } + + return Boolean.parseBoolean(value); + } + @Override public Collection getDockerVolumes(Container c) { From d0ca84aa4de9e0aa1e7df2b6d5a28a4089f4f4ae Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 7 Jan 2025 13:17:42 -0800 Subject: [PATCH 28/59] More active deletion of temp files during SRA download --- .../pipeline/SequenceAlignmentTask.java | 42 ++++++++++++++++++- .../labkey/singlecell/run/NimbleHelper.java | 15 +++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 4f4a0c8f8..0ec9d6ac6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -702,6 +702,20 @@ private void alignSet(Readset rs, String basename, Map sraIDs = new HashSet<>(); for (ReadData rd : rs.getReadData()) { @@ -1947,7 +1987,7 @@ else if (sraIDs.contains(rd.getSra_accession())) } } - File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); + File outDir = getCachedReadDataDir(); getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); // NOTE: this must be deferred so it remains until the end File doneFile = new File(outDir, rd.getSra_accession() + ".done"); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 8dffb88ae..d6cf466a1 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -460,6 +460,21 @@ private Map doAlignment(List genomes, List Date: Wed, 8 Jan 2025 15:05:12 -0800 Subject: [PATCH 29/59] Make DockerWrapper check whether pull needed before automatically pulling --- .../api/sequenceanalysis/run/DockerWrapper.java | 14 +++++++++++++- .../pipeline_code/extra_tools_install.sh | 13 +++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index d0a3fd26f..652a77a52 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -89,7 +89,19 @@ public 
void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("set -e"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull " + getLocalStorageArgs() + getEffectiveContainerName()); + + writer.println("IMAGE_EXISTS=`$DOCKER images -q \"" + getEffectiveContainerName() + "\" | wc -l`"); + writer.println("LOCAL=not_present"); + writer.println("if [ $IMAGE_EXISTS > 0 ];then"); + writer.println("\tLOCAL=`docker inspect --format='{{.Digest}}' " + getEffectiveContainerName() + "`"); + writer.println("fi"); + writer.println("LATEST=`regctl image digest --list " + getEffectiveContainerName() + "`"); + writer.println("if [ $LOCAL != $LATEST ];then"); + writer.println("\t$DOCKER pull " + getLocalStorageArgs() + getEffectiveContainerName()); + writer.println("else"); + writer.println("\techo 'Image up to date'"); + writer.println("fi"); + if (_runPrune) { writer.println("$DOCKER image prune " + getLocalStorageArgs() + "-f"); diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 3ad144bc5..dae691917 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -261,3 +261,16 @@ else echo "Already installed" fi +if [[ ! -e ${LKTOOLS_DIR}/regctl || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf regctl* + rm -Rf $LKTOOLS_DIR/regctl* + + curl -L https://github.com/regclient/regclient/releases/latest/download/regctl-linux-amd64 > regctl + chmod 755 regctl + + install regctl $LKTOOLS_DIR/ +else + echo "Already installed" +fi From adc2889b54909e9b64f45aeb18b0767982f43546 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 8 Jan 2025 16:27:28 -0800 Subject: [PATCH 30/59] Bugfix to DockerWrapper --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 652a77a52..3a61bf728 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -92,7 +92,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("IMAGE_EXISTS=`$DOCKER images -q \"" + getEffectiveContainerName() + "\" | wc -l`"); writer.println("LOCAL=not_present"); - writer.println("if [ $IMAGE_EXISTS > 0 ];then"); + writer.println("if [[ $IMAGE_EXISTS > 0 ]];then"); writer.println("\tLOCAL=`docker inspect --format='{{.Digest}}' " + getEffectiveContainerName() + "`"); writer.println("fi"); writer.println("LATEST=`regctl image digest --list " + getEffectiveContainerName() + "`"); From 333d25bf65426fb43cdfeb30e450c070f00c28af Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 9 Jan 2025 05:46:19 -0800 Subject: [PATCH 31/59] Restore --group-add to docker --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 3a61bf728..16c526909 
100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -108,6 +108,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline } writer.println("$DOCKER run --rm=true \\"); + writer.println("\t--group-add keep-groups \\"); if (_useLocalContainerStorage) { getLogger().debug("Using local container storage: " + getLocalContainerDir().getPath()); From 892c793311aa227fa5d5681ec61d0f880147065d Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 13 Jan 2025 12:45:51 -0800 Subject: [PATCH 32/59] Remove -S from fasterq-dump --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 ++ .../org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 16c526909..dfc2bacc1 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -109,6 +109,8 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("$DOCKER run --rm=true \\"); writer.println("\t--group-add keep-groups \\"); + writer.println("\t--transient-store \\"); + if (_useLocalContainerStorage) { getLogger().debug("Using local container storage: " + getLocalContainerDir().getPath()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java index c70726ec6..1970b4fb7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java @@ -464,7 +464,7 @@ public Pair 
downloadSra(String dataset, File outDir, boolean expectP List args = new ArrayList<>(); args.add(getExe().getPath()); - args.add("-S"); + // NOTE: we probably want the --split-3 behavior, which is the default for fasterq-dump args.add("--include-technical"); Integer threads = SequencePipelineService.get().getMaxThreads(getLogger()); From 806a1fb2698c21afe65f6c194d5618194a38d1c9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 16 Jan 2025 10:46:49 -0800 Subject: [PATCH 33/59] Drop --include-technical --- .../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 2 +- .../sequenceanalysis/run/RestoreSraDataHandler.java | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 0ec9d6ac6..19ea891a7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -2022,7 +2022,7 @@ else if (sraIDs.contains(rd.getSra_accession())) outDir.mkdirs(); } - Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd()); + Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd(), false); File moved1 = new File(outDir, downloaded.first.getName()); if (moved1.exists()) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java index 1970b4fb7..8779afda1 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java @@ -384,7 +384,7 @@ public void processFilesRemote(List readsets, JobContext ctx) throws Un File expectedFile2 = rd.getFileId2() == null ? 
null : ctx.getSequenceSupport().getCachedData(rd.getFileId2()); FastqDumpWrapper wrapper = new FastqDumpWrapper(ctx.getLogger()); - Pair files = wrapper.downloadSra(accession, ctx.getOutputDir(), rd.isPairedEnd()); + Pair files = wrapper.downloadSra(accession, ctx.getOutputDir(), rd.isPairedEnd(), false); long lines1 = SequenceUtil.getLineCount(files.first) / 4; ctx.getJob().getLogger().debug("Reads in " + files.first.getName() + ": " + lines1); @@ -459,13 +459,16 @@ public FastqDumpWrapper(@Nullable Logger logger) super(logger); } - public Pair downloadSra(String dataset, File outDir, boolean expectPaired) throws PipelineJobException + public Pair downloadSra(String dataset, File outDir, boolean expectPaired, boolean includeTechnical) throws PipelineJobException { List args = new ArrayList<>(); args.add(getExe().getPath()); // NOTE: we probably want the --split-3 behavior, which is the default for fasterq-dump - args.add("--include-technical"); + if (includeTechnical) + { + args.add("--include-technical"); + } Integer threads = SequencePipelineService.get().getMaxThreads(getLogger()); if (threads != null) From 569af02b701dfbbd027d17ae491aaf4b33c908db Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 18 Jan 2025 14:47:09 -0800 Subject: [PATCH 34/59] Support minAllowableSingletRate in cell hashing --- .../org/labkey/api/singlecell/CellHashingService.java | 3 +++ .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java index a6f314dd3..9939f706c 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java +++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java @@ -104,6 +104,7 @@ public static class CellHashingParameters public Integer minCountPerCell = 5; public Double majorityConsensusThreshold = null; public Double 
minAllowableDoubletRateFilter = null; + public Double minAllowableSingletRate = null; public Double callerDisagreementThreshold = null; public List methods = CALLING_METHOD.getDefaultConsensusMethods(); //Default to just executing the set used for default consensus calls, rather than additional ones public List consensusMethods = null; @@ -127,6 +128,7 @@ public static CellHashingService.CellHashingParameters createFromStep(SequenceOu ret.minCountPerCell = step.getProvider().getParameterByName("minCountPerCell").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Integer.class, 3); ret.majorityConsensusThreshold = step.getProvider().getParameterByName("majorityConsensusThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.minAllowableDoubletRateFilter = step.getProvider().getParameterByName("minAllowableDoubletRateFilter").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); + ret.minAllowableSingletRate = step.getProvider().getParameterByName("minAllowableSingletRate").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.callerDisagreementThreshold = step.getProvider().getParameterByName("callerDisagreementThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.doTSNE = step.getProvider().getParameterByName("doTSNE").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false); ret.doNotAllowResume = step.getProvider().getParameterByName("doNotAllowResume").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false); @@ -171,6 +173,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse ret.minCountPerCell = params.optInt("minCountPerCell", 3); ret.majorityConsensusThreshold = params.get("majorityConsensusThreshold") == null ? 
null : params.getDouble("majorityConsensusThreshold"); ret.minAllowableDoubletRateFilter = params.get("minAllowableDoubletRateFilter") == null ? null : params.getDouble("minAllowableDoubletRateFilter"); + ret.minAllowableSingletRate = params.get("minAllowableSingletRate") == null ? null : params.getDouble("minAllowableSingletRate"); ret.callerDisagreementThreshold = params.get("callerDisagreementThreshold") == null ? null : params.getDouble("callerDisagreementThreshold"); ret.doTSNE = params.optBoolean("doTSNE", false); ret.doNotAllowResume = params.optBoolean("doNotAllowResume", false); diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 81629915c..ef3f80482 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -979,6 +979,11 @@ public List getHashingCallingParams(boolean allowMethod put("maxValue", 1); put("decimalPrecision", 2); }}, 0.2), + ToolParameterDescriptor.create("minAllowableSingletRate", "Min Allowable Singlet Rate", "This is an optional threshold designed to automatically discard poorly performing callers. 
If a given algorithm returns a singlet rate below this level, it is discarded from the consensus.", "ldk-numberfield", new JSONObject(){{ + put("minValue", 0); + put("maxValue", 1); + put("decimalPrecision", 2); + }}, 0.05), ToolParameterDescriptor.create("skipNormalizationQc", "Skip Normalization QC", null, "checkbox", null, true), ToolParameterDescriptor.create("doNotAllowResume", "Do Not Allow Resume", "If checked, on resume the job will repeat hashing scoring, rather than allowing resume from a saved state", "checkbox", null, false), ToolParameterDescriptor.create("doTSNE", "Do tSNE", "If true, tSNE will be performed as part of QC", "checkbox", null, false), @@ -1275,6 +1280,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) + ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? "NULL" : parameters.callerDisagreementThreshold) + (parameters.minAllowableDoubletRateFilter == null ? "" : ", minAllowableDoubletRateFilter = " + parameters.minAllowableDoubletRateFilter) + + (parameters.minAllowableSingletRate == null ? 
"" : ", minAllowableSingletRate = " + parameters.minAllowableSingletRate) + ", doTSNE = " + doTSNE + ")"); writer.println("print('Rmarkdown complete')"); From 846a5a225e48c4d5154e23c6a7601a821d40d97a Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 20 Jan 2025 15:26:35 -0800 Subject: [PATCH 35/59] Fix bug in logging --- singlecell/resources/chunks/SaveData.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/chunks/SaveData.R b/singlecell/resources/chunks/SaveData.R index 307704ae8..46002cb05 100644 --- a/singlecell/resources/chunks/SaveData.R +++ b/singlecell/resources/chunks/SaveData.R @@ -11,8 +11,8 @@ if (length(errorMessages) > 0) { write(errorMessages, file = 'seuratErrors.txt') } -if (file.exists('savedSeuratObjects.txt')) { - print(paste0('Total lines in savedSeuratObjects.txt:', length(readLines('savedSeuratObjects.txt')))) +if (file.exists(trackerFile)) { + print(paste0('Total lines in ', trackerFile, ':', length(readLines(trackerFile)))) } else { - print('File does not exist: savedSeuratObjects.txt') + print(paste0('File does not exist: ', trackerFile)) } \ No newline at end of file From 5a5740f05df2336ec49707c0781c16b2780200ad Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 20 Jan 2025 16:29:29 -0800 Subject: [PATCH 36/59] Include all algorithms in hashing, even if some are not in consensusMethods --- .../org/labkey/singlecell/CellHashingServiceImpl.java | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index ef3f80482..a291d1aa7 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1238,15 +1238,8 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St Set allowableBarcodes = parameters.getAllowableBarcodeNames(); String 
allowableBarcodeParam = allowableBarcodes != null ? "c('" + StringUtils.join(allowableBarcodes, "','") + "')" : "NULL"; - List methodNames = parameters.methods.stream().filter(m -> { - if (totalCellBarcodes < m.getMinCells()) - { - ctx.getLogger().debug("Dropping method due to insufficient cells: " + m.name()); - return false; - } - - return true; - }).map(CALLING_METHOD::getLabel).distinct().toList(); + // NOTE: we do not need to filter total methods on min cells: + List methodNames = parameters.methods.stream().map(CALLING_METHOD::getLabel).distinct().toList(); List consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().filter(m -> { if (totalCellBarcodes < m.getMinCells()) From 22034c2b4ea96e7a54280893d0c27af0350cbf65 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 23 Jan 2025 13:25:14 -0800 Subject: [PATCH 37/59] Allow nimble append to support queryDatabaseForLineageUpdates --- singlecell/resources/chunks/AppendNimble.R | 5 +++-- .../web/singlecell/panel/NimbleAppendPanel.js | 16 +++++++++++--- .../pipeline/singlecell/AppendNimble.java | 21 ++++++++++++++++++- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 7a913d757..97023dea0 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -17,8 +17,9 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) for (genomeId in names(nimbleGenomes)) { - maxAmbiguityAllowed <- !nimbleGenomeAmbiguousPreference[[genomeId]] - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio) + 
maxAmbiguityAllowed <- nimbleGenomeAmbiguousPreference[[genomeId]] + queryDatabaseForLineageUpdates <- queryDatabaseForLineageUpdatesPreference[[genomeId]] + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio, queryDatabaseForLineageUpdates = queryDatabaseForLineageUpdates) } saveData(seuratObj, datasetId) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index f01e307ce..7712dc1c0 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -40,7 +40,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },LABKEY.ext4.GRIDBUTTONS.DELETERECORD()], store: { type: 'array', - fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed'] + fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed', 'queryDatabaseForLineageUpdates'] }, columns: [{ dataIndex: 'genomeId', @@ -77,6 +77,15 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { allowBlank: true, minValue: 0 } + },{ + dataIndex: 'queryDatabaseForLineageUpdates', + width: 175, + header: 'Check for Lineage Updates', + editor: { + xtype: 'checkbox', + allowBlank: true, + value: false + } }] }] }); @@ -87,7 +96,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { getValue: function(){ var ret = []; this.down('ldk-gridpanel').store.each(function(r, i) { - ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '']); + ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '', !!r.data.queryDatabaseForLineageUpdates]); }, this); return Ext4.isEmpty(ret) ? 
null : JSON.stringify(ret); @@ -123,7 +132,8 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { var rec = grid.store.createModel({ genomeId: row[0], targetAssay: row[1], - maxAmbiguityAllowed: row[2] + maxAmbiguityAllowed: row[2], + queryDatabaseForLineageUpdates: !!row[3] }); grid.store.add(rec); }, this); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 2fade274d..67b24280a 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -96,7 +96,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Thu, 23 Jan 2025 18:34:30 -0800 Subject: [PATCH 38/59] Bugfix to AppendNimble --- .../org/labkey/singlecell/pipeline/singlecell/AppendNimble.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 67b24280a..1da9a91b1 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -77,7 +77,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Thu, 23 Jan 2025 20:42:35 -0800 Subject: [PATCH 39/59] Bugfix to AppendNimble --- .../org/labkey/singlecell/pipeline/singlecell/AppendNimble.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 1da9a91b1..087abeac6 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ 
-79,7 +79,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Sat, 25 Jan 2025 16:12:58 -0800 Subject: [PATCH 40/59] Widen NimbleAppendPanel --- .../resources/web/singlecell/panel/NimbleAppendPanel.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index 7712dc1c0..1f748636d 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -10,7 +10,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { initComponent: function(){ Ext4.apply(this, { style: 'padding: 10px;margins: 5px;', - minWidth: 850, + minWidth: 950, border: true, items: [{ html: 'This step will query nimble results for the selected genome(s). It will then append these results to the seurat object on the target assay.', @@ -20,7 +20,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },{ xtype: 'ldk-gridpanel', clicksToEdit: 1, - width: 775, + width: 875, tbar: [{ text: 'Add', handler: function(btn){ From 46f5727e5815888b3b45c4452bce4824a616d8a5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 26 Jan 2025 12:53:31 -0800 Subject: [PATCH 41/59] Support additional params for nimble append --- singlecell/resources/chunks/AppendNimble.R | 3 ++- .../web/singlecell/panel/NimbleAppendPanel.js | 21 +++++++++++---- .../pipeline/singlecell/AppendNimble.java | 27 ++++++++++++++++--- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 97023dea0..d24ac73bf 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -19,7 +19,8 @@ for (datasetId in names(seuratObjects)) { for (genomeId in names(nimbleGenomes)) { maxAmbiguityAllowed <- nimbleGenomeAmbiguousPreference[[genomeId]] 
queryDatabaseForLineageUpdates <- queryDatabaseForLineageUpdatesPreference[[genomeId]] - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio, queryDatabaseForLineageUpdates = queryDatabaseForLineageUpdates) + replaceExistingAssayData <- replaceExistingAssayDataByGenome[[genomeId]] + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio, queryDatabaseForLineageUpdates = queryDatabaseForLineageUpdates, replaceExistingAssayData = replaceExistingAssayData) } saveData(seuratObj, datasetId) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index 1f748636d..9f346ff14 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -10,7 +10,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { initComponent: function(){ Ext4.apply(this, { style: 'padding: 10px;margins: 5px;', - minWidth: 950, + minWidth: 1025, border: true, items: [{ html: 'This step will query nimble results for the selected genome(s). 
It will then append these results to the seurat object on the target assay.', @@ -20,7 +20,7 @@ },{ xtype: 'ldk-gridpanel', clicksToEdit: 1, - width: 875, + width: 1000, tbar: [{ text: 'Add', handler: function(btn){ @@ -40,7 +40,7 @@ },LABKEY.ext4.GRIDBUTTONS.DELETERECORD()], store: { type: 'array', - fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed', 'queryDatabaseForLineageUpdates'] + fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed', 'queryDatabaseForLineageUpdates', 'replaceExistingAssayData'] }, columns: [{ dataIndex: 'genomeId', @@ -86,6 +86,16 @@ allowBlank: true, value: false } + },{ + dataIndex: 'replaceExistingAssayData', + width: 150, + header: 'Replace Existing Data?', + editor: { + xtype: 'checkbox', + allowBlank: true, + value: true + } + }] }] }); @@ -96,7 +106,7 @@ getValue: function(){ var ret = []; this.down('ldk-gridpanel').store.each(function(r, i) { - ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '', !!r.data.queryDatabaseForLineageUpdates]); + ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '', !!r.data.queryDatabaseForLineageUpdates, !!r.data.replaceExistingAssayData]); }, this); return Ext4.isEmpty(ret) ?
null : JSON.stringify(ret); @@ -133,7 +143,8 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { genomeId: row[0], targetAssay: row[1], maxAmbiguityAllowed: row[2], - queryDatabaseForLineageUpdates: !!row[3] + queryDatabaseForLineageUpdates: !!row[3], + replaceExistingAssayData: !!row[4] }); grid.store.add(rec); }, this); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 087abeac6..093c56e44 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -77,7 +77,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Mon, 27 Jan 2025 09:58:30 -0800 Subject: [PATCH 42/59] Bugfix to nimble append --- .../labkey/singlecell/pipeline/singlecell/AppendNimble.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 093c56e44..106464839 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -124,7 +124,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Mon, 27 Jan 2025 11:25:32 -0800 Subject: [PATCH 43/59] Allow nimble plot to run and not produce an HTML file --- .../org/labkey/singlecell/run/NimbleHelper.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index d6cf466a1..b48c5cb14 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -292,17 +292,9 @@ 
public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin output.addSequenceOutput(results, basename + ": nimble align", "Nimble Results", rs.getRowId(), null, genome.getGenomeId(), description); + // NOTE: situations like zero alignments would result in no report being created. Rely on the code in doAlign to verify proper execution of nimble File reportHtml = getReportHtmlFileFromResults(results); - if (!reportHtml.exists()) - { - if (SequencePipelineService.get().hasMinLineCount(results, 2)) - { - long lineCount = SequencePipelineService.get().getLineCount(results); - _ctx.getLogger().debug("Found {} lines in file {}", lineCount, results.getPath()); - throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); - } - } - else + if (reportHtml.exists()) { output.addSequenceOutput(reportHtml, basename + ": nimble report", NIMBLE_REPORT_CATEGORY, rs.getRowId(), null, genome.getGenomeId(), description); } @@ -554,7 +546,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt if (!plotResultsHtml.exists()) { - throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); + ctx.getLogger().info("No report HTML generated, but nimble plot had exit code 0"); } } else From 41739623f97f07ba2de8e2e49fd64757c91a446a Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 28 Jan 2025 12:34:43 -0800 Subject: [PATCH 44/59] Support additional studies --- .../pipeline_code/sequence_tools_install.sh | 18 +++++++++--------- singlecell/resources/chunks/StudyMetadata.R | 4 ++++ .../pipeline/singlecell/StudyMetadata.java | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index d3d768a04..3e31985f4 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -985,15 +985,15 @@ then rm -Rf 
$LKTOOLS_DIR/blast_formatter rm -Rf $LKTOOLS_DIR/makeblastdb - wget $WGET_OPTS ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.31/ncbi-blast-2.2.31+-x64-linux.tar.gz - gunzip ncbi-blast-2.2.31+-x64-linux.tar.gz - tar -xf ncbi-blast-2.2.31+-x64-linux.tar - gzip ncbi-blast-2.2.31+-x64-linux.tar - - install ./ncbi-blast-2.2.31+/bin/blastn $LKTOOLS_DIR/blastn - install ./ncbi-blast-2.2.31+/bin/blast_formatter $LKTOOLS_DIR/blast_formatter - install ./ncbi-blast-2.2.31+/bin/makeblastdb $LKTOOLS_DIR/makeblastdb - install ./ncbi-blast-2.2.31+/bin/makembindex $LKTOOLS_DIR/makembindex + wget $WGET_OPTS ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.16.0/ncbi-blast-2.16.0+-x64-linux.tar.gz + gunzip ncbi-blast-2.16.0+-x64-linux.tar.gz + tar -xf ncbi-blast-2.16.0+-x64-linux.tar + gzip ncbi-blast-2.16.0+-x64-linux.tar + + install ./ncbi-blast-2.16.0+/bin/blastn $LKTOOLS_DIR/blastn + install ./ncbi-blast-2.16.0+/bin/blast_formatter $LKTOOLS_DIR/blast_formatter + install ./ncbi-blast-2.16.0+/bin/makeblastdb $LKTOOLS_DIR/makeblastdb + install ./ncbi-blast-2.16.0+/bin/makembindex $LKTOOLS_DIR/makembindex else echo "Already installed" fi diff --git a/singlecell/resources/chunks/StudyMetadata.R b/singlecell/resources/chunks/StudyMetadata.R index fd8d4e931..b64a2c66e 100644 --- a/singlecell/resources/chunks/StudyMetadata.R +++ b/singlecell/resources/chunks/StudyMetadata.R @@ -21,6 +21,10 @@ for (datasetId in names(seuratObjects)) { seuratObj <- Rdiscvr::ApplyPC531Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) } else if (studyName == 'AcuteNx') { seuratObj <- Rdiscvr::ApplyAcuteNxMetadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) + } else if (studyName == 'EC') { + seuratObj <- Rdiscvr::ApplyEC_Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) + } else if (studyName == 'PPG_Stims') { + seuratObj <- Rdiscvr::ApplyPPG_Stim_Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) } else { 
stop(paste0('Unknown study: ', studyName)) } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java index 666250a2a..27050d587 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java @@ -24,7 +24,7 @@ public Provider() {{ put("multiSelect", false); put("allowBlank", false); - put("storeValues", "PC475;PC531;TB;Malaria;AcuteNx"); + put("storeValues", "PC475;PC531;TB;Malaria;AcuteNx;EC;PPG_Stims"); put("delimiter", ";"); }}, null, null, false, false), SeuratToolParameter.create("errorIfUnknownIdsFound", "Error If Unknown Ids Found", "If true, the job will fail if the seurat object contains ID not present in the metadata", "checkbox", null, true) From 00da0725faf89b5c01c3c936de9ede5f32c610bb Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 28 Jan 2025 19:25:25 -0800 Subject: [PATCH 45/59] Support nimble append --- .../chunks/PerformDefaultNimbleAppend.R | 26 ++++++++++++++ .../labkey/singlecell/SingleCellModule.java | 2 ++ .../PerformDefaultNimbleAppend.java | 34 +++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 singlecell/resources/chunks/PerformDefaultNimbleAppend.R create mode 100644 singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java diff --git a/singlecell/resources/chunks/PerformDefaultNimbleAppend.R b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R new file mode 100644 index 000000000..90e0b659b --- /dev/null +++ b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R @@ -0,0 +1,26 @@ +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) +} + +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) +Rdiscvr::SetLabKeyDefaults(baseUrl = 
serverBaseUrl, defaultFolder = defaultLabKeyFolder) + +# NOTE: this file is created by DownloadAndAppendNimble if there was an error. It might exist if a job failed and then was restarted +if (file.exists('debug.nimble.txt.gz')) { + unlink('debug.nimble.txt.gz') +} + +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + Rdiscvr::PerformDefaultNimbleAppend(seuratObj) + + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 5588d1677..724b15efa 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -71,6 +71,7 @@ import org.labkey.singlecell.pipeline.singlecell.IntegrateData; import org.labkey.singlecell.pipeline.singlecell.MergeSeurat; import org.labkey.singlecell.pipeline.singlecell.NormalizeAndScale; +import org.labkey.singlecell.pipeline.singlecell.PerformDefaultNimbleAppend; import org.labkey.singlecell.pipeline.singlecell.PhenotypePlots; import org.labkey.singlecell.pipeline.singlecell.PlotAssayFeatures; import org.labkey.singlecell.pipeline.singlecell.PlotAverageCiteSeqCounts; @@ -293,6 +294,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new StudyMetadata.Provider()); SequencePipelineService.get().registerPipelineStep(new UpdateSeuratPrototype.Provider()); SequencePipelineService.get().registerPipelineStep(new RunDecoupler.Provider()); + SequencePipelineService.get().registerPipelineStep(new PerformDefaultNimbleAppend.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java 
b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java new file mode 100644 index 000000000..f9a5ca8b7 --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java @@ -0,0 +1,34 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SingleCellStep; + +public class PerformDefaultNimbleAppend extends AbstractRDiscvrStep +{ + public PerformDefaultNimbleAppend(PipelineContext ctx, PerformDefaultNimbleAppend.Provider provider) + { + super(provider, ctx); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public Provider() + { + super("PerformDefaultNimbleAppend", "Default Nimble Append", "RDiscvr", "This uses Rdiscvr to run the default nimble append, adding MHC, KIR, NKG, Viral and Ig data", null, null, null); + } + + @Override + public PerformDefaultNimbleAppend create(PipelineContext ctx) + { + return new PerformDefaultNimbleAppend(ctx, this); + } + } + + @Override + public String getFileSuffix() + { + return "nbl"; + } +} + From ec32afaf0bc0ede34799e68eb95199646b7ae235 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 30 Jan 2025 18:18:04 -0800 Subject: [PATCH 46/59] Bugfix to PerformDefaultNimbleAppend --- singlecell/resources/chunks/PerformDefaultNimbleAppend.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/PerformDefaultNimbleAppend.R b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R index 90e0b659b..f4f37dcd1 100644 --- a/singlecell/resources/chunks/PerformDefaultNimbleAppend.R +++ b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R @@ -16,7 +16,7 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - 
Rdiscvr::PerformDefaultNimbleAppend(seuratObj) + seuratObj <- Rdiscvr::PerformDefaultNimbleAppend(seuratObj) saveData(seuratObj, datasetId) From d8a8d9a8d579571bfc5658f51145353ecdbc560a Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 31 Jan 2025 09:51:23 -0800 Subject: [PATCH 47/59] Rework MergeSeurat to handle large data better --- singlecell/resources/chunks/MergeSeurat.R | 111 +++++++++++------- .../pipeline/singlecell/MergeSeurat.java | 3 +- 2 files changed, 68 insertions(+), 46 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 8b0d72d7c..c4d353ae5 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -5,15 +5,17 @@ if (!doDiet && length(seuratObjects) > 20 && !disableAutoDietSeurat) { doDiet <- TRUE } -mergeBatch <- function(dat) { +filesToDelete <- c() + +mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { toMerge <- list() - for (datasetId in names(dat)) { + for (datasetId in names(datasetIdToFilePath)) { print(paste0('Loading: ', datasetId)) if (doDiet) { - toMerge[[datasetId]] <- Seurat::DietSeurat(readSeuratRDS(dat[[datasetId]])) + toMerge[[datasetId]] <- Seurat::DietSeurat(readSeuratRDS(datasetIdToFilePath[[datasetId]])) gc() } else { - toMerge[[datasetId]] <- readSeuratRDS(dat[[datasetId]]) + toMerge[[datasetId]] <- readSeuratRDS(datasetIdToFilePath[[datasetId]]) } if (ncol(toMerge[[datasetId]]) == 1) { @@ -38,59 +40,78 @@ mergeBatch <- function(dat) { } seuratObj <- CellMembrane::MergeSeuratObjs(toMerge, projectName = projectName, doGC = doDiet, errorOnBarcodeSuffix = errorOnBarcodeSuffix) - return(seuratObj) + saveRDS(seuratObj, file = saveFile) + filesToDelete <<- c(filesToDelete, saveFile) + + return(fn) } -if (length(seuratObjects) == 1) { - print('There is only one seurat object, no need to merge') - datasetId <- names(seuratObjects)[[1]] - saveData(readSeuratRDS(seuratObjects[[datasetId]]), datasetId) -} else { - 
batchSize <- 20 - numBatches <- ceiling(length(seuratObjects) / batchSize) +mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInputFileSizeMb = maxAllowableInputFileSizeMb) { + logger::log_info(paste0('Beginning outer batch: ', outerBatchIdx, ' with total files: ', length(seuratObjects))) + + if (length(seuratObjects) == 1) { + print('Single file, nothing to do') + return(seuratObjects) + } + + # Phase 1: group into batches: + batchList <- list() + activeBatch <- c() + sizeOfBatch <- 0 + batchIdx <- 1 + for (datasetId in names(seuratObjects)) { + activeBatch <- c(activeBatch, seuratObjects[[datasetId]]) + sizeInMb <- (file.size(seuratObjects[[datasetId]]) / 1024^2) + sizeOfBatch <- sizeOfBatch + sizeInMb + + if (length(activeBatch) >= maxBatchSize || (sizeOfBatch >= maxInputFileSizeMb && length(activeBatch) > 1)) { + logger::log_info(paste0('adding to batch with ', length(activeBatch), ' files and ', sizeOfBatch, 'MB')) + batchList[batchIdx] <- activeBatch + activeBatch <- c() + sizeOfBatch <- 0 + batchIdx <- batchIdx + 1 + next + } + } + + # Account for final files: + if (length(activeBatch) > 0) { + logger::log_info(paste0('finalizing batch with ', length(activeBatch), ' files and ', sizeOfBatch, 'MB')) + batchList[batchIdx] <- activeBatch + } + + if (length(batchList) == 0){ + stop('Error: zero length batchList') + } + mergedObjectFiles <- list() - for (i in 1:numBatches) { - logger::log_info(paste0('Merging batch ', i, ' of ', numBatches)) - start <- 1 + (i-1)*batchSize - end <- min(start+batchSize-1, length(seuratObjects)) - logger::log_info(paste0('processing: ', start, ' to ', end, ' of ', length(seuratObjects))) + for (i in 1:length(batchList)) { + activeBatch <- batchList[[i]] + logger::log_info(paste0('Merging inner batch ', i, ' of ', length(batchList), ' with ', length(activeBatch), ' files')) - fn <- paste0('mergeBatch.', i, '.rds') - saveRDS(mergeBatch(seuratObjects[start:end]), file = fn) - mergedObjectFiles[[i]] <- fn + 
saveFile <- paste0('merge.', outerBatchIdx, '.', i, '.rds') + mergedObjectFiles[[i]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) gc() logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) } + logger::log_info('Done with inner batch') - logger::log_info('Done with batches') - if (length(mergedObjectFiles) == 1) { - seuratObj <- readRDS(mergedObjectFiles[[1]]) - unlink(mergedObjectFiles[[1]]) - } else { - logger::log_info('performing final merge') - # TODO: check for single cell in object - seuratObj <- readRDS(mergedObjectFiles[[1]]) - unlink(mergedObjectFiles[[1]]) - - for (i in 2:length(mergedObjectFiles)) { - logger::log_info(paste0('Merging final file ', i, ' of ', length(mergedObjectFiles))) - seuratObj <- merge(x = seuratObj, y = readRDS(mergedObjectFiles[[i]]), project = seuratObj@project.name) - if (HasSplitLayers(seuratObj)) { - seuratObj <- MergeSplitLayers(seuratObj) - } + if (length(mergedObjectFiles) > 1) { + return(mergeBatch(mergedObjectFiles, outerBatchIdx = (outerBatchIdx + 1), maxInputFileSizeMb = maxInputFileSizeMb, maxBatchSize = maxBatchSize)) + } - unlink(mergedObjectFiles[[i]]) + return(mergedObjectFiles) +} - logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) - logger::log_info(paste0('seurat object: ', R.utils::hsize(as.numeric(utils::object.size(seuratObj))))) - gc() - logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) - } - } +mergedObjectFiles <- mergeBatch(seuratObjects, outerBatchIdx = 1) - gc() +print('Overall merge complete') +gc() +saveData(seuratObjMerged, projectName) - saveData(seuratObj, projectName) +# Cleanup: +for (fn in filesToDelete) { + unlink(fn) } \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java 
b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java index b27f985b9..993590b79 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java @@ -35,7 +35,8 @@ public Provider() put("height", 150); put("delimiter", ","); put("stripCharsRe", "/(^['\"]+)|(['\"]+$)/g"); - }}, "RNA.orig").delimiter(",") + }}, "RNA.orig").delimiter(","), + SeuratToolParameter.create("maxAllowableInputFileSizeMb", "Max Allowable Batch Size (MB)", "The largest allowable amount of data (in MB), measured as the size of the RDS files, to be allowed in one unit of data to merge in memory.", "ldk-integerfield", null, 200, "maxAllowableInputFileSizeMb", true, false) ), null, null); } From cde358a3ece1ba8eaf9fe639043153893d4997b9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 31 Jan 2025 11:40:03 -0800 Subject: [PATCH 48/59] Bugfix to MergeSeurat --- singlecell/resources/chunks/MergeSeurat.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index c4d353ae5..820dd112c 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -56,18 +56,18 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput # Phase 1: group into batches: batchList <- list() - activeBatch <- c() + activeBatch <- list() sizeOfBatch <- 0 batchIdx <- 1 for (datasetId in names(seuratObjects)) { - activeBatch <- c(activeBatch, seuratObjects[[datasetId]]) + activeBatch[[datasetId]] <- seuratObjects[[datasetId]] sizeInMb <- (file.size(seuratObjects[[datasetId]]) / 1024^2) sizeOfBatch <- sizeOfBatch + sizeInMb if (length(activeBatch) >= maxBatchSize || (sizeOfBatch >= maxInputFileSizeMb && length(activeBatch) > 1)) { - logger::log_info(paste0('adding to batch with ', length(activeBatch), ' files and ', sizeOfBatch, 
'MB')) - batchList[batchIdx] <- activeBatch - activeBatch <- c() + logger::log_info(paste0('adding to batch with ', length(activeBatch), ' files and ', sizeOfBatch, ' MB')) + batchList[[batchIdx]] <- activeBatch + activeBatch <- list() sizeOfBatch <- 0 batchIdx <- batchIdx + 1 next @@ -76,8 +76,8 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput # Account for final files: if (length(activeBatch) > 0) { - logger::log_info(paste0('finalizing batch with ', length(activeBatch), ' files and ', sizeOfBatch, 'MB')) - batchList[batchIdx] <- activeBatch + logger::log_info(paste0('finalizing batch with ', length(activeBatch), ' files and ', sizeOfBatch, ' MB')) + batchList[[batchIdx]] <- activeBatch } if (length(batchList) == 0){ From 3843ffebef3f044017ce0a1847cf6e01fa5db404 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 31 Jan 2025 13:13:49 -0800 Subject: [PATCH 49/59] Make AppendMetadata more tolerant --- singlecell/resources/chunks/AppendMetadata.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/AppendMetadata.R b/singlecell/resources/chunks/AppendMetadata.R index addefee43..f14a25f65 100644 --- a/singlecell/resources/chunks/AppendMetadata.R +++ b/singlecell/resources/chunks/AppendMetadata.R @@ -11,7 +11,13 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- Rdiscvr::QueryAndApplyCdnaMetadata(seuratObj) + if ('BarcodePrefix' %in% names(seuratObj@meta.data) && !any(is.na(as.integer(seuratObj$BarcodePrefix)))) { + seuratObj <- Rdiscvr::QueryAndApplyCdnaMetadata(seuratObj) + } else if ('cDNA_ID' %in% names(seuratObj@meta.data)) { + seuratObj <- Rdiscvr::QueryAndApplyMetadataUsingCDNA(seuratObj) + } else { + stop('Unable to find either BarcodePrefix or cDNA_ID in meta.data') + } saveData(seuratObj, datasetId) From cc57c13352535b3fa311cc122826c8700842ce44 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 
31 Jan 2025 13:23:16 -0800 Subject: [PATCH 50/59] Bugfix to MergeSeurat --- singlecell/resources/chunks/MergeSeurat.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 820dd112c..294f74498 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -8,6 +8,10 @@ if (!doDiet && length(seuratObjects) > 20 && !disableAutoDietSeurat) { filesToDelete <- c() mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { + if (all(is.null(names(datasetIdToFilePath)))) { + stop('No names provided on datasetIdToFilePath') + } + toMerge <- list() for (datasetId in names(datasetIdToFilePath)) { print(paste0('Loading: ', datasetId)) @@ -43,7 +47,7 @@ mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { saveRDS(seuratObj, file = saveFile) filesToDelete <<- c(filesToDelete, saveFile) - return(fn) + return(saveFile) } mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInputFileSizeMb = maxAllowableInputFileSizeMb) { @@ -89,8 +93,9 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput activeBatch <- batchList[[i]] logger::log_info(paste0('Merging inner batch ', i, ' of ', length(batchList), ' with ', length(activeBatch), ' files')) - saveFile <- paste0('merge.', outerBatchIdx, '.', i, '.rds') - mergedObjectFiles[[i]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) + batchName <- paste0('merge.', outerBatchIdx, '.', i) + saveFile <- paste0(batchName, '.rds') + mergedObjectFiles[[batchName]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) gc() From d6df8203e283df2bfdd3447c46ae4fc5fd71c696 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Feb 2025 06:37:41 -0800 Subject: [PATCH 51/59] Improvements to MergeSeurat --- 
singlecell/resources/chunks/MergeSeurat.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 294f74498..88e6c6516 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -43,10 +43,13 @@ mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { stop('There were no passing seurat objects!') } - seuratObj <- CellMembrane::MergeSeuratObjs(toMerge, projectName = projectName, doGC = doDiet, errorOnBarcodeSuffix = errorOnBarcodeSuffix) - saveRDS(seuratObj, file = saveFile) + saveRDS(CellMembrane::MergeSeuratObjs(toMerge, projectName = projectName, doGC = doDiet, errorOnBarcodeSuffix = errorOnBarcodeSuffix), file = saveFile) filesToDelete <<- c(filesToDelete, saveFile) + logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) + gc() + logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) + return(saveFile) } @@ -95,11 +98,12 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput batchName <- paste0('merge.', outerBatchIdx, '.', i) saveFile <- paste0(batchName, '.rds') + if (length(activeBatch) == 1){ + print('Only single file in batch, skipping merge') + mergedObjectFiles[[batchName]] <- activeBatch[[1]] + next + } mergedObjectFiles[[batchName]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) - - logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) - gc() - logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) } logger::log_info('Done with inner batch') From 4af7b79b4999a0a4673cce6dca944cb0336a0495 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Feb 2025 12:07:32 -0800 Subject: [PATCH 52/59] Expand orphan pipeline job tests --- .../pipeline/OrphanFilePipelineJob.java | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 
deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index 9ffea44dc..026177561 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -396,11 +396,10 @@ public void getOrphanFilesForContainer(Container c, User u, Set orphanFile } } - //TODO: look for .deleted and /archive File deletedDir = new File(root.getRootPath().getParentFile(), ".deleted"); if (deletedDir.exists()) { - messages.add("## .deleted dir found: " + deletedDir.getPath()); + getJob().getLogger().warn("WARNING: .deleted dir found: " + deletedDir.getPath()); } File assayData = new File(root.getRootPath(), "assaydata"); @@ -513,12 +512,6 @@ private void getOrphanFilesForDirectory(Set knownExpDatas, Map knownExpDatas, Map (200*1024*1024)) + { + getJob().getLogger().warn("WARNING: Extremely large log file: " + f.getPath() + ", " + FileUtils.byteCountToDisplaySize(f.length())); + } + + if (f.getName().endsWith(".rds")) + { + if (!dataMap.containsKey(f.toURI())) + { + getJob().getLogger().warn("WARNING: Unknown RDS file: " + f.getPath()); + } + } + //orphan index if (f.getPath().toLowerCase().endsWith(".bai") || f.getPath().toLowerCase().endsWith(".tbi") || f.getPath().toLowerCase().endsWith(".idx") || f.getPath().toLowerCase().endsWith(".crai")) { From 11fddd3369983cc1cb719e3751de2fe02f0e9cf5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Feb 2025 13:44:59 -0800 Subject: [PATCH 53/59] Bugfix to MergeSeurat --- singlecell/resources/chunks/MergeSeurat.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 88e6c6516..134005614 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ 
b/singlecell/resources/chunks/MergeSeurat.R @@ -115,10 +115,14 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput } mergedObjectFiles <- mergeBatch(seuratObjects, outerBatchIdx = 1) +if (length(mergedObjectFiles) != 1) { + stop(paste0('Expected single file, found: ', length(mergedObjectFiles))) +} print('Overall merge complete') gc() -saveData(seuratObjMerged, projectName) + +saveData(readRDS(mergedObjectFiles[[1]]), projectName) # Cleanup: for (fn in filesToDelete) { From 55b3207713c0a116a2f049431b39876a2e89cf80 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 2 Feb 2025 10:57:03 -0800 Subject: [PATCH 54/59] Support SVTyper --- .../pipeline_code/extra_tools_install.sh | 18 ++ .../SequenceAnalysisModule.java | 2 + .../run/alignment/SVTyperStep.java | 188 ++++++++++++++++++ 3 files changed, 208 insertions(+) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index dae691917..308401cee 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -274,3 +274,21 @@ then else echo "Already installed" fi + +if [[ ! -e ${LKTOOLS_DIR}/svtyper || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/svtyper* + + # NOTE: this fork is used to ensure python3 compatibility + #python3 -m pip install --user git+https://github.com/hall-lab/svtyper.git + python3 -m pip install --user git+https://github.com/bbimber/svtyper.git + + SVTYPER=`which svtyper` + ln -s $SVTYPER ${LKTOOLS_DIR}/svtyper + + SVTYPER=`which svtyper-sso` + ln -s $SVTYPER ${LKTOOLS_DIR}/svtyper-sso +else + echo "Already installed" +fi \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 61c9a0fe1..38d53f119 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -101,6 +101,7 @@ import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper; import org.labkey.sequenceanalysis.run.alignment.ParagraphStep; import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper; +import org.labkey.sequenceanalysis.run.alignment.SVTyperStep; import org.labkey.sequenceanalysis.run.alignment.StarWrapper; import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper; import org.labkey.sequenceanalysis.run.analysis.BamIterator; @@ -403,6 +404,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); + SequenceAnalysisService.get().registerFileHandler(new SVTyperStep()); SequenceAnalysisService.get().registerFileHandler(new UpdateReadsetFilesHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java new file mode 100644 index 000000000..28bda7b03 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -0,0 +1,188 @@ +package org.labkey.sequenceanalysis.run.alignment; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +//https://github.com/hall-lab/svtyper + +public class SVTyperStep extends AbstractParameterizedOutputHandler +{ + public SVTyperStep() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "SVTyper SV Genotyping", "This will run SVTyper on one or more BAM files to genotype SVs", null, Arrays.asList( + ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() + {{ 
+ put("allowBlank", false); + }}, null), + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true) + )); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + int svVcfId = ctx.getParams().optInt("svVCF", 0); + if (svVcfId == 0) + { + throw new PipelineJobException("svVCF param was null"); + } + + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); + if (svVcf == null) + { + throw new PipelineJobException("File not found for ID: " + svVcfId); + } + else if (!svVcf.exists()) + { + throw new PipelineJobException("Missing file: " + svVcf.getPath()); + } + + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); + + for (SequenceOutputFile so : inputFiles) + { 
+ List jsonArgs = new ArrayList<>(); + SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); + jsonArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper", null, true).getPath()); + jsonArgs.add("-b"); + jsonArgs.add(so.getFile().getPath()); + + File coverageJson = new File(ctx.getWorkingDirectory(), "bam.json"); + jsonArgs.add("-l"); + jsonArgs.add(coverageJson.getPath()); + + + if (threads != null) + { + jsonArgs.add("--max_reads"); + jsonArgs.add(threads.toString()); + } + + File doneFile = new File(ctx.getWorkingDirectory(), "json.done"); + ctx.getFileManager().addIntermediateFile(doneFile); + if (doneFile.exists()) + { + ctx.getLogger().info("BAM json already generated, skipping"); + } + else + { + wrapper.execute(jsonArgs); + try + { + FileUtils.touch(doneFile); + ctx.getFileManager().addIntermediateFile(doneFile); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + if (!coverageJson.exists()) + { + throw new PipelineJobException("Missing file: " + coverageJson.getPath()); + } + ctx.getFileManager().addIntermediateFile(coverageJson); + + List svtyperArgs = new ArrayList<>(); + svtyperArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper-sso", null, true).getPath()); + + svtyperArgs.add("-i"); + svtyperArgs.add(svVcf.getPath()); + + svtyperArgs.add("-B"); + svtyperArgs.add(so.getFile().getPath()); + + svtyperArgs.add("-l"); + svtyperArgs.add(coverageJson.getPath()); + + if (threads != null) + { + svtyperArgs.add("--core"); + svtyperArgs.add(threads.toString()); + } + + File genotypes = new File(ctx.getWorkingDirectory(), SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".svtyper.vcf.gz"); + wrapper.execute(Arrays.asList("/bin/bash", "-c", StringUtils.join(svtyperArgs, " ") + "| bgzip -c"), ProcessBuilder.Redirect.to(genotypes)); + + if (!genotypes.exists()) + { + throw new PipelineJobException("Missing file: " + genotypes.getPath()); + } + + try + { + 
SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + ctx.getFileManager().addSequenceOutput(genotypes, "SVTyper Genotypes: " + so.getName(), "SVTyper Genotypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); + } + } + } +} From 36cf5d8d2ef5fcabc20973510b4a9cfe8ad2064a Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 2 Feb 2025 18:01:00 -0800 Subject: [PATCH 55/59] Bugfix SVTyper --- .../org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index 28bda7b03..d7d84e101 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -106,7 +106,7 @@ else if (!svVcf.exists()) List jsonArgs = new ArrayList<>(); SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); jsonArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper", null, true).getPath()); - jsonArgs.add("-b"); + jsonArgs.add("-B"); jsonArgs.add(so.getFile().getPath()); File coverageJson = new File(ctx.getWorkingDirectory(), "bam.json"); From 5110798cf98bd36d0e8195d218cdc425519dff27 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 2 Feb 2025 18:04:36 -0800 Subject: [PATCH 56/59] Support additional options for mGAP tracks --- .../org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java | 1 - 1 file changed, 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index d7d84e101..584dc0d47 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -113,7 +113,6 @@ else if (!svVcf.exists()) jsonArgs.add("-l"); jsonArgs.add(coverageJson.getPath()); - if (threads != null) { jsonArgs.add("--max_reads"); From 166cd840e9643e346f2844b094178ba530dba6a1 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 3 Feb 2025 21:13:07 -0800 Subject: [PATCH 57/59] Support Graphtyper --- .../pipeline_code/extra_tools_install.sh | 15 +- .../SequenceAnalysisModule.java | 2 + .../run/alignment/Graphtyper.java | 149 ++++++++++++++++++ .../run/alignment/SVTyperStep.java | 2 +- 4 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 308401cee..823aa7dcd 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -291,4 +291,17 @@ then ln -s $SVTYPER ${LKTOOLS_DIR}/svtyper-sso else echo "Already installed" -fi \ No newline at end of file +fi + +if [[ ! -e ${LKTOOLS_DIR}/graphtyper || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/graphtyper* + + wget https://github.com/DecodeGenetics/graphtyper/releases/download/v2.7.7/graphtyper + chmod a+x graphtyper + + mv ./graphtyper $LKTOOLS_DIR/ +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 38d53f119..2d7ea845f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -98,6 +98,7 @@ import org.labkey.sequenceanalysis.run.alignment.Bowtie2Wrapper; import org.labkey.sequenceanalysis.run.alignment.BowtieWrapper; import org.labkey.sequenceanalysis.run.alignment.GSnapWrapper; +import org.labkey.sequenceanalysis.run.alignment.Graphtyper; import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper; import org.labkey.sequenceanalysis.run.alignment.ParagraphStep; import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper; @@ -405,6 +406,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); SequenceAnalysisService.get().registerFileHandler(new SVTyperStep()); + SequenceAnalysisService.get().registerFileHandler(new Graphtyper()); SequenceAnalysisService.get().registerFileHandler(new UpdateReadsetFilesHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java new file mode 100644 index 000000000..125ebb480 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java @@ -0,0 +1,149 @@ +package 
org.labkey.sequenceanalysis.run.alignment; + +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +//https://github.com/DecodeGenetics/graphtyper + +public class Graphtyper extends AbstractParameterizedOutputHandler +{ + public Graphtyper() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Graphtyper (SV Genotyping)", "This will run Graphtyper on one or more BAM files to genotype SVs", null, Arrays.asList( + ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() + {{ + put("allowBlank", false); + }}, null), + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. 
This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true) + )); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + int svVcfId = ctx.getParams().optInt("svVCF", 0); + if (svVcfId == 0) + { + throw new PipelineJobException("svVCF param was null"); + } + + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); + if (svVcf == null) + { + throw new PipelineJobException("File not found for ID: " + svVcfId); + } + else if (!svVcf.exists()) + { + throw new PipelineJobException("Missing file: " + svVcf.getPath()); + } + + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); + + for (SequenceOutputFile so : inputFiles) + { + List args = new ArrayList<>(); + SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); + args.add(AbstractCommandWrapper.resolveFileInPath("graphtyper", null, true).getPath()); + args.add("genotype_sv"); + + ReferenceGenome rg = 
ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); + if (rg == null) + { + throw new PipelineJobException("Missing reference genome: " + so.getLibrary_id()); + } + + args.add(rg.getWorkingFastaFile().getPath()); + args.add(svVcf.toString()); + + args.add("--sam"); + args.add(so.getFile().getPath()); + + if (threads != null) + { + args.add("--threads"); + args.add(threads.toString()); + } + + wrapper.execute(args); + + File genotypes = new File(ctx.getWorkingDirectory(), "sv_results/" + SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".vcf.gz"); + if (!genotypes.exists()) + { + throw new PipelineJobException("Missing file: " + genotypes.getPath()); + } + + try + { + SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + ctx.getFileManager().addSequenceOutput(genotypes, "Graphtyper Genotypes: " + so.getName(), "Graphtyper Genotypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); + } + } + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index 584dc0d47..54ce99135 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -115,7 +115,7 @@ else if (!svVcf.exists()) if (threads != null) { - jsonArgs.add("--max_reads"); + jsonArgs.add("--cores"); jsonArgs.add(threads.toString()); } From 58af56eafebf00d28ea9042c3f46051c1e129aa2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 3 Feb 2025 21:55:44 -0800 Subject: [PATCH 58/59] Bugfix to SVtyper --- .../labkey/sequenceanalysis/run/alignment/SVTyperStep.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index 54ce99135..2c5b7a629 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -113,12 +113,6 @@ else if (!svVcf.exists()) jsonArgs.add("-l"); jsonArgs.add(coverageJson.getPath()); - if (threads != null) - { - jsonArgs.add("--cores"); - jsonArgs.add(threads.toString()); - } - File doneFile = new File(ctx.getWorkingDirectory(), "json.done"); ctx.getFileManager().addIntermediateFile(doneFile); if (doneFile.exists()) From e5a0c7091a497abafc07323ef54ee7e9913fc4ef Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 3 Feb 2025 22:15:39 -0800 Subject: [PATCH 59/59] Bugfix to Graphtyper --- .../run/alignment/Graphtyper.java | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java index 125ebb480..4c0b47120 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java @@ -1,5 +1,8 @@ package org.labkey.sequenceanalysis.run.alignment; +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; import org.json.JSONObject; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJob; @@ -15,11 +18,13 @@ import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; 
import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -125,6 +130,12 @@ else if (!svVcf.exists()) args.add(threads.toString()); } + args.add("--region_file"); + File regionFile = new File(ctx.getWorkingDirectory(), "regions.txt"); + args.add(regionFile.getPath()); + + generateRegionFile(svVcf, regionFile); + ctx.getFileManager().addIntermediateFile(regionFile); wrapper.execute(args); File genotypes = new File(ctx.getWorkingDirectory(), "sv_results/" + SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".vcf.gz"); @@ -146,4 +157,47 @@ else if (!svVcf.exists()) } } } + + private static void generateRegionFile(File svVcf, File regionFile) throws PipelineJobException + { + try (PrintWriter writer = PrintWriters.getPrintWriter(regionFile)) + { + try (VCFFileReader reader = new VCFFileReader(svVcf, true); CloseableIterator iterator = reader.iterator()) + { + String chr = null; + int start = 1; + int end = -1; + while (iterator.hasNext()) + { + VariantContext vc = iterator.next(); + if (chr == null) + { + chr = vc.getContig(); + } + + if (!vc.getContig().equals(chr)) + { + writer.println(chr + ":" + start + "-" + end); + + // Reset + chr = vc.getContig(); + start = 1; + end = -1; + } + + start = Math.min(start, vc.getStart()); + end = Math.max(end, vc.getEnd()); + } + + if (chr != null) + { + writer.println(chr + ":" + start + "-" + end); + } + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } }