From becb1f75345dede71d0de7bbac4a7800982fb043 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 13 Dec 2024 15:53:19 -0800 Subject: [PATCH 01/59] Support mean.var.plot for HVGs --- .../singlecell/pipeline/singlecell/NormalizeAndScale.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java index caa498999..2093d1338 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/NormalizeAndScale.java @@ -22,7 +22,7 @@ public Provider() { super("NormalizeAndScale", "Normalize/Scale", "CellMembrane/Seurat", "This will run standard Seurat processing steps to normalize and scale the data.", Arrays.asList( SeuratToolParameter.create("variableFeatureSelectionMethod", "Variable Feature Selection Method", "The value, passed directly to Seurat's FindVariableFeatures, variableFeatureSelectionMethod", "ldk-simplecombo", new JSONObject(){{ - put("storeValues", "vst"); + put("storeValues", "vst;mean.var.plot"); put("initialValues", "vst"); put("allowBlank", false); }}, "vst"), From 0f2f806a71a07ed03d000bec2e98701139153497 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 16 Dec 2024 10:59:49 -0800 Subject: [PATCH 02/59] Add gt to install script --- .../pipeline_code/extra_tools_install.sh | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 33686252b..983d326fc 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -228,4 +228,22 @@ then python3 -m pip install --user multiqc else echo "Already installed" -fi \ No newline at end of file +fi + + +if [[ ! -e ${LKTOOLS_DIR}/gt || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf gt* + rm -Rf $LKTOOLS_DIR/gt* + + wget https://github.com/genometools/genometools/releases/download/v1.6.5/gt-1.6.5-Linux_x86_64-64bit-complete.tar.gz + tar -xf gt-1.6.5-Linux_x86_64-64bit-complete.tar.gz + + install ./gt-1.6.5-Linux_x86_64-64bit-complete/bin/gt $LKTOOLS_DIR/ + mv ./gt-1.6.5-Linux_x86_64-64bit-complete/gtdata $LKTOOLS_DIR/ +else + echo "Already installed" +fi + + From 8d262ce52b69f0ea9191d03fc2c52b12aba92b70 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 10:49:23 -0800 Subject: [PATCH 03/59] Support king-cutoff on Plink/PCA --- .../sequenceanalysis/run/variant/PlinkPcaStep.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java index 0a2ab89f6..6e4c96dab 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java @@ -80,6 +80,10 @@ public Provider() put("valueField", "application"); put("sortField", "application"); }}, null), + ToolParameterDescriptor.create("kingCutoff", "KING Cutoff", "Can be used to prune close relations, identified using KING", "ldk_numberfield", new JSONObject(){{ + put("minValue", 0); + put("maxValue", 1); + }}, 0.25), ToolParameterDescriptor.create("allowMissingSamples", "Allow Missing Samples", "When using split by application, this controls whether or not the job should fail if a matching readset cannot be found for specific samples.", "checkbox", null, false), ToolParameterDescriptor.create("fileSets", "File Set(s) For Readset Query", "If Split By Application is used, the system needs to query each sample name in the VCF and find the corresponding readset. 
If this is provided, this query will be limited to redsets where a gVCF exists and is tagged as one of these file sets", "sequenceanalysis-trimmingtextarea", null, null), ToolParameterDescriptor.create(SelectSamplesStep.SAMPLE_INCLUDE, "Sample(s) Include", "Only the following samples will be included in the analysis.", "sequenceanalysis-trimmingtextarea", null, null), @@ -178,6 +182,13 @@ private void runBatch(File inputVCF, File outputDirectory, VariantProcessingStep String samplesToExclude = getProvider().getParameterByName(SelectSamplesStep.SAMPLE_EXCLUDE).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), String.class); addSubjectSelectOptions(samplesToExclude, args, "--exclude", new File(outputDirectory, "samplesToExclude.txt"), output); + Double kingCutoff = getProvider().getParameterByName("kingCutoff").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class); + if (kingCutoff != null) + { + args.add("--king-cutoff"); + args.add(kingCutoff.toString()); + } + args.add("--vcf"); args.add(inputVCF.getPath()); From 23865c4ba1cb8055c21da94c8de101adc9797b9c Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 12:28:13 -0800 Subject: [PATCH 04/59] Fix typo --- .../org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java index 6e4c96dab..01ce28d43 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java @@ -80,7 +80,7 @@ public Provider() put("valueField", "application"); put("sortField", "application"); }}, null), - ToolParameterDescriptor.create("kingCutoff", "KING Cutoff", "Can be used to prune close relations, identified using KING", "ldk_numberfield", new 
JSONObject(){{ + ToolParameterDescriptor.create("kingCutoff", "KING Cutoff", "Can be used to prune close relations, identified using KING", "ldk-numberfield", new JSONObject(){{ put("minValue", 0); put("maxValue", 1); }}, 0.25), From c693c9186c5f9cb9ec390b0b0d1c634d90942a17 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 18:51:07 -0800 Subject: [PATCH 05/59] Allow KingInferenceStep to optionally add a pedigree --- .../PedigreeToolParameterDescriptor.java | 19 ++++++++++-- .../pipeline/ToolParameterDescriptor.java | 2 +- .../pipeline/VariantProcessingStep.java | 29 +++++++++++++++++-- .../pipeline/ProcessVariantsHandler.java | 8 +++-- .../run/variant/KingInferenceStep.java | 21 ++++++++++++-- 5 files changed, 68 insertions(+), 11 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java index 3bb26661b..c68087c10 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java @@ -6,11 +6,24 @@ public class PedigreeToolParameterDescriptor extends ToolParameterDescriptor { public static String NAME = "pedigreeSource"; + private final boolean _isRequired; + public PedigreeToolParameterDescriptor() { - super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject(){{ - put("allowBlank", false); - }}); + this(true); + } + + public PedigreeToolParameterDescriptor(boolean isRequired) + { + super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject()); + + getAdditionalExtConfig().put("allowBlank", isRequired); + _isRequired = isRequired; + } + 
+ public boolean isRequired() + { + return _isRequired; } public static String getClientDependencyPath() diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java index dc46f0fc8..12a054d57 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ToolParameterDescriptor.java @@ -41,7 +41,7 @@ public class ToolParameterDescriptor private String _label; private String _description; private final String _fieldXtype; - private final JSONObject _additionalExtConfig; + protected JSONObject _additionalExtConfig; private final Object _defaultValue; public ToolParameterDescriptor(CommandLineParam ca, String name, String label, String description, String fieldXtype, @Nullable Object defaultValue, @Nullable JSONObject additionalExtConfig) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java index 4b77e5dd7..67ba4ab47 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java @@ -72,7 +72,16 @@ interface Output extends PipelineStepOutput File getVCF(); } - interface RequiresPedigree + interface RequiresPedigree extends SupportsPedigree + { + @Override + default boolean isRequired() + { + return true; + } + } + + interface SupportsPedigree { default String getDemographicsProviderName(PipelineStepProvider provider, PipelineJob job, int stepIdx) { @@ -86,7 +95,23 @@ default DemographicsProvider getDemographicsProvider(PipelineStepProvider pro throw new IllegalStateException("getDemographicsProvider() can only be run from the 
webserver"); } - return LaboratoryService.get().getDemographicsProviderByName(job.getContainer(), job.getUser(), getDemographicsProviderName(provider, job, stepIdx)); + String dpn = getDemographicsProviderName(provider, job, stepIdx); + if (dpn == null) + { + if (isRequired()) + { + throw new IllegalArgumentException("The DemographicsProvider name cannot be null"); + } + + return null; + } + + return LaboratoryService.get().getDemographicsProviderByName(job.getContainer(), job.getUser(), dpn); + } + + default boolean isRequired() + { + return false; } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java index c29fd8e05..1d28158d9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java @@ -259,9 +259,13 @@ public static void initVariantProcessing(PipelineJob job, SequenceAnalysisJobSup } } - if (stepCtx.getProvider() instanceof VariantProcessingStep.RequiresPedigree rp) + if (stepCtx.getProvider() instanceof VariantProcessingStep.SupportsPedigree rp) { - demographicsProviders.add(rp.getDemographicsProvider(stepCtx.getProvider(), job, stepCtx.getStepIdx())); + DemographicsProvider dp = rp.getDemographicsProvider(stepCtx.getProvider(), job, stepCtx.getStepIdx()); + if (dp != null) + { + demographicsProviders.add(dp); + } } if (useScatterGather) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 62c4a3c0e..aa357509d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -13,7 +13,7 @@ import 
org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; -import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam; +import org.labkey.api.sequenceanalysis.pipeline.PedigreeToolParameterDescriptor; import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; @@ -23,13 +23,14 @@ import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; -public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep +public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep, VariantProcessingStep.RequiresPedigree { public KingInferenceStep(PipelineStepProvider provider, PipelineContext ctx) { @@ -47,7 +48,8 @@ public Provider() }}, true), ToolParameterDescriptor.create("excludedContigs", "Excluded Contigs", "A comma separated list of contigs to exclude, such as X,Y,MT.", "textfield", new JSONObject(){{ - }}, "X,Y,MT") + }}, "X,Y,MT"), + new PedigreeToolParameterDescriptor(false) ), null, "https://www.kingrelatedness.com/manual.shtml"); } @@ -118,6 +120,19 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno plinkArgs.add("--max-alleles"); plinkArgs.add("2"); + String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); + if (demographicsProviderName != 
null) + { + File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); + if (!pedFile.exists()) + { + throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); + } + + plinkArgs.add("--ped"); + plinkArgs.add(pedFile.getPath()); + } + plinkArgs.add("--out"); plinkArgs.add(plinkOut.getPath()); From 8d03557afbd7a3e1dd40fa1d8c8796a8f1ea2fc2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 17 Dec 2024 20:58:18 -0800 Subject: [PATCH 06/59] Bugfix to bwamem2 index args --- .../sequenceanalysis/run/alignment/BWAMem2Wrapper.java | 2 ++ .../sequenceanalysis/run/alignment/BWAWrapper.java | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index d7dc80fd1..dc144206f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -42,6 +42,8 @@ public static class BWAMem2AlignmentStep extends BWAMemAlignmentStep public BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { super(provider, ctx, new BWAMem2Wrapper(ctx.getLogger())); + + _addBtwswArg = false; } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java index d9662dea8..abfb1ba3d 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java @@ -41,7 +41,6 @@ public BWAWrapper(@Nullable Logger logger) super(logger); } - public static class Provider extends AbstractAlignmentStepProvider { public Provider() @@ -83,6 +82,8 @@ public AlignmentStep 
create(PipelineContext context) public static class BWAAlignmentStep extends AbstractAlignmentPipelineStep implements AlignmentStep { + protected boolean _addBtwswArg = true; + public BWAAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, WrapperType wrapper) { super(provider, ctx, wrapper); @@ -116,8 +117,11 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) args.add("index"); //necessary for DBs larger than 2gb - args.add("-a"); - args.add("bwtsw"); + if (_addBtwswArg) + { + args.add("-a"); + args.add("bwtsw"); + } args.add("-p"); From 652348ab8108a43c498f8e07dbb695aec54aaf61 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 09:32:11 -0800 Subject: [PATCH 07/59] More separation between bwa and bwamem2 indexing --- .../sequenceanalysis/run/alignment/BWAMem2Wrapper.java | 6 ++++++ .../labkey/sequenceanalysis/run/alignment/BWAWrapper.java | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index dc144206f..95aefa2a8 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -45,6 +45,12 @@ public BWAMem2AlignmentStep(AlignmentStepProvider provider, PipelineContext c _addBtwswArg = false; } + + @Override + public String getIndexCachedDirName(PipelineJob job) + { + return "bwamem2"; + } } public static class Provider extends AbstractAlignmentStepProvider diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java index abfb1ba3d..794f4c0e9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java @@ -76,7 +76,7 @@ public String getDescription() @Override public AlignmentStep create(PipelineContext context) { - return new BWAAlignmentStep(this, context, new BWAWrapper(context.getLogger())); + return new BWAAlignmentStep<>(this, context, new BWAWrapper(context.getLogger())); } } @@ -84,7 +84,7 @@ public static class BWAAlignmentStep extends Abs { protected boolean _addBtwswArg = true; - public BWAAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, WrapperType wrapper) + public BWAAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx, WrapperType wrapper) { super(provider, ctx, wrapper); } @@ -104,7 +104,7 @@ public String getIndexCachedDirName(PipelineJob job) @Override public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException { - getPipelineCtx().getLogger().info("Creating BWA index"); + getPipelineCtx().getLogger().info("Creating " + getProvider().getName() + " index"); IndexOutputImpl output = new IndexOutputImpl(referenceGenome); File indexDir = new File(outputDir, getIndexCachedDirName(getPipelineCtx().getJob())); @@ -139,7 +139,7 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir) output.appendOutputs(referenceGenome.getWorkingFastaFile(), indexDir); //recache if not already - AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, "bwa", referenceGenome); + AlignerIndexUtil.saveCachedIndex(hasCachedIndex, getPipelineCtx(), indexDir, getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome); return output; } From e1c8606bebd54c2c913c654cf3a5b8108f27e5b9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 09:46:41 -0800 Subject: [PATCH 08/59] Include reference for king install --- .../pipeline_code/extra_tools_install.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git 
a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 983d326fc..3ad144bc5 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -246,4 +246,18 @@ else echo "Already installed" fi +if [[ ! -e ${LKTOOLS_DIR}/king || ! -z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf king* + rm -Rf Linux-king* + rm -Rf $LKTOOLS_DIR/king* + + wget https://www.kingrelatedness.com/Linux-king.tar.gz + tar -xf Linux-king.tar.gz + + install king $LKTOOLS_DIR/ +else + echo "Already installed" +fi From 405f1b27779e025cc0c957049c5129fcff8033a6 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 10:15:40 -0800 Subject: [PATCH 09/59] Add debugging for celltypist in docker --- singlecell/resources/chunks/RunCelltypist.R | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/singlecell/resources/chunks/RunCelltypist.R b/singlecell/resources/chunks/RunCelltypist.R index 571d3028e..1f98079a3 100644 --- a/singlecell/resources/chunks/RunCelltypist.R +++ b/singlecell/resources/chunks/RunCelltypist.R @@ -1,3 +1,31 @@ +if (!reticulate::py_module_available(module = 'celltypist')) { + logger::log_warn('python celltypist not found!') + logger::log_warn(paste0('Python available: ', reticulate::py_available())) + logger::log_warn('Python config') + pyConfig <- reticulate::py_config() + for (pn in names(pyConfig)) { + logger::log_warn(paste0(pn, ': ', paste0(pyConfig[[pn]]), collapse = ',')) + } + + logger::log_warn(paste0('pythonpath: ', reticulate::py_config()$pythonpath)) + + logger::log_warn('Python packages:') + for (pn in reticulate::py_list_packages()$package) { + logger::log_warn(pn) + } + + if ('conga' %in% reticulate::py_list_packages()$package) { + tryCatch({ + logger::log_warn(reticulate::import('celltypist')) + }, error = function(e){ + logger::log_warn("Error with reticulate::import('celltypist')") + 
logger::log_warn(reticulate::py_last_error()) + logger::log_warn(conditionMessage(e)) + traceback() + }) + } +} + for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) From db2b560c87887da0d2d6ed4373c8fa8d0a671353 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 10:27:40 -0800 Subject: [PATCH 10/59] Compress KING output --- .../sequenceanalysis/run/variant/KingInferenceStep.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index aa357509d..9f4895fde 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -23,6 +23,7 @@ import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.util.Compress; import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; import java.io.File; @@ -180,7 +181,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno throw new PipelineJobException("Unable to find file: " + kinshipOutput.getPath()); } - File kinshipOutputTxt = new File(kinshipOutput.getPath() + ".txt"); + File kinshipOutputTxt = new File(kinshipOutput.getPath() + ".txt.gz"); if (kinshipOutputTxt.exists()) { kinshipOutputTxt.delete(); @@ -188,6 +189,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno try { + kinshipOutput = Compress.compressGzip(kinshipOutput); FileUtils.moveFile(kinshipOutput, kinshipOutputTxt); } catch (IOException e) From ccc8554858fbe2ca8761396b5b9847797bf2f0c0 Mon Sep 17 00:00:00 2001 From: bbimber Date: 
Wed, 18 Dec 2024 13:22:20 -0800 Subject: [PATCH 11/59] Another bugfix to bwa/bwa2 index folders --- .../sequenceanalysis/pipeline/ReferenceGenome.java | 2 +- .../run/alignment/BWAMem2Wrapper.java | 6 ++++++ .../run/alignment/BWAMemWrapper.java | 2 +- .../sequenceanalysis/run/alignment/BWAWrapper.java | 13 +++++++++---- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java index eeb5ec226..f0e943867 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java @@ -79,7 +79,7 @@ public interface ReferenceGenome extends Serializable /** * @param name The name used by the aligner to identify its cached directory - * @return The folder expected containing the cached index, which is not guarenteed to exist. See AlignerIndexUtil for related methods. + * @return The folder expected containing the cached index, which is not guaranteed to exist. See AlignerIndexUtil for related methods. 
*/ File getAlignerIndexDir(String name); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index 95aefa2a8..6d3bac2ce 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -53,6 +53,12 @@ public String getIndexCachedDirName(PipelineJob job) } } + @Override + protected String getIndexDirName() + { + return("bwamem2"); + } + public static class Provider extends AbstractAlignmentStepProvider { public Provider() diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java index 68599ecdd..3b2962329 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMemWrapper.java @@ -116,7 +116,7 @@ public void performMemAlignment(PipelineJob job, AlignmentOutputImpl output, Fil } appendThreads(job, bwaArgs); - bwaArgs.add("'" + new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath() + "'"); + bwaArgs.add("'" + new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath() + "'"); bwaArgs.add("'" + inputFastq1.getPath() + "'"); if (inputFastq2 != null) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java index 794f4c0e9..70e240a8c 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java +++ 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAWrapper.java @@ -151,7 +151,7 @@ public final AlignmentOutput performAlignment(Readset rs, List inputFastqs File inputFastq2 = assertSingleFile(inputFastqs2); AlignmentOutputImpl output = new AlignmentOutputImpl(); - AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, "bwa", referenceGenome); + AlignerIndexUtil.copyIndexIfExists(this.getPipelineCtx(), output, getIndexCachedDirName(getPipelineCtx().getJob()), referenceGenome); doPerformAlignment(output, inputFastq1, inputFastq2, outputDirectory, referenceGenome, basename, rs, readGroupId, platformUnit); output.addCommandsExecuted(getWrapper().getCommandsExecuted()); @@ -197,7 +197,7 @@ private File runBWAAln(PipelineJob job, ReferenceGenome referenceGenome, File in args.add("aln"); appendThreads(job, args); args.addAll(bwaAlnArgs); - args.add(new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); + args.add(new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); args.add(inputFile.getPath()); File output = new File(getOutputDir(inputFile), inputFile.getName() + ".sai"); @@ -230,12 +230,12 @@ protected void performBwaAlignment(PipelineJob job, AlignmentOutputImpl output, throw new PipelineJobException("Reference FASTA does not exist: " + referenceGenome.getWorkingFastaFile().getPath()); } - File expectedIndex = new File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile()) + ".bwa.index.sa"); + File expectedIndex = new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile()) + ".bwa.index.sa"); if (!expectedIndex.exists()) { throw new PipelineJobException("Expected index does not exist: " + expectedIndex); } - args.add(new 
File(referenceGenome.getAlignerIndexDir("bwa"), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); + args.add(new File(referenceGenome.getAlignerIndexDir(getIndexDirName()), FileUtil.getBaseName(referenceGenome.getWorkingFastaFile().getName()) + ".bwa.index").getPath()); //add SAI args.add(sai1.getPath()); @@ -283,4 +283,9 @@ public File getExe() { return SequencePipelineService.get().getExeForPackage("BWAPATH", "bwa"); } + + protected String getIndexDirName() + { + return("bwa"); + } } From f75fee6095af86e5d2d9d16b5bfc64e910a658a5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 13:24:11 -0800 Subject: [PATCH 12/59] Cache bwamem2 indexes by default --- .../sequenceanalysis/pipeline/CacheAlignerIndexesTask.java | 2 +- .../labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java index 9663f4570..08fcaca50 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CacheAlignerIndexesTask.java @@ -63,7 +63,7 @@ public List getProtocolActionNames() } @Override - public PipelineJob.Task createTask(PipelineJob job) + public PipelineJob.Task createTask(PipelineJob job) { return new CacheAlignerIndexesTask(this, job); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java index 6d3bac2ce..73d1e6412 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/BWAMem2Wrapper.java @@ -74,6 +74,8 @@ public Provider() }}, null) ), null, 
"https://github.com/bwa-mem2/bwa-mem2", true, true); + + setAlwaysCacheIndex(true); } @Override From 8e95311c27abf1ab6b16f22cc6c97ff1f7850cee Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 15:48:26 -0800 Subject: [PATCH 13/59] Improve logged message --- .../labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java index ddbaba6ed..1e55f7e1a 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java @@ -40,7 +40,7 @@ public static boolean copyIndexIfExists(PipelineContext ctx, AlignmentOutputImpl public static boolean copyIndexIfExists(PipelineContext ctx, AlignmentOutputImpl output, String localName, String webserverName, ReferenceGenome genome, boolean forceCopyLocal) throws PipelineJobException { - ctx.getLogger().debug("copying index to shared dir if exists: " + localName); + ctx.getLogger().debug("checking if index exists: " + localName + ". 
copy local: " + forceCopyLocal); if (ctx.getWorkDir() == null) { throw new PipelineJobException("PipelineContext.getWorkDir() is null"); From bc29d9ffa78ff0be658b30e159d11d2e94d6c5cd Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 18 Dec 2024 15:53:02 -0800 Subject: [PATCH 14/59] Bugfix to PedigreeToolParameterDescriptor --- .../pipeline/PedigreeToolParameterDescriptor.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java index c68087c10..01e0cd580 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/PedigreeToolParameterDescriptor.java @@ -13,11 +13,12 @@ public PedigreeToolParameterDescriptor() this(true); } - public PedigreeToolParameterDescriptor(boolean isRequired) + public PedigreeToolParameterDescriptor(final boolean isRequired) { - super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject()); + super(null, NAME, "Pedigree Source", "This is the table used for pedigree data", "laboratory-pedigreeselectorfield", "laboratory.subjects", new JSONObject(){{ + put("allowBlank", !isRequired); + }}); - getAdditionalExtConfig().put("allowBlank", isRequired); _isRequired = isRequired; } From e33f944d6ed5cc21a25a60244c27b4c4fd307968 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 19 Dec 2024 16:51:40 -0800 Subject: [PATCH 15/59] Bugfix to KingInferenceStep and pedigree --- .../api/sequenceanalysis/pipeline/VariantProcessingStep.java | 2 +- .../sequenceanalysis/run/variant/KingInferenceStep.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java index 67ba4ab47..545815b0b 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java @@ -88,7 +88,7 @@ default String getDemographicsProviderName(PipelineStepProvider provider, Pip return provider.getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(job, provider, stepIdx, String.class); } - default DemographicsProvider getDemographicsProvider(PipelineStepProvider provider, PipelineJob job, int stepIdx) + default @Nullable DemographicsProvider getDemographicsProvider(PipelineStepProvider provider, PipelineJob job, int stepIdx) { if (PipelineJobService.get().getLocationType() != PipelineJobService.LocationType.WebServer) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 9f4895fde..275b527a9 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -31,14 +31,14 @@ import java.util.ArrayList; import java.util.List; -public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep, VariantProcessingStep.RequiresPedigree +public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep { public KingInferenceStep(PipelineStepProvider provider, PipelineContext ctx) { super(provider, ctx, new KingInferenceStep.KingWrapper(ctx.getLogger())); } - public static class Provider extends AbstractVariantProcessingStepProvider + public static class Provider extends 
AbstractVariantProcessingStepProvider implements VariantProcessingStep.SupportsPedigree { public Provider() { From de3d5f45a4358029140141bf221fd5fcc5ea5a8e Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 20 Dec 2024 06:19:22 -0800 Subject: [PATCH 16/59] Manually create ped file for KING --- .../run/variant/KingInferenceStep.java | 89 ++++++++++++++++--- 1 file changed, 76 insertions(+), 13 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 275b527a9..0fdb38865 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -11,6 +11,7 @@ import org.jetbrains.annotations.Nullable; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.reader.Readers; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; import org.labkey.api.sequenceanalysis.pipeline.PedigreeToolParameterDescriptor; @@ -24,12 +25,18 @@ import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.util.Compress; +import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; public class KingInferenceStep extends AbstractCommandPipelineStep implements VariantProcessingStep { @@ -121,19 +128,6 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno plinkArgs.add("--max-alleles"); 
plinkArgs.add("2"); - String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); - if (demographicsProviderName != null) - { - File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); - if (!pedFile.exists()) - { - throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); - } - - plinkArgs.add("--ped"); - plinkArgs.add(pedFile.getPath()); - } - plinkArgs.add("--out"); plinkArgs.add(plinkOut.getPath()); @@ -166,6 +160,23 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno kingArgs.add("--prefix"); kingArgs.add(SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName())); + // Update the pedigree / fam file: + String demographicsProviderName = getProvider().getParameterByName(PedigreeToolParameterDescriptor.NAME).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx()); + if (demographicsProviderName != null) + { + File pedFile = ProcessVariantsHandler.getPedigreeFile(getPipelineCtx().getSourceDirectory(true), demographicsProviderName); + if (!pedFile.exists()) + { + throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); + } + + File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam"), kingArgs); + kingArgs.add("--ped"); + kingArgs.add(kingFam.getPath()); + + output.addIntermediateFile(kingFam); + } + if (threads != null) { kingArgs.add("--cpus"); @@ -202,6 +213,58 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno return output; } + private File createFamFile(File pedFile, File famFile, List kingArgs) throws PipelineJobException + { + File newFamFile = new File(famFile.getParentFile(), "king.fam"); + + Map pedMap = new HashMap<>(); + try (BufferedReader reader = Readers.getReader(pedFile)) + { + String line; + 
while ((line = reader.readLine()) != null) + { + String[] tokens = line.split(" "); + if (tokens.length != 6) + { + throw new PipelineJobException("Improper ped line length: " + tokens.length); + } + + pedMap.put(tokens[1], StringUtils.join(Arrays.asList("0", tokens[1], tokens[2], tokens[3], tokens[4], "-9"), "\t")); + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + try (BufferedReader reader = Readers.getReader(famFile);PrintWriter writer = PrintWriters.getPrintWriter(newFamFile)) + { + String line; + while ((line = reader.readLine()) != null) + { + String[] tokens = line.split("\t"); + if (tokens.length != 6) + { + throw new PipelineJobException("Improper ped line length: " + tokens.length); + } + + String newRow = pedMap.get(tokens[1]); + if (newRow == null) + { + throw new PipelineJobException("Unable to find pedigree entry for: " + tokens[1]); + } + + writer.println(newRow); + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + return newFamFile; + } + public static class KingWrapper extends AbstractCommandWrapper { public KingWrapper(@Nullable Logger logger) From 93508f2133b882a01c75d6005dc5804cec842d72 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 20 Dec 2024 09:35:32 -0800 Subject: [PATCH 17/59] Allow missing pedigree data --- .../run/variant/KingInferenceStep.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java index 0fdb38865..f67b412ae 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; import org.json.JSONObject; +import 
org.labkey.api.collections.CaseInsensitiveHashMap; import org.labkey.api.pipeline.PipelineJobException; import org.labkey.api.reader.Readers; import org.labkey.api.sequenceanalysis.SequenceAnalysisService; @@ -170,7 +171,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno throw new PipelineJobException("Unable to find pedigree file: " + pedFile.getPath()); } - File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam"), kingArgs); + File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam")); kingArgs.add("--ped"); kingArgs.add(kingFam.getPath()); @@ -213,11 +214,11 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno return output; } - private File createFamFile(File pedFile, File famFile, List kingArgs) throws PipelineJobException + private File createFamFile(File pedFile, File famFile) throws PipelineJobException { File newFamFile = new File(famFile.getParentFile(), "king.fam"); - Map pedMap = new HashMap<>(); + Map pedMap = new CaseInsensitiveHashMap<>(); try (BufferedReader reader = Readers.getReader(pedFile)) { String line; @@ -251,10 +252,13 @@ private File createFamFile(File pedFile, File famFile, List kingArgs) th String newRow = pedMap.get(tokens[1]); if (newRow == null) { - throw new PipelineJobException("Unable to find pedigree entry for: " + tokens[1]); + getPipelineCtx().getLogger().warn("Unable to find pedigree entry for: " + tokens[1] + ", reusing original"); + writer.println(line); + } + else + { + writer.println(newRow); } - - writer.println(newRow); } } catch (IOException e) From aa223ba4168c6d1241087adb556eec069d8d8b63 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 21 Dec 2024 07:16:56 -0800 Subject: [PATCH 18/59] Update to nimble para parsing --- .../labkey/singlecell/pipeline/singlecell/AppendNimble.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index b023676bb..2fade274d 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -1,5 +1,6 @@ package org.labkey.singlecell.pipeline.singlecell; +import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; import org.json.JSONObject; import org.labkey.api.pipeline.PipelineJobException; @@ -101,10 +102,10 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Sat, 21 Dec 2024 17:02:36 -0800 Subject: [PATCH 19/59] Support decoupleR --- singlecell/resources/chunks/RunDecoupler.R | 12 ++++++ .../labkey/singlecell/SingleCellModule.java | 2 + .../pipeline/singlecell/RunDecoupler.java | 37 +++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 singlecell/resources/chunks/RunDecoupler.R create mode 100644 singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java diff --git a/singlecell/resources/chunks/RunDecoupler.R b/singlecell/resources/chunks/RunDecoupler.R new file mode 100644 index 000000000..4be28bbdb --- /dev/null +++ b/singlecell/resources/chunks/RunDecoupler.R @@ -0,0 +1,12 @@ +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + seuratObj <- CellMembrane::RunDecoupler(seuratObj) + + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 922c93ee9..5588d1677 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -82,6 +82,7 @@ import 
org.labkey.singlecell.pipeline.singlecell.RunCelltypistCustomModel; import org.labkey.singlecell.pipeline.singlecell.RunConga; import org.labkey.singlecell.pipeline.singlecell.RunCsCore; +import org.labkey.singlecell.pipeline.singlecell.RunDecoupler; import org.labkey.singlecell.pipeline.singlecell.RunEscape; import org.labkey.singlecell.pipeline.singlecell.RunLDA; import org.labkey.singlecell.pipeline.singlecell.RunPCA; @@ -291,6 +292,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new CustomGSEA.Provider()); SequencePipelineService.get().registerPipelineStep(new StudyMetadata.Provider()); SequencePipelineService.get().registerPipelineStep(new UpdateSeuratPrototype.Provider()); + SequencePipelineService.get().registerPipelineStep(new RunDecoupler.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java new file mode 100644 index 000000000..211d8d935 --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunDecoupler.java @@ -0,0 +1,37 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SingleCellStep; + +import java.util.Arrays; + +public class RunDecoupler extends AbstractCellMembraneStep +{ + public RunDecoupler(PipelineContext ctx, RunDecoupler.Provider provider) + { + super(provider, ctx); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public Provider() + { + super("RunDecoupler", "Run decoupleR", "decoupleR", "This will run decoupleR to score transcription factor enrichment.", Arrays.asList( + + ), null, null); + } + + @Override + public RunDecoupler 
create(PipelineContext ctx) + { + return new RunDecoupler(ctx, this); + } + } + + @Override + public String getFileSuffix() + { + return "decoupler"; + } +} \ No newline at end of file From a901f3ee190e6a0ac8d49c4a289d9d4a11177150 Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 24 Dec 2024 08:54:34 -0800 Subject: [PATCH 20/59] Fix typo in decoupleR --- .../sequenceanalysis/SequenceAnalysisMaintenanceTask.java | 8 ++++---- singlecell/resources/chunks/RunDecoupler.R | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java index e6b593698..57ebce8cb 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java @@ -63,7 +63,7 @@ public SequenceAnalysisMaintenanceTask() @Override public String getDescription() { - return "Delete SequenceAnalysis Artifacts"; + return "SequenceAnalysis File Maintenance"; } @Override @@ -293,12 +293,12 @@ else if (sf.getFilePath() == null) private void processContainer(Container c, Logger log) throws IOException, PipelineJobException { + if (!c.isWorkbook()) + log.info("processing container: " + c.getPath()); + PipeRoot root = PipelineService.get().getPipelineRootSetting(c); if (root != null && !root.isCloudRoot()) { - if (!c.isWorkbook()) - log.info("processing container: " + c.getPath()); - //first sequences File sequenceDir = new File(root.getRootPath(), ".sequences"); TableInfo tableRefNtSequences = SequenceAnalysisSchema.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES); diff --git a/singlecell/resources/chunks/RunDecoupler.R b/singlecell/resources/chunks/RunDecoupler.R index 4be28bbdb..6e4656164 100644 --- a/singlecell/resources/chunks/RunDecoupler.R +++ b/singlecell/resources/chunks/RunDecoupler.R @@ -2,7 +2,7 @@ 
for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- CellMembrane::RunDecoupler(seuratObj) + seuratObj <- CellMembrane::RunDecoupleR(seuratObj) saveData(seuratObj, datasetId) From ae174608b7959b69e49fabfdb81a9f68d687b5ff Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 27 Dec 2024 13:49:11 -0800 Subject: [PATCH 21/59] Improve error message --- singlecell/src/org/labkey/singlecell/run/NimbleHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index fd6fef01d..8dffb88ae 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -339,7 +339,7 @@ private void updateNimbleConfigFile(File configFile, NimbleGenome genome) throws } catch (IOException e) { - throw new PipelineJobException(e); + throw new PipelineJobException("Unable to parse JSON: " + configFile.getPath(), e); } JSONObject config = json.getJSONObject(0); From 2f856341103040715eb2b5f0b2553bd35d31a238 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 2 Jan 2025 11:43:37 -0800 Subject: [PATCH 22/59] Download the uncompressed SRA files to the temp folder, not work dir --- .../pipeline/SequenceAlignmentTask.java | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index b254cb86d..7c35dfda5 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1933,8 +1933,22 @@ else if (sraIDs.contains(rd.getSra_accession())) continue; } + File unzippedOutDir = new 
File(SequencePipelineService.get().getJavaTempDir(), "cachedReadDataGz"); + getTaskFileManagerImpl().addIntermediateFile(unzippedOutDir); + if (unzippedOutDir.exists()) + { + try + { + FileUtils.deleteDirectory(unzippedOutDir); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); - getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); + getTaskFileManagerImpl().addIntermediateFile(outDir); File doneFile = new File(outDir, rd.getSra_accession() + ".done"); sraIDs.add(rd.getSra_accession()); @@ -1949,18 +1963,42 @@ else if (sraIDs.contains(rd.getSra_accession())) } else { - if (!outDir.exists()) + try { + if (outDir.exists()) + { + getHelper().getLogger().debug("Deleting existing folder: " + outDir.getPath()); + FileUtils.deleteDirectory(outDir); + } + outDir.mkdirs(); - } - Pair downloaded = sra.downloadSra(rd.getSra_accession(), outDir, rd.isPairedEnd()); - rdi.setFile(downloaded.first, 1); - rdi.setFile(downloaded.second, 2); + Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd()); + File moved1 = new File(outDir, downloaded.first.getName()); + if (moved1.exists()) + { + moved1.delete(); + } + FileUtils.moveFile(downloaded.first, moved1); + rdi.setFile(moved1, 1); + + if (downloaded.second != null) + { + File moved2 = new File(outDir, downloaded.second.getName()); + if (moved2.exists()) + { + moved2.delete(); + } + FileUtils.moveFile(downloaded.second, moved2); + rdi.setFile(moved2, 2); + } - try - { Files.touch(doneFile); + + if (unzippedOutDir.exists()) + { + FileUtils.deleteDirectory(unzippedOutDir); + } } catch (IOException e) { From 7aade9e0a3c611379ceb5c71d394387db8218908 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 2 Jan 2025 17:03:54 -0800 Subject: [PATCH 23/59] Improve logging --- .../labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java | 1 + 1 file changed, 1 insertion(+) diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 7c35dfda5..9c58d3bb8 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1955,6 +1955,7 @@ else if (sraIDs.contains(rd.getSra_accession())) RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger()); if (doneFile.exists()) { + getPipelineJob().getLogger().debug("Re-using existing SRA download: " + rd.getSra_accession()); rdi.setFile(new File(outDir, rd.getSra_accession() + (rd.isPairedEnd() ? "_1" : "") + ".fastq.gz"), 1); if (rd.getFileId2() != null) { From 470319be51a86b11f1d8f49f99a6e346458bf234 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 2 Jan 2025 17:09:26 -0800 Subject: [PATCH 24/59] SRA download needs to use addDeferredIntermediateFile() --- .../labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 9c58d3bb8..4d31296d1 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1948,7 +1948,7 @@ else if (sraIDs.contains(rd.getSra_accession())) } File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); - getTaskFileManagerImpl().addIntermediateFile(outDir); + getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); // NOTE: this must be deferred so it remains until the end File doneFile = new File(outDir, rd.getSra_accession() + ".done"); sraIDs.add(rd.getSra_accession()); From 
696619e44d14d49a949f2edd4e08aabd6652ab36 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 5 Jan 2025 17:53:45 -0800 Subject: [PATCH 25/59] Bugfix to SRA download with multiple file pairs --- .../pipeline/SequenceAlignmentTask.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 4d31296d1..b8edd0a16 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1968,8 +1968,14 @@ else if (sraIDs.contains(rd.getSra_accession())) { if (outDir.exists()) { - getHelper().getLogger().debug("Deleting existing folder: " + outDir.getPath()); - FileUtils.deleteDirectory(outDir); + getHelper().getLogger().debug("Inspecting existing folder: " + outDir.getPath()); + Arrays.stream(outDir.listFiles()).forEach(f -> { + if (f.getName().startsWith(rd.getSra_accession())) + { + getPipelineJob().getLogger().debug("Deleting existing file: " + f.getPath()); + f.delete(); + } + }); } outDir.mkdirs(); @@ -1980,6 +1986,8 @@ else if (sraIDs.contains(rd.getSra_accession())) { moved1.delete(); } + + getPipelineJob().getLogger().debug("Moving gzipped file to: " + moved1.getPath()); FileUtils.moveFile(downloaded.first, moved1); rdi.setFile(moved1, 1); @@ -1990,6 +1998,7 @@ else if (sraIDs.contains(rd.getSra_accession())) { moved2.delete(); } + getPipelineJob().getLogger().debug("Moving gzipped file to: " + moved2.getPath()); FileUtils.moveFile(downloaded.second, moved2); rdi.setFile(moved2, 2); } From 31629fd54944672714d0dc4e935e90fd934b22dc Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 5 Jan 2025 17:54:25 -0800 Subject: [PATCH 26/59] Bugfix to SRA download with multiple file pairs --- .../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 6 ++++-- 1 
file changed, 4 insertions(+), 2 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index b8edd0a16..4f4a0c8f8 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -1977,8 +1977,10 @@ else if (sraIDs.contains(rd.getSra_accession())) } }); } - - outDir.mkdirs(); + else + { + outDir.mkdirs(); + } Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd()); File moved1 = new File(outDir, downloaded.first.getName()); From ebbf8c70b8ae764d3f1aaadda6e8026af5529f6a Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 7 Jan 2025 12:30:44 -0800 Subject: [PATCH 27/59] Allow docker jobs to use a local container cache --- .../pipeline/SequencePipelineService.java | 2 + .../sequenceanalysis/run/DockerWrapper.java | 77 +++++++++++++++++-- .../SequencePipelineServiceImpl.java | 12 +++ 3 files changed, 86 insertions(+), 5 deletions(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java index dd19bba68..b57555a1c 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java @@ -100,6 +100,8 @@ static public void setInstance(SequencePipelineService instance) */ abstract public String getDockerCommand(); + abstract public boolean useLocalDockerContainerStorage(); + abstract public Collection getDockerVolumes(Container c); /** diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java 
b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index a91cea5c7..d0a3fd26f 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -1,5 +1,6 @@ package org.labkey.api.sequenceanalysis.run; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.Nullable; @@ -7,6 +8,7 @@ import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; import java.io.File; @@ -28,7 +30,8 @@ public class DockerWrapper extends AbstractCommandWrapper private final PipelineContext _ctx; private File _tmpDir = null; private String _entryPoint = null; - private boolean _runPrune = true; + private boolean _runPrune = false; + private boolean _useLocalContainerStorage; private String _alternateUserHome = null; private final Map _dockerEnvironment = new HashMap<>(); @@ -37,6 +40,8 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx) super(log); _containerName = containerName; _ctx = ctx; + + _useLocalContainerStorage = SequencePipelineService.get().useLocalDockerContainerStorage(); } public void setAlternateUserHome(String alternateUserHome) @@ -59,6 +64,11 @@ public void setRunPrune(boolean runPrune) _runPrune = runPrune; } + public void setUseLocalContainerStorage(boolean useLocalContainerStorage) + { + _useLocalContainerStorage = useLocalContainerStorage; + } + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException { executeWithDocker(containerArgs, workDir, tracker, null); @@ -79,14 +89,19 @@ public void 
executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("set -e"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull " + _containerName); + writer.println("$DOCKER pull " + getLocalStorageArgs() + getEffectiveContainerName()); if (_runPrune) { - writer.println("$DOCKER image prune -f"); + writer.println("$DOCKER image prune " + getLocalStorageArgs() + "-f"); } writer.println("$DOCKER run --rm=true \\"); - writer.println("\t--group-add keep-groups \\"); + if (_useLocalContainerStorage) + { + getLogger().debug("Using local container storage: " + getLocalContainerDir().getPath()); + prepareLocalStorage(); + writer.println("\t" + getLocalStorageArgs() + "\\"); + } // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly: File homeDir = new File(System.getProperty("user.home")); @@ -149,7 +164,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline { writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\"); } - writer.println("\t" + _containerName + " \\"); + writer.println("\t" + getEffectiveContainerName() + " \\"); writer.println("\t" + dockerBashScript.getPath()); writer.println("DOCKER_EXIT_CODE=$?"); writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE"); @@ -170,6 +185,23 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline localBashScript.setExecutable(true); dockerBashScript.setExecutable(true); execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + + if (_useLocalContainerStorage) + { + try + { + FileUtils.deleteDirectory(getLocalContainerDir()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + } + + private String getEffectiveContainerName() + { + return _containerName; } public void addToDockerEnvironment(String key, String value) @@ -203,4 +235,39 @@ private Collection inspectInputFiles(Collection 
inputFiles) return Collections.emptySet(); } + + private File getLocalContainerDir() + { + return new File(SequencePipelineService.get().getJavaTempDir(), "containers"); + } + + private File prepareLocalStorage() throws PipelineJobException + { + try + { + if (getLocalContainerDir().exists()) + { + getLogger().debug("Deleting existing container dir: " + getLocalContainerDir()); + FileUtils.deleteDirectory(getLocalContainerDir()); + } + + FileUtil.createDirectory(getLocalContainerDir().toPath()); + + return getLocalContainerDir(); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + private String getLocalStorageArgs() + { + if (!_useLocalContainerStorage) + { + return ""; + } + + return "--root=" + getLocalContainerDir().getPath() + " "; + } } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java index bfaaaba78..60f02bd15 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java @@ -459,6 +459,18 @@ public String getDockerCommand() return "docker"; } + @Override + public boolean useLocalDockerContainerStorage() + { + String value = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("USE_LOCAL_DOCKER_STORAGE"); + if (StringUtils.trimToNull(value) == null) + { + return false; + } + + return Boolean.parseBoolean(value); + } + @Override public Collection getDockerVolumes(Container c) { From d0ca84aa4de9e0aa1e7df2b6d5a28a4089f4f4ae Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 7 Jan 2025 13:17:42 -0800 Subject: [PATCH 28/59] More active deletion of temp files during SRA download --- .../pipeline/SequenceAlignmentTask.java | 42 ++++++++++++++++++- .../labkey/singlecell/run/NimbleHelper.java | 15 +++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 4f4a0c8f8..0ec9d6ac6 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -702,6 +702,20 @@ private void alignSet(Readset rs, String basename, Map sraIDs = new HashSet<>(); for (ReadData rd : rs.getReadData()) { @@ -1947,7 +1987,7 @@ else if (sraIDs.contains(rd.getSra_accession())) } } - File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData"); + File outDir = getCachedReadDataDir(); getTaskFileManagerImpl().addDeferredIntermediateFile(outDir); // NOTE: this must be deferred so it remains until the end File doneFile = new File(outDir, rd.getSra_accession() + ".done"); diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index 8dffb88ae..d6cf466a1 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -460,6 +460,21 @@ private Map doAlignment(List genomes, List Date: Wed, 8 Jan 2025 15:05:12 -0800 Subject: [PATCH 29/59] Make DockerWrapper check whether pull needed before automatically pulling --- .../api/sequenceanalysis/run/DockerWrapper.java | 14 +++++++++++++- .../pipeline_code/extra_tools_install.sh | 13 +++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index d0a3fd26f..652a77a52 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -89,7 +89,19 @@ public 
void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("set -e"); writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); - writer.println("$DOCKER pull " + getLocalStorageArgs() + getEffectiveContainerName()); + + writer.println("IMAGE_EXISTS=`$DOCKER images -q \"" + getEffectiveContainerName() + "\" | wc -l`"); + writer.println("LOCAL=not_present"); + writer.println("if [ $IMAGE_EXISTS > 0 ];then"); + writer.println("\tLOCAL=`docker inspect --format='{{.Digest}}' " + getEffectiveContainerName() + "`"); + writer.println("fi"); + writer.println("LATEST=`regctl image digest --list " + getEffectiveContainerName() + "`"); + writer.println("if [ $LOCAL != $LATEST ];then"); + writer.println("\t$DOCKER pull " + getLocalStorageArgs() + getEffectiveContainerName()); + writer.println("else"); + writer.println("\techo 'Image up to date'"); + writer.println("fi"); + if (_runPrune) { writer.println("$DOCKER image prune " + getLocalStorageArgs() + "-f"); diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 3ad144bc5..dae691917 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -261,3 +261,16 @@ else echo "Already installed" fi +if [[ ! -e ${LKTOOLS_DIR}/regctl || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf regctl* + rm -Rf $LKTOOLS_DIR/regctl* + + curl -L https://github.com/regclient/regclient/releases/latest/download/regctl-linux-amd64 > regctl + chmod 755 regctl + + install regctl $LKTOOLS_DIR/ +else + echo "Already installed" +fi From adc2889b54909e9b64f45aeb18b0767982f43546 Mon Sep 17 00:00:00 2001 From: bbimber Date: Wed, 8 Jan 2025 16:27:28 -0800 Subject: [PATCH 30/59] Bugfix to DockerWrapper --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 652a77a52..3a61bf728 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -92,7 +92,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("IMAGE_EXISTS=`$DOCKER images -q \"" + getEffectiveContainerName() + "\" | wc -l`"); writer.println("LOCAL=not_present"); - writer.println("if [ $IMAGE_EXISTS > 0 ];then"); + writer.println("if [[ $IMAGE_EXISTS > 0 ]];then"); writer.println("\tLOCAL=`docker inspect --format='{{.Digest}}' " + getEffectiveContainerName() + "`"); writer.println("fi"); writer.println("LATEST=`regctl image digest --list " + getEffectiveContainerName() + "`"); From 333d25bf65426fb43cdfeb30e450c070f00c28af Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 9 Jan 2025 05:46:19 -0800 Subject: [PATCH 31/59] Restore --group-add to docker --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 3a61bf728..16c526909 
100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -108,6 +108,7 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline } writer.println("$DOCKER run --rm=true \\"); + writer.println("\t--group-add keep-groups \\"); if (_useLocalContainerStorage) { getLogger().debug("Using local container storage: " + getLocalContainerDir().getPath()); From 892c793311aa227fa5d5681ec61d0f880147065d Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 13 Jan 2025 12:45:51 -0800 Subject: [PATCH 32/59] Remove -S from fasterq-dump --- .../org/labkey/api/sequenceanalysis/run/DockerWrapper.java | 2 ++ .../org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java index 16c526909..dfc2bacc1 100644 --- a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -109,6 +109,8 @@ public void executeWithDocker(List containerArgs, File workDir, Pipeline writer.println("$DOCKER run --rm=true \\"); writer.println("\t--group-add keep-groups \\"); + writer.println("\t--transient-store \\"); + if (_useLocalContainerStorage) { getLogger().debug("Using local container storage: " + getLocalContainerDir().getPath()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java index c70726ec6..1970b4fb7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java @@ -464,7 +464,7 @@ public Pair 
downloadSra(String dataset, File outDir, boolean expectP List args = new ArrayList<>(); args.add(getExe().getPath()); - args.add("-S"); + // NOTE: we probably want the --split-3 behavior, which is the default for fasterq-dump args.add("--include-technical"); Integer threads = SequencePipelineService.get().getMaxThreads(getLogger()); From 806a1fb2698c21afe65f6c194d5618194a38d1c9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 16 Jan 2025 10:46:49 -0800 Subject: [PATCH 33/59] Drop --include-technical --- .../sequenceanalysis/pipeline/SequenceAlignmentTask.java | 2 +- .../sequenceanalysis/run/RestoreSraDataHandler.java | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java index 0ec9d6ac6..19ea891a7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java @@ -2022,7 +2022,7 @@ else if (sraIDs.contains(rd.getSra_accession())) outDir.mkdirs(); } - Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd()); + Pair downloaded = sra.downloadSra(rd.getSra_accession(), unzippedOutDir, rd.isPairedEnd(), false); File moved1 = new File(outDir, downloaded.first.getName()); if (moved1.exists()) { diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java index 1970b4fb7..8779afda1 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/RestoreSraDataHandler.java @@ -384,7 +384,7 @@ public void processFilesRemote(List readsets, JobContext ctx) throws Un File expectedFile2 = rd.getFileId2() == null ? 
null : ctx.getSequenceSupport().getCachedData(rd.getFileId2()); FastqDumpWrapper wrapper = new FastqDumpWrapper(ctx.getLogger()); - Pair files = wrapper.downloadSra(accession, ctx.getOutputDir(), rd.isPairedEnd()); + Pair files = wrapper.downloadSra(accession, ctx.getOutputDir(), rd.isPairedEnd(), false); long lines1 = SequenceUtil.getLineCount(files.first) / 4; ctx.getJob().getLogger().debug("Reads in " + files.first.getName() + ": " + lines1); @@ -459,13 +459,16 @@ public FastqDumpWrapper(@Nullable Logger logger) super(logger); } - public Pair downloadSra(String dataset, File outDir, boolean expectPaired) throws PipelineJobException + public Pair downloadSra(String dataset, File outDir, boolean expectPaired, boolean includeTechnical) throws PipelineJobException { List args = new ArrayList<>(); args.add(getExe().getPath()); // NOTE: we probably want the --split-3 behavior, which is the default for fasterq-dump - args.add("--include-technical"); + if (includeTechnical) + { + args.add("--include-technical"); + } Integer threads = SequencePipelineService.get().getMaxThreads(getLogger()); if (threads != null) From 569af02b701dfbbd027d17ae491aaf4b33c908db Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 18 Jan 2025 14:47:09 -0800 Subject: [PATCH 34/59] Support minAllowableSingletRate in cell hashing --- .../org/labkey/api/singlecell/CellHashingService.java | 3 +++ .../src/org/labkey/singlecell/CellHashingServiceImpl.java | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java index a6f314dd3..9939f706c 100644 --- a/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java +++ b/singlecell/api-src/org/labkey/api/singlecell/CellHashingService.java @@ -104,6 +104,7 @@ public static class CellHashingParameters public Integer minCountPerCell = 5; public Double majorityConsensusThreshold = null; public Double 
minAllowableDoubletRateFilter = null; + public Double minAllowableSingletRate = null; public Double callerDisagreementThreshold = null; public List methods = CALLING_METHOD.getDefaultConsensusMethods(); //Default to just executing the set used for default consensus calls, rather than additional ones public List consensusMethods = null; @@ -127,6 +128,7 @@ public static CellHashingService.CellHashingParameters createFromStep(SequenceOu ret.minCountPerCell = step.getProvider().getParameterByName("minCountPerCell").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Integer.class, 3); ret.majorityConsensusThreshold = step.getProvider().getParameterByName("majorityConsensusThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.minAllowableDoubletRateFilter = step.getProvider().getParameterByName("minAllowableDoubletRateFilter").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); + ret.minAllowableSingletRate = step.getProvider().getParameterByName("minAllowableSingletRate").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.callerDisagreementThreshold = step.getProvider().getParameterByName("callerDisagreementThreshold").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Double.class, null); ret.doTSNE = step.getProvider().getParameterByName("doTSNE").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false); ret.doNotAllowResume = step.getProvider().getParameterByName("doNotAllowResume").extractValue(ctx.getJob(), step.getProvider(), step.getStepIdx(), Boolean.class, false); @@ -171,6 +173,7 @@ public static CellHashingParameters createFromJson(BARCODE_TYPE type, File webse ret.minCountPerCell = params.optInt("minCountPerCell", 3); ret.majorityConsensusThreshold = params.get("majorityConsensusThreshold") == null ? 
null : params.getDouble("majorityConsensusThreshold"); ret.minAllowableDoubletRateFilter = params.get("minAllowableDoubletRateFilter") == null ? null : params.getDouble("minAllowableDoubletRateFilter"); + ret.minAllowableSingletRate = params.get("minAllowableSingletRate") == null ? null : params.getDouble("minAllowableSingletRate"); ret.callerDisagreementThreshold = params.get("callerDisagreementThreshold") == null ? null : params.getDouble("callerDisagreementThreshold"); ret.doTSNE = params.optBoolean("doTSNE", false); ret.doNotAllowResume = params.optBoolean("doNotAllowResume", false); diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index 81629915c..ef3f80482 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -979,6 +979,11 @@ public List getHashingCallingParams(boolean allowMethod put("maxValue", 1); put("decimalPrecision", 2); }}, 0.2), + ToolParameterDescriptor.create("minAllowableSingletRate", "Min Allowable Singlet Rate", "This is an optional threshold designed to automatically discard poorly performing callers. 
If a given algorithm returns a singlet rate below this level, it is discarded from the consensus.", "ldk-numberfield", new JSONObject(){{ + put("minValue", 0); + put("maxValue", 1); + put("decimalPrecision", 2); + }}, 0.05), ToolParameterDescriptor.create("skipNormalizationQc", "Skip Normalization QC", null, "checkbox", null, true), ToolParameterDescriptor.create("doNotAllowResume", "Do Not Allow Resume", "If checked, on resume the job will repeat hashing scoring, rather than allowing resume from a saved state", "checkbox", null, false), ToolParameterDescriptor.create("doTSNE", "Do tSNE", "If true, tSNE will be performed as part of QC", "checkbox", null, false), @@ -1275,6 +1280,7 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St ", majorityConsensusThreshold = " + (parameters.majorityConsensusThreshold == null ? "NULL" : parameters.majorityConsensusThreshold) + ", callerDisagreementThreshold = " + (parameters.callerDisagreementThreshold == null ? "NULL" : parameters.callerDisagreementThreshold) + (parameters.minAllowableDoubletRateFilter == null ? "" : ", minAllowableDoubletRateFilter = " + parameters.minAllowableDoubletRateFilter) + + (parameters.minAllowableSingletRate == null ? 
"" : ", minAllowableSingletRate = " + parameters.minAllowableSingletRate) + ", doTSNE = " + doTSNE + ")"); writer.println("print('Rmarkdown complete')"); From 846a5a225e48c4d5154e23c6a7601a821d40d97a Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 20 Jan 2025 15:26:35 -0800 Subject: [PATCH 35/59] Fix bug in logging --- singlecell/resources/chunks/SaveData.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/chunks/SaveData.R b/singlecell/resources/chunks/SaveData.R index 307704ae8..46002cb05 100644 --- a/singlecell/resources/chunks/SaveData.R +++ b/singlecell/resources/chunks/SaveData.R @@ -11,8 +11,8 @@ if (length(errorMessages) > 0) { write(errorMessages, file = 'seuratErrors.txt') } -if (file.exists('savedSeuratObjects.txt')) { - print(paste0('Total lines in savedSeuratObjects.txt:', length(readLines('savedSeuratObjects.txt')))) +if (file.exists(trackerFile)) { + print(paste0('Total lines in ', trackerFile, ':', length(readLines(trackerFile)))) } else { - print('File does not exist: savedSeuratObjects.txt') + print(paste0('File does not exist: ', trackerFile)) } \ No newline at end of file From 5a5740f05df2336ec49707c0781c16b2780200ad Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 20 Jan 2025 16:29:29 -0800 Subject: [PATCH 36/59] Include all algorithms in hashing, even if some are not in consensusMethods --- .../org/labkey/singlecell/CellHashingServiceImpl.java | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java index ef3f80482..a291d1aa7 100644 --- a/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java +++ b/singlecell/src/org/labkey/singlecell/CellHashingServiceImpl.java @@ -1238,15 +1238,8 @@ public File generateCellHashingCalls(File citeSeqCountOutDir, File outputDir, St Set allowableBarcodes = parameters.getAllowableBarcodeNames(); String 
allowableBarcodeParam = allowableBarcodes != null ? "c('" + StringUtils.join(allowableBarcodes, "','") + "')" : "NULL"; - List methodNames = parameters.methods.stream().filter(m -> { - if (totalCellBarcodes < m.getMinCells()) - { - ctx.getLogger().debug("Dropping method due to insufficient cells: " + m.name()); - return false; - } - - return true; - }).map(CALLING_METHOD::getLabel).distinct().toList(); + // NOTE: we do not need to filter total methods on min cells: + List methodNames = parameters.methods.stream().map(CALLING_METHOD::getLabel).distinct().toList(); List consensusMethodNames = parameters.consensusMethods == null ? Collections.emptyList() : parameters.consensusMethods.stream().filter(m -> { if (totalCellBarcodes < m.getMinCells()) From 22034c2b4ea96e7a54280893d0c27af0350cbf65 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 23 Jan 2025 13:25:14 -0800 Subject: [PATCH 37/59] Allow nimble append to support queryDatabaseForLineageUpdates --- singlecell/resources/chunks/AppendNimble.R | 5 +++-- .../web/singlecell/panel/NimbleAppendPanel.js | 16 +++++++++++--- .../pipeline/singlecell/AppendNimble.java | 21 ++++++++++++++++++- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 7a913d757..97023dea0 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -17,8 +17,9 @@ for (datasetId in names(seuratObjects)) { seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) for (genomeId in names(nimbleGenomes)) { - maxAmbiguityAllowed <- !nimbleGenomeAmbiguousPreference[[genomeId]] - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio) + 
maxAmbiguityAllowed <- nimbleGenomeAmbiguousPreference[[genomeId]] + queryDatabaseForLineageUpdates <- queryDatabaseForLineageUpdatesPreference[[genomeId]] + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio, queryDatabaseForLineageUpdates = queryDatabaseForLineageUpdates) } saveData(seuratObj, datasetId) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index f01e307ce..7712dc1c0 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -40,7 +40,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },LABKEY.ext4.GRIDBUTTONS.DELETERECORD()], store: { type: 'array', - fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed'] + fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed', 'queryDatabaseForLineageUpdates'] }, columns: [{ dataIndex: 'genomeId', @@ -77,6 +77,15 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { allowBlank: true, minValue: 0 } + },{ + dataIndex: 'queryDatabaseForLineageUpdates', + width: 175, + header: 'Check for Lineage Updates', + editor: { + xtype: 'checkbox', + allowBlank: true, + value: false + } }] }] }); @@ -87,7 +96,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { getValue: function(){ var ret = []; this.down('ldk-gridpanel').store.each(function(r, i) { - ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '']); + ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '', !!r.data.queryDatabaseForLineageUpdates]); }, this); return Ext4.isEmpty(ret) ? 
null : JSON.stringify(ret); @@ -123,7 +132,8 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { var rec = grid.store.createModel({ genomeId: row[0], targetAssay: row[1], - maxAmbiguityAllowed: row[2] + maxAmbiguityAllowed: row[2], + queryDatabaseForLineageUpdates: !!row[3] }); grid.store.add(rec); }, this); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 2fade274d..67b24280a 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -96,7 +96,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Thu, 23 Jan 2025 18:34:30 -0800 Subject: [PATCH 38/59] Bugfix to AppendNimble --- .../org/labkey/singlecell/pipeline/singlecell/AppendNimble.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 67b24280a..1da9a91b1 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -77,7 +77,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Thu, 23 Jan 2025 20:42:35 -0800 Subject: [PATCH 39/59] Bugfix to AppendNimble --- .../org/labkey/singlecell/pipeline/singlecell/AppendNimble.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 1da9a91b1..087abeac6 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ 
-79,7 +79,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Sat, 25 Jan 2025 16:12:58 -0800 Subject: [PATCH 40/59] Widen NimbleAppendPanel --- .../resources/web/singlecell/panel/NimbleAppendPanel.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index 7712dc1c0..1f748636d 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -10,7 +10,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { initComponent: function(){ Ext4.apply(this, { style: 'padding: 10px;margins: 5px;', - minWidth: 850, + minWidth: 950, border: true, items: [{ html: 'This step will query nimble results for the selected genome(s). It will then append these results to the seurat object on the target assay.', @@ -20,7 +20,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { },{ xtype: 'ldk-gridpanel', clicksToEdit: 1, - width: 775, + width: 875, tbar: [{ text: 'Add', handler: function(btn){ From 46f5727e5815888b3b45c4452bce4824a616d8a5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 26 Jan 2025 12:53:31 -0800 Subject: [PATCH 41/59] Support additional params for nimble append --- singlecell/resources/chunks/AppendNimble.R | 3 ++- .../web/singlecell/panel/NimbleAppendPanel.js | 21 +++++++++++---- .../pipeline/singlecell/AppendNimble.java | 27 ++++++++++++++++--- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/singlecell/resources/chunks/AppendNimble.R b/singlecell/resources/chunks/AppendNimble.R index 97023dea0..d24ac73bf 100644 --- a/singlecell/resources/chunks/AppendNimble.R +++ b/singlecell/resources/chunks/AppendNimble.R @@ -19,7 +19,8 @@ for (datasetId in names(seuratObjects)) { for (genomeId in names(nimbleGenomes)) { maxAmbiguityAllowed <- nimbleGenomeAmbiguousPreference[[genomeId]] 
queryDatabaseForLineageUpdates <- queryDatabaseForLineageUpdatesPreference[[genomeId]] - seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio, queryDatabaseForLineageUpdates = queryDatabaseForLineageUpdates) + replaceExistingAssayData <- replaceExistingAssayDataByGenome[[genomeId]] + seuratObj <- Rdiscvr::DownloadAndAppendNimble(seuratObject = seuratObj, allowableGenomes = genomeId, ensureSamplesShareAllGenomes = ensureSamplesShareAllGenomes, targetAssayName = nimbleGenomes[[genomeId]], enforceUniqueFeatureNames = TRUE, maxAmbiguityAllowed = maxAmbiguityAllowed, maxLibrarySizeRatio = maxLibrarySizeRatio, queryDatabaseForLineageUpdates = queryDatabaseForLineageUpdates, replaceExistingAssayData = replaceExistingAssayData) } saveData(seuratObj, datasetId) diff --git a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js index 1f748636d..9f346ff14 100644 --- a/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js +++ b/singlecell/resources/web/singlecell/panel/NimbleAppendPanel.js @@ -10,7 +10,7 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { initComponent: function(){ Ext4.apply(this, { style: 'padding: 10px;margins: 5px;', - minWidth: 950, + minWidth: 1025, border: true, items: [{ html: 'This step will query nimble results for the selected genome(s). 
It will then append these results to the seurat object on the target assay.', @@ -20,7 +20,7 @@ },{ xtype: 'ldk-gridpanel', clicksToEdit: 1, - width: 875, + width: 1000, tbar: [{ text: 'Add', handler: function(btn){ @@ -40,7 +40,7 @@ },LABKEY.ext4.GRIDBUTTONS.DELETERECORD()], store: { type: 'array', - fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed', 'queryDatabaseForLineageUpdates'] + fields: ['genomeId', 'targetAssay','maxAmbiguityAllowed', 'queryDatabaseForLineageUpdates', 'replaceExistingAssayData'] }, columns: [{ dataIndex: 'genomeId', @@ -86,6 +86,16 @@ allowBlank: true, value: false } + },{ + dataIndex: 'replaceExistingAssayData', + width: 150, + header: 'Replace Existing Data?', + editor: { + xtype: 'checkbox', + allowBlank: true, + value: true + } + }] }] }); @@ -96,7 +106,7 @@ getValue: function(){ var ret = []; this.down('ldk-gridpanel').store.each(function(r, i) { - ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '', !!r.data.queryDatabaseForLineageUpdates]); + ret.push([r.data.genomeId, r.data.targetAssay, r.data.maxAmbiguityAllowed ?? '', !!r.data.queryDatabaseForLineageUpdates, !!r.data.replaceExistingAssayData]); }, this); return Ext4.isEmpty(ret) ?
null : JSON.stringify(ret); @@ -133,7 +143,8 @@ Ext4.define('SingleCell.panel.NimbleAppendPanel', { genomeId: row[0], targetAssay: row[1], maxAmbiguityAllowed: row[2], - queryDatabaseForLineageUpdates: !!row[3] + queryDatabaseForLineageUpdates: !!row[3], + replaceExistingAssayData: !!row[4] }); grid.store.add(rec); }, this); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 087abeac6..093c56e44 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -77,7 +77,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Mon, 27 Jan 2025 09:58:30 -0800 Subject: [PATCH 42/59] Bugfix to nimble append --- .../labkey/singlecell/pipeline/singlecell/AppendNimble.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java index 093c56e44..106464839 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/AppendNimble.java @@ -124,7 +124,7 @@ protected Chunk createParamChunk(SequenceOutputHandler.JobContext ctx, List Date: Mon, 27 Jan 2025 11:25:32 -0800 Subject: [PATCH 43/59] Allow nimble plot to run and not produce an HTML file --- .../org/labkey/singlecell/run/NimbleHelper.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index d6cf466a1..b48c5cb14 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ -292,17 +292,9 @@ 
public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin output.addSequenceOutput(results, basename + ": nimble align", "Nimble Results", rs.getRowId(), null, genome.getGenomeId(), description); + // NOTE: situations like zero alignments would result in no report being created. Rely on the code in doAlign to verify proper execution of nimble File reportHtml = getReportHtmlFileFromResults(results); - if (!reportHtml.exists()) - { - if (SequencePipelineService.get().hasMinLineCount(results, 2)) - { - long lineCount = SequencePipelineService.get().getLineCount(results); - _ctx.getLogger().debug("Found {} lines in file {}", lineCount, results.getPath()); - throw new PipelineJobException("Unable to find file: " + reportHtml.getPath()); - } - } - else + if (reportHtml.exists()) { output.addSequenceOutput(reportHtml, basename + ": nimble report", NIMBLE_REPORT_CATEGORY, rs.getRowId(), null, genome.getGenomeId(), description); } @@ -554,7 +546,7 @@ public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineSt if (!plotResultsHtml.exists()) { - throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath()); + ctx.getLogger().info("No report HTML generated, but nimble plot had exit code 0"); } } else From 41739623f97f07ba2de8e2e49fd64757c91a446a Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 28 Jan 2025 12:34:43 -0800 Subject: [PATCH 44/59] Support additional studies --- .../pipeline_code/sequence_tools_install.sh | 18 +++++++++--------- singlecell/resources/chunks/StudyMetadata.R | 4 ++++ .../pipeline/singlecell/StudyMetadata.java | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index d3d768a04..3e31985f4 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -985,15 +985,15 @@ then rm -Rf 
$LKTOOLS_DIR/blast_formatter rm -Rf $LKTOOLS_DIR/makeblastdb - wget $WGET_OPTS ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.31/ncbi-blast-2.2.31+-x64-linux.tar.gz - gunzip ncbi-blast-2.2.31+-x64-linux.tar.gz - tar -xf ncbi-blast-2.2.31+-x64-linux.tar - gzip ncbi-blast-2.2.31+-x64-linux.tar - - install ./ncbi-blast-2.2.31+/bin/blastn $LKTOOLS_DIR/blastn - install ./ncbi-blast-2.2.31+/bin/blast_formatter $LKTOOLS_DIR/blast_formatter - install ./ncbi-blast-2.2.31+/bin/makeblastdb $LKTOOLS_DIR/makeblastdb - install ./ncbi-blast-2.2.31+/bin/makembindex $LKTOOLS_DIR/makembindex + wget $WGET_OPTS ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.16.0/ncbi-blast-2.16.0+-x64-linux.tar.gz + gunzip ncbi-blast-2.16.0+-x64-linux.tar.gz + tar -xf ncbi-blast-2.16.0+-x64-linux.tar + gzip ncbi-blast-2.16.0+-x64-linux.tar + + install ./ncbi-blast-2.16.0+/bin/blastn $LKTOOLS_DIR/blastn + install ./ncbi-blast-2.16.0+/bin/blast_formatter $LKTOOLS_DIR/blast_formatter + install ./ncbi-blast-2.16.0+/bin/makeblastdb $LKTOOLS_DIR/makeblastdb + install ./ncbi-blast-2.16.0+/bin/makembindex $LKTOOLS_DIR/makembindex else echo "Already installed" fi diff --git a/singlecell/resources/chunks/StudyMetadata.R b/singlecell/resources/chunks/StudyMetadata.R index fd8d4e931..b64a2c66e 100644 --- a/singlecell/resources/chunks/StudyMetadata.R +++ b/singlecell/resources/chunks/StudyMetadata.R @@ -21,6 +21,10 @@ for (datasetId in names(seuratObjects)) { seuratObj <- Rdiscvr::ApplyPC531Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) } else if (studyName == 'AcuteNx') { seuratObj <- Rdiscvr::ApplyAcuteNxMetadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) + } else if (studyName == 'EC') { + seuratObj <- Rdiscvr::ApplyEC_Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) + } else if (studyName == 'PPG_Stims') { + seuratObj <- Rdiscvr::ApplyPPG_Stim_Metadata(seuratObj, errorIfUnknownIdsFound = errorIfUnknownIdsFound) } else { 
stop(paste0('Unknown study: ', studyName)) } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java index 666250a2a..27050d587 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/StudyMetadata.java @@ -24,7 +24,7 @@ public Provider() {{ put("multiSelect", false); put("allowBlank", false); - put("storeValues", "PC475;PC531;TB;Malaria;AcuteNx"); + put("storeValues", "PC475;PC531;TB;Malaria;AcuteNx;EC;PPG_Stims"); put("delimiter", ";"); }}, null, null, false, false), SeuratToolParameter.create("errorIfUnknownIdsFound", "Error If Unknown Ids Found", "If true, the job will fail if the seurat object contains ID not present in the metadata", "checkbox", null, true) From 00da0725faf89b5c01c3c936de9ede5f32c610bb Mon Sep 17 00:00:00 2001 From: bbimber Date: Tue, 28 Jan 2025 19:25:25 -0800 Subject: [PATCH 45/59] Support nimble append --- .../chunks/PerformDefaultNimbleAppend.R | 26 ++++++++++++++ .../labkey/singlecell/SingleCellModule.java | 2 ++ .../PerformDefaultNimbleAppend.java | 34 +++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 singlecell/resources/chunks/PerformDefaultNimbleAppend.R create mode 100644 singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java diff --git a/singlecell/resources/chunks/PerformDefaultNimbleAppend.R b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R new file mode 100644 index 000000000..90e0b659b --- /dev/null +++ b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R @@ -0,0 +1,26 @@ +netRc <- paste0(Sys.getenv('USER_HOME'), '/.netrc') +if (!file.exists(netRc)) { + print(list.files(Sys.getenv('USER_HOME'))) + stop(paste0('Unable to find file: ', netRc)) +} + +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = netRc)) +Rdiscvr::SetLabKeyDefaults(baseUrl = 
serverBaseUrl, defaultFolder = defaultLabKeyFolder) + +# NOTE: this file is created by DownloadAndAppendNimble if there was an error. It might exist if a job failed and then was restarted +if (file.exists('debug.nimble.txt.gz')) { + unlink('debug.nimble.txt.gz') +} + +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + Rdiscvr::PerformDefaultNimbleAppend(seuratObj) + + saveData(seuratObj, datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 5588d1677..724b15efa 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -71,6 +71,7 @@ import org.labkey.singlecell.pipeline.singlecell.IntegrateData; import org.labkey.singlecell.pipeline.singlecell.MergeSeurat; import org.labkey.singlecell.pipeline.singlecell.NormalizeAndScale; +import org.labkey.singlecell.pipeline.singlecell.PerformDefaultNimbleAppend; import org.labkey.singlecell.pipeline.singlecell.PhenotypePlots; import org.labkey.singlecell.pipeline.singlecell.PlotAssayFeatures; import org.labkey.singlecell.pipeline.singlecell.PlotAverageCiteSeqCounts; @@ -293,6 +294,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new StudyMetadata.Provider()); SequencePipelineService.get().registerPipelineStep(new UpdateSeuratPrototype.Provider()); SequencePipelineService.get().registerPipelineStep(new RunDecoupler.Provider()); + SequencePipelineService.get().registerPipelineStep(new PerformDefaultNimbleAppend.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java 
b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java new file mode 100644 index 000000000..f9a5ca8b7 --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/PerformDefaultNimbleAppend.java @@ -0,0 +1,34 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.singlecell.pipeline.SingleCellStep; + +public class PerformDefaultNimbleAppend extends AbstractRDiscvrStep +{ + public PerformDefaultNimbleAppend(PipelineContext ctx, PerformDefaultNimbleAppend.Provider provider) + { + super(provider, ctx); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public Provider() + { + super("PerformDefaultNimbleAppend", "Default Nimble Append", "RDiscvr", "This uses Rdiscvr to run the default nimble append, adding MHC, KIR, NKG, Viral and Ig data", null, null, null); + } + + @Override + public PerformDefaultNimbleAppend create(PipelineContext ctx) + { + return new PerformDefaultNimbleAppend(ctx, this); + } + } + + @Override + public String getFileSuffix() + { + return "nbl"; + } +} + From ec32afaf0bc0ede34799e68eb95199646b7ae235 Mon Sep 17 00:00:00 2001 From: bbimber Date: Thu, 30 Jan 2025 18:18:04 -0800 Subject: [PATCH 46/59] Bugfix to PerformDefaultNimbleAppend --- singlecell/resources/chunks/PerformDefaultNimbleAppend.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/PerformDefaultNimbleAppend.R b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R index 90e0b659b..f4f37dcd1 100644 --- a/singlecell/resources/chunks/PerformDefaultNimbleAppend.R +++ b/singlecell/resources/chunks/PerformDefaultNimbleAppend.R @@ -16,7 +16,7 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - 
Rdiscvr::PerformDefaultNimbleAppend(seuratObj) + seuratObj <- Rdiscvr::PerformDefaultNimbleAppend(seuratObj) saveData(seuratObj, datasetId) From d8a8d9a8d579571bfc5658f51145353ecdbc560a Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 31 Jan 2025 09:51:23 -0800 Subject: [PATCH 47/59] Rework MergeSeurat to handle large data better --- singlecell/resources/chunks/MergeSeurat.R | 111 +++++++++++------- .../pipeline/singlecell/MergeSeurat.java | 3 +- 2 files changed, 68 insertions(+), 46 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 8b0d72d7c..c4d353ae5 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -5,15 +5,17 @@ if (!doDiet && length(seuratObjects) > 20 && !disableAutoDietSeurat) { doDiet <- TRUE } -mergeBatch <- function(dat) { +filesToDelete <- c() + +mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { toMerge <- list() - for (datasetId in names(dat)) { + for (datasetId in names(datasetIdToFilePath)) { print(paste0('Loading: ', datasetId)) if (doDiet) { - toMerge[[datasetId]] <- Seurat::DietSeurat(readSeuratRDS(dat[[datasetId]])) + toMerge[[datasetId]] <- Seurat::DietSeurat(readSeuratRDS(datasetIdToFilePath[[datasetId]])) gc() } else { - toMerge[[datasetId]] <- readSeuratRDS(dat[[datasetId]]) + toMerge[[datasetId]] <- readSeuratRDS(datasetIdToFilePath[[datasetId]]) } if (ncol(toMerge[[datasetId]]) == 1) { @@ -38,59 +40,78 @@ mergeBatch <- function(dat) { } seuratObj <- CellMembrane::MergeSeuratObjs(toMerge, projectName = projectName, doGC = doDiet, errorOnBarcodeSuffix = errorOnBarcodeSuffix) - return(seuratObj) + saveRDS(seuratObj, file = saveFile) + filesToDelete <<- c(filesToDelete, saveFile) + + return(fn) } -if (length(seuratObjects) == 1) { - print('There is only one seurat object, no need to merge') - datasetId <- names(seuratObjects)[[1]] - saveData(readSeuratRDS(seuratObjects[[datasetId]]), datasetId) -} else { - 
batchSize <- 20 - numBatches <- ceiling(length(seuratObjects) / batchSize) +mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInputFileSizeMb = maxAllowableInputFileSizeMb) { + logger::log_info(paste0('Beginning outer batch: ', outerBatchIdx, ' with total files: ', length(seuratObjects))) + + if (length(seuratObjects) == 1) { + print('Single file, nothing to do') + return(seuratObjects) + } + + # Phase 1: group into batches: + batchList <- list() + activeBatch <- c() + sizeOfBatch <- 0 + batchIdx <- 1 + for (datasetId in names(seuratObjects)) { + activeBatch <- c(activeBatch, seuratObjects[[datasetId]]) + sizeInMb <- (file.size(seuratObjects[[datasetId]]) / 1024^2) + sizeOfBatch <- sizeOfBatch + sizeInMb + + if (length(activeBatch) >= maxBatchSize || (sizeOfBatch >= maxInputFileSizeMb && length(activeBatch) > 1)) { + logger::log_info(paste0('adding to batch with ', length(activeBatch), ' files and ', sizeOfBatch, 'MB')) + batchList[batchIdx] <- activeBatch + activeBatch <- c() + sizeOfBatch <- 0 + batchIdx <- batchIdx + 1 + next + } + } + + # Account for final files: + if (length(activeBatch) > 0) { + logger::log_info(paste0('finalizing batch with ', length(activeBatch), ' files and ', sizeOfBatch, 'MB')) + batchList[batchIdx] <- activeBatch + } + + if (length(batchList) == 0){ + stop('Error: zero length batchList') + } + mergedObjectFiles <- list() - for (i in 1:numBatches) { - logger::log_info(paste0('Merging batch ', i, ' of ', numBatches)) - start <- 1 + (i-1)*batchSize - end <- min(start+batchSize-1, length(seuratObjects)) - logger::log_info(paste0('processing: ', start, ' to ', end, ' of ', length(seuratObjects))) + for (i in 1:length(batchList)) { + activeBatch <- batchList[[i]] + logger::log_info(paste0('Merging inner batch ', i, ' of ', length(batchList), ' with ', length(activeBatch), ' files')) - fn <- paste0('mergeBatch.', i, '.rds') - saveRDS(mergeBatch(seuratObjects[start:end]), file = fn) - mergedObjectFiles[[i]] <- fn + 
saveFile <- paste0('merge.', outerBatchIdx, '.', i, '.rds') + mergedObjectFiles[[i]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) gc() logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) } + logger::log_info('Done with inner batch') - logger::log_info('Done with batches') - if (length(mergedObjectFiles) == 1) { - seuratObj <- readRDS(mergedObjectFiles[[1]]) - unlink(mergedObjectFiles[[1]]) - } else { - logger::log_info('performing final merge') - # TODO: check for single cell in object - seuratObj <- readRDS(mergedObjectFiles[[1]]) - unlink(mergedObjectFiles[[1]]) - - for (i in 2:length(mergedObjectFiles)) { - logger::log_info(paste0('Merging final file ', i, ' of ', length(mergedObjectFiles))) - seuratObj <- merge(x = seuratObj, y = readRDS(mergedObjectFiles[[i]]), project = seuratObj@project.name) - if (HasSplitLayers(seuratObj)) { - seuratObj <- MergeSplitLayers(seuratObj) - } + if (length(mergedObjectFiles) > 1) { + return(mergeBatch(mergedObjectFiles, outerBatchIdx = (outerBatchIdx + 1), maxInputFileSizeMb = maxInputFileSizeMb, maxBatchSize = maxBatchSize)) + } - unlink(mergedObjectFiles[[i]]) + return(mergedObjectFiles) +} - logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) - logger::log_info(paste0('seurat object: ', R.utils::hsize(as.numeric(utils::object.size(seuratObj))))) - gc() - logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) - } - } +mergedObjectFiles <- mergeBatch(seuratObjects, outerBatchIdx = 1) - gc() +print('Overall merge complete') +gc() +saveData(seuratObjMerged, projectName) - saveData(seuratObj, projectName) +# Cleanup: +for (fn in filesToDelete) { + unlink(fn) } \ No newline at end of file diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java 
b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java index b27f985b9..993590b79 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/MergeSeurat.java @@ -35,7 +35,8 @@ public Provider() put("height", 150); put("delimiter", ","); put("stripCharsRe", "/(^['\"]+)|(['\"]+$)/g"); - }}, "RNA.orig").delimiter(",") + }}, "RNA.orig").delimiter(","), + SeuratToolParameter.create("maxAllowableInputFileSizeMb", "Max Allowable Batch Size (MB)", "The largest allowable amount of data (in MB), measured as the size of the RDS files, to be allowed in one unit of data to merge in memory.", "ldk-integerfield", null, 200, "maxAllowableInputFileSizeMb", true, false) ), null, null); } From cde358a3ece1ba8eaf9fe639043153893d4997b9 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 31 Jan 2025 11:40:03 -0800 Subject: [PATCH 48/59] Bugfix to MergeSeurat --- singlecell/resources/chunks/MergeSeurat.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index c4d353ae5..820dd112c 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -56,18 +56,18 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput # Phase 1: group into batches: batchList <- list() - activeBatch <- c() + activeBatch <- list() sizeOfBatch <- 0 batchIdx <- 1 for (datasetId in names(seuratObjects)) { - activeBatch <- c(activeBatch, seuratObjects[[datasetId]]) + activeBatch[[datasetId]] <- seuratObjects[[datasetId]] sizeInMb <- (file.size(seuratObjects[[datasetId]]) / 1024^2) sizeOfBatch <- sizeOfBatch + sizeInMb if (length(activeBatch) >= maxBatchSize || (sizeOfBatch >= maxInputFileSizeMb && length(activeBatch) > 1)) { - logger::log_info(paste0('adding to batch with ', length(activeBatch), ' files and ', sizeOfBatch, 
'MB')) - batchList[batchIdx] <- activeBatch - activeBatch <- c() + logger::log_info(paste0('adding to batch with ', length(activeBatch), ' files and ', sizeOfBatch, ' MB')) + batchList[[batchIdx]] <- activeBatch + activeBatch <- list() sizeOfBatch <- 0 batchIdx <- batchIdx + 1 next @@ -76,8 +76,8 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput # Account for final files: if (length(activeBatch) > 0) { - logger::log_info(paste0('finalizing batch with ', length(activeBatch), ' files and ', sizeOfBatch, 'MB')) - batchList[batchIdx] <- activeBatch + logger::log_info(paste0('finalizing batch with ', length(activeBatch), ' files and ', sizeOfBatch, ' MB')) + batchList[[batchIdx]] <- activeBatch } if (length(batchList) == 0){ From 3843ffebef3f044017ce0a1847cf6e01fa5db404 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 31 Jan 2025 13:13:49 -0800 Subject: [PATCH 49/59] Make AppendMetadata more tolerant --- singlecell/resources/chunks/AppendMetadata.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/AppendMetadata.R b/singlecell/resources/chunks/AppendMetadata.R index addefee43..f14a25f65 100644 --- a/singlecell/resources/chunks/AppendMetadata.R +++ b/singlecell/resources/chunks/AppendMetadata.R @@ -11,7 +11,13 @@ for (datasetId in names(seuratObjects)) { printName(datasetId) seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) - seuratObj <- Rdiscvr::QueryAndApplyCdnaMetadata(seuratObj) + if ('BarcodePrefix' %in% names(seuratObj@meta.data) && !any(is.na(as.integer(seuratObj$BarcodePrefix)))) { + seuratObj <- Rdiscvr::QueryAndApplyCdnaMetadata(seuratObj) + } else if ('cDNA_ID' %in% names(seuratObj@meta.data)) { + seuratObj <- Rdiscvr::QueryAndApplyMetadataUsingCDNA(seuratObj) + } else { + stop('Unable to find either BarcodePrefix or cDNA_ID in meta.data') + } saveData(seuratObj, datasetId) From cc57c13352535b3fa311cc122826c8700842ce44 Mon Sep 17 00:00:00 2001 From: bbimber Date: Fri, 
31 Jan 2025 13:23:16 -0800 Subject: [PATCH 50/59] Bugfix to MergeSeurat --- singlecell/resources/chunks/MergeSeurat.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 820dd112c..294f74498 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -8,6 +8,10 @@ if (!doDiet && length(seuratObjects) > 20 && !disableAutoDietSeurat) { filesToDelete <- c() mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { + if (all(is.null(names(datasetIdToFilePath)))) { + stop('No names provided on datasetIdToFilePath') + } + toMerge <- list() for (datasetId in names(datasetIdToFilePath)) { print(paste0('Loading: ', datasetId)) @@ -43,7 +47,7 @@ mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { saveRDS(seuratObj, file = saveFile) filesToDelete <<- c(filesToDelete, saveFile) - return(fn) + return(saveFile) } mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInputFileSizeMb = maxAllowableInputFileSizeMb) { @@ -89,8 +93,9 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput activeBatch <- batchList[[i]] logger::log_info(paste0('Merging inner batch ', i, ' of ', length(batchList), ' with ', length(activeBatch), ' files')) - saveFile <- paste0('merge.', outerBatchIdx, '.', i, '.rds') - mergedObjectFiles[[i]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) + batchName <- paste0('merge.', outerBatchIdx, '.', i) + saveFile <- paste0(batchName, '.rds') + mergedObjectFiles[[batchName]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) gc() From d6df8203e283df2bfdd3447c46ae4fc5fd71c696 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Feb 2025 06:37:41 -0800 Subject: [PATCH 51/59] Improvements to MergeSeurat --- 
singlecell/resources/chunks/MergeSeurat.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 294f74498..88e6c6516 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ b/singlecell/resources/chunks/MergeSeurat.R @@ -43,10 +43,13 @@ mergeBatchInMemory <- function(datasetIdToFilePath, saveFile) { stop('There were no passing seurat objects!') } - seuratObj <- CellMembrane::MergeSeuratObjs(toMerge, projectName = projectName, doGC = doDiet, errorOnBarcodeSuffix = errorOnBarcodeSuffix) - saveRDS(seuratObj, file = saveFile) + saveRDS(CellMembrane::MergeSeuratObjs(toMerge, projectName = projectName, doGC = doDiet, errorOnBarcodeSuffix = errorOnBarcodeSuffix), file = saveFile) filesToDelete <<- c(filesToDelete, saveFile) + logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) + gc() + logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) + return(saveFile) } @@ -95,11 +98,12 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput batchName <- paste0('merge.', outerBatchIdx, '.', i) saveFile <- paste0(batchName, '.rds') + if (length(activeBatch) == 1){ + print('Only single file in batch, skipping merge') + mergedObjectFiles[[batchName]] <- activeBatch[[1]] + next + } mergedObjectFiles[[batchName]] <- mergeBatchInMemory(activeBatch, saveFile = saveFile) - - logger::log_info(paste0('mem used: ', R.utils::hsize(as.numeric(pryr::mem_used())))) - gc() - logger::log_info(paste0('after gc: ', R.utils::hsize(as.numeric(pryr::mem_used())))) } logger::log_info('Done with inner batch') From 4af7b79b4999a0a4673cce6dca944cb0336a0495 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Feb 2025 12:07:32 -0800 Subject: [PATCH 52/59] Expand orphan pipeline job tests --- .../pipeline/OrphanFilePipelineJob.java | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 
deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java index 9ffea44dc..026177561 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/OrphanFilePipelineJob.java @@ -396,11 +396,10 @@ public void getOrphanFilesForContainer(Container c, User u, Set orphanFile } } - //TODO: look for .deleted and /archive File deletedDir = new File(root.getRootPath().getParentFile(), ".deleted"); if (deletedDir.exists()) { - messages.add("## .deleted dir found: " + deletedDir.getPath()); + getJob().getLogger().warn("WARNING: .deleted dir found: " + deletedDir.getPath()); } File assayData = new File(root.getRootPath(), "assaydata"); @@ -513,12 +512,6 @@ private void getOrphanFilesForDirectory(Set knownExpDatas, Map knownExpDatas, Map (200*1024*1024)) + { + getJob().getLogger().warn("WARNING: Extremely large log file: " + f.getPath() + ", " + FileUtils.byteCountToDisplaySize(f.length())); + } + + if (f.getName().endsWith(".rds")) + { + if (!dataMap.containsKey(f.toURI())) + { + getJob().getLogger().warn("WARNING: Unknown RDS file: " + f.getPath()); + } + } + //orphan index if (f.getPath().toLowerCase().endsWith(".bai") || f.getPath().toLowerCase().endsWith(".tbi") || f.getPath().toLowerCase().endsWith(".idx") || f.getPath().toLowerCase().endsWith(".crai")) { From 11fddd3369983cc1cb719e3751de2fe02f0e9cf5 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sat, 1 Feb 2025 13:44:59 -0800 Subject: [PATCH 53/59] Bugfix to MergeSeurat --- singlecell/resources/chunks/MergeSeurat.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/singlecell/resources/chunks/MergeSeurat.R b/singlecell/resources/chunks/MergeSeurat.R index 88e6c6516..134005614 100644 --- a/singlecell/resources/chunks/MergeSeurat.R +++ 
b/singlecell/resources/chunks/MergeSeurat.R @@ -115,10 +115,14 @@ mergeBatch <- function(seuratObjects, outerBatchIdx, maxBatchSize = 20, maxInput } mergedObjectFiles <- mergeBatch(seuratObjects, outerBatchIdx = 1) +if (length(mergedObjectFiles) != 1) { + stop(paste0('Expected single file, found: ', length(mergedObjectFiles))) +} print('Overall merge complete') gc() -saveData(seuratObjMerged, projectName) + +saveData(readRDS(mergedObjectFiles[[1]]), projectName) # Cleanup: for (fn in filesToDelete) { From 55b3207713c0a116a2f049431b39876a2e89cf80 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 2 Feb 2025 10:57:03 -0800 Subject: [PATCH 54/59] Support SVTyper --- .../pipeline_code/extra_tools_install.sh | 18 ++ .../SequenceAnalysisModule.java | 2 + .../run/alignment/SVTyperStep.java | 188 ++++++++++++++++++ 3 files changed, 208 insertions(+) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index dae691917..308401cee 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -274,3 +274,21 @@ then else echo "Already installed" fi + +if [[ ! -e ${LKTOOLS_DIR}/svtyper || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/svtyper* + + # NOTE: this fork is used to ensure python3 compatibility + #python3 -m pip install --user git+https://github.com/hall-lab/svtyper.git + python3 -m pip install --user git+https://github.com/bbimber/svtyper.git + + SVTYPER=`which svtyper` + ln -s $SVTYPER ${LKTOOLS_DIR}/svtyper + + SVTYPER=`which svtyper-sso` + ln -s $SVTYPER ${LKTOOLS_DIR}/svtyper-sso +else + echo "Already installed" +fi \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 61c9a0fe1..38d53f119 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -101,6 +101,7 @@ import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper; import org.labkey.sequenceanalysis.run.alignment.ParagraphStep; import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper; +import org.labkey.sequenceanalysis.run.alignment.SVTyperStep; import org.labkey.sequenceanalysis.run.alignment.StarWrapper; import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper; import org.labkey.sequenceanalysis.run.analysis.BamIterator; @@ -403,6 +404,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); + SequenceAnalysisService.get().registerFileHandler(new SVTyperStep()); SequenceAnalysisService.get().registerFileHandler(new UpdateReadsetFilesHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java new file mode 100644 index 000000000..28bda7b03 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -0,0 +1,188 @@ +package org.labkey.sequenceanalysis.run.alignment; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +//https://github.com/hall-lab/svtyper + +public class SVTyperStep extends AbstractParameterizedOutputHandler +{ + public SVTyperStep() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "SVTyper SV Genotyping", "This will run SVTyper on one or more BAM files to genotype SVs", null, Arrays.asList( + ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() + {{ 
+ put("allowBlank", false); + }}, null), + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true) + )); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + int svVcfId = ctx.getParams().optInt("svVCF", 0); + if (svVcfId == 0) + { + throw new PipelineJobException("svVCF param was null"); + } + + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); + if (svVcf == null) + { + throw new PipelineJobException("File not found for ID: " + svVcfId); + } + else if (!svVcf.exists()) + { + throw new PipelineJobException("Missing file: " + svVcf.getPath()); + } + + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); + + for (SequenceOutputFile so : inputFiles) + { 
+ List jsonArgs = new ArrayList<>(); + SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); + jsonArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper", null, true).getPath()); + jsonArgs.add("-b"); + jsonArgs.add(so.getFile().getPath()); + + File coverageJson = new File(ctx.getWorkingDirectory(), "bam.json"); + jsonArgs.add("-l"); + jsonArgs.add(coverageJson.getPath()); + + + if (threads != null) + { + jsonArgs.add("--max_reads"); + jsonArgs.add(threads.toString()); + } + + File doneFile = new File(ctx.getWorkingDirectory(), "json.done"); + ctx.getFileManager().addIntermediateFile(doneFile); + if (doneFile.exists()) + { + ctx.getLogger().info("BAM json already generated, skipping"); + } + else + { + wrapper.execute(jsonArgs); + try + { + FileUtils.touch(doneFile); + ctx.getFileManager().addIntermediateFile(doneFile); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + + if (!coverageJson.exists()) + { + throw new PipelineJobException("Missing file: " + coverageJson.getPath()); + } + ctx.getFileManager().addIntermediateFile(coverageJson); + + List svtyperArgs = new ArrayList<>(); + svtyperArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper-sso", null, true).getPath()); + + svtyperArgs.add("-i"); + svtyperArgs.add(svVcf.getPath()); + + svtyperArgs.add("-B"); + svtyperArgs.add(so.getFile().getPath()); + + svtyperArgs.add("-l"); + svtyperArgs.add(coverageJson.getPath()); + + if (threads != null) + { + svtyperArgs.add("--core"); + svtyperArgs.add(threads.toString()); + } + + File genotypes = new File(ctx.getWorkingDirectory(), SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".svtyper.vcf.gz"); + wrapper.execute(Arrays.asList("/bin/bash", "-c", StringUtils.join(svtyperArgs, " ") + "| bgzip -c"), ProcessBuilder.Redirect.to(genotypes)); + + if (!genotypes.exists()) + { + throw new PipelineJobException("Missing file: " + genotypes.getPath()); + } + + try + { + 
SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + ctx.getFileManager().addSequenceOutput(genotypes, "SVTyper Genotypes: " + so.getName(), "SVTyper Genotypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); + } + } + } +} From 36cf5d8d2ef5fcabc20973510b4a9cfe8ad2064a Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 2 Feb 2025 18:01:00 -0800 Subject: [PATCH 55/59] Bugfix SVTyper --- .../org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index 28bda7b03..d7d84e101 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -106,7 +106,7 @@ else if (!svVcf.exists()) List jsonArgs = new ArrayList<>(); SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); jsonArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper", null, true).getPath()); - jsonArgs.add("-b"); + jsonArgs.add("-B"); jsonArgs.add(so.getFile().getPath()); File coverageJson = new File(ctx.getWorkingDirectory(), "bam.json"); From 5110798cf98bd36d0e8195d218cdc425519dff27 Mon Sep 17 00:00:00 2001 From: bbimber Date: Sun, 2 Feb 2025 18:04:36 -0800 Subject: [PATCH 56/59] Support additional options for mGAP tracks --- .../org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java | 1 - 1 file changed, 1 deletion(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index d7d84e101..584dc0d47 100644 --- 
a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -113,7 +113,6 @@ else if (!svVcf.exists()) jsonArgs.add("-l"); jsonArgs.add(coverageJson.getPath()); - if (threads != null) { jsonArgs.add("--max_reads"); From 166cd840e9643e346f2844b094178ba530dba6a1 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 3 Feb 2025 21:13:07 -0800 Subject: [PATCH 57/59] Support Graphtyper --- .../pipeline_code/extra_tools_install.sh | 15 +- .../SequenceAnalysisModule.java | 2 + .../run/alignment/Graphtyper.java | 149 ++++++++++++++++++ .../run/alignment/SVTyperStep.java | 2 +- 4 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java diff --git a/SequenceAnalysis/pipeline_code/extra_tools_install.sh b/SequenceAnalysis/pipeline_code/extra_tools_install.sh index 308401cee..823aa7dcd 100755 --- a/SequenceAnalysis/pipeline_code/extra_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/extra_tools_install.sh @@ -291,4 +291,17 @@ then ln -s $SVTYPER ${LKTOOLS_DIR}/svtyper-sso else echo "Already installed" -fi \ No newline at end of file +fi + +if [[ ! -e ${LKTOOLS_DIR}/graphtyper || ! 
-z $FORCE_REINSTALL ]]; +then + echo "Cleaning up previous installs" + rm -Rf $LKTOOLS_DIR/graphtyper* + + wget https://github.com/DecodeGenetics/graphtyper/releases/download/v2.7.7/graphtyper + chmod a+x graphtyper + + mv ./graphtyper $LKTOOLS_DIR/ +else + echo "Already installed" +fi diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 38d53f119..2d7ea845f 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -98,6 +98,7 @@ import org.labkey.sequenceanalysis.run.alignment.Bowtie2Wrapper; import org.labkey.sequenceanalysis.run.alignment.BowtieWrapper; import org.labkey.sequenceanalysis.run.alignment.GSnapWrapper; +import org.labkey.sequenceanalysis.run.alignment.Graphtyper; import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper; import org.labkey.sequenceanalysis.run.alignment.ParagraphStep; import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper; @@ -405,6 +406,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); SequenceAnalysisService.get().registerFileHandler(new SVTyperStep()); + SequenceAnalysisService.get().registerFileHandler(new Graphtyper()); SequenceAnalysisService.get().registerFileHandler(new UpdateReadsetFilesHandler()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java new file mode 100644 index 000000000..125ebb480 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java @@ -0,0 +1,149 @@ +package 
org.labkey.sequenceanalysis.run.alignment; + +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +//https://github.com/DecodeGenetics/graphtyper + +public class Graphtyper extends AbstractParameterizedOutputHandler +{ + public Graphtyper() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Graphtyper (SV Genotyping)", "This will run Graphtyper on one or more BAM files to genotype SVs", null, Arrays.asList( + ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() + {{ + put("allowBlank", false); + }}, null), + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. 
This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ + put("checked", true); + }}, true) + )); + } + + @Override + public boolean doSplitJobs() + { + return true; + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + int svVcfId = ctx.getParams().optInt("svVCF", 0); + if (svVcfId == 0) + { + throw new PipelineJobException("svVCF param was null"); + } + + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); + if (svVcf == null) + { + throw new PipelineJobException("File not found for ID: " + svVcfId); + } + else if (!svVcf.exists()) + { + throw new PipelineJobException("Missing file: " + svVcf.getPath()); + } + + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); + + for (SequenceOutputFile so : inputFiles) + { + List args = new ArrayList<>(); + SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); + args.add(AbstractCommandWrapper.resolveFileInPath("graphtyper", null, true).getPath()); + args.add("genotype_sv"); + + ReferenceGenome rg = 
ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); + if (rg == null) + { + throw new PipelineJobException("Missing reference genome: " + so.getLibrary_id()); + } + + args.add(rg.getWorkingFastaFile().getPath()); + args.add(svVcf.toString()); + + args.add("--sam"); + args.add(so.getFile().getPath()); + + if (threads != null) + { + args.add("--threads"); + args.add(threads.toString()); + } + + wrapper.execute(args); + + File genotypes = new File(ctx.getWorkingDirectory(), "sv_results/" + SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".vcf.gz"); + if (!genotypes.exists()) + { + throw new PipelineJobException("Missing file: " + genotypes.getPath()); + } + + try + { + SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + ctx.getFileManager().addSequenceOutput(genotypes, "Graphtyper Genotypes: " + so.getName(), "Graphtyper Genotypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); + } + } + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index 584dc0d47..54ce99135 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -115,7 +115,7 @@ else if (!svVcf.exists()) if (threads != null) { - jsonArgs.add("--max_reads"); + jsonArgs.add("--cores"); jsonArgs.add(threads.toString()); } From 58af56eafebf00d28ea9042c3f46051c1e129aa2 Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 3 Feb 2025 21:55:44 -0800 Subject: [PATCH 58/59] Bugfix to SVtyper --- .../labkey/sequenceanalysis/run/alignment/SVTyperStep.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java 
b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java index 54ce99135..2c5b7a629 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java @@ -113,12 +113,6 @@ else if (!svVcf.exists()) jsonArgs.add("-l"); jsonArgs.add(coverageJson.getPath()); - if (threads != null) - { - jsonArgs.add("--cores"); - jsonArgs.add(threads.toString()); - } - File doneFile = new File(ctx.getWorkingDirectory(), "json.done"); ctx.getFileManager().addIntermediateFile(doneFile); if (doneFile.exists()) From e5a0c7091a497abafc07323ef54ee7e9913fc4ef Mon Sep 17 00:00:00 2001 From: bbimber Date: Mon, 3 Feb 2025 22:15:39 -0800 Subject: [PATCH 59/59] Bugfix to Graphtyper --- .../run/alignment/Graphtyper.java | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java index 125ebb480..4c0b47120 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/Graphtyper.java @@ -1,5 +1,8 @@ package org.labkey.sequenceanalysis.run.alignment; +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFileReader; import org.json.JSONObject; import org.labkey.api.module.ModuleLoader; import org.labkey.api.pipeline.PipelineJob; @@ -15,11 +18,13 @@ import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.writer.PrintWriters; import org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.util.SequenceUtil; import java.io.File; 
import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -125,6 +130,12 @@ else if (!svVcf.exists()) args.add(threads.toString()); } + args.add("--region_file"); + File regionFile = new File(ctx.getWorkingDirectory(), "regions.txt"); + args.add(regionFile.getPath()); + + generateRegionFile(svVcf, regionFile); + ctx.getFileManager().addIntermediateFile(regionFile); wrapper.execute(args); File genotypes = new File(ctx.getWorkingDirectory(), "sv_results/" + SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".vcf.gz"); @@ -146,4 +157,47 @@ else if (!svVcf.exists()) } } } + + private static void generateRegionFile(File svVcf, File regionFile) throws PipelineJobException + { + try (PrintWriter writer = PrintWriters.getPrintWriter(regionFile)) + { + try (VCFFileReader reader = new VCFFileReader(svVcf, true); CloseableIterator iterator = reader.iterator()) + { + String chr = null; + int start = 1; + int end = -1; + while (iterator.hasNext()) + { + VariantContext vc = iterator.next(); + if (chr == null) + { + chr = vc.getContig(); + } + + if (!vc.getContig().equals(chr)) + { + writer.println(chr + ":" + start + "-" + end); + + // Reset + chr = vc.getContig(); + start = 1; + end = -1; + } + + start = Math.min(start, vc.getStart()); + end = Math.max(end, vc.getEnd()); + } + + if (chr != null) + { + writer.println(chr + ":" + start + "-" + end); + } + } + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } }