diff --git a/documentation/src/command/FORMULAS.rst b/documentation/src/command/FORMULAS.rst index 54524cf3..6c06ae1a 100644 --- a/documentation/src/command/FORMULAS.rst +++ b/documentation/src/command/FORMULAS.rst @@ -276,6 +276,8 @@ Genomic-Specific Functions +-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+ | ``REFBASES(str,int,int) : str`` | The reference bases, based on the build specified in the gor_config.txt file. | +-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+ +| ``REFBASES_WITH_BUILD(str,int,int,str) : str`` | The reference bases, based on the build supplied. | ++-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+ | ``BAMTAG(col,str) : str`` | Extract a single substring from an attribute value TAG_VALUE-like field (as in BAM files). | +-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+ | ``TAG(col,str,sep) : str`` | Extract a single substring from an attribute value field (as in GFF or VCF files, e.g. use semicolon ';' as separator). | diff --git a/documentation/src/functions.rst b/documentation/src/functions.rst index d41073eb..829d429b 100755 --- a/documentation/src/functions.rst +++ b/documentation/src/functions.rst @@ -468,6 +468,9 @@ Genomic-Specific Functions * - ``REFBASES(str,int,int) : str`` - The reference bases, based on the build specified in the gor_config.txt file. - :ref:`refbases` + * - ``REFBASES_WITH_BUILD(str,int,int,str) : str`` + - The reference bases, based on the build specified. + - :ref:`refbases_with_build` * - ``BAMTAG(col,str) : str`` - Extract a single substring from an attribute value TAG_VALUE-like field (as in BAM files). - :ref:`bamtag` diff --git a/drivers/src/test/java/org/gorpipe/s3/shared/ITestS3Shared.java b/drivers/src/test/java/org/gorpipe/s3/shared/ITestS3Shared.java index 44408a63..042df1e2 100644 --- a/drivers/src/test/java/org/gorpipe/s3/shared/ITestS3Shared.java +++ b/drivers/src/test/java/org/gorpipe/s3/shared/ITestS3Shared.java @@ -538,8 +538,9 @@ public void testWriteExplicitWrite() throws IOException { String securityContext = createSecurityContext("s3data", Credentials.OwnerType.System, "some_env", S3_KEY, S3_SECRET); String gorRoot = Path.of(workDir.getRoot().toString(), "some_project").toString(); String dataPath = "user_data/dummy2.gor"; + Files.createDirectories(Path.of(gorRoot).resolve("result_cache")); - String result = runGorPipeCLI(String.format("create #x = gorrow chr1,1 | write s3data://shared/%s;\n" + + String result = runGorPipeServer(String.format("create #x = gorrow chr1,1 | write s3data://shared/%s;\n" + "create #y = gor [#x] | calc x 4;\n" + "gor [#y]\n", dataPath), gorRoot, securityContext); diff --git a/gortools/src/main/scala/gorsat/parser/GenomeFunctions.scala b/gortools/src/main/scala/gorsat/parser/GenomeFunctions.scala index 5c9b61a8..49b1b197 100644 --- a/gortools/src/main/scala/gorsat/parser/GenomeFunctions.scala +++ b/gortools/src/main/scala/gorsat/parser/GenomeFunctions.scala @@ -63,6 +63,7 @@ object GenomeFunctions { functions.register("CHARSPHASED2GT", getSignatureStringDouble2String(charsPhased2Gt), charsPhased2Gt _) functions.registerWithOwner("REFBASE", getSignatureStringInt2String(removeOwner(refBase)), refBase _) functions.registerWithOwner("REFBASES", getSignatureStringIntInt2String(removeOwner(refBases)), refBases _) + functions.registerWithOwner("REFBASES_WITH_BUILD", getSignatureStringIntIntString2String(removeOwner(refBases_with_build)), refBases_with_build _) functions.registerWithOwner("GTFA", "String:Int:String:String:Int:String:String:Int:String:String2String", gtfa _ ) functions.registerWithOwner("GTMA", "String:Int:String:String:Int:String:String:Int:String:String2String", gtma _ ) functions.registerWithOwner("GTSTAT", "String:Int:String:String:Int:String:String:Int:String:String2String", gtstat _ ) @@ -99,6 +100,12 @@ object GenomeFunctions { } } + def refBases_with_build(owner: ParseArith, ex1: sFun, ex2: iFun, ex3: iFun, ex4: sFun): sFun = { + cvp => { + owner.context.getSession.getProjectContext.createRefSeq(ex4(cvp)).getBases(ex1(cvp), ex2(cvp), ex3(cvp)) + } + } + def refBase(owner: ParseArith, ex1: sFun, ex2: iFun): sFun = { cvp => { owner.refSeq.getBase(ex1(cvp), ex2(cvp)).toString diff --git a/gortools/src/main/scala/gorsat/parser/ParseArith.scala b/gortools/src/main/scala/gorsat/parser/ParseArith.scala index 2a353a30..7dfdc68e 100644 --- a/gortools/src/main/scala/gorsat/parser/ParseArith.scala +++ b/gortools/src/main/scala/gorsat/parser/ParseArith.scala @@ -105,7 +105,6 @@ class ParseArith(rs: GenomicIterator = null) extends JavaTokenParsers with Seria } def close(): Unit = { - if (refSeq != null) refSeq.close() subFilters.forEach(f => f.close()) } diff --git a/gortools/src/main/scala/gorsat/process/TestSessionFactory.scala b/gortools/src/main/scala/gorsat/process/TestSessionFactory.scala index dfc3d3a6..d7b84bf6 100644 --- a/gortools/src/main/scala/gorsat/process/TestSessionFactory.scala +++ b/gortools/src/main/scala/gorsat/process/TestSessionFactory.scala @@ -56,10 +56,10 @@ class TestSessionFactory(pipeOptions: PipeOptions, whitelistedCmdFiles:String, s val projectContext = projectContextBuilder .setAliasFile(pipeOptions.aliasFile) .setCacheDir(pipeOptions.cacheDir) - .setConfigFile(pipeOptions.configFile) .setLogDirectory(pipeOptions.logDir) .setConfigFile(pipeOptions.configFile) .setRoot(pipeOptions.gorRoot) + //.setConfigFile(if (pipeOptions.configFile != null) pipeOptions.configFile else "../tests/config/gor_unittests_config.txt") .setProjectName("project_10004") .setFileReader(fileReader) .setFileCache(new LocalFileCacheClient(fileReader, pipeOptions.cacheDir, useSubFolder, subFolderSize)) diff --git a/gortools/src/test/java/gorsat/UTestInclude.java b/gortools/src/test/java/gorsat/UTestInclude.java index 8b36830a..4a8d423e 100644 --- a/gortools/src/test/java/gorsat/UTestInclude.java +++ b/gortools/src/test/java/gorsat/UTestInclude.java @@ -21,7 +21,7 @@ public void testInclude() throws IOException { var subpath = workDir.getRoot().toPath().resolve(DataUtil.toFile("subquery", DataType.GORQ)); Files.writeString(subpath, subquery); var query = "def sim = sim;\n include "+subpath.toAbsolutePath()+";\n create yyy = gor [xxx] \n| group chrom -count;\n gor [yyy] | top 1"; - var result = TestUtils.runGorPipe(query); + var result = TestUtils.runGorPipe(new String[]{query, "-cachedir", workDir.getRoot().toString()}, true); Assert.assertEquals("Chrom\tbpStart\tbpStop\tallCount\n" + "chr1\t0\t250000000\t2\n", result); } diff --git a/gortools/src/test/java/gorsat/parser/UTestGenomicFunctions.java b/gortools/src/test/java/gorsat/parser/UTestGenomicFunctions.java index a9b7c2c0..bf7a0163 100644 --- a/gortools/src/test/java/gorsat/parser/UTestGenomicFunctions.java +++ b/gortools/src/test/java/gorsat/parser/UTestGenomicFunctions.java @@ -86,7 +86,19 @@ public void testRefBase() throws IOException { @Test public void testRefBases() { - TestUtils.assertCalculated("refbases('', 0, 0)", "N"); + Assert.assertEquals("cct", TestUtils.getCalculatedWithArgs("refbases('chr1', 10101, 10103)", new String[]{"-config", "../tests/data/ref_mini/gor_config.txt"})); + } + + @Test + public void testRefBasesWithBuild() { + TestUtils.assertCalculated("refbases_with_build('chr1', 10101, 10103, '../tests/data/ref_mini/chromSeq')", "cct"); + // todo test with meaningful input + } + + @Ignore("Read from full build, outside what is in ref_mini, skip as full build is not normally available") + @Test + public void testRefBasesWithBuild2() { + TestUtils.assertCalculated("refbases_with_build('chr1', 1010101, 1010103, '/private/gorkube-mount/csa/ref/hg19/chromSeq')", "ACC"); // todo test with meaningful input } diff --git a/gortools/src/test/scala/gorsat/Script/UTestSplitManager.scala b/gortools/src/test/scala/gorsat/Script/UTestSplitManager.scala index 0a05945a..0072551c 100644 --- a/gortools/src/test/scala/gorsat/Script/UTestSplitManager.scala +++ b/gortools/src/test/scala/gorsat/Script/UTestSplitManager.scala @@ -168,6 +168,13 @@ class UTestSplitManager extends AnyFunSuite { assertResult(SplitManager.SPLIT_REPLACEMENT_PATTERN)(manager.replacementPattern) assertResult(142)(manager.chromosomeSplits.size) } + + test("Create splitmanager from command: pgor split size") { + val query = "pgor " + SplitManager.SPLIT_REPLACEMENT_PATTERN + " -split 10000000 #dbsnp# | top 10" + val manager = createSplitManagerFromCommand(query) + assertResult(SplitManager.SPLIT_REPLACEMENT_PATTERN)(manager.replacementPattern) + assertResult(396)(manager.chromosomeSplits.size) + } test("Create splitmanager from command: pgor force whole chrom") { val query = "pgor " + SplitManager.REGULAR_REPLACEMENT_PATTERN + " #dbsnp# | rank 1000000 pos" diff --git a/model/src/main/java/org/gorpipe/gor/session/ProjectContext.java b/model/src/main/java/org/gorpipe/gor/session/ProjectContext.java index 79a9c832..ea104600 100644 --- a/model/src/main/java/org/gorpipe/gor/session/ProjectContext.java +++ b/model/src/main/java/org/gorpipe/gor/session/ProjectContext.java @@ -174,6 +174,15 @@ public RefSeq createRefSeq() { return refSeqFactory.create(); } + public RefSeq createRefSeq(String chromSeqPath) { + if (refSeqFactory == null) { + createRefSeqFactory(); + } + + // Lets create the default behaviour if it is not set + return refSeqFactory.create(chromSeqPath); + } + public String getProjectRoot() { return root.split("[ \t]+")[0]; } diff --git a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFactory.scala b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFactory.scala index d1676911..250f2d8a 100644 --- a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFactory.scala +++ b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFactory.scala @@ -24,5 +24,6 @@ package org.gorpipe.model.gor.iterators import org.gorpipe.gor.session.GenericFactory -abstract class RefSeqFactory extends GenericFactory[RefSeq]{ +abstract class RefSeqFactory extends GenericFactory[RefSeq] { + def create(altPath: String): RefSeq } diff --git a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromConfig.scala b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromChromSeq.scala similarity index 95% rename from model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromConfig.scala rename to model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromChromSeq.scala index 3d23b693..d548a9f6 100644 --- a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromConfig.scala +++ b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromChromSeq.scala @@ -36,15 +36,12 @@ import org.slf4j.{Logger, LoggerFactory} import java.nio.file.{Files, Path, Paths} -object RefSeqFromConfig { - var download_triggered = false -} - -class RefSeqFromConfig(ipath : String, fileReader : FileReader) extends RefSeq { +class RefSeqFromChromSeq(ipath : String, fileReader : FileReader) extends RefSeq { + protected var download_triggered = false private val GOR_REFSEQ_CACHE_FOLDER = System.getProperty("gor.refseq.cache.folder") private val GOR_REFSEQ_CACHE_DOWNLOAD = Option(System.getProperty("gor.refseq.cache.download", "true")).exists(_.toBoolean) - private val log: Logger = LoggerFactory.getLogger(RefSeqFromConfig.this.getClass) + private val log: Logger = LoggerFactory.getLogger(RefSeqFromChromSeq.this.getClass) lazy val path: String = getBuildPath(ipath) val buffLength = 10000 @@ -69,8 +66,8 @@ class RefSeqFromConfig(ipath : String, fileReader : FileReader) extends RefSeq { if (Files.exists(fullCachePath)) { log.debug("Using cached reference build {}", fullCachePath.toString) return fullCachePath.toString - } else if (GOR_REFSEQ_CACHE_DOWNLOAD && !RefSeqFromConfig.download_triggered) { - RefSeqFromConfig.download_triggered = true // Only trigger download once per client + } else if (GOR_REFSEQ_CACHE_DOWNLOAD && !download_triggered) { + download_triggered = true // Only trigger download once per client triggerRefSeqDownload(fullRefPath, fullCachePath) } } diff --git a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromConfigFactory.scala b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromConfigFactory.scala index 71281255..600e2a8b 100644 --- a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromConfigFactory.scala +++ b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromConfigFactory.scala @@ -24,6 +24,19 @@ package org.gorpipe.model.gor.iterators import org.gorpipe.gor.model.FileReader +import java.util +import java.util.Collections + class RefSeqFromConfigFactory(path: String, reader: FileReader) extends RefSeqFactory { - override def create(): RefSeq = new RefSeqFromConfig(path, reader) + var refSeqMap = Collections.synchronizedMap(new util.HashMap[String, RefSeq]()) + + override def create(): RefSeq = { + refSeqMap.computeIfAbsent(path, _ => { + new RefSeqFromChromSeq(path, reader)}) + } + + def create(altPath: String): RefSeq = { + refSeqMap.computeIfAbsent(altPath, _ => { + new RefSeqFromChromSeq(altPath, reader)}) + } } diff --git a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqRotatingFactory.scala b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqRotatingFactory.scala index 809e7026..8a782681 100644 --- a/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqRotatingFactory.scala +++ b/model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqRotatingFactory.scala @@ -24,4 +24,6 @@ package org.gorpipe.model.gor.iterators class RefSeqRotatingFactory extends RefSeqFactory { override def create(): RefSeq = new RefSeqRotating() + + override def create(altPath: String): RefSeq = new RefSeqRotating() } diff --git a/model/src/test/java/org/gorpipe/model/gor/iterators/UTestRefSeqFromConfig.java b/model/src/test/java/org/gorpipe/model/gor/iterators/UTestRefSeqFromConfig.java index fc530b25..35b783b4 100644 --- a/model/src/test/java/org/gorpipe/model/gor/iterators/UTestRefSeqFromConfig.java +++ b/model/src/test/java/org/gorpipe/model/gor/iterators/UTestRefSeqFromConfig.java @@ -37,7 +37,7 @@ public class UTestRefSeqFromConfig { @Test public void testGetRefbase() { - RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); + RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); Assert.assertEquals('C', refseq.getBase("chr1", 101000)); @@ -48,7 +48,7 @@ public void testGetRefbase() { Assert.assertEquals( 'N', refseq.getBase("chr1", 250000)); // Outside from same buffer, with fresh refseq - refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); + refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); Assert.assertEquals( 'N', refseq.getBase("chr1", 250001)); } @@ -56,7 +56,7 @@ public void testGetRefbase() { @Test public void testGetRefbases() { - RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); + RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); Assert.assertEquals("C", refseq.getBases("chr1", 101000, 101000)); @@ -74,7 +74,7 @@ public void testGetRefbases() { Assert.assertEquals( "NN", refseq.getBases("chr1", 250001, 250002)); // Outside from same buffer, with fresh refseq - refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); + refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); Assert.assertEquals( "NN", refseq.getBases("chr1", 250001, 250002)); } @@ -83,7 +83,7 @@ public void testGetRefbases() { @Test public void testGetRefbasesPerformance() { long startTime; - RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); + RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); // Prep buffers. refseq.getBase("chr1", 100001); @@ -107,7 +107,7 @@ public void testGetRefbasesPerformance() { public void testGetFullCachePath() { var refPath = "../tests/data/ref_mini/chromSeq"; var fullRefPath = Path.of(refPath).toAbsolutePath(); - RefSeqFromConfig refseq = new RefSeqFromConfig(refPath, new DriverBackedFileReader("")); + RefSeqFromChromSeq refseq = new RefSeqFromChromSeq(refPath, new DriverBackedFileReader("")); Assert.assertEquals("/tmp/cache/ref_mini/chromSeq", refseq.getFullCachePath(fullRefPath).toString()); } @@ -118,9 +118,11 @@ public void testGetRefbaseFromCache() throws InterruptedException { System.setProperty("gor.refseq.cache.download", "True"); System.setProperty("gor.refseq.cache.folder", workDirPath.resolve("cache").toString()); - RefSeqFromConfig.download_triggered_$eq(false); - RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); + RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader("")); + + refseq.download_triggered_$eq(false); + Assert.assertEquals('C', refseq.getBase("chr1", 101000)); diff --git a/test/src/main/java/gorsat/TestUtils.java b/test/src/main/java/gorsat/TestUtils.java index 5b869488..90904035 100644 --- a/test/src/main/java/gorsat/TestUtils.java +++ b/test/src/main/java/gorsat/TestUtils.java @@ -28,6 +28,7 @@ import gorsat.Utilities.MacroUtilities; import gorsat.process.*; import org.apache.commons.lang.SystemUtils; +import org.apache.commons.lang3.ArrayUtils; import org.gorpipe.exceptions.GorSystemException; import org.gorpipe.gor.driver.meta.DataType; import org.gorpipe.gor.model.GenomicIterator; @@ -467,6 +468,12 @@ public static String getCalculated(String expression) { return result[result.length - 1].replace("\n", ""); } + public static String getCalculatedWithArgs(String expression, String[] args) { + String query = "gor 1.mem | select 1,2 | top 1 | calc NEWCOL " + expression + " | top 1"; + String[] result = runGorPipe(ArrayUtils.addFirst(args, query), true).split("\t"); + return result[result.length - 1].replace("\n", ""); + } + public static void assertCalculated(String expression, String expectedResult) { String resultValue = getCalculated(expression); Assert.assertEquals("Expression: " + expression, expectedResult, resultValue);