Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions documentation/src/command/FORMULAS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ Genomic-Specific Functions
+-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+
| ``REFBASES(str,int,int) : str`` | The reference bases, based on the build specified in the gor_config.txt file. |
+-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+
| ``REFBASES_WITH_BUILD(str,int,int,str) : str`` | The reference bases, based on the build whose chromSeq path is given as the fourth argument. |
+-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+
| ``BAMTAG(col,str) : str`` | Extract a single substring from an attribute value TAG_VALUE-like field (as in BAM files). |
+-------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+
| ``TAG(col,str,sep) : str`` | Extract a single substring from an attribute value field (as in GFF or VCF files, e.g. use semicolon ';' as separator). |
Expand Down
3 changes: 3 additions & 0 deletions documentation/src/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,9 @@ Genomic-Specific Functions
* - ``REFBASES(str,int,int) : str``
- The reference bases, based on the build specified in the gor_config.txt file.
- :ref:`refbases`
* - ``REFBASES_WITH_BUILD(str,int,int,str) : str``
- The reference bases, based on the build whose chromSeq path is given as the fourth argument.
- :ref:`refbases_with_build`
* - ``BAMTAG(col,str) : str``
- Extract a single substring from an attribute value TAG_VALUE-like field (as in BAM files).
- :ref:`bamtag`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,9 @@ public void testWriteExplicitWrite() throws IOException {
String securityContext = createSecurityContext("s3data", Credentials.OwnerType.System, "some_env", S3_KEY, S3_SECRET);
String gorRoot = Path.of(workDir.getRoot().toString(), "some_project").toString();
String dataPath = "user_data/dummy2.gor";
Files.createDirectories(Path.of(gorRoot).resolve("result_cache"));

String result = runGorPipeCLI(String.format("create #x = gorrow chr1,1 | write s3data://shared/%s;\n" +
String result = runGorPipeServer(String.format("create #x = gorrow chr1,1 | write s3data://shared/%s;\n" +
"create #y = gor [#x] | calc x 4;\n" +
"gor [#y]\n", dataPath), gorRoot, securityContext);

Expand Down
7 changes: 7 additions & 0 deletions gortools/src/main/scala/gorsat/parser/GenomeFunctions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ object GenomeFunctions {
functions.register("CHARSPHASED2GT", getSignatureStringDouble2String(charsPhased2Gt), charsPhased2Gt _)
functions.registerWithOwner("REFBASE", getSignatureStringInt2String(removeOwner(refBase)), refBase _)
functions.registerWithOwner("REFBASES", getSignatureStringIntInt2String(removeOwner(refBases)), refBases _)
functions.registerWithOwner("REFBASES_WITH_BUILD", getSignatureStringIntIntString2String(removeOwner(refBases_with_build)), refBases_with_build _)
functions.registerWithOwner("GTFA", "String:Int:String:String:Int:String:String:Int:String:String2String", gtfa _ )
functions.registerWithOwner("GTMA", "String:Int:String:String:Int:String:String:Int:String:String2String", gtma _ )
functions.registerWithOwner("GTSTAT", "String:Int:String:String:Int:String:String:Int:String:String2String", gtstat _ )
Expand Down Expand Up @@ -99,6 +100,12 @@ object GenomeFunctions {
}
}

/**
 * Builds the row function behind `REFBASES_WITH_BUILD`.
 *
 * For every row the build path is evaluated from `ex4` and handed to the project
 * context's `createRefSeq`; the bases are then read from the returned reference sequence.
 *
 * @param owner parser whose session/project context supplies the reference sequence
 * @param ex1   first argument to `getBases` (string expression)
 * @param ex2   second argument to `getBases` (integer expression)
 * @param ex3   third argument to `getBases` (integer expression)
 * @param ex4   expression yielding the path passed to `createRefSeq`
 * @return a function producing the reference bases for a row
 */
def refBases_with_build(owner: ParseArith, ex1: sFun, ex2: iFun, ex3: iFun, ex4: sFun): sFun = cvp => {
  val projectContext = owner.context.getSession.getProjectContext
  // The build path is resolved first, exactly as in a single chained call.
  val buildRefSeq = projectContext.createRefSeq(ex4(cvp))
  buildRefSeq.getBases(ex1(cvp), ex2(cvp), ex3(cvp))
}

def refBase(owner: ParseArith, ex1: sFun, ex2: iFun): sFun = {
cvp => {
owner.refSeq.getBase(ex1(cvp), ex2(cvp)).toString
Expand Down
1 change: 0 additions & 1 deletion gortools/src/main/scala/gorsat/parser/ParseArith.scala
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ class ParseArith(rs: GenomicIterator = null) extends JavaTokenParsers with Seria
}

def close(): Unit = {
if (refSeq != null) refSeq.close()
subFilters.forEach(f => f.close())
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ class TestSessionFactory(pipeOptions: PipeOptions, whitelistedCmdFiles:String, s
val projectContext = projectContextBuilder
.setAliasFile(pipeOptions.aliasFile)
.setCacheDir(pipeOptions.cacheDir)
.setConfigFile(pipeOptions.configFile)
.setLogDirectory(pipeOptions.logDir)
.setConfigFile(pipeOptions.configFile)
.setRoot(pipeOptions.gorRoot)
//.setConfigFile(if (pipeOptions.configFile != null) pipeOptions.configFile else "../tests/config/gor_unittests_config.txt")
.setProjectName("project_10004")
.setFileReader(fileReader)
.setFileCache(new LocalFileCacheClient(fileReader, pipeOptions.cacheDir, useSubFolder, subFolderSize))
Expand Down
2 changes: 1 addition & 1 deletion gortools/src/test/java/gorsat/UTestInclude.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public void testInclude() throws IOException {
var subpath = workDir.getRoot().toPath().resolve(DataUtil.toFile("subquery", DataType.GORQ));
Files.writeString(subpath, subquery);
var query = "def sim = sim;\n include "+subpath.toAbsolutePath()+";\n create yyy = gor [xxx] \n| group chrom -count;\n gor [yyy] | top 1";
var result = TestUtils.runGorPipe(query);
var result = TestUtils.runGorPipe(new String[]{query, "-cachedir", workDir.getRoot().toString()}, true);
Assert.assertEquals("Chrom\tbpStart\tbpStop\tallCount\n" +
"chr1\t0\t250000000\t2\n", result);
}
Expand Down
14 changes: 13 additions & 1 deletion gortools/src/test/java/gorsat/parser/UTestGenomicFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,19 @@ public void testRefBase() throws IOException {

@Test
public void testRefBases() {
TestUtils.assertCalculated("refbases('', 0, 0)", "N");
Assert.assertEquals("cct", TestUtils.getCalculatedWithArgs("refbases('chr1', 10101, 10103)", new String[]{"-config", "../tests/data/ref_mini/gor_config.txt"}));
}

@Test
public void testRefBasesWithBuild() {
// The build is passed explicitly as the fourth argument (the ref_mini chromSeq path);
// the expected bases match what testRefBases reads for the same coordinates.
TestUtils.assertCalculated("refbases_with_build('chr1', 10101, 10103, '../tests/data/ref_mini/chromSeq')", "cct");
}

@Ignore("Read from full build, outside what is in ref_mini, skip as full build is not normally available")
@Test
public void testRefBasesWithBuild2() {
// Uses a hard-coded absolute path to a full hg19 build, so this can only pass on a
// machine where that path exists; kept ignored for that reason.
TestUtils.assertCalculated("refbases_with_build('chr1', 1010101, 1010103, '/private/gorkube-mount/csa/ref/hg19/chromSeq')", "ACC");
}

Expand Down
7 changes: 7 additions & 0 deletions gortools/src/test/scala/gorsat/Script/UTestSplitManager.scala
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,13 @@ class UTestSplitManager extends AnyFunSuite {
assertResult(SplitManager.SPLIT_REPLACEMENT_PATTERN)(manager.replacementPattern)
assertResult(142)(manager.chromosomeSplits.size)
}

// A pgor command with an explicit -split size should keep the split replacement
// pattern and produce the expected number of chromosome splits.
test("Create splitmanager from command: pgor split size") {
  val splitQuery = s"pgor ${SplitManager.SPLIT_REPLACEMENT_PATTERN} -split 10000000 #dbsnp# | top 10"
  val splitManager = createSplitManagerFromCommand(splitQuery)

  assertResult(SplitManager.SPLIT_REPLACEMENT_PATTERN)(splitManager.replacementPattern)
  assertResult(396)(splitManager.chromosomeSplits.size)
}

test("Create splitmanager from command: pgor force whole chrom") {
val query = "pgor " + SplitManager.REGULAR_REPLACEMENT_PATTERN + " #dbsnp# | rank 1000000 pos"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ public RefSeq createRefSeq() {
return refSeqFactory.create();
}

/**
 * Returns a reference sequence for the given path, obtained from the reference
 * sequence factory.
 *
 * @param chromSeqPath path handed to {@code RefSeqFactory.create(String)}
 * @return the reference sequence produced by the factory for that path
 */
public RefSeq createRefSeq(String chromSeqPath) {
// Let's create the default factory if none has been set.
if (refSeqFactory == null) {
createRefSeqFactory();
}

return refSeqFactory.create(chromSeqPath);
}

public String getProjectRoot() {
return root.split("[ \t]+")[0];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@ package org.gorpipe.model.gor.iterators

import org.gorpipe.gor.session.GenericFactory

abstract class RefSeqFactory extends GenericFactory[RefSeq]{
abstract class RefSeqFactory extends GenericFactory[RefSeq] {
def create(altPath: String): RefSeq
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,12 @@ import org.slf4j.{Logger, LoggerFactory}

import java.nio.file.{Files, Path, Paths}

object RefSeqFromConfig {
var download_triggered = false
}

class RefSeqFromConfig(ipath : String, fileReader : FileReader) extends RefSeq {
class RefSeqFromChromSeq(ipath : String, fileReader : FileReader) extends RefSeq {
protected var download_triggered = false
private val GOR_REFSEQ_CACHE_FOLDER = System.getProperty("gor.refseq.cache.folder")
private val GOR_REFSEQ_CACHE_DOWNLOAD = Option(System.getProperty("gor.refseq.cache.download", "true")).exists(_.toBoolean)

private val log: Logger = LoggerFactory.getLogger(RefSeqFromConfig.this.getClass)
private val log: Logger = LoggerFactory.getLogger(RefSeqFromChromSeq.this.getClass)

lazy val path: String = getBuildPath(ipath)
val buffLength = 10000
Expand All @@ -69,8 +66,8 @@ class RefSeqFromConfig(ipath : String, fileReader : FileReader) extends RefSeq {
if (Files.exists(fullCachePath)) {
log.debug("Using cached reference build {}", fullCachePath.toString)
return fullCachePath.toString
} else if (GOR_REFSEQ_CACHE_DOWNLOAD && !RefSeqFromConfig.download_triggered) {
RefSeqFromConfig.download_triggered = true // Only trigger download once per client
} else if (GOR_REFSEQ_CACHE_DOWNLOAD && !download_triggered) {
download_triggered = true // Only trigger download once per client
triggerRefSeqDownload(fullRefPath, fullCachePath)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ package org.gorpipe.model.gor.iterators

import org.gorpipe.gor.model.FileReader

import java.util
import java.util.Collections

/**
 * Factory handing out [[RefSeq]] instances backed by [[RefSeqFromChromSeq]].
 *
 * Instances are cached per path in `refSeqMap`, so repeated calls with the same path
 * return the same object.
 *
 * NOTE(review): cached instances are shared by every caller of this factory and are never
 * evicted or closed here - confirm that RefSeqFromChromSeq is safe to share and who is
 * responsible for closing it.
 *
 * @param path   default path, used by the no-argument `create()`
 * @param reader file reader passed to every RefSeqFromChromSeq that is created
 */
class RefSeqFromConfigFactory(path: String, reader: FileReader) extends RefSeqFactory {
  // The reference never changes, only the map contents do, hence a val.
  val refSeqMap: util.Map[String, RefSeq] =
    Collections.synchronizedMap(new util.HashMap[String, RefSeq]())

  /** Returns the (cached) reference sequence for the default path. */
  override def create(): RefSeq = create(path)

  /**
   * Returns the (cached) reference sequence for `altPath`, creating it on first request.
   *
   * @param altPath path of the build to read from
   */
  override def create(altPath: String): RefSeq =
    refSeqMap.computeIfAbsent(altPath, buildPath => new RefSeqFromChromSeq(buildPath, reader))
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,6 @@ package org.gorpipe.model.gor.iterators

/**
 * Factory that always produces a new [[RefSeqRotating]], regardless of how it is asked.
 */
class RefSeqRotatingFactory extends RefSeqFactory {
  /** Creates a fresh rotating reference sequence. */
  override def create(): RefSeq = new RefSeqRotating()

  /** Same as `create()`; `altPath` is ignored. */
  override def create(altPath: String): RefSeq = create()
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public class UTestRefSeqFromConfig {
@Test
public void testGetRefbase() {

RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));

Assert.assertEquals('C', refseq.getBase("chr1", 101000));

Expand All @@ -48,15 +48,15 @@ public void testGetRefbase() {
Assert.assertEquals( 'N', refseq.getBase("chr1", 250000));

// Outside from same buffer, with fresh refseq
refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
Assert.assertEquals( 'N', refseq.getBase("chr1", 250001));
}


@Test
public void testGetRefbases() {

RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));

Assert.assertEquals("C", refseq.getBases("chr1", 101000, 101000));

Expand All @@ -74,7 +74,7 @@ public void testGetRefbases() {
Assert.assertEquals( "NN", refseq.getBases("chr1", 250001, 250002));

// Outside from same buffer, with fresh refseq
refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
Assert.assertEquals( "NN", refseq.getBases("chr1", 250001, 250002));

}
Expand All @@ -83,7 +83,7 @@ public void testGetRefbases() {
@Test
public void testGetRefbasesPerformance() {
long startTime;
RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));

// Prep buffers.
refseq.getBase("chr1", 100001);
Expand All @@ -107,7 +107,7 @@ public void testGetRefbasesPerformance() {
public void testGetFullCachePath() {
var refPath = "../tests/data/ref_mini/chromSeq";
var fullRefPath = Path.of(refPath).toAbsolutePath();
RefSeqFromConfig refseq = new RefSeqFromConfig(refPath, new DriverBackedFileReader(""));
RefSeqFromChromSeq refseq = new RefSeqFromChromSeq(refPath, new DriverBackedFileReader(""));
Assert.assertEquals("/tmp/cache/ref_mini/chromSeq", refseq.getFullCachePath(fullRefPath).toString());
}

Expand All @@ -118,9 +118,11 @@ public void testGetRefbaseFromCache() throws InterruptedException {

System.setProperty("gor.refseq.cache.download", "True");
System.setProperty("gor.refseq.cache.folder", workDirPath.resolve("cache").toString());
RefSeqFromConfig.download_triggered_$eq(false);

RefSeqFromConfig refseq = new RefSeqFromConfig("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));
RefSeqFromChromSeq refseq = new RefSeqFromChromSeq("../tests/data/ref_mini/chromSeq", new DriverBackedFileReader(""));

refseq.download_triggered_$eq(false);


Assert.assertEquals('C', refseq.getBase("chr1", 101000));

Expand Down
7 changes: 7 additions & 0 deletions test/src/main/java/gorsat/TestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import gorsat.Utilities.MacroUtilities;
import gorsat.process.*;
import org.apache.commons.lang.SystemUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.gorpipe.exceptions.GorSystemException;
import org.gorpipe.gor.driver.meta.DataType;
import org.gorpipe.gor.model.GenomicIterator;
Expand Down Expand Up @@ -467,6 +468,12 @@ public static String getCalculated(String expression) {
return result[result.length - 1].replace("\n", "");
}

/**
 * Evaluates a calc expression through a fixed one-row query, passing extra
 * command-line arguments to the pipe run.
 *
 * @param expression expression placed after {@code calc NEWCOL} in the query
 * @param args       additional arguments; the query is inserted in front of them
 * @return the last tab-separated field of the output, with newlines removed
 */
public static String getCalculatedWithArgs(String expression, String[] args) {
    final String query = "gor 1.mem | select 1,2 | top 1 | calc NEWCOL " + expression + " | top 1";
    // The query must be the first argument, followed by the caller's options.
    final String[] pipeArgs = ArrayUtils.addFirst(args, query);
    final String[] fields = runGorPipe(pipeArgs, true).split("\t");
    final String lastField = fields[fields.length - 1];
    return lastField.replace("\n", "");
}

public static void assertCalculated(String expression, String expectedResult) {
String resultValue = getCalculated(expression);
Assert.assertEquals("Expression: " + expression, expectedResult, resultValue);
Expand Down
Loading