From 335a711bca3ee2ee227a5680be8fb03bde82e90c Mon Sep 17 00:00:00 2001 From: Patrick Golden Date: Tue, 16 Dec 2025 09:56:52 -0500 Subject: [PATCH] Add option to split output according to predicates This commit adds a `--split-with-predicates` flag which changes the behavior of the `--split` flag. Typically, the output is split along a subject prefix and an object prefix, e.g. `MONDO-to-NCIT.sssom.tsv`. When the `--split-with-predicates` flag is passed, it also includes the CURIE of the relation in the name of the output split file, e.g. `MONDO-skos_exactMatch-NCIT.sssom.tsv`. (Note that the colon in the CURIE has been replaced by an underscore, since colons cannot appear in filenames on Windows.) This (almost) matches the behavior of sssom-py; the difference is that sssom-py only includes the local name of the predicate, while this commit includes the full CURIE. --- .../obofoundry/sssom/cli/SimpleCLI.java | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/cli/src/main/java/org/incenp/obofoundry/sssom/cli/SimpleCLI.java b/cli/src/main/java/org/incenp/obofoundry/sssom/cli/SimpleCLI.java index 29c9bf8..c12b41d 100644 --- a/cli/src/main/java/org/incenp/obofoundry/sssom/cli/SimpleCLI.java +++ b/cli/src/main/java/org/incenp/obofoundry/sssom/cli/SimpleCLI.java @@ -198,6 +198,11 @@ private static class OutputOptions { description = "Split the set along subject and object prefix names and write the split sets in the specified directory.") String splitDirectory; + @Option(names = "--split-with-predicates", + description = "When splitting, include the predicate CURIE in the split identifier.") + boolean splitWithPredicates; + + @Option(names = { "-c", "--force-cardinality" }, hidden = true, description = "Include mapping cardinality values.") @@ -646,7 +651,7 @@ private void writeOutput(MappingSet set) { } if ( outputOpts.splitDirectory != null ) { - writeSplitSet(set, outputOpts.splitDirectory); + writeSplitSet(set, 
outputOpts.splitDirectory, outputOpts.splitWithPredicates); return; // Skip writing the full set when writing splits } boolean stdout = outputOpts.file.equals("-"); @@ -660,7 +665,7 @@ private void writeOutput(MappingSet set) { } } - private void writeSplitSet(MappingSet ms, String directory) { + private void writeSplitSet(MappingSet ms, String directory, boolean splitWithPredicates) { File dir = new File(directory); if ( !dir.isDirectory() && !dir.mkdirs() ) { helper.error("cannot create directory %s", directory); @@ -677,7 +682,19 @@ private void writeSplitSet(MappingSet ms, String directory) { String subjectPrefixName = pm.getPrefixName(mapping.getSubjectId()); String objectPrefixName = pm.getPrefixName(mapping.getObjectId()); if ( subjectPrefixName != null && objectPrefixName != null ) { - String splitId = subjectPrefixName + "-to-" + objectPrefixName; + String splitId; + if (splitWithPredicates) { + String predicatePrefixName = pm.getPrefixName(mapping.getPredicateId()); + if (predicatePrefixName != null) { + splitId = subjectPrefixName + "-" + pm.shortenIdentifier(mapping.getPredicateId()) + "-" + objectPrefixName; + splitId = splitId.replace(":", "_"); + } else { + splitId = subjectPrefixName + "-to-" + objectPrefixName; + } + } else { + splitId = subjectPrefixName + "-to-" + objectPrefixName; + } + mappingsBySplit.computeIfAbsent(splitId, k -> new ArrayList()).add(mapping); } }