diff --git a/mmif/utils/cli/source.py b/mmif/utils/cli/source.py
index 7e662f9f..154c315f 100644
--- a/mmif/utils/cli/source.py
+++ b/mmif/utils/cli/source.py
@@ -206,7 +206,13 @@ def generate_source_mmif_from_file(documents, prefix=None, scheme='file', **igno
             location = str(location)
         doc = Document()
         doc.at_type = at_types[mime.split('/', maxsplit=1)[0]]
-        doc.properties.location = f"{location_uri.scheme}://{location if not location.startswith(location_uri.scheme) else location[len(location_uri.scheme)+3:]}"
+        # A location that merely contains a colon (e.g. "cpb-aacip-507-v40js9j432:video") can be misread by urlparse as having a scheme, so the parsed scheme is only trusted when it equals the `scheme` parameter or 'file'
+        if location_uri.scheme == scheme or location_uri.scheme == 'file':
+            # Parsed scheme is trusted: emit "<scheme>://<rest>", dropping an existing "<scheme>://" prefix from location so it is not duplicated
+            doc.properties.location = f"{location_uri.scheme}://{location if not location.startswith(location_uri.scheme + '://') else location[len(location_uri.scheme)+3:]}"
+        else:
+            # Parsed scheme is not trusted: keep the whole location and prefix it with the `scheme` parameter. NOTE(review): a genuine URI whose scheme is neither `scheme` nor 'file' would also land here -- confirm that is intended
+            doc.properties.location = f"{scheme}://{location}"
         doc.properties.id = f'd{doc_id}'
         doc.properties.mime = mime
         pl.add_document(doc)
diff --git a/mmif/ver.py b/mmif/ver.py
new file mode 100644
index 00000000..bc348c0a
--- /dev/null
+++ b/mmif/ver.py
@@ -0,0 +1,2 @@
+__version__ = "1.0.0"
+__specver__ = "1.1.0"
diff --git a/tests/test_utils_cli.py b/tests/test_utils_cli.py
index adb8e857..fde14f66 100644
--- a/tests/test_utils_cli.py
+++ b/tests/test_utils_cli.py
@@ -114,6 +114,18 @@ def test_generate_mixed_scheme(self):
         self.assertTrue('baapb' in schemes)
         self.assertTrue('file' in schemes)
 
+    def test_scheme_with_colon_in_location(self):
+        # Regression test: a colon inside the location must not be misinterpreted as a URI scheme
+        self.scheme = 'baapb'
+        self.docs.append("video:cpb-aacip-507-v40js9j432:video")
+        source_mmif = Mmif(self.generate_source_mmif())
+        self.assertEqual(len(source_mmif.documents), 1)
+        doc = source_mmif.documents[0]
+        # The document must carry the requested scheme, not the text that precedes the colon
+        self.assertEqual(doc.location_scheme(), 'baapb')
+        # The identifier, including its colon, must follow the scheme unchanged
+        self.assertEqual(doc.location, 'baapb://cpb-aacip-507-v40js9j432:video')
+
 
 class TestRewind(unittest.TestCase):
     def setUp(self):