From 58181d3c8e98fc0b2f2a11cbe8b9b115e22b428f Mon Sep 17 00:00:00 2001 From: mastikamastika Date: Fri, 19 Dec 2025 01:14:22 +0300 Subject: [PATCH 1/7] Handle unbound XML prefixes when strict=False --- didl_lite/didl_lite.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/didl_lite/didl_lite.py b/didl_lite/didl_lite.py index a890e44..def2ec6 100644 --- a/didl_lite/didl_lite.py +++ b/didl_lite/didl_lite.py @@ -1072,8 +1072,34 @@ def to_xml_string(*objects: DidlObject) -> bytes: def from_xml_string( xml_string: str, strict: bool = True -) -> List[Union[DidlObject, Descriptor]]: - """Convert XML string to DIDL Objects.""" +) -> List[Union[DidlObject]]: + """Parse DIDL-Lite XML string. + + :param xml_string: The XML string to parse. + :param strict: Whether to use strict parsing. + :return: List of DidlObjects. + :""" + + if not strict: + import re + # Find all prefixes used in tags, e.g., + used_prefixes = set(re.findall(r'<([a-zA-Z0-9]+):', xml_string)) + + # Find all defined namespaces, e.g., xmlns:prefix=... + defined_prefixes = set(re.findall(r'xmlns:([a-zA-Z0-9]+)=', xml_string)) + + # Identify prefixes used but not defined. + # Exclude known namespaces that might be handled globally or are standard. + missing_prefixes = used_prefixes - defined_prefixes - {'DIDL-Lite', 'dc', 'upnp', 'dlna'} + + if missing_prefixes: + for prefix in missing_prefixes: + # Inject a temporary namespace definition for each missing prefix. + # We anchor the injection next to the standard dlna namespace. 
+ replacement = f'xmlns:dlna="urn:schemas-dlna-org:metadata-1-0/" xmlns:{prefix}="http://tempuri.org/{prefix}/"' + xml_string = xml_string.replace('xmlns:dlna="urn:schemas-dlna-org:metadata-1-0/"', replacement) + + # Proceed with parsing using the (potentially) patched xml_string xml_el = defusedxml.ElementTree.fromstring(xml_string) return from_xml_el(xml_el, strict) From 10bceb31bdd5e5dd6dd02f1d91088c25638c5485 Mon Sep 17 00:00:00 2001 From: mastikamastika Date: Fri, 19 Dec 2025 01:45:49 +0300 Subject: [PATCH 2/7] Simplify code by removing redundant if-check for missing prefixes --- didl_lite/didl_lite.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/didl_lite/didl_lite.py b/didl_lite/didl_lite.py index def2ec6..b02e42c 100644 --- a/didl_lite/didl_lite.py +++ b/didl_lite/didl_lite.py @@ -1089,15 +1089,14 @@ def from_xml_string( defined_prefixes = set(re.findall(r'xmlns:([a-zA-Z0-9]+)=', xml_string)) # Identify prefixes used but not defined. - # Exclude known namespaces that might be handled globally or are standard. missing_prefixes = used_prefixes - defined_prefixes - {'DIDL-Lite', 'dc', 'upnp', 'dlna'} - if missing_prefixes: - for prefix in missing_prefixes: - # Inject a temporary namespace definition for each missing prefix. - # We anchor the injection next to the standard dlna namespace. 
- replacement = f'xmlns:dlna="urn:schemas-dlna-org:metadata-1-0/" xmlns:{prefix}="http://tempuri.org/{prefix}/"' - xml_string = xml_string.replace('xmlns:dlna="urn:schemas-dlna-org:metadata-1-0/"', replacement) + # Remove the "if missing_prefixes:" line and just keep the for loop + for prefix in missing_prefixes: + dlna_ns = 'xmlns:dlna="urn:schemas-dlna-org:metadata-1-0/"' + if dlna_ns in xml_string: + replacement = f'{dlna_ns} xmlns:{prefix}="http://tempuri.org/{prefix}/"' + xml_string = xml_string.replace(dlna_ns, replacement) # Proceed with parsing using the (potentially) patched xml_string xml_el = defusedxml.ElementTree.fromstring(xml_string) From 4f20bde065ce405da52f776a64c6b42c5ab05949 Mon Sep 17 00:00:00 2001 From: mastikamastika Date: Fri, 19 Dec 2025 01:59:21 +0300 Subject: [PATCH 3/7] Fix test case XML namespace and verify unbound prefix handling --- tests/test_didl_lite.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_didl_lite.py b/tests/test_didl_lite.py index f16787c..1fc6935 100644 --- a/tests/test_didl_lite.py +++ b/tests/test_didl_lite.py @@ -680,3 +680,26 @@ def test_item_improper_class_nesting(self) -> None: item = items[0] assert isinstance(item, didl_lite.MusicTrack) + + def test_from_xml_string_unbound_prefix(self): + # The key fix is adding: xmlns="urn:schemas-upnp-org:metadata-1-0/DIDL-Lite/" + broken_xml = ( + '' + '' + 'Test Title' + 'Test Subtitle' + 'object.item.audioItem.musicTrack' + '' + '' + ) + + # Bu çağrı strict=False iken ParseError fırlatmamalıdır + objs = didl_lite.from_xml_string(broken_xml, strict=False) + + assert len(objs) == 1 + assert objs[0].title == "Test Title" + # Geçici namespace'in doğru atandığını kontrol edebilirsin + assert "sub_title" in objs[0].__dict__ From b4c98f292c379fd42fda72a8ff1a7e35d0644887 Mon Sep 17 00:00:00 2001 From: Steven Looman Date: Wed, 24 Dec 2025 21:44:44 +0100 Subject: [PATCH 4/7] Fix linting etc --- didl_lite/didl_lite.py | 20 ++++++++------------ 
pylintrc | 2 +- tests/test_didl_lite.py | 19 +++++++++++-------- tox.ini | 2 +- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/didl_lite/didl_lite.py b/didl_lite/didl_lite.py index b02e42c..52718f9 100644 --- a/didl_lite/didl_lite.py +++ b/didl_lite/didl_lite.py @@ -2,6 +2,7 @@ """DIDL-Lite (Digital Item Declaration Language) tools for Python.""" # pylint: disable=too-many-lines +import re from typing import ( Any, Dict, @@ -1072,24 +1073,19 @@ def to_xml_string(*objects: DidlObject) -> bytes: def from_xml_string( xml_string: str, strict: bool = True -) -> List[Union[DidlObject]]: - """Parse DIDL-Lite XML string. - - :param xml_string: The XML string to parse. - :param strict: Whether to use strict parsing. - :return: List of DidlObjects. - :""" - +) -> List[Union[DidlObject, Descriptor]]: + """Parse DIDL-Lite XML string.""" if not strict: - import re # Find all prefixes used in tags, e.g., - used_prefixes = set(re.findall(r'<([a-zA-Z0-9]+):', xml_string)) + used_prefixes = set(re.findall(r"<([a-zA-Z0-9]+):", xml_string)) # Find all defined namespaces, e.g., xmlns:prefix=... - defined_prefixes = set(re.findall(r'xmlns:([a-zA-Z0-9]+)=', xml_string)) + defined_prefixes = set(re.findall(r"xmlns:([a-zA-Z0-9]+)=", xml_string)) # Identify prefixes used but not defined. 
- missing_prefixes = used_prefixes - defined_prefixes - {'DIDL-Lite', 'dc', 'upnp', 'dlna'} + missing_prefixes = ( + used_prefixes - defined_prefixes - {"DIDL-Lite", "dc", "upnp", "dlna"} + ) # Remove the "if missing_prefixes:" line and just keep the for loop for prefix in missing_prefixes: diff --git a/pylintrc b/pylintrc index b9da209..360b504 100644 --- a/pylintrc +++ b/pylintrc @@ -1,2 +1,2 @@ [BASIC] -good-names=otherItem, storageMedium +good-names=otherItem, storageMedium diff --git a/tests/test_didl_lite.py b/tests/test_didl_lite.py index 1fc6935..9f88979 100644 --- a/tests/test_didl_lite.py +++ b/tests/test_didl_lite.py @@ -681,7 +681,8 @@ def test_item_improper_class_nesting(self) -> None: item = items[0] assert isinstance(item, didl_lite.MusicTrack) - def test_from_xml_string_unbound_prefix(self): + def test_from_xml_string_unbound_prefix(self) -> None: + """Test from_xml_string with unbound namespace prefix.""" # The key fix is adding: xmlns="urn:schemas-upnp-org:metadata-1-0/DIDL-Lite/" broken_xml = ( '' '' - 'Test Title' - 'Test Subtitle' - 'object.item.audioItem.musicTrack' - '' - '' + "Test Title" + "Test Subtitle" + "object.item.audioItem.musicTrack" + "" + "" ) - # Bu çağrı strict=False iken ParseError fırlatmamalıdır + # This call should not throw ParseError when strict=False. objs = didl_lite.from_xml_string(broken_xml, strict=False) assert len(objs) == 1 assert objs[0].title == "Test Title" - # Geçici namespace'in doğru atandığını kontrol edebilirsin + # Check that the temporary namespace is correctly assigned. 
 assert "sub_title" in objs[0].__dict__ + assert objs[0].sub_title == "Test Subtitle" + assert isinstance(objs[0], didl_lite.MusicTrack) diff --git a/tox.ini b/tox.ini index 5c4fdbd..8e369dc 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] envlist = py38, py39, py310, py311, py312, flake8, pylint, codespell, typing, black - + [gh-actions] python = 3.8: py38 From 5f8000d51d47924d36242be043c366ede656569c Mon Sep 17 00:00:00 2001 From: mastikamastika Date: Sat, 27 Dec 2025 02:24:59 +0300 Subject: [PATCH 5/7] Add unit test for MusicTrack artist and genre serialization --- tests/test_didl_lite.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_didl_lite.py b/tests/test_didl_lite.py index 9f88979..a328158 100644 --- a/tests/test_didl_lite.py +++ b/tests/test_didl_lite.py @@ -706,3 +706,12 @@ def test_from_xml_string_unbound_prefix(self) -> None: assert "sub_title" in objs[0].__dict__ assert objs[0].sub_title == "Test Subtitle" assert isinstance(objs[0], didl_lite.MusicTrack) + +def test_music_track_artist_and_genre(): + from didl_lite.didl_lite import MusicTrack, 
to_xml_string - track = MusicTrack(id="1", parent_id="0", title="Test", restricted="0", artist="My Artist", genre="My Genre") + def test_music_track_artist_and_genre(self) -> None: + """Test MusicTrack artist and genre properties.""" + track = didl_lite.MusicTrack( + id="1", + parent_id="0", + title="Test", + restricted="0", + artist="My Artist", + genre="My Genre", + ) - xml = to_xml_string(track) + xml = didl_lite.to_xml_string(track) - assert b"My Artist" in xml - assert b"My Genre" in xml + assert b"My Artist" in xml + assert b"My Genre" in xml From c57165eed55b275b9d3e12c887c99c91506a0c1e Mon Sep 17 00:00:00 2001 From: Steven Looman Date: Thu, 1 Jan 2026 15:50:35 +0100 Subject: [PATCH 7/7] Add change file --- changes/35.feature | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changes/35.feature diff --git a/changes/35.feature b/changes/35.feature new file mode 100644 index 0000000..bc5bae4 --- /dev/null +++ b/changes/35.feature @@ -0,0 +1,5 @@ +Handle unbound XML prefixes when strict=False + +Allow for unbound XML prefixes when parsing with strict=False. +This is useful for handling XML documents that may have missing +namespace declarations.