Skip to content
Merged
5 changes: 5 additions & 0 deletions changes/35.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Handle unbound XML prefixes when strict=False

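Allow unbound XML prefixes when parsing with strict=False: prefixes that are
used but never declared are bound to a placeholder namespace instead of
causing a parse error. This is useful for handling XML documents that have
missing namespace declarations.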
23 changes: 22 additions & 1 deletion didl_lite/didl_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""DIDL-Lite (Digital Item Declaration Language) tools for Python."""
# pylint: disable=too-many-lines

import re
from typing import (
Any,
Dict,
Expand Down Expand Up @@ -1073,7 +1074,27 @@ def to_xml_string(*objects: DidlObject) -> bytes:
# Canonical URIs for the prefixes DIDL-Lite documents conventionally use; an
# unbound occurrence of one of these is bound to the real namespace rather
# than to a placeholder.
_FALLBACK_PREFIX_URIS = {
    "dc": "http://purl.org/dc/elements/1.1/",
    "upnp": "urn:schemas-upnp-org:metadata-1-0/upnp/",
    "dlna": "urn:schemas-dlna-org:metadata-1-0/",
}

# Prefixes that XML binds implicitly and that must never be (re)declared.
_RESERVED_XML_PREFIXES = frozenset({"xml", "xmlns"})

# Approximation of an XML NCName (a name without a colon).
_XML_NCNAME = r"[A-Za-z_][\w.\-]*"

# First element start tag in the document. Quoted attribute values are
# skipped as a unit so that a ">" inside a value does not end the tag early.
_ROOT_START_TAG_RE = re.compile(
    rf"<(?P<name>{_XML_NCNAME}(?::{_XML_NCNAME})?)(?=[\s/>])"
    r"""(?P<attrs>(?:[^<>"']|"[^"]*"|'[^']*')*)>"""
)
_TAG_PREFIX_RE = re.compile(rf"<({_XML_NCNAME}):")
_ATTR_PREFIX_RE = re.compile(rf"\s({_XML_NCNAME}):{_XML_NCNAME}\s*=")
_XMLNS_DECL_RE = re.compile(rf"\sxmlns:({_XML_NCNAME})\s*=")


def _declare_unbound_prefixes(xml_string: str) -> str:
    """Return xml_string with undeclared namespace prefixes bound on the root.

    The text is scanned for prefixes used on tags and attributes. Every
    prefix that the root element does not declare gets an ``xmlns:prefix``
    attribute inserted into the root start tag: the canonical URI for
    dc/upnp/dlna, otherwise a ``http://tempuri.org/<prefix>/`` placeholder.
    The string is returned unchanged when no root start tag can be located
    or when nothing is missing.
    """
    root_tag = _ROOT_START_TAG_RE.search(xml_string)
    if root_tag is None:
        # Not recognisable as XML; leave it for the parser to report.
        return xml_string

    used_prefixes = set(_TAG_PREFIX_RE.findall(xml_string))
    used_prefixes.update(_ATTR_PREFIX_RE.findall(xml_string))
    declared_on_root = set(_XMLNS_DECL_RE.findall(root_tag.group("attrs")))

    # A declaration on the root is in scope for the whole document; a nested
    # element that declares the same prefix itself simply overrides it.
    missing_prefixes = used_prefixes - declared_on_root - _RESERVED_XML_PREFIXES
    if not missing_prefixes:
        return xml_string

    declarations = []
    for prefix in sorted(missing_prefixes):  # sorted for deterministic output
        uri = _FALLBACK_PREFIX_URIS.get(prefix, f"http://tempuri.org/{prefix}/")
        declarations.append(f' xmlns:{prefix}="{uri}"')

    # Insert directly after the root element's name, before its attributes.
    insert_at = root_tag.end("name")
    return xml_string[:insert_at] + "".join(declarations) + xml_string[insert_at:]


def from_xml_string(
    xml_string: str, strict: bool = True
) -> List[Union[DidlObject, Descriptor]]:
    """Parse a DIDL-Lite XML string into DIDL objects.

    With ``strict=False`` the document is first patched so that namespace
    prefixes which are used but not declared on the root element no longer
    make the XML parser fail: the missing ``xmlns:prefix`` declarations are
    added to the root start tag (see ``_declare_unbound_prefixes``). With
    ``strict=True`` the string is parsed exactly as given.

    ``strict`` is also forwarded to ``from_xml_el``, whose result for the
    parsed root element is returned. Errors raised by the XML parser
    propagate to the caller.
    """
    if not strict:
        xml_string = _declare_unbound_prefixes(xml_string)

    xml_el = defusedxml.ElementTree.fromstring(xml_string)
    return from_xml_el(xml_el, strict)

Expand Down
42 changes: 42 additions & 0 deletions tests/test_didl_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,3 +680,45 @@ def test_item_improper_class_nesting(self) -> None:

item = items[0]
assert isinstance(item, didl_lite.MusicTrack)

def test_from_xml_string_unbound_prefix(self) -> None:
    """Non-strict parsing accepts a tag whose namespace prefix is undeclared."""
    # The root element declares the default, dc, upnp and dlna namespaces,
    # but the "song" prefix used on <song:subTitle> is never declared.
    xml_fragments = [
        '<DIDL-Lite xmlns="urn:schemas-upnp-org:metadata-1-0/DIDL-Lite/" ',
        'xmlns:dc="http://purl.org/dc/elements/1.1/" ',
        'xmlns:upnp="urn:schemas-upnp-org:metadata-1-0/upnp/" ',
        'xmlns:dlna="urn:schemas-dlna-org:metadata-1-0/">',
        '<item id="1" parentID="0" restricted="1">',
        "<dc:title>Test Title</dc:title>",
        "<song:subTitle>Test Subtitle</song:subTitle>",
        "<upnp:class>object.item.audioItem.musicTrack</upnp:class>",
        "</item>",
        "</DIDL-Lite>",
    ]
    didl_xml = "".join(xml_fragments)

    # With strict=False the unbound prefix must not abort parsing.
    parsed = didl_lite.from_xml_string(didl_xml, strict=False)

    assert len(parsed) == 1
    music_track = parsed[0]
    assert music_track.title == "Test Title"
    # The element carrying the undeclared prefix is still exposed as an
    # attribute on the parsed object.
    assert "sub_title" in music_track.__dict__
    assert music_track.sub_title == "Test Subtitle"
    assert isinstance(music_track, didl_lite.MusicTrack)

def test_music_track_artist_and_genre(self) -> None:
    """Serializing a MusicTrack emits its artist and genre as upnp elements."""
    music_track = didl_lite.MusicTrack(
        id="1",
        parent_id="0",
        title="Test",
        restricted="0",
        artist="My Artist",
        genre="My Genre",
    )

    serialized = didl_lite.to_xml_string(music_track)

    # Both values must appear as upnp-prefixed elements in the output bytes.
    expected_fragments = (
        b"<upnp:artist>My Artist</upnp:artist>",
        b"<upnp:genre>My Genre</upnp:genre>",
    )
    for fragment in expected_fragments:
        assert fragment in serialized
Loading