diff --git a/.gitignore b/.gitignore index 84afd451..5dd9b09b 100644 --- a/.gitignore +++ b/.gitignore @@ -82,3 +82,5 @@ mmif/vocabulary documentation/_build/ /VERSION +_issues + diff --git a/mmif/serialize/annotation.py b/mmif/serialize/annotation.py index 0fd8061c..6f1471aa 100644 --- a/mmif/serialize/annotation.py +++ b/mmif/serialize/annotation.py @@ -232,6 +232,7 @@ def add_property(self, name: str, :param name: the name of the property :param value: the property's desired value + :return: None """ # if self.check_prop_value_is_simple_enough(value): self.properties[name] = value @@ -256,18 +257,41 @@ def __getitem__(self, prop_name: str): def get(self, prop_name: str, default=None): """ - A getter for Annotation, will search for a property by its name, - and return the value if found, or the default value if not found. - This is designed to allow for directly accessing properties without - having to go through the properties object, or view-level - annotation metadata (common properties) encoded in the - ``view.metadata.contains`` dict. Note that the regular properties - will take the priority over the view-level common properties when - there are name conflicts. - - :param prop_name: the name of the property to get - :param default: the value to return if the property is not found - :return: the value of the property + Safe property access with optional default value. + + Searches for an annotation property by name and returns its value, + or a default value if not found. This method searches in multiple + locations with the following priority: + + 1. Direct properties (in ``annotation.properties``) + 2. Ephemeral properties (view-level metadata from ``contains``) + 3. Special fields (``@type``, ``properties``) + + This allows convenient access to properties without explicitly + checking the ``properties`` object or view-level metadata. 
+ + :param prop_name: The name of the property to retrieve + :param default: The value to return if the property is not found (default: None) + :return: The property value, or the default value if not found + + Examples + -------- + .. code-block:: python + + # Access annotation properties: + label = annotation.get('label', default='unknown') + start_time = annotation.get('start', default=0) + + # Access @type: + at_type = annotation.get('@type') + + # Safe access with custom default: + targets = annotation.get('targets', default=[]) + + See Also + -------- + __getitem__ : Direct property access that raises KeyError when not found + get_property : Alias for this method """ try: return self.__getitem__(prop_name) @@ -381,13 +405,44 @@ def add_property(self, name: str, def get(self, prop_name, default=None): """ - A special getter for Document properties. The major difference from - the super class's :py:meth:`Annotation.get` method is that Document - class has one more set of *"pending"* properties, that are added after - the Document object is created and will be serialized as a separate - :py:class:`Annotation` object of which ``@type = Annotation``. The - pending properties will take the priority over the regular properties - when there are conflicts. + Safe property access with optional default value for Document objects. + + Searches for a document property by name and returns its value, or a + default value if not found. Documents have a more complex property + hierarchy than regular annotations: + + Priority order (highest to lowest): + 1. Special fields ('id', 'location') + 2. Pending properties (added after creation, to be serialized as ``Annotation`` objects) + 3. Ephemeral properties (from existing ``Annotation`` annotations or view metadata) + 4. Original properties (in ``document.properties``) + + This allows convenient access to all document properties regardless of + where they're stored internally. 
+ + :param prop_name: The name of the property to retrieve + :param default: The value to return if the property is not found (default: None) + :return: The property value, or the default value if not found + + Examples + -------- + .. code-block:: python + + # Access document properties: + mime = document.get('mime', default='application/octet-stream') + location = document.get('location') + + # Access properties added after creation (pending): + author = document.get('author', default='anonymous') + publisher = document.get('publisher') + + # Access ephemeral properties from Annotation objects: + sentiment = document.get('sentiment', default='neutral') + + See Also + -------- + add_property : Add a new property to the document + Mmif.generate_capital_annotations : How pending properties are serialized """ if prop_name == 'id': # because all three dicts have `id` key as required field, we need @@ -402,7 +457,7 @@ class has one more set of *"pending"* properties, that are added after elif prop_name in self._props_ephemeral: return self._props_ephemeral[prop_name] else: - return super().get(prop_name) + return super().get(prop_name, default) get_property = get diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 2fc2a99a..2b759765 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -2,6 +2,11 @@ The :mod:`mmif` module contains the classes used to represent a full MMIF file as a live Python object. +The :class:`Mmif` class is a high-level container that provides convenient +string-based access to documents, views, and annotations via ``mmif[id]``. +The underlying ``documents`` and ``views`` attributes are list-like collections +that use integer indexing; use container-level access for ID-based lookups. + See the specification docs and the JSON Schema file for more information. """ @@ -139,8 +144,35 @@ class Mmif(MmifObject): """ MmifObject that represents a full MMIF file. 
+ This is a high-level container object that provides convenient string-based + access to documents, views, and annotations using their IDs. The underlying + collections (``documents`` and ``views``) are list-like and use integer + indexing, but Mmif itself accepts string IDs for convenient access. + :param mmif_obj: the JSON data :param validate: whether to validate the data against the MMIF JSON schema. + + Examples + -------- + Accessing objects by ID (high-level, convenient): + + .. code-block:: python + + mmif = Mmif(mmif_json) + doc = mmif['m1'] # Get document by ID + view = mmif['v1'] # Get view by ID + ann = mmif['v1:a1'] # Get annotation by long-form ID + + # Safe access with default: + doc = mmif.get('m99', default=None) + + Accessing via underlying lists (positional access): + + .. code-block:: python + + first_doc = mmif.documents[0] # First document + last_view = mmif.views[-1] # Last view + all_views = mmif.views[1:4] # Slice of views """ def __init__(self, mmif_obj: Optional[Union[bytes, str, dict]] = None, *, validate: bool = True) -> None: @@ -267,8 +299,8 @@ def _when_failed(): ## caching alignments if all(map(lambda x: x in alignment_ann.properties, ('source', 'target'))): try: - source_ann = self[alignment_ann.get('source')] - target_ann = self[alignment_ann.get('target')] + source_ann = self.__getitem__(alignment_ann.get('source')) + target_ann = self.__getitem__(alignment_ann.get('target')) if isinstance(source_ann, Annotation) and isinstance(target_ann, Annotation): source_ann._cache_alignment(alignment_ann, target_ann) target_ann._cache_alignment(alignment_ann, source_ann) @@ -591,7 +623,7 @@ def get_alignments(self, at_type1: Union[str, ThingTypesBase], at_type2: Union[s aligned_types = set() for ann_id in [alignment['target'], alignment['source']]: ann_id = cast(str, ann_id) - aligned_type = cast(Annotation, self[ann_id]).at_type + aligned_type = cast(Annotation, self.__getitem__(ann_id)).at_type aligned_types.add(aligned_type) 
aligned_types = list(aligned_types) # because membership check for sets also checks hash() values if len(aligned_types) == 2 and at_type1 in aligned_types and at_type2 in aligned_types: @@ -750,10 +782,10 @@ def _get_linear_anchor_point(self, ann: Annotation, targets_sorted=False, start: point = math.inf if start else -1 comp = min if start else max for target_id in ann.get_property('targets'): - point = comp(point, self._get_linear_anchor_point(self[target_id], start=start)) + point = comp(point, self._get_linear_anchor_point(self.__getitem__(target_id), start=start)) return point target_id = ann.get_property('targets')[0 if start else -1] - return self._get_linear_anchor_point(self[target_id], start=start) + return self._get_linear_anchor_point(self.__getitem__(target_id), start=start) elif (start and 'start' in props) or (not start and 'end' in props): return ann.get_property('start' if start else 'end') else: @@ -771,50 +803,77 @@ def get_end(self, annotation: Annotation) -> Union[int, float]: """ return self._get_linear_anchor_point(annotation, start=False) - def __getitem__(self, item: str) \ - -> Union[Document, View, Annotation, MmifMetadata, DocumentsList, ViewsList]: + def __getitem__(self, item: str) -> Union[Document, View, Annotation, MmifMetadata]: """ - index ([]) implementation for Mmif. This will try to find any object, given an identifier or an immediate - attribute name. When nothing is found, this will raise an error rather than returning a None + High-level string-based access to MMIF objects by their IDs. + + This method provides convenient access to documents, views, and annotations + using their string identifiers. For long-form annotation IDs (format: ``V:A``), + performs a two-level search through the specified view. + + Note: This is a high-level convenience method on the Mmif container itself. + The underlying ``documents`` and ``views`` collections are list-like and + only support integer indexing. 
+ + :param item: An object identifier: + - Document ID (e.g., 'm1', 'd1') + - View ID (e.g., 'v1', 'v_0') + - Annotation ID in long form (e.g., 'v1:a1', 'v1:tf1') + - Attribute name (e.g., 'metadata', 'documents', 'views') + :return: The requested Document, View, Annotation, or attribute object + :raises KeyError: If the item is not found + + Examples + -------- + High-level access by ID: - :raises KeyError: if the item is not found or if the search results are ambiguous - :param item: an attribute name or an object identifier (a document ID, a view ID, or an annotation ID). When - annotation ID is given as a "short" ID (without view ID prefix), the method will try to find a - match from the first view, and return immediately if found. - :return: the object searched for - :raise KeyError: if the item is not found or multiple objects are found with the same ID + .. code-block:: python + + mmif = Mmif(mmif_json) + + # Access documents: + doc = mmif['m1'] # Returns Document with ID 'm1' + + # Access views: + view = mmif['v1'] # Returns View with ID 'v1' + + # Access annotations (long-form ID): + ann = mmif['v1:a1'] # Returns Annotation from view v1 + + # Access attributes: + metadata = mmif['metadata'] # Returns MmifMetadata object + + # Will raise KeyError: + doc = mmif['nonexistent'] # KeyError! + + For list-style positional access, use the underlying collections: + + .. 
code-block:: python + + first_doc = mmif.documents[0] # Integer index + second_view = mmif.views[1] # Integer index + + See Also + -------- + get : Safe access with default value instead of raising KeyError """ if item in self._named_attributes(): - return self.__dict__[item] + return self.__dict__.__getitem__(item) if self.id_delimiter in item: vid, _ = item.split(self.id_delimiter, 1) - return self.views[vid].annotations[item] + view = self.views._items.get(vid) + if view is None: + raise KeyError(f"View with ID {vid} not found in the MMIF object.") + ann = view.annotations._items.get(item) + if ann is None: + raise KeyError(f"Annotation with ID {item} not found in the MMIF object.") + return ann else: # search for document first, then views # raise KeyError if nothing is found - try: - return self.documents.__getitem__(item) - except KeyError: - try: - return self.views.__getitem__(item) - except KeyError: + ret = self.documents._items.get(item) + if ret is None: + ret = self.views._items.get(item) + if ret is None: raise KeyError(f"Object with ID {item} not found in the MMIF object. ") - - def get(self, obj_id, default=None): - """ - High-level getter for Mmif. This will try to find any object, given - an identifier or an immediate attribute name. When nothing is found, - this will return a default value instead of raising an error. - - :param obj_id: an immediate attribute name or an object identifier - (a document ID, a view ID, or an annotation ID). When - annotation ID is given as a "short" ID (without view - ID prefix), the method will try to find a match from - the first view, and return immediately if found. 
- :param default: the default value to return if none is found - :return: the object searched for or the default value - """ - try: - return self.__getitem__(obj_id) - except KeyError: - return default + return ret diff --git a/mmif/serialize/model.py b/mmif/serialize/model.py index deb5b749..1bec7b29 100644 --- a/mmif/serialize/model.py +++ b/mmif/serialize/model.py @@ -10,11 +10,21 @@ core functionality for deserializing MMIF JSON data into live objects and serializing live objects into MMIF JSON data. Specialized behavior for the different components of MMIF is added in the subclasses. + +This module defines two main collection types: + +- :class:`DataList`: List-like collections that support integer/slice + indexing. For ID-based access, use indexing or ``get`` at the container + level. For example, for a DocumentsList, use its parent Mmif object's + getter methods to access documents by ID (e.g., ``mmif['doc1']``). +- :class:`DataDict`: Dict-like collections that support string key access. 
+ """ import json +import warnings from datetime import datetime -from typing import Union, Any, Dict, Optional, TypeVar, Generic, Generator, Iterator, Type, Set, ClassVar +from typing import Union, Any, Dict, Optional, TypeVar, Generic, Generator, Iterator, Type, Set, ClassVar, List T = TypeVar('T') S = TypeVar('S') @@ -327,14 +337,14 @@ def __contains__(self, key: str) -> bool: try: self.__getitem__(key) return True - except (TypeError, AttributeError, KeyError): + except (TypeError, KeyError): return False - def __getitem__(self, key) -> Union['MmifObject', str, datetime]: + def __getitem__(self, key) -> Any: if key in self._named_attributes(): value = self.__dict__[key] elif self._unnamed_attributes is None: - raise AttributeError(f"Additional properties are disallowed by {self.__class__}: {key}") + raise KeyError(f"Additional properties are disallowed by {self.__class__}: {key}") else: value = self._unnamed_attributes[key] if key not in self._required_attributes and self.is_empty(value): @@ -342,6 +352,43 @@ def __getitem__(self, key) -> Union['MmifObject', str, datetime]: else: return value + def get(self, obj_id, default=None): + """ + High-level safe getter that returns a default value instead of raising KeyError. + + This method wraps ``__getitem__()`` with exception handling, making it safe + to query for objects that might not exist. Available on all MmifObject subclasses. + + :param obj_id: An attribute name or object identifier (document ID, view ID, + annotation ID, or property name depending on the object type). + For Mmif objects: annotation IDs must be given in long form + (``view_id:annotation_id``); short IDs are not resolved. + :param default: The value to return if the key is not found (default: None) + :return: The object/value searched for, or the default value if not found + + Examples + -------- + Safe access pattern (works on all MmifObject subclasses): + + .. 
code-block:: python + + # On Mmif objects: + view = mmif.get('v1', default=None) # Returns None if not found + doc = mmif.get('doc1', default=None) + + # On Annotation/Document objects: + label = annotation.get('label', default='unknown') + author = document.get('author', default='anonymous') + + See Also + -------- + __getitem__ : Direct access that raises KeyError when not found + """ + try: + return self.__getitem__(obj_id) + except KeyError: + return default + class MmifObjectEncoder(json.JSONEncoder): """ @@ -403,18 +450,48 @@ def _deserialize(self, input_list: list) -> None: def get(self, key: str, default=None) -> Optional[T]: """ - Standard dictionary-style get() method. Relies on the implementation of __getitem__. + .. deprecated:: 1.1.3 + Do not use in new code. Will be removed in 2.0.0. + Use container-level access or positional indexing instead. - Will return the default value if the key is not found. + Deprecated method for retrieving list elements by string ID. :param key: the key to search for - :param default: the default value to return if the key is not found (defaults to None) + :param default: the default value to return if the key is not found + (defaults to None) :return: the value matching that key, or the default value if not found + + Examples + -------- + Old pattern (deprecated, do not use): + + .. code-block:: python + + view = mmif.views.get('v1') # DeprecationWarning! + + New patterns to use instead: + + .. code-block:: python + + # For ID-based access, use container: + view = mmif['v1'] + # Or with safe access: + view = mmif.get('v1', default=None) + # For positional access: + view = mmif.views[0] + + See Also + -------- + __getitem__ : List-style positional access with integers """ - try: - return self[key] - except KeyError: - return default + warnings.warn( + "The 'get' method on list-like collections is deprecated and " + "will be removed in 2.0.0. 
Use container-level access " + "(e.g., mmif['v1']) or positional indexing (e.g., views[0]).", + DeprecationWarning, + stacklevel=2 + ) + return self._items.get(key, default) def _append_with_key(self, key: str, value: T, overwrite=False) -> None: """ @@ -439,11 +516,46 @@ def _append_with_key(self, key: str, value: T, overwrite=False) -> None: def append(self, value, overwrite): raise NotImplementedError() - def __getitem__(self, key: str) -> T: - if key not in self.reserved_names: - return self._items.__getitem__(key) + def __getitem__(self, key: Union[int, slice]) -> Union[T, List[T]]: + """ + List-style positional access using integers or slices. + + This method provides pythonic list behavior - it only accepts integers + for positional access or slices for range access. For string-based ID + access, use container-level indexing instead (e.g., ``mmif['v1']``). + + :param key: An integer index or slice object + :return: The element at the index, or a list of elements for slices + :raises TypeError: If key is not an integer or slice (e.g., if a + string is passed) + + Examples + -------- + Positional access (pythonic list behavior): + + .. code-block:: python + + # Get first view: + first_view = mmif.views[0] + + # Get last document: + last_doc = mmif.documents[-1] + + # Slice to get multiple elements: + first_three_views = mmif.views[0:3] + + # This will raise TypeError: + view = mmif.views['v1'] # TypeError! + + # For ID-based access, use container: + view = mmif['v1'] # Correct way + """ + if isinstance(key, (int, slice)): + # Python's dicts preserve insertion order since 3.7. + # We can convert values to a list and index it. 
+ return list(self._items.values())[key] else: - raise KeyError("Don't use __getitem__ to access a reserved name") + raise TypeError(f"list indices must be integers or slices, not {type(key).__name__}") def __setitem__(self, key: str, value: T): if key not in self.reserved_names: @@ -480,6 +592,28 @@ def _serialize(self, *args, **kwargs) -> dict: return super()._serialize(self._items, **kwargs) def get(self, key: T, default=None) -> Optional[S]: + """ + Dictionary-style safe access with optional default value. + + This method provides pythonic dict behavior - returns the value for + the given key, or a default value if the key is not found. + + :param key: The key to look up + :param default: The value to return if key is not found (default: None) + :return: The value associated with the key, or the default value + + Examples + -------- + .. code-block:: python + + # Access contains metadata: + timeframe_meta = view.metadata.contains.get(AnnotationTypes.TimeFrame) + if timeframe_meta is None: + print("No TimeFrame annotations in this view") + + # With custom default: + value = some_dict.get('key', default={}) + """ return self._items.get(key, default) def _append_with_key(self, key: T, value: S, overwrite=False) -> None: diff --git a/mmif/serialize/view.py b/mmif/serialize/view.py index 24339e2a..80b7a65b 100644 --- a/mmif/serialize/view.py +++ b/mmif/serialize/view.py @@ -4,6 +4,12 @@ In MMIF, views are created by apps in a pipeline that are annotating data that was previously present in the MMIF file. + +The :class:`View` class is a high-level container that provides convenient +string-based access to annotations via ``view[id]``. The underlying +``annotations`` attribute is a list-like collection that uses integer indexing; +use container-level access for ID-based lookups. + """ import json import warnings @@ -26,9 +32,35 @@ class View(MmifObject): a list of annotations, and potentially a JSON-LD ``@context`` IRI. 
+ This is a high-level container object that provides convenient string-based + access to annotations using their IDs. The underlying ``annotations`` collection + is list-like and uses integer indexing, but View itself accepts string IDs for + convenient access. + If ``view_obj`` is not provided, an empty View will be generated. :param view_obj: the JSON data that defines the view + + Examples + -------- + Accessing annotations by ID (high-level, convenient): + + .. code-block:: python + + view = mmif['v1'] + ann = view['v1:a1'] # Get annotation by ID + doc = view['v1:td1'] # Get document by ID + + # Safe access with default: + ann = view.get('v1:a999', default=None) + + Accessing via underlying list (positional access): + + .. code-block:: python + + first_ann = view.annotations[0] # First annotation + last_ann = view.annotations[-1] # Last annotation + some_anns = view.annotations[1:5] # Slice of annotations """ def __init__(self, view_obj: Optional[Union[bytes, str, dict]] = None, parent_mmif=None, *_) -> None: @@ -287,22 +319,61 @@ def get_document_by_id(self, doc_id) -> Document: "View.get_document_by_id() is deprecated, use view[doc_id] instead.", DeprecationWarning ) - doc_found = self.annotations[doc_id] + doc_found = self.annotations._items.get(doc_id) if not isinstance(doc_found, Document): raise KeyError(f"Document \"{doc_id}\" not found in view {self.id}.") return cast(Document, doc_found) def __getitem__(self, key: str) -> 'Annotation': """ - index ([]) implementation for View. + High-level string-based access to annotations by their IDs. + + This method provides convenient access to annotations and documents + within this view using their string identifiers. + + Note: This is a high-level convenience method on the View container + itself. The underlying ``annotations`` collection is list-like and + only supports integer indexing. 
+ + :param key: The annotation or document ID (e.g., 'v1:a1', 'v1:td1'), + or an attribute name (e.g., 'metadata', 'annotations') + :return: The requested Annotation, Document, or attribute object + :raises KeyError: If the key is not found + + Examples + -------- + High-level access by ID: + + .. code-block:: python + + view = mmif['v1'] + + # Access annotations: + ann = view['v1:a1'] # Returns Annotation with ID 'v1:a1' + + # Access text documents in view: + doc = view['v1:td1'] # Returns Document with ID 'v1:td1' + + # Access attributes: + metadata = view['metadata'] # Returns ViewMetadata object + + # Will raise KeyError: + ann = view['nonexistent'] # KeyError! + + For list-style positional access, use the underlying collection: + + .. code-block:: python + + first_ann = view.annotations[0] # Integer index - :raises KeyError: if the key is not found - :param key: the search string. - :return: the :class:`mmif.serialize.annotation.Annotation` object searched for + See Also + -------- + get : Safe access with default value instead of raising KeyError + get_annotations : Search for annotations by type or properties """ if key in self._named_attributes(): return self.__dict__[key] - anno_result = self.annotations.get(key) + anno_result = self.annotations._items.get(key) if not anno_result: raise KeyError("Annotation ID not found: %s" % key) return anno_result diff --git a/tests/mmif_examples.py b/tests/mmif_examples.py index 5635d144..b19f9d9a 100644 --- a/tests/mmif_examples.py +++ b/tests/mmif_examples.py @@ -1,4 +1,7 @@ import itertools +import os +import subprocess +from pathlib import Path from string import Template from urllib import request @@ -11,10 +14,47 @@ 'FRACTIONAL_EXAMPLES', ] + +def _load_from_url_or_git(url): + """ + Load content from URL or local git repository. + If LOCALMMIF env var is set, use git show to load from local repo. + LOCALMMIF should be the path to the local mmif repository. 
+ """ + localmmif_str = os.environ.get('LOCALMMIF') + if localmmif_str: + localmmif = Path(localmmif_str) + if not localmmif.is_dir(): + raise ValueError(f"LOCALMMIF path is not a valid directory: {localmmif}") + # Extract the version/branch and file path from the URL + # URL format: https://raw.githubusercontent.com/clamsproject/mmif/{version}/{filepath} + url_prefix = "https://raw.githubusercontent.com/clamsproject/mmif/" + if url.startswith(url_prefix): + remainder = url[len(url_prefix):] + parts = remainder.split('/', 1) + if len(parts) == 2: + version, filepath = parts + # Use git show to get the file from the specific version + git_ref = f"{version}:{filepath}" + try: + result = subprocess.run( + ['git', 'show', git_ref], + cwd=str(localmmif), + capture_output=True, + text=True, + check=True + ) + return result.stdout + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Failed to load {git_ref} from local git repo at {localmmif}: {e.stderr}") + + # Fallback to URL loading + return request.urlopen(url).read().decode('utf-8') + everything_file_url = f"https://raw.githubusercontent.com/clamsproject/mmif/{__specver__}/specifications/samples/everything/raw.json" -old_mmif_w_short_id = f"https://raw.githubusercontent.com/clamsproject/mmif/1.0.5/specifications/samples/everything/raw.json" -EVERYTHING_JSON = request.urlopen(everything_file_url).read().decode('utf-8') -OLD_SHORTID_JSON = request.urlopen(old_mmif_w_short_id).read().decode('utf-8') +old_mmif_w_short_id_url = f"https://raw.githubusercontent.com/clamsproject/mmif/1.0.5/specifications/samples/everything/raw.json" +EVERYTHING_JSON = _load_from_url_or_git(everything_file_url) +OLD_SHORTID_JSON = _load_from_url_or_git(old_mmif_w_short_id_url) SWT_1_0_JSON = open('tests/samples/1.0/swt.mmif').read() # for keys and values in chain all typevers in mmif.vocabulary.*_types modules diff --git a/tests/test_serialize.py b/tests/test_serialize.py index 1bf6ff0e..b0836c5a 100644 --- 
a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -885,14 +885,14 @@ def test_new_textdocument(self): self.assertTrue(td1.properties.text_value == td1.text_value) self.assertNotEqual(td1.text_language, td2.text_language) self.assertEqual(english_text, td1.text_value) - self.assertEqual(td1, self.view_obj.annotations.get(td1.id)) + self.assertEqual(td1, self.view_obj[td1.id]) td3 = self.view_obj.new_textdocument(english_text, mime='plain/text') self.assertEqual(td1.text_value, td3.text_value) self.assertEqual(len(td1.properties), len(td3.properties) - 1) def test_parent(self): mmif_obj = Mmif(self.mmif_examples_json['everything']) - self.assertTrue(all(anno.parent == v.id for v in mmif_obj.views for anno in mmif_obj.get_view_by_id(v.id).annotations)) + self.assertTrue(all(anno.parent == v.id for v in mmif_obj.views for anno in mmif_obj[v.id].annotations)) def test_non_existing_parent(self): anno_obj = Annotation(FRACTIONAL_EXAMPLES['doc_only']) @@ -903,20 +903,20 @@ def test_non_existing_parent(self): def test_get_by_id(self): mmif_obj = Mmif(MMIF_EXAMPLES['everything']) - mmif_obj['m1'] - mmif_obj['v4:td1'] + mmif_obj.__getitem__('m1') + mmif_obj.__getitem__('v4:td1') with self.assertRaises(KeyError): - mmif_obj['m55'] + mmif_obj.__getitem__('m55') with self.assertRaises(KeyError): - mmif_obj['v1:td1'] - view_obj = mmif_obj['v4'] - td1 = view_obj['v4:td1'] + mmif_obj.__getitem__('v1:td1') + view_obj = mmif_obj.__getitem__('v4') + td1 = view_obj.__getitem__('v4:td1') self.assertEqual(td1.properties.mime, 'text/plain') - a1 = view_obj['v4:a1'] + a1 = view_obj.__getitem__('v4:a1') self.assertEqual(a1.at_type, AnnotationTypes.Alignment) with self.assertRaises(KeyError): - view_obj['completely-unlikely-annotation-id'] - + view_obj.__getitem__('completely-unlikely-annotation-id') + def test_get_annotations(self): mmif_obj = Mmif(MMIF_EXAMPLES['everything']) # simple search by at_type @@ -996,7 +996,8 @@ def test_error_to_text(self): 
self.assertTrue(aview.has_error()) self.assertTrue(isinstance(mmif_obj.get_last_error(), str)) err_str = 'custom error as a single long string' - aview.metadata.error = err_str + aview.metadata.error = ErrorDict({'message': err_str}) + print(aview.metadata.error) self.assertTrue(aview.has_error()) self.assertTrue(isinstance(mmif_obj.get_last_error(), str)) self.assertIn(err_str, mmif_obj.get_last_error()) @@ -1122,7 +1123,7 @@ def test_add_property(self): removed_prop_key, removed_prop_value = list(props.items())[-1] props.pop(removed_prop_key) new_mmif = Mmif(datum['json']) - new_mmif.get_view_by_id(view_id).annotations[first_ann_id].add_property(removed_prop_key, removed_prop_value) + new_mmif.get(view_id).annotations[first_ann_id].add_property(removed_prop_key, removed_prop_value) self.assertEqual(json.loads(datum['string'])['views'][j], json.loads(new_mmif.serialize())['views'][j], f'Failed on {i}, {view_id}') @@ -1319,9 +1320,9 @@ def test_document_adding_duplicate_properties(self): doc1.add_property('publisher', 'they') self.assertEqual(2, len(doc1._props_pending)) mmif_roundtrip3 = Mmif(mmif_roundtrip2.serialize()) - r0_v_anns = list(mmif_roundtrip3.views[r0_vid].get_annotations(AnnotationTypes.Annotation)) - r1_v_anns = list(mmif_roundtrip3.views[r1_vid].get_annotations(AnnotationTypes.Annotation)) - r2_v_anns = list(mmif_roundtrip3.views[r2_vid].get_annotations(AnnotationTypes.Annotation)) + r0_v_anns = list(mmif_roundtrip3[r0_vid].get_annotations(AnnotationTypes.Annotation)) + r1_v_anns = list(mmif_roundtrip3[r1_vid].get_annotations(AnnotationTypes.Annotation)) + r2_v_anns = list(mmif_roundtrip3[r2_vid].get_annotations(AnnotationTypes.Annotation)) # two props (`author` and `publisher`) are serialized to one `Annotation` objects self.assertEqual(1, len(r0_v_anns)) self.assertEqual(0, len(r1_v_anns)) @@ -1407,7 +1408,7 @@ def test_capital_annotation_generation_viewfinder(self): mmif[f'doc{i+1}'].add_property('author', authors[i]) mmif_roundtrip = 
Mmif(mmif.serialize()) for i in range(1, 3): - cap_anns = list(mmif_roundtrip.views[f'v{i}'].get_annotations(AnnotationTypes.Annotation)) + cap_anns = list(mmif_roundtrip[f'v{i}'].get_annotations(AnnotationTypes.Annotation)) self.assertEqual(1, len(cap_anns)) self.assertEqual(authors[i-1], cap_anns[0].get_property('author')) @@ -1474,7 +1475,7 @@ def test_add_property(self): properties.pop(removed_prop_key) try: new_mmif = Mmif(datum['json']) - new_mmif.get_document_by_id(document_id).add_property(removed_prop_key, removed_prop_value) + new_mmif.get(document_id).add_property(removed_prop_key, removed_prop_value) self.assertEqual(json.loads(datum['string']), json.loads(new_mmif.serialize()), f'Failed on {i}, {document_id}') except ValidationError: continue @@ -1489,13 +1490,6 @@ def test_setitem(self): self.datalist['v1'] = View({'id': 'v1'}) self.datalist['v2'] = View({'id': 'v2'}) - def test_getitem(self): - self.assertIs(self.mmif_obj['v1'], self.datalist['v1']) - - def test_getitem_raises(self): - with self.assertRaises(KeyError): - _ = self.datalist['reserved_names'] - def test_append(self): self.assertTrue('v256' not in self.datalist._items) self.datalist.append(View({'id': 'v256'})) @@ -1545,7 +1539,157 @@ def test_setitem_fail_on_reserved_name(self): self.assertEqual("can't set item on a reserved name", ke.args[0]) def test_get(self): - self.assertEqual(self.datalist['v1'], self.datalist.get('v1')) + # Test that get() returns the correct view and returns default for + # non-existent IDs + view = self.datalist.get('v1') + self.assertIsNotNone(view) + self.assertEqual('v1', view.id) + + # Test default value + self.assertIsNone(self.datalist.get('nonexistent')) + self.assertEqual('default', self.datalist.get('nonexistent', 'default')) + + # New tests for pythonic getters (#295) + def test_integer_indexing(self): + """Test that DataList supports integer indexing (list-like behavior).""" + # Positive indexing + first_view = self.datalist[0] + 
self.assertEqual('v1', first_view.id) + + second_view = self.datalist[1] + self.assertEqual('v2', second_view.id) + + # Negative indexing + last_view = self.datalist[-1] + self.assertEqual('v8', last_view.id) + + second_to_last = self.datalist[-2] + self.assertEqual('v7', second_to_last.id) + + def test_slice_indexing(self): + """Test that DataList supports slice indexing (list-like behavior).""" + # Basic slice + first_three = self.datalist[0:3] + self.assertEqual(3, len(first_three)) + self.assertIsInstance(first_three, list) + self.assertEqual('v1', first_three[0].id) + self.assertEqual('v3', first_three[2].id) + + # Slice with step + every_other = self.datalist[::2] + self.assertEqual(4, len(every_other)) + self.assertEqual('v1', every_other[0].id) + self.assertEqual('v3', every_other[1].id) + self.assertEqual('v5', every_other[2].id) + + # Slice from middle + middle = self.datalist[2:5] + self.assertEqual(3, len(middle)) + self.assertEqual('v3', middle[0].id) + self.assertEqual('v4', middle[1].id) + + # Empty slice + empty = self.datalist[10:20] + self.assertEqual(0, len(empty)) + + def test_string_indexing_raises_typeerror(self): + """Test that DataList raises TypeError for string indexing.""" + # String indexing should raise TypeError + with self.assertRaises(TypeError) as cm: + _ = self.datalist['v1'] + self.assertIn("list indices must be integers or slices", str(cm.exception)) + self.assertIn("not str", str(cm.exception)) + + # Test with documents list too + with self.assertRaises(TypeError) as cm: + _ = self.mmif_obj.documents['m1'] + self.assertIn("list indices must be integers or slices", str(cm.exception)) + + def test_get_deprecated_warning(self): + """Test that get() method raises DeprecationWarning.""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = self.datalist.get('v1') + + # Check that warning was raised + self.assertEqual(1, len(w)) + self.assertTrue(issubclass(w[0].category, DeprecationWarning)) + 
self.assertIn("deprecated", str(w[0].message).lower()) + self.assertIn("2.0.0", str(w[0].message)) + + # But it should still work and return the view + self.assertIsNotNone(result) + self.assertEqual('v1', result.id) + + def test_high_level_mmif_string_access(self): + """Test that high-level Mmif container accepts string IDs.""" + # Mmif should accept string access (container behavior) + view = self.mmif_obj['v1'] + self.assertEqual('v1', view.id) + + doc = self.mmif_obj['m1'] + self.assertEqual('m1', doc.id) + + # Long-form annotation ID + ann = self.mmif_obj['v5:bb1'] + self.assertIsNotNone(ann) + + def test_high_level_view_string_access(self): + """Test that high-level View container accepts string IDs.""" + view = self.mmif_obj['v4'] + + # View should accept string access (container behavior) + ann = view['v4:a1'] + self.assertIsNotNone(ann) + self.assertEqual('v4:a1', ann.id) + + def test_mmif_get_with_default(self): + """Test safe access on Mmif with default values.""" + # Existing object + view = self.mmif_obj.get('v1') + self.assertIsNotNone(view) + self.assertEqual('v1', view.id) + + # Non-existent object with default + result = self.mmif_obj.get('v999', default=None) + self.assertIsNone(result) + + # Non-existent with custom default + default_value = "not found" + result = self.mmif_obj.get('v999', default=default_value) + self.assertEqual(default_value, result) + + def test_mixed_access_patterns(self): + """Test that different access patterns can be used together.""" + # Integer access on list + first_view = self.mmif_obj.views[0] + + # String access on high-level container + specific_view = self.mmif_obj['v5'] + + # All should work + self.assertEqual('v1', first_view.id) + self.assertEqual('v5', specific_view.id) + + def test_datalist_all_collections(self): + """Test that all DataList subclasses behave consistently.""" + # ViewsList + view_by_int = self.mmif_obj.views[0] + view_by_container = self.mmif_obj[view_by_int.id] + 
self.assertEqual(view_by_int.id, view_by_container.id) + + # DocumentsList + if len(self.mmif_obj.documents) > 0: + doc_by_int = self.mmif_obj.documents[0] + doc_by_container = self.mmif_obj[doc_by_int.id] + self.assertEqual(doc_by_int.id, doc_by_container.id) + + # AnnotationsList + view = self.mmif_obj['v4'] + if len(view.annotations) > 0: + ann_by_int = view.annotations[0] + ann_by_container = view[ann_by_int.id] + self.assertEqual(ann_by_int.id, ann_by_container.id) def test_update(self): other_contains = """{