From 2bd897b8cd6aa4db160fc81140a7a72d17ad6113 Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Tue, 28 Oct 2025 20:33:46 -0400 Subject: [PATCH] updated outdated docstrings --- mmif/serialize/annotation.py | 45 ++++++++++++---------- mmif/serialize/mmif.py | 30 ++++++++------- mmif/serialize/model.py | 10 ++--- mmif/serialize/view.py | 20 +++++----- mmif/utils/sequence_helper.py | 10 ++--- mmif/utils/text_document_helper.py | 9 +++++ mmif/utils/video_document_helper.py | 17 +++++--- templates/python/vocabulary/base_types.txt | 11 ++++++ 8 files changed, 92 insertions(+), 60 deletions(-) diff --git a/mmif/serialize/annotation.py b/mmif/serialize/annotation.py index 0b2d3ad6..9987a600 100644 --- a/mmif/serialize/annotation.py +++ b/mmif/serialize/annotation.py @@ -74,10 +74,11 @@ def _deserialize(self, input_dict: dict) -> None: def _cache_alignment(self, alignment_ann: 'Annotation', alignedto_ann: 'Annotation') -> None: """ - Cache alignment information. This cache will not be serialized. - + Cache alignment information. This cache will not be serialized. + :param alignment_ann: the Alignment annotation that has this annotation on one side :param alignedto_ann: the annotation that this annotation is aligned to (other side of Alignment) + :return: None """ self._alignments[alignment_ann] = alignedto_ann @@ -228,10 +229,9 @@ def add_property(self, name: str, value: Union[PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]) -> None: """ Adds a property to the annotation's properties. - + :param name: the name of the property :param value: the property's desired value - :return: None """ # if self.check_prop_value_is_simple_enough(value): self.properties[name] = value @@ -336,29 +336,32 @@ def add_property(self, name: str, ) -> None: """ Adds a property to the document's properties. 
- - Unlike the parent :class:`Annotation` class, added properties of a - ``Document`` object can be lost during serialization unless it belongs - to somewhere in a ``Mmif`` object. This is because we want to keep - ``Document`` object as "read-only" as possible. Thus, if you want to add - a property to a ``Document`` object, - - * add the document to a ``Mmif`` object (either in the documents list or + + Unlike the parent :class:`Annotation` class, added properties of a + ``Document`` object can be lost during serialization unless it belongs + somewhere in a ``Mmif`` object. This is because we want to keep + ``Document`` object as "read-only" as possible. Thus, if you want to add + a property to a ``Document`` object, + + * add the document to a ``Mmif`` object (either in the documents list or in a view from the views list), or * directly write to ``Document.properties`` instead of using this method - (which is not recommended). - - With the former method, the SDK will record the added property as a - `Annotation` annotation object, separate from the original `Document` + (which is not recommended). + + With the former method, the SDK will record the added property as an + `Annotation` annotation object, separate from the original `Document` object. See :meth:`.Mmif.generate_capital_annotations()` for more. - + A few notes to keep in mind: - - #. You can't overwrite an existing property of a ``Document`` object. - #. A MMIF can have multiple ``Annotation`` objects with the same + + #. You can't overwrite an existing property of a ``Document`` object. + #. A MMIF can have multiple ``Annotation`` objects with the same property name but different values. When this happens, the SDK will - only keep the latest value (in order of appearances in views list) of + only keep the latest value (in order of appearance in views list) of the property, effectively overwriting the previous values. 
+ + :param name: the name of the property + :param value: the property's desired value (note: Document accepts fewer value types than Annotation) """ # we don't checking if this k-v already exists in _original (new props) or _ephemeral (read from existing MMIF) # because it is impossible to keep the _original updated when a new annotation is added (via `new_annotation`) diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 2715bae8..57c8d93f 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -183,13 +183,13 @@ def serialize(self, pretty: bool = False, sanitize: bool = False, autogenerate_c """ Serializes the MMIF object to a JSON string. - :param sanitize: If True, performs some sanitization of before returning + :param pretty: If True, returns string representation with indentation. + :param sanitize: If True, performs some sanitization before returning the JSON string. See :meth:`sanitize` for details. - :param autogenerate_capital_annotations: If True, automatically convert - any "pending" temporary properties from `Document` objects to - `Annotation` objects. See :meth:`generate_capital_annotations` for + :param autogenerate_capital_annotations: If True, automatically convert + any "pending" temporary properties from `Document` objects to + `Annotation` objects. See :meth:`generate_capital_annotations` for details. - :param pretty: If True, returns string representation with indentation. :return: JSON string of the MMIF object. """ if autogenerate_capital_annotations: @@ -408,13 +408,13 @@ def add_view(self, view: View, overwrite=False) -> None: """ Appends a View object to the views list. - Fails if there is already a view with the same ID or a document + Fails if there is already a view with the same ID or a document with the same ID in the MMIF object. 
- :param view: the Document object to add + :param view: the View object to add :param overwrite: if set to True, will overwrite an existing view with the same ID - :raises KeyError: if ``overwrite`` is set to False and existing + :raises KeyError: if ``overwrite`` is set to False and existing object (document or view) with the same ID exists :return: None """ @@ -503,6 +503,7 @@ def get_documents_locations(self, m_type: Union[DocumentTypes, str], path_only=F Only top-level documents have locations, so we only check them. :param m_type: the type to search for + :param path_only: if True, returns resolved file system paths instead of location URIs :return: a list of the values of the location fields in the corresponding documents """ docs = [document for document in self.documents if document.is_type(m_type) and document.location is not None] @@ -516,6 +517,7 @@ def get_document_location(self, m_type: Union[DocumentTypes, str], path_only=Fal Method to get the location of *first* document of given type. :param m_type: the type to search for + :param path_only: if True, returns resolved file system path instead of location URI :return: the value of the location field in the corresponding document """ # TODO (krim @ 8/10/20): Is returning the first location desirable? @@ -568,6 +570,8 @@ def get_alignments(self, at_type1: Union[str, ThingTypesBase], at_type2: Union[s """ Finds views where alignments between two given annotation types occurred. + :param at_type1: the first annotation type to search for alignments + :param at_type2: the second annotation type to search for alignments :return: a dict that keyed by view IDs (str) and has lists of alignment Annotation objects as values. """ v_and_a = {} @@ -691,8 +695,8 @@ def get_annotations_between_time(self, start: Union[int, float], end: Union[int, """ Finds annotations that are anchored between the given time points. 
- :param start: the start time point in the unit of `input_unit` - :param end: the end time point in the unit of `input_unit` + :param start: the start time point + :param end: the end time point :param time_unit: the unit of the input time points. Default is `ms`. :param at_types: a list of annotation types to filter with. Any type in this list will be included in the return. :return: an iterator of Annotation objects that are anchored between the given time points @@ -725,12 +729,12 @@ def get_annotations_between_time(self, start: Union[int, float], end: Union[int, def _get_linear_anchor_point(self, ann: Annotation, targets_sorted=False, start: bool = True) -> Union[int, float]: # TODO (krim @ 2/5/24): Update the return type once timeunits are unified to `ms` as integers (https://github.com/clamsproject/mmif/issues/192) """ - Retrieves the anchor point of the annotation. Currently, this method only supports linear anchors, + Retrieves the anchor point of the annotation. Currently, this method only supports linear anchors, namely time and text, hence does not work with spatial anchors (polygons or video-object). - + :param ann: An Annotation object that has a linear anchor point. Namely, some subtypes of `Region` vocabulary type. - :param start: If True, returns the start anchor point. Otherwise, returns the end anchor point. N/A for `timePoint` anchors. :param targets_sorted: If True, the method will assume that the targets are sorted in the order of the anchor points. + :param start: If True, returns the start anchor point. Otherwise, returns the end anchor point. N/A for `timePoint` anchors. :return: the anchor point of the annotation. 
1d for linear regions (time, text) """ props = ann.properties diff --git a/mmif/serialize/model.py b/mmif/serialize/model.py index 70ff9e25..8f2ee634 100644 --- a/mmif/serialize/model.py +++ b/mmif/serialize/model.py @@ -157,6 +157,7 @@ def _serialize(self, alt_container: Optional[Dict] = None) -> dict: If a subclass needs special treatment during the mapping, it needs to override this method. + :param alt_container: optional alternative container dict to serialize instead of _unnamed_attributes :return: the prepared dictionary """ container = alt_container if alt_container is not None else self._unnamed_attributes @@ -375,14 +376,13 @@ def _deserialize(self, input_list: list) -> None: def get(self, key: str, default=None) -> Optional[T]: """ - Standard dictionary-style get() method, albeit with no ``default`` - parameter. Relies on the implementation of __getitem__. + Standard dictionary-style get() method. Relies on the implementation of __getitem__. - Will return ``None`` if the key is not found. + Will return the default value if the key is not found. 
:param key: the key to search for - :param default: the default value to return if the key is not found - :return: the value matching that key + :param default: the default value to return if the key is not found (defaults to None) + :return: the value matching that key, or the default value if not found """ try: return self[key] diff --git a/mmif/serialize/view.py b/mmif/serialize/view.py index d8821c5b..2fec31a1 100644 --- a/mmif/serialize/view.py +++ b/mmif/serialize/view.py @@ -215,7 +215,7 @@ def add_document(self, document: Document, overwrite=False) -> Annotation: :param document: the Document object to add :param overwrite: if set to True, will overwrite an existing view with the same ID - :return: None + :return: the added Document object (as an Annotation) """ return self.add_annotation(document, overwrite) @@ -270,18 +270,18 @@ def get_documents(self) -> List[Document]: def get_document_by_id(self, doc_id) -> Document: """ .. deprecated:: 1.1.0 - Will be removed in 2.0.0. - Use general ``Mmif.__getitem__()`` method instead to retrieve - any document across the MMIF, or View.__getitems__() to + Will be removed in 2.0.0. + Use general ``Mmif.__getitem__()`` method instead to retrieve + any document across the MMIF, or View.__getitem__() to retrieve documents within the view. - Thinly wraps the Mmif.__getitem__ method and returns an Annotation - object. Note that although this method is under View class, it can - be used to retrieve any annotation across the entire MMIF. + Thinly wraps the Mmif.__getitem__ method and returns a Document + object. Note that although this method is under View class, it can + be used to retrieve any document across the entire MMIF. - :param ann_id: the ID of the annotation to retrieve. - :return: found :class:`mmif.serialize.annotation.Annotation` object. - :raises KeyError: if the annotation with the given ID is not found + :param doc_id: the ID of the document to retrieve. 
+ :return: found :class:`mmif.serialize.annotation.Document` object. + :raises KeyError: if the document with the given ID is not found """ warnings.warn( "View.get_document_by_id() is deprecated, use view[doc_id] instead.", diff --git a/mmif/utils/sequence_helper.py b/mmif/utils/sequence_helper.py index 5ea5289c..f4edcb19 100644 --- a/mmif/utils/sequence_helper.py +++ b/mmif/utils/sequence_helper.py @@ -150,17 +150,17 @@ def smooth_outlying_short_intervals(scores: List[float], works essentially in the "stitching" only mode. :param scores: **SORTED** list of scores to be smoothed. The score list - is assumed to be "exhaust" the entire time or space of + is assumed to "exhaust" the entire time or space of the underlying document segment. (Sorted by the start, and then by the end of anchors) - :param min_score: minimum threshold to use to discard - low-scored units (strictly less than) - :param min_spseq_size: minimum size of a positive sequence not to be + :param min_spseq_size: minimum size of a positive sequence not to be smoothed (greater or equal to) :param min_snseq_size: minimum size of a negative sequence not to be smoothed (greater or equal to) + :param min_score: minimum threshold to use to discard + low-scored units (strictly less than) :return: list of tuples of start(inclusive)/end(exclusive) indices - of the "positive" sequences. Negative sequences (regardless of + of the "positive" sequences. Negative sequences (regardless of their size) are not included in the output. """ diff --git a/mmif/utils/text_document_helper.py b/mmif/utils/text_document_helper.py index 23cc92aa..87e8c478 100644 --- a/mmif/utils/text_document_helper.py +++ b/mmif/utils/text_document_helper.py @@ -4,6 +4,15 @@ def slice_text(mmif_obj, start: int, end: int, unit: str = "milliseconds") -> str: + """ + Extracts text from tokens within a specified time range. 
+ + :param mmif_obj: MMIF object to search for tokens + :param start: start time point + :param end: end time point + :param unit: time unit for start and end parameters (default: "milliseconds") + :return: space-separated string of token words found in the time range + """ token_type = AnnotationTypes.Token anns_found = mmif_obj.get_annotations_between_time(start, end, unit) tokens_sliced = [] diff --git a/mmif/utils/video_document_helper.py b/mmif/utils/video_document_helper.py index b76ece7c..a1b9c59a 100644 --- a/mmif/utils/video_document_helper.py +++ b/mmif/utils/video_document_helper.py @@ -76,11 +76,12 @@ def get_framerate(video_document: Document) -> float: def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False, record_ffmpeg_errors: bool = False): """ Extracts frames from a video document as a list of :py:class:`numpy.ndarray`. - Use with :py:func:`sample_frames` function to get the list of frame numbers first. - + Use with :py:func:`sample_frames` function to get the list of frame numbers first. + :param video_document: :py:class:`~mmif.serialize.annotation.Document` instance that holds a video document (``"@type": ".../VideoDocument/..."``) :param framenums: iterable integers representing the frame numbers to extract :param as_PIL: return :py:class:`PIL.Image.Image` instead of :py:class:`~numpy.ndarray` + :param record_ffmpeg_errors: if True, records and warns about FFmpeg stderr output during extraction :return: frames as a list of :py:class:`~numpy.ndarray` or :py:class:`~PIL.Image.Image` """ import cv2 @@ -212,11 +213,11 @@ def sample_frames(start_frame: int, end_frame: int, sample_rate: float = 1) -> L """ Helper function to sample frames from a time interval. Can also be used as a "cutoff" function when used with ``start_frame==0`` and ``sample_rate==1``. 
- + :param start_frame: start frame of the interval :param end_frame: end frame of the interval :param sample_rate: sampling rate (or step) to configure how often to take a frame, default is 1, meaning all consecutive frames are sampled - + :return: list of frame numbers to extract """ if sample_rate < 1: raise ValueError(f"Sample rate must be greater than 1, but got {sample_rate}") @@ -235,9 +236,13 @@ def get_annotation_property(mmif, annotation, prop_name): .. deprecated:: 1.0.8 Will be removed in 2.0.0. Use :py:meth:`mmif.serialize.annotation.Annotation.get_property` method instead. - + Get a property value from an annotation. If the property is not found in the annotation, it will look up the metadata of the annotation's parent view and return the value from there. - xisting + + :param mmif: MMIF object containing the annotation + :param annotation: Annotation object to get property from + :param prop_name: name of the property to retrieve + :return: the property value """ warnings.warn(f'{__name__}() is deprecated. ' f'Directly ask the annotation for a property by calling annotation.get_property() instead.', diff --git a/templates/python/vocabulary/base_types.txt b/templates/python/vocabulary/base_types.txt index 09736741..dbff5e1d 100644 --- a/templates/python/vocabulary/base_types.txt +++ b/templates/python/vocabulary/base_types.txt @@ -17,6 +17,11 @@ class TypesBase(object): old_lapps_type_shortnames = {'Token', 'Sentence', 'Paragraph', 'Markable', 'NamedEntity', 'NounChunk', 'VerbChunk'} def __init__(self, type_uri: str): + """ + Initialize a vocabulary type. + + :param type_uri: full URI or short name of the vocabulary type + """ self.fuzzy_eq = False self.parse_names(type_uri) if self.__repr__() not in self.__class__._prefixes: @@ -122,6 +127,12 @@ class ClamsTypesBase(TypesBase): dev_version: ClassVar[str] = 'develop' def __init__(self, type_uri, fuzzymode=True): + """ + Initialize a CLAMS vocabulary type. 
+ + :param type_uri: full URI or short name of the CLAMS vocabulary type + :param fuzzymode: if True, enables fuzzy equality comparison that ignores version differences + """ super().__init__(type_uri) self.fuzzy_eq = fuzzymode