Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 24 additions & 21 deletions mmif/serialize/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@ def _deserialize(self, input_dict: dict) -> None:

def _cache_alignment(self, alignment_ann: 'Annotation', alignedto_ann: 'Annotation') -> None:
"""
Cache alignment information. This cache will not be serialized.
Cache alignment information. This cache will not be serialized.

:param alignment_ann: the Alignment annotation that has this annotation on one side
:param alignedto_ann: the annotation that this annotation is aligned to (other side of Alignment)
:return: None
"""
self._alignments[alignment_ann] = alignedto_ann

Expand Down Expand Up @@ -228,10 +229,9 @@ def add_property(self, name: str,
value: Union[PRMTV_TYPES, LIST_PRMTV, LIST_LIST_PRMTV, DICT_PRMTV, DICT_LIST_PRMTV]) -> None:
"""
Adds a property to the annotation's properties.

:param name: the name of the property
:param value: the property's desired value
:return: None
"""
# if self.check_prop_value_is_simple_enough(value):
self.properties[name] = value
Expand Down Expand Up @@ -336,29 +336,32 @@ def add_property(self, name: str,
) -> None:
"""
Adds a property to the document's properties.
Unlike the parent :class:`Annotation` class, added properties of a
``Document`` object can be lost during serialization unless it belongs
to somewhere in a ``Mmif`` object. This is because we want to keep
``Document`` object as "read-only" as possible. Thus, if you want to add
a property to a ``Document`` object,
* add the document to a ``Mmif`` object (either in the documents list or

Unlike the parent :class:`Annotation` class, added properties of a
``Document`` object can be lost during serialization unless it belongs
to somewhere in a ``Mmif`` object. This is because we want to keep
``Document`` object as "read-only" as possible. Thus, if you want to add
a property to a ``Document`` object,

* add the document to a ``Mmif`` object (either in the documents list or
in a view from the views list), or
* directly write to ``Document.properties`` instead of using this method
(which is not recommended).
With the former method, the SDK will record the added property as a
`Annotation` annotation object, separate from the original `Document`
(which is not recommended).

With the former method, the SDK will record the added property as a
`Annotation` annotation object, separate from the original `Document`
object. See :meth:`.Mmif.generate_capital_annotations()` for more.

A few notes to keep in mind:
#. You can't overwrite an existing property of a ``Document`` object.
#. A MMIF can have multiple ``Annotation`` objects with the same

#. You can't overwrite an existing property of a ``Document`` object.
#. A MMIF can have multiple ``Annotation`` objects with the same
property name but different values. When this happens, the SDK will
only keep the latest value (in order of appearances in views list) of
only keep the latest value (in order of appearances in views list) of
the property, effectively overwriting the previous values.

:param name: the name of the property
:param value: the property's desired value (note: Document accepts fewer value types than Annotation)
"""
# we don't check whether this k-v already exists in _original (new props) or _ephemeral (read from existing MMIF)
# because it is impossible to keep the _original updated when a new annotation is added (via `new_annotation`)
Expand Down
20 changes: 12 additions & 8 deletions mmif/serialize/mmif.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,13 +409,13 @@ def add_view(self, view: View, overwrite=False) -> None:
"""
Appends a View object to the views list.

Fails if there is already a view with the same ID or a document
Fails if there is already a view with the same ID or a document
with the same ID in the MMIF object.

:param view: the Document object to add
:param view: the View object to add
:param overwrite: if set to True, will overwrite
an existing view with the same ID
:raises KeyError: if ``overwrite`` is set to False and existing
:raises KeyError: if ``overwrite`` is set to False and existing
object (document or view) with the same ID exists
:return: None
"""
Expand Down Expand Up @@ -504,6 +504,7 @@ def get_documents_locations(self, m_type: Union[DocumentTypes, str], path_only=F
Only top-level documents have locations, so we only check them.

:param m_type: the type to search for
:param path_only: if True, returns resolved file system paths instead of location URIs
:return: a list of the values of the location fields in the corresponding documents
"""
docs = [document for document in self.documents if document.is_type(m_type) and document.location is not None]
Expand All @@ -517,6 +518,7 @@ def get_document_location(self, m_type: Union[DocumentTypes, str], path_only=Fal
Method to get the location of *first* document of given type.

:param m_type: the type to search for
:param path_only: if True, returns resolved file system path instead of location URI
:return: the value of the location field in the corresponding document
"""
# TODO (krim @ 8/10/20): Is returning the first location desirable?
Expand Down Expand Up @@ -569,6 +571,8 @@ def get_alignments(self, at_type1: Union[str, ThingTypesBase], at_type2: Union[s
"""
Finds views where alignments between two given annotation types occurred.

:param at_type1: the first annotation type to search for alignments
:param at_type2: the second annotation type to search for alignments
:return: a dict that is keyed by view IDs (str) and has lists of alignment Annotation objects as values.
"""
v_and_a = {}
Expand Down Expand Up @@ -692,8 +696,8 @@ def get_annotations_between_time(self, start: Union[int, float], end: Union[int,
"""
Finds annotations that are anchored between the given time points.

:param start: the start time point in the unit of `input_unit`
:param end: the end time point in the unit of `input_unit`
:param start: the start time point
:param end: the end time point
:param time_unit: the unit of the input time points. Default is `ms`.
:param at_types: a list of annotation types to filter with. Any type in this list will be included in the return.
:return: an iterator of Annotation objects that are anchored between the given time points
Expand Down Expand Up @@ -726,12 +730,12 @@ def get_annotations_between_time(self, start: Union[int, float], end: Union[int,
def _get_linear_anchor_point(self, ann: Annotation, targets_sorted=False, start: bool = True) -> Union[int, float]:
# TODO (krim @ 2/5/24): Update the return type once timeunits are unified to `ms` as integers (https://github.com/clamsproject/mmif/issues/192)
"""
Retrieves the anchor point of the annotation. Currently, this method only supports linear anchors,
Retrieves the anchor point of the annotation. Currently, this method only supports linear anchors,
namely time and text, hence does not work with spatial anchors (polygons or video-object).

:param ann: An Annotation object that has a linear anchor point. Namely, some subtypes of `Region` vocabulary type.
:param start: If True, returns the start anchor point. Otherwise, returns the end anchor point. N/A for `timePoint` anchors.
:param targets_sorted: If True, the method will assume that the targets are sorted in the order of the anchor points.
:param start: If True, returns the start anchor point. Otherwise, returns the end anchor point. N/A for `timePoint` anchors.
:return: the anchor point of the annotation. 1d for linear regions (time, text)
"""
props = ann.properties
Expand Down
11 changes: 5 additions & 6 deletions mmif/serialize/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def _serialize(self, alt_container: Optional[Dict] = None, include_context: bool
If a subclass needs special treatment during the mapping, it needs to
override this method.

:param alt_container: Alternative container to serialize from
:param alt_container: optional alternative container dict to serialize instead of _unnamed_attributes
:param include_context: See :meth:`serialize` for details.
:return: the prepared dictionary
"""
Expand Down Expand Up @@ -403,14 +403,13 @@ def _deserialize(self, input_list: list) -> None:

def get(self, key: str, default=None) -> Optional[T]:
"""
Standard dictionary-style get() method, albeit with no ``default``
parameter. Relies on the implementation of __getitem__.
Standard dictionary-style get() method. Relies on the implementation of __getitem__.

Will return ``None`` if the key is not found.
Will return the default value if the key is not found.

:param key: the key to search for
:param default: the default value to return if the key is not found
:return: the value matching that key
:param default: the default value to return if the key is not found (defaults to None)
:return: the value matching that key, or the default value if not found
"""
try:
return self[key]
Expand Down
20 changes: 10 additions & 10 deletions mmif/serialize/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def add_document(self, document: Document, overwrite=False) -> Annotation:
:param document: the Document object to add
:param overwrite: if set to True, will overwrite
an existing document with the same ID
:return: None
:return: the added Document object (as an Annotation)
"""
return self.add_annotation(document, overwrite)

Expand Down Expand Up @@ -270,18 +270,18 @@ def get_documents(self) -> List[Document]:
def get_document_by_id(self, doc_id) -> Document:
"""
.. deprecated:: 1.1.0
Will be removed in 2.0.0.
Use general ``Mmif.__getitem__()`` method instead to retrieve
any document across the MMIF, or View.__getitems__() to
Will be removed in 2.0.0.
Use general ``Mmif.__getitem__()`` method instead to retrieve
any document across the MMIF, or ``View.__getitem__()`` to
retrieve documents within the view.

Thinly wraps the Mmif.__getitem__ method and returns an Annotation
object. Note that although this method is under View class, it can
be used to retrieve any annotation across the entire MMIF.
Thinly wraps the Mmif.__getitem__ method and returns a Document
object. Note that although this method is under View class, it can
be used to retrieve any document across the entire MMIF.

:param ann_id: the ID of the annotation to retrieve.
:return: found :class:`mmif.serialize.annotation.Annotation` object.
:raises KeyError: if the annotation with the given ID is not found
:param doc_id: the ID of the document to retrieve.
:return: found :class:`mmif.serialize.annotation.Document` object.
:raises KeyError: if the document with the given ID is not found
"""
warnings.warn(
"View.get_document_by_id() is deprecated, use view[doc_id] instead.",
Expand Down
10 changes: 5 additions & 5 deletions mmif/utils/sequence_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,17 @@ def smooth_outlying_short_intervals(scores: List[float],
works essentially in the "stitching" only mode.

:param scores: **SORTED** list of scores to be smoothed. The score list
is assumed to be "exhaust" the entire time or space of
is assumed to "exhaust" the entire time or space of
the underlying document segment.
(Sorted by the start, and then by the end of anchors)
:param min_score: minimum threshold to use to discard
low-scored units (strictly less than)
:param min_spseq_size: minimum size of a positive sequence not to be
:param min_spseq_size: minimum size of a positive sequence not to be
smoothed (greater or equal to)
:param min_snseq_size: minimum size of a negative sequence not to be
smoothed (greater or equal to)
:param min_score: minimum threshold to use to discard
low-scored units (strictly less than)
:return: list of tuples of start(inclusive)/end(exclusive) indices
of the "positive" sequences. Negative sequences (regardless of
of the "positive" sequences. Negative sequences (regardless of
their size) are not included in the output.
"""

Expand Down
9 changes: 9 additions & 0 deletions mmif/utils/text_document_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@


def slice_text(mmif_obj, start: int, end: int, unit: str = "milliseconds") -> str:
"""
Extracts text from tokens within a specified time range.

:param mmif_obj: MMIF object to search for tokens
:param start: start time point
:param end: end time point
:param unit: time unit for start and end parameters (default: "milliseconds")
:return: space-separated string of token words found in the time range
"""
token_type = AnnotationTypes.Token
anns_found = mmif_obj.get_annotations_between_time(start, end, unit)
tokens_sliced = []
Expand Down
17 changes: 11 additions & 6 deletions mmif/utils/video_document_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,12 @@ def get_framerate(video_document: Document) -> float:
def extract_frames_as_images(video_document: Document, framenums: Iterable[int], as_PIL: bool = False, record_ffmpeg_errors: bool = False):
"""
Extracts frames from a video document as a list of :py:class:`numpy.ndarray`.
Use with :py:func:`sample_frames` function to get the list of frame numbers first.
Use with :py:func:`sample_frames` function to get the list of frame numbers first.

:param video_document: :py:class:`~mmif.serialize.annotation.Document` instance that holds a video document (``"@type": ".../VideoDocument/..."``)
:param framenums: iterable integers representing the frame numbers to extract
:param as_PIL: return :py:class:`PIL.Image.Image` instead of :py:class:`~numpy.ndarray`
:param record_ffmpeg_errors: if True, records and warns about FFmpeg stderr output during extraction
:return: frames as a list of :py:class:`~numpy.ndarray` or :py:class:`~PIL.Image.Image`
"""
import cv2
Expand Down Expand Up @@ -212,11 +213,11 @@ def sample_frames(start_frame: int, end_frame: int, sample_rate: float = 1) -> L
"""
Helper function to sample frames from a time interval.
Can also be used as a "cutoff" function when used with ``start_frame==0`` and ``sample_rate==1``.

:param start_frame: start frame of the interval
:param end_frame: end frame of the interval
:param sample_rate: sampling rate (or step) to configure how often to take a frame, default is 1, meaning all consecutive frames are sampled

:return: list of frame numbers to extract
"""
if sample_rate < 1:
raise ValueError(f"Sample rate must be greater than 1, but got {sample_rate}")
Expand All @@ -235,9 +236,13 @@ def get_annotation_property(mmif, annotation, prop_name):
.. deprecated:: 1.0.8
Will be removed in 2.0.0.
Use :py:meth:`mmif.serialize.annotation.Annotation.get_property` method instead.

Get a property value from an annotation. If the property is not found in the annotation, it will look up the metadata of the annotation's parent view and return the value from there.
xisting

:param mmif: MMIF object containing the annotation
:param annotation: Annotation object to get property from
:param prop_name: name of the property to retrieve
:return: the property value
"""
warnings.warn(f'{__name__}() is deprecated. '
f'Directly ask the annotation for a property by calling annotation.get_property() instead.',
Expand Down
11 changes: 11 additions & 0 deletions templates/python/vocabulary/base_types.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ class TypesBase(object):
old_lapps_type_shortnames = {'Token', 'Sentence', 'Paragraph', 'Markable', 'NamedEntity', 'NounChunk', 'VerbChunk'}

def __init__(self, type_uri: str):
"""
Initialize a vocabulary type.

:param type_uri: full URI or short name of the vocabulary type
"""
self.fuzzy_eq = False
self.parse_names(type_uri)
if self.__repr__() not in self.__class__._prefixes:
Expand Down Expand Up @@ -122,6 +127,12 @@ class ClamsTypesBase(TypesBase):
dev_version: ClassVar[str] = 'develop'

def __init__(self, type_uri, fuzzymode=True):
"""
Initialize a CLAMS vocabulary type.

:param type_uri: full URI or short name of the CLAMS vocabulary type
:param fuzzymode: if True, enables fuzzy equality comparison that ignores version differences
"""
super().__init__(type_uri)
self.fuzzy_eq = fuzzymode

Expand Down
Loading