diff --git a/CHANGELOG.md b/CHANGELOG.md index 008c822..0985caa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## Version 0.7.0 +## Version 0.7.0 - 0.7.1 - Major update to type hints throughout the module for better type safety and consistency. - Fixed bug in slice operations where column indices might be incorrectly initialized. @@ -8,8 +8,10 @@ - Accept a list of column values and column names to initialize a biocframe object. - Implement empty, contains, head, tail, - Coercions to list and `NamedList` from bioctuls. +- `BiocFrame` now extends `BiocObject`, with metadata attribute now a `NamedList` from the biocutils package. ## Version 0.6.3 + - Implement `remove_rows()`. - Implement `has_row()`. - Add support for slice objects in `remove_columns()` and enforce homogeneous types. @@ -47,6 +49,7 @@ - Internal refactoring to use generics from the BiocUtils package. ## Version 0.3 + This release migrates the package to a more palatable Google's Python style guide. A major modification to the package is with casing, all `camelCase` methods, functions and parameters are now `snake_case`. In addition, docstrings and documentation has been updated to use sphinx's features of linking objects to their types. Sphinx now also documents private and special dunder methods (e.g. `__getitem__`, `__copy__` etc). Intersphinx has been updated to link to references from dependent packages. @@ -56,6 +59,7 @@ configuration for flake8, ruff and black has been added to pyproject.toml and se In addition, pyscaffold has been updated to use "myst-parser" as the markdown compiler instead of recommonmark. As part of the pyscaffold setup, one may use pre-commits to run some of the routine tasks of linting and formatting before every commit. While this is sometimes annoying and can be ignored with `--no-verify`, it brings some consistency to the code base. ## Version 0.2 + - refactor DataFrame as BiocFrame - implementing slicing methods, tests diff --git a/src/biocframe/frame.py b/src/biocframe/frame.py index c93e09f..8b83d91 100644 --- a/src/biocframe/frame.py +++ b/src/biocframe/frame.py @@ -104,7 +104,7 @@ def __next__(self) -> Tuple[Optional[Union[ut.Names, str]], Dict[str, Any]]: ############################ -class BiocFrame: +class BiocFrame(ut.BiocObject): """`BiocFrame` is an alternative to :class:`~pandas.DataFrame`, with support for nested and flexible column types. Inspired by the ``DFrame`` class from Bioconductor's **S4Vectors** package. Any object may be used as a column, provided it has: @@ -168,6 +168,12 @@ def __init__( _validate: Internal use only. """ + + super().__init__( + metadata=metadata, + _validate=_validate, + ) + if data is None: data = {} @@ -176,7 +182,7 @@ def __init__( # making sure all column values are lists for k, v in data.items(): if not isinstance(v, list): - # if its a scalar, make a list else corce to list + # if its a scalar, make a list else coerce to list data[k] = list(v) if isinstance(v, abc.Sequence) else [v] elif isinstance(data, Sequence) and not isinstance(data, (str, dict)): if column_names is None: @@ -206,19 +212,12 @@ def __init__( else: self._column_names = column_names if isinstance(column_names, ut.Names) else ut.Names(column_names) - self._metadata = {} if metadata is None else metadata self._column_data = column_data if _validate: _validate_rows(self._number_of_rows, self._data, self._row_names) _validate_columns(self._column_names, self._data, self._column_data) - def _define_output(self, in_place: bool = False) -> BiocFrame: - if in_place is True: - return self - else: - return self.__copy__() - def __eq__(self, other: Any) -> bool: """Check if the current object is equal to another. @@ -668,51 +667,6 @@ def column_data(self, column_data: Optional[BiocFrame]) -> None: ) self.set_column_data(column_data, in_place=True) - def get_metadata(self) -> dict: - """Get the metadata. - - Returns: - Dictionary of metadata for this object. - """ - return self._metadata - - def set_metadata(self, metadata: Dict[str, Any], in_place: bool = False) -> BiocFrame: - """Set new metadata. - - Args: - metadata: - New metadata for this object. - - in_place: - Whether to modify the ``BiocFrame`` object in place. - - Returns: - A modified ``BiocFrame`` object, either as a copy of the original - or as a reference to the (in-place-modified) original. - """ - if not isinstance(metadata, dict): - raise TypeError(f"`metadata` must be a dictionary, provided {type(metadata)}.") - output = self._define_output(in_place) - output._metadata = metadata - return output - - @property - def metadata(self) -> Dict[str, Any]: - """Alias for :py:attr:`~get_metadata`.""" - return self.get_metadata() - - @metadata.setter - def metadata(self, metadata: Dict[str, Any]) -> None: - """Alias for :py:attr:`~set_metadata` with ``in_place = True``. - - As this mutates the original object, a warning is raised. - """ - warn( - "Setting property 'metadata' is an in-place operation, use 'set_metadata' instead", - UserWarning, - ) - self.set_metadata(metadata, in_place=True) - ################################ ######>> Single getters <<###### ################################ diff --git a/tests/test_methods.py b/tests/test_methods.py index 18b6397..93bbeea 100644 --- a/tests/test_methods.py +++ b/tests/test_methods.py @@ -59,7 +59,7 @@ def test_bframe_basic_ops(): assert len(bframe.column_names) == 3 assert bframe.get_column_data() is None - assert bframe.metadata == {} + assert bframe.metadata == ut.NamedList() assert len(bframe.dims) == 2 assert bframe.dims == (3, 3) @@ -104,7 +104,7 @@ def test_bframe_setters(): assert bframe.column_names is not None assert len(bframe.column_names) == 3 - assert bframe.metadata == {} + assert bframe.metadata == ut.NamedList() bframe.metadata = {"a": "b"} assert bframe.metadata is not None