From 3ec125a46fea9e67736a754f7e122108a02baf5c Mon Sep 17 00:00:00 2001 From: schobbejak Date: Mon, 3 Mar 2025 21:25:08 +0100 Subject: [PATCH 1/6] Rename folder to core and split class in core to multiple files --- epochlib/{pipeline => core}/__init__.py | 5 +- epochlib/core/base.py | 107 +++++++ epochlib/core/block.py | 22 ++ epochlib/core/parallel_system.py | 113 +++++++ epochlib/core/sequential_system.py | 60 ++++ epochlib/{pipeline => core}/training.py | 5 +- epochlib/{pipeline => core}/transforming.py | 5 +- epochlib/ensemble.py | 2 +- epochlib/model.py | 2 +- epochlib/pipeline/core.py | 285 ------------------ epochlib/training/training.py | 2 +- epochlib/training/training_block.py | 2 +- epochlib/transformation/transformation.py | 2 +- .../transformation/transformation_block.py | 2 +- tests/core/test_base.py | 47 +++ .../test__core.py => core/test_core.py} | 49 +-- tests/{pipeline => core}/test_training.py | 4 +- tests/{pipeline => core}/test_transforming.py | 4 +- tests/{pipeline => core}/util.py | 0 tests/training/test_training.py | 2 +- tests/transformation/test_transformation.py | 2 +- 21 files changed, 375 insertions(+), 347 deletions(-) rename epochlib/{pipeline => core}/__init__.py (79%) create mode 100644 epochlib/core/base.py create mode 100644 epochlib/core/block.py create mode 100644 epochlib/core/parallel_system.py create mode 100644 epochlib/core/sequential_system.py rename epochlib/{pipeline => core}/training.py (99%) rename epochlib/{pipeline => core}/transforming.py (98%) delete mode 100644 epochlib/pipeline/core.py create mode 100644 tests/core/test_base.py rename tests/{pipeline/test__core.py => core/test_core.py} (71%) rename tests/{pipeline => core}/test_training.py (99%) rename tests/{pipeline => core}/test_transforming.py (99%) rename tests/{pipeline => core}/util.py (100%) diff --git a/epochlib/pipeline/__init__.py b/epochlib/core/__init__.py similarity index 79% rename from epochlib/pipeline/__init__.py rename to epochlib/core/__init__.py index 745d851..7b6a529 100644 --- a/epochlib/pipeline/__init__.py +++ b/epochlib/core/__init__.py @@ -1,6 +1,9 @@ """Core pipeline functionality for training and transforming data.""" -from .core import Base, Block, ParallelSystem, SequentialSystem +from .base import Base +from .block import Block +from .parallel_system import ParallelSystem +from .sequential_system import SequentialSystem from .training import ParallelTrainingSystem, Pipeline, Trainer, TrainingSystem, TrainType from .transforming import ParallelTransformingSystem, Transformer, TransformingSystem, TransformType diff --git a/epochlib/core/base.py b/epochlib/core/base.py new file mode 100644 index 0000000..9e7f052 --- /dev/null +++ b/epochlib/core/base.py @@ -0,0 +1,107 @@ +"""The base module contains the Base class.""" + +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Sequence + +from joblib import hash + + +@dataclass +class Base: + """The Base class is the base class for all classes in the epochlib package. + + Methods: + .. code-block:: python + def get_hash(self) -> str: + # Get the hash of base + + def get_parent(self) -> Any: + # Get the parent of base. + + def get_children(self) -> list[Any]: + # Get the children of base + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + """ + + def __post_init__(self) -> None: + """Initialize the block.""" + self.set_hash("") + self.set_parent(None) + self.set_children([]) + + def set_hash(self, prev_hash: str) -> None: + """Set the hash of the block. + + :param prev_hash: The hash of the previous block. + """ + self._hash = hash(prev_hash + str(self)) + + def get_hash(self) -> str: + """Get the hash of the block. + + :return: The hash of the block. + """ + return self._hash + + def get_parent(self) -> Any: + """Get the parent of the block. + + :return: Parent of the block + """ + return self._parent + + def get_children(self) -> Sequence[Any]: + """Get the children of the block. + + :return: Children of the block + """ + return self._children + + def save_to_html(self, file_path: Path) -> None: + """Write html representation of class to file. + + :param file_path: File path to write to + """ + html = self._repr_html_() + with open(file_path, "w") as file: + file.write(html) + + def set_parent(self, parent: Any) -> None: + """Set the parent of the block. + + :param parent: Parent of the block + """ + self._parent = parent + + def set_children(self, children: Sequence[Any]) -> None: + """Set the children of the block. + + :param children: Children of the block + """ + self._children = children + + def _repr_html_(self) -> str: + """Return representation of class in html format. + + :return: String representation of html + """ + html = "
" + html += f"

Class: {self.__class__.__name__}

" + html += "" + html += "
" + return html diff --git a/epochlib/core/block.py b/epochlib/core/block.py new file mode 100644 index 0000000..7d7c3d1 --- /dev/null +++ b/epochlib/core/block.py @@ -0,0 +1,22 @@ +"""Module for the block class.""" + +from .base import Base + + +class Block(Base): + """The Block class is the base class for all blocks. + + Methods: + .. code-block:: python + def get_hash(self) -> str: + # Get the hash of the block. + + def get_parent(self) -> Any: + # Get the parent of the block. + + def get_children(self) -> list[Any]: + # Get the children of the block + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + """ diff --git a/epochlib/core/parallel_system.py b/epochlib/core/parallel_system.py new file mode 100644 index 0000000..bffb824 --- /dev/null +++ b/epochlib/core/parallel_system.py @@ -0,0 +1,113 @@ +"""This module contains the ParallelSystem class.""" + +from abc import abstractmethod +from dataclasses import dataclass, field +from typing import Any + +from joblib import hash + +from .base import Base + + +@dataclass +class ParallelSystem(Base): + """The System class is the base class for all systems. + + Parameters: + - steps (list[_Base]): The steps in the system. + - weights (list[float]): Weights of steps in the system, if not specified they are equal. + + Methods: + .. code-block:: python + @abstractmethod + def concat(self, original_data: Any), data_to_concat: Any, weight: float = 1.0) -> Any: + # Specifies how to concat data after parallel computations + + def get_hash(self) -> str: + # Get the hash of the block. + + def get_parent(self) -> Any: + # Get the parent of the block. + + def get_children(self) -> list[Any]: + # Get the children of the block + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + """ + + steps: list[Base] = field(default_factory=list) + weights: list[float] = field(default_factory=list) + + def __post_init__(self) -> None: + """Post init function of _System class.""" + # Sort the steps by name, to ensure consistent ordering of parallel computations + self.steps = sorted(self.steps, key=lambda x: x.__class__.__name__) + + super().__post_init__() + + # Set parent and children + for step in self.steps: + step.set_parent(self) + + # Set weights if they exist + if len(self.weights) == len(self.get_steps()): + [w / sum(self.weights) for w in self.weights] + else: + num_steps = len(self.get_steps()) + self.weights = [1 / num_steps for x in self.steps] + + self.set_children(self.steps) + + def get_steps(self) -> list[Base]: + """Return list of steps of ParallelSystem. + + :return: List of steps + """ + return self.steps + + def get_weights(self) -> list[float]: + """Return list of weights of ParallelSystem. + + :return: List of weights + """ + if len(self.get_steps()) != len(self.weights): + raise TypeError("Mismatch between weights and steps") + return self.weights + + def set_hash(self, prev_hash: str) -> None: + """Set the hash of the system. + + :param prev_hash: The hash of the previous block. + """ + self._hash = prev_hash + + # System has no steps and as such hash should not be affected + if len(self.steps) == 0: + return + + # System is one step and should act as such + if len(self.steps) == 1: + step = self.steps[0] + step.set_hash(prev_hash) + self._hash = step.get_hash() + return + + # System has at least two steps so hash should become a combination + total = self.get_hash() + for step in self.steps: + step.set_hash(prev_hash) + total = total + step.get_hash() + + self._hash = hash(total) + + @abstractmethod + def concat(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any: + """Concatenate the transformed data. + + :param original_data: The first input data. + :param data_to_concat: The second input data. + :param weight: Weight of data to concat + :return: The concatenated data. + """ + raise NotImplementedError(f"{self.__class__.__name__} does not implement concat method.") diff --git a/epochlib/core/sequential_system.py b/epochlib/core/sequential_system.py new file mode 100644 index 0000000..ebe956a --- /dev/null +++ b/epochlib/core/sequential_system.py @@ -0,0 +1,60 @@ +"""This module contains the SequentialSystem class.""" + +from dataclasses import dataclass, field +from typing import Sequence + +from .base import Base + + +@dataclass +class SequentialSystem(Base): + """The SequentialSystem class is the base class for all systems. + + Parameters: + - steps (list[_Base]): The steps in the system. + + Methods: + .. code-block:: python + def get_hash(self) -> str: + # Get the hash of the block. + + def get_parent(self) -> Any: + # Get the parent of the block. + + def get_children(self) -> list[Any]: + # Get the children of the block + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + """ + + steps: Sequence[Base] = field(default_factory=list) + + def __post_init__(self) -> None: + """Post init function of _System class.""" + super().__post_init__() + + # Set parent and children + for step in self.steps: + step.set_parent(self) + + self.set_children(self.steps) + + def get_steps(self) -> Sequence[Base]: + """Return list of steps of _ParallelSystem. + + :return: List of steps + """ + return self.steps + + def set_hash(self, prev_hash: str) -> None: + """Set the hash of the system. + + :param prev_hash: The hash of the previous block. + """ + self._hash = prev_hash + + # Set hash of each step using previous hash and then update hash with last step + for step in self.steps: + step.set_hash(self.get_hash()) + self._hash = step.get_hash() diff --git a/epochlib/pipeline/training.py b/epochlib/core/training.py similarity index 99% rename from epochlib/pipeline/training.py rename to epochlib/core/training.py index 3938f45..1ed4f3f 100644 --- a/epochlib/pipeline/training.py +++ b/epochlib/core/training.py @@ -8,7 +8,10 @@ from joblib import hash -from .core import Base, Block, ParallelSystem, SequentialSystem +from .base import Base +from .block import Block +from .parallel_system import ParallelSystem +from .sequential_system import SequentialSystem from .transforming import TransformingSystem diff --git a/epochlib/pipeline/transforming.py b/epochlib/core/transforming.py similarity index 98% rename from epochlib/pipeline/transforming.py rename to epochlib/core/transforming.py index 529aa26..bca286e 100644 --- a/epochlib/pipeline/transforming.py +++ b/epochlib/core/transforming.py @@ -5,7 +5,10 @@ from abc import abstractmethod from typing import Any -from .core import Base, Block, ParallelSystem, SequentialSystem +from .base import Base +from .block import Block +from .parallel_system import ParallelSystem +from .sequential_system import SequentialSystem class TransformType(Base): diff --git a/epochlib/ensemble.py b/epochlib/ensemble.py index 69cb652..54327f8 100644 --- a/epochlib/ensemble.py +++ b/epochlib/ensemble.py @@ -3,8 +3,8 @@ from typing import Any from epochlib.caching import CacheArgs +from epochlib.core import ParallelTrainingSystem from epochlib.model import ModelPipeline -from epochlib.pipeline import ParallelTrainingSystem class EnsemblePipeline(ParallelTrainingSystem): diff --git a/epochlib/model.py b/epochlib/model.py index 9d3f85f..0265975 100644 --- a/epochlib/model.py +++ b/epochlib/model.py @@ -3,7 +3,7 @@ from typing import Any from epochlib.caching import CacheArgs, Cacher -from epochlib.pipeline import Pipeline +from epochlib.core import Pipeline class ModelPipeline(Pipeline): diff --git a/epochlib/pipeline/core.py b/epochlib/pipeline/core.py deleted file mode 100644 index b4e2935..0000000 --- a/epochlib/pipeline/core.py +++ /dev/null @@ -1,285 +0,0 @@ -"""This module contains the core classes for all classes in the epochlib package.""" - -from abc import abstractmethod -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Sequence - -from joblib import hash - - -@dataclass -class Base: - """The Base class is the base class for all classes in the epochlib package. - - Methods: - .. code-block:: python - def get_hash(self) -> str: - # Get the hash of base - - def get_parent(self) -> Any: - # Get the parent of base. - - def get_children(self) -> list[Any]: - # Get the children of base - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - """ - - def __post_init__(self) -> None: - """Initialize the block.""" - self.set_hash("") - self.set_parent(None) - self.set_children([]) - - def set_hash(self, prev_hash: str) -> None: - """Set the hash of the block. - - :param prev_hash: The hash of the previous block. - """ - self._hash = hash(prev_hash + str(self)) - - def get_hash(self) -> str: - """Get the hash of the block. - - :return: The hash of the block. - """ - return self._hash - - def get_parent(self) -> Any: - """Get the parent of the block. - - :return: Parent of the block - """ - return self._parent - - def get_children(self) -> Sequence[Any]: - """Get the children of the block. - - :return: Children of the block - """ - return self._children - - def save_to_html(self, file_path: Path) -> None: - """Write html representation of class to file. - - :param file_path: File path to write to - """ - html = self._repr_html_() - with open(file_path, "w") as file: - file.write(html) - - def set_parent(self, parent: Any) -> None: - """Set the parent of the block. - - :param parent: Parent of the block - """ - self._parent = parent - - def set_children(self, children: Sequence[Any]) -> None: - """Set the children of the block. - - :param children: Children of the block - """ - self._children = children - - def _repr_html_(self) -> str: - """Return representation of class in html format. - - :return: String representation of html - """ - html = "
" - html += f"

Class: {self.__class__.__name__}

" - html += "" - html += "
" - return html - - -class Block(Base): - """The Block class is the base class for all blocks. - - Methods: - .. code-block:: python - def get_hash(self) -> str: - # Get the hash of the block. - - def get_parent(self) -> Any: - # Get the parent of the block. - - def get_children(self) -> list[Any]: - # Get the children of the block - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - """ - - -@dataclass -class ParallelSystem(Base): - """The System class is the base class for all systems. - - Parameters: - - steps (list[_Base]): The steps in the system. - - weights (list[float]): Weights of steps in the system, if not specified they are equal. - - Methods: - .. code-block:: python - @abstractmethod - def concat(self, original_data: Any), data_to_concat: Any, weight: float = 1.0) -> Any: - # Specifies how to concat data after parallel computations - - def get_hash(self) -> str: - # Get the hash of the block. - - def get_parent(self) -> Any: - # Get the parent of the block. - - def get_children(self) -> list[Any]: - # Get the children of the block - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - """ - - steps: list[Base] = field(default_factory=list) - weights: list[float] = field(default_factory=list) - - def __post_init__(self) -> None: - """Post init function of _System class.""" - # Sort the steps by name, to ensure consistent ordering of parallel computations - self.steps = sorted(self.steps, key=lambda x: x.__class__.__name__) - - super().__post_init__() - - # Set parent and children - for step in self.steps: - step.set_parent(self) - - # Set weights if they exist - if len(self.weights) == len(self.get_steps()): - [w / sum(self.weights) for w in self.weights] - else: - num_steps = len(self.get_steps()) - self.weights = [1 / num_steps for x in self.steps] - - self.set_children(self.steps) - - def get_steps(self) -> list[Base]: - """Return list of steps of ParallelSystem. - - :return: List of steps - """ - return self.steps - - def get_weights(self) -> list[float]: - """Return list of weights of ParallelSystem. - - :return: List of weights - """ - if len(self.get_steps()) != len(self.weights): - raise TypeError("Mismatch between weights and steps") - return self.weights - - def set_hash(self, prev_hash: str) -> None: - """Set the hash of the system. - - :param prev_hash: The hash of the previous block. - """ - self._hash = prev_hash - - # System has no steps and as such hash should not be affected - if len(self.steps) == 0: - return - - # System is one step and should act as such - if len(self.steps) == 1: - step = self.steps[0] - step.set_hash(prev_hash) - self._hash = step.get_hash() - return - - # System has at least two steps so hash should become a combination - total = self.get_hash() - for step in self.steps: - step.set_hash(prev_hash) - total = total + step.get_hash() - - self._hash = hash(total) - - @abstractmethod - def concat(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any: - """Concatenate the transformed data. - - :param original_data: The first input data. - :param data_to_concat: The second input data. - :param weight: Weight of data to concat - :return: The concatenated data. - """ - raise NotImplementedError(f"{self.__class__.__name__} does not implement concat method.") - - -@dataclass -class SequentialSystem(Base): - """The SequentialSystem class is the base class for all systems. - - Parameters: - - steps (list[_Base]): The steps in the system. - - Methods: - .. code-block:: python - def get_hash(self) -> str: - # Get the hash of the block. - - def get_parent(self) -> Any: - # Get the parent of the block. - - def get_children(self) -> list[Any]: - # Get the children of the block - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - """ - - steps: Sequence[Base] = field(default_factory=list) - - def __post_init__(self) -> None: - """Post init function of _System class.""" - super().__post_init__() - - # Set parent and children - for step in self.steps: - step.set_parent(self) - - self.set_children(self.steps) - - def get_steps(self) -> Sequence[Base]: - """Return list of steps of _ParallelSystem. - - :return: List of steps - """ - return self.steps - - def set_hash(self, prev_hash: str) -> None: - """Set the hash of the system. - - :param prev_hash: The hash of the previous block. - """ - self._hash = prev_hash - - # Set hash of each step using previous hash and then update hash with last step - for step in self.steps: - step.set_hash(self.get_hash()) - self._hash = step.get_hash() diff --git a/epochlib/training/training.py b/epochlib/training/training.py index 0b35ba0..e671772 100644 --- a/epochlib/training/training.py +++ b/epochlib/training/training.py @@ -4,7 +4,7 @@ from typing import Any from epochlib.caching import CacheArgs, Cacher -from epochlib.pipeline import TrainingSystem, TrainType +from epochlib.core import TrainingSystem, TrainType @dataclass diff --git a/epochlib/training/training_block.py b/epochlib/training/training_block.py index c37d556..96939ca 100644 --- a/epochlib/training/training_block.py +++ b/epochlib/training/training_block.py @@ -4,7 +4,7 @@ from typing import Any from epochlib.caching import CacheArgs, Cacher -from epochlib.pipeline import Trainer +from epochlib.core import Trainer class TrainingBlock(Trainer, Cacher): diff --git a/epochlib/transformation/transformation.py b/epochlib/transformation/transformation.py index a8515e7..ec1f276 100644 --- a/epochlib/transformation/transformation.py +++ b/epochlib/transformation/transformation.py @@ -4,7 +4,7 @@ from typing import Any from epochlib.caching.cacher import CacheArgs, Cacher -from epochlib.pipeline import TransformingSystem, TransformType +from epochlib.core import TransformingSystem, TransformType @dataclass diff --git a/epochlib/transformation/transformation_block.py b/epochlib/transformation/transformation_block.py index fbe82cf..c16091b 100644 --- a/epochlib/transformation/transformation_block.py +++ b/epochlib/transformation/transformation_block.py @@ -4,7 +4,7 @@ from typing import Any from epochlib.caching.cacher import CacheArgs, Cacher -from epochlib.pipeline import Transformer +from epochlib.core import Transformer class TransformationBlock(Transformer, Cacher): diff --git a/tests/core/test_base.py b/tests/core/test_base.py new file mode 100644 index 0000000..1034e23 --- /dev/null +++ b/tests/core/test_base.py @@ -0,0 +1,47 @@ +from epochlib.core import Base +from tests.core.util import remove_cache_files +from pathlib import Path + +class Test_Base: + def test_init(self): + base = Base() + assert base is not None + + def test_set_hash(self): + base = Base() + prev_hash = base.get_hash() + base.set_hash("prev_hash") + assert base.get_hash() != prev_hash + + def test_get_children(self): + base = Base() + assert base.get_children() == [] + + def test_get_parent(self): + base = Base() + assert base.get_parent() is None + + def test__set_parent(self): + base = Base() + base.set_parent(base) + assert base.get_parent() == base + + def test__set_children(self): + base = Base() + base.set_children([base]) + assert base.get_children() == [base] + + def test__repr_html_(self): + base = Base() + assert ( + base._repr_html_() + == "

Class: Base

" + ) + + def test_save_to_html(self): + html_path = Path("./tests/cache/test_html.html") + Path("./tests/cache/").mkdir(parents=True, exist_ok=True) + base = Base() + base.save_to_html(html_path) + assert Path.exists(html_path) + remove_cache_files() \ No newline at end of file diff --git a/tests/pipeline/test__core.py b/tests/core/test_core.py similarity index 71% rename from tests/pipeline/test__core.py rename to tests/core/test_core.py index 8e26b22..a9f90ab 100644 --- a/tests/pipeline/test__core.py +++ b/tests/core/test_core.py @@ -1,53 +1,8 @@ -from epochlib.pipeline import Block, Base, SequentialSystem, ParallelSystem -from tests.pipeline.util import remove_cache_files +from epochlib.core import Block, Base, SequentialSystem, ParallelSystem +from tests.core.util import remove_cache_files from pathlib import Path -class Test_Base: - def test_init(self): - base = Base() - assert base is not None - - def test_set_hash(self): - base = Base() - prev_hash = base.get_hash() - base.set_hash("prev_hash") - assert base.get_hash() != prev_hash - - def test_get_children(self): - base = Base() - assert base.get_children() == [] - - def test_get_parent(self): - base = Base() - assert base.get_parent() is None - - def test__set_parent(self): - base = Base() - base.set_parent(base) - assert base.get_parent() == base - - def test__set_children(self): - base = Base() - base.set_children([base]) - assert base.get_children() == [base] - - def test__repr_html_(self): - base = Base() - assert ( - base._repr_html_() - == "

Class: Base

" - ) - - def test_save_to_html(self): - html_path = Path("./tests/cache/test_html.html") - Path("./tests/cache/").mkdir(parents=True, exist_ok=True) - base = Base() - base.save_to_html(html_path) - assert Path.exists(html_path) - remove_cache_files() - - class TestBlock: def test_block_init(self): block = Block() diff --git a/tests/pipeline/test_training.py b/tests/core/test_training.py similarity index 99% rename from tests/pipeline/test_training.py rename to tests/core/test_training.py index 0eab661..151118a 100644 --- a/tests/pipeline/test_training.py +++ b/tests/core/test_training.py @@ -1,7 +1,7 @@ import pytest import warnings -from epochlib.pipeline import Trainer, TrainingSystem, ParallelTrainingSystem, Pipeline -from epochlib.pipeline import Transformer, TransformingSystem +from epochlib.core import Trainer, TrainingSystem, ParallelTrainingSystem, Pipeline +from epochlib.core import Transformer, TransformingSystem import numpy as np diff --git a/tests/pipeline/test_transforming.py b/tests/core/test_transforming.py similarity index 99% rename from tests/pipeline/test_transforming.py rename to tests/core/test_transforming.py index 394d900..1e523da 100644 --- a/tests/pipeline/test_transforming.py +++ b/tests/core/test_transforming.py @@ -2,8 +2,8 @@ import numpy as np import pytest -from epochlib.pipeline import Trainer -from epochlib.pipeline import ( +from epochlib.core import Trainer +from epochlib.core import ( Transformer, TransformingSystem, ParallelTransformingSystem, diff --git a/tests/pipeline/util.py b/tests/core/util.py similarity index 100% rename from tests/pipeline/util.py rename to tests/core/util.py diff --git a/tests/training/test_training.py b/tests/training/test_training.py index d8ba215..029612c 100644 --- a/tests/training/test_training.py +++ b/tests/training/test_training.py @@ -1,6 +1,6 @@ import numpy as np import pytest -from epochlib.pipeline import Trainer +from epochlib.core import Trainer from epochlib.training import TrainingPipeline from epochlib.training import TrainingBlock diff --git a/tests/transformation/test_transformation.py b/tests/transformation/test_transformation.py index cb2d14b..9d9fcfd 100644 --- a/tests/transformation/test_transformation.py +++ b/tests/transformation/test_transformation.py @@ -2,7 +2,7 @@ import numpy as np import pytest -from epochlib.pipeline import Transformer +from epochlib.core import Transformer from epochlib.transformation import TransformationPipeline from epochlib.transformation import TransformationBlock From 517d0e8b63e95c1b8c994ef2c1aaec21d1c97514 Mon Sep 17 00:00:00 2001 From: schobbejak Date: Mon, 3 Mar 2025 21:30:09 +0100 Subject: [PATCH 2/6] Update tests --- tests/core/test_block.py | 32 +++++++ tests/core/test_core.py | 125 --------------------------- tests/core/test_parallel_system.py | 50 +++++++++++ tests/core/test_sequential_system.py | 50 +++++++++++ 4 files changed, 132 insertions(+), 125 deletions(-) create mode 100644 tests/core/test_block.py delete mode 100644 tests/core/test_core.py create mode 100644 tests/core/test_parallel_system.py create mode 100644 tests/core/test_sequential_system.py diff --git a/tests/core/test_block.py b/tests/core/test_block.py new file mode 100644 index 0000000..f7afb1a --- /dev/null +++ b/tests/core/test_block.py @@ -0,0 +1,32 @@ +from epochlib.core import Block +from tests.core.util import remove_cache_files +from pathlib import Path + + +class TestBlock: + def test_block_init(self): + block = Block() + assert block is not None + + def test_block_set_hash(self): + block = Block() + block.set_hash("") + hash1 = block.get_hash() + assert hash1 != "" + block.set_hash(hash1) + hash2 = block.get_hash() + assert hash2 != "" + assert hash1 != hash2 + + def test_block_get_hash(self): + block = Block() + block.set_hash("") + hash1 = block.get_hash() + assert hash1 != "" + + def test__repr_html_(self): + block_instance = Block() + + html_representation = block_instance._repr_html_() + + assert html_representation is not None \ No newline at end of file diff --git a/tests/core/test_core.py b/tests/core/test_core.py deleted file mode 100644 index a9f90ab..0000000 --- a/tests/core/test_core.py +++ /dev/null @@ -1,125 +0,0 @@ -from epochlib.core import Block, Base, SequentialSystem, ParallelSystem -from tests.core.util import remove_cache_files -from pathlib import Path - - -class TestBlock: - def test_block_init(self): - block = Block() - assert block is not None - - def test_block_set_hash(self): - block = Block() - block.set_hash("") - hash1 = block.get_hash() - assert hash1 != "" - block.set_hash(hash1) - hash2 = block.get_hash() - assert hash2 != "" - assert hash1 != hash2 - - def test_block_get_hash(self): - block = Block() - block.set_hash("") - hash1 = block.get_hash() - assert hash1 != "" - - def test__repr_html_(self): - block_instance = Block() - - html_representation = block_instance._repr_html_() - - assert html_representation is not None - - -class TestSequentialSystem: - def test_system_init(self): - system = SequentialSystem() - assert system is not None - - def test_system_hash_no_steps(self): - system = SequentialSystem() - assert system.get_hash() == "" - - def test_system_hash_with_1_step(self): - block1 = Block() - - system = SequentialSystem([block1]) - assert system.get_hash() != "" - assert block1.get_hash() == system.get_hash() - - def test_system_hash_with_2_steps(self): - block1 = Block() - block2 = Block() - - system = SequentialSystem([block1, block2]) - assert system.get_hash() != block1.get_hash() - assert ( - system.get_hash() == block2.get_hash() != "" - ) - - def test_system_hash_with_3_steps(self): - block1 = Block() - block2 = Block() - block3 = Block() - - system = SequentialSystem([block1, block2, block3]) - assert system.get_hash() != block1.get_hash() - assert system.get_hash() != block2.get_hash() - assert block1.get_hash() != block2.get_hash() - assert ( - system.get_hash() == block3.get_hash() != "" - ) - - def test__repr_html_(self): - block_instance = Block() - system_instance = SequentialSystem([block_instance, block_instance]) - html_representation = system_instance._repr_html_() - - assert html_representation is not None - - -class TestParallelSystem: - def test_parallel_system_init(self): - parallel_system = ParallelSystem() - assert parallel_system is not None - - def test_parallel_system_hash_no_steps(self): - system = ParallelSystem() - assert system.get_hash() == "" - - def test_parallel_system_hash_with_1_step(self): - block1 = Block() - - system = ParallelSystem([block1]) - assert system.get_hash() != "" - assert block1.get_hash() == system.get_hash() - - def test_parallel_system_hash_with_2_steps(self): - block1 = Block() - block2 = Block() - - system = ParallelSystem([block1, block2]) - assert system.get_hash() != block1.get_hash() - assert block1.get_hash() == block2.get_hash() - assert system.get_hash() != block2.get_hash() - assert system.get_hash() != "" - - def test_parallel_system_hash_with_3_steps(self): - block1 = Block() - block2 = Block() - block3 = Block() - - system = ParallelSystem([block1, block2, block3]) - assert system.get_hash() != block1.get_hash() - assert system.get_hash() != block2.get_hash() - assert system.get_hash() != block3.get_hash() - assert block1.get_hash() == block2.get_hash() == block3.get_hash() - assert system.get_hash() != "" - - def test_parallel_system__repr_html_(self): - block_instance = Block() - system_instance = ParallelSystem([block_instance, block_instance]) - html_representation = system_instance._repr_html_() - - assert html_representation is not None diff --git a/tests/core/test_parallel_system.py b/tests/core/test_parallel_system.py new file mode 100644 index 0000000..259a196 --- /dev/null +++ b/tests/core/test_parallel_system.py @@ -0,0 +1,50 @@ +from epochlib.core import Block, ParallelSystem +from tests.core.util import remove_cache_files +from pathlib import Path + + +class TestParallelSystem: + def test_parallel_system_init(self): + parallel_system = ParallelSystem() + assert parallel_system is not None + + def test_parallel_system_hash_no_steps(self): + system = ParallelSystem() + assert system.get_hash() == "" + + def test_parallel_system_hash_with_1_step(self): + block1 = Block() + + system = ParallelSystem([block1]) + assert system.get_hash() != "" + assert block1.get_hash() == system.get_hash() + + def test_parallel_system_hash_with_2_steps(self): + block1 = Block() + block2 = Block() + + system = ParallelSystem([block1, block2]) + assert system.get_hash() != block1.get_hash() + assert block1.get_hash() == block2.get_hash() + assert system.get_hash() != block2.get_hash() + assert system.get_hash() != "" + + def test_parallel_system_hash_with_3_steps(self): + block1 = Block() + block2 = Block() + block3 = Block() + + system = ParallelSystem([block1, block2, block3]) + assert system.get_hash() != block1.get_hash() + assert system.get_hash() != block2.get_hash() + assert system.get_hash() != block3.get_hash() + assert block1.get_hash() == block2.get_hash() == block3.get_hash() + assert system.get_hash() != "" + + def test_parallel_system__repr_html_(self): + block_instance = Block() + system_instance = ParallelSystem([block_instance, block_instance]) + html_representation = system_instance._repr_html_() + + assert html_representation is not None + diff --git a/tests/core/test_sequential_system.py b/tests/core/test_sequential_system.py new file mode 100644 index 0000000..4adc8d1 --- /dev/null +++ b/tests/core/test_sequential_system.py @@ -0,0 +1,50 @@ +from epochlib.core import Block, SequentialSystem +from tests.core.util import remove_cache_files +from pathlib import Path + + +class TestSequentialSystem: + def test_system_init(self): + system = SequentialSystem() + assert system is not None + + def test_system_hash_no_steps(self): + system = SequentialSystem() + assert system.get_hash() == "" + + def test_system_hash_with_1_step(self): + block1 = Block() + + system = SequentialSystem([block1]) + assert system.get_hash() != "" + assert block1.get_hash() == system.get_hash() + + def test_system_hash_with_2_steps(self): + block1 = Block() + block2 = Block() + + system = SequentialSystem([block1, block2]) + assert system.get_hash() != block1.get_hash() + assert ( + system.get_hash() == block2.get_hash() != "" + ) + + def test_system_hash_with_3_steps(self): + block1 = Block() + block2 = Block() + block3 = Block() + + system = SequentialSystem([block1, block2, block3]) + assert system.get_hash() != block1.get_hash() + assert system.get_hash() != block2.get_hash() + assert block1.get_hash() != block2.get_hash() + assert ( + system.get_hash() == block3.get_hash() != "" + ) + + def test__repr_html_(self): + block_instance = Block() + system_instance = SequentialSystem([block_instance, block_instance]) + html_representation = system_instance._repr_html_() + + assert html_representation is not None \ No newline at end of file From 8caf3bc019bf61a3cc4b0b9cde7119ec1c7031fc Mon Sep 17 00:00:00 2001 From: schobbejak Date: Mon, 3 Mar 2025 21:38:09 +0100 Subject: [PATCH 3/6] Split up types --- epochlib/core/__init__.py | 4 +-- epochlib/core/training.py | 25 +------------- epochlib/core/transforming.py | 17 +-------- epochlib/core/types.py | 42 +++++++++++++++++++++++ epochlib/training/training.py | 3 +- epochlib/transformation/transformation.py | 3 +- 6 files changed, 50 insertions(+), 44 deletions(-) create mode 100644 epochlib/core/types.py diff --git a/epochlib/core/__init__.py b/epochlib/core/__init__.py index 7b6a529..4f5b1fe 100644 --- a/epochlib/core/__init__.py +++ b/epochlib/core/__init__.py @@ -4,8 +4,8 @@ from .block import Block from .parallel_system import ParallelSystem from .sequential_system import SequentialSystem -from .training import ParallelTrainingSystem, Pipeline, Trainer, TrainingSystem, TrainType -from .transforming import ParallelTransformingSystem, Transformer, TransformingSystem, TransformType +from .training import ParallelTrainingSystem, Pipeline, Trainer, TrainingSystem +from .transforming import ParallelTransformingSystem, Transformer, TransformingSystem __all__ = [ "TrainType", diff --git a/epochlib/core/training.py b/epochlib/core/training.py index 1ed4f3f..0c0911b 100644 --- a/epochlib/core/training.py +++ b/epochlib/core/training.py @@ -2,39 +2,16 @@ import copy import warnings -from abc import abstractmethod from dataclasses import dataclass from typing import Any from joblib import hash -from .base import Base from .block import Block from .parallel_system import ParallelSystem from .sequential_system import SequentialSystem from .transforming import TransformingSystem - - -class TrainType(Base): - """Abstract train type describing a class that implements two functions train and predict.""" - - @abstractmethod - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: - """Train the block. - - :param x: The input data. - :param y: The target variable. - """ - raise NotImplementedError(f"{self.__class__.__name__} does not implement train method.") - - @abstractmethod - def predict(self, x: Any, **pred_args: Any) -> Any: - """Predict the target variable. - - :param x: The input data. - :return: The predictions. - """ - raise NotImplementedError(f"{self.__class__.__name__} does not implement predict method.") +from .types import TrainType class Trainer(TrainType, Block): diff --git a/epochlib/core/transforming.py b/epochlib/core/transforming.py index bca286e..7f0fc61 100644 --- a/epochlib/core/transforming.py +++ b/epochlib/core/transforming.py @@ -2,27 +2,12 @@ import copy import warnings -from abc import abstractmethod from typing import Any -from .base import Base from .block import Block from .parallel_system import ParallelSystem from .sequential_system import SequentialSystem - - -class TransformType(Base): - """Abstract transform type describing a class that implements the transform function.""" - - @abstractmethod - def transform(self, data: Any, **transform_args: Any) -> Any: - """Transform the input data. - - :param data: The input data. - :param transform_args: Keyword arguments. - :return: The transformed data. - """ - raise NotImplementedError(f"{self.__class__.__name__} does not implement transform method.") +from .types import TransformType class Transformer(TransformType, Block): diff --git a/epochlib/core/types.py b/epochlib/core/types.py new file mode 100644 index 0000000..5defcc8 --- /dev/null +++ b/epochlib/core/types.py @@ -0,0 +1,42 @@ +"""This module contains the different types of blocks for core.""" + +from abc import abstractmethod +from typing import Any + +from .base import Base + + +class TrainType(Base): + """Abstract train type describing a class that implements two functions train and predict.""" + + @abstractmethod + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: + """Train the block. + + :param x: The input data. + :param y: The target variable. + """ + raise NotImplementedError(f"{self.__class__.__name__} does not implement train method.") + + @abstractmethod + def predict(self, x: Any, **pred_args: Any) -> Any: + """Predict the target variable. + + :param x: The input data. + :return: The predictions. + """ + raise NotImplementedError(f"{self.__class__.__name__} does not implement predict method.") + + +class TransformType(Base): + """Abstract transform type describing a class that implements the transform function.""" + + @abstractmethod + def transform(self, data: Any, **transform_args: Any) -> Any: + """Transform the input data. + + :param data: The input data. + :param transform_args: Keyword arguments. + :return: The transformed data. + """ + raise NotImplementedError(f"{self.__class__.__name__} does not implement transform method.") diff --git a/epochlib/training/training.py b/epochlib/training/training.py index e671772..b227d56 100644 --- a/epochlib/training/training.py +++ b/epochlib/training/training.py @@ -4,7 +4,8 @@ from typing import Any from epochlib.caching import CacheArgs, Cacher -from epochlib.core import TrainingSystem, TrainType +from epochlib.core import TrainingSystem +from epochlib.core.types import TrainType @dataclass diff --git a/epochlib/transformation/transformation.py b/epochlib/transformation/transformation.py index ec1f276..a2fffd1 100644 --- a/epochlib/transformation/transformation.py +++ b/epochlib/transformation/transformation.py @@ -4,7 +4,8 @@ from typing import Any from epochlib.caching.cacher import CacheArgs, Cacher -from epochlib.core import TransformingSystem, TransformType +from epochlib.core import TransformingSystem +from epochlib.core.types import TransformType @dataclass From dd36242c561bc604105a1fd0ce338866610535b5 Mon Sep 17 00:00:00 2001 From: JasperVS Date: Thu, 13 Mar 2025 17:59:25 +0100 Subject: [PATCH 4/6] Refactor training.py --- epochlib/core/__init__.py | 5 +- epochlib/core/parallel_training_system.py | 113 ++++ epochlib/core/pipeline.py | 158 +++++ epochlib/core/trainer.py | 49 ++ epochlib/core/training.py | 416 ------------- epochlib/core/training_system.py | 113 ++++ tests/core/test_base.py | 1 + tests/core/test_block.py | 2 - tests/core/test_parallel_system.py | 2 - tests/core/test_parallel_training_system.py | 238 ++++++++ tests/core/test_pipeline.py | 208 +++++++ tests/core/test_sequential_system.py | 2 - tests/core/test_trainer.py | 35 ++ tests/core/test_training.py | 617 -------------------- tests/core/test_training_system.py | 146 +++++ 15 files changed, 1065 insertions(+), 1040 deletions(-) create mode 100644 epochlib/core/parallel_training_system.py create mode 100644 epochlib/core/pipeline.py create mode 100644 epochlib/core/trainer.py delete mode 100644 epochlib/core/training.py create mode 100644 epochlib/core/training_system.py create mode 100644 tests/core/test_parallel_training_system.py create mode 100644 tests/core/test_pipeline.py create mode 100644 tests/core/test_trainer.py delete mode 100644 tests/core/test_training.py create mode 100644 tests/core/test_training_system.py diff --git a/epochlib/core/__init__.py b/epochlib/core/__init__.py index 4f5b1fe..e9c5e0f 100644 --- a/epochlib/core/__init__.py +++ b/epochlib/core/__init__.py @@ -4,8 +4,11 @@ from .block import Block from .parallel_system import ParallelSystem from .sequential_system import SequentialSystem -from .training import ParallelTrainingSystem, Pipeline, Trainer, TrainingSystem +from .trainer import Trainer +from .training_system import TrainingSystem from .transforming import ParallelTransformingSystem, Transformer, TransformingSystem +from .parallel_training_system import ParallelTrainingSystem +from .pipeline import Pipeline __all__ = [ "TrainType", diff --git a/epochlib/core/parallel_training_system.py b/epochlib/core/parallel_training_system.py new file mode 100644 index 0000000..789c5db --- /dev/null +++ b/epochlib/core/parallel_training_system.py @@ -0,0 +1,113 @@ +"""This module contains the parallel training system class.""" + +import copy +from typing import Any + +from .parallel_system import ParallelSystem +from .types import TrainType + + +class ParallelTrainingSystem(TrainType, ParallelSystem): + """A system that trains the input data in parallel. + + Parameters: + - steps (list[Trainer | TrainingSystem | ParallelTrainingSystem]): The steps in the system. + - weights (list[float]): The weights of steps in the system, if not specified they are all equal. + + Methods: + .. code-block:: python + @abstractmethod + def concat(self, data1: Any, data2: Any) -> Any: # Concatenate the transformed data. + + def train(self, x: Any, y: Any) -> tuple[Any, Any]: # Train the system. + + def predict(self, x: Any, pred_args: dict[str, Any] = {}) -> Any: # Predict the output of the system. + + def concat_labels(self, data1: Any, data2: Any) -> Any: # Concatenate the transformed labels. + + def get_hash(self) -> str: # Get the hash of the system. + + Usage: + .. code-block:: python + from epochlib.pipeline import ParallelTrainingSystem + + trainer_1 = CustomTrainer() + trainer_2 = CustomTrainer() + + + class CustomParallelTrainingSystem(ParallelTrainingSystem): + def concat(self, data1: Any, data2: Any) -> Any: + # Concatenate the transformed data. + return data1 + data2 + + + training_system = CustomParallelTrainingSystem(steps=[trainer_1, trainer_2]) + trained_x, trained_y = training_system.train(x, y) + predictions = training_system.predict(x) + """ + + def __post_init__(self) -> None: + """Post init method for the ParallelTrainingSystem class.""" + # Assert all steps correct instances + for step in self.steps: + if not isinstance(step, (TrainType)): + raise TypeError(f"{step} is not an instance of TrainType") + + super().__post_init__() + + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: + """Train the system. + + :param x: The input to the system. + :param y: The expected output of the system. + :return: The input and output of the system. + """ + # Loop through each step and call the train method + out_x, out_y = None, None + for i, step in enumerate(self.steps): + step_name = step.__class__.__name__ + + step_args = train_args.get(step_name, {}) + + if isinstance(step, (TrainType)): + step_x, step_y = step.train(copy.deepcopy(x), copy.deepcopy(y), **step_args) + out_x, out_y = ( + self.concat(out_x, step_x, self.get_weights()[i]), + self.concat_labels(out_y, step_y, self.get_weights()[i]), + ) + else: + raise TypeError(f"{step} is not an instance of TrainType") + + return out_x, out_y + + def predict(self, x: Any, **pred_args: Any) -> Any: + """Predict the output of the system. + + :param x: The input to the system. + :return: The output of the system. + """ + # Loop through each trainer and call the predict method + out_x = None + for i, step in enumerate(self.steps): + step_name = step.__class__.__name__ + + step_args = pred_args.get(step_name, {}) + + if isinstance(step, (TrainType)): + step_x = step.predict(copy.deepcopy(x), **step_args) + out_x = self.concat(out_x, step_x, self.get_weights()[i]) + else: + raise TypeError(f"{step} is not an instance of TrainType") + + return out_x + + def concat_labels(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any: + """Concatenate the transformed labels. Will use concat method if not overridden. + + :param original_data: The first input data. + :param data_to_concat: The second input data. + :param weight: Weight of data to concat + :return: The concatenated data. + """ + return self.concat(original_data, data_to_concat, weight) + diff --git a/epochlib/core/pipeline.py b/epochlib/core/pipeline.py new file mode 100644 index 0000000..5932ce1 --- /dev/null +++ b/epochlib/core/pipeline.py @@ -0,0 +1,158 @@ +"""This module contains the pipeline class for training and predicting on data.""" + +from dataclasses import dataclass +from typing import Any + +from joblib import hash + +from .transforming import TransformingSystem +from .types import TrainType +from .trainer import Trainer +from .training_system import TrainingSystem +from .parallel_training_system import ParallelTrainingSystem + + +@dataclass +class Pipeline(TrainType): + """A pipeline of systems that can be trained and predicted. + + Parameters: + - x_sys (TransformingSystem | None): The system to transform the input data. + - y_sys (TransformingSystem | None): The system to transform the labelled data. + - train_sys (TrainingSystem | None): The system to train the data. + - pred_sys (TransformingSystem | None): The system to transform the predictions. + - label_sys (TransformingSystem | None): The system to transform the labels. + + Methods: + .. code-block:: python + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: + # Train the system. + + def predict(self, x: Any, **pred_args) -> Any: + # Predict the output of the system. + + def get_hash(self) -> str: + # Get the hash of the pipeline + + def get_parent(self) -> Any: + # Get the parent of the pipeline + + def get_children(self) -> list[Any]: + # Get the children of the pipeline + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + + Usage: + .. code-block:: python + from epochlib.pipeline import Pipeline + + x_sys = CustomTransformingSystem() + y_sys = CustomTransformingSystem() + train_sys = CustomTrainingSystem() + pred_sys = CustomTransformingSystem() + label_sys = CustomTransformingSystem() + + pipeline = Pipeline(x_sys=x_sys, y_sys=y_sys, train_sys=train_sys, pred_sys=pred_sys, label_sys=label_sys) + trained_x, trained_y = pipeline.train(x, y) + predictions = pipeline.predict(x) + """ + + x_sys: TransformingSystem | None = None + y_sys: TransformingSystem | None = None + train_sys: Trainer | TrainingSystem | ParallelTrainingSystem | None = None + pred_sys: TransformingSystem | None = None + label_sys: TransformingSystem | None = None + + def __post_init__(self) -> None: + """Post initialization function of the Pipeline.""" + super().__post_init__() + + # Set children and parents + children = [] + systems = [ + self.x_sys, + self.y_sys, + self.train_sys, + self.pred_sys, + self.label_sys, + ] + + for sys in systems: + if sys is not None: + sys.set_parent(self) + children.append(sys) + + self.set_children(children) + + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: + """Train the system. + + :param x: The input to the system. + :param y: The expected output of the system. + :param train_args: The arguments to pass to the training system. (Default is {}) + :return: The input and output of the system. + """ + if self.x_sys is not None: + x = self.x_sys.transform(x, **train_args.get("x_sys", {})) + if self.y_sys is not None: + y = self.y_sys.transform(y, **train_args.get("y_sys", {})) + if self.train_sys is not None: + x, y = self.train_sys.train(x, y, **train_args.get("train_sys", {})) + if self.pred_sys is not None: + x = self.pred_sys.transform(x, **train_args.get("pred_sys", {})) + if self.label_sys is not None: + y = self.label_sys.transform(y, **train_args.get("label_sys", {})) + + return x, y + + def predict(self, x: Any, **pred_args: Any) -> Any: + """Predict the output of the system. + + :param x: The input to the system. + :param pred_args: The arguments to pass to the prediction system. (Default is {}) + :return: The output of the system. + """ + if self.x_sys is not None: + x = self.x_sys.transform(x, **pred_args.get("x_sys", {})) + if self.train_sys is not None: + x = self.train_sys.predict(x, **pred_args.get("train_sys", {})) + if self.pred_sys is not None: + x = self.pred_sys.transform(x, **pred_args.get("pred_sys", {})) + + return x + + def set_hash(self, prev_hash: str) -> None: + """Set the hash of the pipeline. + + :param prev_hash: The hash of the previous block. + """ + self._hash = prev_hash + + xy_hash = "" + if self.x_sys is not None: + self.x_sys.set_hash(self.get_hash()) + xy_hash += self.x_sys.get_hash() + if self.y_sys is not None: + self.y_sys.set_hash(self.get_hash()) + xy_hash += self.y_sys.get_hash()[::-1] # Reversed for edge case where you have two pipelines with the same system but one in x the other in y + + if xy_hash != "": + self._hash = hash(xy_hash) + + if self.train_sys is not None: + self.train_sys.set_hash(self.get_hash()) + training_hash = self.train_sys.get_hash() + if training_hash != "": + self._hash = hash(self._hash + training_hash) + + predlabel_hash = "" + if self.pred_sys is not None: + self.pred_sys.set_hash(self.get_hash()) + predlabel_hash += self.pred_sys.get_hash() + if self.label_sys is not None: + self.label_sys.set_hash(self.get_hash()) + predlabel_hash += self.label_sys.get_hash() + + if predlabel_hash != "": + self._hash = hash(predlabel_hash) diff --git a/epochlib/core/trainer.py b/epochlib/core/trainer.py new file mode 100644 index 0000000..9e9deae --- /dev/null +++ b/epochlib/core/trainer.py @@ -0,0 +1,49 @@ +"""Module containing the Trainer class.""" +from .block import Block +from .types import TrainType + + +class Trainer(TrainType, Block): + """The trainer block is for blocks that need to train on two inputs and predict on one. + + Methods: + .. code-block:: python + @abstractmethod + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: + # Train the block. + + @abstractmethod + def predict(self, x: Any, **pred_args: Any) -> Any: + # Predict the target variable. + + def get_hash(self) -> str: + # Get the hash of the block. + + def get_parent(self) -> Any: + # Get the parent of the block. + + def get_children(self) -> list[Any]: + # Get the children of the block + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + + Usage: + .. code-block:: python + from epochlib.pipeline import Trainer + + + class MyTrainer(Trainer): + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: + # Train the block. + return x, y + + def predict(self, x: Any, **pred_args: Any) -> Any: + # Predict the target variable. + return x + + + my_trainer = MyTrainer() + predictions, labels = my_trainer.train(x, y) + predictions = my_trainer.predict(x) + """ diff --git a/epochlib/core/training.py b/epochlib/core/training.py deleted file mode 100644 index 0c0911b..0000000 --- a/epochlib/core/training.py +++ /dev/null @@ -1,416 +0,0 @@ -"""This module contains classes for training and predicting on data.""" - -import copy -import warnings -from dataclasses import dataclass -from typing import Any - -from joblib import hash - -from .block import Block -from .parallel_system import ParallelSystem -from .sequential_system import SequentialSystem -from .transforming import TransformingSystem -from .types import TrainType - - -class Trainer(TrainType, Block): - """The trainer block is for blocks that need to train on two inputs and predict on one. - - Methods: - .. code-block:: python - @abstractmethod - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: - # Train the block. - - @abstractmethod - def predict(self, x: Any, **pred_args: Any) -> Any: - # Predict the target variable. - - def get_hash(self) -> str: - # Get the hash of the block. - - def get_parent(self) -> Any: - # Get the parent of the block. - - def get_children(self) -> list[Any]: - # Get the children of the block - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - - Usage: - .. code-block:: python - from epochlib.pipeline import Trainer - - - class MyTrainer(Trainer): - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: - # Train the block. - return x, y - - def predict(self, x: Any, **pred_args: Any) -> Any: - # Predict the target variable. - return x - - - my_trainer = MyTrainer() - predictions, labels = my_trainer.train(x, y) - predictions = my_trainer.predict(x) - """ - - -class TrainingSystem(TrainType, SequentialSystem): - """A system that trains on the input data and labels. - - Parameters: - - steps (list[TrainType]): The steps in the system. - - Methods: - .. code-block:: python - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: # Train the system. - - def predict(self, x: Any, **pred_args: Any) -> Any: # Predict the output of the system. - - def get_hash(self) -> str: - # Get the hash of the block. - - def get_parent(self) -> Any: - # Get the parent of the block. - - def get_children(self) -> list[Any]: - # Get the children of the block - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - - Usage: - .. code-block:: python - from epochlib.pipeline import TrainingSystem - - trainer_1 = CustomTrainer() - trainer_2 = CustomTrainer() - - training_system = TrainingSystem(steps=[trainer_1, trainer_2]) - trained_x, trained_y = training_system.train(x, y) - predictions = training_system.predict(x) - """ - - def __post_init__(self) -> None: - """Post init method for the TrainingSystem class.""" - # Assert all steps are a subclass of Trainer - for step in self.steps: - if not isinstance( - step, - (TrainType), - ): - raise TypeError(f"step: {step} is not an instance of TrainType") - - super().__post_init__() - - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: - """Train the system. - - :param x: The input to the system. - :param y: The output of the system. - :return: The input and output of the system. - """ - set_of_steps = set() - for step in self.steps: - step_name = step.__class__.__name__ - set_of_steps.add(step_name) - - if set_of_steps != set(train_args.keys()): - # Raise a warning and print all the keys that do not match - warnings.warn(f"The following steps do not exist but were given in the kwargs: {set(train_args.keys()) - set_of_steps}", UserWarning, stacklevel=2) - - # Loop through each step and call the train method - for step in self.steps: - step_name = step.__class__.__name__ - - step_args = train_args.get(step_name, {}) - if isinstance(step, (TrainType)): - x, y = step.train(x, y, **step_args) - else: - raise TypeError(f"{step} is not an instance of TrainType") - - return x, y - - def predict(self, x: Any, **pred_args: Any) -> Any: - """Predict the output of the system. - - :param x: The input to the system. - :return: The output of the system. - """ - set_of_steps = set() - for step in self.steps: - step_name = step.__class__.__name__ - set_of_steps.add(step_name) - - if set_of_steps != set(pred_args.keys()): - # Raise a warning and print all the keys that do not match - warnings.warn(f"The following steps do not exist but were given in the kwargs: {set(pred_args.keys()) - set_of_steps}", UserWarning, stacklevel=2) - - # Loop through each step and call the predict method - for step in self.steps: - step_name = step.__class__.__name__ - - step_args = pred_args.get(step_name, {}) - - if isinstance(step, (TrainType)): - x = step.predict(x, **step_args) - else: - raise TypeError(f"{step} is not an instance of TrainType") - - return x - - -class ParallelTrainingSystem(TrainType, ParallelSystem): - """A system that trains the input data in parallel. - - Parameters: - - steps (list[Trainer | TrainingSystem | ParallelTrainingSystem]): The steps in the system. - - weights (list[float]): The weights of steps in the system, if not specified they are all equal. - - Methods: - .. code-block:: python - @abstractmethod - def concat(self, data1: Any, data2: Any) -> Any: # Concatenate the transformed data. - - def train(self, x: Any, y: Any) -> tuple[Any, Any]: # Train the system. - - def predict(self, x: Any, pred_args: dict[str, Any] = {}) -> Any: # Predict the output of the system. - - def concat_labels(self, data1: Any, data2: Any) -> Any: # Concatenate the transformed labels. - - def get_hash(self) -> str: # Get the hash of the system. - - Usage: - .. code-block:: python - from epochlib.pipeline import ParallelTrainingSystem - - trainer_1 = CustomTrainer() - trainer_2 = CustomTrainer() - - - class CustomParallelTrainingSystem(ParallelTrainingSystem): - def concat(self, data1: Any, data2: Any) -> Any: - # Concatenate the transformed data. - return data1 + data2 - - - training_system = CustomParallelTrainingSystem(steps=[trainer_1, trainer_2]) - trained_x, trained_y = training_system.train(x, y) - predictions = training_system.predict(x) - """ - - def __post_init__(self) -> None: - """Post init method for the ParallelTrainingSystem class.""" - # Assert all steps correct instances - for step in self.steps: - if not isinstance(step, (TrainType)): - raise TypeError(f"{step} is not an instance of TrainType") - - super().__post_init__() - - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: - """Train the system. - - :param x: The input to the system. - :param y: The expected output of the system. - :return: The input and output of the system. - """ - # Loop through each step and call the train method - out_x, out_y = None, None - for i, step in enumerate(self.steps): - step_name = step.__class__.__name__ - - step_args = train_args.get(step_name, {}) - - if isinstance(step, (TrainType)): - step_x, step_y = step.train(copy.deepcopy(x), copy.deepcopy(y), **step_args) - out_x, out_y = ( - self.concat(out_x, step_x, self.get_weights()[i]), - self.concat_labels(out_y, step_y, self.get_weights()[i]), - ) - else: - raise TypeError(f"{step} is not an instance of TrainType") - - return out_x, out_y - - def predict(self, x: Any, **pred_args: Any) -> Any: - """Predict the output of the system. - - :param x: The input to the system. - :return: The output of the system. - """ - # Loop through each trainer and call the predict method - out_x = None - for i, step in enumerate(self.steps): - step_name = step.__class__.__name__ - - step_args = pred_args.get(step_name, {}) - - if isinstance(step, (TrainType)): - step_x = step.predict(copy.deepcopy(x), **step_args) - out_x = self.concat(out_x, step_x, self.get_weights()[i]) - else: - raise TypeError(f"{step} is not an instance of TrainType") - - return out_x - - def concat_labels(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any: - """Concatenate the transformed labels. Will use concat method if not overridden. - - :param original_data: The first input data. - :param data_to_concat: The second input data. - :param weight: Weight of data to concat - :return: The concatenated data. - """ - return self.concat(original_data, data_to_concat, weight) - - -@dataclass -class Pipeline(TrainType): - """A pipeline of systems that can be trained and predicted. - - Parameters: - - x_sys (TransformingSystem | None): The system to transform the input data. - - y_sys (TransformingSystem | None): The system to transform the labelled data. - - train_sys (TrainingSystem | None): The system to train the data. - - pred_sys (TransformingSystem | None): The system to transform the predictions. - - label_sys (TransformingSystem | None): The system to transform the labels. - - Methods: - .. code-block:: python - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: - # Train the system. - - def predict(self, x: Any, **pred_args) -> Any: - # Predict the output of the system. - - def get_hash(self) -> str: - # Get the hash of the pipeline - - def get_parent(self) -> Any: - # Get the parent of the pipeline - - def get_children(self) -> list[Any]: - # Get the children of the pipeline - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - - Usage: - .. code-block:: python - from epochlib.pipeline import Pipeline - - x_sys = CustomTransformingSystem() - y_sys = CustomTransformingSystem() - train_sys = CustomTrainingSystem() - pred_sys = CustomTransformingSystem() - label_sys = CustomTransformingSystem() - - pipeline = Pipeline(x_sys=x_sys, y_sys=y_sys, train_sys=train_sys, pred_sys=pred_sys, label_sys=label_sys) - trained_x, trained_y = pipeline.train(x, y) - predictions = pipeline.predict(x) - """ - - x_sys: TransformingSystem | None = None - y_sys: TransformingSystem | None = None - train_sys: Trainer | TrainingSystem | ParallelTrainingSystem | None = None - pred_sys: TransformingSystem | None = None - label_sys: TransformingSystem | None = None - - def __post_init__(self) -> None: - """Post initialization function of the Pipeline.""" - super().__post_init__() - - # Set children and parents - children = [] - systems = [ - self.x_sys, - self.y_sys, - self.train_sys, - self.pred_sys, - self.label_sys, - ] - - for sys in systems: - if sys is not None: - sys.set_parent(self) - children.append(sys) - - self.set_children(children) - - def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: - """Train the system. - - :param x: The input to the system. - :param y: The expected output of the system. - :param train_args: The arguments to pass to the training system. (Default is {}) - :return: The input and output of the system. - """ - if self.x_sys is not None: - x = self.x_sys.transform(x, **train_args.get("x_sys", {})) - if self.y_sys is not None: - y = self.y_sys.transform(y, **train_args.get("y_sys", {})) - if self.train_sys is not None: - x, y = self.train_sys.train(x, y, **train_args.get("train_sys", {})) - if self.pred_sys is not None: - x = self.pred_sys.transform(x, **train_args.get("pred_sys", {})) - if self.label_sys is not None: - y = self.label_sys.transform(y, **train_args.get("label_sys", {})) - - return x, y - - def predict(self, x: Any, **pred_args: Any) -> Any: - """Predict the output of the system. - - :param x: The input to the system. - :param pred_args: The arguments to pass to the prediction system. (Default is {}) - :return: The output of the system. - """ - if self.x_sys is not None: - x = self.x_sys.transform(x, **pred_args.get("x_sys", {})) - if self.train_sys is not None: - x = self.train_sys.predict(x, **pred_args.get("train_sys", {})) - if self.pred_sys is not None: - x = self.pred_sys.transform(x, **pred_args.get("pred_sys", {})) - - return x - - def set_hash(self, prev_hash: str) -> None: - """Set the hash of the pipeline. - - :param prev_hash: The hash of the previous block. - """ - self._hash = prev_hash - - xy_hash = "" - if self.x_sys is not None: - self.x_sys.set_hash(self.get_hash()) - xy_hash += self.x_sys.get_hash() - if self.y_sys is not None: - self.y_sys.set_hash(self.get_hash()) - xy_hash += self.y_sys.get_hash()[::-1] # Reversed for edge case where you have two pipelines with the same system but one in x the other in y - - if xy_hash != "": - self._hash = hash(xy_hash) - - if self.train_sys is not None: - self.train_sys.set_hash(self.get_hash()) - training_hash = self.train_sys.get_hash() - if training_hash != "": - self._hash = hash(self._hash + training_hash) - - predlabel_hash = "" - if self.pred_sys is not None: - self.pred_sys.set_hash(self.get_hash()) - predlabel_hash += self.pred_sys.get_hash() - if self.label_sys is not None: - self.label_sys.set_hash(self.get_hash()) - predlabel_hash += self.label_sys.get_hash() - - if predlabel_hash != "": - self._hash = hash(predlabel_hash) diff --git a/epochlib/core/training_system.py b/epochlib/core/training_system.py new file mode 100644 index 0000000..eb6fea8 --- /dev/null +++ b/epochlib/core/training_system.py @@ -0,0 +1,113 @@ +"""This module contains the training system class.""" + +import warnings +from typing import Any + + +from .sequential_system import SequentialSystem +from .types import TrainType + + +class TrainingSystem(TrainType, SequentialSystem): + """A system that trains on the input data and labels. + + Parameters: + - steps (list[TrainType]): The steps in the system. + + Methods: + .. code-block:: python + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: # Train the system. + + def predict(self, x: Any, **pred_args: Any) -> Any: # Predict the output of the system. + + def get_hash(self) -> str: + # Get the hash of the block. + + def get_parent(self) -> Any: + # Get the parent of the block. + + def get_children(self) -> list[Any]: + # Get the children of the block + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + + Usage: + .. code-block:: python + from epochlib.pipeline import TrainingSystem + + trainer_1 = CustomTrainer() + trainer_2 = CustomTrainer() + + training_system = TrainingSystem(steps=[trainer_1, trainer_2]) + trained_x, trained_y = training_system.train(x, y) + predictions = training_system.predict(x) + """ + + def __post_init__(self) -> None: + """Post init method for the TrainingSystem class.""" + # Assert all steps are a subclass of Trainer + for step in self.steps: + if not isinstance( + step, + (TrainType), + ): + raise TypeError(f"step: {step} is not an instance of TrainType") + + super().__post_init__() + + def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: + """Train the system. + + :param x: The input to the system. + :param y: The output of the system. + :return: The input and output of the system. + """ + set_of_steps = set() + for step in self.steps: + step_name = step.__class__.__name__ + set_of_steps.add(step_name) + + if set_of_steps != set(train_args.keys()): + # Raise a warning and print all the keys that do not match + warnings.warn(f"The following steps do not exist but were given in the kwargs: {set(train_args.keys()) - set_of_steps}", UserWarning, stacklevel=2) + + # Loop through each step and call the train method + for step in self.steps: + step_name = step.__class__.__name__ + + step_args = train_args.get(step_name, {}) + if isinstance(step, (TrainType)): + x, y = step.train(x, y, **step_args) + else: + raise TypeError(f"{step} is not an instance of TrainType") + + return x, y + + def predict(self, x: Any, **pred_args: Any) -> Any: + """Predict the output of the system. + + :param x: The input to the system. + :return: The output of the system. + """ + set_of_steps = set() + for step in self.steps: + step_name = step.__class__.__name__ + set_of_steps.add(step_name) + + if set_of_steps != set(pred_args.keys()): + # Raise a warning and print all the keys that do not match + warnings.warn(f"The following steps do not exist but were given in the kwargs: {set(pred_args.keys()) - set_of_steps}", UserWarning, stacklevel=2) + + # Loop through each step and call the predict method + for step in self.steps: + step_name = step.__class__.__name__ + + step_args = pred_args.get(step_name, {}) + + if isinstance(step, (TrainType)): + x = step.predict(x, **step_args) + else: + raise TypeError(f"{step} is not an instance of TrainType") + + return x diff --git a/tests/core/test_base.py b/tests/core/test_base.py index 1034e23..ce7500e 100644 --- a/tests/core/test_base.py +++ b/tests/core/test_base.py @@ -2,6 +2,7 @@ from tests.core.util import remove_cache_files from pathlib import Path + class Test_Base: def test_init(self): base = Base() diff --git a/tests/core/test_block.py b/tests/core/test_block.py index f7afb1a..685dc00 100644 --- a/tests/core/test_block.py +++ b/tests/core/test_block.py @@ -1,6 +1,4 @@ from epochlib.core import Block -from tests.core.util import remove_cache_files -from pathlib import Path class TestBlock: diff --git a/tests/core/test_parallel_system.py b/tests/core/test_parallel_system.py index 259a196..6487b1a 100644 --- a/tests/core/test_parallel_system.py +++ b/tests/core/test_parallel_system.py @@ -1,6 +1,4 @@ from epochlib.core import Block, ParallelSystem -from tests.core.util import remove_cache_files -from pathlib import Path class TestParallelSystem: diff --git a/tests/core/test_parallel_training_system.py b/tests/core/test_parallel_training_system.py new file mode 100644 index 0000000..c5bc32d --- /dev/null +++ b/tests/core/test_parallel_training_system.py @@ -0,0 +1,238 @@ +import pytest +from epochlib.core import Trainer, TrainingSystem, ParallelTrainingSystem +from epochlib.core import Transformer +import numpy as np + + +class TestParallelTrainingSystem: + def test_PTrainSys_init(self): + system = ParallelTrainingSystem() + + assert system is not None + + def test_PTrainSys_init_trainers(self): + t1 = Trainer() + t2 = TrainingSystem() + + system = ParallelTrainingSystem(steps=[t1, t2]) + + assert system is not None + + def test_PTrainSys_init_wrong_trainers(self): + class WrongTrainer: + """Wrong trainer""" + + t1 = WrongTrainer() + + with pytest.raises(TypeError): + ParallelTrainingSystem(steps=[t1]) + + def test_PTrainSys_train(self): + class trainer(Trainer): + def train(self, x, y): + return x, y + + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + + return data1 + data2 + + t1 = trainer() + + system = pts(steps=[t1]) + + assert system is not None + assert system.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_PTrainSys_trainers(self): + class trainer(Trainer): + def train(self, x, y): + return x, y + + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + t1 = trainer() + t2 = trainer() + + system = pts(steps=[t1, t2]) + + assert system is not None + assert system.train([1, 2, 3], [1, 2, 3]) == ( + [1, 2, 3, 1, 2, 3], + [1, 2, 3, 1, 2, 3], + ) + + def test_PTrainSys_trainers_with_weights(self): + class trainer(Trainer): + def train(self, x, y): + return x, y + + class trainer2(Trainer): + def train(self, x, y): + return x * 3, y + + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 * weight + return data1 + data2 * weight + + t1 = trainer() + t2 = trainer2() + + system = pts(steps=[t1, t2]) + + assert system is not None + test = np.array([1, 2, 3]) + preds, labels = system.train(test, test) + assert np.array_equal(preds, test * 2) + assert np.array_equal(labels, test) + + def test_PTrainSys_predict(self): + class trainer(Trainer): + def predict(self, x): + return x + + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + t1 = trainer() + + system = pts(steps=[t1]) + + assert system is not None + assert system.predict([1, 2, 3]) == [1, 2, 3] + + def test_PTrainSys_predict_with_trainsys(self): + class trainer(Trainer): + def predict(self, x): + return x + + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + t1 = trainer() + t2 = TrainingSystem(steps=[t1]) + + system = pts(steps=[t2, t1]) + + assert system is not None + assert system.predict([1, 2, 3]) == [1, 2, 3, 1, 2, 3] + + def test_PTrainSys_predict_with_trainer_and_trainsys(self): + class trainer(Trainer): + def predict(self, x): + return x + + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + t1 = trainer() + t2 = trainer() + t3 = TrainingSystem(steps=[t1, t2]) + + system = pts(steps=[t1, t2, t3]) + + assert system is not None + assert t3.predict([1, 2, 3]) == [1, 2, 3] + assert system.predict([1, 2, 3]) == [1, 2, 3, 1, 2, 3, 1, 2, 3] + + def test_PTrainSys_predictors(self): + class trainer(Trainer): + def predict(self, x): + return x + + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + t1 = trainer() + t2 = trainer() + + system = pts(steps=[t1, t2]) + + assert system is not None + assert system.predict([1, 2, 3]) == [1, 2, 3, 1, 2, 3] + + def test_PTrainSys_concat_labels_throws_error(self): + system = ParallelTrainingSystem() + + with pytest.raises(NotImplementedError): + system.concat_labels([1, 2, 3], [4, 5, 6]) + + def test_PTrainSys_step_1_changed(self): + system = ParallelTrainingSystem() + + t1 = Transformer() + system.steps = [t1] + + with pytest.raises(TypeError): + system.train([1, 2, 3], [1, 2, 3]) + + with pytest.raises(TypeError): + system.predict([1, 2, 3]) + + def test_PTrainSys_step_2_changed(self): + class pts(ParallelTrainingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + + return data1 + data2 + + system = pts() + + class trainer(Trainer): + def train(self, x, y): + return x, y + + def predict(self, x): + return x + + t1 = trainer() + t2 = Transformer() + system.steps = [t1, t2] + + with pytest.raises(TypeError): + system.train([1, 2, 3], [1, 2, 3]) + + with pytest.raises(TypeError): + system.predict([1, 2, 3]) + + def test_train_parallel_hashes(self): + class SubTrainer1(Trainer): + def train(self, x, y): + return x, y + + class SubTrainer2(Trainer): + def train(self, x, y): + return x * 2, y + + block1 = SubTrainer1() + block2 = SubTrainer2() + + system1 = ParallelTrainingSystem(steps=[block1, block2]) + system1_copy = ParallelTrainingSystem(steps=[block1, block2]) + system2 = ParallelTrainingSystem(steps=[block2, block1]) + system2_copy = ParallelTrainingSystem(steps=[block2, block1]) + + assert system1.get_hash() == system2.get_hash() + assert system1.get_hash() == system1_copy.get_hash() + assert system2.get_hash() == system2_copy.get_hash() diff --git a/tests/core/test_pipeline.py b/tests/core/test_pipeline.py new file mode 100644 index 0000000..a74be6a --- /dev/null +++ b/tests/core/test_pipeline.py @@ -0,0 +1,208 @@ +from epochlib.core import TrainingSystem, Pipeline +from epochlib.core import Transformer, TransformingSystem +import numpy as np + + +class TestPipeline: + def test_pipeline_init(self): + pipeline = Pipeline() + assert pipeline is not None + + def test_pipeline_init_with_systems(self): + x_system = TransformingSystem() + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + label_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + label_sys=label_system, + ) + assert pipeline is not None + + def test_pipeline_train(self): + x_system = TransformingSystem() + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + label_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + label_sys=label_system, + ) + assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_pipeline_train_no_y_system(self): + x_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_pipeline_train_no_x_system(self): + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + pipeline = Pipeline( + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_pipeline_train_no_train_system(self): + x_system = TransformingSystem() + y_system = TransformingSystem() + post_system = TransformingSystem() + post_label_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=None, + pred_sys=post_system, + label_sys=post_label_system, + ) + assert pipeline.train([1, 2], [1, 2]) == ([1, 2], [1, 2]) + + def test_pipeline_train_no_refining_system(self): + x_system = TransformingSystem() + y_system = TransformingSystem() + training_system = TrainingSystem() + pipeline = Pipeline(x_sys=x_system, y_sys=y_system, train_sys=training_system) + assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_pipeline_train_1_x_transform_block(self): + class TransformingBlock(Transformer): + def transform(self, x): + return x * 2 + + transform1 = TransformingBlock() + x_system = TransformingSystem(steps=[transform1]) + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + result = pipeline.train(np.array([1, 2, 3]), [1, 2, 3]) + assert np.array_equal(result[0], np.array([2, 4, 6])) and np.array_equal( + result[1], np.array([1, 2, 3]) + ) + + def test_pipeline_predict(self): + x_system = TransformingSystem() + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + assert pipeline.predict([1, 2, 3]) == [1, 2, 3] + + def test_pipeline_predict_no_y_system(self): + x_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + assert pipeline.predict([1, 2, 3]) == [1, 2, 3] + + def test_pipeline_predict_no_systems(self): + pipeline = Pipeline() + assert pipeline.predict([1, 2, 3]) == [1, 2, 3] + + def test_pipeline_get_hash_no_change(self): + x_system = TransformingSystem() + y_system = TransformingSystem() + training_system = TrainingSystem() + predicting_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=predicting_system, + ) + assert x_system.get_hash() == "" + + def test_pipeline_get_hash_with_change(self): + class TransformingBlock(Transformer): + def transform(self, x): + return x * 2 + + transform1 = TransformingBlock() + x_system = TransformingSystem(steps=[transform1]) + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem() + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + assert x_system.get_hash() != y_system.get_hash() + assert pipeline.get_hash() != "" + + def test_pipeline_predict_system_hash(self): + class TransformingBlock(Transformer): + def transform(self, x): + return x * 2 + + transform1 = TransformingBlock() + x_system = TransformingSystem() + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem(steps=[transform1]) + pipeline = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + assert prediction_system.get_hash() != x_system.get_hash() + assert pipeline.get_hash() != "" + + def test_pipeline_pre_post_hash(self): + class TransformingBlock(Transformer): + def transform(self, x): + return x * 2 + + transform1 = TransformingBlock() + x_system = TransformingSystem(steps=[transform1]) + y_system = TransformingSystem() + training_system = TrainingSystem() + prediction_system = TransformingSystem(steps=[transform1]) + assert x_system.get_hash() == prediction_system.get_hash() + pipeline1 = Pipeline( + x_sys=x_system, + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + pipeline1_train_sys_hash = pipeline1.train_sys.get_hash() + pipeline2 = Pipeline( + x_sys=TransformingSystem(), + y_sys=y_system, + train_sys=training_system, + pred_sys=prediction_system, + ) + assert pipeline1_train_sys_hash != pipeline2.train_sys.get_hash() diff --git a/tests/core/test_sequential_system.py b/tests/core/test_sequential_system.py index 4adc8d1..8d05171 100644 --- a/tests/core/test_sequential_system.py +++ b/tests/core/test_sequential_system.py @@ -1,6 +1,4 @@ from epochlib.core import Block, SequentialSystem -from tests.core.util import remove_cache_files -from pathlib import Path class TestSequentialSystem: diff --git a/tests/core/test_trainer.py b/tests/core/test_trainer.py new file mode 100644 index 0000000..25d97aa --- /dev/null +++ b/tests/core/test_trainer.py @@ -0,0 +1,35 @@ +import pytest +from epochlib.core import Trainer + + +class TestTrainer: + def test_trainer_abstract_train(self): + trainer = Trainer() + with pytest.raises(NotImplementedError): + trainer.train([1, 2, 3], [1, 2, 3]) + + def test_trainer_abstract_predict(self): + trainer = Trainer() + with pytest.raises(NotImplementedError): + trainer.predict([1, 2, 3]) + + def test_trainer_train(self): + class trainerInstance(Trainer): + def train(self, x, y): + return x, y + + trainer = trainerInstance() + assert trainer.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_trainer_predict(self): + class trainerInstance(Trainer): + def predict(self, x): + return x + + trainer = trainerInstance() + assert trainer.predict([1, 2, 3]) == [1, 2, 3] + + def test_trainer_hash(self): + trainer = Trainer() + assert trainer.get_hash() != "" + diff --git a/tests/core/test_training.py b/tests/core/test_training.py deleted file mode 100644 index 151118a..0000000 --- a/tests/core/test_training.py +++ /dev/null @@ -1,617 +0,0 @@ -import pytest -import warnings -from epochlib.core import Trainer, TrainingSystem, ParallelTrainingSystem, Pipeline -from epochlib.core import Transformer, TransformingSystem -import numpy as np - - -class TestTrainer: - def test_trainer_abstract_train(self): - trainer = Trainer() - with pytest.raises(NotImplementedError): - trainer.train([1, 2, 3], [1, 2, 3]) - - def test_trainer_abstract_predict(self): - trainer = Trainer() - with pytest.raises(NotImplementedError): - trainer.predict([1, 2, 3]) - - def test_trainer_train(self): - class trainerInstance(Trainer): - def train(self, x, y): - return x, y - - trainer = trainerInstance() - assert trainer.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_trainer_predict(self): - class trainerInstance(Trainer): - def predict(self, x): - return x - - trainer = trainerInstance() - assert trainer.predict([1, 2, 3]) == [1, 2, 3] - - def test_trainer_hash(self): - trainer = Trainer() - assert trainer.get_hash() != "" - - -class TestTrainingSystem: - def test_training_system_init(self): - training_system = TrainingSystem() - assert training_system is not None - - def test_training_system_init_with_steps(self): - class SubTrainer(Trainer): - def predict(self, x): - return x - - block1 = SubTrainer() - training_system = TrainingSystem(steps=[block1]) - assert training_system is not None - - def test_training_system_wrong_step(self): - class SubTrainer: - def predict(self, x): - return x - - with pytest.raises(TypeError): - TrainingSystem(steps=[SubTrainer()]) - - def test_training_system_steps_changed_predict(self): - class SubTrainer: - def predict(self, x): - return x - - block1 = SubTrainer() - training_system = TrainingSystem() - training_system.steps = [block1] - with pytest.raises(TypeError): - training_system.predict([1, 2, 3]) - - def test_training_system_predict(self): - class SubTrainer(Trainer): - def predict(self, x): - return x - - block1 = SubTrainer() - training_system = TrainingSystem(steps=[block1]) - assert training_system.predict([1, 2, 3]) == [1, 2, 3] - - def test_trainsys_predict_with_trainer_and_trainsys(self): - class SubTrainer(Trainer): - def predict(self, x): - return x - - block1 = SubTrainer() - block2 = SubTrainer() - block3 = TrainingSystem(steps=[block1, block2]) - assert block2.get_parent() == block3 - assert block1 in block3.get_children() - training_system = TrainingSystem(steps=[block1, block2, block3]) - assert training_system.predict([1, 2, 3]) == [1, 2, 3] - - def test_training_system_train(self): - class SubTrainer(Trainer): - def train(self, x, y): - return x, y - - block1 = SubTrainer() - training_system = TrainingSystem(steps=[block1]) - assert training_system.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_traiinsys_train_with_trainer_and_trainsys(self): - class SubTrainer(Trainer): - def train(self, x, y): - return x, y - - block1 = SubTrainer() - block2 = SubTrainer() - block3 = TrainingSystem(steps=[block1, block2]) - training_system = TrainingSystem(steps=[block1, block2, block3]) - assert training_system.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_training_system_steps_changed_train(self): - class SubTrainer: - def train(self, x, y): - return x, y - - block1 = SubTrainer() - training_system = TrainingSystem() - training_system.steps = [block1] - with pytest.raises(TypeError): - training_system.train([1, 2, 3], [1, 2, 3]) - - def test_training_system_empty_hash(self): - training_system = TrainingSystem() - assert training_system.get_hash() == "" - - def test_training_system_wrong_kwargs(self): - class Block1(Trainer): - def train(self, x, y, **kwargs): - return x, y - - def predict(self, x, **pred_args): - return x - - class Block2(Trainer): - def train(self, x, y, **kwargs): - return x, y - - def predict(self, x, **pred_args): - return x - - block1 = Block1() - block2 = Block2() - system = TrainingSystem(steps=[block1, block2]) - kwargs = {"Block1": {}, "block2": {}} - with pytest.warns( - UserWarning, - match="The following steps do not exist but were given in the kwargs:", - ): - system.train([1, 2, 3], [1, 2, 3], **kwargs) - system.predict([1, 2, 3], **kwargs) - - def test_training_system_right_kwargs(self): - class Block1(Trainer): - def train(self, x, y, **kwargs): - return x, y - - def predict(self, x, **pred_args): - return x - - class Block2(Trainer): - def train(self, x, y, **kwargs): - return x, y - - def predict(self, x, **pred_args): - return x - - block1 = Block1() - block2 = Block2() - system = TrainingSystem(steps=[block1, block2]) - kwargs = {"Block1": {}, "Block2": {}} - with warnings.catch_warnings(record=True) as caught_warnings: - system.train([1, 2, 3], [1, 2, 3], **kwargs) - system.predict([1, 2, 3], **kwargs) - assert not caught_warnings - - -class TestParallelTrainingSystem: - def test_PTrainSys_init(self): - system = ParallelTrainingSystem() - - assert system is not None - - def test_PTrainSys_init_trainers(self): - t1 = Trainer() - t2 = TrainingSystem() - - system = ParallelTrainingSystem(steps=[t1, t2]) - - assert system is not None - - def test_PTrainSys_init_wrong_trainers(self): - class WrongTrainer: - """Wrong trainer""" - - t1 = WrongTrainer() - - with pytest.raises(TypeError): - ParallelTrainingSystem(steps=[t1]) - - def test_PTrainSys_train(self): - class trainer(Trainer): - def train(self, x, y): - return x, y - - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - - return data1 + data2 - - t1 = trainer() - - system = pts(steps=[t1]) - - assert system is not None - assert system.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_PTrainSys_trainers(self): - class trainer(Trainer): - def train(self, x, y): - return x, y - - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - t1 = trainer() - t2 = trainer() - - system = pts(steps=[t1, t2]) - - assert system is not None - assert system.train([1, 2, 3], [1, 2, 3]) == ( - [1, 2, 3, 1, 2, 3], - [1, 2, 3, 1, 2, 3], - ) - - def test_PTrainSys_trainers_with_weights(self): - class trainer(Trainer): - def train(self, x, y): - return x, y - - class trainer2(Trainer): - def train(self, x, y): - return x * 3, y - - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 * weight - return data1 + data2 * weight - - t1 = trainer() - t2 = trainer2() - - system = pts(steps=[t1, t2]) - - assert system is not None - test = np.array([1, 2, 3]) - preds, labels = system.train(test, test) - assert np.array_equal(preds, test * 2) - assert np.array_equal(labels, test) - - def test_PTrainSys_predict(self): - class trainer(Trainer): - def predict(self, x): - return x - - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - t1 = trainer() - - system = pts(steps=[t1]) - - assert system is not None - assert system.predict([1, 2, 3]) == [1, 2, 3] - - def test_PTrainSys_predict_with_trainsys(self): - class trainer(Trainer): - def predict(self, x): - return x - - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - t1 = trainer() - t2 = TrainingSystem(steps=[t1]) - - system = pts(steps=[t2, t1]) - - assert system is not None - assert system.predict([1, 2, 3]) == [1, 2, 3, 1, 2, 3] - - def test_PTrainSys_predict_with_trainer_and_trainsys(self): - class trainer(Trainer): - def predict(self, x): - return x - - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - t1 = trainer() - t2 = trainer() - t3 = TrainingSystem(steps=[t1, t2]) - - system = pts(steps=[t1, t2, t3]) - - assert system is not None - assert t3.predict([1, 2, 3]) == [1, 2, 3] - assert system.predict([1, 2, 3]) == [1, 2, 3, 1, 2, 3, 1, 2, 3] - - def test_PTrainSys_predictors(self): - class trainer(Trainer): - def predict(self, x): - return x - - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - t1 = trainer() - t2 = trainer() - - system = pts(steps=[t1, t2]) - - assert system is not None - assert system.predict([1, 2, 3]) == [1, 2, 3, 1, 2, 3] - - def test_PTrainSys_concat_labels_throws_error(self): - system = ParallelTrainingSystem() - - with pytest.raises(NotImplementedError): - system.concat_labels([1, 2, 3], [4, 5, 6]) - - def test_PTrainSys_step_1_changed(self): - system = ParallelTrainingSystem() - - t1 = Transformer() - system.steps = [t1] - - with pytest.raises(TypeError): - system.train([1, 2, 3], [1, 2, 3]) - - with pytest.raises(TypeError): - system.predict([1, 2, 3]) - - def test_PTrainSys_step_2_changed(self): - class pts(ParallelTrainingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - - return data1 + data2 - - system = pts() - - class trainer(Trainer): - def train(self, x, y): - return x, y - - def predict(self, x): - return x - - t1 = trainer() - t2 = Transformer() - system.steps = [t1, t2] - - with pytest.raises(TypeError): - system.train([1, 2, 3], [1, 2, 3]) - - with pytest.raises(TypeError): - system.predict([1, 2, 3]) - - def test_train_parallel_hashes(self): - class SubTrainer1(Trainer): - def train(self, x, y): - return x, y - - class SubTrainer2(Trainer): - def train(self, x, y): - return x * 2, y - - block1 = SubTrainer1() - block2 = SubTrainer2() - - system1 = ParallelTrainingSystem(steps=[block1, block2]) - system1_copy = ParallelTrainingSystem(steps=[block1, block2]) - system2 = ParallelTrainingSystem(steps=[block2, block1]) - system2_copy = ParallelTrainingSystem(steps=[block2, block1]) - - assert system1.get_hash() == system2.get_hash() - assert system1.get_hash() == system1_copy.get_hash() - assert system2.get_hash() == system2_copy.get_hash() - - -class TestPipeline: - def test_pipeline_init(self): - pipeline = Pipeline() - assert pipeline is not None - - def test_pipeline_init_with_systems(self): - x_system = TransformingSystem() - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - label_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - label_sys=label_system, - ) - assert pipeline is not None - - def test_pipeline_train(self): - x_system = TransformingSystem() - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - label_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - label_sys=label_system, - ) - assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_pipeline_train_no_y_system(self): - x_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_pipeline_train_no_x_system(self): - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - pipeline = Pipeline( - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_pipeline_train_no_train_system(self): - x_system = TransformingSystem() - y_system = TransformingSystem() - post_system = TransformingSystem() - post_label_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=None, - pred_sys=post_system, - label_sys=post_label_system, - ) - assert pipeline.train([1, 2], [1, 2]) == ([1, 2], [1, 2]) - - def test_pipeline_train_no_refining_system(self): - x_system = TransformingSystem() - y_system = TransformingSystem() - training_system = TrainingSystem() - pipeline = Pipeline(x_sys=x_system, y_sys=y_system, train_sys=training_system) - assert pipeline.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) - - def test_pipeline_train_1_x_transform_block(self): - class TransformingBlock(Transformer): - def transform(self, x): - return x * 2 - - transform1 = TransformingBlock() - x_system = TransformingSystem(steps=[transform1]) - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - result = pipeline.train(np.array([1, 2, 3]), [1, 2, 3]) - assert np.array_equal(result[0], np.array([2, 4, 6])) and np.array_equal( - result[1], np.array([1, 2, 3]) - ) - - def test_pipeline_predict(self): - x_system = TransformingSystem() - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - assert pipeline.predict([1, 2, 3]) == [1, 2, 3] - - def test_pipeline_predict_no_y_system(self): - x_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - assert pipeline.predict([1, 2, 3]) == [1, 2, 3] - - def test_pipeline_predict_no_systems(self): - pipeline = Pipeline() - assert pipeline.predict([1, 2, 3]) == [1, 2, 3] - - def test_pipeline_get_hash_no_change(self): - x_system = TransformingSystem() - y_system = TransformingSystem() - training_system = TrainingSystem() - predicting_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=predicting_system, - ) - assert x_system.get_hash() == "" - - def test_pipeline_get_hash_with_change(self): - class TransformingBlock(Transformer): - def transform(self, x): - return x * 2 - - transform1 = TransformingBlock() - x_system = TransformingSystem(steps=[transform1]) - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem() - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - assert x_system.get_hash() != y_system.get_hash() - assert pipeline.get_hash() != "" - - def test_pipeline_predict_system_hash(self): - class TransformingBlock(Transformer): - def transform(self, x): - return x * 2 - - transform1 = TransformingBlock() - x_system = TransformingSystem() - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem(steps=[transform1]) - pipeline = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - assert prediction_system.get_hash() != x_system.get_hash() - assert pipeline.get_hash() != "" - - def test_pipeline_pre_post_hash(self): - class TransformingBlock(Transformer): - def transform(self, x): - return x * 2 - - transform1 = TransformingBlock() - x_system = TransformingSystem(steps=[transform1]) - y_system = TransformingSystem() - training_system = TrainingSystem() - prediction_system = TransformingSystem(steps=[transform1]) - assert x_system.get_hash() == prediction_system.get_hash() - pipeline1 = Pipeline( - x_sys=x_system, - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - pipeline1_train_sys_hash = pipeline1.train_sys.get_hash() - pipeline2 = Pipeline( - x_sys=TransformingSystem(), - y_sys=y_system, - train_sys=training_system, - pred_sys=prediction_system, - ) - assert pipeline1_train_sys_hash != pipeline2.train_sys.get_hash() diff --git a/tests/core/test_training_system.py b/tests/core/test_training_system.py new file mode 100644 index 0000000..912fc4b --- /dev/null +++ b/tests/core/test_training_system.py @@ -0,0 +1,146 @@ +import pytest +import warnings +from epochlib.core import Trainer, TrainingSystem + + +class TestTrainingSystem: + def test_training_system_init(self): + training_system = TrainingSystem() + assert training_system is not None + + def test_training_system_init_with_steps(self): + class SubTrainer(Trainer): + def predict(self, x): + return x + + block1 = SubTrainer() + training_system = TrainingSystem(steps=[block1]) + assert training_system is not None + + def test_training_system_wrong_step(self): + class SubTrainer: + def predict(self, x): + return x + + with pytest.raises(TypeError): + TrainingSystem(steps=[SubTrainer()]) + + def test_training_system_steps_changed_predict(self): + class SubTrainer: + def predict(self, x): + return x + + block1 = SubTrainer() + training_system = TrainingSystem() + training_system.steps = [block1] + with pytest.raises(TypeError): + training_system.predict([1, 2, 3]) + + def test_training_system_predict(self): + class SubTrainer(Trainer): + def predict(self, x): + return x + + block1 = SubTrainer() + training_system = TrainingSystem(steps=[block1]) + assert training_system.predict([1, 2, 3]) == [1, 2, 3] + + def test_trainsys_predict_with_trainer_and_trainsys(self): + class SubTrainer(Trainer): + def predict(self, x): + return x + + block1 = SubTrainer() + block2 = SubTrainer() + block3 = TrainingSystem(steps=[block1, block2]) + assert block2.get_parent() == block3 + assert block1 in block3.get_children() + training_system = TrainingSystem(steps=[block1, block2, block3]) + assert training_system.predict([1, 2, 3]) == [1, 2, 3] + + def test_training_system_train(self): + class SubTrainer(Trainer): + def train(self, x, y): + return x, y + + block1 = SubTrainer() + training_system = TrainingSystem(steps=[block1]) + assert training_system.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_traiinsys_train_with_trainer_and_trainsys(self): + class SubTrainer(Trainer): + def train(self, x, y): + return x, y + + block1 = SubTrainer() + block2 = SubTrainer() + block3 = TrainingSystem(steps=[block1, block2]) + training_system = TrainingSystem(steps=[block1, block2, block3]) + assert training_system.train([1, 2, 3], [1, 2, 3]) == ([1, 2, 3], [1, 2, 3]) + + def test_training_system_steps_changed_train(self): + class SubTrainer: + def train(self, x, y): + return x, y + + block1 = SubTrainer() + training_system = TrainingSystem() + training_system.steps = [block1] + with pytest.raises(TypeError): + training_system.train([1, 2, 3], [1, 2, 3]) + + def test_training_system_empty_hash(self): + training_system = TrainingSystem() + assert training_system.get_hash() == "" + + def test_training_system_wrong_kwargs(self): + class Block1(Trainer): + def train(self, x, y, **kwargs): + return x, y + + def predict(self, x, **pred_args): + return x + + class Block2(Trainer): + def train(self, x, y, **kwargs): + return x, y + + def predict(self, x, **pred_args): + return x + + block1 = Block1() + block2 = Block2() + system = TrainingSystem(steps=[block1, block2]) + kwargs = {"Block1": {}, "block2": {}} + with pytest.warns( + UserWarning, + match="The following steps do not exist but were given in the kwargs:", + ): + system.train([1, 2, 3], [1, 2, 3], **kwargs) + system.predict([1, 2, 3], **kwargs) + + def test_training_system_right_kwargs(self): + class Block1(Trainer): + def train(self, x, y, **kwargs): + return x, y + + def predict(self, x, **pred_args): + return x + + class Block2(Trainer): + def train(self, x, y, **kwargs): + return x, y + + def predict(self, x, **pred_args): + return x + + block1 = Block1() + block2 = Block2() + system = TrainingSystem(steps=[block1, block2]) + kwargs = {"Block1": {}, "Block2": {}} + with warnings.catch_warnings(record=True) as caught_warnings: + system.train([1, 2, 3], [1, 2, 3], **kwargs) + system.predict([1, 2, 3], **kwargs) + assert not caught_warnings + + From df8e6e15c74b840741820a0c9199629bc4cd972f Mon Sep 17 00:00:00 2001 From: JasperVS Date: Thu, 13 Mar 2025 18:11:24 +0100 Subject: [PATCH 5/6] Refactor transforming --- epochlib/core/__init__.py | 4 +- epochlib/core/parallel_transforming_system.py | 85 ++++++++ epochlib/core/pipeline.py | 2 +- epochlib/core/transformer.py | 41 ++++ epochlib/core/transforming.py | 197 ------------------ epochlib/core/transforming_system.py | 79 +++++++ .../core/test_parallel_transforming_system.py | 121 +++++++++++ tests/core/test_transformer.py | 24 +++ ...forming.py => test_transforming_system.py} | 146 +------------ 9 files changed, 355 insertions(+), 344 deletions(-) create mode 100644 epochlib/core/parallel_transforming_system.py create mode 100644 epochlib/core/transformer.py delete mode 100644 epochlib/core/transforming.py create mode 100644 epochlib/core/transforming_system.py create mode 100644 tests/core/test_parallel_transforming_system.py create mode 100644 tests/core/test_transformer.py rename tests/core/{test_transforming.py => test_transforming_system.py} (58%) diff --git a/epochlib/core/__init__.py b/epochlib/core/__init__.py index e9c5e0f..c487db4 100644 --- a/epochlib/core/__init__.py +++ b/epochlib/core/__init__.py @@ -6,7 +6,9 @@ from .sequential_system import SequentialSystem from .trainer import Trainer from .training_system import TrainingSystem -from .transforming import ParallelTransformingSystem, Transformer, TransformingSystem +from .parallel_transforming_system import ParallelTransformingSystem +from .transformer import Transformer +from .transforming_system import TransformingSystem from .parallel_training_system import ParallelTrainingSystem from .pipeline import Pipeline diff --git a/epochlib/core/parallel_transforming_system.py b/epochlib/core/parallel_transforming_system.py new file mode 100644 index 0000000..a9323fc --- /dev/null +++ b/epochlib/core/parallel_transforming_system.py @@ -0,0 +1,85 @@ +"""This module contains the parallel transforming system class.""" + +import copy +from typing import Any + +from .parallel_system import ParallelSystem +from .types import TransformType + + +class ParallelTransformingSystem(TransformType, ParallelSystem): + """A system that transforms the input data in parallel. + + Parameters: + - steps (list[Transformer | TransformingSystem | ParallelTransformingSystem]): The steps in the system. + - weights (list[float]): Weights of steps in system, if not specified they are all equal. + + Methods: + .. code-block:: python + @abstractmethod + def concat(self, original_data: Any), data_to_concat: Any, weight: float = 1.0) -> Any: + # Specifies how to concat data after parallel computations + + def get_hash(self) -> str: + # Get the hash of the ParallelTransformingSystem. + + def get_parent(self) -> Any: + # Get the parent of the ParallelTransformingSystem. + + def get_children(self) -> list[Any]: + # Get the children of the ParallelTransformingSystem + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + + Usage: + .. code-block:: python + from epochlib.pipeline import ParallelTransformingSystem + + transformer_1 = CustomTransformer() + transformer_2 = CustomTransformer() + + + class CustomParallelTransformingSystem(ParallelTransformingSystem): + def concat(self, data1: Any, data2: Any) -> Any: + # Concatenate the transformed data. + return data1 + data2 + + + transforming_system = CustomParallelTransformingSystem(steps=[transformer_1, transformer_2]) + + transformed_data = transforming_system.transform(data) + """ + + def __post_init__(self) -> None: + """Post init method for the ParallelTransformingSystem class.""" + # Assert all steps are a subclass of Transformer or TransformingSystem + for step in self.steps: + if not isinstance(step, (TransformType)): + raise TypeError(f"{step} is not an instance of TransformType") + + super().__post_init__() + + def transform(self, data: Any, **transform_args: Any) -> Any: + """Transform the input data. + + :param data: The input data. + :return: The transformed data. + """ + # Loop through each step and call the transform method + out_data = None + if len(self.get_steps()) == 0: + return data + + for i, step in enumerate(self.get_steps()): + step_name = step.__class__.__name__ + + step_args = transform_args.get(step_name, {}) + + if isinstance(step, (TransformType)): + step_data = step.transform(copy.deepcopy(data), **step_args) + out_data = self.concat(out_data, step_data, self.get_weights()[i]) + else: + raise TypeError(f"{step} is not an instance of TransformType") + + return out_data diff --git a/epochlib/core/pipeline.py b/epochlib/core/pipeline.py index 5932ce1..05eb15d 100644 --- a/epochlib/core/pipeline.py +++ b/epochlib/core/pipeline.py @@ -5,7 +5,7 @@ from joblib import hash -from .transforming import TransformingSystem +from .transforming_system import TransformingSystem from .types import TrainType from .trainer import Trainer from .training_system import TrainingSystem diff --git a/epochlib/core/transformer.py b/epochlib/core/transformer.py new file mode 100644 index 0000000..236005a --- /dev/null +++ b/epochlib/core/transformer.py @@ -0,0 +1,41 @@ +"""This module contains the transformer class.""" + +from .block import Block +from .types import TransformType + + +class Transformer(TransformType, Block): + """The transformer block transforms any data it could be x or y data. + + Methods: + .. code-block:: python + @abstractmethod + def transform(self, data: Any, **transform_args: Any) -> Any: + # Transform the input data. + + def get_hash(self) -> str: + # Get the hash of the Transformer + + def get_parent(self) -> Any: + # Get the parent of the Transformer + + def get_children(self) -> list[Any]: + # Get the children of the Transformer + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + + Usage: + .. code-block:: python + from epochlib.pipeline import Transformer + + + class MyTransformer(Transformer): + def transform(self, data: Any, **transform_args: Any) -> Any: + # Transform the input data. + return data + + + my_transformer = MyTransformer() + transformed_data = my_transformer.transform(data) + """ diff --git a/epochlib/core/transforming.py b/epochlib/core/transforming.py deleted file mode 100644 index 7f0fc61..0000000 --- a/epochlib/core/transforming.py +++ /dev/null @@ -1,197 +0,0 @@ -"""This module contains the classes for transforming data in the epochlib package.""" - -import copy -import warnings -from typing import Any - -from .block import Block -from .parallel_system import ParallelSystem -from .sequential_system import SequentialSystem -from .types import TransformType - - -class Transformer(TransformType, Block): - """The transformer block transforms any data it could be x or y data. - - Methods: - .. code-block:: python - @abstractmethod - def transform(self, data: Any, **transform_args: Any) -> Any: - # Transform the input data. - - def get_hash(self) -> str: - # Get the hash of the Transformer - - def get_parent(self) -> Any: - # Get the parent of the Transformer - - def get_children(self) -> list[Any]: - # Get the children of the Transformer - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - - Usage: - .. code-block:: python - from epochlib.pipeline import Transformer - - - class MyTransformer(Transformer): - def transform(self, data: Any, **transform_args: Any) -> Any: - # Transform the input data. - return data - - - my_transformer = MyTransformer() - transformed_data = my_transformer.transform(data) - """ - - -class TransformingSystem(TransformType, SequentialSystem): - """A system that transforms the input data. - - Parameters: - - steps (list[Transformer | TransformingSystem | ParallelTransformingSystem]): The steps in the system. - - Implements the following methods: - .. code-block:: python - def transform(self, data: Any, **transform_args: Any) -> Any: - # Transform the input data. - - def get_hash(self) -> str: - # Get the hash of the TransformingSystem - - def get_parent(self) -> Any: - # Get the parent of the TransformingSystem - - def get_children(self) -> list[Any]: - # Get the children of the TransformingSystem - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - - - Usage: - .. code-block:: python - from epochlib.pipeline import TransformingSystem - - transformer_1 = CustomTransformer() - transformer_2 = CustomTransformer() - - transforming_system = TransformingSystem(steps=[transformer_1, transformer_2]) - transformed_data = transforming_system.transform(data) - predictions = transforming_system.predict(data) - """ - - def __post_init__(self) -> None: - """Post init method for the TransformingSystem class.""" - # Assert all steps are a subclass of Transformer - for step in self.steps: - if not isinstance(step, (TransformType)): - raise TypeError(f"{step} is not an instance of TransformType") - - super().__post_init__() - - def transform(self, data: Any, **transform_args: Any) -> Any: - """Transform the input data. - - :param data: The input data. - :return: The transformed data. - """ - set_of_steps = set() - for step in self.steps: - step_name = step.__class__.__name__ - set_of_steps.add(step_name) - if set_of_steps != set(transform_args.keys()): - # Raise a warning and print all the keys that do not match - warnings.warn(f"The following steps do not exist but were given in the kwargs: {set(transform_args.keys()) - set_of_steps}", stacklevel=2) - - # Loop through each step and call the transform method - for step in self.steps: - step_name = step.__class__.__name__ - - step_args = transform_args.get(step_name, {}) - if isinstance(step, (TransformType)): - data = step.transform(data, **step_args) - else: - raise TypeError(f"{step} is not an instance of TransformType") - - return data - - -class ParallelTransformingSystem(TransformType, ParallelSystem): - """A system that transforms the input data in parallel. - - Parameters: - - steps (list[Transformer | TransformingSystem | ParallelTransformingSystem]): The steps in the system. - - weights (list[float]): Weights of steps in system, if not specified they are all equal. - - Methods: - .. code-block:: python - @abstractmethod - def concat(self, original_data: Any), data_to_concat: Any, weight: float = 1.0) -> Any: - # Specifies how to concat data after parallel computations - - def get_hash(self) -> str: - # Get the hash of the ParallelTransformingSystem. - - def get_parent(self) -> Any: - # Get the parent of the ParallelTransformingSystem. - - def get_children(self) -> list[Any]: - # Get the children of the ParallelTransformingSystem - - def save_to_html(self, file_path: Path) -> None: - # Save html format to file_path - - Usage: - .. code-block:: python - from epochlib.pipeline import ParallelTransformingSystem - - transformer_1 = CustomTransformer() - transformer_2 = CustomTransformer() - - - class CustomParallelTransformingSystem(ParallelTransformingSystem): - def concat(self, data1: Any, data2: Any) -> Any: - # Concatenate the transformed data. - return data1 + data2 - - - transforming_system = CustomParallelTransformingSystem(steps=[transformer_1, transformer_2]) - - transformed_data = transforming_system.transform(data) - """ - - def __post_init__(self) -> None: - """Post init method for the ParallelTransformingSystem class.""" - # Assert all steps are a subclass of Transformer or TransformingSystem - for step in self.steps: - if not isinstance(step, (TransformType)): - raise TypeError(f"{step} is not an instance of TransformType") - - super().__post_init__() - - def transform(self, data: Any, **transform_args: Any) -> Any: - """Transform the input data. - - :param data: The input data. - :return: The transformed data. - """ - # Loop through each step and call the transform method - out_data = None - if len(self.get_steps()) == 0: - return data - - for i, step in enumerate(self.get_steps()): - step_name = step.__class__.__name__ - - step_args = transform_args.get(step_name, {}) - - if isinstance(step, (TransformType)): - step_data = step.transform(copy.deepcopy(data), **step_args) - out_data = self.concat(out_data, step_data, self.get_weights()[i]) - else: - raise TypeError(f"{step} is not an instance of TransformType") - - return out_data diff --git a/epochlib/core/transforming_system.py b/epochlib/core/transforming_system.py new file mode 100644 index 0000000..53eac8c --- /dev/null +++ b/epochlib/core/transforming_system.py @@ -0,0 +1,79 @@ +"""This module contains the transforming system class.""" + +import warnings +from typing import Any + +from .sequential_system import SequentialSystem +from .types import TransformType + + +class TransformingSystem(TransformType, SequentialSystem): + """A system that transforms the input data. + + Parameters: + - steps (list[Transformer | TransformingSystem | ParallelTransformingSystem]): The steps in the system. + + Implements the following methods: + .. code-block:: python + def transform(self, data: Any, **transform_args: Any) -> Any: + # Transform the input data. + + def get_hash(self) -> str: + # Get the hash of the TransformingSystem + + def get_parent(self) -> Any: + # Get the parent of the TransformingSystem + + def get_children(self) -> list[Any]: + # Get the children of the TransformingSystem + + def save_to_html(self, file_path: Path) -> None: + # Save html format to file_path + + + Usage: + .. code-block:: python + from epochlib.pipeline import TransformingSystem + + transformer_1 = CustomTransformer() + transformer_2 = CustomTransformer() + + transforming_system = TransformingSystem(steps=[transformer_1, transformer_2]) + transformed_data = transforming_system.transform(data) + predictions = transforming_system.predict(data) + """ + + def __post_init__(self) -> None: + """Post init method for the TransformingSystem class.""" + # Assert all steps are a subclass of Transformer + for step in self.steps: + if not isinstance(step, (TransformType)): + raise TypeError(f"{step} is not an instance of TransformType") + + super().__post_init__() + + def transform(self, data: Any, **transform_args: Any) -> Any: + """Transform the input data. + + :param data: The input data. + :return: The transformed data. + """ + set_of_steps = set() + for step in self.steps: + step_name = step.__class__.__name__ + set_of_steps.add(step_name) + if set_of_steps != set(transform_args.keys()): + # Raise a warning and print all the keys that do not match + warnings.warn(f"The following steps do not exist but were given in the kwargs: {set(transform_args.keys()) - set_of_steps}", stacklevel=2) + + # Loop through each step and call the transform method + for step in self.steps: + step_name = step.__class__.__name__ + + step_args = transform_args.get(step_name, {}) + if isinstance(step, (TransformType)): + data = step.transform(data, **step_args) + else: + raise TypeError(f"{step} is not an instance of TransformType") + + return data diff --git a/tests/core/test_parallel_transforming_system.py b/tests/core/test_parallel_transforming_system.py new file mode 100644 index 0000000..0a2a00c --- /dev/null +++ b/tests/core/test_parallel_transforming_system.py @@ -0,0 +1,121 @@ +import pytest + +from epochlib.core import Trainer, Transformer, TransformingSystem, ParallelTransformingSystem + + +class TestParallelTransformingSystem: + def test_parallel_transforming_system(self): + # Create an instance of the system + system = ParallelTransformingSystem() + + # Assert the system is an instance of ParallelTransformingSystem + assert isinstance(system, ParallelTransformingSystem) + assert system is not None + + def test_parallel_transforming_system_wrong_step(self): + class SubTransformer: + def transform(self, x): + return x + + with pytest.raises(TypeError): + ParallelTransformingSystem(steps=[SubTransformer()]) + + def test_parallel_transforming_system_transformers(self): + transformer1 = Transformer() + transformer2 = TransformingSystem() + + system = ParallelTransformingSystem(steps=[transformer1, transformer2]) + assert system is not None + + def test_parallel_transforming_system_transform(self): + class transformer(Transformer): + def transform(self, data): + return data + + class pts(ParallelTransformingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + t1 = transformer() + + system = pts(steps=[t1]) + + assert system is not None + assert system.transform([1, 2, 3]) == [1, 2, 3] + + def test_pts_transformers_transform(self): + class transformer(Transformer): + def transform(self, data): + return data + + class pts(ParallelTransformingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + t1 = transformer() + t2 = transformer() + + system = pts(steps=[t1, t2]) + + assert system is not None + assert system.transform([1, 2, 3]) == [1, 2, 3, 1, 2, 3] + + def test_parallel_transforming_system_concat_throws_error(self): + system = ParallelTransformingSystem() + + with pytest.raises(NotImplementedError): + system.concat([1, 2, 3], [4, 5, 6]) + + def test_pts_step_1_changed(self): + system = ParallelTransformingSystem() + + t1 = Trainer() + system.steps = [t1] + + with pytest.raises(TypeError): + system.transform([1, 2, 3]) + + def test_pts_step_2_changed(self): + class pts(ParallelTransformingSystem): + def concat(self, data1, data2, weight): + if data1 is None: + return data2 + return data1 + data2 + + system = pts() + + class transformer(Transformer): + def transform(self, data): + return data + + t1 = transformer() + t2 = Trainer() + system.steps = [t1, t2] + + with pytest.raises(TypeError): + system.transform([1, 2, 3]) + + def test_transform_parallel_hashes(self): + class SubTransformer1(Transformer): + def transform(self, x): + return x + + class SubTransformer2(Transformer): + def transform(self, x): + return x * 2 + + block1 = SubTransformer1() + block2 = SubTransformer2() + + system1 = ParallelTransformingSystem(steps=[block1, block2]) + system1_copy = ParallelTransformingSystem(steps=[block1, block2]) + system2 = ParallelTransformingSystem(steps=[block2, block1]) + system2_copy = ParallelTransformingSystem(steps=[block2, block1]) + + assert system1.get_hash() == system2.get_hash() + assert system1.get_hash() == system1_copy.get_hash() + assert system2.get_hash() == system2_copy.get_hash() diff --git a/tests/core/test_transformer.py b/tests/core/test_transformer.py new file mode 100644 index 0000000..6fb8d2c --- /dev/null +++ b/tests/core/test_transformer.py @@ -0,0 +1,24 @@ +import pytest + +from epochlib.core import Transformer + + +class TestTransformer: + def test_transformer_abstract(self): + transformer = Transformer() + + with pytest.raises(NotImplementedError): + transformer.transform([1, 2, 3]) + + def test_transformer_transform(self): + class transformerInstance(Transformer): + def transform(self, data): + return data + + transformer = transformerInstance() + + assert transformer.transform([1, 2, 3]) == [1, 2, 3] + + def test_transformer_hash(self): + transformer = Transformer() + assert transformer.get_hash() == "1cbcc4f2d0921b050d9b719d2beb6529" diff --git a/tests/core/test_transforming.py b/tests/core/test_transforming_system.py similarity index 58% rename from tests/core/test_transforming.py rename to tests/core/test_transforming_system.py index 1e523da..355e1c0 100644 --- a/tests/core/test_transforming.py +++ b/tests/core/test_transforming_system.py @@ -2,33 +2,7 @@ import numpy as np import pytest -from epochlib.core import Trainer -from epochlib.core import ( - Transformer, - TransformingSystem, - ParallelTransformingSystem, -) - - -class TestTransformer: - def test_transformer_abstract(self): - transformer = Transformer() - - with pytest.raises(NotImplementedError): - transformer.transform([1, 2, 3]) - - def test_transformer_transform(self): - class transformerInstance(Transformer): - def transform(self, data): - return data - - transformer = transformerInstance() - - assert transformer.transform([1, 2, 3]) == [1, 2, 3] - - def test_transformer_hash(self): - transformer = Transformer() - assert transformer.get_hash() == "1cbcc4f2d0921b050d9b719d2beb6529" +from epochlib.core import Transformer, TransformingSystem class TestTransformingSystem: @@ -201,121 +175,3 @@ def transform(self, x, **kwargs): system.transform([1, 2, 3], **kwargs) assert not caught_warnings - - -class TestParallelTransformingSystem: - def test_parallel_transforming_system(self): - # Create an instance of the system - system = ParallelTransformingSystem() - - # Assert the system is an instance of ParallelTransformingSystem - assert isinstance(system, ParallelTransformingSystem) - assert system is not None - - def test_parallel_transforming_system_wrong_step(self): - class SubTransformer: - def transform(self, x): - return x - - with pytest.raises(TypeError): - ParallelTransformingSystem(steps=[SubTransformer()]) - - def test_parallel_transforming_system_transformers(self): - transformer1 = Transformer() - transformer2 = TransformingSystem() - - system = ParallelTransformingSystem(steps=[transformer1, transformer2]) - assert system is not None - - def test_parallel_transforming_system_transform(self): - class transformer(Transformer): - def transform(self, data): - return data - - class pts(ParallelTransformingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - t1 = transformer() - - system = pts(steps=[t1]) - - assert system is not None - assert system.transform([1, 2, 3]) == [1, 2, 3] - - def test_pts_transformers_transform(self): - class transformer(Transformer): - def transform(self, data): - return data - - class pts(ParallelTransformingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - t1 = transformer() - t2 = transformer() - - system = pts(steps=[t1, t2]) - - assert system is not None - assert system.transform([1, 2, 3]) == [1, 2, 3, 1, 2, 3] - - def test_parallel_transforming_system_concat_throws_error(self): - system = ParallelTransformingSystem() - - with pytest.raises(NotImplementedError): - system.concat([1, 2, 3], [4, 5, 6]) - - def test_pts_step_1_changed(self): - system = ParallelTransformingSystem() - - t1 = Trainer() - system.steps = [t1] - - with pytest.raises(TypeError): - system.transform([1, 2, 3]) - - def test_pts_step_2_changed(self): - class pts(ParallelTransformingSystem): - def concat(self, data1, data2, weight): - if data1 is None: - return data2 - return data1 + data2 - - system = pts() - - class transformer(Transformer): - def transform(self, data): - return data - - t1 = transformer() - t2 = Trainer() - system.steps = [t1, t2] - - with pytest.raises(TypeError): - system.transform([1, 2, 3]) - - def test_transform_parallel_hashes(self): - class SubTransformer1(Transformer): - def transform(self, x): - return x - - class SubTransformer2(Transformer): - def transform(self, x): - return x * 2 - - block1 = SubTransformer1() - block2 = SubTransformer2() - - system1 = ParallelTransformingSystem(steps=[block1, block2]) - system1_copy = ParallelTransformingSystem(steps=[block1, block2]) - system2 = ParallelTransformingSystem(steps=[block2, block1]) - system2_copy = ParallelTransformingSystem(steps=[block2, block1]) - - assert system1.get_hash() == system2.get_hash() - assert system1.get_hash() == system1_copy.get_hash() - assert system2.get_hash() == system2_copy.get_hash() From ea90e09901007ca690e90474cb7442c4fbafacbc Mon Sep 17 00:00:00 2001 From: JasperVS Date: Thu, 13 Mar 2025 18:29:17 +0100 Subject: [PATCH 6/6] Fix checkstyle --- epochlib/core/__init__.py | 6 +++--- epochlib/core/parallel_training_system.py | 1 - epochlib/core/pipeline.py | 6 +++--- epochlib/core/trainer.py | 1 + epochlib/core/training_system.py | 1 - 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/epochlib/core/__init__.py b/epochlib/core/__init__.py index c487db4..34d2229 100644 --- a/epochlib/core/__init__.py +++ b/epochlib/core/__init__.py @@ -3,14 +3,14 @@ from .base import Base from .block import Block from .parallel_system import ParallelSystem +from .parallel_training_system import ParallelTrainingSystem +from .parallel_transforming_system import ParallelTransformingSystem +from .pipeline import Pipeline from .sequential_system import SequentialSystem from .trainer import Trainer from .training_system import TrainingSystem -from .parallel_transforming_system import ParallelTransformingSystem from .transformer import Transformer from .transforming_system import TransformingSystem -from .parallel_training_system import ParallelTrainingSystem -from .pipeline import Pipeline __all__ = [ "TrainType", diff --git a/epochlib/core/parallel_training_system.py b/epochlib/core/parallel_training_system.py index 789c5db..756c332 100644 --- a/epochlib/core/parallel_training_system.py +++ b/epochlib/core/parallel_training_system.py @@ -110,4 +110,3 @@ def concat_labels(self, original_data: Any, data_to_concat: Any, weight: float = :return: The concatenated data. """ return self.concat(original_data, data_to_concat, weight) - diff --git a/epochlib/core/pipeline.py b/epochlib/core/pipeline.py index 05eb15d..ce38440 100644 --- a/epochlib/core/pipeline.py +++ b/epochlib/core/pipeline.py @@ -5,11 +5,11 @@ from joblib import hash -from .transforming_system import TransformingSystem -from .types import TrainType +from .parallel_training_system import ParallelTrainingSystem from .trainer import Trainer from .training_system import TrainingSystem -from .parallel_training_system import ParallelTrainingSystem +from .transforming_system import TransformingSystem +from .types import TrainType @dataclass diff --git a/epochlib/core/trainer.py b/epochlib/core/trainer.py index 9e9deae..a787469 100644 --- a/epochlib/core/trainer.py +++ b/epochlib/core/trainer.py @@ -1,4 +1,5 @@ """Module containing the Trainer class.""" + from .block import Block from .types import TrainType diff --git a/epochlib/core/training_system.py b/epochlib/core/training_system.py index eb6fea8..f5deb95 100644 --- a/epochlib/core/training_system.py +++ b/epochlib/core/training_system.py @@ -3,7 +3,6 @@ import warnings from typing import Any - from .sequential_system import SequentialSystem from .types import TrainType