Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ Documentation is generated using [Sphinx](https://www.sphinx-doc.org/en/master/)
To make the documentation yourself, run `make html` with `docs` as the working directory.
The documentation can then be found in `docs/_build/html/index.html`.

## Architectural guidelines

1. Generally 'one class = one file', unless the classes are very tightly coupled.
2. Use `__init__.py` to shorten import paths for the end user.

## Maintainers

EpochLib is maintained by [Team Epoch](https://teamepoch.ai), based in the [Dream Hall](https://www.tudelft.nl/ddream) of the [Delft University of Technology](https://www.tudelft.nl/).
29 changes: 29 additions & 0 deletions epochlib/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Core pipeline functionality for training and transforming data."""

from .base import Base
from .block import Block
from .parallel_system import ParallelSystem
from .parallel_training_system import ParallelTrainingSystem
from .parallel_transforming_system import ParallelTransformingSystem
from .pipeline import Pipeline
from .sequential_system import SequentialSystem
from .trainer import Trainer
from .training_system import TrainingSystem
from .transformer import Transformer
from .transforming_system import TransformingSystem

# TrainType and TransformType are advertised in __all__ below, so they have to be
# bound in this namespace; otherwise `from epochlib.core import *` raises
# AttributeError.
# NOTE(review): parallel_training_system imports TrainType from .types; TransformType
# is assumed to be defined in the same module - confirm.
from .types import TrainType, TransformType

# Names re-exported as the public API of the core package.
__all__ = [
    "TrainType",
    "Trainer",
    "TrainingSystem",
    "ParallelTrainingSystem",
    "Pipeline",
    "TransformType",
    "Transformer",
    "TransformingSystem",
    "ParallelTransformingSystem",
    "Base",
    "SequentialSystem",
    "ParallelSystem",
    "Block",
]
107 changes: 107 additions & 0 deletions epochlib/core/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""The base module contains the Base class."""

from dataclasses import dataclass
from pathlib import Path
from typing import Any, Sequence

from joblib import hash


@dataclass
class Base:
    """The Base class is the base class for all classes in the epochlib package.

    Every instance keeps three pieces of private state that are initialised in
    ``__post_init__``: a hash, a parent and a sequence of children.

    Methods:
    .. code-block:: python
        def get_hash(self) -> str:
            # Get the hash of base

        def get_parent(self) -> Any:
            # Get the parent of base.

        def get_children(self) -> Sequence[Any]:
            # Get the children of base

        def save_to_html(self, file_path: Path) -> None:
            # Save html format to file_path
    """

    def __post_init__(self) -> None:
        """Initialize the hash, parent and children of the block."""
        self.set_hash("")
        self.set_parent(None)
        self.set_children([])

    def set_hash(self, prev_hash: str) -> None:
        """Set the hash of the block.

        The hash is computed from ``prev_hash`` concatenated with ``str(self)``.

        :param prev_hash: The hash of the previous block.
        """
        self._hash = hash(prev_hash + str(self))

    def get_hash(self) -> str:
        """Get the hash of the block.

        :return: The hash of the block.
        """
        return self._hash

    def get_parent(self) -> Any:
        """Get the parent of the block.

        :return: Parent of the block
        """
        return self._parent

    def get_children(self) -> Sequence[Any]:
        """Get the children of the block.

        :return: Children of the block
        """
        return self._children

    def save_to_html(self, file_path: Path) -> None:
        """Write html representation of class to file.

        The file is always written as UTF-8 so the output does not depend on the
        locale of the machine running the code.

        :param file_path: File path to write to
        """
        html = self._repr_html_()
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(html)

    def set_parent(self, parent: Any) -> None:
        """Set the parent of the block.

        :param parent: Parent of the block
        """
        self._parent = parent

    def set_children(self, children: Sequence[Any]) -> None:
        """Set the children of the block.

        :param children: Children of the block
        """
        self._children = children

    def _repr_html_(self) -> str:
        """Return representation of class in html format.

        Children are rendered recursively through their own ``_repr_html_``.

        :return: String representation of html
        """
        children = self.get_children()
        if children:
            rendered = "".join(f"<li>{child._repr_html_()}</li>" for child in children)
            children_html = f"<ul>{rendered}</ul>"
        else:
            children_html = "None"

        parts = [
            "<div style='border: 1px solid black; padding: 10px;'>",
            f"<p><strong>Class:</strong> {self.__class__.__name__}</p>",
            "<ul>",
            f"<li><strong>Hash:</strong> {self.get_hash()}</li>",
            f"<li><strong>Parent:</strong> {self.get_parent()}</li>",
            "<li><strong>Children:</strong> ",
            children_html,
            "</li>",
            "</ul>",
            "</div>",
        ]
        return "".join(parts)
22 changes: 22 additions & 0 deletions epochlib/core/block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Module for the block class."""

from .base import Base


class Block(Base):
    """The Block class is the base class for all blocks.

    Block adds no behaviour of its own; every method listed below is inherited
    unchanged from Base.

    Methods:
    .. code-block:: python
        def get_hash(self) -> str:
            # Get the hash of the block.

        def get_parent(self) -> Any:
            # Get the parent of the block.

        def get_children(self) -> Sequence[Any]:
            # Get the children of the block

        def save_to_html(self, file_path: Path) -> None:
            # Save html format to file_path
    """
113 changes: 113 additions & 0 deletions epochlib/core/parallel_system.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""This module contains the ParallelSystem class."""

from abc import abstractmethod
from dataclasses import dataclass, field
from typing import Any

from joblib import hash

from .base import Base


@dataclass
class ParallelSystem(Base):
    """The ParallelSystem class is the base class for all parallel systems.

    Steps are sorted by class name on construction. Weights are normalised so
    they sum to one; if the number of weights does not match the number of
    steps, every step receives an equal weight instead.

    Parameters:
    - steps (list[Base]): The steps in the system.
    - weights (list[float]): Weights of steps in the system, if not specified they are equal.

    Methods:
    .. code-block:: python
        @abstractmethod
        def concat(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any:
            # Specifies how to concat data after parallel computations

        def get_hash(self) -> str:
            # Get the hash of the block.

        def get_parent(self) -> Any:
            # Get the parent of the block.

        def get_children(self) -> Sequence[Any]:
            # Get the children of the block

        def save_to_html(self, file_path: Path) -> None:
            # Save html format to file_path
    """

    steps: list[Base] = field(default_factory=list)
    weights: list[float] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Sort the steps, initialise the base state and normalise the weights.

        :raises ZeroDivisionError: If weights are given for every step but sum to zero.
        """
        weights_given = len(self.weights) == len(self.steps)

        # Sort the steps by name, to ensure consistent ordering of parallel computations.
        if weights_given:
            # Sort steps and weights as pairs so every weight stays attached to the
            # step it was supplied for (sorted() is stable for equal class names).
            paired = sorted(zip(self.steps, self.weights), key=lambda pair: pair[0].__class__.__name__)
            self.steps = [step for step, _ in paired]
            self.weights = [weight for _, weight in paired]
        else:
            self.steps = sorted(self.steps, key=lambda step: step.__class__.__name__)

        super().__post_init__()

        # Set parent and children
        for step in self.steps:
            step.set_parent(self)

        if weights_given:
            # Normalise the supplied weights so they sum to one.
            total = sum(self.weights)
            self.weights = [weight / total for weight in self.weights]
        else:
            # Missing or mismatched weights: fall back to equal weights.
            num_steps = len(self.steps)
            self.weights = [1 / num_steps for _ in self.steps]

        self.set_children(self.steps)

    def get_steps(self) -> list[Base]:
        """Return list of steps of ParallelSystem.

        :return: List of steps
        """
        return self.steps

    def get_weights(self) -> list[float]:
        """Return list of weights of ParallelSystem.

        :return: List of weights
        :raises TypeError: If the number of weights differs from the number of steps.
        """
        if len(self.get_steps()) != len(self.weights):
            raise TypeError("Mismatch between weights and steps")
        return self.weights

    def set_hash(self, prev_hash: str) -> None:
        """Set the hash of the system.

        Every step is hashed with the same ``prev_hash``, since the steps run in
        parallel rather than one after another.

        :param prev_hash: The hash of the previous block.
        """
        self._hash = prev_hash

        # System has no steps and as such hash should not be affected
        if len(self.steps) == 0:
            return

        # System is one step and should act as such
        if len(self.steps) == 1:
            step = self.steps[0]
            step.set_hash(prev_hash)
            self._hash = step.get_hash()
            return

        # System has at least two steps so hash should become a combination
        total = self.get_hash()
        for step in self.steps:
            step.set_hash(prev_hash)
            total = total + step.get_hash()

        self._hash = hash(total)

    @abstractmethod
    def concat(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any:
        """Concatenate the transformed data.

        :param original_data: The first input data.
        :param data_to_concat: The second input data.
        :param weight: Weight of data to concat
        :return: The concatenated data.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(f"{self.__class__.__name__} does not implement concat method.")
112 changes: 112 additions & 0 deletions epochlib/core/parallel_training_system.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""This module contains the parallel training system class."""

import copy
from typing import Any

from .parallel_system import ParallelSystem
from .types import TrainType


class ParallelTrainingSystem(TrainType, ParallelSystem):
    """A system that trains the input data in parallel.

    Each step receives its own deep copy of the input, and the per-step results
    are folded together with ``concat`` (and ``concat_labels`` for the labels).
    Keyword arguments for a step are looked up under the step's class name.

    Parameters:
    - steps (list[Trainer | TrainingSystem | ParallelTrainingSystem]): The steps in the system.
    - weights (list[float]): The weights of steps in the system, if not specified they are all equal.

    Methods:
    .. code-block:: python
        @abstractmethod
        def concat(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any: # Concatenate the transformed data.

        def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]: # Train the system.

        def predict(self, x: Any, **pred_args: Any) -> Any: # Predict the output of the system.

        def concat_labels(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any: # Concatenate the transformed labels.

        def get_hash(self) -> str: # Get the hash of the system.

    Usage:
    .. code-block:: python
        from epochlib.core import ParallelTrainingSystem

        trainer_1 = CustomTrainer()
        trainer_2 = CustomTrainer()


        class CustomParallelTrainingSystem(ParallelTrainingSystem):
            def concat(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any:
                # Concatenate the transformed data.
                if original_data is None:
                    return data_to_concat * weight
                return original_data + data_to_concat * weight


        training_system = CustomParallelTrainingSystem(steps=[trainer_1, trainer_2])
        trained_x, trained_y = training_system.train(x, y)
        predictions = training_system.predict(x)
    """

    def __post_init__(self) -> None:
        """Validate that every step is a TrainType, then run the parent post init.

        :raises TypeError: If a step is not an instance of TrainType.
        """
        for candidate in self.steps:
            if isinstance(candidate, TrainType):
                continue
            raise TypeError(f"{candidate} is not an instance of TrainType")

        super().__post_init__()

    def train(self, x: Any, y: Any, **train_args: Any) -> tuple[Any, Any]:
        """Train the system.

        :param x: The input to the system.
        :param y: The expected output of the system.
        :param train_args: Keyword arguments per step, keyed by the step's class name.
        :return: The input and output of the system.
        :raises TypeError: If a step is not an instance of TrainType.
        """
        combined_x: Any = None
        combined_y: Any = None

        for index, step in enumerate(self.steps):
            kwargs = train_args.get(step.__class__.__name__, {})

            if not isinstance(step, TrainType):
                raise TypeError(f"{step} is not an instance of TrainType")

            # Each step works on its own copy so steps cannot affect one another.
            new_x, new_y = step.train(copy.deepcopy(x), copy.deepcopy(y), **kwargs)
            combined_x = self.concat(combined_x, new_x, self.get_weights()[index])
            combined_y = self.concat_labels(combined_y, new_y, self.get_weights()[index])

        return combined_x, combined_y

    def predict(self, x: Any, **pred_args: Any) -> Any:
        """Predict the output of the system.

        :param x: The input to the system.
        :param pred_args: Keyword arguments per step, keyed by the step's class name.
        :return: The output of the system.
        :raises TypeError: If a step is not an instance of TrainType.
        """
        combined: Any = None

        for index, step in enumerate(self.steps):
            kwargs = pred_args.get(step.__class__.__name__, {})

            if not isinstance(step, TrainType):
                raise TypeError(f"{step} is not an instance of TrainType")

            # Each step predicts on its own copy of the input.
            prediction = step.predict(copy.deepcopy(x), **kwargs)
            combined = self.concat(combined, prediction, self.get_weights()[index])

        return combined

    def concat_labels(self, original_data: Any, data_to_concat: Any, weight: float = 1.0) -> Any:
        """Concatenate the transformed labels. Will use concat method if not overridden.

        :param original_data: The first input data.
        :param data_to_concat: The second input data.
        :param weight: Weight of data to concat
        :return: The concatenated data.
        """
        return self.concat(original_data, data_to_concat, weight)
Loading
Loading