From 7d83ff919804c9e507c1ab521393e792359b9d02 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Fri, 6 Mar 2026 16:36:23 -0500 Subject: [PATCH 01/27] begin scaffolding --- pyproject.toml | 5 + src/globi/models/surrogate/__init__.py | 1 + src/globi/models/surrogate/dummy.py | 39 + src/globi/models/surrogate/training.py | 1101 ++++++++++++++++++ src/globi/pipelines/__init__.py | 13 + src/globi/{pipelines.py => pipelines/gis.py} | 227 +--- src/globi/pipelines/simulations.py | 235 ++++ src/globi/pipelines/training.py | 323 +++++ src/globi/worker/Dockerfile | 3 +- src/globi/worker/main.py | 23 +- uv.lock | 52 +- 11 files changed, 1792 insertions(+), 230 deletions(-) create mode 100644 src/globi/models/surrogate/__init__.py create mode 100644 src/globi/models/surrogate/dummy.py create mode 100644 src/globi/models/surrogate/training.py create mode 100644 src/globi/pipelines/__init__.py rename src/globi/{pipelines.py => pipelines/gis.py} (58%) create mode 100644 src/globi/pipelines/simulations.py create mode 100644 src/globi/pipelines/training.py diff --git a/pyproject.toml b/pyproject.toml index 44de639..6f33108 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,11 @@ visualization = [ "playwright>=1.40.0", ] +ml = [ + "lightgbm>=4.6.0", + "xgboost>=3.2.0", +] + cli = [ "click>=8.1.7", "xlsxwriter>=3.2.9", diff --git a/src/globi/models/surrogate/__init__.py b/src/globi/models/surrogate/__init__.py new file mode 100644 index 0000000..d5affc9 --- /dev/null +++ b/src/globi/models/surrogate/__init__.py @@ -0,0 +1 @@ +"""Models used for the surrogate pipeline.""" diff --git a/src/globi/models/surrogate/dummy.py b/src/globi/models/surrogate/dummy.py new file mode 100644 index 0000000..0617998 --- /dev/null +++ b/src/globi/models/surrogate/dummy.py @@ -0,0 +1,39 @@ +"""Dummy simulation for testing.""" + +from pathlib import Path + +import pandas as pd +from scythe.base import ExperimentInputSpec, ExperimentOutputSpec +from 
scythe.registry import ExperimentRegistry + + +class DummySimulationInput(ExperimentInputSpec): + """The input for the dummy simulation.""" + + a: int + b: float + + +class DummySimulationOutput(ExperimentOutputSpec): + """The output for the dummy simulation.""" + + c: float + + +@ExperimentRegistry.Register( + description="A dummy simulation.", +) +def dummy_simulation( + input_spec: DummySimulationInput, tempdir: Path +) -> DummySimulationOutput: + """A dummy simulation.""" + df = pd.DataFrame({ + "target_0": [input_spec.a + input_spec.b], + "target_1": [input_spec.a - input_spec.b], + "target_2": [input_spec.a * input_spec.b], + "target_3": [input_spec.a / input_spec.b], + }) + df = df.set_index(input_spec.make_multiindex()) + return DummySimulationOutput( + c=input_spec.a + input_spec.b, dataframes={"main_result": df} + ) diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py new file mode 100644 index 0000000..e695715 --- /dev/null +++ b/src/globi/models/surrogate/training.py @@ -0,0 +1,1101 @@ +"""Models used for the surrogate training pipeline.""" + +from functools import cached_property +from pathlib import Path +from typing import TYPE_CHECKING, Literal, cast + +import numpy as np +import pandas as pd +from pydantic import BaseModel, Field, model_validator +from scythe.base import BaseSpec, ExperimentInputSpec +from scythe.scatter_gather import RecursionMap +from scythe.settings import ScytheStorageSettings +from scythe.utils.filesys import FileReference, OptionalFileReference + +if TYPE_CHECKING: + from mypy_boto3_s3.client import S3Client as S3ClientType +else: + S3ClientType = object + + +class ConvergenceThresholds(BaseModel): + """The thresholds for convergence.""" + + mae: float = Field(default=0.5, description="The maximum MAE for convergence.") + rmse: float = Field(default=0.5, description="The maximum RMSE for convergence.") + mape: float = Field(default=0.15, description="The maximum MAPE for convergence.") 
+ r2: float = Field(default=0.95, description="The minimum R2 for convergence.") + cvrmse: float = Field( + default=0.05, description="The maximum CV_RMSE for convergence." + ) + + @property + def thresholds(self) -> pd.Series: + """The thresholds for convergence.""" + return pd.Series(self.model_dump(), name="metric") + + def check_convergence(self, metrics: pd.Series): + """Check if the metrics have converged. + + Note that this requires the metrics data frame to have the following shape: + + """ + thresholds = pd.Series(self.model_dump(), name="metric") + + # first, we will select the appropriate threshold for each metric + comparators = thresholds.loc[metrics.index.get_level_values("metric")] + # we can then copy over the index safely + comparators.index = metrics.index + + # next, we will flip the sign of the r2 metric since it is a maximization metric rather than min + metrics = metrics * np.where( + metrics.index.get_level_values("metric") == "r2", -1, 1 + ) + comparators = comparators * np.where( + comparators.index.get_level_values("metric") == "r2", -1, 1 + ) + + # run the comparisons + comparison = metrics < comparators + + # now we will groupby the stratum (e.g. features.weather.file) + # and by the target (e.g. Electricity, Gas, etc.) 
+ # we are converged if any of the metrics have converged for that target + # in that stratum + comparison_stratum_and_target = comparison.groupby( + level=[lev for lev in comparison.index.names if lev != "metric"] + ).any() + + # then we will check that all targets have converged for each stratum + comparison_strata = comparison_stratum_and_target.groupby(level="stratum").all() + + # finally, we will check that all strata have converged + comparison_all = comparison_strata.all() + + return ( + comparison_all, + comparison_strata, + comparison_stratum_and_target, + comparison, + ) + + +class XGBHyperparameters(BaseModel): + """The parameters for the xgboost model.""" + + max_depth: int = Field(default=5, description="The maximum depth of the tree.") + eta: float = Field(default=0.1, description="The learning rate.") + min_child_weight: int = Field(default=3, description="The minimum child weight.") + subsample: float = Field(default=0.8, description="The subsample rate.") + colsample_bytree: float = Field( + default=0.8, description="The column sample by tree rate." + ) + alpha: float = Field(default=0.01, description="The alpha parameter.") + lam: float = Field(default=0.01, description="The lambda parameter.") + gamma: float = Field(default=0.01, description="The gamma parameter.") + + +class LGBHyperparameters(BaseModel): + """The parameters for the lightgbm model.""" + + objective: Literal["regression", "binary", "multiclass"] = Field( + default="regression", description="The objective function to use." + ) + metric: Literal["rmse"] = Field( + default="rmse", description="The metric to optimize." + ) + # TODO: add other parameters as needed + + +ModelHPType = XGBHyperparameters | LGBHyperparameters + + +class StratificationSpec(BaseModel): + """A spec for stratifying the data.""" + + field: str = Field( + default="feature.weather.file", description="The field to stratify by." 
+ ) + sampling: Literal["equal", "error-weighted", "proportional"] = Field( + default="equal", + description="The sampling method to use over the strata.", + ) + aliases: list[str] = Field( + default_factory=lambda: ["epwzip_path", "epw_path"], + description="The alias to use for the stratum as a fallback.", + ) + + # TODO: consider allowing the stratification to be a compound with e.g. component_map_uri and semantic_fields_uri and database_uri + + +class CrossValidationSpec(BaseModel): + """The cross validation spec.""" + + n_folds: int = Field( + default=5, description="The number of folds for the entire parent task." + ) + + +class IterationSpec(BaseModel): + """The iteration spec.""" + + n_init: int = Field(default=10000, description="The number of initial samples.") + min_per_stratum: int = Field( + default=100, description="The minimum number of samples per stratum." + ) + n_per_iter: int = Field( + default=10000, + description="The number of samples to add per each iteration of the outer loop.", + ) + max_iters: int = Field( + default=100, + description="The maximum number of outer loop iterations to perform.", + ) + recursion: RecursionMap = Field( + default_factory=lambda: RecursionMap(factor=100, max_depth=1), + description="The recursion spec.", + ) + + +# TODO: should this be a subclass of ExperimentInputSpec? 
+class ProgressiveTrainingSpec(BaseSpec): + """A spec for iteratively training an SBEM regression model.""" + + convergence_criteria: ConvergenceThresholds = Field( + default_factory=ConvergenceThresholds, + description="The convergence criteria.", + ) + model_hyperparameters: ModelHPType = Field( + default_factory=LGBHyperparameters, + description="The hyperparameters for the model.", + ) + stratification: StratificationSpec = Field( + default_factory=StratificationSpec, + description="The stratification spec.", + ) + cross_val: CrossValidationSpec = Field( + default_factory=CrossValidationSpec, + description="The cross validation spec.", + ) + iteration: IterationSpec = Field( + default_factory=IterationSpec, + description="The iteration spec.", + ) + gis_uri: FileReference = Field( + ..., + description="The uri of the gis data to train on.", + ) + storage_settings: ScytheStorageSettings = Field( + default=..., + description="The storage settings to use.", + ) + + @property + def gis_path(self) -> Path: + """The path to the gis data.""" + if isinstance(self.gis_uri, Path): + return self.gis_uri + return self.fetch_uri(self.gis_uri) + + @cached_property + def gis_data(self) -> pd.DataFrame: + """Load the gis data.""" + return pd.read_parquet(self.gis_path) + + # def s3_key_for_iteration(self, iteration_ix: int) -> str: + # """The s3 root key for the iteration.""" + # return f"{self.experiment_id}/iter-{iteration_ix:03d}" + + # def upload_self(self, s3_client: S3ClientType): + # """Upload a dumpout of this spec to the s3 bucket root.""" + # with tempfile.TemporaryDirectory() as tempdir: + # tempdir = Path(tempdir) + # fpath = tempdir / "spec.yml" + # with open(fpath, "w") as f: + # yaml.dump(self.model_dump(mode="json"), f, indent=2) + # s3_client.upload_file( + # fpath.as_posix(), + # self.bucket, + # f"hatchet/{self.experiment_id}/artifacts/experiment-spec.yml", + # ) + + +class StageSpec(BaseModel): + """A spec that is common to both the sample and train stages 
(and possibly others).""" + + progressive_training_spec: ProgressiveTrainingSpec = Field( + ..., + description="The progressive training spec.", + ) + progressive_training_iteration_ix: int = Field( + ..., + description="The index of the current training iteration within the outer loop.", + ) + data_uri: OptionalFileReference = Field( + ..., + description="The uris of the previous simulation results to sample from.", + ) + stage_type: Literal["sample", "train"] = Field( + ..., + description="The type of stage.", + ) + + @cached_property + def random_generator(self) -> np.random.Generator: + """The random generator.""" + return np.random.default_rng(self.progressive_training_iteration_ix) + + # @cached_property + # def experiment_key(self) -> str: + # """The root key for the experiment.""" + # return f"{self.progressive_training_spec.s3_key_for_iteration(self.progressive_training_iteration_ix)}/{self.stage_type}" + + # def load_previous_data(self, s3_client: S3ClientType) -> pd.DataFrame | None: + # """Load the previous data.""" + # if self.data_uri is None: + # return None + # with tempfile.TemporaryDirectory() as tmpdir: + # tmpdir = Path(tmpdir) + # fpath = tmpdir / "previous_data.parquet" + # fetch_uri( + # uri=self.data_uri, + # local_path=fpath, + # use_cache=False, + # s3=s3_client, + # ) + # df = pd.read_parquet(fpath) + # return df + + +# BASE EXPERIMENT/v1.0.0 +# BASE EXPERIMENT/v1.0.0/simulations/v1.0.0/[...] +# BASE EXPERIMENT/v1.0.0/training/v1.0.0/[...] +# BASE EXPERIMENT/v1.0.0/simulations/v2.0.0/[...] +# BASE EXPERIMENT/v1.0.0/training/v2.0.0/[...] +# BASE EXPERIMENT/v1.0.0/simulations/v2.0.0/[...] +# BASE EXPERIMENT/v1.0.0/training/v3.0.0/[...] 
+ + +class SampleSpec(StageSpec): + """A spec for the sampling stage of the progressive training.""" + + # TODO: add the ability to receive the last set of error metrics and use them to inform the sampling + + def stratified_selection(self) -> pd.DataFrame: + """Sample the gis data.""" + df = self.progressive_training_spec.gis_data + + stratification_field = self.progressive_training_spec.stratification.field + stratification_aliases = self.progressive_training_spec.stratification.aliases + + if stratification_field not in df.columns and not any( + alias in df.columns for alias in stratification_aliases + ): + msg = f"Stratification field {stratification_field} not found in gis data. Please check the field name and/or the aliases." + raise ValueError(msg) + + if stratification_field not in df.columns: + stratification_field = next( + alias for alias in stratification_aliases if alias in df.columns + ) + + strata = cast(list[str], df[stratification_field].unique().tolist()) + + if self.progressive_training_spec.stratification.sampling == "equal": + return self.sample_equally_by_stratum(df, strata, stratification_field) + elif self.progressive_training_spec.stratification.sampling == "error-weighted": + msg = "Error-weighted sampling is not yet implemented." + raise NotImplementedError(msg) + elif self.progressive_training_spec.stratification.sampling == "proportional": + msg = "Proportional sampling is not yet implemented." + raise NotImplementedError(msg) + else: + msg = f"Invalid sampling method: {self.progressive_training_spec.stratification.sampling}" + raise ValueError(msg) + + def sample_equally_by_stratum( + self, df: pd.DataFrame, strata: list[str], stratification_field: str + ) -> pd.DataFrame: + """Sample equally by stratum. + + This will break the dataframe up into n strata and ensure that each stratum ends up with the same number of samples. + + Args: + df (pd.DataFrame): The dataframe to sample from. 
+ strata (list[str]): The unique values of the strata. + stratification_field (str): The field to stratify the data by. + + Returns: + samples (pd.DataFrame): The sampled dataframe. + """ + stratum_dfs = { + stratum: df[df[stratification_field] == stratum] for stratum in strata + } + n_per_iter = ( + self.progressive_training_spec.iteration.n_per_iter + if self.progressive_training_iteration_ix != 0 + else self.progressive_training_spec.iteration.n_init + ) + n_per_stratum = max( + n_per_iter // len(strata), + ( + self.progressive_training_spec.iteration.min_per_stratum + if self.progressive_training_iteration_ix == 0 + else 0 + ), + ) + + # TODO: consider how we want to handle potentially having the same geometry appear in both + # the training and testing sets. + # if any(len(stratum_df) < n_per_stratum for stratum_df in stratum_dfs.values()): + # msg = "There are not enough buildings in some strata to sample the desired number of buildings per stratum." + # # consider making this a warning? + # raise ValueError(msg) + + sampled_strata = { + stratum: stratum_df.sample( + n=n_per_stratum, random_state=self.random_generator, replace=True + ) + for stratum, stratum_df in stratum_dfs.items() + } + return cast(pd.DataFrame, pd.concat(sampled_strata.values())) + + # def sample_semantic_fields(self, df: pd.DataFrame) -> pd.DataFrame: + # """Sample the semantic fields.""" + # # TODO: consider randomizing the locations? 
+ # semantic_fields = self.progressive_training_spec.semantic_fields_data + # for field in semantic_fields.Fields: + # if isinstance(field, CategoricalFieldSpec): + # options = field.Options + # df[field.Name] = self.random_generator.choice(options, size=len(df)) + # elif isinstance(field, NumericFieldSpec): + # df[field.Name] = self.random_generator.uniform( + # field.Min, field.Max, size=len(df) + # ) + # else: + # msg = f"Invalid field type: {type(field)}" + # raise TypeError(msg) + # return df + + # def sample_basements_and_attics(self, df: pd.DataFrame) -> pd.DataFrame: + # """Add basement/attics to models.""" + # # get the options for the type literal + # options: list[BasementAtticOccupationConditioningStatus] = [ + # "none", + # "occupied_unconditioned", + # "unoccupied_unconditioned", + # "occupied_conditioned", + # "unoccupied_conditioned", + # ] + # weights = [0.5, *([0.5 / 4] * 4)] + # # sample the type literal + # df["basement"] = self.random_generator.choice(options, size=len(df), p=weights) + # df["attic"] = self.random_generator.choice(options, size=len(df), p=weights) + # df["exposed_basement_frac"] = self.random_generator.uniform( + # 0.1, 0.5, size=len(df) + # ) + # return df + + # def sample_wwrs(self, df: pd.DataFrame) -> pd.DataFrame: + # """Sample the wwrs.""" + # wwr_min = 0.05 + # wwr_max = 0.35 + # df["wwr"] = self.random_generator.uniform(wwr_min, wwr_max, size=len(df)) + # return df + + # def sample_f2f_heights(self, df: pd.DataFrame) -> pd.DataFrame: + # """Sample the f2f heights.""" + # f2f_min = 2.3 + # f2f_max = 4.3 + # df["f2f_height"] = self.random_generator.uniform(f2f_min, f2f_max, size=len(df)) + # return df + + def to_sim_specs(self, df: pd.DataFrame): + """Convert the sampled dataframe to a list of simulation specs. + + For now, we are assuming that all the other necessary fields are present and we are just + ensuring that sort_index and experiment_id are set appropriately. 
+ """ + # df["semantic_field_context"] = df.apply( + # lambda row: { + # field.Name: row[field.Name] + # for field in self.progressive_training_spec.semantic_fields_data.Fields + # }, + # axis=1, + # ) + # df["sort_index"] = np.arange(len(df)) + # df["experiment_id"] = self.experiment_key + # # TODO: consider allowing the component map/semantic_fields/database to be inherited from the row + # # e.g. to allow multiple component maps and dbs per run. + # df["component_map_uri"] = str(self.progressive_training_spec.component_map_uri) + # df["semantic_fields_uri"] = str( + # self.progressive_training_spec.semantic_fields_uri + # ) + # df["db_uri"] = str(self.progressive_training_spec.database_uri) + return df + + # def make_payload(self, s3_client: S3ClientType): + # """Make the payload for the scatter gather task, including generating the simulation specs and serializing them to s3.""" + # df = self.stratified_selection() + # # df = self.sample_semantic_fields(df) + # # df = self.sample_basements_and_attics(df) + # # df = self.sample_wwrs(df) + # # df = self.sample_f2f_heights(df) + # df = self.to_sim_specs(df) + # # serialize to a parquet file and upload to s3 + # bucket = self.progressive_training_spec.storage_settings.BUCKET + # with tempfile.TemporaryDirectory() as tmpdir: + # tmpdir = Path(tmpdir) + # fpath = tmpdir / "specs.pq" + # df.to_parquet(fpath) + # key = f"hatchet/{self.experiment_key}/specs.pq" + # specs_uri = f"s3://{bucket}/{key}" + # s3_client.upload_file(fpath.as_posix(), bucket, key) + + # payload = { + # "specs": specs_uri, + # "bucket": bucket, + # "workflow_name": "simulate_sbem_shoebox", + # "experiment_id": self.experiment_key, + # "recursion_map": { + # "factor": self.progressive_training_spec.iteration.recursion_factor, + # "max_depth": self.progressive_training_spec.iteration.recursion_max_depth, + # }, + # } + # return payload + + # def combine_results(self, new_data_uri: URIResponse, s3_client: S3ClientType): + # """Combine the results of 
the previous and new data.""" + # previous_data = self.load_previous_data(s3_client) + # with tempfile.TemporaryDirectory() as tmpdir: + # tmpdir = Path(tmpdir) + # fpath = tmpdir / "new_data.parquet" + # fetch_uri( + # uri=new_data_uri.uri, local_path=fpath, use_cache=False, s3=s3_client + # ) + # # TODO: data frame subsection selection should be a configuration option within the + # # progressive iteration training spec. + # df = cast( + # pd.DataFrame, + # cast(pd.DataFrame, pd.read_hdf(fpath, key="results")), + # ) + # if previous_data is not None: + # df = pd.concat([previous_data, df], axis=0) + + # # strip out any constant columns + # is_all_zeros = (df.max(axis=0) - df.min(axis=0)).abs() < 1e-5 + # df = df.loc[:, ~is_all_zeros] + # # serialize to a parquet file and upload to s3 + # bucket = self.progressive_training_spec.bucket + # with tempfile.TemporaryDirectory() as tmpdir: + # tmpdir = Path(tmpdir) + # fpath = tmpdir / "results.parquet" + # df.to_parquet(fpath) + # key = f"hatchet/{self.experiment_key}/full-dataset.pq" + # specs_uri = f"s3://{bucket}/{key}" + # s3_client.upload_file(fpath.as_posix(), bucket, key) + # return specs_uri + + @model_validator(mode="after") + def check_stage(self): + """The sampling spec must have stage set to 'sample'.""" + if self.stage_type != "sample": + msg = f"Invalid stage: {self.stage_type}" + raise ValueError(msg) + return self + + +class TrainFoldSpec(ExperimentInputSpec): + """Train an sbem model for a specific fold. + + The fold is determined by the sort_index, which does mean we need to know the n_folds. + + We will need to know: + - where the data is + - the desired stratification (e.g. feature.weather.file) + - how to divide the data into training and testing splits given the desired stratification + + The data uri should be assumed to have features in the index and targets in the columns. + + TODO: consider the potential for leakage when a stratum has few buildings! 
+ + First, we will subdivide the data into its strata. + + Then for each stratum, we will create a train/test split according to the fold index. + + We wish to return validation metrics with the following hierarchy for the column index + - train/test ["split_segment"] + - loc1/loc2 ... ["stratum"] + - mae/rmse/r2/... ["metric"] + + Theoretically, we also might want to pass in normalization specifications for features and/or targets. + However, with xgb, this is less imperative. + """ + + n_folds: int = Field( + ..., description="The number of folds for the entire parent task." + ) + data_uri: FileReference = Field(..., description="The uri of the data to train on.") + stratification_field: str = Field( + ..., + description="The field to stratify the data by for monitoring convergence in parent task.", + ) + progressive_training_iter_ix: int = Field( + ..., + description="The index of the current training iteration within the outer loop.", + ) + + @property + def data_path(self) -> Path: + """The path to the data.""" + if isinstance(self.data_uri, Path): + return self.data_uri + return self.fetch_uri(self.data_uri) + + @cached_property + def data(self) -> pd.DataFrame: + """The data.""" + df_all = pd.read_parquet(self.data_path) + df_energy: pd.DataFrame = cast(pd.DataFrame, df_all["Energy"]["Raw"]) + df_energy = cast( + pd.DataFrame, + ( + df_energy.T.groupby( + level=[ + lev for lev in df_energy.columns.names if lev.lower() != "month" + ] + ) + .sum() + .T + ), + ) + df_peaks: pd.DataFrame = cast(pd.DataFrame, df_all["Peak"]["Raw"]) + df_peaks = cast( + pd.DataFrame, + ( + df_peaks.T.groupby( + level=[ + lev for lev in df_peaks.columns.names if lev.lower() != "month" + ] + ) + .max() + .T + ), + ) + df_all_annual = pd.concat( + [df_energy, df_peaks], + axis=1, + keys=["Energy", "Peak"], + names=["Measurement"], + ) + # TODO: should we assume they are shuffled already? 
+ # shuffle the order of the rows + df_all_annual = df_all_annual.sample(frac=1, random_state=42, replace=False) + return df_all_annual + + @cached_property + def dparams(self) -> pd.DataFrame: + """The index of the data.""" + return self.data.index.to_frame() + + @cached_property + def stratum_names(self) -> list[str]: + """The values of the stratification field.""" + return sorted(self.dparams[self.stratification_field].unique().tolist()) + + @cached_property + def data_by_stratum(self) -> dict[str, pd.DataFrame]: + """Subdivide the data by the stratification field. + + We want 1/n_folds data in the test segment for each stratification option, + so we will need to compute train/test splits separately for each stratum. + + This would not be necessary if we knew that the strata always had equal representation, but + since we might use things like adaptive sampling or generating samples proportionally to the number of buildings in that stratum, + e.g. by population, then what *could* happen if we just did a random train/test split is that some strata might end up + entirely in the train set. + """ + return { + val: cast( + pd.DataFrame, self.data[self.dparams[self.stratification_field] == val] + ) + for val in self.stratum_names + } + + @cached_property + def train_test_split_by_fold_and_stratum(self) -> pd.DataFrame: + """Create the folds for the data. + + To do this, we will go to each stratum and use a strided step to + construct each fold, then assign the fold matching the sort_index + to the test split. We also recombine the strata since they are now + safely stratified. 
+ """ + all_strata = [] + for val in self.stratum_names: + folds = [] + for i in range(self.n_folds): + fold = self.data_by_stratum[val].iloc[i :: self.n_folds] + folds.append(fold) + folds_df = pd.concat( + folds, + axis=0, + keys=[ + "test" if i == self.sort_index else "train" + for i in range(self.n_folds) + ], + names=["split_segment"], + ) + all_strata.append(folds_df) + return pd.concat(all_strata) + + @cached_property + def train_segment(self) -> tuple[pd.DataFrame, pd.DataFrame]: + """Get the training segment.""" + train_df = cast( + pd.DataFrame, + self.train_test_split_by_fold_and_stratum.xs( + "train", level="split_segment" + ), + ) + params = train_df.index.to_frame(index=False) + targets = train_df + return params, targets + + @cached_property + def test_segment(self) -> tuple[pd.DataFrame, pd.DataFrame]: + """Get the test segment.""" + test_df = cast( + pd.DataFrame, + self.train_test_split_by_fold_and_stratum.xs("test", level="split_segment"), + ) + params = test_df.index.to_frame(index=False) + targets = test_df + return params, targets + + @cached_property + def non_numeric_options(self) -> dict[str, list[str]]: + """Get the non-numeric options for categorical features. + + We must perform this across the entire dataset not just splits for consistency + and to ensure we get all options. + + TODO: In the future, this should be based off of transform instructions. + """ + fparams = self.dparams[ + [col for col in self.dparams.columns if col.startswith("feature.")] + ] + non_numeric_cols = fparams.select_dtypes(include=["object"]).columns + non_numeric_options = { + col: sorted(cast(pd.Series, fparams[col]).unique().tolist()) + for col in non_numeric_cols + } + return non_numeric_options + + # @cached_property + # def numeric_min_maxs(self) -> dict[str, tuple[float, float]]: + # """Get the min and max for numeric features. + + # We perform this only on the training set to prevent leakage. 
+ + # TODO: In the future, this should be based off of transform instructions. + + # Args: + # params (pd.DataFrame): The parameters to get the min and max for. + + # Returns: + # norm_bounds (dict[str, tuple[float, float]]): The min and max for each numeric feature. + # """ + # params, _ = self.train_segment + # fparams = params[[col for col in params.columns if col.startswith("feature.")]] + # numeric_cols = fparams.select_dtypes(include=["number"]).columns + # numeric_min_maxs = { + # col: (float(fparams[col].min()), float(fparams[col].max())) + # for col in numeric_cols + # } + # for col in numeric_min_maxs: + # low, high = numeric_min_maxs[col] + # # we want to floor the "low" value down to the nearest 0.001 + # # and ceil the "high" value up to the nearest 0.001 + # # e.g. if low is -0.799, we want to set it to -0.800 + # # and if high is 0.799, we want to set it to 0.800 + # numeric_min_maxs[col] = ( + # math.floor(low * 1000) / 1000, + # math.ceil(high * 1000) / 1000, + # ) + # return numeric_min_maxs + + # @cached_property + # def feature_spec(self) -> RegressorInputSpec: + # """Get the feature spec which can be serialized and reloaded.""" + # params, _ = self.train_segment + # features: list[CategoricalFeature | ContinuousFeature] = [] + # for col in params.columns: + # if col in self.numeric_min_maxs: + # low, high = self.numeric_min_maxs[col] + # features.append( + # ContinuousFeature(name=col, min=float(low), max=float(high)) + # ) + # elif col in self.non_numeric_options: + # opts = self.non_numeric_options[col] + # features.append(CategoricalFeature(name=col, values=opts)) + # return RegressorInputSpec(features=features) + + # def normalize_params(self, params: pd.DataFrame) -> pd.DataFrame: + # """Normalize the params.""" + # regressor_spec = self.feature_spec + # fparams = regressor_spec.transform(params, do_check=False) + # return fparams + + # def run( + # self, + # ): + # """Train the model.""" + # train_params, train_targets = 
self.train_segment + # test_params, test_targets = self.test_segment + + # # select/transform the params as necessary + # train_params = self.normalize_params(train_params) + # test_params = self.normalize_params(test_params) + + # # Train the model + # # train_preds, test_preds = self.train_xgboost( + # # train_params, train_targets, test_params, test_targets + # # ) + # s3_client = boto3.client("s3") + # train_preds, test_preds = self.train_lightgbm( + # train_params, train_targets, test_params, test_targets, s3_client + # ) + + # # compute the metrics + # global_train_metrics, stratum_train_metrics = self.compute_metrics( + # train_preds, train_targets + # ) + # global_test_metrics, stratum_test_metrics = self.compute_metrics( + # test_preds, test_targets + # ) + + # global_metrics = pd.concat( + # [global_train_metrics, global_test_metrics], + # axis=1, + # keys=["train", "test"], + # names=["split_segment"], + # ) + # stratum_metrics = pd.concat( + # [stratum_train_metrics, stratum_test_metrics], + # axis=1, + # keys=["train", "test"], + # names=["split_segment"], + # ) + # return { + # "global_metrics": global_metrics, + # "stratum_metrics": stratum_metrics, + # } + + # def compute_frame_metrics( + # self, preds: pd.DataFrame, targets: pd.DataFrame + # ) -> pd.DataFrame: + # """Compute the metrics.""" + # from sklearn.metrics import ( + # mean_absolute_error, + # mean_absolute_percentage_error, + # mean_squared_error, + # r2_score, + # ) + + # mae = mean_absolute_error(targets, preds, multioutput="raw_values") + # mse = mean_squared_error(targets, preds, multioutput="raw_values") + # rmse = np.sqrt(mse) + # r2 = r2_score(targets, preds, multioutput="raw_values") + # cvrmse = rmse / (targets.mean(axis=0) + 1e-5) + # mape = mean_absolute_percentage_error( + # targets + 1e-5, + # preds, + # multioutput="raw_values", + # ) + + # metrics = pd.DataFrame( + # { + # "mae": mae, + # "rmse": rmse, + # "r2": r2, + # "cvrmse": cvrmse, + # "mape": mape, + # }, + # ) + # 
metrics.columns.names = ["metric"] + # metrics.index.names = ["measurement", "target"] + # return metrics + + # def compute_metrics(self, preds: pd.DataFrame, targets: pd.DataFrame): + # """Compute the metrics.""" + # global_metrics = self.compute_frame_metrics(preds, targets) + # stratum_metric_dfs = {} + # for stratum_name in self.stratum_names: + # stratum_targets = cast( + # pd.DataFrame, targets.xs(stratum_name, level=self.stratification_field) + # ) + # stratum_preds = cast( + # pd.DataFrame, preds.xs(stratum_name, level=self.stratification_field) + # ) + # metrics = self.compute_frame_metrics(stratum_preds, stratum_targets) + # stratum_metric_dfs[stratum_name] = metrics + + # stratum_metrics = pd.concat( + # stratum_metric_dfs, + # axis=1, + # keys=self.stratum_names, + # names=["stratum"], + # ) + # global_metrics = ( + # global_metrics.set_index( + # pd.Index( + # [self.sort_index] * len(global_metrics), + # name="sort_index", + # ), + # append=True, + # ) + # .set_index( + # pd.Index( + # [self.progressive_training_iter_ix] * len(global_metrics), + # name="progressive_training_iter_ix", + # ), + # append=True, + # ) + # .unstack(level="target") + # ) + + # stratum_metrics = ( + # stratum_metrics.set_index( + # pd.Index( + # [self.sort_index] * len(stratum_metrics), + # name="sort_index", + # ), + # append=True, + # ) + # .set_index( + # pd.Index( + # [self.progressive_training_iter_ix] * len(stratum_metrics), + # name="progressive_training_iter_ix", + # ), + # append=True, + # ) + # .unstack(level="target") + # ) + # return global_metrics, stratum_metrics + + # def train_lightgbm( + # self, + # train_params: pd.DataFrame, + # train_targets: pd.DataFrame, + # test_params: pd.DataFrame, + # test_targets: pd.DataFrame, + # s3_client: S3ClientType | None = None, + # ): + # """Train the lightgbm model.""" + # import lightgbm as lgb + + # lgb_params = { + # "objective": "regression", + # "metric": "rmse", + # } + # test_preds = {} + # train_preds = {} + # for 
col in train_targets.columns: + # lgb_train_data = lgb.Dataset(train_params, label=train_targets[col]) + # lgb_test_data = lgb.Dataset(test_params, label=test_targets[col]) + # model = lgb.train( + # lgb_params, + # lgb_train_data, + # num_boost_round=4000, + # valid_sets=[lgb_test_data], + # valid_names=["eval"], + # callbacks=[lgb.early_stopping(20)], + # ) + # test_preds[col] = pd.Series( + # cast(np.ndarray, model.predict(test_params)), + # index=test_targets.index, + # name=col, + # ) + # train_preds[col] = pd.Series( + # cast(np.ndarray, model.predict(train_params)), + # index=train_targets.index, + # name=col, + # ) + # if s3_client is not None: + # model_name = ( + # f"{col}.lgb" + # if not isinstance(col, tuple) + # else f"{'.'.join(col)}.lgb" + # ) + # model_key = self.format_model_key(model_name) + # model_str = model.model_to_string() + # s3_client.put_object(Bucket=self.bucket, Key=model_key, Body=model_str) + + # if s3_client is not None: + # import yaml + + # space_key = self.format_model_key("space.yml") + # space_str = yaml.dump( + # self.feature_spec.model_dump(mode="json"), indent=2, sort_keys=False + # ) + # s3_client.put_object(Bucket=self.bucket, Key=space_key, Body=space_str) + + # test_preds = pd.concat(test_preds, axis=1) + # train_preds = pd.concat(train_preds, axis=1) + # return train_preds, test_preds + + # @property + # def model_dir_key(self) -> str: + # """Get the key for the model directory.""" + # return f"{self.experiment_id}/{self.sort_index}/models" + + # def format_model_key(self, model_name: str) -> str: + # """Format the model key.""" + # return f"hatchet/{self.model_dir_key}/{model_name}" + + # def train_xgboost( + # self, + # train_params: pd.DataFrame, + # train_targets: pd.DataFrame, + # test_params: pd.DataFrame, + # test_targets: pd.DataFrame, + # ): + # """Train the xgboost model.""" + # import xgboost as xgb + + # hparams = { + # "objective": "reg:squarederror", + # "eval_metric": "rmse", + # "max_depth": 5, # 7 + # 
"eta": 0.1, + # "min_child_weight": 3, + # "subsample": 0.8, + # "colsample_bytree": 0.8, + # # "alpha": 0.01, + # # "lambda": 0.01, + # # "gamma": 0.01, + # } + + # train_dmatrix = xgb.DMatrix(train_params, label=train_targets) + # test_dmatrix = xgb.DMatrix(test_params, label=test_targets) + + # model = xgb.train( + # hparams, + # train_dmatrix, + # num_boost_round=2000, + # early_stopping_rounds=20, + # verbose_eval=True, + # evals=[(test_dmatrix, "test")], + # ) + + # # compute the metrics + # train_preds = model.predict(train_dmatrix) + # test_preds = model.predict(test_dmatrix) + # train_preds = pd.DataFrame( + # train_preds, index=train_targets.index, columns=train_targets.columns + # ) + # test_preds = pd.DataFrame( + # test_preds, index=test_targets.index, columns=test_targets.columns + # ) + + # return train_preds, test_preds + + +class TrainWithCVSpec(StageSpec): + """Train an SBEM model using a scatter gather approach for cross-fold validation.""" + + @model_validator(mode="after") + def check_stage(self): + """The training spec must have stage set to 'train'.""" + if self.stage_type != "train": + msg = f"Invalid stage: {self.stage_type}" + raise ValueError(msg) + return self + + @property + def schedule(self) -> list[TrainFoldSpec]: + """Create the task schedule.""" + schedule = [] + data_uri = self.data_uri + if data_uri is None: + msg = "Data URI is required for training." 
+ raise ValueError(msg) + + for i in range(self.progressive_training_spec.cross_val.n_folds): + schedule.append( + TrainFoldSpec( + # TODO: this should be set in a better manner + experiment_id="placeholder", + sort_index=i, + n_folds=self.progressive_training_spec.cross_val.n_folds, + data_uri=data_uri, + stratification_field=self.progressive_training_spec.stratification.field, + progressive_training_iter_ix=self.progressive_training_iteration_ix, + storage_settings=self.progressive_training_spec.storage_settings, + ) + ) + return schedule + + # def allocate(self, s3_client: S3ClientType): + # """Allocate the task.""" + # # 1. turn the schedule into a parquet dataframe + # df = pd.DataFrame([m.model_dump(mode="json") for m in self.schedule]) + # bucket = self.progressive_training_spec.bucket + # with tempfile.TemporaryDirectory() as tempdir: + # temp_path = Path(tempdir) / "train_specs.parquet" + # df.to_parquet(temp_path) + # key = f"hatchet/{self.experiment_key}/train_specs.parquet" + # specs_uri = f"s3://{bucket}/{key}" + # s3_client.upload_file(temp_path.as_posix(), bucket, key) + + # payload = { + # "specs": specs_uri, + # "bucket": bucket, + # # TODO: this should be selected in a better manner. 
+ # "workflow_name": "train_regressor_with_cv_fold", + # "experiment_id": self.experiment_key, + # } + # return payload + + # def check_convergence(self, uri: URIResponse, s3_client: S3ClientType): + # """Check the convergence of the training.""" + # with tempfile.TemporaryDirectory() as tempdir: + # tempdir = Path(tempdir) + # results_path = tempdir / "results.hdf" + # # download the results from s3 + # fetch_uri(uri.uri, local_path=results_path, use_cache=False, s3=s3_client) + # results = cast( + # pd.DataFrame, pd.read_hdf(results_path, key="stratum_metrics") + # ) + + # fold_averages = cast( + # pd.Series, + # results.xs( + # "test", + # level="split_segment", + # axis=1, + # ) + # .groupby(level="measurement") + # .mean() + # .unstack(level="measurement"), + # ) + # with tempfile.TemporaryDirectory() as tempdir: + # fold_averages_path = Path(tempdir) / "fold-averaged-errors.pq" + # fold_averages.to_frame( + # name=self.progressive_training_iteration_ix + # ).to_parquet(fold_averages_path) + # key = f"hatchet/{self.experiment_key}/fold-averaged-errors.pq" + # bucket = self.progressive_training_spec.bucket + # s3_client.upload_file(fold_averages_path.as_posix(), bucket, key) + + # ( + # convergence_all, + # convergence_monitor_segment, + # convergence_monitor_segment_and_target, + # convergence, + # ) = self.progressive_training_spec.convergence_criteria.check_convergence( + # fold_averages.xs("Energy", level="measurement") + # ) + + # return convergence_all, convergence diff --git a/src/globi/pipelines/__init__.py b/src/globi/pipelines/__init__.py new file mode 100644 index 0000000..9c61f28 --- /dev/null +++ b/src/globi/pipelines/__init__.py @@ -0,0 +1,13 @@ +"""Pipelines for the GloBI project.""" + +from globi.models.surrogate.dummy import dummy_simulation +from globi.pipelines.gis import preprocess_gis_file +from globi.pipelines.simulations import simulate_globi_building +from globi.pipelines.training import iterative_training + +__all__ = [ + 
"dummy_simulation", + "iterative_training", + "preprocess_gis_file", + "simulate_globi_building", +] diff --git a/src/globi/pipelines.py b/src/globi/pipelines/gis.py similarity index 58% rename from src/globi/pipelines.py rename to src/globi/pipelines/gis.py index 24ddd58..c3ad274 100644 --- a/src/globi/pipelines.py +++ b/src/globi/pipelines/gis.py @@ -1,27 +1,12 @@ -"""Experiment configuration for building builder simulations.""" +"""GIS processing pipelines for the GloBI project.""" import logging from pathlib import Path from typing import cast import geopandas as gpd -import numpy as np -import pandas as pd import yaml -from epinterface.geometry import ( - SceneContext, - ShoeboxGeometry, -) -from epinterface.sbem.builder import ( - AtticAssumptions, - BasementAssumptions, - Model, - construct_zone_def, -) from epinterface.sbem.fields.spec import SemanticModelFields -from scythe.registry import ExperimentRegistry -from scythe.utils.filesys import FileReference -from shapely import Polygon, from_wkt from globi.gis.errors import SemanticFieldsFileHasNoBuildingIDColumnError from globi.gis.geometry import ( @@ -53,206 +38,10 @@ FileConfig, GISPreprocessorColumnMap, ) -from globi.models.tasks import GloBIBuildingSpec, GloBIOutputSpec logger = logging.getLogger(__name__) -INDEX_COLS_TO_KEEP: list[str] = [ - "feature.geometry.long_edge", - "feature.geometry.short_edge", - "feature.geometry.orientation", - "feature.geometry.num_floors", - "feature.geometry.energy_model_conditioned_area", - "feature.geometry.energy_model_occupied_area", - "feature.semantic.Typology", - "feature.semantic.Age_bracket", - "feature.semantic.Region", - "feature.weather.file", - "feature.geometry.wwr", - "feature.geometry.f2f_height", - "feature.geometry.attic_height", -] - - -def simulate_globi_building_pipeline( - input_spec: GloBIBuildingSpec, - tempdir: Path, -) -> GloBIOutputSpec: - """Simulate a GlobiSpec building and return energy and peak results. 
- - Args: - input_spec: The input specification containing building parameters and file URIs - tempdir: Temporary directory for intermediate files - Returns: - Output specification containing a DataFrame with MultiIndex: - - Top level: Measurement type (Energy, Peak) - - Feature levels from input specification - """ - spec = input_spec - log = logger.info - zone_def = construct_zone_def( - component_map_path=spec.component_map, - db_path=spec.db_path, - semantic_field_context=spec.semantic_field_context, - ) - model = Model( - Weather=spec.epwzip_path, - Zone=zone_def, - Basement=BasementAssumptions( - Conditioned=spec.basement_is_conditioned, - UseFraction=spec.basement_use_fraction - if spec.basement_is_occupied - else None, - ), - Attic=AtticAssumptions( - Conditioned=spec.attic_is_conditioned, - UseFraction=spec.attic_use_fraction if spec.attic_is_occupied else None, - ), - geometry=ShoeboxGeometry( - x=0, - y=0, - w=spec.long_edge, - d=spec.short_edge, - h=spec.f2f_height, - wwr=spec.wwr, - num_stories=spec.num_floors, - basement=spec.has_basement, - zoning=spec.use_core_perim_zoning, - roof_height=spec.attic_height, - exposed_basement_frac=spec.exposed_basement_frac, - scene_context=SceneContext( - building=cast(Polygon, from_wkt(spec.rotated_rectangle)), - neighbors=[ - cast(Polygon, from_wkt(poly)) for poly in spec.neighbor_polys - ], - neighbor_heights=[ - float(h) if h is not None else 0 for h in spec.neighbor_heights - ], - orientation=spec.long_edge_angle, - ), - ), - ) - - log("Building and running model...") - overheating_config = ( - spec.parent_experiment_spec.overheating_config - if spec.parent_experiment_spec - else None - ) - run_result = model.run( - eplus_parent_dir=tempdir, - overheating_config=overheating_config, - ) - # Validate conditioned area - if not np.allclose( - model.total_conditioned_area, spec.energy_model_conditioned_area - ): - msg = ( - f"Total conditioned area mismatch: " - f"{model.total_conditioned_area} != 
{spec.energy_model_conditioned_area}" - ) - raise ValueError(msg) - - # Results Post-processing - # TODO: consider if we actually want all t he columns we are including. - feature_index = spec.make_multiindex( - n_rows=1, additional_index_data=spec.feature_dict - ) - results = run_result.energy_and_peak.to_frame().T.set_index(feature_index) - - dfs: dict[str, pd.DataFrame] = { - "EnergyAndPeak": results, - } - if run_result.overheating_results is not None: - # TODO: add feature dict to overheating df indices? Or instead of a full feature df, just add a single column with the building id? - edh = run_result.overheating_results.edh - old_ix = edh.index - feature_index = spec.make_multiindex( - n_rows=len(edh), include_sort_subindex=False - ) - edh.index = feature_index - edh = edh.set_index(old_ix, append=True) - dfs["ExceedanceDegreeHours"] = edh - - basic_oh = run_result.overheating_results.basic_oh - old_ix = basic_oh.index - feature_index = spec.make_multiindex( - n_rows=len(basic_oh), include_sort_subindex=False - ) - basic_oh.index = feature_index - basic_oh = basic_oh.set_index(old_ix, append=True) - dfs["BasicOverheating"] = basic_oh - - heat_index_categories = run_result.overheating_results.hi - old_ix = heat_index_categories.index - feature_index = spec.make_multiindex( - n_rows=len(heat_index_categories), include_sort_subindex=False - ) - heat_index_categories.index = feature_index - heat_index_categories = heat_index_categories.set_index(old_ix, append=True) - dfs["HeatIndexCategories"] = heat_index_categories - - consecutive_e_zone = run_result.overheating_results.consecutive_e_zone - # may be zero if no streaks found in any zones - if len(consecutive_e_zone) > 0: - old_ix = consecutive_e_zone.index - feature_index = spec.make_multiindex( - n_rows=len(consecutive_e_zone), include_sort_subindex=False - ) - consecutive_e_zone.index = feature_index - consecutive_e_zone = consecutive_e_zone.set_index(old_ix, append=True) - dfs["ConsecutiveExceedances"] = 
consecutive_e_zone - - hourly_data_outpath: FileReference | None = None - - if spec.parent_experiment_spec and spec.parent_experiment_spec.hourly_data_config: - hourly_df = run_result.sql.timeseries_by_name( - spec.parent_experiment_spec.hourly_data_config.data, - reporting_frequency="Hourly", - ) - hourly_df.index.names = ["Timestep"] - hourly_df.columns.names = ["Trash", "Group", "Meter"] - hourly_df: pd.DataFrame = cast( - pd.DataFrame, - hourly_df.droplevel("Trash", axis=1) - .stack(level="Group", future_stack=True) - .unstack(level="Timestep"), - ) - hourly_multiindex = spec.make_multiindex( - n_rows=len(hourly_df), include_sort_subindex=False - ) - old_ix = hourly_df.index - hourly_df.index = hourly_multiindex - hourly_df = hourly_df.set_index(old_ix, append=True) - - if spec.parent_experiment_spec.hourly_data_config.does_dataframe_output: - for meter_name in hourly_df.columns.get_level_values("Meter").unique(): - variable_df = hourly_df.xs(meter_name, level="Meter", axis=1) - dataframe_key = f"HourlyData.{meter_name.replace(' ', '')}" - dfs[dataframe_key] = variable_df - if spec.parent_experiment_spec.hourly_data_config.does_file_output: - hourly_data_outpath = tempdir / "outputs_hourly_data.pq" - hourly_df.to_parquet(hourly_data_outpath) - - return GloBIOutputSpec( - dataframes=dfs, - hourly_data=hourly_data_outpath, - ) - - -@ExperimentRegistry.Register(retries=2, schedule_timeout="10h", execution_timeout="30m") -def simulate_globi_building( - input_spec: GloBIBuildingSpec, tempdir: Path -) -> GloBIOutputSpec: - """Simulate a GlobiSpec building and return monthly energy and peak results. - - NB: this is separated from the pipeline above so the pipeline can still be used as a - local invocation without *too* much difficulty. 
- """ - return simulate_globi_building_pipeline(input_spec, tempdir) - - def preprocess_gis_file( config: DeterministicGISPreprocessorConfig, file_config: "FileConfig", @@ -522,17 +311,3 @@ def preprocess_gis_file( logger.info(f"saved {len(gdf)} features to {output_path}") return gdf, column_output_map - - -if __name__ == "__main__": - import tempfile - - from globi.models.tasks import MinimalBuildingSpec - - with tempfile.TemporaryDirectory() as tempdir: - with open("inputs/building.yml") as f: - input_spec = MinimalBuildingSpec.model_validate(yaml.safe_load(f)) - o = simulate_globi_building_pipeline( - input_spec=input_spec.globi_spec, - tempdir=Path(tempdir), - ) diff --git a/src/globi/pipelines/simulations.py b/src/globi/pipelines/simulations.py new file mode 100644 index 0000000..dfaff88 --- /dev/null +++ b/src/globi/pipelines/simulations.py @@ -0,0 +1,235 @@ +"""Experiment configuration for building builder simulations.""" + +import logging +from pathlib import Path +from typing import cast + +import numpy as np +import pandas as pd +import yaml +from epinterface.geometry import ( + SceneContext, + ShoeboxGeometry, +) +from epinterface.sbem.builder import ( + AtticAssumptions, + BasementAssumptions, + Model, + construct_zone_def, +) +from scythe.registry import ExperimentRegistry +from scythe.utils.filesys import FileReference +from shapely import Polygon, from_wkt + +from globi.models.tasks import GloBIBuildingSpec, GloBIOutputSpec + +logger = logging.getLogger(__name__) + + +INDEX_COLS_TO_KEEP: list[str] = [ + "feature.geometry.long_edge", + "feature.geometry.short_edge", + "feature.geometry.orientation", + "feature.geometry.num_floors", + "feature.geometry.energy_model_conditioned_area", + "feature.geometry.energy_model_occupied_area", + "feature.semantic.Typology", + "feature.semantic.Age_bracket", + "feature.semantic.Region", + "feature.weather.file", + "feature.geometry.wwr", + "feature.geometry.f2f_height", + "feature.geometry.attic_height", +] + + 
+def simulate_globi_building_pipeline( + input_spec: GloBIBuildingSpec, + tempdir: Path, +) -> GloBIOutputSpec: + """Simulate a GlobiSpec building and return energy and peak results. + + Args: + input_spec: The input specification containing building parameters and file URIs + tempdir: Temporary directory for intermediate files + Returns: + Output specification containing a DataFrame with MultiIndex: + - Top level: Measurement type (Energy, Peak) + - Feature levels from input specification + """ + spec = input_spec + log = logger.info + zone_def = construct_zone_def( + component_map_path=spec.component_map, + db_path=spec.db_path, + semantic_field_context=spec.semantic_field_context, + ) + model = Model( + Weather=spec.epwzip_path, + Zone=zone_def, + Basement=BasementAssumptions( + Conditioned=spec.basement_is_conditioned, + UseFraction=spec.basement_use_fraction + if spec.basement_is_occupied + else None, + ), + Attic=AtticAssumptions( + Conditioned=spec.attic_is_conditioned, + UseFraction=spec.attic_use_fraction if spec.attic_is_occupied else None, + ), + geometry=ShoeboxGeometry( + x=0, + y=0, + w=spec.long_edge, + d=spec.short_edge, + h=spec.f2f_height, + wwr=spec.wwr, + num_stories=spec.num_floors, + basement=spec.has_basement, + zoning=spec.use_core_perim_zoning, + roof_height=spec.attic_height, + exposed_basement_frac=spec.exposed_basement_frac, + scene_context=SceneContext( + building=cast(Polygon, from_wkt(spec.rotated_rectangle)), + neighbors=[ + cast(Polygon, from_wkt(poly)) for poly in spec.neighbor_polys + ], + neighbor_heights=[ + float(h) if h is not None else 0 for h in spec.neighbor_heights + ], + orientation=spec.long_edge_angle, + ), + ), + ) + + log("Building and running model...") + overheating_config = ( + spec.parent_experiment_spec.overheating_config + if spec.parent_experiment_spec + else None + ) + run_result = model.run( + eplus_parent_dir=tempdir, + overheating_config=overheating_config, + ) + # Validate conditioned area + if not 
np.allclose(
+        model.total_conditioned_area, spec.energy_model_conditioned_area
+    ):
+        msg = (
+            f"Total conditioned area mismatch: "
+            f"{model.total_conditioned_area} != {spec.energy_model_conditioned_area}"
+        )
+        raise ValueError(msg)
+
+    # Results Post-processing
+    # TODO: consider if we actually want all the columns we are including.
+    feature_index = spec.make_multiindex(
+        n_rows=1, additional_index_data=spec.feature_dict
+    )
+    results = run_result.energy_and_peak.to_frame().T.set_index(feature_index)
+
+    dfs: dict[str, pd.DataFrame] = {
+        "EnergyAndPeak": results,
+    }
+    if run_result.overheating_results is not None:
+        # TODO: add feature dict to overheating df indices? Or instead of a full feature df, just add a single column with the building id?
+        edh = run_result.overheating_results.edh
+        old_ix = edh.index
+        feature_index = spec.make_multiindex(
+            n_rows=len(edh), include_sort_subindex=False
+        )
+        edh.index = feature_index
+        edh = edh.set_index(old_ix, append=True)
+        dfs["ExceedanceDegreeHours"] = edh
+
+        basic_oh = run_result.overheating_results.basic_oh
+        old_ix = basic_oh.index
+        feature_index = spec.make_multiindex(
+            n_rows=len(basic_oh), include_sort_subindex=False
+        )
+        basic_oh.index = feature_index
+        basic_oh = basic_oh.set_index(old_ix, append=True)
+        dfs["BasicOverheating"] = basic_oh
+
+        heat_index_categories = run_result.overheating_results.hi
+        old_ix = heat_index_categories.index
+        feature_index = spec.make_multiindex(
+            n_rows=len(heat_index_categories), include_sort_subindex=False
+        )
+        heat_index_categories.index = feature_index
+        heat_index_categories = heat_index_categories.set_index(old_ix, append=True)
+        dfs["HeatIndexCategories"] = heat_index_categories
+
+        consecutive_e_zone = run_result.overheating_results.consecutive_e_zone
+        # may be zero if no streaks found in any zones
+        if len(consecutive_e_zone) > 0:
+            old_ix = consecutive_e_zone.index
+            feature_index = spec.make_multiindex(
+                n_rows=len(consecutive_e_zone),
include_sort_subindex=False + ) + consecutive_e_zone.index = feature_index + consecutive_e_zone = consecutive_e_zone.set_index(old_ix, append=True) + dfs["ConsecutiveExceedances"] = consecutive_e_zone + + hourly_data_outpath: FileReference | None = None + + if spec.parent_experiment_spec and spec.parent_experiment_spec.hourly_data_config: + hourly_df = run_result.sql.timeseries_by_name( + spec.parent_experiment_spec.hourly_data_config.data, + reporting_frequency="Hourly", + ) + hourly_df.index.names = ["Timestep"] + hourly_df.columns.names = ["Trash", "Group", "Meter"] + hourly_df: pd.DataFrame = cast( + pd.DataFrame, + hourly_df.droplevel("Trash", axis=1) + .stack(level="Group", future_stack=True) + .unstack(level="Timestep"), + ) + hourly_multiindex = spec.make_multiindex( + n_rows=len(hourly_df), include_sort_subindex=False + ) + old_ix = hourly_df.index + hourly_df.index = hourly_multiindex + hourly_df = hourly_df.set_index(old_ix, append=True) + + if spec.parent_experiment_spec.hourly_data_config.does_dataframe_output: + for meter_name in hourly_df.columns.get_level_values("Meter").unique(): + variable_df = hourly_df.xs(meter_name, level="Meter", axis=1) + dataframe_key = f"HourlyData.{meter_name.replace(' ', '')}" + dfs[dataframe_key] = variable_df + if spec.parent_experiment_spec.hourly_data_config.does_file_output: + hourly_data_outpath = tempdir / "outputs_hourly_data.pq" + hourly_df.to_parquet(hourly_data_outpath) + + return GloBIOutputSpec( + dataframes=dfs, + hourly_data=hourly_data_outpath, + ) + + +@ExperimentRegistry.Register(retries=2, schedule_timeout="10h", execution_timeout="30m") +def simulate_globi_building( + input_spec: GloBIBuildingSpec, tempdir: Path +) -> GloBIOutputSpec: + """Simulate a GlobiSpec building and return monthly energy and peak results. + + NB: this is separated from the pipeline above so the pipeline can still be used as a + local invocation without *too* much difficulty. 
+ """ + return simulate_globi_building_pipeline(input_spec, tempdir) + + +if __name__ == "__main__": + import tempfile + + from globi.models.tasks import MinimalBuildingSpec + + with tempfile.TemporaryDirectory() as tempdir: + with open("inputs/building.yml") as f: + input_spec = MinimalBuildingSpec.model_validate(yaml.safe_load(f)) + o = simulate_globi_building_pipeline( + input_spec=input_spec.globi_spec, + tempdir=Path(tempdir), + ) diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py new file mode 100644 index 0000000..f5c10f6 --- /dev/null +++ b/src/globi/pipelines/training.py @@ -0,0 +1,323 @@ +"""The training pipeline.""" + +from datetime import datetime, timedelta +from pathlib import Path +from typing import Literal + +from hatchet_sdk import Context +from pydantic import BaseModel, HttpUrl +from scythe.base import ExperimentOutputSpec +from scythe.experiments import ( + BaseExperiment, + ExperimentRun, + SemVer, + VersionedExperiment, +) +from scythe.hatchet import hatchet +from scythe.registry import ExperimentRegistry +from scythe.scatter_gather import RecursionMap, ScatterGatherResult, scatter_gather + +from globi.models.surrogate.dummy import DummySimulationInput, dummy_simulation +from globi.models.surrogate.training import SampleSpec, TrainFoldSpec, TrainWithCVSpec + + +class FoldResult(ExperimentOutputSpec): + """The output for a fold.""" + + pass + + +@ExperimentRegistry.Register( + description="Train a regressor with cross-fold validation.", +) +def train_regressor_with_cv_fold( + input_spec: TrainFoldSpec, tempdir: Path +) -> FoldResult: + """Train a regressor with cross-fold validation.""" + # DO TRAINING + + return FoldResult() + + +class ExperimentMetadata(BaseModel): + """Metadata about an experiment.""" + + workflow_run_id: str + run_id: str + run_name: str + version: SemVer + datetime: datetime + + +class CombineResultsResult(BaseModel): + """The result of combining the results of the simulations.""" + + 
scatter_gather_result: ScatterGatherResult
+    combined_scatter_gather_result: ScatterGatherResult
+
+
+iterative_training = hatchet.workflow(
+    name="iterative_training",
+    description="Sample a collection of building simulations to then simulate and train a surrogate model.",
+    input_validator=SampleSpec,
+)
+
+
+@iterative_training.task(
+    name="iterative_training.create_simulations",
+    schedule_timeout=timedelta(minutes=30),
+    execution_timeout=timedelta(minutes=10),
+)
+def create_simulations(spec: SampleSpec, context: Context) -> ExperimentMetadata:
+    """Create the simulations."""
+    # STEP 1: Generate the training samples, allocate simulations
+    specs = [
+        DummySimulationInput(
+            a=i,
+            b=i,
+            experiment_id="placeholder",
+            sort_index=i,
+        )
+        for i in range(10)
+    ]
+
+    # STEP 2: Simulate the simulations using scythe
+    root_run_name = spec.progressive_training_spec.experiment_id
+    run_name = f"{root_run_name}/sample"
+
+    exp = BaseExperiment(
+        # TODO: replace with simulate_globi_flat_building
+        experiment=dummy_simulation,  # TODO: add configurability to switch between simulations.
+        run_name=run_name,
+        storage_settings=spec.progressive_training_spec.storage_settings,
+    )
+
+    run, ref = exp.allocate(
+        specs,
+        version="bumpmajor",  # TODO: bump minor if not the first iteration.
+        recursion_map=spec.progressive_training_spec.iteration.recursion,
+    )
+
+    run_name = run.versioned_experiment.base_experiment.run_name
+    if not run_name:
+        msg = "Run name is required."
+ raise ValueError(msg) + run_id = run.experiment_id + + return ExperimentMetadata( + workflow_run_id=ref.workflow_run_id, + run_id=run_id, + run_name=run_name, + version=run.versioned_experiment.version, + datetime=run.timestamp, + ) + + +@iterative_training.task( + name="iterative_training.await_simulations", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(hours=5), + parents=[create_simulations], +) +async def await_simulations(spec: SampleSpec, context: Context) -> ScatterGatherResult: + """Await the simulations.""" + parent_output = context.task_output(create_simulations) + workflow_run_id = parent_output.workflow_run_id + context.log("Awaiting simulations...") + results = await scatter_gather.aio_get_result(workflow_run_id) + context.log("Simulations completed.") + + return results + + +@iterative_training.task( + name="iterative_training.combine_results", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(hours=1), + parents=[await_simulations, create_simulations], +) +async def combine_results(spec: SampleSpec, context: Context) -> CombineResultsResult: + """Combine the results of the simulations.""" + results = context.task_output(await_simulations) + run_info = context.task_output(create_simulations) + # TODO: kind of annoying have to reconstruct the run object here; necessary because the base experiment is not serializable. + _run = ExperimentRun( + versioned_experiment=VersionedExperiment( + base_experiment=BaseExperiment( + experiment=dummy_simulation, # TODO: replace with simulate_globi_flat_building + run_name=run_info.run_name, + storage_settings=spec.progressive_training_spec.storage_settings, + ), + version=run_info.version, + ), + timestamp=run_info.datetime, + ) + # files = run.list_results_files() + # TODO: configure which files to store/combine via input spec. 
+ return CombineResultsResult( + scatter_gather_result=results, + combined_scatter_gather_result=results, + ) + + +class StartTrainingResult(BaseModel): + """The result of starting the training.""" + + training_spec: TrainWithCVSpec + experiment_metadata: ExperimentMetadata + + +@iterative_training.task( + name="iterative_training.start_training", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(hours=1), + parents=[combine_results], +) +async def start_training(spec: SampleSpec, context: Context) -> StartTrainingResult: + """Start the training.""" + results = context.task_output(combine_results) + + train_spec = TrainWithCVSpec( + progressive_training_spec=spec.progressive_training_spec, + progressive_training_iteration_ix=spec.progressive_training_iteration_ix, + data_uri=results.combined_scatter_gather_result.uris[ + "main_result" + ], # TODO: should be configure which result to use + stage_type="train", + ) + + # TODO: create the training specs and then allocate the experiment + + specs = train_spec.schedule + + root_run_name = spec.progressive_training_spec.experiment_id + run_name = f"{root_run_name}/train" + exp = BaseExperiment( + experiment=train_regressor_with_cv_fold, + run_name=run_name, + storage_settings=spec.progressive_training_spec.storage_settings, + ) + run, ref = exp.allocate( + specs, + version="bumpmajor", # TODO: bump minor if not the first iteration. + recursion_map=RecursionMap( + factor=2, + max_depth=0, + ), + ) + + if not run.versioned_experiment.base_experiment.run_name: + msg = "Run name is required." 
+ raise ValueError(msg) + + return StartTrainingResult( + training_spec=train_spec, + experiment_metadata=ExperimentMetadata( + workflow_run_id=ref.workflow_run_id, + run_id=run.experiment_id, + run_name=run.versioned_experiment.base_experiment.run_name, + version=run.versioned_experiment.version, + datetime=run.timestamp, + ), + ) + + +@iterative_training.task( + name="iterative_training.await_training", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(hours=5), + parents=[start_training], +) +async def await_training(spec: SampleSpec, context: Context) -> ScatterGatherResult: + """Await the training.""" + parent_output = context.task_output(start_training) + workflow_run_id = parent_output.experiment_metadata.workflow_run_id + context.log("Awaiting training...") + results = await scatter_gather.aio_get_result(workflow_run_id) + context.log("Training completed.") + + return results + + +class TrainingEvaluationResult(BaseModel): + """The result of evaluating the training.""" + + converged: bool + + +class RecursionTransition(BaseModel): + """The transition of the recursion.""" + + reasoning: Literal["max_depth", "converged"] | None + child_workflow_run_id: str | None + + +@iterative_training.task( + name="iterative_training.evaluate_training", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(minutes=5), + parents=[await_training], +) +async def evaluate_training( + spec: SampleSpec, context: Context +) -> TrainingEvaluationResult: + """Evaluate the training.""" + _results = context.task_output(await_training) + return TrainingEvaluationResult(converged=True) + + +@iterative_training.task( + name="iterative_training.transition_recursion", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(minutes=5), + parents=[evaluate_training, start_training], +) +async def transition_recursion( + spec: SampleSpec, context: Context +) -> RecursionTransition: + """Transition the recursion.""" + results = 
context.task_output(evaluate_training) + if results.converged: + # create child + return RecursionTransition(reasoning="converged", child_workflow_run_id=None) + if ( + spec.progressive_training_iteration_ix + 1 + >= spec.progressive_training_spec.iteration.max_iters + ): + return RecursionTransition(reasoning="max_depth", child_workflow_run_id=None) + + start_training_output = context.task_output(start_training) + + sample_spec = SampleSpec( + progressive_training_spec=spec.progressive_training_spec, + progressive_training_iteration_ix=spec.progressive_training_iteration_ix + 1, + data_uri=start_training_output.training_spec.data_uri, + stage_type="sample", + ) + + ref = await iterative_training.aio_run_no_wait( + sample_spec, + ) + return RecursionTransition( + reasoning=None, child_workflow_run_id=ref.workflow_run_id + ) + + +if __name__ == "__main__": + from scythe.settings import ScytheStorageSettings + + from globi.models.surrogate.training import ProgressiveTrainingSpec + + progressive_training_spec = ProgressiveTrainingSpec( + experiment_id="test-experiment", + gis_uri=HttpUrl("https://example.com/gis.parquet"), + storage_settings=ScytheStorageSettings(), + ) + spec = SampleSpec( + progressive_training_spec=progressive_training_spec, + progressive_training_iteration_ix=0, + data_uri=None, + stage_type="sample", + ) + result = iterative_training.run(spec) + print(result) diff --git a/src/globi/worker/Dockerfile b/src/globi/worker/Dockerfile index 321b76b..a5c6bc6 100644 --- a/src/globi/worker/Dockerfile +++ b/src/globi/worker/Dockerfile @@ -93,7 +93,8 @@ RUN EP_VERSION_DASH=$(echo "${EP_VERSION}" | tr '.' 
'-') && \ WORKDIR /code COPY uv.lock pyproject.toml README.md /code/ -RUN uv sync --locked --no-install-project --extra cli +# TODO: only install ml for certain containers by passing in a flag to the docker build command +RUN uv sync --locked --no-install-project --extra cli --extra ml RUN uv run epi prisma generate diff --git a/src/globi/worker/main.py b/src/globi/worker/main.py index 4a38a4a..ee7eb38 100644 --- a/src/globi/worker/main.py +++ b/src/globi/worker/main.py @@ -1,16 +1,35 @@ """Worker main script.""" +from scythe.hatchet import hatchet +from scythe.registry import ExperimentRegistry +from scythe.scatter_gather import scatter_gather from scythe.worker import ScytheWorkerConfig from globi.pipelines import * # noqa: F403 +from globi.pipelines import iterative_training conf = ScytheWorkerConfig() def main(): """Main function for the worker.""" - conf.start() + # TODO: this is required since scythe does not allow registering extra tasks/workflows at the moment. + worker = hatchet.worker( + name=conf.computed_name, + slots=conf.computed_slots, + durable_slots=conf.computed_durable_slots, + labels=conf.labels, + ) + workflows = ([scatter_gather] if conf.DOES_FAN else []) + ( + ExperimentRegistry.experiments() if conf.DOES_LEAF else [] + ) + for workflow in workflows: + worker.register_workflow(workflow) + worker.register_workflow(iterative_training) + worker.start() + + # conf.start() if __name__ == "__main__": - conf.start() + main() diff --git a/uv.lock b/uv.lock index 73b2575..2cf86e0 100644 --- a/uv.lock +++ b/uv.lock @@ -1379,6 +1379,10 @@ cli = [ { name = "click" }, { name = "xlsxwriter" }, ] +ml = [ + { name = "lightgbm" }, + { name = "xgboost" }, +] visualization = [ { name = "bokeh" }, { name = "folium" }, @@ -1421,6 +1425,7 @@ requires-dist = [ { name = "folium", marker = "extra == 'visualization'", specifier = ">=0.15.0" }, { name = "geopandas", specifier = ">=0.14.0" }, { name = "ladybug-core", specifier = ">=0.44.29" }, + { name = "lightgbm", 
marker = "extra == 'ml'", specifier = ">=4.6.0" }, { name = "matplotlib", marker = "extra == 'visualization'", specifier = ">=3.8.0" }, { name = "numpy", specifier = ">=1.26.0" }, { name = "pandas", specifier = ">=2.1.0" }, @@ -1435,9 +1440,10 @@ requires-dist = [ { name = "seaborn", marker = "extra == 'visualization'", specifier = ">=0.13.0" }, { name = "shapely", specifier = ">=2.0.0" }, { name = "streamlit", marker = "extra == 'visualization'", specifier = ">=1.28.0" }, + { name = "xgboost", marker = "extra == 'ml'", specifier = ">=3.2.0" }, { name = "xlsxwriter", marker = "extra == 'cli'", specifier = ">=3.2.9" }, ] -provides-extras = ["visualization", "cli"] +provides-extras = ["visualization", "ml", "cli"] [package.metadata.requires-dev] dev = [ @@ -2156,6 +2162,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d4/52/7b8421a8ace22a17ae77dd9a8367e916364ed8be72502cb744805f06d6ac/ladybug_geometry-1.34.14-py3-none-any.whl", hash = "sha256:af91ee9285333ca1ddfaf439530306dff7f0a891cae40d4dc5491f139fcf7d36", size = 198221, upload-time = "2025-11-07T04:16:46.986Z" }, ] +[[package]] +name = "lightgbm" +version = "4.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/0b/a2e9f5c5da7ef047cc60cef37f86185088845e8433e54d2e7ed439cce8a3/lightgbm-4.6.0.tar.gz", hash = "sha256:cb1c59720eb569389c0ba74d14f52351b573af489f230032a1c9f314f8bab7fe", size = 1703705, upload-time = "2025-02-15T04:03:03.111Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/75/cffc9962cca296bc5536896b7e65b4a7cdeb8db208e71b9c0133c08f8f7e/lightgbm-4.6.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:b7a393de8a334d5c8e490df91270f0763f83f959574d504c7ccb9eee4aef70ed", size = 2010151, upload-time = "2025-02-15T04:02:50.961Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/1b/550ee378512b78847930f5d74228ca1fdba2a7fbdeaac9aeccc085b0e257/lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:2dafd98d4e02b844ceb0b61450a660681076b1ea6c7adb8c566dfd66832aafad", size = 1592172, upload-time = "2025-02-15T04:02:53.937Z" }, + { url = "https://files.pythonhosted.org/packages/64/41/4fbde2c3d29e25ee7c41d87df2f2e5eda65b431ee154d4d462c31041846c/lightgbm-4.6.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4d68712bbd2b57a0b14390cbf9376c1d5ed773fa2e71e099cac588703b590336", size = 3454567, upload-time = "2025-02-15T04:02:56.443Z" }, + { url = "https://files.pythonhosted.org/packages/42/86/dabda8fbcb1b00bcfb0003c3776e8ade1aa7b413dff0a2c08f457dace22f/lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:cb19b5afea55b5b61cbb2131095f50538bd608a00655f23ad5d25ae3e3bf1c8d", size = 3569831, upload-time = "2025-02-15T04:02:58.925Z" }, + { url = "https://files.pythonhosted.org/packages/5e/23/f8b28ca248bb629b9e08f877dd2965d1994e1674a03d67cd10c5246da248/lightgbm-4.6.0-py3-none-win_amd64.whl", hash = "sha256:37089ee95664b6550a7189d887dbf098e3eadab03537e411f52c63c121e3ba4b", size = 1451509, upload-time = "2025-02-15T04:03:01.515Z" }, +] + [[package]] name = "littleutils" version = "0.2.4" @@ -2992,6 +3015,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, ] +[[package]] +name = "nvidia-nccl-cu12" +version = "2.29.7" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/cc/f48875411d1f176bce58e6343fd5d4131fc1db5420719ff25944bdc006c6/nvidia_nccl_cu12-2.29.7-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:0cf032ee22b560447daf0456108a75e32bd74a4de6c6b64725637a359fa48cd8", 
size = 293563644, upload-time = "2026-03-03T05:34:46.166Z" }, + { url = "https://files.pythonhosted.org/packages/31/1e/9e366f36efc550f07d6737f199e3f6bffafdf28795d007f10a77dd274f5c/nvidia_nccl_cu12-2.29.7-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:ecd0a012051abc20c1aa87328841efa8cade3ced65803046e38c2f03c0891fea", size = 293633942, upload-time = "2026-03-03T05:37:05.625Z" }, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -4786,6 +4818,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/0e/fa3b193432cfc60c93b42f3be03365f5f909d2b3ea410295cf36df739e31/widgetsnbextension-4.0.15-py3-none-any.whl", hash = "sha256:8156704e4346a571d9ce73b84bee86a29906c9abfd7223b7228a28899ccf3366", size = 2196503, upload-time = "2025-11-01T21:15:53.565Z" }, ] +[[package]] +name = "xgboost" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/bb/1eb0242409d22db725d7a88088e6cfd6556829fb0736f9ff69aa9f1e9455/xgboost-3.2.0.tar.gz", hash = "sha256:99b0e9a2a64896cdaf509c5e46372d336c692406646d20f2af505003c0c5d70d", size = 1263936, upload-time = "2026-02-10T11:03:05.542Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/49/6e4cdd877c24adf56cb3586bc96d93d4dcd780b5ea1efb32e1ee0de08bae/xgboost-3.2.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:2f661966d3e322536d9c448090a870fcba1e32ee5760c10b7c46bac7a342079a", size = 2507014, upload-time = "2026-02-10T10:50:57.44Z" }, + { url = "https://files.pythonhosted.org/packages/93/f1/c09ef1add609453aa3ba5bafcd0d1c1a805c1263c0b60138ec968f8ec296/xgboost-3.2.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:eabbd40d474b8dbf6cb3536325f9150b9e6f0db32d18de9914fb3227d0bef5b7", size = 2328527, upload-time = "2026-02-10T10:51:17.502Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/9f/d9914a7b8df842832850b1a18e5f47aaa071c217cdd1da2ae9deb291018b/xgboost-3.2.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:852eabc6d3b3702a59bf78dbfdcd1cb9c4d3a3b6e5ed1f8781d8b9512354fdd2", size = 131100954, upload-time = "2026-02-10T11:02:42.704Z" }, + { url = "https://files.pythonhosted.org/packages/79/98/679de17c2caa4fd3b0b4386ecf7377301702cb0afb22930a07c142fcb1d8/xgboost-3.2.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:99b4a6bbcb47212fec5cf5fbe12347215f073c08967431b0122cfbd1ee70312c", size = 131748579, upload-time = "2026-02-10T10:54:40.424Z" }, + { url = "https://files.pythonhosted.org/packages/1f/3d/1661dd114a914a67e3f7ab66fa1382e7599c2a8c340f314ad30a3e2b4d08/xgboost-3.2.0-py3-none-win_amd64.whl", hash = "sha256:0d169736fd836fc13646c7ab787167b3a8110351c2c6bc770c755ee1618f0442", size = 101681668, upload-time = "2026-02-10T10:59:31.202Z" }, +] + [[package]] name = "xlsxwriter" version = "3.2.9" From 8a7d36ec4962dfd1e4f86775902b57517f54b032 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sat, 7 Mar 2026 13:25:08 -0500 Subject: [PATCH 02/27] use scythe to manage outer loop experiment --- pyproject.toml | 2 +- src/globi/models/surrogate/training.py | 99 +++++++-------- src/globi/pipelines/training.py | 168 +++++++++++++------------ uv.lock | 8 +- 4 files changed, 137 insertions(+), 140 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6f33108..8315ab7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ globi = "globi.tools.cli.main:cli" [tool.uv.sources] # scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/allow-optional-filerefs"} -# scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/update-hatchet"} +scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/allow-versioning-workflows"} # scythe-engine = {path = "../scythe", editable = true} # epinterface = {path = 
"../epinterface", editable = true} # epinterface = {path = "epinterface", editable = true} diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index e695715..760bacf 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -6,10 +6,9 @@ import numpy as np import pandas as pd -from pydantic import BaseModel, Field, model_validator -from scythe.base import BaseSpec, ExperimentInputSpec +from pydantic import BaseModel, Field +from scythe.base import ExperimentInputSpec from scythe.scatter_gather import RecursionMap -from scythe.settings import ScytheStorageSettings from scythe.utils.filesys import FileReference, OptionalFileReference if TYPE_CHECKING: @@ -155,12 +154,25 @@ class IterationSpec(BaseModel): default_factory=lambda: RecursionMap(factor=100, max_depth=1), description="The recursion spec.", ) + current_iter: int = Field( + default=0, + description="The index of the current training iteration within the outer loop.", + ) + + @property + def at_max_iters(self) -> bool: + """Whether the current iteration is the maximum number of iterations.""" + return self.current_iter + 1 >= self.max_iters # TODO: should this be a subclass of ExperimentInputSpec? 
-class ProgressiveTrainingSpec(BaseSpec): +class ProgressiveTrainingSpec(ExperimentInputSpec): """A spec for iteratively training an SBEM regression model.""" + base_run_name: str = Field( + ..., + description="The base run name for the experiment.", + ) convergence_criteria: ConvergenceThresholds = Field( default_factory=ConvergenceThresholds, description="The convergence criteria.", @@ -185,9 +197,9 @@ class ProgressiveTrainingSpec(BaseSpec): ..., description="The uri of the gis data to train on.", ) - storage_settings: ScytheStorageSettings = Field( - default=..., - description="The storage settings to use.", + data_uri: OptionalFileReference = Field( + ..., + description="The uris of the previous simulation results to sample from.", ) @property @@ -223,27 +235,15 @@ def gis_data(self) -> pd.DataFrame: class StageSpec(BaseModel): """A spec that is common to both the sample and train stages (and possibly others).""" - progressive_training_spec: ProgressiveTrainingSpec = Field( + parent: ProgressiveTrainingSpec = Field( ..., - description="The progressive training spec.", - ) - progressive_training_iteration_ix: int = Field( - ..., - description="The index of the current training iteration within the outer loop.", - ) - data_uri: OptionalFileReference = Field( - ..., - description="The uris of the previous simulation results to sample from.", - ) - stage_type: Literal["sample", "train"] = Field( - ..., - description="The type of stage.", + description="The parent spec.", ) @cached_property def random_generator(self) -> np.random.Generator: """The random generator.""" - return np.random.default_rng(self.progressive_training_iteration_ix) + return np.random.default_rng(self.parent.iteration.current_iter) # @cached_property # def experiment_key(self) -> str: @@ -277,16 +277,16 @@ def random_generator(self) -> np.random.Generator: class SampleSpec(StageSpec): - """A spec for thhe sampling stage of the progressive training.""" + """A spec for the sampling stage of the 
progressive training.""" # TODO: add the ability to receive the last set of error metrics and use them to inform the sampling def stratified_selection(self) -> pd.DataFrame: """Sample the gis data.""" - df = self.progressive_training_spec.gis_data + df = self.parent.gis_data - stratification_field = self.progressive_training_spec.stratification.field - stratification_aliases = self.progressive_training_spec.stratification.aliases + stratification_field = self.parent.stratification.field + stratification_aliases = self.parent.stratification.aliases if stratification_field not in df.columns and not any( alias in df.columns for alias in stratification_aliases @@ -301,16 +301,16 @@ def stratified_selection(self) -> pd.DataFrame: strata = cast(list[str], df[stratification_field].unique().tolist()) - if self.progressive_training_spec.stratification.sampling == "equal": + if self.parent.stratification.sampling == "equal": return self.sample_equally_by_stratum(df, strata, stratification_field) - elif self.progressive_training_spec.stratification.sampling == "error-weighted": + elif self.parent.stratification.sampling == "error-weighted": msg = "Error-weighted sampling is not yet implemented." raise NotImplementedError(msg) - elif self.progressive_training_spec.stratification.sampling == "proportional": + elif self.parent.stratification.sampling == "proportional": msg = "Proportional sampling is not yet implemented." 
raise NotImplementedError(msg) else: - msg = f"Invalid sampling method: {self.progressive_training_spec.stratification.sampling}" + msg = f"Invalid sampling method: {self.parent.stratification.sampling}" raise ValueError(msg) def sample_equally_by_stratum( @@ -332,15 +332,15 @@ def sample_equally_by_stratum( stratum: df[df[stratification_field] == stratum] for stratum in strata } n_per_iter = ( - self.progressive_training_spec.iteration.n_per_iter - if self.progressive_training_iteration_ix != 0 - else self.progressive_training_spec.iteration.n_init + self.parent.iteration.n_per_iter + if self.parent.iteration.current_iter != 0 + else self.parent.iteration.n_init ) n_per_stratum = max( n_per_iter // len(strata), ( - self.progressive_training_spec.iteration.min_per_stratum - if self.progressive_training_iteration_ix == 0 + self.parent.iteration.min_per_stratum + if self.parent.iteration.current_iter == 0 else 0 ), ) @@ -496,14 +496,6 @@ def to_sim_specs(self, df: pd.DataFrame): # s3_client.upload_file(fpath.as_posix(), bucket, key) # return specs_uri - @model_validator(mode="after") - def check_stage(self): - """The sampling spec must have stage set to 'sample'.""" - if self.stage_type != "sample": - msg = f"Invalid stage: {self.stage_type}" - raise ValueError(msg) - return self - class TrainFoldSpec(ExperimentInputSpec): """Train an sbem model for a specific fold. 
@@ -1005,13 +997,10 @@ def non_numeric_options(self) -> dict[str, list[str]]: class TrainWithCVSpec(StageSpec): """Train an SBEM model using a scatter gather approach for cross-fold validation.""" - @model_validator(mode="after") - def check_stage(self): - """The training spec must have stage set to 'train'.""" - if self.stage_type != "train": - msg = f"Invalid stage: {self.stage_type}" - raise ValueError(msg) - return self + data_uri: FileReference = Field( + ..., + description="The uri of the data to train on.", + ) @property def schedule(self) -> list[TrainFoldSpec]: @@ -1022,17 +1011,17 @@ def schedule(self) -> list[TrainFoldSpec]: msg = "Data URI is required for training." raise ValueError(msg) - for i in range(self.progressive_training_spec.cross_val.n_folds): + for i in range(self.parent.cross_val.n_folds): schedule.append( TrainFoldSpec( # TODO: this should be set in a better manner experiment_id="placeholder", sort_index=i, - n_folds=self.progressive_training_spec.cross_val.n_folds, + n_folds=self.parent.cross_val.n_folds, data_uri=data_uri, - stratification_field=self.progressive_training_spec.stratification.field, - progressive_training_iter_ix=self.progressive_training_iteration_ix, - storage_settings=self.progressive_training_spec.storage_settings, + stratification_field=self.parent.stratification.field, + progressive_training_iter_ix=self.parent.iteration.current_iter, + storage_settings=self.parent.storage_settings, ) ) return schedule diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index f5c10f6..1fba256 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -1,6 +1,6 @@ """The training pipeline.""" -from datetime import datetime, timedelta +from datetime import timedelta from pathlib import Path from typing import Literal @@ -10,15 +10,18 @@ from scythe.experiments import ( BaseExperiment, ExperimentRun, - SemVer, - VersionedExperiment, ) from scythe.hatchet import hatchet from 
scythe.registry import ExperimentRegistry from scythe.scatter_gather import RecursionMap, ScatterGatherResult, scatter_gather from globi.models.surrogate.dummy import DummySimulationInput, dummy_simulation -from globi.models.surrogate.training import SampleSpec, TrainFoldSpec, TrainWithCVSpec +from globi.models.surrogate.training import ( + IterationSpec, + ProgressiveTrainingSpec, + TrainFoldSpec, + TrainWithCVSpec, +) class FoldResult(ExperimentOutputSpec): @@ -39,16 +42,6 @@ def train_regressor_with_cv_fold( return FoldResult() -class ExperimentMetadata(BaseModel): - """Metadata about an experiment.""" - - workflow_run_id: str - run_id: str - run_name: str - version: SemVer - datetime: datetime - - class CombineResultsResult(BaseModel): """The result of combining the results of the simulations.""" @@ -59,16 +52,25 @@ class CombineResultsResult(BaseModel): iterative_training = hatchet.workflow( name="iterative_training", description="Sample a collection of buliding simulations to then simulate and train a surrogate model.", - input_validator=SampleSpec, + input_validator=ProgressiveTrainingSpec, ) +class ExperimentRunWithRef(BaseModel): + """An experiment run with a workflow run id.""" + + run: ExperimentRun + workflow_run_id: str + + @iterative_training.task( name="iterative_training.create_simulations", schedule_timeout=timedelta(minutes=30), execution_timeout=timedelta(minutes=10), ) -def create_simulations(spec: SampleSpec, context: Context) -> ExperimentMetadata: +def create_simulations( + spec: ProgressiveTrainingSpec, context: Context +) -> ExperimentRunWithRef: """Create the simulations.""" # STEP 1: Generate the training samples, allocate simulations specs = [ @@ -82,34 +84,29 @@ def create_simulations(spec: SampleSpec, context: Context) -> ExperimentMetadata ] # STEP 2: Simulate the simulations using scythe - root_run_name = spec.progressive_training_spec.experiment_id - run_name = f"{root_run_name}/sample" + run_name = f"{spec.experiment_id}/sample" 
exp = BaseExperiment( # TODO: replace with simulate_globi_flat_building experiment=dummy_simulation, # TODO: add configurability to switch between simulations. run_name=run_name, - storage_settings=spec.progressive_training_spec.storage_settings, + storage_settings=spec.storage_settings or ScytheStorageSettings(), ) run, ref = exp.allocate( specs, - version="bumpmajor", # TODO: bump minor if not the first iteration. - recursion_map=spec.progressive_training_spec.iteration.recursion, + version="bumpmajor", # TODO: bump minor if not the first iteration; actually, not necessary since root experiment takes care of this + recursion_map=spec.iteration.recursion, ) run_name = run.versioned_experiment.base_experiment.run_name if not run_name: msg = "Run name is required." raise ValueError(msg) - run_id = run.experiment_id - return ExperimentMetadata( + return ExperimentRunWithRef( + run=run, workflow_run_id=ref.workflow_run_id, - run_id=run_id, - run_name=run_name, - version=run.versioned_experiment.version, - datetime=run.timestamp, ) @@ -119,7 +116,9 @@ def create_simulations(spec: SampleSpec, context: Context) -> ExperimentMetadata execution_timeout=timedelta(hours=5), parents=[create_simulations], ) -async def await_simulations(spec: SampleSpec, context: Context) -> ScatterGatherResult: +async def await_simulations( + spec: ProgressiveTrainingSpec, context: Context +) -> ScatterGatherResult: """Await the simulations.""" parent_output = context.task_output(create_simulations) workflow_run_id = parent_output.workflow_run_id @@ -136,22 +135,14 @@ async def await_simulations(spec: SampleSpec, context: Context) -> ScatterGather execution_timeout=timedelta(hours=1), parents=[await_simulations, create_simulations], ) -async def combine_results(spec: SampleSpec, context: Context) -> CombineResultsResult: +async def combine_results( + spec: ProgressiveTrainingSpec, context: Context +) -> CombineResultsResult: """Combine the results of the simulations.""" results = 
context.task_output(await_simulations) run_info = context.task_output(create_simulations) # TODO: kind of annoying have to reconstruct the run object here; necessary because the base experiment is not serializable. - _run = ExperimentRun( - versioned_experiment=VersionedExperiment( - base_experiment=BaseExperiment( - experiment=dummy_simulation, # TODO: replace with simulate_globi_flat_building - run_name=run_info.run_name, - storage_settings=spec.progressive_training_spec.storage_settings, - ), - version=run_info.version, - ), - timestamp=run_info.datetime, - ) + _run = run_info.run # files = run.list_results_files() # TODO: configure which files to store/combine via input spec. return CombineResultsResult( @@ -164,7 +155,7 @@ class StartTrainingResult(BaseModel): """The result of starting the training.""" training_spec: TrainWithCVSpec - experiment_metadata: ExperimentMetadata + experiment_run_with_ref: ExperimentRunWithRef @iterative_training.task( @@ -173,29 +164,28 @@ class StartTrainingResult(BaseModel): execution_timeout=timedelta(hours=1), parents=[combine_results], ) -async def start_training(spec: SampleSpec, context: Context) -> StartTrainingResult: +async def start_training( + spec: ProgressiveTrainingSpec, context: Context +) -> StartTrainingResult: """Start the training.""" results = context.task_output(combine_results) train_spec = TrainWithCVSpec( - progressive_training_spec=spec.progressive_training_spec, - progressive_training_iteration_ix=spec.progressive_training_iteration_ix, + parent=spec, data_uri=results.combined_scatter_gather_result.uris[ "main_result" ], # TODO: should be configure which result to use - stage_type="train", ) # TODO: create the training specs and then allocate the experiment specs = train_spec.schedule - root_run_name = spec.progressive_training_spec.experiment_id - run_name = f"{root_run_name}/train" + run_name = f"{spec.experiment_id}/train" exp = BaseExperiment( experiment=train_regressor_with_cv_fold, 
run_name=run_name, - storage_settings=spec.progressive_training_spec.storage_settings, + storage_settings=spec.storage_settings or ScytheStorageSettings(), ) run, ref = exp.allocate( specs, @@ -212,12 +202,9 @@ async def start_training(spec: SampleSpec, context: Context) -> StartTrainingRes return StartTrainingResult( training_spec=train_spec, - experiment_metadata=ExperimentMetadata( + experiment_run_with_ref=ExperimentRunWithRef( + run=run, workflow_run_id=ref.workflow_run_id, - run_id=run.experiment_id, - run_name=run.versioned_experiment.base_experiment.run_name, - version=run.versioned_experiment.version, - datetime=run.timestamp, ), ) @@ -228,10 +215,12 @@ async def start_training(spec: SampleSpec, context: Context) -> StartTrainingRes execution_timeout=timedelta(hours=5), parents=[start_training], ) -async def await_training(spec: SampleSpec, context: Context) -> ScatterGatherResult: +async def await_training( + spec: ProgressiveTrainingSpec, context: Context +) -> ScatterGatherResult: """Await the training.""" parent_output = context.task_output(start_training) - workflow_run_id = parent_output.experiment_metadata.workflow_run_id + workflow_run_id = parent_output.experiment_run_with_ref.workflow_run_id context.log("Awaiting training...") results = await scatter_gather.aio_get_result(workflow_run_id) context.log("Training completed.") @@ -259,11 +248,11 @@ class RecursionTransition(BaseModel): parents=[await_training], ) async def evaluate_training( - spec: SampleSpec, context: Context + spec: ProgressiveTrainingSpec, context: Context ) -> TrainingEvaluationResult: """Evaluate the training.""" _results = context.task_output(await_training) - return TrainingEvaluationResult(converged=True) + return TrainingEvaluationResult(converged=False) @iterative_training.task( @@ -273,30 +262,35 @@ async def evaluate_training( parents=[evaluate_training, start_training], ) async def transition_recursion( - spec: SampleSpec, context: Context + spec: 
ProgressiveTrainingSpec, context: Context ) -> RecursionTransition: """Transition the recursion.""" results = context.task_output(evaluate_training) if results.converged: # create child return RecursionTransition(reasoning="converged", child_workflow_run_id=None) - if ( - spec.progressive_training_iteration_ix + 1 - >= spec.progressive_training_spec.iteration.max_iters - ): + if spec.iteration.at_max_iters: return RecursionTransition(reasoning="max_depth", child_workflow_run_id=None) start_training_output = context.task_output(start_training) - sample_spec = SampleSpec( - progressive_training_spec=spec.progressive_training_spec, - progressive_training_iteration_ix=spec.progressive_training_iteration_ix + 1, - data_uri=start_training_output.training_spec.data_uri, - stage_type="sample", + next_spec = spec.model_copy(deep=True) + next_spec.iteration.current_iter += 1 + next_spec.data_uri = ( + start_training_output.training_spec.data_uri + ) # or could be from combined + exp = BaseExperiment( + experiment=iterative_training, + run_name=f"{next_spec.base_run_name}", + storage_settings=spec.storage_settings or ScytheStorageSettings(), ) - - ref = await iterative_training.aio_run_no_wait( - sample_spec, + _run, ref = exp.allocate( + next_spec, + version="bumpminor", + recursion_map=RecursionMap( + factor=2, + max_depth=0, + ), ) return RecursionTransition( reasoning=None, child_workflow_run_id=ref.workflow_run_id @@ -308,16 +302,34 @@ async def transition_recursion( from globi.models.surrogate.training import ProgressiveTrainingSpec + base_run_name = "test-experiment" progressive_training_spec = ProgressiveTrainingSpec( - experiment_id="test-experiment", + sort_index=0, + experiment_id="placeholder", gis_uri=HttpUrl("https://example.com/gis.parquet"), + iteration=IterationSpec( + max_iters=4, + ), storage_settings=ScytheStorageSettings(), - ) - spec = SampleSpec( - progressive_training_spec=progressive_training_spec, - progressive_training_iteration_ix=0, data_uri=None, 
- stage_type="sample", + base_run_name=base_run_name, + ) + + exp = BaseExperiment( + experiment=iterative_training, + run_name="test-experiment", ) - result = iterative_training.run(spec) - print(result) + + run, ref = exp.allocate( + progressive_training_spec, + version="bumpmajor", + recursion_map=RecursionMap( + factor=2, + max_depth=0, + ), + ) + import yaml + + print(yaml.dump(run.model_dump(mode="json"), indent=2, sort_keys=False)) + # result = iterative_training.run(spec) + # print(result) diff --git a/uv.lock b/uv.lock index 2cf86e0..e9ec544 100644 --- a/uv.lock +++ b/uv.lock @@ -1436,7 +1436,7 @@ requires-dist = [ { name = "rasterio", marker = "extra == 'visualization'", specifier = ">=1.3.9" }, { name = "scikit-learn", specifier = ">=1.3.0" }, { name = "scipy", specifier = ">=1.11.0,<1.15" }, - { name = "scythe-engine", specifier = ">=0.1.2" }, + { name = "scythe-engine", git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows" }, { name = "seaborn", marker = "extra == 'visualization'", specifier = ">=0.13.0" }, { name = "shapely", specifier = ">=2.0.0" }, { name = "streamlit", marker = "extra == 'visualization'", specifier = ">=1.28.0" }, @@ -4260,7 +4260,7 @@ wheels = [ [[package]] name = "scythe-engine" version = "0.1.2" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#a750f3f18525dde2ebb73c9ad0629cba5ede77a1" } dependencies = [ { name = "boto3" }, { name = "fastparquet" }, @@ -4273,10 +4273,6 @@ dependencies = [ { name = "tables" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/00/061a3e1301b03b3b1c6817ea5db19cc62c0448c02c504e391d49273451c2/scythe_engine-0.1.2.tar.gz", hash = "sha256:a53c49a8a8700f1dfd7a61f4868898289c1d3751b42ca767369faf7a3c08dc5e", size = 225628, upload-time = "2026-02-12T15:53:02.416Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d3/69/1cfac0fe0aa049d335f2ff6a3aeef32cc7893551ffe831e4d78ccde50b7b/scythe_engine-0.1.2-py3-none-any.whl", hash = "sha256:b2dd6924c0b26a1dfe9a68e9f6b028b77a944263849c82d41a28e635baf899d8", size = 33195, upload-time = "2026-02-12T15:53:00.827Z" }, -] [[package]] name = "seaborn" From 58058fafa85eca20408806541d580aa45f0d3bf4 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sat, 7 Mar 2026 14:39:32 -0500 Subject: [PATCH 03/27] enable results cache growing --- src/globi/models/surrogate/training.py | 30 ++++-- src/globi/pipelines/training.py | 136 ++++++++++++++----------- 2 files changed, 100 insertions(+), 66 deletions(-) diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 760bacf..963e7fa 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -8,8 +8,8 @@ import pandas as pd from pydantic import BaseModel, Field from scythe.base import ExperimentInputSpec -from scythe.scatter_gather import RecursionMap -from scythe.utils.filesys import FileReference, OptionalFileReference +from scythe.scatter_gather import RecursionMap, ScatterGatherResult +from scythe.utils.filesys import FileReference, S3Url if TYPE_CHECKING: from mypy_boto3_s3.client import S3Client as S3ClientType @@ -197,11 +197,24 @@ class ProgressiveTrainingSpec(ExperimentInputSpec): ..., description="The uri of the gis data to train on.", ) - data_uri: OptionalFileReference = Field( - ..., - description="The uris of the previous simulation results to sample from.", + data_uris: ScatterGatherResult | None = Field( + default=None, + description="The uri of the previous simulation results to train on.", ) + def format_combined_output_key(self, key: str) -> str: + """Format the output key for a combined result file.""" + return f"{self.prefix}/combined/{key}.parquet" + + def format_combined_output_uri(self, key: str) -> S3Url: + """Format the 
output uri for a combined result file.""" + if self.storage_settings is None: + msg = "Storage settings are not set, so we can't construct a combined output uri." + raise ValueError(msg) + return S3Url( + f"s3://{self.storage_settings.BUCKET}/{self.format_combined_output_key(key)}" + ) + @property def gis_path(self) -> Path: """The path to the gis data.""" @@ -997,16 +1010,17 @@ def non_numeric_options(self) -> dict[str, list[str]]: class TrainWithCVSpec(StageSpec): """Train an SBEM model using a scatter gather approach for cross-fold validation.""" - data_uri: FileReference = Field( + data_uris: ScatterGatherResult = Field( ..., - description="The uri of the data to train on.", + description="The uris of the data to train on.", ) @property def schedule(self) -> list[TrainFoldSpec]: """Create the task schedule.""" schedule = [] - data_uri = self.data_uri + # TODO: this should be configured/selected/etc + data_uri = self.data_uris.uris["main_result"] if data_uri is None: msg = "Data URI is required for training." 
raise ValueError(msg) diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 1fba256..96e8bfe 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Literal +import pandas as pd from hatchet_sdk import Context from pydantic import BaseModel, HttpUrl from scythe.base import ExperimentOutputSpec @@ -14,6 +15,7 @@ from scythe.hatchet import hatchet from scythe.registry import ExperimentRegistry from scythe.scatter_gather import RecursionMap, ScatterGatherResult, scatter_gather +from scythe.utils.filesys import S3Url from globi.models.surrogate.dummy import DummySimulationInput, dummy_simulation from globi.models.surrogate.training import ( @@ -30,6 +32,40 @@ class FoldResult(ExperimentOutputSpec): pass +class CombineResultsResult(BaseModel): + """The result of combining the results of the simulations.""" + + incoming: ScatterGatherResult + combined: ScatterGatherResult + + +class ExperimentRunWithRef(BaseModel): + """An experiment run with a workflow run id.""" + + run: ExperimentRun + workflow_run_id: str + + +class StartTrainingResult(BaseModel): + """The result of starting the training.""" + + training_spec: TrainWithCVSpec + experiment_run_with_ref: ExperimentRunWithRef + + +class TrainingEvaluationResult(BaseModel): + """The result of evaluating the training.""" + + converged: bool + + +class RecursionTransition(BaseModel): + """The transition of the recursion.""" + + reasoning: Literal["max_depth", "converged"] | None + child_workflow_run_id: str | None + + @ExperimentRegistry.Register( description="Train a regressor with cross-fold validation.", ) @@ -42,13 +78,6 @@ def train_regressor_with_cv_fold( return FoldResult() -class CombineResultsResult(BaseModel): - """The result of combining the results of the simulations.""" - - scatter_gather_result: ScatterGatherResult - combined_scatter_gather_result: ScatterGatherResult - - iterative_training = 
hatchet.workflow( name="iterative_training", description="Sample a collection of buliding simulations to then simulate and train a surrogate model.", @@ -56,13 +85,6 @@ class CombineResultsResult(BaseModel): ) -class ExperimentRunWithRef(BaseModel): - """An experiment run with a workflow run id.""" - - run: ExperimentRun - workflow_run_id: str - - @iterative_training.task( name="iterative_training.create_simulations", schedule_timeout=timedelta(minutes=30), @@ -133,38 +155,54 @@ async def await_simulations( name="iterative_training.combine_results", schedule_timeout=timedelta(hours=5), execution_timeout=timedelta(hours=1), - parents=[await_simulations, create_simulations], + parents=[await_simulations], ) -async def combine_results( +def combine_results( spec: ProgressiveTrainingSpec, context: Context ) -> CombineResultsResult: """Combine the results of the simulations.""" + # TODO: major consider how we handle beyond-memory scale scenarios. + # i.e. we probably need to refactor to allow lists of files that only the + # main worker is responsible for combining. results = context.task_output(await_simulations) - run_info = context.task_output(create_simulations) - # TODO: kind of annoying have to reconstruct the run object here; necessary because the base experiment is not serializable. - _run = run_info.run - # files = run.list_results_files() - # TODO: configure which files to store/combine via input spec. + combined_results: dict[str, S3Url] = {} + + if spec.data_uris: + shared_keys = set(spec.data_uris.uris.keys()) & set(results.uris.keys()) + old_keys_only = set(spec.data_uris.uris.keys()) - shared_keys + new_keys_only = set(results.uris.keys()) - shared_keys + # TODO: consider copying these over to the `combined` folder anyways. + for key in old_keys_only: + combined_results[key] = spec.data_uris.uris[key] + for key in new_keys_only: + combined_results[key] = results.uris[key] + # TODO: refactor to use a threadpool executor? 
+ # For memory reasons, it might be a good idea to stay single threaded here. + for key in shared_keys: + old_df = pd.read_parquet(str(spec.data_uris.uris[key])) + new_df = pd.read_parquet(str(results.uris[key])) + combined_df = pd.concat([old_df, new_df], axis=0) + uri = spec.format_combined_output_uri(key) + combined_df.to_parquet(str(uri)) + combined_results[key] = uri + + else: + # TODO: consider copying these over to the `combined` folder anyways. + combined_results = results.uris + return CombineResultsResult( - scatter_gather_result=results, - combined_scatter_gather_result=results, + incoming=results, + combined=ScatterGatherResult(uris=combined_results), ) -class StartTrainingResult(BaseModel): - """The result of starting the training.""" - - training_spec: TrainWithCVSpec - experiment_run_with_ref: ExperimentRunWithRef - - @iterative_training.task( name="iterative_training.start_training", schedule_timeout=timedelta(hours=5), execution_timeout=timedelta(hours=1), parents=[combine_results], ) -async def start_training( +def start_training( spec: ProgressiveTrainingSpec, context: Context ) -> StartTrainingResult: """Start the training.""" @@ -172,13 +210,9 @@ async def start_training( train_spec = TrainWithCVSpec( parent=spec, - data_uri=results.combined_scatter_gather_result.uris[ - "main_result" - ], # TODO: should be configure which result to use + data_uris=results.combined, # TODO: should configure which results to use ) - # TODO: create the training specs and then allocate the experiment - specs = train_spec.schedule run_name = f"{spec.experiment_id}/train" @@ -189,7 +223,7 @@ async def start_training( ) run, ref = exp.allocate( specs, - version="bumpmajor", # TODO: bump minor if not the first iteration. 
+ version="bumpmajor", # There is normally only ever one training round per parent minor version, except during replays etc recursion_map=RecursionMap( factor=2, max_depth=0, @@ -228,26 +262,13 @@ async def await_training( return results -class TrainingEvaluationResult(BaseModel): - """The result of evaluating the training.""" - - converged: bool - - -class RecursionTransition(BaseModel): - """The transition of the recursion.""" - - reasoning: Literal["max_depth", "converged"] | None - child_workflow_run_id: str | None - - @iterative_training.task( name="iterative_training.evaluate_training", schedule_timeout=timedelta(hours=5), execution_timeout=timedelta(minutes=5), parents=[await_training], ) -async def evaluate_training( +def evaluate_training( spec: ProgressiveTrainingSpec, context: Context ) -> TrainingEvaluationResult: """Evaluate the training.""" @@ -259,9 +280,9 @@ async def evaluate_training( name="iterative_training.transition_recursion", schedule_timeout=timedelta(hours=5), execution_timeout=timedelta(minutes=5), - parents=[evaluate_training, start_training], + parents=[evaluate_training, combine_results], ) -async def transition_recursion( +def transition_recursion( spec: ProgressiveTrainingSpec, context: Context ) -> RecursionTransition: """Transition the recursion.""" @@ -272,13 +293,12 @@ async def transition_recursion( if spec.iteration.at_max_iters: return RecursionTransition(reasoning="max_depth", child_workflow_run_id=None) - start_training_output = context.task_output(start_training) + # start_training_output = context.task_output(start_training) + combine_results_output = context.task_output(combine_results) next_spec = spec.model_copy(deep=True) next_spec.iteration.current_iter += 1 - next_spec.data_uri = ( - start_training_output.training_spec.data_uri - ) # or could be from combined + next_spec.data_uris = combine_results_output.combined exp = BaseExperiment( experiment=iterative_training, run_name=f"{next_spec.base_run_name}", @@ -311,7 
+331,7 @@ async def transition_recursion( max_iters=4, ), storage_settings=ScytheStorageSettings(), - data_uri=None, + data_uris=None, base_run_name=base_run_name, ) From 3707ecee60ea7a75e858d47f6d8b61e9c64097a6 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sat, 7 Mar 2026 15:20:34 -0500 Subject: [PATCH 04/27] clean up some vestiges and update scythe --- src/globi/models/surrogate/training.py | 79 -------------------------- src/globi/pipelines/training.py | 5 ++ uv.lock | 2 +- 3 files changed, 6 insertions(+), 80 deletions(-) diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 963e7fa..65be229 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -227,23 +227,6 @@ def gis_data(self) -> pd.DataFrame: """Load the gis data.""" return pd.read_parquet(self.gis_path) - # def s3_key_for_iteration(self, iteration_ix: int) -> str: - # """The s3 root key for the iteration.""" - # return f"{self.experiment_id}/iter-{iteration_ix:03d}" - - # def upload_self(self, s3_client: S3ClientType): - # """Upload a dumpout of this spec to the s3 bucket root.""" - # with tempfile.TemporaryDirectory() as tempdir: - # tempdir = Path(tempdir) - # fpath = tempdir / "spec.yml" - # with open(fpath, "w") as f: - # yaml.dump(self.model_dump(mode="json"), f, indent=2) - # s3_client.upload_file( - # fpath.as_posix(), - # self.bucket, - # f"hatchet/{self.experiment_id}/artifacts/experiment-spec.yml", - # ) - class StageSpec(BaseModel): """A spec that is common to both the sample and train stages (and possibly others).""" @@ -258,36 +241,6 @@ def random_generator(self) -> np.random.Generator: """The random generator.""" return np.random.default_rng(self.parent.iteration.current_iter) - # @cached_property - # def experiment_key(self) -> str: - # """The root key for the experiment.""" - # return 
f"{self.progressive_training_spec.s3_key_for_iteration(self.progressive_training_iteration_ix)}/{self.stage_type}" - - # def load_previous_data(self, s3_client: S3ClientType) -> pd.DataFrame | None: - # """Load the previous data.""" - # if self.data_uri is None: - # return None - # with tempfile.TemporaryDirectory() as tmpdir: - # tmpdir = Path(tmpdir) - # fpath = tmpdir / "previous_data.parquet" - # fetch_uri( - # uri=self.data_uri, - # local_path=fpath, - # use_cache=False, - # s3=s3_client, - # ) - # df = pd.read_parquet(fpath) - # return df - - -# BASE EXPERIMENT/v1.0.0 -# BASE EXPERIMENT/v1.0.0/simulations/v1.0.0/[...] -# BASE EXPERIMENT/v1.0.0/training/v1.0.0/[...] -# BASE EXPERIMENT/v1.0.0/simulations/v2.0.0/[...] -# BASE EXPERIMENT/v1.0.0/training/v2.0.0/[...] -# BASE EXPERIMENT/v1.0.0/simulations/v2.0.0/[...] -# BASE EXPERIMENT/v1.0.0/training/v3.0.0/[...] - class SampleSpec(StageSpec): """A spec for the sampling stage of the progressive training.""" @@ -477,38 +430,6 @@ def to_sim_specs(self, df: pd.DataFrame): # } # return payload - # def combine_results(self, new_data_uri: URIResponse, s3_client: S3ClientType): - # """Combine the results of the previous and new data.""" - # previous_data = self.load_previous_data(s3_client) - # with tempfile.TemporaryDirectory() as tmpdir: - # tmpdir = Path(tmpdir) - # fpath = tmpdir / "new_data.parquet" - # fetch_uri( - # uri=new_data_uri.uri, local_path=fpath, use_cache=False, s3=s3_client - # ) - # # TODO: data frame subsection selection should be a configuration option within the - # # progressive iteration training spec. 
- # df = cast( - # pd.DataFrame, - # cast(pd.DataFrame, pd.read_hdf(fpath, key="results")), - # ) - # if previous_data is not None: - # df = pd.concat([previous_data, df], axis=0) - - # # strip out any constant columns - # is_all_zeros = (df.max(axis=0) - df.min(axis=0)).abs() < 1e-5 - # df = df.loc[:, ~is_all_zeros] - # # serialize to a parquet file and upload to s3 - # bucket = self.progressive_training_spec.bucket - # with tempfile.TemporaryDirectory() as tmpdir: - # tmpdir = Path(tmpdir) - # fpath = tmpdir / "results.parquet" - # df.to_parquet(fpath) - # key = f"hatchet/{self.experiment_key}/full-dataset.pq" - # specs_uri = f"s3://{bucket}/{key}" - # s3_client.upload_file(fpath.as_posix(), bucket, key) - # return specs_uri - class TrainFoldSpec(ExperimentInputSpec): """Train an sbem model for a specific fold. diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 96e8bfe..e614d53 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -167,6 +167,11 @@ def combine_results( results = context.task_output(await_simulations) combined_results: dict[str, S3Url] = {} + # TODO: in the old version, w removed constant columns from the data, i.e.: + # is_constant = (df.max(axis=0) - df.min(axis=0)).abs() < 1e-5 + # df = df.loc[:, ~is_constant] + # Should this sort of data cleaning be done here, or should it be done in the training task? 
+ if spec.data_uris: shared_keys = set(spec.data_uris.uris.keys()) & set(results.uris.keys()) old_keys_only = set(spec.data_uris.uris.keys()) - shared_keys diff --git a/uv.lock b/uv.lock index e9ec544..e642899 100644 --- a/uv.lock +++ b/uv.lock @@ -4260,7 +4260,7 @@ wheels = [ [[package]] name = "scythe-engine" version = "0.1.2" -source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#a750f3f18525dde2ebb73c9ad0629cba5ede77a1" } +source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#9aad5d97eaa9ca33bc5ac9e21ec31c9b60f677f1" } dependencies = [ { name = "boto3" }, { name = "fastparquet" }, From 79f0d19d513ca4472312c5643ae237fe01439d09 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sat, 7 Mar 2026 18:04:52 -0500 Subject: [PATCH 05/27] improve data management --- src/globi/models/surrogate/training.py | 189 +++++++++---------------- src/globi/pipelines/training.py | 6 +- 2 files changed, 73 insertions(+), 122 deletions(-) diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 65be229..37a67db 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -1,5 +1,6 @@ """Models used for the surrogate training pipeline.""" +import math from functools import cached_property from pathlib import Path from typing import TYPE_CHECKING, Literal, cast @@ -458,66 +459,41 @@ class TrainFoldSpec(ExperimentInputSpec): However, with xgb, this is less imperative. """ - n_folds: int = Field( - ..., description="The number of folds for the entire parent task." 
- ) - data_uri: FileReference = Field(..., description="The uri of the data to train on.") - stratification_field: str = Field( - ..., - description="The field to stratify the data by for monitoring convergence in parent task.", - ) - progressive_training_iter_ix: int = Field( - ..., - description="The index of the current training iteration within the outer loop.", + data_uris: dict[str, S3Url] = Field( + ..., description="The uris of the data to train on." ) - - @property - def data_path(self) -> Path: - """The path to the data.""" - if isinstance(self.data_uri, Path): - return self.data_uri - return self.fetch_uri(self.data_uri) + parent: ProgressiveTrainingSpec = Field(..., description="The parent spec.") @cached_property + def combined_data(self) -> pd.DataFrame: + """Combines the data from the data uris into a single dataframe with a flattened column index.""" + dfs: dict[str, pd.DataFrame] = { + key: pd.read_parquet(str(uri)) for key, uri in self.data_uris.items() + } + if not all( + df.index.equals(next(iter(dfs.values())).index) for df in dfs.values() + ): + msg = "The indices of the dataframes are not all equal. " + "This is not supported, since the features must be identical for all outputs.." 
+ raise ValueError(msg) + + for df in dfs.values(): + df.columns = df.columns.to_flat_index() + df.columns = [ + "/".join(col) if isinstance(col, tuple | list) else col + for col in df.columns + ] + + combined_df = pd.concat(dfs, axis=1) + combined_df.columns = combined_df.columns.to_flat_index() + combined_df.columns = ["/".join(col) for col in combined_df.columns] + shuffled_df = combined_df.sample(frac=1, random_state=42, replace=False) + return shuffled_df + + @property def data(self) -> pd.DataFrame: - """The data.""" - df_all = pd.read_parquet(self.data_path) - df_energy: pd.DataFrame = cast(pd.DataFrame, df_all["Energy"]["Raw"]) - df_energy = cast( - pd.DataFrame, - ( - df_energy.T.groupby( - level=[ - lev for lev in df_energy.columns.names if lev.lower() != "month" - ] - ) - .sum() - .T - ), - ) - df_peaks: pd.DataFrame = cast(pd.DataFrame, df_all["Peak"]["Raw"]) - df_peaks = cast( - pd.DataFrame, - ( - df_peaks.T.groupby( - level=[ - lev for lev in df_peaks.columns.names if lev.lower() != "month" - ] - ) - .max() - .T - ), - ) - df_all_annual = pd.concat( - [df_energy, df_peaks], - axis=1, - keys=["Energy", "Peak"], - names=["Measurement"], - ) - # TODO: should we assume they are shuffled already? 
- # shuffle the order of the rows - df_all_annual = df_all_annual.sample(frac=1, random_state=42, replace=False) - return df_all_annual + """The combined data.""" + return self.combined_data @cached_property def dparams(self) -> pd.DataFrame: @@ -527,7 +503,7 @@ def dparams(self) -> pd.DataFrame: @cached_property def stratum_names(self) -> list[str]: """The values of the stratification field.""" - return sorted(self.dparams[self.stratification_field].unique().tolist()) + return sorted(self.dparams[self.parent.stratification.field].unique().tolist()) @cached_property def data_by_stratum(self) -> dict[str, pd.DataFrame]: @@ -543,7 +519,8 @@ def data_by_stratum(self) -> dict[str, pd.DataFrame]: """ return { val: cast( - pd.DataFrame, self.data[self.dparams[self.stratification_field] == val] + pd.DataFrame, + self.data[self.dparams[self.parent.stratification.field] == val], ) for val in self.stratum_names } @@ -560,15 +537,17 @@ def train_test_split_by_fold_and_stratum(self) -> pd.DataFrame: all_strata = [] for val in self.stratum_names: folds = [] - for i in range(self.n_folds): - fold = self.data_by_stratum[val].iloc[i :: self.n_folds] + for i in range(self.parent.cross_val.n_folds): + fold = self.data_by_stratum[val].iloc[ + i :: self.parent.cross_val.n_folds + ] folds.append(fold) folds_df = pd.concat( folds, axis=0, keys=[ "test" if i == self.sort_index else "train" - for i in range(self.n_folds) + for i in range(self.parent.cross_val.n_folds) ], names=["split_segment"], ) @@ -618,38 +597,38 @@ def non_numeric_options(self) -> dict[str, list[str]]: } return non_numeric_options - # @cached_property - # def numeric_min_maxs(self) -> dict[str, tuple[float, float]]: - # """Get the min and max for numeric features. + @cached_property + def numeric_min_maxs(self) -> dict[str, tuple[float, float]]: + """Get the min and max for numeric features. - # We perform this only on the training set to prevent leakage. + We perform this only on the training set to prevent leakage. 
- # TODO: In the future, this should be based off of transform instructions. + TODO: In the future, this should be based off of transform instructions. - # Args: - # params (pd.DataFrame): The parameters to get the min and max for. + Args: + params (pd.DataFrame): The parameters to get the min and max for. - # Returns: - # norm_bounds (dict[str, tuple[float, float]]): The min and max for each numeric feature. - # """ - # params, _ = self.train_segment - # fparams = params[[col for col in params.columns if col.startswith("feature.")]] - # numeric_cols = fparams.select_dtypes(include=["number"]).columns - # numeric_min_maxs = { - # col: (float(fparams[col].min()), float(fparams[col].max())) - # for col in numeric_cols - # } - # for col in numeric_min_maxs: - # low, high = numeric_min_maxs[col] - # # we want to floor the "low" value down to the nearest 0.001 - # # and ceil the "high" value up to the nearest 0.001 - # # e.g. if low is -0.799, we want to set it to -0.800 - # # and if high is 0.799, we want to set it to 0.800 - # numeric_min_maxs[col] = ( - # math.floor(low * 1000) / 1000, - # math.ceil(high * 1000) / 1000, - # ) - # return numeric_min_maxs + Returns: + norm_bounds (dict[str, tuple[float, float]]): The min and max for each numeric feature. + """ + params, _ = self.train_segment + fparams = params[[col for col in params.columns if col.startswith("feature.")]] + numeric_cols = fparams.select_dtypes(include=["number"]).columns + numeric_min_maxs = { + col: (float(fparams[col].min()), float(fparams[col].max())) + for col in numeric_cols + } + for col in numeric_min_maxs: + low, high = numeric_min_maxs[col] + # we want to floor the "low" value down to the nearest 0.001 + # and ceil the "high" value up to the nearest 0.001 + # e.g. 
if low is -0.799, we want to set it to -0.800 + # and if high is 0.799, we want to set it to 0.800 + numeric_min_maxs[col] = ( + math.floor(low * 1000) / 1000, + math.ceil(high * 1000) / 1000, + ) + return numeric_min_maxs # @cached_property # def feature_spec(self) -> RegressorInputSpec: @@ -941,10 +920,6 @@ def schedule(self) -> list[TrainFoldSpec]: """Create the task schedule.""" schedule = [] # TODO: this should be configured/selected/etc - data_uri = self.data_uris.uris["main_result"] - if data_uri is None: - msg = "Data URI is required for training." - raise ValueError(msg) for i in range(self.parent.cross_val.n_folds): schedule.append( @@ -952,36 +927,12 @@ def schedule(self) -> list[TrainFoldSpec]: # TODO: this should be set in a better manner experiment_id="placeholder", sort_index=i, - n_folds=self.parent.cross_val.n_folds, - data_uri=data_uri, - stratification_field=self.parent.stratification.field, - progressive_training_iter_ix=self.parent.iteration.current_iter, - storage_settings=self.parent.storage_settings, + data_uris=self.data_uris.uris, + parent=self.parent, ) ) return schedule - # def allocate(self, s3_client: S3ClientType): - # """Allocate the task.""" - # # 1. turn the schedule into a parquet dataframe - # df = pd.DataFrame([m.model_dump(mode="json") for m in self.schedule]) - # bucket = self.progressive_training_spec.bucket - # with tempfile.TemporaryDirectory() as tempdir: - # temp_path = Path(tempdir) / "train_specs.parquet" - # df.to_parquet(temp_path) - # key = f"hatchet/{self.experiment_key}/train_specs.parquet" - # specs_uri = f"s3://{bucket}/{key}" - # s3_client.upload_file(temp_path.as_posix(), bucket, key) - - # payload = { - # "specs": specs_uri, - # "bucket": bucket, - # # TODO: this should be selected in a better manner. 
- # "workflow_name": "train_regressor_with_cv_fold", - # "experiment_id": self.experiment_key, - # } - # return payload - # def check_convergence(self, uri: URIResponse, s3_client: S3ClientType): # """Check the convergence of the training.""" # with tempfile.TemporaryDirectory() as tempdir: diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index e614d53..5cd58cf 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -29,7 +29,7 @@ class FoldResult(ExperimentOutputSpec): """The output for a fold.""" - pass + columns: list[str] class CombineResultsResult(BaseModel): @@ -74,8 +74,7 @@ def train_regressor_with_cv_fold( ) -> FoldResult: """Train a regressor with cross-fold validation.""" # DO TRAINING - - return FoldResult() + return FoldResult(columns=input_spec.data.columns.tolist()) iterative_training = hatchet.workflow( @@ -218,6 +217,7 @@ def start_training( data_uris=results.combined, # TODO: should configure which results to use ) + # Alternatively, one task per fold-column combination? 
specs = train_spec.schedule run_name = f"{spec.experiment_id}/train" From 44338dec63567e60e42c03849919443920bcff09 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sat, 7 Mar 2026 21:22:43 -0500 Subject: [PATCH 06/27] begin training --- .cursor/rules/hatchet-docs.mdc | 12 + docker-compose.yml | 4 + pyproject.toml | 49 ++- src/globi/models/surrogate/dummy.py | 10 +- src/globi/models/surrogate/training.py | 335 +++++++++++++-- src/globi/pipelines/training.py | 18 +- src/globi/worker/Dockerfile | 4 +- uv.lock | 567 ++++++++++++++++++++++++- 8 files changed, 931 insertions(+), 68 deletions(-) create mode 100644 .cursor/rules/hatchet-docs.mdc diff --git a/.cursor/rules/hatchet-docs.mdc b/.cursor/rules/hatchet-docs.mdc new file mode 100644 index 0000000..7738938 --- /dev/null +++ b/.cursor/rules/hatchet-docs.mdc @@ -0,0 +1,12 @@ +--- +description: Hatchet documentation MCP server +alwaysApply: true +--- + +When working with Hatchet (task queues, workflows, durable execution), use the +Hatchet MCP docs server for accurate, up-to-date API reference and examples. + +MCP server URL: https://docs.hatchet.run/api/mcp + +Use the search_docs tool to find relevant documentation pages, or get_full_docs +for comprehensive context. Documentation covers Python, TypeScript, and Go SDKs. 
diff --git a/docker-compose.yml b/docker-compose.yml index c5a1d8a..9e945da 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,10 @@ services: deploy: mode: replicated replicas: ${SIMULATIONS_REPLICAS:-4} + resources: + reservations: + devices: + - capabilities: [gpu] # Requests access to all GPUs volumes: - ./inputs:/code/inputs - ./outputs:/code/outputs diff --git a/pyproject.toml b/pyproject.toml index 8315ab7..e9d3431 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,9 +46,26 @@ visualization = [ "playwright>=1.40.0", ] -ml = [ +# ml = [ +# "torch>=2.5.0", +# "lightgbm>=4.6.0", +# "xgboost>=3.2.0", +# "pytorch-tabular>=1.2.0", +# "tensorboard>=2.20.0", +# "wandb>=0.25.0", +# "pytorch-tabular>=1.2.0", +# "torch>=2.5.0", +# ] + +ml-gpu = [ + "torch>=2.5.0", "lightgbm>=4.6.0", "xgboost>=3.2.0", + "pytorch-tabular>=1.2.0", + "tensorboard>=2.20.0", + "wandb>=0.25.0", + "pytorch-tabular>=1.2.0", + "torch>=2.5.0", ] cli = [ @@ -79,11 +96,41 @@ docs = [ "mkdocs-click>=0.9.0", ] +# [tool.uv] +# conflicts = [ +# [ +# { extra = "ml" }, +# { extra = "ml-gpu" }, +# ], +# ] + [project.scripts] worker = "globi.worker.main:main" globi = "globi.tools.cli.main:cli" +[[tool.uv.index]] +name = "pytorch-cu128" +url = "https://download.pytorch.org/whl/cu128" +explicit = true + +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true + +[[tool.uv.index]] +name = "pypi" +url = "https://pypi.org/simple" +explicit = true + [tool.uv.sources] +# PyTorch: CUDA 12.8 on Linux/Windows (where builds exist), PyPI (CPU) on macOS +torch = [ + { index = "pytorch-cu128", marker = "sys_platform != 'darwin'", extra = "ml-gpu" }, +# { index = "pytorch-cpu", marker = "sys_platform != 'darwin'", extra = "ml" }, + { index = "pypi", marker = "sys_platform == 'darwin'", extra = "ml-gpu" }, +# { index = "pypi", marker = "sys_platform == 'darwin'", extra = "ml" }, +] # scythe-engine = {git = "https://github.com/szvsw/scythe", branch = 
"feature/allow-optional-filerefs"} scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/allow-versioning-workflows"} # scythe-engine = {path = "../scythe", editable = true} diff --git a/src/globi/models/surrogate/dummy.py b/src/globi/models/surrogate/dummy.py index 0617998..1ebcf24 100644 --- a/src/globi/models/surrogate/dummy.py +++ b/src/globi/models/surrogate/dummy.py @@ -1,6 +1,8 @@ """Dummy simulation for testing.""" +import math from pathlib import Path +from typing import Literal import pandas as pd from scythe.base import ExperimentInputSpec, ExperimentOutputSpec @@ -10,8 +12,10 @@ class DummySimulationInput(ExperimentInputSpec): """The input for the dummy simulation.""" + weather_file: Literal["some", "other"] a: int b: float + c: int class DummySimulationOutput(ExperimentOutputSpec): @@ -30,9 +34,11 @@ def dummy_simulation( df = pd.DataFrame({ "target_0": [input_spec.a + input_spec.b], "target_1": [input_spec.a - input_spec.b], - "target_2": [input_spec.a * input_spec.b], - "target_3": [input_spec.a / input_spec.b], + "target_2": [input_spec.a * input_spec.b * input_spec.c], + "target_3": [input_spec.a / math.sin(input_spec.b)], }) + df_neg = -df + df = pd.concat([df, df_neg], axis=1, keys=["positive", "negative"], names=["sign"]) df = df.set_index(input_spec.make_multiindex()) return DummySimulationOutput( c=input_spec.a + input_spec.b, dataframes={"main_result": df} diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 37a67db..7d42e06 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -1,6 +1,6 @@ """Models used for the surrogate training pipeline.""" -import math +import warnings from functools import cached_property from pathlib import Path from typing import TYPE_CHECKING, Literal, cast @@ -166,6 +166,56 @@ def at_max_iters(self) -> bool: return self.current_iter + 1 >= self.max_iters +class TargetsConfigSpec(BaseModel): + """The 
targets config spec.""" + + columns: list[str] = Field( + default_factory=list, description="The columns to use as targets." + ) + normalization: Literal["min-max", "standard", "none"] = Field( + default="none", description="The normalization method to use." + ) + + +class FeatureConfigSpec(BaseModel): + """The feature config spec.""" + + continuous_columns: frozenset[str] = Field( + default=frozenset(), description="The continuous columns to use as features." + ) + categorical_columns: frozenset[str] = Field( + default=frozenset(), description="The categorical columns to use as features." + ) + exclude_columns: frozenset[str] = Field( + default=frozenset(), + description="The columns to exclude from the features.", + ) + cont_cat_unicity_transition_threshold: int = Field( + default=10, + description="The threshold for the number of unique values to transition from continuous to categorical variable.", + ) + + +EXCLUDED_COLUMNS = frozenset({ + "experiment_id", + "sort_index", + "workflow_run_id", + "root_workflow_run_id", +}) + + +class RegressionIOConfigSpec(BaseModel): + """The input/output spec for a regression model.""" + + targets: TargetsConfigSpec = Field( + default_factory=TargetsConfigSpec, description="The targets config spec." + ) + features: FeatureConfigSpec = Field( + default_factory=FeatureConfigSpec, + description="The features config spec.", + ) + + # TODO: should this be a subclass of ExperimentInputSpec? 
class ProgressiveTrainingSpec(ExperimentInputSpec): """A spec for iteratively training an SBEM regression model.""" @@ -178,10 +228,14 @@ class ProgressiveTrainingSpec(ExperimentInputSpec): default_factory=ConvergenceThresholds, description="The convergence criteria.", ) - model_hyperparameters: ModelHPType = Field( - default_factory=LGBHyperparameters, - description="The hyperparameters for the model.", + regression_io_config: RegressionIOConfigSpec = Field( + default_factory=RegressionIOConfigSpec, + description="The regression io config spec.", ) + # model_hyperparameters: ModelHPType = Field( + #     default_factory=LGBHyperparameters, + #     description="The hyperparameters for the model.", + # ) stratification: StratificationSpec = Field( default_factory=StratificationSpec, description="The stratification spec.", @@ -478,7 +532,10 @@ def combined_data(self) -> pd.DataFrame: raise ValueError(msg) for df in dfs.values(): + # TODO: use level names while constructing the sequential name + _level_names = df.columns.names df.columns = df.columns.to_flat_index() + df.columns = [ "/".join(col) if isinstance(col, tuple | list) else col for col in df.columns @@ -500,6 +557,91 @@ def dparams(self) -> pd.DataFrame: """The index of the data.""" return self.data.index.to_frame() + @cached_property + def all_columns(self) -> frozenset[str]: + """The names of all columns.""" + return frozenset(self.data.columns) + + @cached_property + def continuous_columns(self) -> frozenset[str]: + """The continuous columns.""" + feature_conf = self.parent.regression_io_config.features + candidates = self.all_columns - feature_conf.exclude_columns - EXCLUDED_COLUMNS + object_dtype_columns = ( + self.data[candidates].select_dtypes(include=["object"]).columns.tolist() + ) + candidates = candidates - frozenset(object_dtype_columns) + nunique_counts = cast(pd.Series, self.data[candidates].nunique()) + thresh = feature_conf.cont_cat_unicity_transition_threshold + passing_candidates = cast( + list[str],
+ cast(pd.Series, nunique_counts[nunique_counts > thresh]).index.tolist(), + ) + non_passing_candidates = cast( + list[str], + cast(pd.Series, nunique_counts[nunique_counts <= thresh]).index.tolist(), + ) + prespecified = feature_conf.continuous_columns + if prespecified: + skipped_candidates = frozenset(passing_candidates) - (prespecified) + possibly_not_continuous_candidates = ( + frozenset(non_passing_candidates) & prespecified + ) + if possibly_not_continuous_candidates: + warnings.warn( + f"The following columns were specified as continuous but have less than {thresh} unique values: {possibly_not_continuous_candidates}", + stacklevel=2, + ) + if skipped_candidates: + warnings.warn( + f"The following columns are likely continuous but are not included in the continuous columns: {skipped_candidates}", + stacklevel=2, + ) + return prespecified + return frozenset(passing_candidates) + + @cached_property + def categorical_columns(self) -> frozenset[str]: + """The categorical columns.""" + feature_conf = self.parent.regression_io_config.features + candidates = self.all_columns - feature_conf.exclude_columns - EXCLUDED_COLUMNS + object_dtype_columns = ( + self.data[candidates].select_dtypes(include=["object"]).columns.tolist() + ) + non_obj_dtype_columns = candidates - frozenset(object_dtype_columns) + nunique_counts = cast(pd.Series, self.data[non_obj_dtype_columns].nunique()) + thresh = feature_conf.cont_cat_unicity_transition_threshold + passing_non_obj_dtype_candidates = cast( + list[str], + cast(pd.Series, nunique_counts[nunique_counts <= thresh]).index.tolist(), + ) + non_passing_non_obj_dtype_candidates = cast( + list[str], + cast(pd.Series, nunique_counts[nunique_counts > thresh]).index.tolist(), + ) + prespecified = feature_conf.categorical_columns + if prespecified: + skipped_candidates = frozenset(passing_non_obj_dtype_candidates) - ( + prespecified + ) + possibly_not_categorical_candidates = ( + frozenset(non_passing_non_obj_dtype_candidates) & prespecified + 
) + if possibly_not_categorical_candidates: + warnings.warn( + f"The following columns were specified as categorical but have more than {thresh} unique values: {possibly_not_categorical_candidates}", + stacklevel=2, + ) + if skipped_candidates: + warnings.warn( + f"The following columns are likely categorical but are not included in the categorical columns: {skipped_candidates}", + stacklevel=2, + ) + return prespecified + return frozenset(passing_non_obj_dtype_candidates) | frozenset( + object_dtype_columns + ) + @cached_property def stratum_names(self) -> list[str]: """The values of the stratification field.""" @@ -579,56 +721,155 @@ def test_segment(self) -> tuple[pd.DataFrame, pd.DataFrame]: return params, targets @cached_property - def non_numeric_options(self) -> dict[str, list[str]]: - """Get the non-numeric options for categorical features. - - We must perform this across the entire dataset not just splits for consistency - and to ensure we get all options. + def targets(self) -> list[str]: + """The list of regression targets.""" + return ( + self.parent.regression_io_config.targets.columns + or self.data.columns.tolist() + ) - TODO: In the future, this should be based off of transform instructions. - """ - fparams = self.dparams[ - [col for col in self.dparams.columns if col.startswith("feature.")] + @cached_property + def target_range(self) -> list[tuple[float, float]]: + """The range of the regression targets.""" + _, targets = self.train_segment + targets = targets[self.targets] + return [ + (float(targets[col].min() * 0.8), float(targets[col].max() * 1.2)) + for col in self.targets ] - non_numeric_cols = fparams.select_dtypes(include=["object"]).columns - non_numeric_options = { - col: sorted(cast(pd.Series, fparams[col]).unique().tolist()) - for col in non_numeric_cols - } - return non_numeric_options - @cached_property - def numeric_min_maxs(self) -> dict[str, tuple[float, float]]: - """Get the min and max for numeric features.
+ def train_pytorch_tabular(self, tempdir: Path): + """Train a pytorch tabular model.""" + from pytorch_tabular import TabularModel + from pytorch_tabular.config import ( + DataConfig, + ExperimentConfig, + OptimizerConfig, + TrainerConfig, + ) + from pytorch_tabular.models import GANDALFConfig + from pytorch_tabular.models.common.heads import LinearHeadConfig + + data_config = DataConfig( + target=self.targets, + continuous_cols=list(self.continuous_columns), + categorical_cols=list(self.categorical_columns), + # validation_split=0.2, + # continuous_feature_transform="", + # normalize_continuous_features=True, + ) + n_epochs = 200 + optimizer_config = OptimizerConfig( # TODO: make this all configurable + optimizer="AdamW", + optimizer_params={"weight_decay": 1e-5}, + lr_scheduler="CosineAnnealingLR", + lr_scheduler_params={"T_max": n_epochs, "eta_min": 1e-5}, + ) + trainer_config = TrainerConfig( + batch_size=256, + fast_dev_run=False, + max_epochs=n_epochs, + min_epochs=max(n_epochs // 20, 1), + early_stopping=None, + # gradient_clip_val=1.0, + # auto_lr_find=False + # max_time=60, + ) - We perform this only on the training set to prevent leakage. + model_config = GANDALFConfig( + task="regression", + head="LinearHead", + head_config=LinearHeadConfig( + # layers="", + activation="SiLU", + use_batch_norm=False, + # dropout=0, + ).__dict__, + target_range=self.target_range, + embedding_dims=None, + embedding_dropout=0.1, + batch_norm_continuous_input=True, + gflu_stages=6, + gflu_dropout=0.0, + gflu_feature_init_sparsity=0.3, + learnable_sparsity=True, + ) - TODO: In the future, this should be based off of transform instructions. + experiment_config = ExperimentConfig( + run_name=self.experiment_id, + project_name="globi-surrogate-training", + log_target="tensorboard", + ) - Args: - params (pd.DataFrame): The parameters to get the min and max for. 
+ model = TabularModel( + data_config=data_config, + optimizer_config=optimizer_config, + trainer_config=trainer_config, + experiment_config=experiment_config, + model_config=model_config, + ) - Returns: - norm_bounds (dict[str, tuple[float, float]]): The min and max for each numeric feature. - """ - params, _ = self.train_segment - fparams = params[[col for col in params.columns if col.startswith("feature.")]] - numeric_cols = fparams.select_dtypes(include=["number"]).columns - numeric_min_maxs = { - col: (float(fparams[col].min()), float(fparams[col].max())) - for col in numeric_cols - } - for col in numeric_min_maxs: - low, high = numeric_min_maxs[col] - # we want to floor the "low" value down to the nearest 0.001 - # and ceil the "high" value up to the nearest 0.001 - # e.g. if low is -0.799, we want to set it to -0.800 - # and if high is 0.799, we want to set it to 0.800 - numeric_min_maxs[col] = ( - math.floor(low * 1000) / 1000, - math.ceil(high * 1000) / 1000, - ) - return numeric_min_maxs + _, train_targets = self.train_segment + _, test_targets = self.test_segment + trainer = model.fit( + train=train_targets.reset_index(), + validation=test_targets.reset_index(), + seed=42, + ) + model.save_model((tempdir / "model").as_posix()) + return model, trainer + + # @cached_property + # def non_numeric_options(self) -> dict[str, list[str]]: + # """Get the non-numeric options for categorical features. + + # We must perform this across the entire dataset not just splits for consistency + # and to ensure we get all options. + + # TODO: In the future, this should be based off of transform instructions. 
+ # """ + # fparams = self.dparams[ + # [col for col in self.dparams.columns if col.startswith("feature.")] + # ] + # non_numeric_cols = fparams.select_dtypes(include=["object"]).columns + # non_numeric_options = { + # col: sorted(cast(pd.Series, fparams[col]).unique().tolist()) + # for col in non_numeric_cols + # } + # return non_numeric_options + + # @cached_property + # def numeric_min_maxs(self) -> dict[str, tuple[float, float]]: + # """Get the min and max for numeric features. + + # We perform this only on the training set to prevent leakage. + + # TODO: In the future, this should be based off of transform instructions. + + # Args: + # params (pd.DataFrame): The parameters to get the min and max for. + + # Returns: + # norm_bounds (dict[str, tuple[float, float]]): The min and max for each numeric feature. + # """ + # params, _ = self.train_segment + # fparams = params[[col for col in params.columns if col.startswith("feature.")]] + # numeric_cols = fparams.select_dtypes(include=["number"]).columns + # numeric_min_maxs = { + # col: (float(fparams[col].min()), float(fparams[col].max())) + # for col in numeric_cols + # } + # for col in numeric_min_maxs: + # low, high = numeric_min_maxs[col] + # # we want to floor the "low" value down to the nearest 0.001 + # # and ceil the "high" value up to the nearest 0.001 + # # e.g. 
if low is -0.799, we want to set it to -0.800 + # # and if high is 0.799, we want to set it to 0.800 + # numeric_min_maxs[col] = ( + # math.floor(low * 1000) / 1000, + # math.ceil(high * 1000) / 1000, + # ) + # return numeric_min_maxs # @cached_property # def feature_spec(self) -> RegressorInputSpec: diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 5cd58cf..f12589c 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -1,5 +1,6 @@ """The training pipeline.""" +import random from datetime import timedelta from pathlib import Path from typing import Literal @@ -74,6 +75,7 @@ def train_regressor_with_cv_fold( ) -> FoldResult: """Train a regressor with cross-fold validation.""" # DO TRAINING + _model, _trainer = input_spec.train_pytorch_tabular(tempdir) return FoldResult(columns=input_spec.data.columns.tolist()) @@ -96,12 +98,14 @@ def create_simulations( # STEP 1: Generate the training samples, allocate simulations specs = [ DummySimulationInput( + weather_file="some" if random.random() < 0.5 else "other", # noqa: S311 a=i, - b=i, + b=-i, + c=random.randint(-10, 10), # noqa: S311 experiment_id="placeholder", sort_index=i, ) - for i in range(10) + for i in range(1000) ] # STEP 2: Simulate the simulations using scythe @@ -325,13 +329,21 @@ def transition_recursion( if __name__ == "__main__": from scythe.settings import ScytheStorageSettings - from globi.models.surrogate.training import ProgressiveTrainingSpec + from globi.models.surrogate.training import ( + ProgressiveTrainingSpec, + StratificationSpec, + ) base_run_name = "test-experiment" progressive_training_spec = ProgressiveTrainingSpec( sort_index=0, experiment_id="placeholder", gis_uri=HttpUrl("https://example.com/gis.parquet"), + stratification=StratificationSpec( + field="weather_file", + sampling="equal", + aliases=["feature.weather.file"], + ), iteration=IterationSpec( max_iters=4, ), diff --git a/src/globi/worker/Dockerfile 
b/src/globi/worker/Dockerfile index a5c6bc6..e4f1409 100644 --- a/src/globi/worker/Dockerfile +++ b/src/globi/worker/Dockerfile @@ -94,12 +94,12 @@ WORKDIR /code COPY uv.lock pyproject.toml README.md /code/ # TODO: only install ml for certain containers by passing in a flag to the docker build command -RUN uv sync --locked --no-install-project --extra cli --extra ml +RUN uv sync --locked --no-install-project --extra cli --extra ml-gpu RUN uv run epi prisma generate COPY src /code/src/ -RUN uv sync --locked --extra cli +RUN uv sync --locked --extra cli --extra ml-gpu CMD [ "uv", "run", "src/globi/worker/main.py" ] diff --git a/uv.lock b/uv.lock index e642899..45faccc 100644 --- a/uv.lock +++ b/uv.lock @@ -2,9 +2,21 @@ version = 1 revision = 3 requires-python = ">=3.12" resolution-markers = [ - "python_full_version >= '3.14'", - "python_full_version == '3.13.*'", - "python_full_version < '3.13'", + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version < '3.14' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version >= '3.14' and sys_platform == 'darwin'", + "python_full_version < '3.14' and sys_platform == 'darwin'", +] + +[[package]] +name = "absl-py" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/c7/8de93764ad66968d19329a7e0c147a2bb3c7054c554d4a119111b8f9440f/absl_py-2.4.0.tar.gz", hash = "sha256:8c6af82722b35cf71e0f4d1d47dcaebfff286e27110a99fc359349b247dfb5d4", size = 116543, upload-time = "2026-01-28T10:17:05.322Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/a6/907a406bb7d359e6a63f99c313846d9eec4f7e6f7437809e03aa00fa3074/absl_py-2.4.0-py3-none-any.whl", hash = "sha256:88476fd881ca8aab94ffa78b7b6c632a782ab3ba1cd19c9bd423abc4fb4cd28d", size = 135750, 
upload-time = "2026-01-28T10:17:04.19Z" }, ] [[package]] @@ -148,6 +160,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034, upload-time = "2021-11-06T17:52:23.524Z" } + [[package]] name = "anyio" version = "4.11.0" @@ -899,6 +917,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/c6/c71e82e041c95ffe6a92ac707785500aa2a515a4339c2c7dd67e3c449249/cramjam-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:028400d699442d40dbda02f74158c73d05cb76587a12490d0bfedd958fd49188", size = 1713108, upload-time = "2025-07-27T21:24:10.147Z" }, ] +[[package]] +name = "cuda-bindings" +version = "12.9.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-pathfinder", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260, upload-time = "2025-10-21T14:51:40.79Z" }, + { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" }, + { url = "https://files.pythonhosted.org/packages/05/8b/b4b2d1c7775fa403b64333e720cfcfccef8dcb9cdeb99947061ca5a77628/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf8bfaedc238f3b115d957d1fd6562b7e8435ba57f6d0e2f87d0e7149ccb2da5", size = 11570071, upload-time = "2025-10-21T14:51:47.472Z" }, + { url = "https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628, upload-time = "2025-10-21T14:51:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/ec/07/6aff13bc1e977e35aaa6b22f52b172e2890c608c6db22438cf7ed2bf43a6/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3adf4958dcf68ae7801a59b73fb00a8b37f8d0595060d66ceae111b1002de38d", size = 11566797, upload-time = "2025-10-21T14:51:54.581Z" }, + { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991, upload-time = "2025-10-21T14:51:56.535Z" }, + { url = "https://files.pythonhosted.org/packages/1e/b5/96a6696e20c4ffd2b327f54c7d0fde2259bdb998d045c25d5dedbbe30290/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f53a7f453d4b2643d8663d036bafe29b5ba89eb904c133180f295df6dc151e5", size = 11624530, upload-time = "2025-10-21T14:52:01.539Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703, upload-time = "2025-10-21T14:52:03.585Z" }, + { url = "https://files.pythonhosted.org/packages/39/73/d2fc40c043bac699c3880bf88d3cebe9d88410cd043795382826c93a89f0/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20f2699d61d724de3eb3f3369d57e2b245f93085cab44fd37c3bea036cea1a6f", size = 11565056, upload-time = "2025-10-21T14:52:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658, upload-time = "2025-10-21T14:52:10.411Z" }, +] + +[[package]] +name = "cuda-pathfinder" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/02/59a5bc738a09def0b49aea0e460bdf97f65206d0d041246147cf6207e69c/cuda_pathfinder-1.4.1-py3-none-any.whl", hash = "sha256:40793006082de88e0950753655e55558a446bed9a7d9d0bcb48b2506d50ed82a", size = 43903, upload-time = "2026-03-06T21:05:24.372Z" }, +] + [[package]] name = "cycler" version = "0.12.1" @@ -956,6 +1002,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "einops" +version = "0.8.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/2c/77/850bef8d72ffb9219f0b1aac23fbc1bf7d038ee6ea666f331fa273031aa2/einops-0.8.2.tar.gz", hash = "sha256:609da665570e5e265e27283aab09e7f279ade90c4f01bcfca111f3d3e13f2827", size = 56261, upload-time = "2026-01-26T04:13:17.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/09/f8d8f8f31e4483c10a906437b4ce31bdf3d6d417b73fe33f1a8b59e34228/einops-0.8.2-py3-none-any.whl", hash = "sha256:54058201ac7087911181bfec4af6091bb59380360f069276601256a76af08193", size = 65638, upload-time = "2026-01-26T04:13:18.546Z" }, +] + [[package]] name = "energy-pandas" version = "0.4.1" @@ -1292,6 +1347,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7", size = 199289, upload-time = "2025-09-02T19:10:47.708Z" }, ] +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] + [[package]] name = "future" version = "1.0.0" @@ -1379,8 +1439,13 @@ cli = [ { name = "click" }, { name = "xlsxwriter" }, ] -ml = [ +ml-gpu = [ { name = "lightgbm" }, + { name = "pytorch-tabular" }, + { name = "tensorboard" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, + { name = "wandb" }, { name = "xgboost" }, ] visualization = [ @@ -1425,7 +1490,7 @@ requires-dist = [ { name = "folium", marker = "extra == 'visualization'", specifier = ">=0.15.0" }, { name = "geopandas", specifier = ">=0.14.0" }, { name = "ladybug-core", specifier = ">=0.44.29" }, - { name = "lightgbm", marker = "extra == 'ml'", specifier = ">=4.6.0" }, + { name = "lightgbm", marker = "extra == 'ml-gpu'", specifier = ">=4.6.0" }, { name = "matplotlib", 
marker = "extra == 'visualization'", specifier = ">=3.8.0" }, { name = "numpy", specifier = ">=1.26.0" }, { name = "pandas", specifier = ">=2.1.0" }, @@ -1433,6 +1498,7 @@ requires-dist = [ { name = "plotly", marker = "extra == 'visualization'", specifier = ">=5.18.0" }, { name = "pydantic", specifier = ">=2.11,<3" }, { name = "pyproj", specifier = ">=3.6.0" }, + { name = "pytorch-tabular", marker = "extra == 'ml-gpu'", specifier = ">=1.2.0" }, { name = "rasterio", marker = "extra == 'visualization'", specifier = ">=1.3.9" }, { name = "scikit-learn", specifier = ">=1.3.0" }, { name = "scipy", specifier = ">=1.11.0,<1.15" }, @@ -1440,10 +1506,14 @@ requires-dist = [ { name = "seaborn", marker = "extra == 'visualization'", specifier = ">=0.13.0" }, { name = "shapely", specifier = ">=2.0.0" }, { name = "streamlit", marker = "extra == 'visualization'", specifier = ">=1.28.0" }, - { name = "xgboost", marker = "extra == 'ml'", specifier = ">=3.2.0" }, + { name = "tensorboard", marker = "extra == 'ml-gpu'", specifier = ">=2.20.0" }, + { name = "torch", marker = "sys_platform == 'darwin' and extra == 'ml-gpu'", specifier = ">=2.5.0", index = "https://pypi.org/simple", conflict = { package = "globi", extra = "ml-gpu" } }, + { name = "torch", marker = "sys_platform != 'darwin' and extra == 'ml-gpu'", specifier = ">=2.5.0", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "globi", extra = "ml-gpu" } }, + { name = "wandb", marker = "extra == 'ml-gpu'", specifier = ">=0.25.0" }, + { name = "xgboost", marker = "extra == 'ml-gpu'", specifier = ">=3.2.0" }, { name = "xlsxwriter", marker = "extra == 'cli'", specifier = ">=3.2.9" }, ] -provides-extras = ["visualization", "ml", "cli"] +provides-extras = ["visualization", "ml-gpu", "cli"] [package.metadata.requires-dev] dev = [ @@ -1982,7 +2052,7 @@ dependencies = [ { name = "nbformat" }, { name = "packaging" }, { name = "prometheus-client" }, - { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = 
"pywinpty", marker = "os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux'" }, { name = "pyzmq" }, { name = "send2trash" }, { name = "terminado" }, @@ -2000,7 +2070,7 @@ name = "jupyter-server-terminals" version = "0.5.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "pywinpty", marker = "os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux'" }, { name = "terminado" }, ] sdist = { url = "https://files.pythonhosted.org/packages/f4/a7/bcd0a9b0cbba88986fe944aaaf91bfda603e5a50bda8ed15123f381a3b2f/jupyter_server_terminals-0.5.4.tar.gz", hash = "sha256:bbda128ed41d0be9020349f9f1f2a4ab9952a73ed5f5ac9f1419794761fb87f5", size = 31770, upload-time = "2026-01-14T16:53:20.213Z" } @@ -2179,6 +2249,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/23/f8b28ca248bb629b9e08f877dd2965d1994e1674a03d67cd10c5246da248/lightgbm-4.6.0-py3-none-win_amd64.whl", hash = "sha256:37089ee95664b6550a7189d887dbf098e3eadab03537e411f52c63c121e3ba4b", size = 1451509, upload-time = "2025-02-15T04:03:01.515Z" }, ] +[[package]] +name = "lightning-utilities" +version = "0.15.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f1/45/7fa8f56b17dc0f0a41ec70dd307ecd6787254483549843bef4c30ab5adce/lightning_utilities-0.15.3.tar.gz", hash = "sha256:792ae0204c79f6859721ac7f386c237a33b0ed06ba775009cb894e010a842033", size = 33553, upload-time = "2026-02-22T14:48:53.348Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/f4/ead6e0e37209b07c9baa3e984ccdb0348ca370b77cea3aaea8ddbb097e00/lightning_utilities-0.15.3-py3-none-any.whl", hash = "sha256:6c55f1bee70084a1cbeaa41ada96e4b3a0fea5909e844dd335bd80f5a73c5f91", size = 31906, upload-time = "2026-02-22T14:48:52.488Z" }, +] + [[package]] name = "littleutils" version = 
"0.2.4" @@ -2259,6 +2342,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/1b/6ef961f543593969d25b2afe57a3564200280528caa9bd1082eecdd7b3bc/markdown-3.10.1-py3-none-any.whl", hash = "sha256:867d788939fe33e4b736426f5b9f651ad0c0ae0ecf89df0ca5d1176c70812fe3", size = 107684, upload-time = "2026-01-21T18:09:27.203Z" }, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -2388,6 +2483,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, ] +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "mergedeep" version = "1.3.4" @@ -2538,6 +2642,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/5c/2597cef67b6947b15c47f8dba967a0baf19fbdfdc86f6e4a8ba7af8b581a/mkdocstrings_python-1.19.0-py3-none-any.whl", hash = "sha256:395c1032af8f005234170575cc0c5d4d20980846623b623b35594281be4a3059", size = 143417, upload-time = "2025-11-10T13:30:54.164Z" }, ] +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + [[package]] name = "msgpack" version = "1.1.2" @@ -3015,13 +3128,166 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, + { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.10.2.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", 
size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, +] + [[package]] name = "nvidia-nccl-cu12" -version = "2.29.7" +version = "2.27.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/cc/f48875411d1f176bce58e6343fd5d4131fc1db5420719ff25944bdc006c6/nvidia_nccl_cu12-2.29.7-py3-none-manylinux_2_18_aarch64.whl", hash = "sha256:0cf032ee22b560447daf0456108a75e32bd74a4de6c6b64725637a359fa48cd8", size = 293563644, upload-time = "2026-03-03T05:34:46.166Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/1e/9e366f36efc550f07d6737f199e3f6bffafdf28795d007f10a77dd274f5c/nvidia_nccl_cu12-2.29.7-py3-none-manylinux_2_18_x86_64.whl", hash = "sha256:ecd0a012051abc20c1aa87328841efa8cade3ced65803046e38c2f03c0891fea", size = 293633942, upload-time = "2026-03-03T05:37:05.625Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.4.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" }, + { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, +] + +[[package]] +name = "omegaconf" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120, upload-time = "2022-12-08T20:59:22.753Z" } +wheels = [ + { url 
= "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, ] [[package]] @@ -3899,6 +4165,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, ] +[[package]] +name = "pytorch-lightning" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fsspec", extra = ["http"] }, + { name = "lightning-utilities" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, + { name = "torchmetrics" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/ac/ebd5f6f58691cbd4f73836e43e1727f3814311b960c41f88e259606ca2b2/pytorch_lightning-2.6.1.tar.gz", hash = "sha256:ba08f8901cf226fcca473046ad9346f414e99117762dc869c76e650d5b3d7bdc", size = 665563, upload-time = "2026-01-30T14:59:11.636Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/93/c8c361bf0a2fe50f828f32def460e8b8a14b93955d3fd302b1a9b63b19e4/pytorch_lightning-2.6.1-py3-none-any.whl", hash = "sha256:1f8118567ec829e3055f16cf1aa320883a86a47c836951bfd9dcfa34ec7ffd59", size = 857273, upload-time = "2026-01-30T14:59:10.141Z" }, +] + +[[package]] +name = "pytorch-tabular" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = 
[ + { name = "einops" }, + { name = "numpy" }, + { name = "omegaconf" }, + { name = "pandas" }, + { name = "pytorch-lightning" }, + { name = "rich" }, + { name = "scikit-base" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, + { name = "torchmetrics" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/f2/823de16d6a461504f4ed8e4a555d6ce356e5f81e6525d95e2b64895ec94f/pytorch_tabular-1.2.0.tar.gz", hash = "sha256:1b96b576eb3de443840b313d0b298293eaf83dcfdbba53ed8974b76d1351b821", size = 2312825, upload-time = "2026-01-26T21:48:22.577Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/c9/1e01c682e2ad7132bc1943d8d367c96f241bf85679e76d66eb0c4e4cbde9/pytorch_tabular-1.2.0-py3-none-any.whl", hash = "sha256:0a59f8a2304856b3d1e905f7b66153ebc65df1a6a017f2c8a13a29f62dc95b26", size = 165800, upload-time = "2026-01-26T21:48:21.195Z" }, +] + [[package]] name = "pytz" version = "2025.2" @@ -4077,6 +4386,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "rich" +version = "14.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } +wheels 
= [ + { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, +] + [[package]] name = "rpds-py" version = "0.28.0" @@ -4196,6 +4518,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload-time = "2025-09-09T19:23:30.041Z" }, ] +[[package]] +name = "scikit-base" +version = "0.13.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/a8/610f99f01f326178b8a7347db2ede654b42548e9697b516480cc081e344d/scikit_base-0.13.1.tar.gz", hash = "sha256:169e5427233f7237b38c7d858bf07b8a86bbf59feccf0708e26dad4ac312c593", size = 134482, upload-time = "2026-01-25T11:31:38.814Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/55/c20d8319aab037e11f1d6403b6102d1041694abe24a3aa4a1e27f2cdb9f2/scikit_base-0.13.1-py3-none-any.whl", hash = "sha256:1aca86759435fd2d32d83a526ce11095119c0745e4e5dd91f2e5820023ca8e39", size = 159779, upload-time = "2026-01-25T11:31:36.759Z" }, +] + [[package]] name = "scikit-learn" version = "1.7.2" @@ -4297,6 +4628,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/78/504fdd027da3b84ff1aecd9f6957e65f35134534ccc6da8628eb71e76d3f/send2trash-2.1.0-py3-none-any.whl", hash = "sha256:0da2f112e6d6bb22de6aa6daa7e144831a4febf2a87261451c4ad849fe9a873c", size = 17610, upload-time = "2026-01-14T06:27:35.218Z" }, ] +[[package]] +name = "sentry-sdk" +version = "2.54.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "urllib3" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c8/e9/2e3a46c304e7fa21eaa70612f60354e32699c7102eb961f67448e222ad7c/sentry_sdk-2.54.0.tar.gz", hash = "sha256:2620c2575128d009b11b20f7feb81e4e4e8ae08ec1d36cbc845705060b45cc1b", size = 413813, upload-time = "2026-03-02T15:12:41.355Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/39/be412cc86bc6247b8f69e9383d7950711bd86f8d0a4a4b0fe8fad685bc21/sentry_sdk-2.54.0-py2.py3-none-any.whl", hash = "sha256:fd74e0e281dcda63afff095d23ebcd6e97006102cdc8e78a29f19ecdf796a0de", size = 439198, upload-time = "2026-03-02T15:12:39.546Z" }, +] + [[package]] name = "setuptools" version = "80.9.0" @@ -4457,6 +4801,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/60/868371b6482ccd9ef423c6f62650066cf8271fdb2ee84f192695ad6b7a96/streamlit-1.51.0-py3-none-any.whl", hash = "sha256:4008b029f71401ce54946bb09a6a3e36f4f7652cbb48db701224557738cfda38", size = 10171702, upload-time = "2025-10-29T17:07:35.97Z" }, ] +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + [[package]] name = "tables" version = "3.10.2" @@ -4501,13 +4857,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = 
"sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, ] +[[package]] +name = "tensorboard" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "absl-py" }, + { name = "grpcio" }, + { name = "markdown" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "protobuf" }, + { name = "setuptools" }, + { name = "tensorboard-data-server" }, + { name = "werkzeug" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/d9/a5db55f88f258ac669a92858b70a714bbbd5acd993820b41ec4a96a4d77f/tensorboard-2.20.0-py3-none-any.whl", hash = "sha256:9dc9f978cb84c0723acf9a345d96c184f0293d18f166bb8d59ee098e6cfaaba6", size = 5525680, upload-time = "2025-07-17T19:20:49.638Z" }, +] + +[[package]] +name = "tensorboard-data-server" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/13/e503968fefabd4c6b2650af21e110aa8466fe21432cd7c43a84577a89438/tensorboard_data_server-0.7.2-py3-none-any.whl", hash = "sha256:7e0610d205889588983836ec05dc098e80f97b7e7bbff7e994ebb78f578d0ddb", size = 2356, upload-time = "2023-10-23T21:23:32.16Z" }, + { url = "https://files.pythonhosted.org/packages/b7/85/dabeaf902892922777492e1d253bb7e1264cadce3cea932f7ff599e53fea/tensorboard_data_server-0.7.2-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:9fe5d24221b29625dbc7328b0436ca7fc1c23de4acf4d272f1180856e32f9f60", size = 4823598, upload-time = "2023-10-23T21:23:33.714Z" }, + { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload-time = "2023-10-23T21:23:35.583Z" }, +] + [[package]] name = "terminado" version = "0.18.1" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "ptyprocess", marker = "os_name != 'nt'" }, - { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "pywinpty", marker = "os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux'" }, { name = "tornado" }, ] sdist = { url = "https://files.pythonhosted.org/packages/8a/11/965c6fd8e5cc254f1fe142d547387da17a8ebfd75a3455f637c663fb38a0/terminado-0.18.1.tar.gz", hash = "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e", size = 32701, upload-time = "2024-03-12T14:34:39.026Z" } @@ -4601,6 +4987,103 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, ] +[[package]] +name = "torch" +version = "2.10.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'darwin'", + "python_full_version < '3.14' and sys_platform == 'darwin'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", marker = "sys_platform == 'darwin'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", marker = "sys_platform == 'darwin'" }, + { name = "setuptools", marker = "sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" }, + { url = 
"https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" }, + { url = "https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload-time = "2026-01-21T16:24:34.704Z" }, + { url = "https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305, upload-time = "2026-01-21T16:24:09.209Z" }, + { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248, upload-time = "2026-01-21T16:23:09.315Z" }, + { url = "https://files.pythonhosted.org/packages/4f/93/716b5ac0155f1be70ed81bacc21269c3ece8dba0c249b9994094110bfc51/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:bf0d9ff448b0218e0433aeb198805192346c4fd659c852370d5cc245f602a06a", size = 79464992, upload-time = "2026-01-21T16:23:05.162Z" }, + { url = "https://files.pythonhosted.org/packages/d8/94/71994e7d0d5238393df9732fdab607e37e2b56d26a746cb59fdb415f8966/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f5ab4ba32383061be0fb74bda772d470140a12c1c3b58a0cfbf3dae94d164c28", size = 79850324, upload-time = "2026-01-21T16:22:09.494Z" }, +] + +[[package]] +name = "torch" +version = "2.10.0+cu128" +source = { registry = "https://download.pytorch.org/whl/cu128" } +resolution-markers = 
[ + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version < '3.14' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", +] +dependencies = [ + { name = "cuda-bindings", marker = "sys_platform == 'linux'" }, + { name = "filelock", marker = "sys_platform != 'darwin'" }, + { name = "fsspec", marker = "sys_platform != 'darwin'" }, + { name = "jinja2", marker = "sys_platform != 'darwin'" }, + { name = "networkx", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" }, + { name = "setuptools", marker = "sys_platform != 'darwin'" }, + { name = "sympy", marker = "sys_platform != 'darwin'" }, + { name = "triton", marker = "sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { 
url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6f09cdf2415516be028ae82e6b985bcfc3eac37bc52ab401142689f6224516ca" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:628e89bd5110ced7debee2a57c69959725b7fbc64eab81a39dd70e46c7e28ba5" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:fbde8f6a9ec8c76979a0d14df21c10b9e5cab6f0d106a73ca73e2179bc597cae" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:bdbcc703382f948e951c063448c9406bf38ce66c41dd698d9e2733fcf96c037a" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7b4bd23ed63de97456fcc81c26fea9f02ee02ce1112111c4dac0d8cfe574b23e" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:4d1b0b49c54223c7c04050b49eac141d77b6edbc34aea1dfc74a6fdb661baa8c" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f1f8b840c64b645a4bc61a393db48effb9c92b2dc26c8373873911f0750d1ea7" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:23f58258012bcf1c349cb22af387e33aadca7f83ea617b080e774eb41e4fe8ff" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:01b216e097b17a5277cfb47c383cdcacf06abeadcb0daca0c76b59e72854c3b6" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:c42377bc2607e3e1c60da71b792fb507c3938c87fd6edab8b21c59c91473c36d" }, + { url = 
"https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:37d71feea068776855686a1512058df3f19f6f040a151f055aa746601678744f" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-win_amd64.whl", hash = "sha256:c57017ca29e62271e362fdeee7d20070e254755a5148b30b553d8a10fc83c7ef" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:777461f50b2daf77e4bdd8e2ad34bdfc5a993bf1bdf2ab9ef39f5edfe4e9c12b" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7bcba6a7c5f0987a13298b1ca843155dcceceac758fa3c7ccd5c7af4059a1080" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-win_amd64.whl", hash = "sha256:70d89143c956389d4806cb4e5fe0b1129fe0db280e1073288d17fa76c101cba4" }, +] + +[[package]] +name = "torchmetrics" +version = "1.8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lightning-utilities" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/2e/48a887a59ecc4a10ce9e8b35b3e3c5cef29d902c4eac143378526e7485cb/torchmetrics-1.8.2.tar.gz", hash = "sha256:cf64a901036bf107f17a524009eea7781c9c5315d130713aeca5747a686fe7a5", size = 580679, upload-time = "2025-09-03T14:00:54.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/21/aa0f434434c48490f91b65962b1ce863fdcce63febc166ca9fe9d706c2b6/torchmetrics-1.8.2-py3-none-any.whl", hash = "sha256:08382fd96b923e39e904c4d570f3d49e2cc71ccabd2a94e0f895d1f0dac86242", size = 983161, 
upload-time = "2025-09-03T14:00:51.921Z" }, +] + [[package]] name = "tornado" version = "6.5.2" @@ -4653,6 +5136,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/7a/f38385f1b2d5f54221baf1db3d6371dc6eef8041d95abff39576c694e9d9/transforms3d-0.4.2-py3-none-any.whl", hash = "sha256:1c70399d9e9473ecc23311fd947f727f7c69ed0b063244828c383aa1aefa5941", size = 1376759, upload-time = "2024-06-20T11:09:19.43Z" }, ] +[[package]] +name = "triton" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243, upload-time = "2026-01-20T16:16:07.857Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087, upload-time = "2026-01-20T16:16:18.989Z" }, + { url = "https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" }, + { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577, upload-time = "2026-01-20T16:16:25.426Z" }, + { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = "2026-01-20T16:01:07.278Z" }, + { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804, upload-time = "2026-01-20T16:16:31.528Z" }, + { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = 
"2026-01-20T16:01:14.236Z" }, +] + [[package]] name = "tsam" version = "2.3.9" @@ -4754,6 +5254,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/2a/dc2228b2888f51192c7dc766106cd475f1b768c10caaf9727659726f7391/virtualenv-20.36.1-py3-none-any.whl", hash = "sha256:575a8d6b124ef88f6f51d56d656132389f961062a9177016a50e4f507bbcc19f", size = 6008258, upload-time = "2026-01-09T18:20:59.425Z" }, ] +[[package]] +name = "wandb" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "gitpython" }, + { name = "packaging" }, + { name = "platformdirs" }, + { name = "protobuf" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "sentry-sdk" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/60/d94952549920469524b689479c864c692ca47eca4b8c2fe3389b64a58778/wandb-0.25.0.tar.gz", hash = "sha256:45840495a288e34245d69d07b5a0b449220fbc5b032e6b51c4f92ec9026d2ad1", size = 43951335, upload-time = "2026-02-13T00:17:45.515Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/7d/0c131db3ec9deaabbd32263d90863cbfbe07659527e11c35a5c738cecdc5/wandb-0.25.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:5eecb3c7b5e60d1acfa4b056bfbaa0b79a482566a9db58c9f99724b3862bc8e5", size = 23287536, upload-time = "2026-02-13T00:17:20.265Z" }, + { url = "https://files.pythonhosted.org/packages/c3/95/31bb7f76a966ec87495e5a72ac7570685be162494c41757ac871768dbc4f/wandb-0.25.0-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:daeedaadb183dc466e634fba90ab2bab1d4e93000912be0dee95065a0624a3fd", size = 25196062, upload-time = "2026-02-13T00:17:23.356Z" }, + { url = "https://files.pythonhosted.org/packages/d9/a1/258cdedbf30cebc692198a774cf0ef945b7ed98ee64bdaf62621281c95d8/wandb-0.25.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:5e0127dbcef13eea48f4b84268da7004d34d3120ebc7b2fa9cefb72b49dbb825", size = 22799744, upload-time = 
"2026-02-13T00:17:26.437Z" }, + { url = "https://files.pythonhosted.org/packages/de/91/ec9465d014cfd199c5b2083d271d31b3c2aedeae66f3d8a0712f7f54bdf3/wandb-0.25.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:6c4c38077836f9b7569a35b0e1dcf1f0c43616fcd936d182f475edbfea063665", size = 25262839, upload-time = "2026-02-13T00:17:28.8Z" }, + { url = "https://files.pythonhosted.org/packages/c7/95/cb2d1c7143f534544147fb53fe87944508b8cb9a058bc5b6f8a94adbee15/wandb-0.25.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6edd8948d305cb73745bf564b807bd73da2ccbd47c548196b8a362f7df40aed8", size = 22853714, upload-time = "2026-02-13T00:17:31.68Z" }, + { url = "https://files.pythonhosted.org/packages/d7/94/68163f70c1669edcf130822aaaea782d8198b5df74443eca0085ec596774/wandb-0.25.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ada6f08629bb014ad6e0a19d5dec478cdaa116431baa3f0a4bf4ab8d9893611f", size = 25358037, upload-time = "2026-02-13T00:17:34.676Z" }, + { url = "https://files.pythonhosted.org/packages/cc/fb/9578eed2c01b2fc6c8b693da110aa9c73a33d7bb556480f5cfc42e48c94e/wandb-0.25.0-py3-none-win32.whl", hash = "sha256:020b42ca4d76e347709d65f59b30d4623a115edc28f462af1c92681cb17eae7c", size = 24604118, upload-time = "2026-02-13T00:17:37.641Z" }, + { url = "https://files.pythonhosted.org/packages/25/97/460f6cb738aaa39b4eb2e6b4c630b2ae4321cdd70a79d5955ea75a878981/wandb-0.25.0-py3-none-win_amd64.whl", hash = "sha256:78307ac0b328f2dc334c8607bec772851215584b62c439eb320c4af4fb077a00", size = 24604122, upload-time = "2026-02-13T00:17:39.991Z" }, + { url = "https://files.pythonhosted.org/packages/27/6c/5847b4dda1dfd52630dac08711d4348c69ed657f0698fc2d949c7f7a6622/wandb-0.25.0-py3-none-win_arm64.whl", hash = "sha256:c6174401fd6fb726295e98d57b4231c100eca96bd17de51bfc64038a57230aaf", size = 21785298, upload-time = "2026-02-13T00:17:42.475Z" }, +] + [[package]] name = "watchdog" version = "6.0.0" @@ -4805,6 +5334,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" }, ] +[[package]] +name = "werkzeug" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/f1/ee81806690a87dab5f5653c1f146c92bc066d7f4cebc603ef88eb9e13957/werkzeug-3.1.6.tar.gz", hash = "sha256:210c6bede5a420a913956b4791a7f4d6843a43b6fcee4dfa08a65e93007d0d25", size = 864736, upload-time = "2026-02-19T15:17:18.884Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/ec/d58832f89ede95652fd01f4f24236af7d32b70cab2196dfcc2d2fd13c5c2/werkzeug-3.1.6-py3-none-any.whl", hash = "sha256:7ddf3357bb9564e407607f988f683d72038551200c704012bb9a4c523d42f131", size = 225166, upload-time = "2026-02-19T15:17:17.475Z" }, +] + [[package]] name = "widgetsnbextension" version = "4.0.15" From 4f9223a4ad2a58e9bd7ca6651146b994a3bce87d Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sun, 8 Mar 2026 12:54:35 -0400 Subject: [PATCH 07/27] enable xgboost training --- src/globi/models/surrogate/dummy.py | 24 +- src/globi/models/surrogate/training.py | 468 ++++++++++++++++--------- src/globi/pipelines/training.py | 26 +- src/globi/worker/main.py | 3 +- 4 files changed, 344 insertions(+), 177 deletions(-) diff --git a/src/globi/models/surrogate/dummy.py b/src/globi/models/surrogate/dummy.py index 1ebcf24..ee17a81 100644 --- a/src/globi/models/surrogate/dummy.py +++ b/src/globi/models/surrogate/dummy.py @@ -32,10 +32,26 @@ def dummy_simulation( ) -> DummySimulationOutput: """A dummy simulation.""" df = pd.DataFrame({ - "target_0": [input_spec.a + input_spec.b], - "target_1": [input_spec.a - input_spec.b], - "target_2": [input_spec.a * 
input_spec.b * input_spec.c], - "target_3": [input_spec.a / math.sin(input_spec.b)], + "target_0": [ + (input_spec.a + input_spec.b) + if input_spec.weather_file == "some" + else (input_spec.a - input_spec.b) + ], + "target_1": [ + (input_spec.a - input_spec.b) + if input_spec.weather_file == "some" + else (input_spec.a + input_spec.b) + ], + "target_2": [ + (input_spec.a * input_spec.b * input_spec.c) + if input_spec.weather_file == "some" + else (input_spec.a * input_spec.b / input_spec.c) + ], + "target_3": [ + (input_spec.a / math.sin(input_spec.b)) + if input_spec.weather_file == "some" + else (input_spec.a / math.cos(input_spec.b)) + ], }) df_neg = -df df = pd.concat([df, df_neg], axis=1, keys=["positive", "negative"], names=["sign"]) diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 7d42e06..29b57cc 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -1,9 +1,10 @@ """Models used for the surrogate training pipeline.""" import warnings +from collections.abc import Callable from functools import cached_property from pathlib import Path -from typing import TYPE_CHECKING, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import pandas as pd @@ -18,6 +19,7 @@ S3ClientType = object +# TODO: allow specific configuration per column. class ConvergenceThresholds(BaseModel): """The thresholds for convergence.""" @@ -80,19 +82,70 @@ def check_convergence(self, metrics: pd.Series): ) -class XGBHyperparameters(BaseModel): - """The parameters for the xgboost model.""" +class XGBTrainerConfig(BaseModel): + """The trainer hyperparameters for the xgboost model.""" + + num_boost_round: int = Field( + default=4000, description="The number of boosting rounds." + ) + early_stopping_rounds: int = Field( + default=10, description="The number of boosting rounds to early stop." 
+ ) + verbose_eval: bool = Field( + default=True, description="Whether to print verbose evaluation results." + ) + + +class XGBModelConfig(BaseModel): + """The model hyperparameters for the xgboost model.""" max_depth: int = Field(default=5, description="The maximum depth of the tree.") eta: float = Field(default=0.1, description="The learning rate.") - min_child_weight: int = Field(default=3, description="The minimum child weight.") - subsample: float = Field(default=0.8, description="The subsample rate.") - colsample_bytree: float = Field( - default=0.8, description="The column sample by tree rate." + min_child_weight: int | None = Field( + default=3, description="The minimum child weight." + ) + subsample: float | None = Field(default=None, description="The subsample rate.") + colsample_bytree: float | None = Field( + default=None, description="The column sample by tree rate." + ) + alpha: float | None = Field(default=None, description="The alpha parameter.") + lam: float | None = Field(default=None, description="The lambda parameter.") + gamma: float | None = Field(default=None, description="The gamma parameter.") + seed: int = Field( + default=42, description="The seed for the random number generator." 
+ ) + + @property + def param_dict(self) -> dict[str, Any]: + """The dictionary of parameters.""" + import torch + + data = { + "objective": "reg:squarederror", + "eval_metric": "rmse", + "tree_method": "auto", + "seed": self.seed, + # hyperparameters + **self.model_dump( + exclude_none=True, + ), + } + if torch.cuda.is_available(): + data["device"] = "cuda" + return data + + +class XGBHyperparameters(BaseModel): + """The parameters for the xgboost model.""" + + hp: XGBModelConfig = Field( + default_factory=XGBModelConfig, + description="The hyperparameters for the model.", + ) + trainer: XGBTrainerConfig = Field( + default_factory=XGBTrainerConfig, + description="The trainer hyperparameters for the model.", ) - alpha: float = Field(default=0.01, description="The alpha parameter.") - lam: float = Field(default=0.01, description="The lambda parameter.") - gamma: float = Field(default=0.01, description="The gamma parameter.") class LGBHyperparameters(BaseModel): @@ -232,10 +285,10 @@ class ProgressiveTrainingSpec(ExperimentInputSpec): default_factory=RegressionIOConfigSpec, description="The regression io config spec.", ) - # model_hyperparameters: ModelHPType = Field( - # default_factory=LGBHyperparameters, - # description="The hyperparameters for the model.", - # ) + hyperparameters: ModelHPType = Field( + default_factory=XGBHyperparameters, + description="The hyperparameters for the model.", + ) stratification: StratificationSpec = Field( default_factory=StratificationSpec, description="The stratification spec.", @@ -558,7 +611,12 @@ def dparams(self) -> pd.DataFrame: return self.data.index.to_frame() @cached_property - def all_columns(self) -> frozenset[str]: + def all_feature_columns(self) -> frozenset[str]: + """The names of all columns.""" + return frozenset(self.dparams.columns) + + @cached_property + def all_target_columns(self) -> frozenset[str]: """The names of all columns.""" return frozenset(self.data.columns) @@ -566,12 +624,14 @@ def all_columns(self) 
-> frozenset[str]: def continuous_columns(self) -> frozenset[str]: """The continuous columns.""" feature_conf = self.parent.regression_io_config.features - candidates = self.all_columns - feature_conf.exclude_columns - EXCLUDED_COLUMNS + candidates = ( + self.all_feature_columns - feature_conf.exclude_columns - EXCLUDED_COLUMNS + ) object_dype_columns = ( - self.data[candidates].select_dtypes(include=["object"]).columns.tolist() + self.dparams[candidates].select_dtypes(include=["object"]).columns.tolist() ) candidates = candidates - frozenset(object_dype_columns) - nunique_counts = cast(pd.Series, self.data[candidates].nunique()) + nunique_counts = cast(pd.Series, self.dparams[candidates].nunique()) thresh = feature_conf.cont_cat_unicity_transition_threshold passing_candidates = cast( list[str], @@ -604,12 +664,14 @@ def continuous_columns(self) -> frozenset[str]: def categorical_columns(self) -> frozenset[str]: """The categorical columns.""" feature_conf = self.parent.regression_io_config.features - candidates = self.all_columns - feature_conf.exclude_columns - EXCLUDED_COLUMNS + candidates = ( + self.all_feature_columns - feature_conf.exclude_columns - EXCLUDED_COLUMNS + ) object_dtype_columns = ( - self.data[candidates].select_dtypes(include=["object"]).columns.tolist() + self.dparams[candidates].select_dtypes(include=["object"]).columns.tolist() ) non_obj_dtype_columns = candidates - frozenset(object_dtype_columns) - nunique_counts = cast(pd.Series, self.data[non_obj_dtype_columns].nunique()) + nunique_counts = cast(pd.Series, self.dparams[non_obj_dtype_columns].nunique()) thresh = feature_conf.cont_cat_unicity_transition_threshold passing_non_obj_dtype_candidates = cast( list[str], @@ -723,9 +785,8 @@ def test_segment(self) -> tuple[pd.DataFrame, pd.DataFrame]: @cached_property def targets(self) -> list[str]: """The list of regression targets.""" - return ( - self.parent.regression_io_config.targets.columns - or self.data.columns.tolist() + return 
self.parent.regression_io_config.targets.columns or sorted( + self.all_target_columns ) @cached_property @@ -738,6 +799,125 @@ def target_range(self) -> list[tuple[float, float]]: for col in self.targets ] + def train(self, tempdir: Path): + """Train the model.""" + if isinstance(self.parent.hyperparameters, XGBHyperparameters): + # TOOO: Consider adding an interface/protocol/base class so signatures can be consistent. + return self.train_xgboost(tempdir) + else: + raise NotImplementedError( + f"Unsupported hyperparameters type: {type(self.parent.hyperparameters)}" + ) + + def train_xgboost(self, tempdir: Path): + """Train an xgboost model.""" + import xgboost as xgb + + hp = ( + self.parent.hyperparameters + if isinstance(self.parent.hyperparameters, XGBHyperparameters) + else XGBHyperparameters() + ) + + x_train, y_train = self.train_segment + x_test, y_test = self.test_segment + + # select the features + x_train_selected, x_test_selected = ( + x_train.loc[:, self.continuous_columns | self.categorical_columns], + x_test.loc[:, self.continuous_columns | self.categorical_columns], + ) + cats = { + col: self.dparams[col].unique().tolist() for col in self.categorical_columns + } + x_train_encoded = self.index_encode_categorical_columns(x_train_selected, cats) + x_test_encoded = self.index_encode_categorical_columns(x_test_selected, cats) + + # select the targets + y_train, y_test = y_train.loc[:, self.targets], y_test.loc[:, self.targets] + + train_dmat = xgb.DMatrix( + x_train_encoded.reset_index(drop=True), label=y_train.reset_index(drop=True) + ) + test_dmat = xgb.DMatrix( + x_test_encoded.reset_index(drop=True), label=y_test.reset_index(drop=True) + ) + + evals = [(train_dmat, "train"), (test_dmat, "eval")] + model = xgb.train( + hp.hp.param_dict, + train_dmat, + num_boost_round=hp.trainer.num_boost_round, + evals=evals, + early_stopping_rounds=hp.trainer.early_stopping_rounds, + verbose_eval=hp.trainer.verbose_eval, + ) + + def predict(x: pd.DataFrame) -> 
pd.DataFrame: + """Predict the targets for the given features.""" + x_selected = cast( + pd.DataFrame, + x.loc[:, self.continuous_columns | self.categorical_columns], + ) + x_encoded = self.index_encode_categorical_columns(x_selected, cats) + preds = model.predict( + xgb.DMatrix( + x_encoded.reset_index(drop=True), + ) + ) + return pd.DataFrame( + preds, columns=pd.Index(self.targets), index=pd.MultiIndex.from_frame(x) + ) + + evaluation = self.evaluate(predict, x_train, x_test, y_train, y_test) + model_path = tempdir / "model.ubj" + model.save_model(model_path.as_posix()) + return model, evaluation, model_path + + def evaluate( + self, + fn: Callable[[pd.DataFrame], pd.DataFrame], + x_train: pd.DataFrame, + x_test: pd.DataFrame, + y_train: pd.DataFrame, + y_test: pd.DataFrame, + ) -> tuple[pd.DataFrame, pd.DataFrame]: + """Evaluate a model on the train and test segments.""" + y_train_preds = fn(x_train) + y_test_preds = fn(x_test) + + # compute the metrics + global_train_metrics, stratum_train_metrics = self.compute_metrics( + y_train_preds, y_train + ) + global_test_metrics, stratum_test_metrics = self.compute_metrics( + y_test_preds, y_test + ) + + global_metrics = pd.concat( + [global_train_metrics, global_test_metrics], + axis=1, + keys=["train", "test"], + names=["split_segment"], + ) + stratum_metrics = pd.concat( + [stratum_train_metrics, stratum_test_metrics], + axis=1, + keys=["train", "test"], + names=["split_segment"], + ) + return global_metrics, stratum_metrics + + def index_encode_categorical_columns( + self, df: pd.DataFrame, cats: dict[str, list[str]] + ) -> pd.DataFrame: + """Index encode the categorical columns.""" + df = df.copy(deep=True) + for col in df.columns: + if df[col].dtype == "object": + df[col] = pd.Categorical(df[col], categories=cats[col]).codes + return df + def train_pytorch_tabular(self, tempdir: Path): """Train a pytorch tabular model.""" from pytorch_tabular import TabularModel @@ -762,8 +942,8 @@ def 
train_pytorch_tabular(self, tempdir: Path): optimizer_config = OptimizerConfig( # TODO: make this all configurable optimizer="AdamW", optimizer_params={"weight_decay": 1e-5}, - lr_scheduler="CosineAnnealingLR", - lr_scheduler_params={"T_max": n_epochs, "eta_min": 1e-5}, + # lr_scheduler="CosineAnnealingLR", + # lr_scheduler_params={"T_max": n_epochs, "eta_min": 1e-5}, ) trainer_config = TrainerConfig( batch_size=256, @@ -771,6 +951,10 @@ def train_pytorch_tabular(self, tempdir: Path): max_epochs=n_epochs, min_epochs=max(n_epochs // 20, 1), early_stopping=None, + # early_stopping= "valid_loss", + # early_stopping_min_delta=0.001, + # early_stopping_mode="min", + # early_stopping_patience=3, # gradient_clip_val=1.0, # auto_lr_find=False # max_time=60, @@ -780,16 +964,16 @@ def train_pytorch_tabular(self, tempdir: Path): task="regression", head="LinearHead", head_config=LinearHeadConfig( - # layers="", + layers="256-128-64", activation="SiLU", - use_batch_norm=False, + use_batch_norm=True, # dropout=0, ).__dict__, target_range=self.target_range, embedding_dims=None, - embedding_dropout=0.1, + embedding_dropout=0.05, batch_norm_continuous_input=True, - gflu_stages=6, + gflu_stages=24, gflu_dropout=0.0, gflu_feature_init_sparsity=0.3, learnable_sparsity=True, @@ -938,97 +1122,100 @@ def train_pytorch_tabular(self, tempdir: Path): # "stratum_metrics": stratum_metrics, # } - # def compute_frame_metrics( - # self, preds: pd.DataFrame, targets: pd.DataFrame - # ) -> pd.DataFrame: - # """Compute the metrics.""" - # from sklearn.metrics import ( - # mean_absolute_error, - # mean_absolute_percentage_error, - # mean_squared_error, - # r2_score, - # ) + def compute_frame_metrics( + self, preds: pd.DataFrame, targets: pd.DataFrame + ) -> pd.DataFrame: + """Compute the metrics.""" + from sklearn.metrics import ( + mean_absolute_error, + mean_absolute_percentage_error, + mean_squared_error, + r2_score, + ) - # mae = mean_absolute_error(targets, preds, multioutput="raw_values") - # 
mse = mean_squared_error(targets, preds, multioutput="raw_values") - # rmse = np.sqrt(mse) - # r2 = r2_score(targets, preds, multioutput="raw_values") - # cvrmse = rmse / (targets.mean(axis=0) + 1e-5) - # mape = mean_absolute_percentage_error( - # targets + 1e-5, - # preds, - # multioutput="raw_values", - # ) + mae = mean_absolute_error(targets, preds, multioutput="raw_values") + mse = mean_squared_error(targets, preds, multioutput="raw_values") + rmse = np.sqrt(mse) + r2 = r2_score(targets, preds, multioutput="raw_values") + cvrmse = rmse / np.abs(targets.mean(axis=0) + 1e-5) + mape = mean_absolute_percentage_error( + targets + 1e-5, + preds, + multioutput="raw_values", + ) - # metrics = pd.DataFrame( - # { - # "mae": mae, - # "rmse": rmse, - # "r2": r2, - # "cvrmse": cvrmse, - # "mape": mape, - # }, - # ) - # metrics.columns.names = ["metric"] - # metrics.index.names = ["measurement", "target"] - # return metrics - - # def compute_metrics(self, preds: pd.DataFrame, targets: pd.DataFrame): - # """Compute the metrics.""" - # global_metrics = self.compute_frame_metrics(preds, targets) - # stratum_metric_dfs = {} - # for stratum_name in self.stratum_names: - # stratum_targets = cast( - # pd.DataFrame, targets.xs(stratum_name, level=self.stratification_field) - # ) - # stratum_preds = cast( - # pd.DataFrame, preds.xs(stratum_name, level=self.stratification_field) - # ) - # metrics = self.compute_frame_metrics(stratum_preds, stratum_targets) - # stratum_metric_dfs[stratum_name] = metrics + metrics = pd.DataFrame( + { + "mae": mae, + "rmse": rmse, + "r2": r2, + "cvrmse": cvrmse, + "mape": mape, + }, + ) + metrics.columns.names = ["metric"] + metrics.index.names = ["target"] - # stratum_metrics = pd.concat( - # stratum_metric_dfs, - # axis=1, - # keys=self.stratum_names, - # names=["stratum"], - # ) - # global_metrics = ( - # global_metrics.set_index( - # pd.Index( - # [self.sort_index] * len(global_metrics), - # name="sort_index", - # ), - # append=True, - # ) - # 
.set_index( - # pd.Index( - # [self.progressive_training_iter_ix] * len(global_metrics), - # name="progressive_training_iter_ix", - # ), - # append=True, - # ) - # .unstack(level="target") - # ) + return metrics - # stratum_metrics = ( - # stratum_metrics.set_index( - # pd.Index( - # [self.sort_index] * len(stratum_metrics), - # name="sort_index", - # ), - # append=True, - # ) - # .set_index( - # pd.Index( - # [self.progressive_training_iter_ix] * len(stratum_metrics), - # name="progressive_training_iter_ix", - # ), - # append=True, - # ) - # .unstack(level="target") - # ) - # return global_metrics, stratum_metrics + def compute_metrics(self, preds: pd.DataFrame, targets: pd.DataFrame): + """Compute the metrics.""" + global_metrics = self.compute_frame_metrics(preds, targets) + stratum_metric_dfs = {} + for stratum_name in self.stratum_names: + stratum_targets = cast( + pd.DataFrame, + targets.xs(stratum_name, level=self.parent.stratification.field), + ) + stratum_preds = cast( + pd.DataFrame, + preds.xs(stratum_name, level=self.parent.stratification.field), + ) + metrics = self.compute_frame_metrics(stratum_preds, stratum_targets) + stratum_metric_dfs[stratum_name] = metrics + + stratum_metrics = pd.concat( + stratum_metric_dfs, + axis=1, + keys=self.stratum_names, + names=["stratum"], + ) + global_metrics = ( + global_metrics.set_index( + pd.Index( + [self.sort_index] * len(global_metrics), + name="sort_index", + ), + append=True, + ) + .set_index( + pd.Index( + [self.parent.iteration.current_iter] * len(global_metrics), + name="iteration", + ), + append=True, + ) + .unstack(level="target") + ) + + stratum_metrics = ( + stratum_metrics.set_index( + pd.Index( + [self.sort_index] * len(stratum_metrics), + name="sort_index", + ), + append=True, + ) + .set_index( + pd.Index( + [self.parent.iteration.current_iter] * len(stratum_metrics), + name="iteration", + ), + append=True, + ) + .unstack(level="target") + ) + return global_metrics, stratum_metrics # def 
train_lightgbm( # self, @@ -1100,53 +1287,6 @@ def train_pytorch_tabular(self, tempdir: Path): # """Format the model key.""" # return f"hatchet/{self.model_dir_key}/{model_name}" - # def train_xgboost( - # self, - # train_params: pd.DataFrame, - # train_targets: pd.DataFrame, - # test_params: pd.DataFrame, - # test_targets: pd.DataFrame, - # ): - # """Train the xgboost model.""" - # import xgboost as xgb - - # hparams = { - # "objective": "reg:squarederror", - # "eval_metric": "rmse", - # "max_depth": 5, # 7 - # "eta": 0.1, - # "min_child_weight": 3, - # "subsample": 0.8, - # "colsample_bytree": 0.8, - # # "alpha": 0.01, - # # "lambda": 0.01, - # # "gamma": 0.01, - # } - - # train_dmatrix = xgb.DMatrix(train_params, label=train_targets) - # test_dmatrix = xgb.DMatrix(test_params, label=test_targets) - - # model = xgb.train( - # hparams, - # train_dmatrix, - # num_boost_round=2000, - # early_stopping_rounds=20, - # verbose_eval=True, - # evals=[(test_dmatrix, "test")], - # ) - - # # compute the metrics - # train_preds = model.predict(train_dmatrix) - # test_preds = model.predict(test_dmatrix) - # train_preds = pd.DataFrame( - # train_preds, index=train_targets.index, columns=train_targets.columns - # ) - # test_preds = pd.DataFrame( - # test_preds, index=test_targets.index, columns=test_targets.columns - # ) - - # return train_preds, test_preds - class TrainWithCVSpec(StageSpec): """Train an SBEM model using a scatter gather approach for cross-fold validation.""" diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index f12589c..59e7f91 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -16,7 +16,7 @@ from scythe.hatchet import hatchet from scythe.registry import ExperimentRegistry from scythe.scatter_gather import RecursionMap, ScatterGatherResult, scatter_gather -from scythe.utils.filesys import S3Url +from scythe.utils.filesys import FileReference, S3Url from globi.models.surrogate.dummy import 
DummySimulationInput, dummy_simulation from globi.models.surrogate.training import ( @@ -30,7 +30,7 @@ class FoldResult(ExperimentOutputSpec): """The output for a fold.""" - columns: list[str] + regressor: FileReference class CombineResultsResult(BaseModel): @@ -69,14 +69,22 @@ class RecursionTransition(BaseModel): @ExperimentRegistry.Register( description="Train a regressor with cross-fold validation.", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(hours=1), ) def train_regressor_with_cv_fold( input_spec: TrainFoldSpec, tempdir: Path ) -> FoldResult: """Train a regressor with cross-fold validation.""" # DO TRAINING - _model, _trainer = input_spec.train_pytorch_tabular(tempdir) - return FoldResult(columns=input_spec.data.columns.tolist()) + _model, (global_results, stratum_results), model_path = input_spec.train(tempdir) + return FoldResult( + regressor=model_path, + dataframes={ + "global": global_results, + "stratums": stratum_results, + }, + ) iterative_training = hatchet.workflow( @@ -99,13 +107,13 @@ def create_simulations( specs = [ DummySimulationInput( weather_file="some" if random.random() < 0.5 else "other", # noqa: S311 - a=i, - b=-i, + a=random.randint(-10, 10), # noqa: S311 + b=random.randint(-10, 10), # noqa: S311 c=random.randint(-10, 10), # noqa: S311 experiment_id="placeholder", sort_index=i, ) - for i in range(1000) + for i in range(1_000) ] # STEP 2: Simulate the simulations using scythe @@ -326,6 +334,8 @@ def transition_recursion( ) +# TODO: Final training stage? or should we save models along the way. 
+ if __name__ == "__main__": from scythe.settings import ScytheStorageSettings @@ -345,7 +355,7 @@ def transition_recursion( aliases=["feature.weather.file"], ), iteration=IterationSpec( - max_iters=4, + max_iters=10, ), storage_settings=ScytheStorageSettings(), data_uris=None, diff --git a/src/globi/worker/main.py b/src/globi/worker/main.py index ee7eb38..95fe32d 100644 --- a/src/globi/worker/main.py +++ b/src/globi/worker/main.py @@ -25,7 +25,8 @@ def main(): ) for workflow in workflows: worker.register_workflow(workflow) - worker.register_workflow(iterative_training) + if conf.DOES_FAN: + worker.register_workflow(iterative_training) worker.start() # conf.start() From 8963690874edc34547b55d5241904f00a587a72e Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sun, 8 Mar 2026 14:29:19 -0400 Subject: [PATCH 08/27] enable recursive convergence checks --- src/globi/models/surrogate/training.py | 108 ++++++++++++------------- src/globi/pipelines/training.py | 39 +++++++-- 2 files changed, 85 insertions(+), 62 deletions(-) diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 29b57cc..3959b4a 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -1,5 +1,7 @@ """Models used for the surrogate training pipeline.""" +import fnmatch +import re import warnings from collections.abc import Callable from functools import cached_property @@ -23,25 +25,30 @@ class ConvergenceThresholds(BaseModel): """The thresholds for convergence.""" - mae: float = Field(default=0.5, description="The maximum MAE for convergence.") - rmse: float = Field(default=0.5, description="The maximum RMSE for convergence.") - mape: float = Field(default=0.15, description="The maximum MAPE for convergence.") - r2: float = Field(default=0.95, description="The minimum R2 for convergence.") + # TODO: instead of using a risky hardcoded "n/a" token, make nullability have better support. 
+ mae: float = Field(default=-9e9, description="The maximum MAE for convergence.") + rmse: float = Field(default=-9e9, description="The maximum RMSE for convergence.") + mape: float = Field(default=-9e9, description="The maximum MAPE for convergence.") + r2: float = Field(default=9e9, description="The minimum R2 for convergence.") cvrmse: float = Field( - default=0.05, description="The maximum CV_RMSE for convergence." + default=-9e9, description="The maximum CV_RMSE for convergence." ) - @property - def thresholds(self) -> pd.Series: - """The thresholds for convergence.""" - return pd.Series(self.model_dump(), name="metric") - - def check_convergence(self, metrics: pd.Series): + def check_convergence(self, metrics: pd.Series, target: re.Pattern | None = None): """Check if the metrics have converged. Note that this requires the metrics data frame to have the following shape: """ + # first, we select the data for the relevant targets: + if target is not None: + target_level = metrics.index.get_level_values("target") + # Interpret target as a regex and match + mask = cast(pd.Series, target_level.to_series().astype(str)).str.match( + target + ) + metrics = cast(pd.Series, metrics.loc[mask.values]) + thresholds = pd.Series(self.model_dump(), name="metric") # first, we will select the appropriate threshold for each metric @@ -60,15 +67,41 @@ def check_convergence(self, metrics: pd.Series): # run the comparisons comparison = metrics < comparators + return comparison + + +class ConvergenceThresholdsByTarget(BaseModel): + """The thresholds for convergence by target.""" + + thresholds: dict[str, ConvergenceThresholds] = Field( + default_factory=lambda: {"*": ConvergenceThresholds()}, + description="The thresholds for convergence by target.", + ) + + def make_comparisons(self, metrics: pd.Series) -> list[pd.Series]: + """Generate a list of all stratum/target/metric True/False comparisons.""" + return [ + self.thresholds[target].check_convergence( + metrics, 
re.compile(fnmatch.translate(target)) + ) + for target in self.thresholds + ] + + def combine_and_check_strata_and_targets(self, comparisons: list[pd.Series]): + """Combine the comparisons and aggregate first by targets then by strata.""" + comparison = pd.concat(comparisons, axis=0) # now we will groupby the stratum (e.g. features.weather.file) # and by the target (e.g. Electricity, Gas, etc.) # we are converged if any of the metrics have converged for that target # in that stratum comparison_stratum_and_target = comparison.groupby( level=[lev for lev in comparison.index.names if lev != "metric"] - ).any() + ).any() # TODO: make it configurable such that instead of `any`, we can specify a count, i.e. at least 2 must be converged # then we will check that all targets have converged for each stratum + + # only levels left in multiindex should be stratum and target + comparison_strata = comparison_stratum_and_target.groupby(level="stratum").all() # finally, we will check that all strata have converged @@ -81,6 +114,11 @@ def check_convergence(self, metrics: pd.Series): comparison, ) + def run(self, metrics: pd.Series) -> tuple[bool, pd.Series, pd.Series, pd.Series]: + """Run the convergence criteria.""" + comparisons = self.make_comparisons(metrics) + return self.combine_and_check_strata_and_targets(comparisons) + class XGBTrainerConfig(BaseModel): """The trainer hyperparameters for the xgboost model.""" @@ -277,8 +315,8 @@ class ProgressiveTrainingSpec(ExperimentInputSpec): ..., description="The base run name for the experiment.", ) - convergence_criteria: ConvergenceThresholds = Field( - default_factory=ConvergenceThresholds, + convergence_criteria: ConvergenceThresholdsByTarget = Field( + default_factory=ConvergenceThresholdsByTarget, description="The convergence criteria.", ) regression_io_config: RegressionIOConfigSpec = Field( @@ -1313,45 +1351,3 @@ def schedule(self) -> list[TrainFoldSpec]: ) ) return schedule - - # def check_convergence(self, uri: 
URIResponse, s3_client: S3ClientType): - # """Check the convergence of the training.""" - # with tempfile.TemporaryDirectory() as tempdir: - # tempdir = Path(tempdir) - # results_path = tempdir / "results.hdf" - # # download the results from s3 - # fetch_uri(uri.uri, local_path=results_path, use_cache=False, s3=s3_client) - # results = cast( - # pd.DataFrame, pd.read_hdf(results_path, key="stratum_metrics") - # ) - - # fold_averages = cast( - # pd.Series, - # results.xs( - # "test", - # level="split_segment", - # axis=1, - # ) - # .groupby(level="measurement") - # .mean() - # .unstack(level="measurement"), - # ) - # with tempfile.TemporaryDirectory() as tempdir: - # fold_averages_path = Path(tempdir) / "fold-averaged-errors.pq" - # fold_averages.to_frame( - # name=self.progressive_training_iteration_ix - # ).to_parquet(fold_averages_path) - # key = f"hatchet/{self.experiment_key}/fold-averaged-errors.pq" - # bucket = self.progressive_training_spec.bucket - # s3_client.upload_file(fold_averages_path.as_posix(), bucket, key) - - # ( - # convergence_all, - # convergence_monitor_segment, - # convergence_monitor_segment_and_target, - # convergence, - # ) = self.progressive_training_spec.convergence_criteria.check_convergence( - # fold_averages.xs("Energy", level="measurement") - # ) - - # return convergence_all, convergence diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 59e7f91..b0673f1 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -3,7 +3,7 @@ import random from datetime import timedelta from pathlib import Path -from typing import Literal +from typing import Literal, cast import pandas as pd from hatchet_sdk import Context @@ -82,7 +82,7 @@ def train_regressor_with_cv_fold( regressor=model_path, dataframes={ "global": global_results, - "stratums": stratum_results, + "strata": stratum_results, }, ) @@ -120,7 +120,7 @@ def create_simulations( run_name = f"{spec.experiment_id}/sample" exp = 
BaseExperiment( - # TODO: replace with simulate_globi_flat_building + # TODO: replace with simulate_globi_flat_building, or better yet, allow loading from the registry via config. experiment=dummy_simulation, # TODO: add configurability to switch between simulations. run_name=run_name, storage_settings=spec.storage_settings or ScytheStorageSettings(), @@ -128,7 +128,7 @@ def create_simulations( run, ref = exp.allocate( specs, - version="bumpmajor", # TODO: bump minor if not the first iteration; actually, not necessary since root experiment takes care of this + version="bumpmajor", recursion_map=spec.iteration.recursion, ) @@ -289,8 +289,28 @@ def evaluate_training( spec: ProgressiveTrainingSpec, context: Context ) -> TrainingEvaluationResult: """Evaluate the training.""" - _results = context.task_output(await_training) - return TrainingEvaluationResult(converged=False) + results_output = context.task_output(await_training) + strata = results_output.uris["strata"] + _globals = results_output.uris["global"] + results = pd.read_parquet(str(strata)) + + fold_averages = cast( + pd.Series, + results.xs("test", level="split_segment", axis=1) + .groupby(level="iteration") + .mean() + .unstack(), + ) + # TODO: fold_averages and strata and globals should be saved to s3 + + ( + convergence_all, + _convergence_monitor_segment, + _convergence_monitor_segment_and_target, + _convergence, + ) = spec.convergence_criteria.run(fold_averages) + + return TrainingEvaluationResult(converged=convergence_all) @iterative_training.task( @@ -340,6 +360,8 @@ def transition_recursion( from scythe.settings import ScytheStorageSettings from globi.models.surrogate.training import ( + ConvergenceThresholds, + ConvergenceThresholdsByTarget, ProgressiveTrainingSpec, StratificationSpec, ) @@ -357,6 +379,11 @@ def transition_recursion( iteration=IterationSpec( max_iters=10, ), + convergence_criteria=ConvergenceThresholdsByTarget( + thresholds={ + "*": ConvergenceThresholds(r2=0.975), + }, + ), 
storage_settings=ScytheStorageSettings(), data_uris=None, base_run_name=base_run_name, From 7db748f052e61392b292eafbcdbdcbaf4912f561 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sun, 8 Mar 2026 17:04:18 -0400 Subject: [PATCH 09/27] reorganize some files --- src/globi/models/surrogate/outputs.py | 44 ++++++++++++++++++++ src/globi/models/surrogate/training.py | 8 +++- src/globi/pipelines/training.py | 56 +++++--------------------- 3 files changed, 62 insertions(+), 46 deletions(-) create mode 100644 src/globi/models/surrogate/outputs.py diff --git a/src/globi/models/surrogate/outputs.py b/src/globi/models/surrogate/outputs.py new file mode 100644 index 0000000..0bdb9b6 --- /dev/null +++ b/src/globi/models/surrogate/outputs.py @@ -0,0 +1,44 @@ +"""Outputs for the surrogate model pipeline.""" + +from typing import Literal + +from pydantic import BaseModel +from scythe.experiments import ExperimentRun +from scythe.scatter_gather import ScatterGatherResult + +from globi.models.surrogate.training import TrainWithCVSpec + + +class CombineResultsResult(BaseModel): + """The result of combining the results of the simulations.""" + + incoming: ScatterGatherResult + combined: ScatterGatherResult + + +# TODO: This should perhaps go somewhere else since it is generally useful. 
+class ExperimentRunWithRef(BaseModel): + """An experiment run with a workflow run id.""" + + run: ExperimentRun + workflow_run_id: str + + +class StartTrainingResult(BaseModel): + """The result of starting the training.""" + + training_spec: TrainWithCVSpec + experiment_run_with_ref: ExperimentRunWithRef + + +class TrainingEvaluationResult(BaseModel): + """The result of evaluating the training.""" + + converged: bool + + +class RecursionTransition(BaseModel): + """The transition of the recursion.""" + + reasoning: Literal["max_depth", "converged"] | None + child_workflow_run_id: str | None diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 3959b4a..fa6b5b1 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd from pydantic import BaseModel, Field -from scythe.base import ExperimentInputSpec +from scythe.base import ExperimentInputSpec, ExperimentOutputSpec from scythe.scatter_gather import RecursionMap, ScatterGatherResult from scythe.utils.filesys import FileReference, S3Url @@ -1326,6 +1326,12 @@ def compute_metrics(self, preds: pd.DataFrame, targets: pd.DataFrame): # return f"hatchet/{self.model_dir_key}/{model_name}" +class FoldResult(ExperimentOutputSpec): + """The output for a fold.""" + + regressor: FileReference + + class TrainWithCVSpec(StageSpec): """Train an SBEM model using a scatter gather approach for cross-fold validation.""" diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index b0673f1..5d4b891 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -3,23 +3,29 @@ import random from datetime import timedelta from pathlib import Path -from typing import Literal, cast +from typing import cast import pandas as pd from hatchet_sdk import Context -from pydantic import BaseModel, HttpUrl -from scythe.base import ExperimentOutputSpec +from pydantic 
import HttpUrl from scythe.experiments import ( BaseExperiment, - ExperimentRun, ) from scythe.hatchet import hatchet from scythe.registry import ExperimentRegistry from scythe.scatter_gather import RecursionMap, ScatterGatherResult, scatter_gather -from scythe.utils.filesys import FileReference, S3Url +from scythe.utils.filesys import S3Url from globi.models.surrogate.dummy import DummySimulationInput, dummy_simulation +from globi.models.surrogate.outputs import ( + CombineResultsResult, + ExperimentRunWithRef, + RecursionTransition, + StartTrainingResult, + TrainingEvaluationResult, +) from globi.models.surrogate.training import ( + FoldResult, IterationSpec, ProgressiveTrainingSpec, TrainFoldSpec, @@ -27,46 +33,6 @@ ) -class FoldResult(ExperimentOutputSpec): - """The output for a fold.""" - - regressor: FileReference - - -class CombineResultsResult(BaseModel): - """The result of combining the results of the simulations.""" - - incoming: ScatterGatherResult - combined: ScatterGatherResult - - -class ExperimentRunWithRef(BaseModel): - """An experiment run with a workflow run id.""" - - run: ExperimentRun - workflow_run_id: str - - -class StartTrainingResult(BaseModel): - """The result of starting the training.""" - - training_spec: TrainWithCVSpec - experiment_run_with_ref: ExperimentRunWithRef - - -class TrainingEvaluationResult(BaseModel): - """The result of evaluating the training.""" - - converged: bool - - -class RecursionTransition(BaseModel): - """The transition of the recursion.""" - - reasoning: Literal["max_depth", "converged"] | None - child_workflow_run_id: str | None - - @ExperimentRegistry.Register( description="Train a regressor with cross-fold validation.", schedule_timeout=timedelta(hours=5), From bb657b3d92398f7ed0f2b8edc81e9bbaf95ebab7 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sun, 8 Mar 2026 17:26:47 -0400 Subject: [PATCH 10/27] reorganize modules --- .../models/surrogate/configs/__init__.py | 1 + 
.../models/surrogate/configs/pipeline.py | 292 +++++++++ .../models/surrogate/configs/regression.py | 86 +++ src/globi/models/surrogate/sampling.py | 196 ++++++ src/globi/models/surrogate/training.py | 568 +----------------- src/globi/pipelines/training.py | 10 +- 6 files changed, 586 insertions(+), 567 deletions(-) create mode 100644 src/globi/models/surrogate/configs/__init__.py create mode 100644 src/globi/models/surrogate/configs/pipeline.py create mode 100644 src/globi/models/surrogate/configs/regression.py create mode 100644 src/globi/models/surrogate/sampling.py diff --git a/src/globi/models/surrogate/configs/__init__.py b/src/globi/models/surrogate/configs/__init__.py new file mode 100644 index 0000000..2b2e032 --- /dev/null +++ b/src/globi/models/surrogate/configs/__init__.py @@ -0,0 +1 @@ +"""Configs for the surrogate model pipeline.""" diff --git a/src/globi/models/surrogate/configs/pipeline.py b/src/globi/models/surrogate/configs/pipeline.py new file mode 100644 index 0000000..a088a79 --- /dev/null +++ b/src/globi/models/surrogate/configs/pipeline.py @@ -0,0 +1,292 @@ +"""Configs for the surrogate model pipeline.""" + +import fnmatch +import re +from functools import cached_property +from pathlib import Path +from typing import Literal, cast + +import numpy as np +import pandas as pd +from pydantic import BaseModel, Field +from scythe.base import ExperimentInputSpec +from scythe.scatter_gather import RecursionMap, ScatterGatherResult +from scythe.utils.filesys import FileReference, S3Url + +from globi.models.surrogate.configs.regression import ModelHPType, XGBHyperparameters + + +class IterationSpec(BaseModel): + """The iteration spec.""" + + n_init: int = Field(default=10000, description="The number of initial samples.") + min_per_stratum: int = Field( + default=100, description="The minimum number of samples per stratum." 
+ ) + n_per_iter: int = Field( + default=10000, + description="The number of samples to add per each iteration of the outer loop.", + ) + max_iters: int = Field( + default=100, + description="The maximum number of outer loop iterations to perform.", + ) + recursion: RecursionMap = Field( + default_factory=lambda: RecursionMap(factor=100, max_depth=1), + description="The recursion spec.", + ) + current_iter: int = Field( + default=0, + description="The index of the current training iteration within the outer loop.", + ) + + @property + def at_max_iters(self) -> bool: + """Whether the current iteration is the maximum number of iterations.""" + return self.current_iter + 1 >= self.max_iters + + +class StratificationSpec(BaseModel): + """A spec for stratifying the data.""" + + field: str = Field( + default="feature.weather.file", description="The field to stratify by." + ) + sampling: Literal["equal", "error-weighted", "proportional"] = Field( + default="equal", + description="The sampling method to use over the strata.", + ) + aliases: list[str] = Field( + default_factory=lambda: ["epwzip_path", "epw_path"], + description="The alias to use for the stratum as a fallback.", + ) + + # TODO: consider allowing the stratification to be a compound with e.g. component_map_uri and semantic_fields_uri and database_uri + + +class CrossValidationSpec(BaseModel): + """The cross validation spec.""" + + n_folds: int = Field( + default=5, description="The number of folds for the entire parent task." + ) + + +class ConvergenceThresholds(BaseModel): + """The thresholds for convergence.""" + + # TODO: instead of using a risky hardcoded "n/a" token, make nullability have better support. 
+ mae: float = Field(default=-9e9, description="The maximum MAE for convergence.") + rmse: float = Field(default=-9e9, description="The maximum RMSE for convergence.") + mape: float = Field(default=-9e9, description="The maximum MAPE for convergence.") + r2: float = Field(default=9e9, description="The minimum R2 for convergence.") + cvrmse: float = Field( + default=-9e9, description="The maximum CV_RMSE for convergence." + ) + + def check_convergence(self, metrics: pd.Series, target: re.Pattern | None = None): + """Check if the metrics have converged. + + Note that this requires the metrics data frame to have the following shape: + + """ + # first, we select the data for the relevant targets: + if target is not None: + target_level = metrics.index.get_level_values("target") + # Interpret target as a regex and match + mask = cast(pd.Series, target_level.to_series().astype(str)).str.match( + target + ) + metrics = cast(pd.Series, metrics.loc[mask.values]) + + thresholds = pd.Series(self.model_dump(), name="metric") + + # first, we will select the appropriate threshold for each metric + comparators = thresholds.loc[metrics.index.get_level_values("metric")] + # we can then copy over the index safely + comparators.index = metrics.index + + # next, we will flip the sign of the r2 metric since it is a maximization metric rather than min + metrics = metrics * np.where( + metrics.index.get_level_values("metric") == "r2", -1, 1 + ) + comparators = comparators * np.where( + comparators.index.get_level_values("metric") == "r2", -1, 1 + ) + + # run the comparisons + comparison = metrics < comparators + + return comparison + + +class ConvergenceThresholdsByTarget(BaseModel): + """The thresholds for convergence by target.""" + + thresholds: dict[str, ConvergenceThresholds] = Field( + default_factory=lambda: {"*": ConvergenceThresholds()}, + description="The thresholds for convergence by target.", + ) + + def make_comparisons(self, metrics: pd.Series) -> list[pd.Series]: + 
"""Generate a list of all stratum/target/metric True/False comparisons.""" + return [ + self.thresholds[target].check_convergence( + metrics, re.compile(fnmatch.translate(target)) + ) + for target in self.thresholds + ] + + def combine_and_check_strata_and_targets(self, comparisons: list[pd.Series]): + """Combine the comparisons and aggregate first by targets then by strata.""" + comparison = pd.concat(comparisons, axis=0) + # now we will groupby the stratum (e.g. features.weather.file) + # and by the target (e.g. Electricity, Gas, etc.) + # we are converged if any of the metrics have converged for that target + # in that stratum + comparison_stratum_and_target = comparison.groupby( + level=[lev for lev in comparison.index.names if lev != "metric"] + ).any() # TODO: make it configurable such that instead of `any`, we can specify a count, i.e. at least 2 must be converged + + # then we will check that all targets have converged for each stratum + + # only levels left in multiindex should be stratum and target + + comparison_strata = comparison_stratum_and_target.groupby(level="stratum").all() + + # finally, we will check that all strata have converged + comparison_all = comparison_strata.all() + + return ( + comparison_all, + comparison_strata, + comparison_stratum_and_target, + comparison, + ) + + def run(self, metrics: pd.Series) -> tuple[bool, pd.Series, pd.Series, pd.Series]: + """Run the convergence criteria.""" + comparisons = self.make_comparisons(metrics) + return self.combine_and_check_strata_and_targets(comparisons) + + +class TargetsConfigSpec(BaseModel): + """The targets config spec.""" + + columns: list[str] = Field( + default_factory=list, description="The columns to use as targets." + ) + normalization: Literal["min-max", "standard", "none"] = Field( + default="none", description="The normalization method to use." 
+ ) + + +class FeatureConfigSpec(BaseModel): + """The feature config spec.""" + + continuous_columns: frozenset[str] = Field( + default=frozenset(), description="The continuous columns to use as features." + ) + categorical_columns: frozenset[str] = Field( + default=frozenset(), description="The categorical columns to use as features." + ) + exclude_columns: frozenset[str] = Field( + default=frozenset(), + description="The columns to exclude from the features.", + ) + cont_cat_unicity_transition_threshold: int = Field( + default=10, + description="The threshold for the number of unique values to transition from continuous to categorical variable.", + ) + + +class RegressionIOConfigSpec(BaseModel): + """The input/output spec for a regression model.""" + + targets: TargetsConfigSpec = Field( + default_factory=TargetsConfigSpec, description="The targets config spec." + ) + features: FeatureConfigSpec = Field( + default_factory=FeatureConfigSpec, + description="The features config spec.", + ) + + +class ProgressiveTrainingSpec(ExperimentInputSpec): + """A spec for iteratively training an SBEM regression model.""" + + base_run_name: str = Field( + ..., + description="The base run name for the experiment.", + ) + convergence_criteria: ConvergenceThresholdsByTarget = Field( + default_factory=ConvergenceThresholdsByTarget, + description="The convergence criteria.", + ) + regression_io_config: RegressionIOConfigSpec = Field( + default_factory=RegressionIOConfigSpec, + description="The regression io config spec.", + ) + hyperparameters: ModelHPType = Field( + default_factory=XGBHyperparameters, + description="The hyperparameters for the model.", + ) + stratification: StratificationSpec = Field( + default_factory=StratificationSpec, + description="The stratification spec.", + ) + cross_val: CrossValidationSpec = Field( + default_factory=CrossValidationSpec, + description="The cross validation spec.", + ) + iteration: IterationSpec = Field( + default_factory=IterationSpec, + 
description="The iteration spec.", + ) + gis_uri: FileReference = Field( + ..., + description="The uri of the gis data to train on.", + ) + data_uris: ScatterGatherResult | None = Field( + default=None, + description="The uri of the previous simulation results to train on.", + ) + + def format_combined_output_key(self, key: str) -> str: + """Format the output key for a combined result file.""" + return f"{self.prefix}/combined/{key}.parquet" + + def format_combined_output_uri(self, key: str) -> S3Url: + """Format the output uri for a combined result file.""" + if self.storage_settings is None: + msg = "Storage settings are not set, so we can't construct a combined output uri." + raise ValueError(msg) + return S3Url( + f"s3://{self.storage_settings.BUCKET}/{self.format_combined_output_key(key)}" + ) + + @property + def gis_path(self) -> Path: + """The path to the gis data.""" + if isinstance(self.gis_uri, Path): + return self.gis_uri + return self.fetch_uri(self.gis_uri) + + @cached_property + def gis_data(self) -> pd.DataFrame: + """Load the gis data.""" + return pd.read_parquet(self.gis_path) + + +class StageSpec(BaseModel): + """A spec that is common to both the sample and train stages (and possibly others).""" + + parent: ProgressiveTrainingSpec = Field( + ..., + description="The parent spec.", + ) + + @cached_property + def random_generator(self) -> np.random.Generator: + """The random generator.""" + return np.random.default_rng(self.parent.iteration.current_iter) diff --git a/src/globi/models/surrogate/configs/regression.py b/src/globi/models/surrogate/configs/regression.py new file mode 100644 index 0000000..b65c64a --- /dev/null +++ b/src/globi/models/surrogate/configs/regression.py @@ -0,0 +1,86 @@ +"""Configs for the surrogate model pipeline.""" + +from typing import Any, Literal + +from pydantic import BaseModel, Field + + +class XGBTrainerConfig(BaseModel): + """The trainer hyperparameters for the xgboost model.""" + + num_boost_round: int = Field( + 
default=4000, description="The number of boosting rounds." + ) + early_stopping_rounds: int = Field( + default=10, description="The number of boosting rounds to early stop." + ) + verbose_eval: bool = Field( + default=True, description="Whether to print verbose evaluation results." + ) + + +class XGBModelConfig(BaseModel): + """The model hyperparameters for the xgboost model.""" + + max_depth: int = Field(default=5, description="The maximum depth of the tree.") + eta: float = Field(default=0.1, description="The learning rate.") + min_child_weight: int | None = Field( + default=3, description="The minimum child weight." + ) + subsample: float | None = Field(default=None, description="The subsample rate.") + colsample_bytree: float | None = Field( + default=None, description="The column sample by tree rate." + ) + alpha: float | None = Field(default=None, description="The alpha parameter.") + lam: float | None = Field(default=None, description="The lambda parameter.") + gamma: float | None = Field(default=None, description="The gamma parameter.") + seed: int = Field( + default=42, description="The seed for the random number generator." 
+ ) + + @property + def param_dict(self) -> dict[str, Any]: + """The dictionary of parameters.""" + import torch + + data = { + "objective": "reg:squarederror", + "eval_metric": "rmse", + "tree_method": "auto", + "seed": self.seed, + # hyperparameters + **self.model_dump( + exclude_none=True, + ), + } + if torch.cuda.is_available(): + data["device"] = "cuda" + return data + + +class XGBHyperparameters(BaseModel): + """The parameters for the xgboost model.""" + + hp: XGBModelConfig = Field( + default_factory=XGBModelConfig, + description="The hyperparameters for the model.", + ) + trainer: XGBTrainerConfig = Field( + default_factory=XGBTrainerConfig, + description="The trainer hyperparameters for the model.", + ) + + +class LGBHyperparameters(BaseModel): + """The parameters for the lightgbm model.""" + + objective: Literal["regression", "binary", "multiclass"] = Field( + default="regression", description="The objective function to use." + ) + metric: Literal["rmse"] = Field( + default="rmse", description="The metric to optimize." 
+ ) + # TODO: add other parameters as needed + + +ModelHPType = XGBHyperparameters | LGBHyperparameters diff --git a/src/globi/models/surrogate/sampling.py b/src/globi/models/surrogate/sampling.py new file mode 100644 index 0000000..b4bb1d8 --- /dev/null +++ b/src/globi/models/surrogate/sampling.py @@ -0,0 +1,196 @@ +"""Models used for the training set sampling pipeline.""" + +from typing import cast + +import pandas as pd + +from globi.models.surrogate.configs.pipeline import StageSpec + + +class SampleSpec(StageSpec): + """A spec for the sampling stage of the progressive training.""" + + # TODO: add the ability to receive the last set of error metrics and use them to inform the sampling + + def stratified_selection(self) -> pd.DataFrame: + """Sample the gis data.""" + df = self.parent.gis_data + + stratification_field = self.parent.stratification.field + stratification_aliases = self.parent.stratification.aliases + + if stratification_field not in df.columns and not any( + alias in df.columns for alias in stratification_aliases + ): + msg = f"Stratification field {stratification_field} not found in gis data. Please check the field name and/or the aliases." + raise ValueError(msg) + + if stratification_field not in df.columns: + stratification_field = next( + alias for alias in stratification_aliases if alias in df.columns + ) + + strata = cast(list[str], df[stratification_field].unique().tolist()) + + if self.parent.stratification.sampling == "equal": + return self.sample_equally_by_stratum(df, strata, stratification_field) + elif self.parent.stratification.sampling == "error-weighted": + msg = "Error-weighted sampling is not yet implemented." + raise NotImplementedError(msg) + elif self.parent.stratification.sampling == "proportional": + msg = "Proportional sampling is not yet implemented." 
+ raise NotImplementedError(msg) + else: + msg = f"Invalid sampling method: {self.parent.stratification.sampling}" + raise ValueError(msg) + + def sample_equally_by_stratum( + self, df: pd.DataFrame, strata: list[str], stratification_field: str + ) -> pd.DataFrame: + """Sample equally by stratum. + + This will break the dataframe up into n strata and ensure that each stratum ends up with the same number of samples. + + Args: + df (pd.DataFrame): The dataframe to sample from. + strata (list[str]): The unique values of the strata. + stratification_field (str): The field to stratify the data by. + + Returns: + samples (pd.DataFrame): The sampled dataframe. + """ + stratum_dfs = { + stratum: df[df[stratification_field] == stratum] for stratum in strata + } + n_per_iter = ( + self.parent.iteration.n_per_iter + if self.parent.iteration.current_iter != 0 + else self.parent.iteration.n_init + ) + n_per_stratum = max( + n_per_iter // len(strata), + ( + self.parent.iteration.min_per_stratum + if self.parent.iteration.current_iter == 0 + else 0 + ), + ) + + # TODO: consider how we want to handle potentially having the same geometry appear in both + # the training and testing sets. + # if any(len(stratum_df) < n_per_stratum for stratum_df in stratum_dfs.values()): + # msg = "There are not enough buildings in some strata to sample the desired number of buildings per stratum." + # # consider making this a warning? + # raise ValueError(msg) + + sampled_strata = { + stratum: stratum_df.sample( + n=n_per_stratum, random_state=self.random_generator, replace=True + ) + for stratum, stratum_df in stratum_dfs.items() + } + return cast(pd.DataFrame, pd.concat(sampled_strata.values())) + + # def sample_semantic_fields(self, df: pd.DataFrame) -> pd.DataFrame: + # """Sample the semantic fields.""" + # # TODO: consider randomizing the locations? 
+ # semantic_fields = self.progressive_training_spec.semantic_fields_data + # for field in semantic_fields.Fields: + # if isinstance(field, CategoricalFieldSpec): + # options = field.Options + # df[field.Name] = self.random_generator.choice(options, size=len(df)) + # elif isinstance(field, NumericFieldSpec): + # df[field.Name] = self.random_generator.uniform( + # field.Min, field.Max, size=len(df) + # ) + # else: + # msg = f"Invalid field type: {type(field)}" + # raise TypeError(msg) + # return df + + # def sample_basements_and_attics(self, df: pd.DataFrame) -> pd.DataFrame: + # """Add basement/attics to models.""" + # # get the options for the type literal + # options: list[BasementAtticOccupationConditioningStatus] = [ + # "none", + # "occupied_unconditioned", + # "unoccupied_unconditioned", + # "occupied_conditioned", + # "unoccupied_conditioned", + # ] + # weights = [0.5, *([0.5 / 4] * 4)] + # # sample the type literal + # df["basement"] = self.random_generator.choice(options, size=len(df), p=weights) + # df["attic"] = self.random_generator.choice(options, size=len(df), p=weights) + # df["exposed_basement_frac"] = self.random_generator.uniform( + # 0.1, 0.5, size=len(df) + # ) + # return df + + # def sample_wwrs(self, df: pd.DataFrame) -> pd.DataFrame: + # """Sample the wwrs.""" + # wwr_min = 0.05 + # wwr_max = 0.35 + # df["wwr"] = self.random_generator.uniform(wwr_min, wwr_max, size=len(df)) + # return df + + # def sample_f2f_heights(self, df: pd.DataFrame) -> pd.DataFrame: + # """Sample the f2f heights.""" + # f2f_min = 2.3 + # f2f_max = 4.3 + # df["f2f_height"] = self.random_generator.uniform(f2f_min, f2f_max, size=len(df)) + # return df + + def to_sim_specs(self, df: pd.DataFrame): + """Convert the sampled dataframe to a list of simulation specs. + + For now, we are assuming that all the other necessary fields are present and we are just + ensuring that sort_index and experiment_id are set appropriately. 
+ """ + # df["semantic_field_context"] = df.apply( + # lambda row: { + # field.Name: row[field.Name] + # for field in self.progressive_training_spec.semantic_fields_data.Fields + # }, + # axis=1, + # ) + # df["sort_index"] = np.arange(len(df)) + # df["experiment_id"] = self.experiment_key + # # TODO: consider allowing the component map/semantic_fields/database to be inherited from the row + # # e.g. to allow multiple component maps and dbs per run. + # df["component_map_uri"] = str(self.progressive_training_spec.component_map_uri) + # df["semantic_fields_uri"] = str( + # self.progressive_training_spec.semantic_fields_uri + # ) + # df["db_uri"] = str(self.progressive_training_spec.database_uri) + return df + + # def make_payload(self, s3_client: S3ClientType): + # """Make the payload for the scatter gather task, including generating the simulation specs and serializing them to s3.""" + # df = self.stratified_selection() + # # df = self.sample_semantic_fields(df) + # # df = self.sample_basements_and_attics(df) + # # df = self.sample_wwrs(df) + # # df = self.sample_f2f_heights(df) + # df = self.to_sim_specs(df) + # # serialize to a parquet file and upload to s3 + # bucket = self.progressive_training_spec.storage_settings.BUCKET + # with tempfile.TemporaryDirectory() as tmpdir: + # tmpdir = Path(tmpdir) + # fpath = tmpdir / "specs.pq" + # df.to_parquet(fpath) + # key = f"hatchet/{self.experiment_key}/specs.pq" + # specs_uri = f"s3://{bucket}/{key}" + # s3_client.upload_file(fpath.as_posix(), bucket, key) + + # payload = { + # "specs": specs_uri, + # "bucket": bucket, + # "workflow_name": "simulate_sbem_shoebox", + # "experiment_id": self.experiment_key, + # "recursion_map": { + # "factor": self.progressive_training_spec.iteration.recursion_factor, + # "max_depth": self.progressive_training_spec.iteration.recursion_max_depth, + # }, + # } + # return payload diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 
fa6b5b1..b3fa03e 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -1,292 +1,27 @@ """Models used for the surrogate training pipeline.""" -import fnmatch -import re import warnings from collections.abc import Callable from functools import cached_property from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, cast import numpy as np import pandas as pd -from pydantic import BaseModel, Field +from pydantic import Field from scythe.base import ExperimentInputSpec, ExperimentOutputSpec -from scythe.scatter_gather import RecursionMap, ScatterGatherResult +from scythe.scatter_gather import ScatterGatherResult from scythe.utils.filesys import FileReference, S3Url +from globi.models.surrogate.configs.pipeline import ProgressiveTrainingSpec, StageSpec +from globi.models.surrogate.configs.regression import XGBHyperparameters + if TYPE_CHECKING: from mypy_boto3_s3.client import S3Client as S3ClientType else: S3ClientType = object -# TODO: allow specific configuration per column. -class ConvergenceThresholds(BaseModel): - """The thresholds for convergence.""" - - # TODO: instead of using a risky hardcoded "n/a" token, make nullability have better support. - mae: float = Field(default=-9e9, description="The maximum MAE for convergence.") - rmse: float = Field(default=-9e9, description="The maximum RMSE for convergence.") - mape: float = Field(default=-9e9, description="The maximum MAPE for convergence.") - r2: float = Field(default=9e9, description="The minimum R2 for convergence.") - cvrmse: float = Field( - default=-9e9, description="The maximum CV_RMSE for convergence." - ) - - def check_convergence(self, metrics: pd.Series, target: re.Pattern | None = None): - """Check if the metrics have converged. 
- - Note that this requires the metrics data frame to have the following shape: - - """ - # first, we select the data for the relevant targets: - if target is not None: - target_level = metrics.index.get_level_values("target") - # Interpret target as a regex and match - mask = cast(pd.Series, target_level.to_series().astype(str)).str.match( - target - ) - metrics = cast(pd.Series, metrics.loc[mask.values]) - - thresholds = pd.Series(self.model_dump(), name="metric") - - # first, we will select the appropriate threshold for each metric - comparators = thresholds.loc[metrics.index.get_level_values("metric")] - # we can then copy over the index safely - comparators.index = metrics.index - - # next, we will flip the sign of the r2 metric since it is a maximization metric rather thin min - metrics = metrics * np.where( - metrics.index.get_level_values("metric") == "r2", -1, 1 - ) - comparators = comparators * np.where( - comparators.index.get_level_values("metric") == "r2", -1, 1 - ) - - # run the comparisons - comparison = metrics < comparators - - return comparison - - -class ConvergenceThresholdsByTarget(BaseModel): - """The thresholds for convergence by target.""" - - thresholds: dict[str, ConvergenceThresholds] = Field( - default_factory=lambda: {"*": ConvergenceThresholds()}, - description="The thresholds for convergence by target.", - ) - - def make_comparisons(self, metrics: pd.Series) -> list[pd.Series]: - """Generate a list of all stratum/target/metric True/False comparisons.""" - return [ - self.thresholds[target].check_convergence( - metrics, re.compile(fnmatch.translate(target)) - ) - for target in self.thresholds - ] - - def combine_and_check_strata_and_targets(self, comparisons: list[pd.Series]): - """Combine the comparisons and aggregate first by targets then by strata.""" - comparison = pd.concat(comparisons, axis=0) - # now we will groupby the stratum (e.g. features.weather.file) - # and by the target (e.g. Electricity, Gas, etc.) 
- # we are converged if any of the metrics have converged for that target - # in that stratum - comparison_stratum_and_target = comparison.groupby( - level=[lev for lev in comparison.index.names if lev != "metric"] - ).any() # TODO: make it configurable such that instead of `any`, we can specify a count, i.e. at least 2 must be converged - - # then we will check that all targets have converged for each stratum - - # only levels left in multiindex should be stratum and target - - comparison_strata = comparison_stratum_and_target.groupby(level="stratum").all() - - # finally, we will check that all strata have converged - comparison_all = comparison_strata.all() - - return ( - comparison_all, - comparison_strata, - comparison_stratum_and_target, - comparison, - ) - - def run(self, metrics: pd.Series) -> tuple[bool, pd.Series, pd.Series, pd.Series]: - """Run the convergence criteria.""" - comparisons = self.make_comparisons(metrics) - return self.combine_and_check_strata_and_targets(comparisons) - - -class XGBTrainerConfig(BaseModel): - """The trainer hyperparameters for the xgboost model.""" - - num_boost_round: int = Field( - default=4000, description="The number of boosting rounds." - ) - early_stopping_rounds: int = Field( - default=10, description="The number of boosting rounds to early stop." - ) - verbose_eval: bool = Field( - default=True, description="Whether to print verbose evaluation results." - ) - - -class XGBModelConfig(BaseModel): - """The model hyperparameters for the xgboost model.""" - - max_depth: int = Field(default=5, description="The maximum depth of the tree.") - eta: float = Field(default=0.1, description="The learning rate.") - min_child_weight: int | None = Field( - default=3, description="The minimum child weight." - ) - subsample: float | None = Field(default=None, description="The subsample rate.") - colsample_bytree: float | None = Field( - default=None, description="The column sample by tree rate." 
- ) - alpha: float | None = Field(default=None, description="The alpha parameter.") - lam: float | None = Field(default=None, description="The lambda parameter.") - gamma: float | None = Field(default=None, description="The gamma parameter.") - seed: int = Field( - default=42, description="The seed for the random number generator." - ) - - @property - def param_dict(self) -> dict[str, Any]: - """The dictionary of parameters.""" - import torch - - data = { - "objective": "reg:squarederror", - "eval_metric": "rmse", - "tree_method": "auto", - "seed": self.seed, - # hyperparameters - **self.model_dump( - exclude_none=True, - ), - } - if torch.cuda.is_available(): - data["device"] = "cuda" - return data - - -class XGBHyperparameters(BaseModel): - """The parameters for the xgboost model.""" - - hp: XGBModelConfig = Field( - default_factory=XGBModelConfig, - description="The hyperparameters for the model.", - ) - trainer: XGBTrainerConfig = Field( - default_factory=XGBTrainerConfig, - description="The trainer hyperparameters for the model.", - ) - - -class LGBHyperparameters(BaseModel): - """The parameters for the lightgbm model.""" - - objective: Literal["regression", "binary", "multiclass"] = Field( - default="regression", description="The objective function to use." - ) - metric: Literal["rmse"] = Field( - default="rmse", description="The metric to optimize." - ) - # TODO: add other parameters as needed - - -ModelHPType = XGBHyperparameters | LGBHyperparameters - - -class StratificationSpec(BaseModel): - """A spec for stratifying the data.""" - - field: str = Field( - default="feature.weather.file", description="The field to stratify by." 
- ) - sampling: Literal["equal", "error-weighted", "proportional"] = Field( - default="equal", - description="The sampling method to use over the strata.", - ) - aliases: list[str] = Field( - default_factory=lambda: ["epwzip_path", "epw_path"], - description="The alias to use for the stratum as a fallback.", - ) - - # TODO: consider allowing the stratification to be a compound with e.g. component_map_uri and semantic_fields_uri and database_uri - - -class CrossValidationSpec(BaseModel): - """The cross validation spec.""" - - n_folds: int = Field( - default=5, description="The number of folds for the entire parent task." - ) - - -class IterationSpec(BaseModel): - """The iteration spec.""" - - n_init: int = Field(default=10000, description="The number of initial samples.") - min_per_stratum: int = Field( - default=100, description="The minimum number of samples per stratum." - ) - n_per_iter: int = Field( - default=10000, - description="The number of samples to add per each iteration of the outer loop.", - ) - max_iters: int = Field( - default=100, - description="The maximum number of outer loop iterations to perform.", - ) - recursion: RecursionMap = Field( - default_factory=lambda: RecursionMap(factor=100, max_depth=1), - description="The recursion spec.", - ) - current_iter: int = Field( - default=0, - description="The index of the current training iteration within the outer loop.", - ) - - @property - def at_max_iters(self) -> bool: - """Whether the current iteration is the maximum number of iterations.""" - return self.current_iter + 1 >= self.max_iters - - -class TargetsConfigSpec(BaseModel): - """The targets config spec.""" - - columns: list[str] = Field( - default_factory=list, description="The columns to use as targets." - ) - normalization: Literal["min-max", "standard", "none"] = Field( - default="none", description="The normalization method to use." 
- ) - - -class FeatureConfigSpec(BaseModel): - """The feature config spec.""" - - continuous_columns: frozenset[str] = Field( - default=frozenset(), description="The continuous columns to use as features." - ) - categorical_columns: frozenset[str] = Field( - default=frozenset(), description="The categorical columns to use as features." - ) - exclude_columns: frozenset[str] = Field( - default=frozenset(), - description="The columns to exclude from the features.", - ) - cont_cat_unicity_transition_threshold: int = Field( - default=10, - description="The threshold for the number of unique values to transition from continuous to categorical variable.", - ) - - EXCLUDED_COLUMNS = frozenset({ "experiment_id", "sort_index", @@ -295,288 +30,6 @@ class FeatureConfigSpec(BaseModel): }) -class RegressionIOConfigSpec(BaseModel): - """The input/output spec for a regression model.""" - - targets: TargetsConfigSpec = Field( - default_factory=TargetsConfigSpec, description="The targets config spec." - ) - features: FeatureConfigSpec = Field( - default_factory=FeatureConfigSpec, - description="The features config spec.", - ) - - -# TODO: should this be a subclass of ExperimentInputSpec? 
-class ProgressiveTrainingSpec(ExperimentInputSpec): - """A spec for iteratively training an SBEM regression model.""" - - base_run_name: str = Field( - ..., - description="The base run name for the experiment.", - ) - convergence_criteria: ConvergenceThresholdsByTarget = Field( - default_factory=ConvergenceThresholdsByTarget, - description="The convergence criteria.", - ) - regression_io_config: RegressionIOConfigSpec = Field( - default_factory=RegressionIOConfigSpec, - description="The regression io config spec.", - ) - hyperparameters: ModelHPType = Field( - default_factory=XGBHyperparameters, - description="The hyperparameters for the model.", - ) - stratification: StratificationSpec = Field( - default_factory=StratificationSpec, - description="The stratification spec.", - ) - cross_val: CrossValidationSpec = Field( - default_factory=CrossValidationSpec, - description="The cross validation spec.", - ) - iteration: IterationSpec = Field( - default_factory=IterationSpec, - description="The iteration spec.", - ) - gis_uri: FileReference = Field( - ..., - description="The uri of the gis data to train on.", - ) - data_uris: ScatterGatherResult | None = Field( - default=None, - description="The uri of the previous simulation results to train on.", - ) - - def format_combined_output_key(self, key: str) -> str: - """Format the output key for a combined result file.""" - return f"{self.prefix}/combined/{key}.parquet" - - def format_combined_output_uri(self, key: str) -> S3Url: - """Format the output uri for a combined result file.""" - if self.storage_settings is None: - msg = "Storage settings are not set, so we can't construct a combined output uri." 
- raise ValueError(msg) - return S3Url( - f"s3://{self.storage_settings.BUCKET}/{self.format_combined_output_key(key)}" - ) - - @property - def gis_path(self) -> Path: - """The path to the gis data.""" - if isinstance(self.gis_uri, Path): - return self.gis_uri - return self.fetch_uri(self.gis_uri) - - @cached_property - def gis_data(self) -> pd.DataFrame: - """Load the gis data.""" - return pd.read_parquet(self.gis_path) - - -class StageSpec(BaseModel): - """A spec that is common to both the sample and train stages (and possibly others).""" - - parent: ProgressiveTrainingSpec = Field( - ..., - description="The parent spec.", - ) - - @cached_property - def random_generator(self) -> np.random.Generator: - """The random generator.""" - return np.random.default_rng(self.parent.iteration.current_iter) - - -class SampleSpec(StageSpec): - """A spec for the sampling stage of the progressive training.""" - - # TODO: add the ability to receive the last set of error metrics and use them to inform the sampling - - def stratified_selection(self) -> pd.DataFrame: - """Sample the gis data.""" - df = self.parent.gis_data - - stratification_field = self.parent.stratification.field - stratification_aliases = self.parent.stratification.aliases - - if stratification_field not in df.columns and not any( - alias in df.columns for alias in stratification_aliases - ): - msg = f"Stratification field {stratification_field} not found in gis data. Please check the field name and/or the aliases." - raise ValueError(msg) - - if stratification_field not in df.columns: - stratification_field = next( - alias for alias in stratification_aliases if alias in df.columns - ) - - strata = cast(list[str], df[stratification_field].unique().tolist()) - - if self.parent.stratification.sampling == "equal": - return self.sample_equally_by_stratum(df, strata, stratification_field) - elif self.parent.stratification.sampling == "error-weighted": - msg = "Error-weighted sampling is not yet implemented." 
- raise NotImplementedError(msg) - elif self.parent.stratification.sampling == "proportional": - msg = "Proportional sampling is not yet implemented." - raise NotImplementedError(msg) - else: - msg = f"Invalid sampling method: {self.parent.stratification.sampling}" - raise ValueError(msg) - - def sample_equally_by_stratum( - self, df: pd.DataFrame, strata: list[str], stratification_field: str - ) -> pd.DataFrame: - """Sample equally by stratum. - - This will break the dataframe up into n strata and ensure that each strata ends up with the same number of samples. - - Args: - df (pd.DataFrame): The dataframe to sample from. - strata (list[str]): The unique values of the strata. - stratification_field (str): The field to stratify the data by. - - Returns: - samples (pd.DataFrame): The sampled dataframe. - """ - stratum_dfs = { - stratum: df[df[stratification_field] == stratum] for stratum in strata - } - n_per_iter = ( - self.parent.iteration.n_per_iter - if self.parent.iteration.current_iter != 0 - else self.parent.iteration.n_init - ) - n_per_stratum = max( - n_per_iter // len(strata), - ( - self.parent.iteration.min_per_stratum - if self.parent.iteration.current_iter == 0 - else 0 - ), - ) - - # TODO: consider how we want to handle potentially having the same geometry appear in both - # the training and testing sets. - # if any(len(stratum_df) < n_per_stratum for stratum_df in stratum_dfs.values()): - # msg = "There are not enough buildings in some strata to sample the desired number of buildings per stratum." - # # connsider making this a warning? 
- # raise ValueError(msg) - - sampled_strata = { - stratum: stratum_df.sample( - n=n_per_stratum, random_state=self.random_generator, replace=True - ) - for stratum, stratum_df in stratum_dfs.items() - } - return cast(pd.DataFrame, pd.concat(sampled_strata.values())) - - # def sample_semantic_fields(self, df: pd.DataFrame) -> pd.DataFrame: - # """Sample the semantic fields.""" - # # TODO: consider randomizing the locations? - # semantic_fields = self.progressive_training_spec.semantic_fields_data - # for field in semantic_fields.Fields: - # if isinstance(field, CategoricalFieldSpec): - # options = field.Options - # df[field.Name] = self.random_generator.choice(options, size=len(df)) - # elif isinstance(field, NumericFieldSpec): - # df[field.Name] = self.random_generator.uniform( - # field.Min, field.Max, size=len(df) - # ) - # else: - # msg = f"Invalid field type: {type(field)}" - # raise TypeError(msg) - # return df - - # def sample_basements_and_attics(self, df: pd.DataFrame) -> pd.DataFrame: - # """Add basement/attics to models.""" - # # get the options for the type literal - # options: list[BasementAtticOccupationConditioningStatus] = [ - # "none", - # "occupied_unconditioned", - # "unoccupied_unconditioned", - # "occupied_conditioned", - # "unoccupied_conditioned", - # ] - # weights = [0.5, *([0.5 / 4] * 4)] - # # sample the type literal - # df["basement"] = self.random_generator.choice(options, size=len(df), p=weights) - # df["attic"] = self.random_generator.choice(options, size=len(df), p=weights) - # df["exposed_basement_frac"] = self.random_generator.uniform( - # 0.1, 0.5, size=len(df) - # ) - # return df - - # def sample_wwrs(self, df: pd.DataFrame) -> pd.DataFrame: - # """Sample the wwrs.""" - # wwr_min = 0.05 - # wwr_max = 0.35 - # df["wwr"] = self.random_generator.uniform(wwr_min, wwr_max, size=len(df)) - # return df - - # def sample_f2f_heights(self, df: pd.DataFrame) -> pd.DataFrame: - # """Sample the f2f heights.""" - # f2f_min = 2.3 - # f2f_max = 
4.3 - # df["f2f_height"] = self.random_generator.uniform(f2f_min, f2f_max, size=len(df)) - # return df - - def to_sim_specs(self, df: pd.DataFrame): - """Convert the sampled dataframe to a list of simulation specs. - - For now, we are assuming that all the other necessary fields are present and we are just - ensuring that sort_index and experiment_id are set appropriately. - """ - # df["semantic_field_context"] = df.apply( - # lambda row: { - # field.Name: row[field.Name] - # for field in self.progressive_training_spec.semantic_fields_data.Fields - # }, - # axis=1, - # ) - # df["sort_index"] = np.arange(len(df)) - # df["experiment_id"] = self.experiment_key - # # TODO: consider allowing the component map/semantic_fields/database to be inherited from the row - # # e.g. to allow multiple component maps and dbs per run. - # df["component_map_uri"] = str(self.progressive_training_spec.component_map_uri) - # df["semantic_fields_uri"] = str( - # self.progressive_training_spec.semantic_fields_uri - # ) - # df["db_uri"] = str(self.progressive_training_spec.database_uri) - return df - - # def make_payload(self, s3_client: S3ClientType): - # """Make the payload for the scatter gather task, including generating the simulation specs and serializing them to s3.""" - # df = self.stratified_selection() - # # df = self.sample_semantic_fields(df) - # # df = self.sample_basements_and_attics(df) - # # df = self.sample_wwrs(df) - # # df = self.sample_f2f_heights(df) - # df = self.to_sim_specs(df) - # # serialize to a parquet file and upload to s3 - # bucket = self.progressive_training_spec.storage_settings.BUCKET - # with tempfile.TemporaryDirectory() as tmpdir: - # tmpdir = Path(tmpdir) - # fpath = tmpdir / "specs.pq" - # df.to_parquet(fpath) - # key = f"hatchet/{self.experiment_key}/specs.pq" - # specs_uri = f"s3://{bucket}/{key}" - # s3_client.upload_file(fpath.as_posix(), bucket, key) - - # payload = { - # "specs": specs_uri, - # "bucket": bucket, - # "workflow_name": 
"simulate_sbem_shoebox", - # "experiment_id": self.experiment_key, - # "recursion_map": { - # "factor": self.progressive_training_spec.iteration.recursion_factor, - # "max_depth": self.progressive_training_spec.iteration.recursion_max_depth, - # }, - # } - # return payload - - class TrainFoldSpec(ExperimentInputSpec): """Train an sbem model for a specific fold. @@ -1316,15 +769,6 @@ def compute_metrics(self, preds: pd.DataFrame, targets: pd.DataFrame): # train_preds = pd.concat(train_preds, axis=1) # return train_preds, test_preds - # @property - # def model_dir_key(self) -> str: - # """Get the key for the model directory.""" - # return f"{self.experiment_id}/{self.sort_index}/models" - - # def format_model_key(self, model_name: str) -> str: - # """Format the model key.""" - # return f"hatchet/{self.model_dir_key}/{model_name}" - class FoldResult(ExperimentOutputSpec): """The output for a fold.""" diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 5d4b891..f9b7054 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -7,13 +7,13 @@ import pandas as pd from hatchet_sdk import Context -from pydantic import HttpUrl from scythe.experiments import ( BaseExperiment, ) from scythe.hatchet import hatchet from scythe.registry import ExperimentRegistry from scythe.scatter_gather import RecursionMap, ScatterGatherResult, scatter_gather +from scythe.settings import ScytheStorageSettings from scythe.utils.filesys import S3Url from globi.models.surrogate.dummy import DummySimulationInput, dummy_simulation @@ -26,7 +26,6 @@ ) from globi.models.surrogate.training import ( FoldResult, - IterationSpec, ProgressiveTrainingSpec, TrainFoldSpec, TrainWithCVSpec, @@ -323,12 +322,13 @@ def transition_recursion( # TODO: Final training stage? or should we save models along the way. 
if __name__ == "__main__": + from pydantic import HttpUrl from scythe.settings import ScytheStorageSettings - from globi.models.surrogate.training import ( + from globi.models.surrogate.configs.pipeline import ( ConvergenceThresholds, ConvergenceThresholdsByTarget, - ProgressiveTrainingSpec, + IterationSpec, StratificationSpec, ) @@ -343,7 +343,7 @@ def transition_recursion( aliases=["feature.weather.file"], ), iteration=IterationSpec( - max_iters=10, + max_iters=3, ), convergence_criteria=ConvergenceThresholdsByTarget( thresholds={ From 31663da2057e95dba6d6e3c3826433ad88be89c7 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Sun, 8 Mar 2026 20:30:19 -0400 Subject: [PATCH 11/27] enable runnable selection at surrogate level --- src/globi/allocate.py | 4 ++-- src/globi/models/surrogate/configs/pipeline.py | 3 ++- src/globi/models/surrogate/training.py | 3 +++ src/globi/pipelines/training.py | 13 ++++++------- src/globi/tools/cli/main.py | 4 ++-- src/globi/tools/visualization/data_sources.py | 2 +- uv.lock | 16 ++++++++-------- 7 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/globi/allocate.py b/src/globi/allocate.py index c7691db..988a022 100644 --- a/src/globi/allocate.py +++ b/src/globi/allocate.py @@ -102,7 +102,7 @@ def allocate_globi_experiment( raise ValueError(msg) experiment = BaseExperiment[ExperimentInputSpec, ExperimentOutputSpec]( - experiment=simulate_globi_building, run_name=name + runnable=simulate_globi_building, run_name=name ) print(f"Submitting {len(buildings_gdf)} buildings for experiment {name}") min_branches_required, _, _ = calculate_branching_factor(specs) @@ -182,7 +182,7 @@ def allocate_globi_dryrun( raise ValueError(msg) experiment = BaseExperiment[ExperimentInputSpec, ExperimentOutputSpec]( - experiment=simulate_globi_building, + runnable=simulate_globi_building, run_name=f"{config.name}/dryrun/{config.scenario}", ) diff --git a/src/globi/models/surrogate/configs/pipeline.py 
b/src/globi/models/surrogate/configs/pipeline.py index a088a79..11a0e9b 100644 --- a/src/globi/models/surrogate/configs/pipeline.py +++ b/src/globi/models/surrogate/configs/pipeline.py @@ -10,6 +10,7 @@ import pandas as pd from pydantic import BaseModel, Field from scythe.base import ExperimentInputSpec +from scythe.experiments import SerializableRunnable from scythe.scatter_gather import RecursionMap, ScatterGatherResult from scythe.utils.filesys import FileReference, S3Url @@ -212,7 +213,7 @@ class RegressionIOConfigSpec(BaseModel): ) -class ProgressiveTrainingSpec(ExperimentInputSpec): +class ProgressiveTrainingSpec(ExperimentInputSpec, SerializableRunnable): """A spec for iteratively training an SBEM regression model.""" base_run_name: str = Field( diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index b3fa03e..260691d 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -68,6 +68,9 @@ def combined_data(self) -> pd.DataFrame: dfs: dict[str, pd.DataFrame] = { key: pd.read_parquet(str(uri)) for key, uri in self.data_uris.items() } + # TODO: we should drop any dataframes which do not participate in training + # for instance, by checking their regression io spec, or if there is another place to check. + # Mostly important for preventing errors on the next line when many differently shaped dataframes are returned. if not all( df.index.equals(next(iter(dfs.values())).index) for df in dfs.values() ): diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index f9b7054..02982e2 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -85,8 +85,7 @@ def create_simulations( run_name = f"{spec.experiment_id}/sample" exp = BaseExperiment( - # TODO: replace with simulate_globi_flat_building, or better yet, allow loading from the registry via config. 
- experiment=dummy_simulation, # TODO: add configurability to switch between simulations. + runnable=spec.runnable, run_name=run_name, storage_settings=spec.storage_settings or ScytheStorageSettings(), ) @@ -147,6 +146,7 @@ def combine_results( # is_constant = (df.max(axis=0) - df.min(axis=0)).abs() < 1e-5 # df = df.loc[:, ~is_constant] # Should this sort of data cleaning be done here, or should it be done in the training task? + # also, should we make sure to remove NaN? if spec.data_uris: shared_keys = set(spec.data_uris.uris.keys()) & set(results.uris.keys()) @@ -199,7 +199,7 @@ def start_training( run_name = f"{spec.experiment_id}/train" exp = BaseExperiment( - experiment=train_regressor_with_cv_fold, + runnable=train_regressor_with_cv_fold, run_name=run_name, storage_settings=spec.storage_settings or ScytheStorageSettings(), ) @@ -302,7 +302,7 @@ def transition_recursion( next_spec.iteration.current_iter += 1 next_spec.data_uris = combine_results_output.combined exp = BaseExperiment( - experiment=iterative_training, + runnable=iterative_training, run_name=f"{next_spec.base_run_name}", storage_settings=spec.storage_settings or ScytheStorageSettings(), ) @@ -334,6 +334,7 @@ def transition_recursion( base_run_name = "test-experiment" progressive_training_spec = ProgressiveTrainingSpec( + runnable=dummy_simulation, sort_index=0, experiment_id="placeholder", gis_uri=HttpUrl("https://example.com/gis.parquet"), @@ -356,7 +357,7 @@ def transition_recursion( ) exp = BaseExperiment( - experiment=iterative_training, + runnable=iterative_training, run_name="test-experiment", ) @@ -371,5 +372,3 @@ def transition_recursion( import yaml print(yaml.dump(run.model_dump(mode="json"), indent=2, sort_keys=False)) - # result = iterative_training.run(spec) - # print(result) diff --git a/src/globi/tools/cli/main.py b/src/globi/tools/cli/main.py index e36e447..eb35b05 100644 --- a/src/globi/tools/cli/main.py +++ b/src/globi/tools/cli/main.py @@ -127,7 +127,7 @@ def simulate( import 
pandas as pd from globi.models.tasks import MinimalBuildingSpec - from globi.pipelines import simulate_globi_building_pipeline + from globi.pipelines.simulations import simulate_globi_building_pipeline if isinstance(config, str): config = Path(config) @@ -371,7 +371,7 @@ def experiment( s3_client: S3Client = boto3.client("s3") s3_settings = ScytheStorageSettings() - exp = BaseExperiment(experiment=simulate_globi_building, run_name=run_name) + exp = BaseExperiment(runnable=simulate_globi_building, run_name=run_name) if not version: exp_version = exp.latest_version(s3_client, from_cache=False) diff --git a/src/globi/tools/visualization/data_sources.py b/src/globi/tools/visualization/data_sources.py index 8397258..4c8bace 100644 --- a/src/globi/tools/visualization/data_sources.py +++ b/src/globi/tools/visualization/data_sources.py @@ -262,7 +262,7 @@ def load_run_data(self, run_id: str) -> pd.DataFrame: s3_client = self.client s3_settings = ScytheStorageSettings() exp = BaseExperiment( - experiment=simulate_globi_building, + runnable=simulate_globi_building, run_name=run_id, ) diff --git a/uv.lock b/uv.lock index 45faccc..5c39dd3 100644 --- a/uv.lock +++ b/uv.lock @@ -922,7 +922,7 @@ name = "cuda-bindings" version = "12.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-pathfinder", marker = "sys_platform != 'darwin'" }, + { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260, upload-time = "2025-10-21T14:51:40.79Z" }, @@ -3169,7 +3169,7 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform != 
'darwin'" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, @@ -3181,7 +3181,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, @@ -3211,9 +3211,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform != 'darwin'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'darwin'" }, - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, @@ 
-3225,7 +3225,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, @@ -4591,7 +4591,7 @@ wheels = [ [[package]] name = "scythe-engine" version = "0.1.2" -source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#9aad5d97eaa9ca33bc5ac9e21ec31c9b60f677f1" } +source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#2976bb3da4cec82784057b673e55d5c5cdda469f" } dependencies = [ { name = "boto3" }, { name = "fastparquet" }, From 3af36694d0e35d8ec58b94ffc622ae8e2cfbd12e Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 10:41:49 -0400 Subject: [PATCH 12/27] improve results collation and finalization --- .../models/surrogate/configs/pipeline.py | 62 +++++- src/globi/models/surrogate/outputs.py | 13 ++ src/globi/models/surrogate/training.py | 2 - src/globi/pipelines/training.py | 210 +++++++++++++----- src/globi/tools/cli/main.py | 26 +++ uv.lock | 2 +- 6 files changed, 249 insertions(+), 66 deletions(-) diff --git a/src/globi/models/surrogate/configs/pipeline.py b/src/globi/models/surrogate/configs/pipeline.py index 11a0e9b..7f3be86 100644 --- a/src/globi/models/surrogate/configs/pipeline.py +++ b/src/globi/models/surrogate/configs/pipeline.py @@ -76,13 +76,20 @@ class CrossValidationSpec(BaseModel): class ConvergenceThresholds(BaseModel): """The thresholds for convergence.""" 
- # TODO: instead of using a risky hardcoded "n/a" token, make nullability have better support. - mae: float = Field(default=-9e9, description="The maximum MAE for convergence.") - rmse: float = Field(default=-9e9, description="The maximum RMSE for convergence.") - mape: float = Field(default=-9e9, description="The maximum MAPE for convergence.") - r2: float = Field(default=9e9, description="The minimum R2 for convergence.") - cvrmse: float = Field( - default=-9e9, description="The maximum CV_RMSE for convergence." + mae: float | None = Field( + default=None, description="The maximum MAE for convergence." + ) + rmse: float | None = Field( + default=None, description="The maximum RMSE for convergence." + ) + mape: float | None = Field( + default=None, description="The maximum MAPE for convergence." + ) + r2: float | None = Field( + default=None, description="The minimum R2 for convergence." + ) + cvrmse: float | None = Field( + default=None, description="The maximum CV_RMSE for convergence." 
) def check_convergence(self, metrics: pd.Series, target: re.Pattern | None = None): @@ -104,9 +111,13 @@ def check_convergence(self, metrics: pd.Series, target: re.Pattern | None = None # first, we will select the appropriate threshold for each metric comparators = thresholds.loc[metrics.index.get_level_values("metric")] + # we can then copy over the index safely comparators.index = metrics.index + # we will ignore any thresholds that are not set or are NaN + comparators_are_na = comparators.isna() + # next, we will flip the sign of the r2 metric since it is a maximization metric rather thin min metrics = metrics * np.where( metrics.index.get_level_values("metric") == "r2", -1, 1 @@ -117,6 +128,7 @@ def check_convergence(self, metrics: pd.Series, target: re.Pattern | None = None # run the comparisons comparison = metrics < comparators + comparison = comparison.loc[~comparators_are_na] return comparison @@ -252,10 +264,18 @@ class ProgressiveTrainingSpec(ExperimentInputSpec, SerializableRunnable): default=None, description="The uri of the previous simulation results to train on.", ) + metrics_uris: list[ScatterGatherResult] = Field( + default_factory=list, + description="The uris of the iteration metrics from previous iterations.", + ) + previous_experiment_ids: list[str] = Field( + default_factory=list, + description="The ids of the previous experiments.", + ) def format_combined_output_key(self, key: str) -> str: """Format the output key for a combined result file.""" - return f"{self.prefix}/combined/{key}.parquet" + return f"{self.prefix}/combined/data/{key}.parquet" def format_combined_output_uri(self, key: str) -> S3Url: """Format the output uri for a combined result file.""" @@ -266,6 +286,32 @@ def format_combined_output_uri(self, key: str) -> S3Url: f"s3://{self.storage_settings.BUCKET}/{self.format_combined_output_key(key)}" ) + def format_metrics_output_key(self, key: str) -> str: + """Format the output key for a metrics file.""" + return 
f"{self.prefix}/combined/metrics/{key}.parquet" + + def format_metrics_output_uri(self, key: str) -> S3Url: + """Format the output uri for a metrics file.""" + if self.storage_settings is None: + msg = "Storage settings are not set, so we can't construct a metrics output uri." + raise ValueError(msg) + return S3Url( + f"s3://{self.storage_settings.BUCKET}/{self.format_metrics_output_key(key)}" + ) + + def format_summary_manifest_key(self) -> str: + """Format the output key for a summary manifest file.""" + return f"{self.prefix}/summary.yml" + + def format_summary_manifest_uri(self) -> S3Url: + """Format the output uri for a summary manifest file.""" + if self.storage_settings is None: + msg = "Storage settings are not set, so we can't construct a summary manifest uri." + raise ValueError(msg) + return S3Url( + f"s3://{self.storage_settings.BUCKET}/{self.format_summary_manifest_key()}" + ) + @property def gis_path(self) -> Path: """The path to the gis data.""" diff --git a/src/globi/models/surrogate/outputs.py b/src/globi/models/surrogate/outputs.py index 0bdb9b6..961df92 100644 --- a/src/globi/models/surrogate/outputs.py +++ b/src/globi/models/surrogate/outputs.py @@ -5,6 +5,7 @@ from pydantic import BaseModel from scythe.experiments import ExperimentRun from scythe.scatter_gather import ScatterGatherResult +from scythe.utils.filesys import S3Url from globi.models.surrogate.training import TrainWithCVSpec @@ -17,6 +18,7 @@ class CombineResultsResult(BaseModel): # TODO: This should perhaps go somewhere else since it is generally useful. +# (most likely into scythe itself) class ExperimentRunWithRef(BaseModel): """An experiment run with a workflow run id.""" @@ -35,6 +37,8 @@ class TrainingEvaluationResult(BaseModel): """The result of evaluating the training.""" converged: bool + # TODO: possibly get rid of this since we have nice combined outputs already. 
+ metrics: dict class RecursionTransition(BaseModel): @@ -42,3 +46,12 @@ class RecursionTransition(BaseModel): reasoning: Literal["max_depth", "converged"] | None child_workflow_run_id: str | None + + +class FinalizeResult(BaseModel): + """The result of finalizing the training.""" + + reasoning: Literal["max_depth", "converged"] | None + data_uris: dict[str, S3Url] + metrics_uris: dict[str, S3Url] + experiment_ids: list[str] diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index 260691d..c90b6b2 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -791,12 +791,10 @@ class TrainWithCVSpec(StageSpec): def schedule(self) -> list[TrainFoldSpec]: """Create the task schedule.""" schedule = [] - # TODO: this should be configured/selected/etc for i in range(self.parent.cross_val.n_folds): schedule.append( TrainFoldSpec( - # TODO: this should be set in a better manner experiment_id="placeholder", sort_index=i, data_uris=self.data_uris.uris, diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 02982e2..17c0751 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -1,11 +1,14 @@ """The training pipeline.""" import random +import tempfile from datetime import timedelta from pathlib import Path from typing import cast +import boto3 import pandas as pd +import yaml from hatchet_sdk import Context from scythe.experiments import ( BaseExperiment, @@ -16,10 +19,11 @@ from scythe.settings import ScytheStorageSettings from scythe.utils.filesys import S3Url -from globi.models.surrogate.dummy import DummySimulationInput, dummy_simulation +from globi.models.surrogate.dummy import DummySimulationInput from globi.models.surrogate.outputs import ( CombineResultsResult, ExperimentRunWithRef, + FinalizeResult, RecursionTransition, StartTrainingResult, TrainingEvaluationResult, @@ -255,9 +259,10 @@ def evaluate_training( ) -> 
TrainingEvaluationResult: """Evaluate the training.""" results_output = context.task_output(await_training) - strata = results_output.uris["strata"] - _globals = results_output.uris["global"] - results = pd.read_parquet(str(strata)) + strata_uri = results_output.uris["strata"] + globals_uri = results_output.uris["global"] + results = pd.read_parquet(str(strata_uri)) + results_globals = pd.read_parquet(str(globals_uri)) fold_averages = cast( pd.Series, @@ -268,6 +273,14 @@ def evaluate_training( ) # TODO: fold_averages and strata and globals should be saved to s3 + global_averages = cast( + pd.Series, + results_globals.xs("test", level="split_segment", axis=1) + .groupby(level="iteration") + .mean() + .unstack(), + ) + ( convergence_all, _convergence_monitor_segment, @@ -275,14 +288,19 @@ def evaluate_training( _convergence, ) = spec.convergence_criteria.run(fold_averages) - return TrainingEvaluationResult(converged=convergence_all) + return TrainingEvaluationResult( + converged=convergence_all, + metrics={ + "global_averages": global_averages.reset_index().to_dict(orient="records"), + }, + ) @iterative_training.task( name="iterative_training.transition_recursion", schedule_timeout=timedelta(hours=5), execution_timeout=timedelta(minutes=5), - parents=[evaluate_training, combine_results], + parents=[evaluate_training, combine_results, await_training], ) def transition_recursion( spec: ProgressiveTrainingSpec, context: Context @@ -290,17 +308,19 @@ def transition_recursion( """Transition the recursion.""" results = context.task_output(evaluate_training) if results.converged: - # create child return RecursionTransition(reasoning="converged", child_workflow_run_id=None) if spec.iteration.at_max_iters: return RecursionTransition(reasoning="max_depth", child_workflow_run_id=None) + await_training_output = context.task_output(await_training) # start_training_output = context.task_output(start_training) combine_results_output = context.task_output(combine_results) next_spec 
= spec.model_copy(deep=True) next_spec.iteration.current_iter += 1 next_spec.data_uris = combine_results_output.combined + next_spec.metrics_uris.append(await_training_output) + next_spec.previous_experiment_ids.append(spec.experiment_id) exp = BaseExperiment( runnable=iterative_training, run_name=f"{next_spec.base_run_name}", @@ -319,56 +339,136 @@ def transition_recursion( ) -# TODO: Final training stage? or should we save models along the way. +@iterative_training.task( + name="iterative_training.finalize", + schedule_timeout=timedelta(hours=5), + execution_timeout=timedelta(minutes=30), + parents=[transition_recursion, await_training, combine_results], + # skip_if=[ + # # TODO: maybe we should just run every time? + # ParentCondition( + # parent=transition_recursion, + # expression="output.reasoning == null", + # ) + # ], +) +def finalize(spec: ProgressiveTrainingSpec, context: Context) -> FinalizeResult: + """Run when training has exited the loop (converged, max depth, or other reason). Saves final models and artifacts.""" + # TODO: save the final model? + transition = context.task_output(transition_recursion) + context.log(f"Training finished. 
Finalizing: {transition.reasoning}") + + context.log("Fetching metrics from all iterations...") + await_training_output = context.task_output(await_training) + metrics_uris = [*spec.metrics_uris, await_training_output] + metrics_by_key: dict[str, list[pd.DataFrame]] = {} + for i, metrics_uri in enumerate(metrics_uris): + context.log(f"\tFetching metrics from iteration {i}...") + for key in metrics_uri.uris: + context.log(f"\t\tFetching metrics for key {key} from iteration {i}...") + if key not in metrics_by_key: + metrics_by_key[key] = [] + metrics_by_key[key].append(pd.read_parquet(str(metrics_uri.uris[key]))) + context.log("Combining metrics from all iterations...") + combined_metrics = { + key: pd.concat(metrics, axis=0) for key, metrics in metrics_by_key.items() + } + combined_metrics_uris = { + key: spec.format_metrics_output_uri(key) for key in combined_metrics + } + context.log("Saving combined metrics to s3...") + for key, metrics in combined_metrics.items(): + context.log(f"\tSaving metrics for key {key} to s3...") + metrics.to_parquet(str(combined_metrics_uris[key])) + context.log("Final metrics saved to s3.") + + # Get the simulation data outputs from all steps and this step + combine_results_output = context.task_output(combine_results) -if __name__ == "__main__": - from pydantic import HttpUrl - from scythe.settings import ScytheStorageSettings + # Get the experiment ids from all steps and this step + experiment_ids = [*spec.previous_experiment_ids, spec.experiment_id] - from globi.models.surrogate.configs.pipeline import ( - ConvergenceThresholds, - ConvergenceThresholdsByTarget, - IterationSpec, - StratificationSpec, - ) + # TODO: save final models, or return them a little more directly? 
- base_run_name = "test-experiment" - progressive_training_spec = ProgressiveTrainingSpec( - runnable=dummy_simulation, - sort_index=0, - experiment_id="placeholder", - gis_uri=HttpUrl("https://example.com/gis.parquet"), - stratification=StratificationSpec( - field="weather_file", - sampling="equal", - aliases=["feature.weather.file"], - ), - iteration=IterationSpec( - max_iters=3, - ), - convergence_criteria=ConvergenceThresholdsByTarget( - thresholds={ - "*": ConvergenceThresholds(r2=0.975), - }, - ), - storage_settings=ScytheStorageSettings(), - data_uris=None, - base_run_name=base_run_name, - ) - - exp = BaseExperiment( - runnable=iterative_training, - run_name="test-experiment", - ) - - run, ref = exp.allocate( - progressive_training_spec, - version="bumpmajor", - recursion_map=RecursionMap( - factor=2, - max_depth=0, - ), + result = FinalizeResult( + reasoning=transition.reasoning, + data_uris=combine_results_output.combined.uris, + metrics_uris=combined_metrics_uris, + experiment_ids=experiment_ids, ) - import yaml - print(yaml.dump(run.model_dump(mode="json"), indent=2, sort_keys=False)) + s3_client = boto3.client("s3") + summary_manifest_uri = spec.format_summary_manifest_key() + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) / "summary.yml" + with open(temp_path, "w") as f: + yaml.dump(result.model_dump(mode="json"), f, indent=2, sort_keys=False) + if spec.storage_settings is None: + msg = ( + "Storage settings are not set, so we can't upload the summary manifest." 
+ ) + raise ValueError(msg) + s3_client.upload_file( + temp_path.as_posix(), spec.storage_settings.BUCKET, summary_manifest_uri + ) + return result + + +# if __name__ == "__main__": +# import yaml +# from pydantic import HttpUrl +# from scythe.settings import ScytheStorageSettings + +# from globi.models.surrogate.configs.pipeline import ( +# ConvergenceThresholds, +# ConvergenceThresholdsByTarget, +# IterationSpec, +# StratificationSpec, +# ) +# from globi.models.surrogate.dummy import dummy_simulation + +# base_run_name = "test-experiment" +# progressive_training_spec = ProgressiveTrainingSpec( +# runnable=dummy_simulation, +# sort_index=0, +# experiment_id="placeholder", +# gis_uri=HttpUrl("https://example.com/gis.parquet"), +# stratification=StratificationSpec( +# field="weather_file", +# sampling="equal", +# aliases=["feature.weather.file"], +# ), +# iteration=IterationSpec( +# max_iters=3, +# ), +# convergence_criteria=ConvergenceThresholdsByTarget( +# thresholds={ +# "*": ConvergenceThresholds(r2=0.975), +# }, +# ), +# storage_settings=ScytheStorageSettings(), +# base_run_name=base_run_name, +# ) +# with open("inputs/training.yml", "w") as f: +# yaml.dump( +# progressive_training_spec.model_dump(mode="json"), +# f, +# indent=2, +# sort_keys=False, +# ) + +# exp = BaseExperiment( +# runnable=iterative_training, +# run_name="test-experiment", +# ) + +# run, ref = exp.allocate( +# progressive_training_spec, +# version="bumpmajor", +# recursion_map=RecursionMap( +# factor=2, +# max_depth=0, +# ), +# ) + +# print(yaml.dump(run.model_dump(mode="json"), indent=2, sort_keys=False)) diff --git a/src/globi/tools/cli/main.py b/src/globi/tools/cli/main.py index eb35b05..cb11098 100644 --- a/src/globi/tools/cli/main.py +++ b/src/globi/tools/cli/main.py @@ -7,6 +7,7 @@ import boto3 import click import yaml +from scythe.experiments import BaseExperiment if TYPE_CHECKING: from mypy_boto3_s3 import S3Client @@ -102,6 +103,31 @@ def manifest( ) +@submit.command() 
+@click.option( + "--path", + type=click.Path(exists=True), + help="The path to the manifest file which will be used to configure the experiment.", + prompt="Manifest file path (.yml)", +) +def surrogate(path): + """Submit a GloBI surrogate experiment.""" + from globi.models.surrogate.configs.pipeline import ProgressiveTrainingSpec + from globi.pipelines.training import iterative_training + + with open(path) as f: + manifest = yaml.safe_load(f) + + config = ProgressiveTrainingSpec.model_validate(manifest) + + exp = BaseExperiment(runnable=iterative_training, run_name=config.base_run_name) + run, _ref = exp.allocate( + config, + version="bumpmajor", + ) + print(yaml.dump(run.model_dump(mode="json"), indent=2, sort_keys=False)) + + @cli.command() @click.option( "--config", diff --git a/uv.lock b/uv.lock index 5c39dd3..8b45dad 100644 --- a/uv.lock +++ b/uv.lock @@ -4591,7 +4591,7 @@ wheels = [ [[package]] name = "scythe-engine" version = "0.1.2" -source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#2976bb3da4cec82784057b673e55d5c5cdda469f" } +source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#0bc501d15c20ab23b2379b690756fd3ff3267054" } dependencies = [ { name = "boto3" }, { name = "fastparquet" }, From 6603b70f848d0fc07cf2dd7d71952a3587119f2b Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 11:34:28 -0400 Subject: [PATCH 13/27] add non linear sample size scaling --- src/globi/models/surrogate/configs/pipeline.py | 16 +++++++++++----- src/globi/models/surrogate/sampling.py | 6 +----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/globi/models/surrogate/configs/pipeline.py b/src/globi/models/surrogate/configs/pipeline.py index 7f3be86..6707b1f 100644 --- a/src/globi/models/surrogate/configs/pipeline.py +++ b/src/globi/models/surrogate/configs/pipeline.py @@ -20,14 +20,13 @@ class IterationSpec(BaseModel): """The 
iteration spec.""" - n_init: int = Field(default=10000, description="The number of initial samples.") + n_per_iter: int | list[int] = Field( + default=10_000, + description="The number of samples to generate per generation. If the current iteration exceeds the length of the list, the last element will be used.", + ) min_per_stratum: int = Field( default=100, description="The minimum number of samples per stratum." ) - n_per_iter: int = Field( - default=10000, - description="The number of samples to add per each iteration of the outer loop.", - ) max_iters: int = Field( default=100, description="The maximum number of outer loop iterations to perform.", @@ -46,6 +45,13 @@ def at_max_iters(self) -> bool: """Whether the current iteration is the maximum number of iterations.""" return self.current_iter + 1 >= self.max_iters + @property + def n_per_gen_for_current_iter(self) -> int: + """The number of samples to generate for the current iteration.""" + if isinstance(self.n_per_iter, int): + return self.n_per_iter + return self.n_per_iter[min(self.current_iter, len(self.n_per_iter) - 1)] + class StratificationSpec(BaseModel): """A spec for stratifying the data.""" diff --git a/src/globi/models/surrogate/sampling.py b/src/globi/models/surrogate/sampling.py index b4bb1d8..fff8068 100644 --- a/src/globi/models/surrogate/sampling.py +++ b/src/globi/models/surrogate/sampling.py @@ -62,11 +62,7 @@ def sample_equally_by_stratum( stratum_dfs = { stratum: df[df[stratification_field] == stratum] for stratum in strata } - n_per_iter = ( - self.parent.iteration.n_per_iter - if self.parent.iteration.current_iter != 0 - else self.parent.iteration.n_init - ) + n_per_iter = self.parent.iteration.n_per_gen_for_current_iter n_per_stratum = max( n_per_iter // len(strata), ( From b3ebb8fe63155512d6cac4a79cac278b67fbc87e Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 15:06:01 -0400 Subject: [PATCH 14/27] enabling sampling specification 
--- .../models/surrogate/configs/pipeline.py | 31 +- src/globi/models/surrogate/dummy.py | 318 +++++++- src/globi/models/surrogate/samplers.py | 720 ++++++++++++++++++ src/globi/models/surrogate/sampling.py | 141 +--- src/globi/pipelines/training.py | 27 +- 5 files changed, 1076 insertions(+), 161 deletions(-) create mode 100644 src/globi/models/surrogate/samplers.py diff --git a/src/globi/models/surrogate/configs/pipeline.py b/src/globi/models/surrogate/configs/pipeline.py index 6707b1f..3b2ed96 100644 --- a/src/globi/models/surrogate/configs/pipeline.py +++ b/src/globi/models/surrogate/configs/pipeline.py @@ -12,9 +12,10 @@ from scythe.base import ExperimentInputSpec from scythe.experiments import SerializableRunnable from scythe.scatter_gather import RecursionMap, ScatterGatherResult -from scythe.utils.filesys import FileReference, S3Url +from scythe.utils.filesys import OptionalFileReference, S3Url from globi.models.surrogate.configs.regression import ModelHPType, XGBHyperparameters +from globi.models.surrogate.samplers import Priors class IterationSpec(BaseModel): @@ -254,6 +255,10 @@ class ProgressiveTrainingSpec(ExperimentInputSpec, SerializableRunnable): default_factory=StratificationSpec, description="The stratification spec.", ) + samplers: Priors = Field( + ..., + description="The sampling spec.", + ) cross_val: CrossValidationSpec = Field( default_factory=CrossValidationSpec, description="The cross validation spec.", @@ -262,8 +267,8 @@ class ProgressiveTrainingSpec(ExperimentInputSpec, SerializableRunnable): default_factory=IterationSpec, description="The iteration spec.", ) - gis_uri: FileReference = Field( - ..., + context: OptionalFileReference = Field( + default=None, description="The uri of the gis data to train on.", ) data_uris: ScatterGatherResult | None = Field( @@ -318,17 +323,25 @@ def format_summary_manifest_uri(self) -> S3Url: f"s3://{self.storage_settings.BUCKET}/{self.format_summary_manifest_key()}" ) + def subrun_name(self, subrun: 
Literal["sample", "train"]) -> str: + """Format the run name for a subrun.""" + return f"{self.experiment_id}/{subrun}" + @property - def gis_path(self) -> Path: + def context_path(self) -> Path | None: """The path to the gis data.""" - if isinstance(self.gis_uri, Path): - return self.gis_uri - return self.fetch_uri(self.gis_uri) + if self.context is None: + return None + if isinstance(self.context, Path): + return self.context + return self.fetch_uri(self.context) @cached_property - def gis_data(self) -> pd.DataFrame: + def context_data(self) -> pd.DataFrame | None: """Load the gis data.""" - return pd.read_parquet(self.gis_path) + if self.context_path is None: + return None + return pd.read_parquet(self.context_path) class StageSpec(BaseModel): diff --git a/src/globi/models/surrogate/dummy.py b/src/globi/models/surrogate/dummy.py index ee17a81..bd08fda 100644 --- a/src/globi/models/surrogate/dummy.py +++ b/src/globi/models/surrogate/dummy.py @@ -1,27 +1,59 @@ """Dummy simulation for testing.""" import math +from dataclasses import dataclass from pathlib import Path -from typing import Literal +from typing import Any, Literal, get_args +import numpy as np import pandas as pd from scythe.base import ExperimentInputSpec, ExperimentOutputSpec from scythe.registry import ExperimentRegistry +StratificationOption = Literal["some", "other", "option", "another"] + class DummySimulationInput(ExperimentInputSpec): """The input for the dummy simulation.""" - weather_file: Literal["some", "other"] - a: int - b: float - c: int + x0: float + x1: float + x2: float + x3: float + stratification_field: StratificationOption + + @property + def encoded_stratification_field(self) -> float: + """Encode the stratification field as an integer.""" + return get_args(StratificationOption).index(self.stratification_field) / ( + len(get_args(StratificationOption)) + - (1 if len(get_args(StratificationOption)) > 1 else 0) + ) + + @property + def values(self) -> list[float]: + """Get the values 
of the input spec.""" + vals = self.model_dump( + exclude={ + "stratification_field", + "experiment_id", + "sort_index", + "workflow_run_id", + "root_workflow_run_id", + } + ) + x_vals = {k: v for k, v in vals.items() if k.startswith("x")} + return [*x_vals.values(), self.encoded_stratification_field] + + def n_inputs(self) -> int: + """Get the number of inputs.""" + return len(self.values) class DummySimulationOutput(ExperimentOutputSpec): """The output for the dummy simulation.""" - c: float + y0: float @ExperimentRegistry.Register( @@ -31,31 +63,253 @@ def dummy_simulation( input_spec: DummySimulationInput, tempdir: Path ) -> DummySimulationOutput: """A dummy simulation.""" - df = pd.DataFrame({ - "target_0": [ - (input_spec.a + input_spec.b) - if input_spec.weather_file == "some" - else (input_spec.a - input_spec.b) - ], - "target_1": [ - (input_spec.a - input_spec.b) - if input_spec.weather_file == "some" - else (input_spec.a + input_spec.b) - ], - "target_2": [ - (input_spec.a * input_spec.b * input_spec.c) - if input_spec.weather_file == "some" - else (input_spec.a * input_spec.b / input_spec.c) - ], - "target_3": [ - (input_spec.a / math.sin(input_spec.b)) - if input_spec.weather_file == "some" - else (input_spec.a / math.cos(input_spec.b)) - ], - }) - df_neg = -df - df = pd.concat([df, df_neg], axis=1, keys=["positive", "negative"], names=["sign"]) - df = df.set_index(input_spec.make_multiindex()) + n_inputs = input_spec.n_inputs() + n_outputs = 5 + problem = SyntheticMultiOutputProblem( + n_inputs, + SyntheticProblemConfig( + n_outputs=5, + n_latents=8, + difficulty="medium", + noise_std=0.0, + normalize_outputs=True, + ), + input_spec.sort_index, + ) + y = problem.evaluate(np.array(input_spec.values)) + + main_result = pd.DataFrame({f"y{i}": [y[i]] for i in range(1, n_outputs)}) + main_result = main_result.set_index(input_spec.make_multiindex()) + main_result_neg = -main_result + main_result = pd.concat( + [main_result, main_result_neg], + axis=1, + 
keys=["positive", "negative"], + names=["sign"], + ) return DummySimulationOutput( - c=input_spec.a + input_spec.b, dataframes={"main_result": df} + y0=y[0], + dataframes={"main_result": main_result}, ) + + +@dataclass(frozen=True) +class SyntheticProblemConfig: + """Configuration for a synthetic multi-output regression problem.""" + + n_outputs: int = 8 + n_latents: int = 4 + difficulty: Literal["easy", "medium"] = "easy" + noise_std: float = 0.0 + normalize_outputs: bool = True + + +class SyntheticMultiOutputProblem: + """Deterministic synthetic multi-output function family. + + Inputs: + x in [0, 1]^d + + Outputs: + y in R^m + + Design goals: + - cheap to evaluate + - arbitrary input dimension + - arbitrary output count + - some outputs share latent structure + - some outputs contain mild independent residuals + - difficulty is tunable but never absurd + """ + + def __init__(self, n_inputs: int, config: SyntheticProblemConfig, seed: int): + """Initialize the synthetic multi-output problem.""" + if n_inputs < 1: + msg = "n_inputs must be >= 1" + raise ValueError(msg) + if config.n_outputs < 1: + msg = "n_outputs must be >= 1" + raise ValueError(msg) + if config.n_latents < 1: + msg = "n_latents must be >= 1" + raise ValueError(msg) + + self.n_inputs = n_inputs + self.config = config + self.rng = np.random.default_rng(seed) + self.seed = seed + + self.active_dims_per_latent = ( + min(5, n_inputs) if config.difficulty == "easy" else min(8, n_inputs) + ) + self.freq_max = 2 if config.difficulty == "easy" else 4 + self.residual_scale = 0.05 if config.difficulty == "easy" else 0.12 + + # Shared latent parameters + self.latent_defs = [ + self._make_latent_definition(k) for k in range(config.n_latents) + ] + + # Output mixing weights: this is what creates output dependency + self.mix_weights = self.rng.normal( + loc=0.0, + scale=1.0 / math.sqrt(config.n_latents), + size=(config.n_outputs, config.n_latents), + ) + + # Small output-specific residual definitions + 
self.residual_defs = [ + self._make_residual_definition(j) for j in range(config.n_outputs) + ] + + # Optional approximate normalization constants computed deterministically + self.output_shift = np.zeros(config.n_outputs, dtype=float) + self.output_scale = np.ones(config.n_outputs, dtype=float) + if config.normalize_outputs: + self._fit_normalization() + + def evaluate(self, x: np.ndarray) -> np.ndarray: + """Evaluate all outputs at one input vector x.""" + x = np.asarray(x, dtype=float) + if x.shape != (self.n_inputs,): + msg = f"Expected x shape {(self.n_inputs,)}, got {x.shape}" + raise ValueError(msg) + + # Clamp defensively; upstream encoder should already map into [0, 1] + x = np.clip(x, 0.0, 1.0) + + z = np.array([self._eval_latent(x, ld) for ld in self.latent_defs], dtype=float) + y = self.mix_weights @ z + + # Add small output-specific residuals so not everything is perfectly low-rank + residual = np.array( + [self._eval_residual(x, rd) for rd in self.residual_defs], dtype=float + ) + y = y + residual + + if self.config.noise_std > 0: + # deterministic if seed fixed and call order fixed; default is off for stable tests + y = y + self.rng.normal( + 0.0, self.config.noise_std, size=self.config.n_outputs + ) + + y = (y - self.output_shift) / self.output_scale + return y + + def _make_latent_definition(self, k: int) -> dict[str, Any]: + """Create one latent function definition.""" + latent_type = k % 4 + dims = self.rng.choice( + self.n_inputs, size=self.active_dims_per_latent, replace=False + ) + + if latent_type == 0: + # additive sinusoid + return { + "type": "additive_sin", + "dims": dims, + "amp": self.rng.uniform(0.4, 1.2, size=len(dims)), + "freq": self.rng.integers(1, self.freq_max + 1, size=len(dims)), + "phase": self.rng.uniform(0.0, 2 * math.pi, size=len(dims)), + } + + if latent_type == 1: + # smooth quadratic bowl-ish feature + return { + "type": "quadratic", + "dims": dims, + "weight": self.rng.uniform(0.5, 1.5, size=len(dims)), + "center": 
self.rng.uniform(0.2, 0.8, size=len(dims)), + } + + if latent_type == 2: + # pairwise interaction latent + pair_count = max(1, len(dims) // 2) + pair_dims = dims[: 2 * pair_count].reshape(pair_count, 2) + return { + "type": "pairwise_sin", + "pairs": pair_dims, + "weight": self.rng.uniform(0.4, 1.0, size=pair_count), + } + + # Friedman-like latent, adapted to arbitrary dimension by cycling + d0 = dims[0 % len(dims)] + d1 = dims[1 % len(dims)] + d2 = dims[2 % len(dims)] + d3 = dims[3 % len(dims)] + d4 = dims[4 % len(dims)] + return { + "type": "friedman_like", + "dims": np.array([d0, d1, d2, d3, d4], dtype=int), + } + + def _make_residual_definition(self, j: int) -> dict[str, Any]: + """Create a small output-specific residual.""" + dims = self.rng.choice(self.n_inputs, size=min(3, self.n_inputs), replace=False) + return { + "dims": dims, + "amp": self.rng.uniform(0.2, 0.8, size=len(dims)) * self.residual_scale, + "freq": self.rng.integers(1, self.freq_max + 1, size=len(dims)), + "phase": self.rng.uniform(0.0, 2 * math.pi, size=len(dims)), + } + + def _eval_latent(self, x: np.ndarray, ld: dict[str, Any]) -> float: + t = ld["type"] + + if t == "additive_sin": + dims = ld["dims"] + return float( + np.sum( + ld["amp"] * np.sin(2 * math.pi * ld["freq"] * x[dims] + ld["phase"]) + ) + ) + + if t == "quadratic": + dims = ld["dims"] + xc = x[dims] - ld["center"] + return float(np.sum(ld["weight"] * xc * xc)) + + if t == "pairwise_sin": + total = 0.0 + for w, (i, j) in zip(ld["weight"], ld["pairs"], strict=True): + total += float(w * math.sin(math.pi * x[i] * x[j])) + return total + + if t == "friedman_like": + i0, i1, i2, i3, i4 = ld["dims"] + return float( + 10.0 * math.sin(math.pi * x[i0] * x[i1]) + + 20.0 * (x[i2] - 0.5) ** 2 + + 10.0 * x[i3] + + 5.0 * x[i4] + ) + + msg = f"Unknown latent type: {t}" + raise ValueError(msg) + + def _eval_residual(self, x: np.ndarray, rd: dict[str, Any]) -> float: + dims = rd["dims"] + return float( + np.sum(rd["amp"] * np.sin(2 * math.pi * 
rd["freq"] * x[dims] + rd["phase"])) + ) + + def _fit_normalization(self) -> None: + """Approximate output mean/std over a fixed reference design.""" + ref_rng = np.random.default_rng(self.seed + 1_000_000) + n_ref = 2048 if self.config.difficulty == "easy" else 4096 + X = ref_rng.uniform(0.0, 1.0, size=(n_ref, self.n_inputs)) + + Y = np.zeros((n_ref, self.config.n_outputs), dtype=float) + for i in range(n_ref): + z = np.array( + [self._eval_latent(X[i], ld) for ld in self.latent_defs], dtype=float + ) + residual = np.array( + [self._eval_residual(X[i], rd) for rd in self.residual_defs], + dtype=float, + ) + Y[i] = self.mix_weights @ z + residual + + self.output_shift = Y.mean(axis=0) + self.output_scale = Y.std(axis=0) + self.output_scale[self.output_scale < 1e-8] = 1.0 diff --git a/src/globi/models/surrogate/samplers.py b/src/globi/models/surrogate/samplers.py new file mode 100644 index 0000000..32acb4c --- /dev/null +++ b/src/globi/models/surrogate/samplers.py @@ -0,0 +1,720 @@ +"""Conditional Priors and Samplers. + +Ported from epengine/models/sampling.py with enhancements: +- Fixed NaN comparison bug in ConditionalPrior +- Added MultiColumnConditionalPrior for multi-column conditioning + without requiring ConcatenateFeaturesSampler intermediate columns +""" + +from abc import ABC, abstractmethod +from typing import Literal, cast + +import networkx as nx +import numpy as np +import pandas as pd +from pydantic import BaseModel, model_validator + +# TODO: Make sure that all of the samplers can be serialized and deserialized with proper discrimination, i.e. that they do not share identical field names. 
class SamplingError(Exception):
    """Raised when a sampler or prior cannot produce a valid sample."""

    pass


def _require_length(context: pd.DataFrame, n: int) -> None:
    """Raise a SamplingError if the context does not have exactly `n` rows."""
    if len(context) != n:
        msg = f"Context dataframe must have {n} rows, but it has {len(context)} rows."
        raise SamplingError(msg)


def _require_feature(context: pd.DataFrame, feature: str, n: int, label: str) -> None:
    """Raise a SamplingError if `feature` is absent or the row count is wrong.

    `label` is the human-readable role of the feature (e.g. "Feature to copy")
    so each caller keeps its original error wording.
    """
    if feature not in context.columns:
        msg = f"{label} {feature} not found in context dataframe."
        raise SamplingError(msg)
    _require_length(context, n)


def _require_features(
    context: pd.DataFrame, features: list[str], n: int, verb: str
) -> None:
    """Raise a SamplingError if any of `features` is absent or the row count is wrong."""
    if not all(f in context.columns for f in features):
        msg = f"All features to {verb} {features} must be found in context dataframe."
        raise SamplingError(msg)
    _require_length(context, n)


class Sampler(ABC):
    """A sampler which draws feature values, optionally conditioned on a context."""

    @abstractmethod
    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample features from a prior, which may depend on a context."""
        pass

    @property
    @abstractmethod
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        pass


class UniformSampler(BaseModel, Sampler):
    """A uniform sampler which generates values uniformly between a min and max value."""

    min: float
    max: float
    round: Literal["ceil", "floor", "nearest"] | None = None

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample uniformly between a min and max value, optionally rounding."""
        samples = generator.uniform(self.min, self.max, size=n)
        if self.round == "ceil":
            return np.ceil(samples)
        if self.round == "floor":
            return np.floor(samples)
        if self.round == "nearest":
            return np.round(samples)
        return samples

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on (none)."""
        return set()


class ClippedNormalSampler(BaseModel, Sampler):
    """A clipped normal sampler which generates values from a normal distribution, clipped to a min and max value."""

    mean: float
    std: float
    clip_min: float | None
    clip_max: float | None

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from a normal distribution, clipped to a min and max value."""
        # None bounds are treated as unbounded on that side.
        lo = -np.inf if self.clip_min is None else self.clip_min
        hi = np.inf if self.clip_max is None else self.clip_max
        return generator.normal(self.mean, self.std, size=n).clip(lo, hi)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on (none)."""
        return set()


class FixedValueSampler(BaseModel, Sampler):
    """A fixed value sampler which generates a fixed value for all samples."""

    value: float | str | int | bool

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample a fixed value."""
        return np.full(n, self.value)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on (none)."""
        return set()


class CategoricalSampler(BaseModel, Sampler):
    """A categorical sampler which generates values from a categorical distribution."""

    values: list[str] | list[float] | list[int]
    weights: list[float]

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from a categorical distribution."""
        return generator.choice(self.values, size=n, p=self.weights)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on (none)."""
        return set()

    @model_validator(mode="after")
    def check_values_and_weights(self):
        """Check that values and weights align, and normalize weights to sum to 1.

        Raises:
            ValueError: If lengths differ or the weights do not sum to a
                positive value (an all-zero vector cannot be normalized).
        """
        if len(self.values) != len(self.weights):
            msg = "values and weights must be the same length"
            raise ValueError(msg)
        total = sum(self.weights)
        # Guard against division by zero when normalizing degenerate weights.
        if total <= 0:
            msg = "weights must sum to a positive value"
            raise ValueError(msg)
        if not np.isclose(total, 1):
            self.weights = [w / total for w in self.weights]
        return self


class CopySampler(BaseModel, Sampler):
    """A deterministic sampler which generates a copy of a feature in the provided context dataframe."""

    feature_to_copy: str

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a copy of a feature."""
        _require_feature(context, self.feature_to_copy, n, "Feature to copy")
        return context[self.feature_to_copy].to_numpy()

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.feature_to_copy}


class AddValueSampler(BaseModel, Sampler):
    """A deterministic sampler which adds a value to a feature."""

    feature_to_add_to: str
    value_to_add: float

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a sum of a feature and a value."""
        # Length check added for consistency with the other context-dependent samplers.
        _require_feature(context, self.feature_to_add_to, n, "Feature to add to")
        return context[self.feature_to_add_to].to_numpy() + self.value_to_add

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.feature_to_add_to}


class SumValuesSampler(BaseModel, Sampler):
    """A deterministic sampler which generates a sum of features."""

    features_to_sum: list[str]

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a sum of features."""
        _require_features(context, self.features_to_sum, n, "sum")
        return np.sum(context[self.features_to_sum].to_numpy(), axis=1)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return set(self.features_to_sum)


class MultiplyValueSampler(BaseModel, Sampler):
    """A deterministic sampler which generates a product of a feature and a value."""

    feature_to_multiply: str
    value_to_multiply: float

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a multiply of a feature."""
        _require_feature(context, self.feature_to_multiply, n, "Feature to multiply")
        return context[self.feature_to_multiply].to_numpy() * self.value_to_multiply

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.feature_to_multiply}


class ProductValuesSampler(BaseModel, Sampler):
    """A deterministic sampler which generates a product of features."""

    features_to_multiply: list[str]

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a product of features."""
        _require_features(context, self.features_to_multiply, n, "multiply")
        return np.prod(context[self.features_to_multiply].to_numpy(), axis=1)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return set(self.features_to_multiply)


class InvertSampler(BaseModel, Sampler):
    """A deterministic sampler which generates the multiplicative inverse of a feature."""

    feature_to_invert: str

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute an invert of a feature.

        NOTE(review): a zero-valued feature yields inf under numpy semantics —
        confirm whether callers rely on that or whether it should raise.
        """
        _require_feature(context, self.feature_to_invert, n, "Feature to invert")
        return 1 / context[self.feature_to_invert].to_numpy()

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.feature_to_invert}


class PowerSampler(BaseModel, Sampler):
    """A deterministic sampler which generates a power of a feature."""

    feature_to_power: str
    power: float

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a power of a feature."""
        # Presence/length validation added for consistency with sibling samplers;
        # previously a missing column raised a bare KeyError.
        _require_feature(context, self.feature_to_power, n, "Feature to power")
        return context[self.feature_to_power].to_numpy() ** self.power

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.feature_to_power}


class LogSampler(BaseModel, Sampler):
    """A deterministic sampler which generates a log of a feature."""

    feature_to_log: str
    base: float = np.e

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a log of a feature (change-of-base via natural logs)."""
        _require_feature(context, self.feature_to_log, n, "Feature to log")
        return np.log(context[self.feature_to_log].to_numpy()) / np.log(self.base)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.feature_to_log}


class RoundSampler(BaseModel, Sampler):
    """A deterministic sampler which applies ceil, floor, or nearest to a feature."""

    feature_to_round: str
    operation: Literal["ceil", "floor", "nearest"]

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Apply ceil, floor, or nearest to a feature."""
        _require_feature(context, self.feature_to_round, n, "Feature to round")
        values = context[self.feature_to_round].to_numpy()
        if self.operation == "ceil":
            return np.ceil(values)
        if self.operation == "floor":
            return np.floor(values)
        return np.round(values)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.feature_to_round}


class ConcatenateFeaturesSampler(BaseModel, Sampler):
    """A deterministic sampler which concatenates features.

    Retained for backward compatibility. Prefer MultiColumnConditionalPrior
    for multi-column conditioning instead of creating intermediate compound key columns.
    """

    features_to_concatenate: list[str]
    separator: str = ":"

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Compute a concatenation of features."""
        _require_features(context, self.features_to_concatenate, n, "concatenate")
        cols: pd.DataFrame = cast(pd.DataFrame, context[self.features_to_concatenate])
        return cols.astype(str).agg(self.separator.join, axis=1).to_numpy()

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return set(self.features_to_concatenate)


PriorSampler = (
    UniformSampler
    | ClippedNormalSampler
    | FixedValueSampler
    | CategoricalSampler
    | CopySampler
    | AddValueSampler
    | SumValuesSampler
    | MultiplyValueSampler
    | ProductValuesSampler
    | InvertSampler
    | LogSampler
    | RoundSampler
    | ConcatenateFeaturesSampler
    | PowerSampler
)


class ConditionalPriorCondition(BaseModel):
    """A conditional prior condition pairing a match value with a sampler."""

    match_val: str | float | int | bool
    sampler: PriorSampler

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from a conditional prior condition."""
        return self.sampler.sample(context, n, generator)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return self.sampler.depends_on


class MultiColumnCondition(BaseModel):
    """A condition that matches on multiple source features simultaneously.

    Used with MultiColumnConditionalPrior to condition on combinations
    of column values without creating intermediate compound key columns.
    """

    match_vals: tuple[str | float | int | bool, ...]
    sampler: PriorSampler

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from this condition's sampler."""
        return self.sampler.sample(context, n, generator)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return self.sampler.depends_on


class PriorABC(ABC):
    """A prior."""

    @abstractmethod
    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from a prior."""
        pass

    @property
    @abstractmethod
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        pass


class ConditionalPrior(BaseModel, PriorABC):
    """A conditional prior that selects a sampler based on a single source feature."""

    source_feature: str
    conditions: list[ConditionalPriorCondition]
    fallback_prior: PriorSampler | None

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from a conditional prior.

        Every condition's sampler is evaluated (in declaration order, keeping the
        RNG stream deterministic) and rows are filled from the condition whose
        match_val equals the source feature; later conditions win on duplicates.

        Raises:
            SamplingError: If some rows match no condition and no fallback_prior
                is configured.
        """
        _require_feature(context, self.source_feature, n, "Source feature")
        test_feature = context[self.source_feature].to_numpy()

        final = np.full(n, np.nan)
        any_matched = np.full(n, False)
        for condition in self.conditions:
            samples = condition.sample(context, n, generator)
            mask = test_feature == condition.match_val
            final = np.where(mask, samples, final)
            any_matched |= mask

        if self.fallback_prior is not None:
            fallback = self.fallback_prior.sample(context, n, generator)
            final = np.where(~any_matched, fallback, final)
        elif not any_matched.all():
            # Mask-based detection instead of np.isnan(final): after np.where
            # promotes to a string dtype, unmatched slots become the string
            # "nan" and np.isnan either misses them or raises TypeError.
            msg = (
                "Unmatched values found for feature "
                f"{self.source_feature} and no fallback_prior is set."
            )
            raise SamplingError(msg)

        return final

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return {self.source_feature} | {
            dependency for c in self.conditions for dependency in c.depends_on
        }


class MultiColumnConditionalPrior(BaseModel, PriorABC):
    """A conditional prior that selects a sampler based on multiple source features.

    This eliminates the need for ConcatenateFeaturesSampler + compound key columns.
    Instead of creating an intermediate concatenated column and matching on strings,
    this prior directly matches on tuples of column values.

    Example usage::

        prior = MultiColumnConditionalPrior(
            source_features=["Typology", "Age_bracket"],
            conditions=[
                MultiColumnCondition(
                    match_vals=("SFH", "pre_1975"),
                    sampler=CategoricalSampler(values=[...], weights=[...]),
                ),
                MultiColumnCondition(
                    match_vals=("MFH", "post_2003"),
                    sampler=UniformSampler(min=0.5, max=1.0),
                ),
            ],
            fallback_prior=CategoricalSampler(values=[...], weights=[...]),
        )
    """

    source_features: list[str]
    conditions: list[MultiColumnCondition]
    fallback_prior: PriorSampler | None

    @model_validator(mode="after")
    def validate_condition_lengths(self):
        """Ensure all conditions have match_vals aligned with source_features."""
        for i, c in enumerate(self.conditions):
            if len(c.match_vals) != len(self.source_features):
                msg = (
                    f"Condition {i}: match_vals length {len(c.match_vals)} "
                    f"!= source_features length {len(self.source_features)}"
                )
                raise ValueError(msg)
        return self

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from a multi-column conditional prior.

        Raises:
            SamplingError: If a source feature is missing, the row count is
                wrong, or some rows match no condition and no fallback_prior
                is configured.
        """
        for f in self.source_features:
            if f not in context.columns:
                msg = f"Source feature {f} not found in context dataframe."
                raise SamplingError(msg)
        _require_length(context, n)

        # One tuple of source-feature values per row; conditions match on these.
        row_tuples = list(
            zip(*(context[f].to_numpy() for f in self.source_features), strict=True)
        )

        final = np.full(n, np.nan)
        any_matched = np.full(n, False)
        for condition in self.conditions:
            samples = condition.sample(context, n, generator)
            mask = np.fromiter(
                (t == condition.match_vals for t in row_tuples), dtype=bool, count=n
            )
            final = np.where(mask, samples, final)
            any_matched |= mask

        if self.fallback_prior is not None:
            fallback = self.fallback_prior.sample(context, n, generator)
            final = np.where(~any_matched, fallback, final)
        elif not any_matched.all():
            # Mask-based detection: see ConditionalPrior.sample for why
            # np.isnan(final) is unsafe with non-float samplers.
            unmatched_examples = [
                t
                for t, matched in zip(row_tuples, any_matched, strict=True)
                if not matched
            ][:5]
            msg = (
                f"Unmatched values found for features {self.source_features} and no "
                f"fallback_prior is set. Examples of unmatched tuples: {unmatched_examples}"
            )
            raise SamplingError(msg)

        return final

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return set(self.source_features) | {
            dependency for c in self.conditions for dependency in c.sampler.depends_on
        }


class UnconditionalPrior(BaseModel, PriorABC):
    """An unconditional prior."""

    sampler: PriorSampler

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> np.ndarray:
        """Sample from an unconditional prior."""
        return self.sampler.sample(context, n, generator)

    @property
    def depends_on(self) -> set[str]:
        """The features that this sampler depends on."""
        return self.sampler.depends_on


Prior = UnconditionalPrior | ConditionalPrior | MultiColumnConditionalPrior


class Priors(BaseModel):
    """A collection of priors defining a dependency graph for sampling.

    The sampled_features dict must be ordered such that dependencies come before
    dependents (i.e. topological order). Sampling iterates in dict order.

    TODO: Add automatic topological sort and validation that all required
    target model fields appear as terminal nodes in the graph.
    """

    sampled_features: dict[str, Prior]

    def sample(
        self, context: pd.DataFrame, n: int, generator: np.random.Generator
    ) -> pd.DataFrame:
        """Sample from all priors in dependency order."""
        working_df = context.copy(deep=True)
        # TODO: how do we deal with race conditions here in the sense that
        # some features may require previous features to have already been sampled?
        # TODO: Similarly, how do we ensure that there are no cycles in the dependency graph?
        for feature, prior in self.sampled_features.items():
            working_df[feature] = prior.sample(working_df, n, generator)
            if working_df.isna().any().any():  # pyright: ignore [reportAttributeAccessIssue]
                # TODO: allow na values eg in training?
                msg = "Working dataframe contains NaN values; possibly due to an unmatched value."
                raise SamplingError(msg)
        return working_df

    @property
    def depends_on(self) -> set[str]:
        """The features that this collection's priors depend on."""
        return {
            dependency
            for prior in self.sampled_features.values()
            for dependency in prior.depends_on
        }

    @property
    def dependency_graph(self) -> nx.DiGraph:
        """Construct a dependency graph between columns in the context dataframe.

        Edges connect *from* the dependency *to* the dependent feature. Every
        sampled feature is added as a node even when it has no dependencies so
        that graph queries (e.g. nx.descendants) never raise for a known
        feature with no edges.
        """
        g = nx.DiGraph()
        for feature, prior in self.sampled_features.items():
            g.add_node(feature)
            for dependency in prior.depends_on:
                g.add_edge(dependency, feature)
        return g

    @property
    def root_features(self) -> set[str]:
        """The features that have no dependencies."""
        g = self.dependency_graph
        return {node for node in g.nodes if g.in_degree(node) == 0}

    def select_prior_tree_for_changed_features(
        self, changed_features: set[str], resample_changed_features: bool = True
    ) -> "Priors":
        """Select the prior tree for the changed features.

        Returns a new Priors object with only the priors that are
        downstream of the changed features.

        Args:
            changed_features: The features that have changed.
            resample_changed_features: Whether to resample the changed features
                themselves (dependencies are always resampled). You probably want
                this to be False, but for backwards compatibility it defaults to True.

        Returns:
            A new Priors object with only the downstream priors.
        """
        g = self.dependency_graph
        all_changing_priors: set[str] = set()
        for candidate in self.root_features | set(self.sampled_features):
            if candidate not in changed_features:
                continue
            if candidate in self.sampled_features and resample_changed_features:
                all_changing_priors.add(candidate)
            # Isolated sampled features are guaranteed graph nodes (see
            # dependency_graph), so nx.descendants cannot raise here.
            all_changing_priors.update(
                dep for dep in nx.descendants(g, candidate) if dep in self.sampled_features
            )
        return Priors(
            sampled_features={
                f: p
                for f, p in self.sampled_features.items()
                if f in all_changing_priors
            }
        )
+ + Returns a new Priors object with only the priors that are + downstream of the changed features. + + Args: + changed_features: The features that have changed. + resample_changed_features: Whether to resample the changed features + themselves (dependencies are always resampled). You probably want + this to be False, but for backwards compatibility it defaults to True. + + Returns: + A new Priors object with only the downstream priors. + """ + g = self.dependency_graph + all_changing_priors: set[str] = set() + for any_feature in self.root_features.union(set(self.sampled_features.keys())): + if any(f == any_feature for f in changed_features): + descendants = nx.descendants(g, any_feature) + + if any_feature in self.sampled_features and resample_changed_features: + all_changing_priors.add(any_feature) + + for dep in descendants: + if dep in self.sampled_features: + all_changing_priors.add(dep) + + return Priors( + sampled_features={ + f: p + for f, p in self.sampled_features.items() + if f in all_changing_priors + } + ) diff --git a/src/globi/models/surrogate/sampling.py b/src/globi/models/surrogate/sampling.py index fff8068..8ead08f 100644 --- a/src/globi/models/surrogate/sampling.py +++ b/src/globi/models/surrogate/sampling.py @@ -2,19 +2,29 @@ from typing import cast +import numpy as np import pandas as pd +from pydantic import Field +from scythe.base import ExperimentInputSpec from globi.models.surrogate.configs.pipeline import StageSpec +from globi.models.surrogate.samplers import Priors class SampleSpec(StageSpec): """A spec for the sampling stage of the progressive training.""" # TODO: add the ability to receive the last set of error metrics and use them to inform the sampling + priors: Priors = Field( + ..., + description="The priors to use for sampling.", + ) - def stratified_selection(self) -> pd.DataFrame: + def stratified_selection(self) -> pd.DataFrame | None: """Sample the gis data.""" - df = self.parent.gis_data + df = self.parent.context_data + if df 
is None: + return None stratification_field = self.parent.stratification.field stratification_aliases = self.parent.stratification.aliases @@ -87,106 +97,27 @@ def sample_equally_by_stratum( } return cast(pd.DataFrame, pd.concat(sampled_strata.values())) - # def sample_semantic_fields(self, df: pd.DataFrame) -> pd.DataFrame: - # """Sample the semantic fields.""" - # # TODO: consider randomizing the locations? - # semantic_fields = self.progressive_training_spec.semantic_fields_data - # for field in semantic_fields.Fields: - # if isinstance(field, CategoricalFieldSpec): - # options = field.Options - # df[field.Name] = self.random_generator.choice(options, size=len(df)) - # elif isinstance(field, NumericFieldSpec): - # df[field.Name] = self.random_generator.uniform( - # field.Min, field.Max, size=len(df) - # ) - # else: - # msg = f"Invalid field type: {type(field)}" - # raise TypeError(msg) - # return df - - # def sample_basements_and_attics(self, df: pd.DataFrame) -> pd.DataFrame: - # """Add basement/attics to models.""" - # # get the options for the type literal - # options: list[BasementAtticOccupationConditioningStatus] = [ - # "none", - # "occupied_unconditioned", - # "unoccupied_unconditioned", - # "occupied_conditioned", - # "unoccupied_conditioned", - # ] - # weights = [0.5, *([0.5 / 4] * 4)] - # # sample the type literal - # df["basement"] = self.random_generator.choice(options, size=len(df), p=weights) - # df["attic"] = self.random_generator.choice(options, size=len(df), p=weights) - # df["exposed_basement_frac"] = self.random_generator.uniform( - # 0.1, 0.5, size=len(df) - # ) - # return df - - # def sample_wwrs(self, df: pd.DataFrame) -> pd.DataFrame: - # """Sample the wwrs.""" - # wwr_min = 0.05 - # wwr_max = 0.35 - # df["wwr"] = self.random_generator.uniform(wwr_min, wwr_max, size=len(df)) - # return df - - # def sample_f2f_heights(self, df: pd.DataFrame) -> pd.DataFrame: - # """Sample the f2f heights.""" - # f2f_min = 2.3 - # f2f_max = 4.3 - # 
df["f2f_height"] = self.random_generator.uniform(f2f_min, f2f_max, size=len(df)) - # return df - - def to_sim_specs(self, df: pd.DataFrame): - """Convert the sampled dataframe to a list of simulation specs. - - For now, we are assuming that all the other necessary fields are present and we are just - ensuring that sort_index and experiment_id are set appropriately. - """ - # df["semantic_field_context"] = df.apply( - # lambda row: { - # field.Name: row[field.Name] - # for field in self.progressive_training_spec.semantic_fields_data.Fields - # }, - # axis=1, - # ) - # df["sort_index"] = np.arange(len(df)) - # df["experiment_id"] = self.experiment_key - # # TODO: consider allowing the component map/semantic_fields/database to be inherited from the row - # # e.g. to allow multiple component maps and dbs per run. - # df["component_map_uri"] = str(self.progressive_training_spec.component_map_uri) - # df["semantic_fields_uri"] = str( - # self.progressive_training_spec.semantic_fields_uri - # ) - # df["db_uri"] = str(self.progressive_training_spec.database_uri) - return df - - # def make_payload(self, s3_client: S3ClientType): - # """Make the payload for the scatter gather task, including generating the simulation specs and serializing them to s3.""" - # df = self.stratified_selection() - # # df = self.sample_semantic_fields(df) - # # df = self.sample_basements_and_attics(df) - # # df = self.sample_wwrs(df) - # # df = self.sample_f2f_heights(df) - # df = self.to_sim_specs(df) - # # serialize to a parquet file and upload to s3 - # bucket = self.progressive_training_spec.storage_settings.BUCKET - # with tempfile.TemporaryDirectory() as tmpdir: - # tmpdir = Path(tmpdir) - # fpath = tmpdir / "specs.pq" - # df.to_parquet(fpath) - # key = f"hatchet/{self.experiment_key}/specs.pq" - # specs_uri = f"s3://{bucket}/{key}" - # s3_client.upload_file(fpath.as_posix(), bucket, key) - - # payload = { - # "specs": specs_uri, - # "bucket": bucket, - # "workflow_name": 
"simulate_sbem_shoebox", - # "experiment_id": self.experiment_key, - # "recursion_map": { - # "factor": self.progressive_training_spec.iteration.recursion_factor, - # "max_depth": self.progressive_training_spec.iteration.recursion_max_depth, - # }, - # } - # return payload + # TODO: Add the ability to check the compatiblity of a sampling spec with an input_validator_type. + + def populate_sample_df(self) -> pd.DataFrame: + """Populate the sample dataframe with the priors.""" + base_df = self.stratified_selection() + if base_df is None: + base_df = pd.DataFrame() + # in case we needed more samples due to the strata min req + n_samples = max(self.parent.iteration.n_per_gen_for_current_iter, len(base_df)) + return self.priors.sample( + base_df, + n_samples, + self.random_generator, + ) + + def convert_to_specs( + self, df: pd.DataFrame, input_validator: type[ExperimentInputSpec] + ): + """Convert the sampled dataframe to a list of simulation specs.""" + df["experiment_id"] = "placeholder" + df["sort_index"] = np.arange(len(df)) + return [ + input_validator.model_validate(row) for row in df.to_dict(orient="records") + ] diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 17c0751..0f53f0f 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -1,6 +1,5 @@ """The training pipeline.""" -import random import tempfile from datetime import timedelta from pathlib import Path @@ -10,6 +9,7 @@ import pandas as pd import yaml from hatchet_sdk import Context +from scythe.base import ExperimentInputSpec from scythe.experiments import ( BaseExperiment, ) @@ -19,7 +19,6 @@ from scythe.settings import ScytheStorageSettings from scythe.utils.filesys import S3Url -from globi.models.surrogate.dummy import DummySimulationInput from globi.models.surrogate.outputs import ( CombineResultsResult, ExperimentRunWithRef, @@ -28,6 +27,7 @@ StartTrainingResult, TrainingEvaluationResult, ) +from globi.models.surrogate.sampling import 
SampleSpec from globi.models.surrogate.training import ( FoldResult, ProgressiveTrainingSpec, @@ -73,20 +73,17 @@ def create_simulations( ) -> ExperimentRunWithRef: """Create the simulations.""" # STEP 1: Generate the training samples, allocate simulations - specs = [ - DummySimulationInput( - weather_file="some" if random.random() < 0.5 else "other", # noqa: S311 - a=random.randint(-10, 10), # noqa: S311 - b=random.randint(-10, 10), # noqa: S311 - c=random.randint(-10, 10), # noqa: S311 - experiment_id="placeholder", - sort_index=i, - ) - for i in range(1_000) - ] + sample_spec = SampleSpec(parent=spec, priors=spec.samplers) + sample_df = sample_spec.populate_sample_df() + + # TODO: we shouldn't have to cast here, but the typing on `runnable` is not working as expected. + input_validator = cast( + type[ExperimentInputSpec], spec.runnable.input_validator_type + ) + specs = sample_spec.convert_to_specs(sample_df, input_validator) # STEP 2: Simulate the simulations using scythe - run_name = f"{spec.experiment_id}/sample" + run_name = spec.subrun_name("sample") exp = BaseExperiment( runnable=spec.runnable, @@ -201,7 +198,7 @@ def start_training( # Alternatively, one task per fold-column combination? 
specs = train_spec.schedule - run_name = f"{spec.experiment_id}/train" + run_name = spec.subrun_name("train") exp = BaseExperiment( runnable=train_regressor_with_cv_fold, run_name=run_name, From 5e277f760fcd46ee1264636496e805a044e80a9b Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 15:07:40 -0400 Subject: [PATCH 15/27] vestigial cleanup --- src/globi/models/surrogate/training.py | 180 ------------------------- src/globi/pipelines/training.py | 60 --------- 2 files changed, 240 deletions(-) diff --git a/src/globi/models/surrogate/training.py b/src/globi/models/surrogate/training.py index c90b6b2..8da98a1 100644 --- a/src/globi/models/surrogate/training.py +++ b/src/globi/models/surrogate/training.py @@ -497,125 +497,6 @@ def train_pytorch_tabular(self, tempdir: Path): model.save_model((tempdir / "model").as_posix()) return model, trainer - # @cached_property - # def non_numeric_options(self) -> dict[str, list[str]]: - # """Get the non-numeric options for categorical features. - - # We must perform this across the entire dataset not just splits for consistency - # and to ensure we get all options. - - # TODO: In the future, this should be based off of transform instructions. - # """ - # fparams = self.dparams[ - # [col for col in self.dparams.columns if col.startswith("feature.")] - # ] - # non_numeric_cols = fparams.select_dtypes(include=["object"]).columns - # non_numeric_options = { - # col: sorted(cast(pd.Series, fparams[col]).unique().tolist()) - # for col in non_numeric_cols - # } - # return non_numeric_options - - # @cached_property - # def numeric_min_maxs(self) -> dict[str, tuple[float, float]]: - # """Get the min and max for numeric features. - - # We perform this only on the training set to prevent leakage. - - # TODO: In the future, this should be based off of transform instructions. - - # Args: - # params (pd.DataFrame): The parameters to get the min and max for. 
- - # Returns: - # norm_bounds (dict[str, tuple[float, float]]): The min and max for each numeric feature. - # """ - # params, _ = self.train_segment - # fparams = params[[col for col in params.columns if col.startswith("feature.")]] - # numeric_cols = fparams.select_dtypes(include=["number"]).columns - # numeric_min_maxs = { - # col: (float(fparams[col].min()), float(fparams[col].max())) - # for col in numeric_cols - # } - # for col in numeric_min_maxs: - # low, high = numeric_min_maxs[col] - # # we want to floor the "low" value down to the nearest 0.001 - # # and ceil the "high" value up to the nearest 0.001 - # # e.g. if low is -0.799, we want to set it to -0.800 - # # and if high is 0.799, we want to set it to 0.800 - # numeric_min_maxs[col] = ( - # math.floor(low * 1000) / 1000, - # math.ceil(high * 1000) / 1000, - # ) - # return numeric_min_maxs - - # @cached_property - # def feature_spec(self) -> RegressorInputSpec: - # """Get the feature spec which can be serialized and reloaded.""" - # params, _ = self.train_segment - # features: list[CategoricalFeature | ContinuousFeature] = [] - # for col in params.columns: - # if col in self.numeric_min_maxs: - # low, high = self.numeric_min_maxs[col] - # features.append( - # ContinuousFeature(name=col, min=float(low), max=float(high)) - # ) - # elif col in self.non_numeric_options: - # opts = self.non_numeric_options[col] - # features.append(CategoricalFeature(name=col, values=opts)) - # return RegressorInputSpec(features=features) - - # def normalize_params(self, params: pd.DataFrame) -> pd.DataFrame: - # """Normalize the params.""" - # regressor_spec = self.feature_spec - # fparams = regressor_spec.transform(params, do_check=False) - # return fparams - - # def run( - # self, - # ): - # """Train the model.""" - # train_params, train_targets = self.train_segment - # test_params, test_targets = self.test_segment - - # # select/transform the params as necessary - # train_params = self.normalize_params(train_params) - # 
test_params = self.normalize_params(test_params) - - # # Train the model - # # train_preds, test_preds = self.train_xgboost( - # # train_params, train_targets, test_params, test_targets - # # ) - # s3_client = boto3.client("s3") - # train_preds, test_preds = self.train_lightgbm( - # train_params, train_targets, test_params, test_targets, s3_client - # ) - - # # compute the metrics - # global_train_metrics, stratum_train_metrics = self.compute_metrics( - # train_preds, train_targets - # ) - # global_test_metrics, stratum_test_metrics = self.compute_metrics( - # test_preds, test_targets - # ) - - # global_metrics = pd.concat( - # [global_train_metrics, global_test_metrics], - # axis=1, - # keys=["train", "test"], - # names=["split_segment"], - # ) - # stratum_metrics = pd.concat( - # [stratum_train_metrics, stratum_test_metrics], - # axis=1, - # keys=["train", "test"], - # names=["split_segment"], - # ) - # return { - # "global_metrics": global_metrics, - # "stratum_metrics": stratum_metrics, - # } - def compute_frame_metrics( self, preds: pd.DataFrame, targets: pd.DataFrame ) -> pd.DataFrame: @@ -711,67 +592,6 @@ def compute_metrics(self, preds: pd.DataFrame, targets: pd.DataFrame): ) return global_metrics, stratum_metrics - # def train_lightgbm( - # self, - # train_params: pd.DataFrame, - # train_targets: pd.DataFrame, - # test_params: pd.DataFrame, - # test_targets: pd.DataFrame, - # s3_client: S3ClientType | None = None, - # ): - # """Train the lightgbm model.""" - # import lightgbm as lgb - - # lgb_params = { - # "objective": "regression", - # "metric": "rmse", - # } - # test_preds = {} - # train_preds = {} - # for col in train_targets.columns: - # lgb_train_data = lgb.Dataset(train_params, label=train_targets[col]) - # lgb_test_data = lgb.Dataset(test_params, label=test_targets[col]) - # model = lgb.train( - # lgb_params, - # lgb_train_data, - # num_boost_round=4000, - # valid_sets=[lgb_test_data], - # valid_names=["eval"], - # 
callbacks=[lgb.early_stopping(20)], - # ) - # test_preds[col] = pd.Series( - # cast(np.ndarray, model.predict(test_params)), - # index=test_targets.index, - # name=col, - # ) - # train_preds[col] = pd.Series( - # cast(np.ndarray, model.predict(train_params)), - # index=train_targets.index, - # name=col, - # ) - # if s3_client is not None: - # model_name = ( - # f"{col}.lgb" - # if not isinstance(col, tuple) - # else f"{'.'.join(col)}.lgb" - # ) - # model_key = self.format_model_key(model_name) - # model_str = model.model_to_string() - # s3_client.put_object(Bucket=self.bucket, Key=model_key, Body=model_str) - - # if s3_client is not None: - # import yaml - - # space_key = self.format_model_key("space.yml") - # space_str = yaml.dump( - # self.feature_spec.model_dump(mode="json"), indent=2, sort_keys=False - # ) - # s3_client.put_object(Bucket=self.bucket, Key=space_key, Body=space_str) - - # test_preds = pd.concat(test_preds, axis=1) - # train_preds = pd.concat(train_preds, axis=1) - # return train_preds, test_preds - class FoldResult(ExperimentOutputSpec): """The output for a fold.""" diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 0f53f0f..e9b0ce0 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -409,63 +409,3 @@ def finalize(spec: ProgressiveTrainingSpec, context: Context) -> FinalizeResult: temp_path.as_posix(), spec.storage_settings.BUCKET, summary_manifest_uri ) return result - - -# if __name__ == "__main__": -# import yaml -# from pydantic import HttpUrl -# from scythe.settings import ScytheStorageSettings - -# from globi.models.surrogate.configs.pipeline import ( -# ConvergenceThresholds, -# ConvergenceThresholdsByTarget, -# IterationSpec, -# StratificationSpec, -# ) -# from globi.models.surrogate.dummy import dummy_simulation - -# base_run_name = "test-experiment" -# progressive_training_spec = ProgressiveTrainingSpec( -# runnable=dummy_simulation, -# sort_index=0, -# 
experiment_id="placeholder", -# gis_uri=HttpUrl("https://example.com/gis.parquet"), -# stratification=StratificationSpec( -# field="weather_file", -# sampling="equal", -# aliases=["feature.weather.file"], -# ), -# iteration=IterationSpec( -# max_iters=3, -# ), -# convergence_criteria=ConvergenceThresholdsByTarget( -# thresholds={ -# "*": ConvergenceThresholds(r2=0.975), -# }, -# ), -# storage_settings=ScytheStorageSettings(), -# base_run_name=base_run_name, -# ) -# with open("inputs/training.yml", "w") as f: -# yaml.dump( -# progressive_training_spec.model_dump(mode="json"), -# f, -# indent=2, -# sort_keys=False, -# ) - -# exp = BaseExperiment( -# runnable=iterative_training, -# run_name="test-experiment", -# ) - -# run, ref = exp.allocate( -# progressive_training_spec, -# version="bumpmajor", -# recursion_map=RecursionMap( -# factor=2, -# max_depth=0, -# ), -# ) - -# print(yaml.dump(run.model_dump(mode="json"), indent=2, sort_keys=False)) From d7d0105b6885edbb565cd89b3a432238ec2b4e6a Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 15:18:21 -0400 Subject: [PATCH 16/27] lazily import scythe to prevent docs error --- src/globi/tools/cli/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/globi/tools/cli/main.py b/src/globi/tools/cli/main.py index cb11098..7950de6 100644 --- a/src/globi/tools/cli/main.py +++ b/src/globi/tools/cli/main.py @@ -7,7 +7,6 @@ import boto3 import click import yaml -from scythe.experiments import BaseExperiment if TYPE_CHECKING: from mypy_boto3_s3 import S3Client @@ -112,6 +111,8 @@ def manifest( ) def surrogate(path): """Submit a GloBI surrogate experiment.""" + from scythe.experiments import BaseExperiment + from globi.models.surrogate.configs.pipeline import ProgressiveTrainingSpec from globi.pipelines.training import iterative_training From 5474f63380f0ebcbdabb2fb5d797dc603a3303e3 Mon Sep 17 00:00:00 2001 From: Sam Wolk 
<36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 15:25:13 -0400 Subject: [PATCH 17/27] easier dummy fn --- src/globi/models/surrogate/dummy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/globi/models/surrogate/dummy.py b/src/globi/models/surrogate/dummy.py index bd08fda..e8af474 100644 --- a/src/globi/models/surrogate/dummy.py +++ b/src/globi/models/surrogate/dummy.py @@ -69,8 +69,8 @@ def dummy_simulation( n_inputs, SyntheticProblemConfig( n_outputs=5, - n_latents=8, - difficulty="medium", + n_latents=3, + difficulty="easy", noise_std=0.0, normalize_outputs=True, ), From b49a701541507aa6cf7ff19b4e00cecc5e19dc48 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 15:31:37 -0400 Subject: [PATCH 18/27] add a bunch of log statements --- src/globi/pipelines/training.py | 35 ++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index e9b0ce0..d768327 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -73,14 +73,18 @@ def create_simulations( ) -> ExperimentRunWithRef: """Create the simulations.""" # STEP 1: Generate the training samples, allocate simulations + context.log("Generating training samples...") sample_spec = SampleSpec(parent=spec, priors=spec.samplers) sample_df = sample_spec.populate_sample_df() + context.log("Training samples generated.") # TODO: we shouldn't have to cast here, but the typing on `runnable` is not working as expected. 
input_validator = cast( type[ExperimentInputSpec], spec.runnable.input_validator_type ) + context.log("Converting training samples to specs...") specs = sample_spec.convert_to_specs(sample_df, input_validator) + context.log("Training samples converted to specs.") # STEP 2: Simulate the simulations using scythe run_name = spec.subrun_name("sample") @@ -91,11 +95,13 @@ def create_simulations( storage_settings=spec.storage_settings or ScytheStorageSettings(), ) + context.log("Allocating simulations...") run, ref = exp.allocate( specs, version="bumpmajor", recursion_map=spec.iteration.recursion, ) + context.log("Simulations allocated.") run_name = run.versioned_experiment.base_experiment.run_name if not run_name: @@ -136,7 +142,12 @@ async def await_simulations( def combine_results( spec: ProgressiveTrainingSpec, context: Context ) -> CombineResultsResult: - """Combine the results of the simulations.""" + """Combine the results of the simulations. + + Specifically, this step is responsible for combining the results of the simulations + of the previous iteration(s) with the results of the current iteration. In other words, + this is where we grow our simulation cache. + """ # TODO: major consider how we handle beyond-memory scale scenarios. # i.e. we probably need to refactor to allow lists of files that only the # main worker is responsible for combining. @@ -150,6 +161,7 @@ def combine_results( # also, should we make sure to remove NaN? if spec.data_uris: + context.log("Combining results from previous iterations...") shared_keys = set(spec.data_uris.uris.keys()) & set(results.uris.keys()) old_keys_only = set(spec.data_uris.uris.keys()) - shared_keys new_keys_only = set(results.uris.keys()) - shared_keys @@ -161,15 +173,20 @@ def combine_results( # TODO: refactor to use a threadpool executor? # For memory reasons, it might be a good idea to stay single threaded here. 
for key in shared_keys: + context.log(f"Combining results for key {key}...") old_df = pd.read_parquet(str(spec.data_uris.uris[key])) new_df = pd.read_parquet(str(results.uris[key])) combined_df = pd.concat([old_df, new_df], axis=0) uri = spec.format_combined_output_uri(key) combined_df.to_parquet(str(uri)) + context.log(f"Results for key {key} combined and saved to s3.") combined_results[key] = uri else: # TODO: consider copying these over to the `combined` folder anyways. + context.log( + "No previous iterations to combine results from, so using results from current iteration." + ) combined_results = results.uris return CombineResultsResult( @@ -198,6 +215,7 @@ def start_training( # Alternatively, one task per fold-column combination? specs = train_spec.schedule + context.log("Scheduling training...") run_name = spec.subrun_name("train") exp = BaseExperiment( runnable=train_regressor_with_cv_fold, @@ -212,6 +230,7 @@ def start_training( max_depth=0, ), ) + context.log("Training scheduled.") if not run.versioned_experiment.base_experiment.run_name: msg = "Run name is required." 
@@ -258,8 +277,12 @@ def evaluate_training( results_output = context.task_output(await_training) strata_uri = results_output.uris["strata"] globals_uri = results_output.uris["global"] + context.log("Reading strata results from s3...") results = pd.read_parquet(str(strata_uri)) + context.log("Strata results read from s3.") + context.log("Reading global results from s3...") results_globals = pd.read_parquet(str(globals_uri)) + context.log("Global results read from s3.") fold_averages = cast( pd.Series, @@ -278,12 +301,14 @@ def evaluate_training( .unstack(), ) + context.log("Running convergence criteria...") ( convergence_all, _convergence_monitor_segment, _convergence_monitor_segment_and_target, _convergence, ) = spec.convergence_criteria.run(fold_averages) + context.log("Convergence criteria run.") return TrainingEvaluationResult( converged=convergence_all, @@ -305,14 +330,21 @@ def transition_recursion( """Transition the recursion.""" results = context.task_output(evaluate_training) if results.converged: + context.log("Converged! Time to wrap up... no more recursion.") return RecursionTransition(reasoning="converged", child_workflow_run_id=None) if spec.iteration.at_max_iters: + context.log( + "Not converged, but we're at the max number of iterations. Time to wrap up... no more recursion." + ) return RecursionTransition(reasoning="max_depth", child_workflow_run_id=None) await_training_output = context.task_output(await_training) # start_training_output = context.task_output(start_training) combine_results_output = context.task_output(combine_results) + context.log( + "Not converged, but we have more iterations to try. Time to continue recursion..." 
+ ) next_spec = spec.model_copy(deep=True) next_spec.iteration.current_iter += 1 next_spec.data_uris = combine_results_output.combined @@ -331,6 +363,7 @@ def transition_recursion( max_depth=0, ), ) + context.log("Recursion transitioned.") return RecursionTransition( reasoning=None, child_workflow_run_id=ref.workflow_run_id ) From a15f8eb2083a522357636dacce3aa2b02a860457 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 15:51:09 -0400 Subject: [PATCH 19/27] simplify dummy problem --- src/globi/models/surrogate/dummy.py | 249 ++++------------------------ 1 file changed, 29 insertions(+), 220 deletions(-) diff --git a/src/globi/models/surrogate/dummy.py b/src/globi/models/surrogate/dummy.py index e8af474..ee33d5e 100644 --- a/src/globi/models/surrogate/dummy.py +++ b/src/globi/models/surrogate/dummy.py @@ -1,9 +1,8 @@ """Dummy simulation for testing.""" import math -from dataclasses import dataclass from pathlib import Path -from typing import Any, Literal, get_args +from typing import Literal, get_args import numpy as np import pandas as pd @@ -65,16 +64,10 @@ def dummy_simulation( """A dummy simulation.""" n_inputs = input_spec.n_inputs() n_outputs = 5 - problem = SyntheticMultiOutputProblem( + problem = SimpleSyntheticProblem( n_inputs, - SyntheticProblemConfig( - n_outputs=5, - n_latents=3, - difficulty="easy", - noise_std=0.0, - normalize_outputs=True, - ), - input_spec.sort_index, + n_outputs, + seed=input_spec.sort_index, ) y = problem.evaluate(np.array(input_spec.values)) @@ -93,223 +86,39 @@ def dummy_simulation( ) -@dataclass(frozen=True) -class SyntheticProblemConfig: - """Configuration for a synthetic multi-output regression problem.""" - - n_outputs: int = 8 - n_latents: int = 4 - difficulty: Literal["easy", "medium"] = "easy" - noise_std: float = 0.0 - normalize_outputs: bool = True - - -class SyntheticMultiOutputProblem: - """Deterministic synthetic multi-output function family. 
- - Inputs: - x in [0, 1]^d - - Outputs: - y in R^m - - Design goals: - - cheap to evaluate - - arbitrary input dimension - - arbitrary output count - - some outputs share latent structure - - some outputs contain mild independent residuals - - difficulty is tunable but never absurd - """ - - def __init__(self, n_inputs: int, config: SyntheticProblemConfig, seed: int): - """Initialize the synthetic multi-output problem.""" - if n_inputs < 1: - msg = "n_inputs must be >= 1" - raise ValueError(msg) - if config.n_outputs < 1: - msg = "n_outputs must be >= 1" - raise ValueError(msg) - if config.n_latents < 1: - msg = "n_latents must be >= 1" - raise ValueError(msg) +class SimpleSyntheticProblem: + """A simple synthetic problem.""" + def __init__(self, n_inputs: int, n_outputs: int, seed: int): + """Initialize the simple synthetic problem.""" self.n_inputs = n_inputs - self.config = config - self.rng = np.random.default_rng(seed) - self.seed = seed - - self.active_dims_per_latent = ( - min(5, n_inputs) if config.difficulty == "easy" else min(8, n_inputs) - ) - self.freq_max = 2 if config.difficulty == "easy" else 4 - self.residual_scale = 0.05 if config.difficulty == "easy" else 0.12 + self.n_outputs = n_outputs + rng = np.random.default_rng(seed) - # Shared latent parameters - self.latent_defs = [ - self._make_latent_definition(k) for k in range(config.n_latents) - ] - - # Output mixing weights: this is what creates output dependency - self.mix_weights = self.rng.normal( - loc=0.0, - scale=1.0 / math.sqrt(config.n_latents), - size=(config.n_outputs, config.n_latents), - ) - - # Small output-specific residual definitions - self.residual_defs = [ - self._make_residual_definition(j) for j in range(config.n_outputs) - ] - - # Optional approximate normalization constants computed deterministically - self.output_shift = np.zeros(config.n_outputs, dtype=float) - self.output_scale = np.ones(config.n_outputs, dtype=float) - if config.normalize_outputs: - 
self._fit_normalization() + self.alpha = rng.normal(size=n_outputs) + self.beta = rng.normal(scale=0.8, size=(n_outputs, n_inputs)) + self.gamma = rng.normal(scale=0.4, size=(n_outputs, n_inputs)) + self.delta = rng.normal(scale=0.3, size=(n_outputs, max(0, n_inputs - 1))) + self.eta = rng.normal(scale=0.2, size=n_outputs) + self.sine_dim = rng.integers(0, n_inputs, size=n_outputs) def evaluate(self, x: np.ndarray) -> np.ndarray: - """Evaluate all outputs at one input vector x.""" + """Evaluate the simple synthetic problem.""" x = np.asarray(x, dtype=float) - if x.shape != (self.n_inputs,): - msg = f"Expected x shape {(self.n_inputs,)}, got {x.shape}" - raise ValueError(msg) - - # Clamp defensively; upstream encoder should already map into [0, 1] x = np.clip(x, 0.0, 1.0) - z = np.array([self._eval_latent(x, ld) for ld in self.latent_defs], dtype=float) - y = self.mix_weights @ z - - # Add small output-specific residuals so not everything is perfectly low-rank - residual = np.array( - [self._eval_residual(x, rd) for rd in self.residual_defs], dtype=float - ) - y = y + residual - - if self.config.noise_std > 0: - # deterministic if seed fixed and call order fixed; default is off for stable tests - y = y + self.rng.normal( - 0.0, self.config.noise_std, size=self.config.n_outputs - ) - - y = (y - self.output_shift) / self.output_scale - return y - - def _make_latent_definition(self, k: int) -> dict[str, Any]: - """Create one latent function definition.""" - latent_type = k % 4 - dims = self.rng.choice( - self.n_inputs, size=self.active_dims_per_latent, replace=False - ) - - if latent_type == 0: - # additive sinusoid - return { - "type": "additive_sin", - "dims": dims, - "amp": self.rng.uniform(0.4, 1.2, size=len(dims)), - "freq": self.rng.integers(1, self.freq_max + 1, size=len(dims)), - "phase": self.rng.uniform(0.0, 2 * math.pi, size=len(dims)), - } - - if latent_type == 1: - # smooth quadratic bowl-ish feature - return { - "type": "quadratic", - "dims": dims, - 
"weight": self.rng.uniform(0.5, 1.5, size=len(dims)), - "center": self.rng.uniform(0.2, 0.8, size=len(dims)), - } - - if latent_type == 2: - # pairwise interaction latent - pair_count = max(1, len(dims) // 2) - pair_dims = dims[: 2 * pair_count].reshape(pair_count, 2) - return { - "type": "pairwise_sin", - "pairs": pair_dims, - "weight": self.rng.uniform(0.4, 1.0, size=pair_count), - } - - # Friedman-like latent, adapted to arbitrary dimension by cycling - d0 = dims[0 % len(dims)] - d1 = dims[1 % len(dims)] - d2 = dims[2 % len(dims)] - d3 = dims[3 % len(dims)] - d4 = dims[4 % len(dims)] - return { - "type": "friedman_like", - "dims": np.array([d0, d1, d2, d3, d4], dtype=int), - } - - def _make_residual_definition(self, j: int) -> dict[str, Any]: - """Create a small output-specific residual.""" - dims = self.rng.choice(self.n_inputs, size=min(3, self.n_inputs), replace=False) - return { - "dims": dims, - "amp": self.rng.uniform(0.2, 0.8, size=len(dims)) * self.residual_scale, - "freq": self.rng.integers(1, self.freq_max + 1, size=len(dims)), - "phase": self.rng.uniform(0.0, 2 * math.pi, size=len(dims)), - } - - def _eval_latent(self, x: np.ndarray, ld: dict[str, Any]) -> float: - t = ld["type"] - - if t == "additive_sin": - dims = ld["dims"] - return float( - np.sum( - ld["amp"] * np.sin(2 * math.pi * ld["freq"] * x[dims] + ld["phase"]) - ) - ) - - if t == "quadratic": - dims = ld["dims"] - xc = x[dims] - ld["center"] - return float(np.sum(ld["weight"] * xc * xc)) - - if t == "pairwise_sin": - total = 0.0 - for w, (i, j) in zip(ld["weight"], ld["pairs"], strict=True): - total += float(w * math.sin(math.pi * x[i] * x[j])) - return total - - if t == "friedman_like": - i0, i1, i2, i3, i4 = ld["dims"] - return float( - 10.0 * math.sin(math.pi * x[i0] * x[i1]) - + 20.0 * (x[i2] - 0.5) ** 2 - + 10.0 * x[i3] - + 5.0 * x[i4] - ) - - msg = f"Unknown latent type: {t}" - raise ValueError(msg) - - def _eval_residual(self, x: np.ndarray, rd: dict[str, Any]) -> float: - dims = 
rd["dims"] - return float( - np.sum(rd["amp"] * np.sin(2 * math.pi * rd["freq"] * x[dims] + rd["phase"])) - ) + linear = self.beta @ x + quad = self.gamma @ (x**2) - def _fit_normalization(self) -> None: - """Approximate output mean/std over a fixed reference design.""" - ref_rng = np.random.default_rng(self.seed + 1_000_000) - n_ref = 2048 if self.config.difficulty == "easy" else 4096 - X = ref_rng.uniform(0.0, 1.0, size=(n_ref, self.n_inputs)) + if self.n_inputs > 1: + pairwise_terms = x[:-1] * x[1:] + pairwise = self.delta @ pairwise_terms + else: + pairwise = np.zeros(self.n_outputs) - Y = np.zeros((n_ref, self.config.n_outputs), dtype=float) - for i in range(n_ref): - z = np.array( - [self._eval_latent(X[i], ld) for ld in self.latent_defs], dtype=float - ) - residual = np.array( - [self._eval_residual(X[i], rd) for rd in self.residual_defs], - dtype=float, - ) - Y[i] = self.mix_weights @ z + residual + periodic = np.array([ + self.eta[j] * math.sin(2 * math.pi * x[self.sine_dim[j]]) + for j in range(self.n_outputs) + ]) - self.output_shift = Y.mean(axis=0) - self.output_scale = Y.std(axis=0) - self.output_scale[self.output_scale < 1e-8] = 1.0 + return self.alpha + linear + quad + pairwise + periodic From 49553c2eea9d3d47cb983caed9fb7dba707458e1 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Mon, 9 Mar 2026 16:19:18 -0400 Subject: [PATCH 20/27] oops... 
--- src/globi/models/surrogate/dummy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/globi/models/surrogate/dummy.py b/src/globi/models/surrogate/dummy.py index ee33d5e..88eaee3 100644 --- a/src/globi/models/surrogate/dummy.py +++ b/src/globi/models/surrogate/dummy.py @@ -67,7 +67,7 @@ def dummy_simulation( problem = SimpleSyntheticProblem( n_inputs, n_outputs, - seed=input_spec.sort_index, + seed=42, ) y = problem.evaluate(np.array(input_spec.values)) From 89f2ba4e1c61bbe18ad08be35471ab14fddce601 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Tue, 10 Mar 2026 09:42:24 -0400 Subject: [PATCH 21/27] fix insane race condition --- src/globi/models/surrogate/configs/pipeline.py | 10 +++++++++- src/globi/pipelines/training.py | 8 +++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/globi/models/surrogate/configs/pipeline.py b/src/globi/models/surrogate/configs/pipeline.py index 3b2ed96..05b65b2 100644 --- a/src/globi/models/surrogate/configs/pipeline.py +++ b/src/globi/models/surrogate/configs/pipeline.py @@ -10,7 +10,7 @@ import pandas as pd from pydantic import BaseModel, Field from scythe.base import ExperimentInputSpec -from scythe.experiments import SerializableRunnable +from scythe.experiments import SemVer, SerializableRunnable from scythe.scatter_gather import RecursionMap, ScatterGatherResult from scythe.utils.filesys import OptionalFileReference, S3Url @@ -343,6 +343,14 @@ def context_data(self) -> pd.DataFrame | None: return None return pd.read_parquet(self.context_path) + @property + def current_version(self) -> SemVer: + """The current version.""" + vstr = [ + piece for piece in self.experiment_id.split("/") if piece.startswith("v") + ][-1] + return SemVer.FromString(vstr) + class StageSpec(BaseModel): """A spec that is common to both the sample and train stages (and possibly others).""" diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 
d768327..21c2476 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -355,13 +355,11 @@ def transition_recursion( run_name=f"{next_spec.base_run_name}", storage_settings=spec.storage_settings or ScytheStorageSettings(), ) + # manually bump minor here to avoid race conditions between e.g. simultaneously running v29.2.0 and v30.1.0... pretty sure the error only happens when they finish in the exact same second, but... it happened once so. _run, ref = exp.allocate( next_spec, - version="bumpminor", - recursion_map=RecursionMap( - factor=2, - max_depth=0, - ), + version=spec.current_version.next_minor_version(), + recursion_map=None, ) context.log("Recursion transitioned.") return RecursionTransition( From cfb0aa728543649cf34389c0dbd824e8dddc9eac Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:55:38 -0400 Subject: [PATCH 22/27] enable gpu indicator for tasks --- .env.scythe.training | 8 ++++++++ Makefile | 6 +++++- docker-compose.yml | 21 +++++++++++++++++++++ src/globi/pipelines/training.py | 2 ++ src/globi/worker/main.py | 23 ++--------------------- uv.lock | 2 +- 6 files changed, 39 insertions(+), 23 deletions(-) create mode 100644 .env.scythe.training diff --git a/.env.scythe.training b/.env.scythe.training new file mode 100644 index 0000000..90a58c0 --- /dev/null +++ b/.env.scythe.training @@ -0,0 +1,8 @@ +SCYTHE_WORKER_SLOTS=1 +SCYTHE_WORKER_DOES_FAN=False +SCYTHE_WORKER_DOES_LEAF=True +SCYTHE_WORKER_HAS_GPU=True + +SCYTHE_TIMEOUT_EXPERIMENT_SCHEDULE=10h +SCYTHE_TIMEOUT_SCATTER_GATHER_SCHEDULE=10h +SCYTHE_TIMEOUT_SCATTER_GATHER_EXECUTION=10h diff --git a/Makefile b/Makefile index 87de19d..1f2aae0 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -AWS_ENV ?= local.host +AWS_ENV ?= prod HATCHET_ENV ?= local.host ##################### Installation/Environment Management ##################### @@ -71,6 +71,10 @@ simulations-native: ## Run the simulations fanouts-native: ## 
Run the fanouts @uv run --env-file .env.$(AWS_ENV).aws --env-file .env.$(HATCHET_ENV).hatchet --env-file .env.scythe.storage --env-file .env.scythe.fanouts worker +.PHONY: training-native +training-native: ## Run the training + @uv run --env-file .env.$(AWS_ENV).aws --env-file .env.$(HATCHET_ENV).hatchet --env-file .env.scythe.storage --env-file .env.scythe.training worker + .PHONY: viz-native viz-native: ## Run the visualization tool # TODO: possibly add env vars to the command @uv run streamlit run src/globi/tools/visualization/main.py diff --git a/docker-compose.yml b/docker-compose.yml index 9e945da..a66b0b3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,27 @@ services: deploy: mode: replicated replicas: ${SIMULATIONS_REPLICAS:-4} + volumes: + - ./inputs:/code/inputs + - ./outputs:/code/outputs + - ./tests/data/e2e:/code/tests/data/e2e + training: + image: ${AWS_ACCOUNT_ID:-123456789012}.dkr.ecr.${AWS_REGION:-us-east-1}.amazonaws.com/hatchet/globi:${IMAGE_TAG:-latest} + build: + context: . 
+ dockerfile: src/globi/worker/Dockerfile + args: + EP_VERSION: ${EP_VERSION:-25.2.0} + PYTHON_VERSION: ${PYTHON_VERSION:-3.12} + env_file: + - .env + - .env.${AWS_ENV:-local}.aws + - .env.${HATCHET_ENV:-local}.hatchet + - .env.scythe.storage + - .env.scythe.training + deploy: + mode: replicated + replicas: ${TRAINING_REPLICAS:-0} resources: reservations: devices: diff --git a/src/globi/pipelines/training.py b/src/globi/pipelines/training.py index 21c2476..4ab59b5 100644 --- a/src/globi/pipelines/training.py +++ b/src/globi/pipelines/training.py @@ -18,6 +18,7 @@ from scythe.scatter_gather import RecursionMap, ScatterGatherResult, scatter_gather from scythe.settings import ScytheStorageSettings from scythe.utils.filesys import S3Url +from scythe.worker import ScytheWorkerLabel from globi.models.surrogate.outputs import ( CombineResultsResult, @@ -40,6 +41,7 @@ description="Train a regressor with cross-fold validation.", schedule_timeout=timedelta(hours=5), execution_timeout=timedelta(hours=1), + desired_worker_labels=ScytheWorkerLabel.HAS_GPU.worker_label, ) def train_regressor_with_cv_fold( input_spec: TrainFoldSpec, tempdir: Path diff --git a/src/globi/worker/main.py b/src/globi/worker/main.py index 95fe32d..3b9f074 100644 --- a/src/globi/worker/main.py +++ b/src/globi/worker/main.py @@ -1,8 +1,5 @@ """Worker main script.""" -from scythe.hatchet import hatchet -from scythe.registry import ExperimentRegistry -from scythe.scatter_gather import scatter_gather from scythe.worker import ScytheWorkerConfig from globi.pipelines import * # noqa: F403 @@ -12,24 +9,8 @@ def main(): - """Main function for the worker.""" - # TODO: this is required since scythe does not allow registering extra tasks/workflows at the moment. 
- worker = hatchet.worker( - name=conf.computed_name, - slots=conf.computed_slots, - durable_slots=conf.computed_durable_slots, - labels=conf.labels, - ) - workflows = ([scatter_gather] if conf.DOES_FAN else []) + ( - ExperimentRegistry.experiments() if conf.DOES_LEAF else [] - ) - for workflow in workflows: - worker.register_workflow(workflow) - if conf.DOES_FAN: - worker.register_workflow(iterative_training) - worker.start() - - # conf.start() + """Start the worker.""" + conf.start(additional_workflows=[iterative_training]) if __name__ == "__main__": diff --git a/uv.lock b/uv.lock index 8b45dad..dd4635b 100644 --- a/uv.lock +++ b/uv.lock @@ -4591,7 +4591,7 @@ wheels = [ [[package]] name = "scythe-engine" version = "0.1.2" -source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#0bc501d15c20ab23b2379b690756fd3ff3267054" } +source = { git = "https://github.com/szvsw/scythe?branch=feature%2Fallow-versioning-workflows#54e0668df5ab4741d05925c3b5dddff39ff4c9e6" } dependencies = [ { name = "boto3" }, { name = "fastparquet" }, From 4cc66534d43839cb8e36d932b267ca39737e9ded Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:57:22 -0400 Subject: [PATCH 23/27] update gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b31cb9f..3e3a71c 100644 --- a/.gitignore +++ b/.gitignore @@ -217,3 +217,5 @@ inputs/ .env.local.hatchet .env.local.host.hatchet + +scratch/ From 1db2fd3ebb72c23e5a8bb4d0d6ce86fd0196472c Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:04:21 -0400 Subject: [PATCH 24/27] add defaults --- .env.scythe.training | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.env.scythe.training b/.env.scythe.training index 90a58c0..d2e3634 100644 --- a/.env.scythe.training +++ b/.env.scythe.training @@ -3,6 +3,7 @@ SCYTHE_WORKER_DOES_FAN=False 
SCYTHE_WORKER_DOES_LEAF=True SCYTHE_WORKER_HAS_GPU=True -SCYTHE_TIMEOUT_EXPERIMENT_SCHEDULE=10h +SCYTHE_TIMEOUT_EXPERIMENT_SCHEDULE=2h +SCYTHE_TIMEOUT_EXPERIMENT_EXECUTION=1h SCYTHE_TIMEOUT_SCATTER_GATHER_SCHEDULE=10h SCYTHE_TIMEOUT_SCATTER_GATHER_EXECUTION=10h From 676a9b96127397087f2c3202e0baa3d439725ecd Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:25:26 -0400 Subject: [PATCH 25/27] restore old env args --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1f2aae0..67c54e6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -AWS_ENV ?= prod +AWS_ENV ?= local.host HATCHET_ENV ?= local.host ##################### Installation/Environment Management ##################### From 8d417db4fb322b46d7f25cfab68ad68a8945edd8 Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:30:48 -0400 Subject: [PATCH 26/27] drop torch temporarily --- pyproject.toml | 28 +--- uv.lock | 441 ------------------------------------------------- 2 files changed, 7 insertions(+), 462 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e9d3431..a1d02e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,25 +47,21 @@ visualization = [ ] # ml = [ -# "torch>=2.5.0", # "lightgbm>=4.6.0", # "xgboost>=3.2.0", # "pytorch-tabular>=1.2.0", +# "torch>=2.5.0", # "tensorboard>=2.20.0", # "wandb>=0.25.0", -# "pytorch-tabular>=1.2.0", -# "torch>=2.5.0", # ] ml-gpu = [ - "torch>=2.5.0", "lightgbm>=4.6.0", "xgboost>=3.2.0", - "pytorch-tabular>=1.2.0", + # "pytorch-tabular>=1.2.0", + # "torch>=2.5.0", "tensorboard>=2.20.0", "wandb>=0.25.0", - "pytorch-tabular>=1.2.0", - "torch>=2.5.0", ] cli = [ @@ -96,14 +92,6 @@ docs = [ "mkdocs-click>=0.9.0", ] -# [tool.uv] -# conflicts = [ -# [ -# { extra = "ml" }, -# { extra = "ml-gpu" }, -# ], -# ] - [project.scripts] worker = "globi.worker.main:main" globi = "globi.tools.cli.main:cli" @@ 
-125,12 +113,10 @@ explicit = true [tool.uv.sources] # PyTorch: CUDA 12.8 on Linux/Windows (where builds exist), PyPI (CPU) on macOS -torch = [ - { index = "pytorch-cu128", marker = "sys_platform != 'darwin'", extra = "ml-gpu" }, -# { index = "pytorch-cpu", marker = "sys_platform != 'darwin'", extra = "ml" }, - { index = "pypi", marker = "sys_platform == 'darwin'", extra = "ml-gpu" }, -# { index = "pypi", marker = "sys_platform == 'darwin'", extra = "ml" }, -] +# torch = [ +# { index = "pytorch-cu128", marker = "sys_platform != 'darwin'", extra = "ml-gpu" }, +# { index = "pypi", marker = "sys_platform == 'darwin'", extra = "ml-gpu" }, +# ] # scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/allow-optional-filerefs"} scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/allow-versioning-workflows"} # scythe-engine = {path = "../scythe", editable = true} diff --git a/uv.lock b/uv.lock index dd4635b..9043efb 100644 --- a/uv.lock +++ b/uv.lock @@ -160,12 +160,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] -[[package]] -name = "antlr4-python3-runtime" -version = "4.9.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034, upload-time = "2021-11-06T17:52:23.524Z" } - [[package]] name = "anyio" version = "4.11.0" @@ -917,34 +911,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/c6/c71e82e041c95ffe6a92ac707785500aa2a515a4339c2c7dd67e3c449249/cramjam-2.11.0-cp314-cp314t-win_amd64.whl", hash = 
"sha256:028400d699442d40dbda02f74158c73d05cb76587a12490d0bfedd958fd49188", size = 1713108, upload-time = "2025-07-27T21:24:10.147Z" }, ] -[[package]] -name = "cuda-bindings" -version = "12.9.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260, upload-time = "2025-10-21T14:51:40.79Z" }, - { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" }, - { url = "https://files.pythonhosted.org/packages/05/8b/b4b2d1c7775fa403b64333e720cfcfccef8dcb9cdeb99947061ca5a77628/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf8bfaedc238f3b115d957d1fd6562b7e8435ba57f6d0e2f87d0e7149ccb2da5", size = 11570071, upload-time = "2025-10-21T14:51:47.472Z" }, - { url = "https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628, upload-time = "2025-10-21T14:51:49.905Z" }, - { url = "https://files.pythonhosted.org/packages/ec/07/6aff13bc1e977e35aaa6b22f52b172e2890c608c6db22438cf7ed2bf43a6/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:3adf4958dcf68ae7801a59b73fb00a8b37f8d0595060d66ceae111b1002de38d", size = 11566797, upload-time = "2025-10-21T14:51:54.581Z" }, - { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991, upload-time = "2025-10-21T14:51:56.535Z" }, - { url = "https://files.pythonhosted.org/packages/1e/b5/96a6696e20c4ffd2b327f54c7d0fde2259bdb998d045c25d5dedbbe30290/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f53a7f453d4b2643d8663d036bafe29b5ba89eb904c133180f295df6dc151e5", size = 11624530, upload-time = "2025-10-21T14:52:01.539Z" }, - { url = "https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703, upload-time = "2025-10-21T14:52:03.585Z" }, - { url = "https://files.pythonhosted.org/packages/39/73/d2fc40c043bac699c3880bf88d3cebe9d88410cd043795382826c93a89f0/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20f2699d61d724de3eb3f3369d57e2b245f93085cab44fd37c3bea036cea1a6f", size = 11565056, upload-time = "2025-10-21T14:52:08.338Z" }, - { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658, upload-time = "2025-10-21T14:52:10.411Z" }, -] - -[[package]] -name = "cuda-pathfinder" -version = "1.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/07/02/59a5bc738a09def0b49aea0e460bdf97f65206d0d041246147cf6207e69c/cuda_pathfinder-1.4.1-py3-none-any.whl", hash = "sha256:40793006082de88e0950753655e55558a446bed9a7d9d0bcb48b2506d50ed82a", size = 43903, upload-time = "2026-03-06T21:05:24.372Z" }, -] - [[package]] name = "cycler" version = "0.12.1" @@ -1002,15 +968,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] -[[package]] -name = "einops" -version = "0.8.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/77/850bef8d72ffb9219f0b1aac23fbc1bf7d038ee6ea666f331fa273031aa2/einops-0.8.2.tar.gz", hash = "sha256:609da665570e5e265e27283aab09e7f279ade90c4f01bcfca111f3d3e13f2827", size = 56261, upload-time = "2026-01-26T04:13:17.638Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/09/f8d8f8f31e4483c10a906437b4ce31bdf3d6d417b73fe33f1a8b59e34228/einops-0.8.2-py3-none-any.whl", hash = "sha256:54058201ac7087911181bfec4af6091bb59380360f069276601256a76af08193", size = 65638, upload-time = "2026-01-26T04:13:18.546Z" }, -] - [[package]] name = "energy-pandas" version = "0.4.1" @@ -1347,11 +1304,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7", size = 199289, upload-time = "2025-09-02T19:10:47.708Z" }, ] -[package.optional-dependencies] -http = [ - { name = "aiohttp" }, -] - [[package]] name = "future" version = "1.0.0" @@ -1441,10 +1393,7 @@ cli = [ ] ml-gpu = [ { name = "lightgbm" }, - { name = "pytorch-tabular" }, { name = "tensorboard" }, - { name = 
"torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, { name = "wandb" }, { name = "xgboost" }, ] @@ -1498,7 +1447,6 @@ requires-dist = [ { name = "plotly", marker = "extra == 'visualization'", specifier = ">=5.18.0" }, { name = "pydantic", specifier = ">=2.11,<3" }, { name = "pyproj", specifier = ">=3.6.0" }, - { name = "pytorch-tabular", marker = "extra == 'ml-gpu'", specifier = ">=1.2.0" }, { name = "rasterio", marker = "extra == 'visualization'", specifier = ">=1.3.9" }, { name = "scikit-learn", specifier = ">=1.3.0" }, { name = "scipy", specifier = ">=1.11.0,<1.15" }, @@ -1507,8 +1455,6 @@ requires-dist = [ { name = "shapely", specifier = ">=2.0.0" }, { name = "streamlit", marker = "extra == 'visualization'", specifier = ">=1.28.0" }, { name = "tensorboard", marker = "extra == 'ml-gpu'", specifier = ">=2.20.0" }, - { name = "torch", marker = "sys_platform == 'darwin' and extra == 'ml-gpu'", specifier = ">=2.5.0", index = "https://pypi.org/simple", conflict = { package = "globi", extra = "ml-gpu" } }, - { name = "torch", marker = "sys_platform != 'darwin' and extra == 'ml-gpu'", specifier = ">=2.5.0", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "globi", extra = "ml-gpu" } }, { name = "wandb", marker = "extra == 'ml-gpu'", specifier = ">=0.25.0" }, { name = "xgboost", marker = "extra == 'ml-gpu'", specifier = ">=3.2.0" }, { name = "xlsxwriter", marker = "extra == 'cli'", specifier = ">=3.2.9" }, @@ -2249,19 +2195,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/23/f8b28ca248bb629b9e08f877dd2965d1994e1674a03d67cd10c5246da248/lightgbm-4.6.0-py3-none-win_amd64.whl", hash = "sha256:37089ee95664b6550a7189d887dbf098e3eadab03537e411f52c63c121e3ba4b", size = 1451509, upload-time = "2025-02-15T04:03:01.515Z" }, 
] -[[package]] -name = "lightning-utilities" -version = "0.15.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f1/45/7fa8f56b17dc0f0a41ec70dd307ecd6787254483549843bef4c30ab5adce/lightning_utilities-0.15.3.tar.gz", hash = "sha256:792ae0204c79f6859721ac7f386c237a33b0ed06ba775009cb894e010a842033", size = 33553, upload-time = "2026-02-22T14:48:53.348Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/25/f4/ead6e0e37209b07c9baa3e984ccdb0348ca370b77cea3aaea8ddbb097e00/lightning_utilities-0.15.3-py3-none-any.whl", hash = "sha256:6c55f1bee70084a1cbeaa41ada96e4b3a0fea5909e844dd335bd80f5a73c5f91", size = 31906, upload-time = "2026-02-22T14:48:52.488Z" }, -] - [[package]] name = "littleutils" version = "0.2.4" @@ -2342,18 +2275,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/1b/6ef961f543593969d25b2afe57a3564200280528caa9bd1082eecdd7b3bc/markdown-3.10.1-py3-none-any.whl", hash = "sha256:867d788939fe33e4b736426f5b9f651ad0c0ae0ecf89df0ca5d1176c70812fe3", size = 107684, upload-time = "2026-01-21T18:09:27.203Z" }, ] -[[package]] -name = "markdown-it-py" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mdurl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, -] - [[package]] name = "markupsafe" version = 
"3.0.3" @@ -2483,15 +2404,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, ] -[[package]] -name = "mdurl" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, -] - [[package]] name = "mergedeep" version = "1.3.4" @@ -2642,15 +2554,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/5c/2597cef67b6947b15c47f8dba967a0baf19fbdfdc86f6e4a8ba7af8b581a/mkdocstrings_python-1.19.0-py3-none-any.whl", hash = "sha256:395c1032af8f005234170575cc0c5d4d20980846623b623b35594281be4a3059", size = 143417, upload-time = "2025-11-10T13:30:54.164Z" }, ] -[[package]] -name = "mpmath" -version = "1.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = 
"sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, -] - [[package]] name = "msgpack" version = "1.1.2" @@ -3128,119 +3031,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, ] -[[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", 
hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12", marker = 
"sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, -] - -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, - { url = 
"https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, 
upload-time = "2025-02-26T00:15:44.104Z" }, -] - [[package]] name = "nvidia-nccl-cu12" version = "2.27.5" @@ -3250,46 +3040,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, ] -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.4.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, -] - -[[package]] -name = "omegaconf" -version = "2.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "antlr4-python3-runtime" }, - { name = "pyyaml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120, upload-time = "2022-12-08T20:59:22.753Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, -] - [[package]] name = "openpyxl" version = "3.1.5" @@ -4165,49 +3915,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = 
"sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, ] -[[package]] -name = "pytorch-lightning" -version = "2.6.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "fsspec", extra = ["http"] }, - { name = "lightning-utilities" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, - { name = "torchmetrics" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8b/ac/ebd5f6f58691cbd4f73836e43e1727f3814311b960c41f88e259606ca2b2/pytorch_lightning-2.6.1.tar.gz", hash = "sha256:ba08f8901cf226fcca473046ad9346f414e99117762dc869c76e650d5b3d7bdc", size = 665563, upload-time = "2026-01-30T14:59:11.636Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/93/c8c361bf0a2fe50f828f32def460e8b8a14b93955d3fd302b1a9b63b19e4/pytorch_lightning-2.6.1-py3-none-any.whl", hash = "sha256:1f8118567ec829e3055f16cf1aa320883a86a47c836951bfd9dcfa34ec7ffd59", size = 857273, upload-time = "2026-01-30T14:59:10.141Z" }, -] - -[[package]] -name = "pytorch-tabular" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "einops" }, - { name = "numpy" }, - { name = "omegaconf" }, - { name = "pandas" }, - { name = "pytorch-lightning" }, - { name = "rich" }, - { name = "scikit-base" }, - { name = "scikit-learn" }, - { name = "scipy" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 
'darwin'" }, - { name = "torchmetrics" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0b/f2/823de16d6a461504f4ed8e4a555d6ce356e5f81e6525d95e2b64895ec94f/pytorch_tabular-1.2.0.tar.gz", hash = "sha256:1b96b576eb3de443840b313d0b298293eaf83dcfdbba53ed8974b76d1351b821", size = 2312825, upload-time = "2026-01-26T21:48:22.577Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/c9/1e01c682e2ad7132bc1943d8d367c96f241bf85679e76d66eb0c4e4cbde9/pytorch_tabular-1.2.0-py3-none-any.whl", hash = "sha256:0a59f8a2304856b3d1e905f7b66153ebc65df1a6a017f2c8a13a29f62dc95b26", size = 165800, upload-time = "2026-01-26T21:48:21.195Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -4386,19 +4093,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] -[[package]] -name = "rich" -version = "14.3.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markdown-it-py" }, - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, -] - [[package]] name = "rpds-py" version = "0.28.0" @@ -4518,15 +4212,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload-time = "2025-09-09T19:23:30.041Z" }, ] -[[package]] -name = "scikit-base" -version = "0.13.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/56/a8/610f99f01f326178b8a7347db2ede654b42548e9697b516480cc081e344d/scikit_base-0.13.1.tar.gz", hash = "sha256:169e5427233f7237b38c7d858bf07b8a86bbf59feccf0708e26dad4ac312c593", size = 134482, upload-time = "2026-01-25T11:31:38.814Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/55/c20d8319aab037e11f1d6403b6102d1041694abe24a3aa4a1e27f2cdb9f2/scikit_base-0.13.1-py3-none-any.whl", hash = "sha256:1aca86759435fd2d32d83a526ce11095119c0745e4e5dd91f2e5820023ca8e39", size = 159779, upload-time = "2026-01-25T11:31:36.759Z" }, -] - [[package]] name = "scikit-learn" version = "1.7.2" @@ -4801,18 +4486,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/60/868371b6482ccd9ef423c6f62650066cf8271fdb2ee84f192695ad6b7a96/streamlit-1.51.0-py3-none-any.whl", hash = "sha256:4008b029f71401ce54946bb09a6a3e36f4f7652cbb48db701224557738cfda38", size = 10171702, upload-time = "2025-10-29T17:07:35.97Z" }, ] -[[package]] -name = "sympy" -version = "1.14.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mpmath" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = 
"sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, -] - [[package]] name = "tables" version = "3.10.2" @@ -4987,103 +4660,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, ] -[[package]] -name = "torch" -version = "2.10.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'darwin'", - "python_full_version < '3.14' and sys_platform == 'darwin'", -] -dependencies = [ - { name = "filelock", marker = "sys_platform == 'darwin'" }, - { name = "fsspec", marker = "sys_platform == 'darwin'" }, - { name = "jinja2", marker = "sys_platform == 'darwin'" }, - { name = "networkx", marker = "sys_platform == 'darwin'" }, - { name = "setuptools", marker = "sys_platform == 'darwin'" }, - { name = "sympy", marker = "sys_platform == 'darwin'" }, - { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" }, - { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" }, - { url = 
"https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload-time = "2026-01-21T16:24:34.704Z" }, - { url = "https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305, upload-time = "2026-01-21T16:24:09.209Z" }, - { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248, upload-time = "2026-01-21T16:23:09.315Z" }, - { url = "https://files.pythonhosted.org/packages/4f/93/716b5ac0155f1be70ed81bacc21269c3ece8dba0c249b9994094110bfc51/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:bf0d9ff448b0218e0433aeb198805192346c4fd659c852370d5cc245f602a06a", size = 79464992, upload-time = "2026-01-21T16:23:05.162Z" }, - { url = "https://files.pythonhosted.org/packages/d8/94/71994e7d0d5238393df9732fdab607e37e2b56d26a746cb59fdb415f8966/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f5ab4ba32383061be0fb74bda772d470140a12c1c3b58a0cfbf3dae94d164c28", size = 79850324, upload-time = "2026-01-21T16:22:09.494Z" }, -] - -[[package]] -name = "torch" -version = "2.10.0+cu128" -source = { registry = "https://download.pytorch.org/whl/cu128" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", - "python_full_version < '3.14' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", 
-] -dependencies = [ - { name = "cuda-bindings", marker = "sys_platform == 'linux'" }, - { name = "filelock", marker = "sys_platform != 'darwin'" }, - { name = "fsspec", marker = "sys_platform != 'darwin'" }, - { name = "jinja2", marker = "sys_platform != 'darwin'" }, - { name = "networkx", marker = "sys_platform != 'darwin'" }, - { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" }, - { name = "setuptools", marker = "sys_platform != 'darwin'" }, - { name = "sympy", marker = "sys_platform != 'darwin'" }, - { name = "triton", marker = "sys_platform == 'linux'" }, - { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6f09cdf2415516be028ae82e6b985bcfc3eac37bc52ab401142689f6224516ca" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = 
"sha256:628e89bd5110ced7debee2a57c69959725b7fbc64eab81a39dd70e46c7e28ba5" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:fbde8f6a9ec8c76979a0d14df21c10b9e5cab6f0d106a73ca73e2179bc597cae" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:bdbcc703382f948e951c063448c9406bf38ce66c41dd698d9e2733fcf96c037a" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7b4bd23ed63de97456fcc81c26fea9f02ee02ce1112111c4dac0d8cfe574b23e" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:4d1b0b49c54223c7c04050b49eac141d77b6edbc34aea1dfc74a6fdb661baa8c" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f1f8b840c64b645a4bc61a393db48effb9c92b2dc26c8373873911f0750d1ea7" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:23f58258012bcf1c349cb22af387e33aadca7f83ea617b080e774eb41e4fe8ff" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:01b216e097b17a5277cfb47c383cdcacf06abeadcb0daca0c76b59e72854c3b6" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:c42377bc2607e3e1c60da71b792fb507c3938c87fd6edab8b21c59c91473c36d" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:37d71feea068776855686a1512058df3f19f6f040a151f055aa746601678744f" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-win_amd64.whl", hash = "sha256:c57017ca29e62271e362fdeee7d20070e254755a5148b30b553d8a10fc83c7ef" }, - { url = 
"https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:777461f50b2daf77e4bdd8e2ad34bdfc5a993bf1bdf2ab9ef39f5edfe4e9c12b" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7bcba6a7c5f0987a13298b1ca843155dcceceac758fa3c7ccd5c7af4059a1080" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-win_amd64.whl", hash = "sha256:70d89143c956389d4806cb4e5fe0b1129fe0db280e1073288d17fa76c101cba4" }, -] - -[[package]] -name = "torchmetrics" -version = "1.8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "lightning-utilities" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/2e/48a887a59ecc4a10ce9e8b35b3e3c5cef29d902c4eac143378526e7485cb/torchmetrics-1.8.2.tar.gz", hash = "sha256:cf64a901036bf107f17a524009eea7781c9c5315d130713aeca5747a686fe7a5", size = 580679, upload-time = "2025-09-03T14:00:54.077Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/02/21/aa0f434434c48490f91b65962b1ce863fdcce63febc166ca9fe9d706c2b6/torchmetrics-1.8.2-py3-none-any.whl", hash = "sha256:08382fd96b923e39e904c4d570f3d49e2cc71ccabd2a94e0f895d1f0dac86242", size = 983161, upload-time = "2025-09-03T14:00:51.921Z" }, -] - [[package]] name = "tornado" version = "6.5.2" @@ -5136,23 +4712,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/7a/f38385f1b2d5f54221baf1db3d6371dc6eef8041d95abff39576c694e9d9/transforms3d-0.4.2-py3-none-any.whl", hash = "sha256:1c70399d9e9473ecc23311fd947f727f7c69ed0b063244828c383aa1aefa5941", size = 
1376759, upload-time = "2024-06-20T11:09:19.43Z" }, ] -[[package]] -name = "triton" -version = "3.6.0" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243, upload-time = "2026-01-20T16:16:07.857Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, - { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" }, - { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" }, - { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087, upload-time = "2026-01-20T16:16:18.989Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" }, - { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577, upload-time = "2026-01-20T16:16:25.426Z" }, - { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = "2026-01-20T16:01:07.278Z" }, - { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804, upload-time = "2026-01-20T16:16:31.528Z" }, - { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" }, -] - [[package]] name = "tsam" version = "2.3.9" From 53342876d6e4b0798c4f3a3f4bb35edd77fb70fc Mon Sep 17 00:00:00 2001 From: Sam Wolk <36545842+szvsw@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:54:46 -0400 Subject: [PATCH 27/27] re-enable torch --- pyproject.toml | 13 +- 
.../models/surrogate/configs/regression.py | 9 +- uv.lock | 443 ++++++++++++++++++ 3 files changed, 456 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a1d02e3..40f537e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,8 +58,9 @@ visualization = [ ml-gpu = [ "lightgbm>=4.6.0", "xgboost>=3.2.0", - # "pytorch-tabular>=1.2.0", - # "torch>=2.5.0", + "numba>=0.63.1", + "pytorch-tabular>=1.2.0", + "torch>=2.5.0", "tensorboard>=2.20.0", "wandb>=0.25.0", ] @@ -113,10 +114,10 @@ explicit = true [tool.uv.sources] # PyTorch: CUDA 12.8 on Linux/Windows (where builds exist), PyPI (CPU) on macOS -# torch = [ -# { index = "pytorch-cu128", marker = "sys_platform != 'darwin'", extra = "ml-gpu" }, -# { index = "pypi", marker = "sys_platform == 'darwin'", extra = "ml-gpu" }, -# ] +torch = [ + { index = "pytorch-cu128", marker = "sys_platform != 'darwin'", extra = "ml-gpu" }, + { index = "pypi", marker = "sys_platform == 'darwin'", extra = "ml-gpu" }, +] # scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/allow-optional-filerefs"} scythe-engine = {git = "https://github.com/szvsw/scythe", branch = "feature/allow-versioning-workflows"} # scythe-engine = {path = "../scythe", editable = true} diff --git a/src/globi/models/surrogate/configs/regression.py b/src/globi/models/surrogate/configs/regression.py index b65c64a..edebb95 100644 --- a/src/globi/models/surrogate/configs/regression.py +++ b/src/globi/models/surrogate/configs/regression.py @@ -1,5 +1,6 @@ """Configs for the surrogate model pipeline.""" +import warnings from typing import Any, Literal from pydantic import BaseModel, Field @@ -43,7 +44,7 @@ def param_dict(self) -> dict[str, Any]: """The dictionary of parameters.""" import torch - data = { + params = { "objective": "reg:squarederror", "eval_metric": "rmse", "tree_method": "auto", @@ -54,8 +55,10 @@ def param_dict(self) -> dict[str, Any]: ), } if torch.cuda.is_available(): - data["device"] = "cuda" - return 
data + params["device"] = "cuda" + else: + warnings.warn("CUDA is not available, using CPU.", stacklevel=3) + return params class XGBHyperparameters(BaseModel): diff --git a/uv.lock b/uv.lock index 9043efb..7c83226 100644 --- a/uv.lock +++ b/uv.lock @@ -160,6 +160,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034, upload-time = "2021-11-06T17:52:23.524Z" } + [[package]] name = "anyio" version = "4.11.0" @@ -911,6 +917,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/c6/c71e82e041c95ffe6a92ac707785500aa2a515a4339c2c7dd67e3c449249/cramjam-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:028400d699442d40dbda02f74158c73d05cb76587a12490d0bfedd958fd49188", size = 1713108, upload-time = "2025-07-27T21:24:10.147Z" }, ] +[[package]] +name = "cuda-bindings" +version = "12.9.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-pathfinder", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c2/65bfd79292b8ff18be4dd7f7442cea37bcbc1a228c1886f1dea515c45b67/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:694ba35023846625ef471257e6b5a4bc8af690f961d197d77d34b1d1db393f56", size = 11760260, upload-time = "2025-10-21T14:51:40.79Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" }, + { url = "https://files.pythonhosted.org/packages/05/8b/b4b2d1c7775fa403b64333e720cfcfccef8dcb9cdeb99947061ca5a77628/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf8bfaedc238f3b115d957d1fd6562b7e8435ba57f6d0e2f87d0e7149ccb2da5", size = 11570071, upload-time = "2025-10-21T14:51:47.472Z" }, + { url = "https://files.pythonhosted.org/packages/63/56/e465c31dc9111be3441a9ba7df1941fe98f4aa6e71e8788a3fb4534ce24d/cuda_bindings-12.9.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:32bdc5a76906be4c61eb98f546a6786c5773a881f3b166486449b5d141e4a39f", size = 11906628, upload-time = "2025-10-21T14:51:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/ec/07/6aff13bc1e977e35aaa6b22f52b172e2890c608c6db22438cf7ed2bf43a6/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3adf4958dcf68ae7801a59b73fb00a8b37f8d0595060d66ceae111b1002de38d", size = 11566797, upload-time = "2025-10-21T14:51:54.581Z" }, + { url = "https://files.pythonhosted.org/packages/a3/84/1e6be415e37478070aeeee5884c2022713c1ecc735e6d82d744de0252eee/cuda_bindings-12.9.4-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56e0043c457a99ac473ddc926fe0dc4046694d99caef633e92601ab52cbe17eb", size = 11925991, upload-time = "2025-10-21T14:51:56.535Z" }, + { url = "https://files.pythonhosted.org/packages/1e/b5/96a6696e20c4ffd2b327f54c7d0fde2259bdb998d045c25d5dedbbe30290/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:1f53a7f453d4b2643d8663d036bafe29b5ba89eb904c133180f295df6dc151e5", size = 11624530, upload-time = "2025-10-21T14:52:01.539Z" }, + { url = "https://files.pythonhosted.org/packages/d1/af/6dfd8f2ed90b1d4719bc053ff8940e494640fe4212dc3dd72f383e4992da/cuda_bindings-12.9.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8b72ee72a9cc1b531db31eebaaee5c69a8ec3500e32c6933f2d3b15297b53686", size = 11922703, upload-time = "2025-10-21T14:52:03.585Z" }, + { url = "https://files.pythonhosted.org/packages/39/73/d2fc40c043bac699c3880bf88d3cebe9d88410cd043795382826c93a89f0/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:20f2699d61d724de3eb3f3369d57e2b245f93085cab44fd37c3bea036cea1a6f", size = 11565056, upload-time = "2025-10-21T14:52:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/6c/19/90ac264acc00f6df8a49378eedec9fd2db3061bf9263bf9f39fd3d8377c3/cuda_bindings-12.9.4-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80bffc357df9988dca279734bc9674c3934a654cab10cadeed27ce17d8635ee", size = 11924658, upload-time = "2025-10-21T14:52:10.411Z" }, +] + +[[package]] +name = "cuda-pathfinder" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/02/59a5bc738a09def0b49aea0e460bdf97f65206d0d041246147cf6207e69c/cuda_pathfinder-1.4.1-py3-none-any.whl", hash = "sha256:40793006082de88e0950753655e55558a446bed9a7d9d0bcb48b2506d50ed82a", size = 43903, upload-time = "2026-03-06T21:05:24.372Z" }, +] + [[package]] name = "cycler" version = "0.12.1" @@ -968,6 +1002,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = 
"einops" +version = "0.8.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/77/850bef8d72ffb9219f0b1aac23fbc1bf7d038ee6ea666f331fa273031aa2/einops-0.8.2.tar.gz", hash = "sha256:609da665570e5e265e27283aab09e7f279ade90c4f01bcfca111f3d3e13f2827", size = 56261, upload-time = "2026-01-26T04:13:17.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/09/f8d8f8f31e4483c10a906437b4ce31bdf3d6d417b73fe33f1a8b59e34228/einops-0.8.2-py3-none-any.whl", hash = "sha256:54058201ac7087911181bfec4af6091bb59380360f069276601256a76af08193", size = 65638, upload-time = "2026-01-26T04:13:18.546Z" }, +] + [[package]] name = "energy-pandas" version = "0.4.1" @@ -1304,6 +1347,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7", size = 199289, upload-time = "2025-09-02T19:10:47.708Z" }, ] +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] + [[package]] name = "future" version = "1.0.0" @@ -1393,7 +1441,11 @@ cli = [ ] ml-gpu = [ { name = "lightgbm" }, + { name = "numba" }, + { name = "pytorch-tabular" }, { name = "tensorboard" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, { name = "wandb" }, { name = "xgboost" }, ] @@ -1441,12 +1493,14 @@ requires-dist = [ { name = "ladybug-core", specifier = ">=0.44.29" }, { name = "lightgbm", marker = "extra == 'ml-gpu'", specifier = ">=4.6.0" }, { name = "matplotlib", marker = "extra == 'visualization'", specifier = ">=3.8.0" }, + { name = "numba", marker = "extra == 'ml-gpu'", specifier = ">=0.63.1" }, { name = "numpy", 
specifier = ">=1.26.0" }, { name = "pandas", specifier = ">=2.1.0" }, { name = "playwright", marker = "extra == 'visualization'", specifier = ">=1.40.0" }, { name = "plotly", marker = "extra == 'visualization'", specifier = ">=5.18.0" }, { name = "pydantic", specifier = ">=2.11,<3" }, { name = "pyproj", specifier = ">=3.6.0" }, + { name = "pytorch-tabular", marker = "extra == 'ml-gpu'", specifier = ">=1.2.0" }, { name = "rasterio", marker = "extra == 'visualization'", specifier = ">=1.3.9" }, { name = "scikit-learn", specifier = ">=1.3.0" }, { name = "scipy", specifier = ">=1.11.0,<1.15" }, @@ -1455,6 +1509,8 @@ requires-dist = [ { name = "shapely", specifier = ">=2.0.0" }, { name = "streamlit", marker = "extra == 'visualization'", specifier = ">=1.28.0" }, { name = "tensorboard", marker = "extra == 'ml-gpu'", specifier = ">=2.20.0" }, + { name = "torch", marker = "sys_platform == 'darwin' and extra == 'ml-gpu'", specifier = ">=2.5.0", index = "https://pypi.org/simple", conflict = { package = "globi", extra = "ml-gpu" } }, + { name = "torch", marker = "sys_platform != 'darwin' and extra == 'ml-gpu'", specifier = ">=2.5.0", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "globi", extra = "ml-gpu" } }, { name = "wandb", marker = "extra == 'ml-gpu'", specifier = ">=0.25.0" }, { name = "xgboost", marker = "extra == 'ml-gpu'", specifier = ">=3.2.0" }, { name = "xlsxwriter", marker = "extra == 'cli'", specifier = ">=3.2.9" }, @@ -2195,6 +2251,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/23/f8b28ca248bb629b9e08f877dd2965d1994e1674a03d67cd10c5246da248/lightgbm-4.6.0-py3-none-win_amd64.whl", hash = "sha256:37089ee95664b6550a7189d887dbf098e3eadab03537e411f52c63c121e3ba4b", size = 1451509, upload-time = "2025-02-15T04:03:01.515Z" }, ] +[[package]] +name = "lightning-utilities" +version = "0.15.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "typing-extensions" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/f1/45/7fa8f56b17dc0f0a41ec70dd307ecd6787254483549843bef4c30ab5adce/lightning_utilities-0.15.3.tar.gz", hash = "sha256:792ae0204c79f6859721ac7f386c237a33b0ed06ba775009cb894e010a842033", size = 33553, upload-time = "2026-02-22T14:48:53.348Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/f4/ead6e0e37209b07c9baa3e984ccdb0348ca370b77cea3aaea8ddbb097e00/lightning_utilities-0.15.3-py3-none-any.whl", hash = "sha256:6c55f1bee70084a1cbeaa41ada96e4b3a0fea5909e844dd335bd80f5a73c5f91", size = 31906, upload-time = "2026-02-22T14:48:52.488Z" }, +] + [[package]] name = "littleutils" version = "0.2.4" @@ -2275,6 +2344,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/1b/6ef961f543593969d25b2afe57a3564200280528caa9bd1082eecdd7b3bc/markdown-3.10.1-py3-none-any.whl", hash = "sha256:867d788939fe33e4b736426f5b9f651ad0c0ae0ecf89df0ca5d1176c70812fe3", size = 107684, upload-time = "2026-01-21T18:09:27.203Z" }, ] +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -2404,6 +2485,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, ] +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "mergedeep" version = "1.3.4" @@ -2554,6 +2644,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/5c/2597cef67b6947b15c47f8dba967a0baf19fbdfdc86f6e4a8ba7af8b581a/mkdocstrings_python-1.19.0-py3-none-any.whl", hash = "sha256:395c1032af8f005234170575cc0c5d4d20980846623b623b35594281be4a3059", size = 143417, upload-time = "2025-11-10T13:30:54.164Z" }, ] +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", 
size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + [[package]] name = "msgpack" version = "1.1.2" @@ -3031,6 +3130,119 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, + { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.10.2.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = 
"sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, 
upload-time = "2025-02-26T00:15:44.104Z" }, +] + [[package]] name = "nvidia-nccl-cu12" version = "2.27.5" @@ -3040,6 +3252,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, ] +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.4.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" }, + { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, +] + +[[package]] +name = "omegaconf" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120, upload-time = "2022-12-08T20:59:22.753Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -3915,6 +4167,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = 
"sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, ] +[[package]] +name = "pytorch-lightning" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fsspec", extra = ["http"] }, + { name = "lightning-utilities" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, + { name = "torchmetrics" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/ac/ebd5f6f58691cbd4f73836e43e1727f3814311b960c41f88e259606ca2b2/pytorch_lightning-2.6.1.tar.gz", hash = "sha256:ba08f8901cf226fcca473046ad9346f414e99117762dc869c76e650d5b3d7bdc", size = 665563, upload-time = "2026-01-30T14:59:11.636Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/93/c8c361bf0a2fe50f828f32def460e8b8a14b93955d3fd302b1a9b63b19e4/pytorch_lightning-2.6.1-py3-none-any.whl", hash = "sha256:1f8118567ec829e3055f16cf1aa320883a86a47c836951bfd9dcfa34ec7ffd59", size = 857273, upload-time = "2026-01-30T14:59:10.141Z" }, +] + +[[package]] +name = "pytorch-tabular" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "einops" }, + { name = "numpy" }, + { name = "omegaconf" }, + { name = "pandas" }, + { name = "pytorch-lightning" }, + { name = "rich" }, + { name = "scikit-base" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 
'darwin'" }, + { name = "torchmetrics" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/f2/823de16d6a461504f4ed8e4a555d6ce356e5f81e6525d95e2b64895ec94f/pytorch_tabular-1.2.0.tar.gz", hash = "sha256:1b96b576eb3de443840b313d0b298293eaf83dcfdbba53ed8974b76d1351b821", size = 2312825, upload-time = "2026-01-26T21:48:22.577Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/c9/1e01c682e2ad7132bc1943d8d367c96f241bf85679e76d66eb0c4e4cbde9/pytorch_tabular-1.2.0-py3-none-any.whl", hash = "sha256:0a59f8a2304856b3d1e905f7b66153ebc65df1a6a017f2c8a13a29f62dc95b26", size = 165800, upload-time = "2026-01-26T21:48:21.195Z" }, +] + [[package]] name = "pytz" version = "2025.2" @@ -4093,6 +4388,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "rich" +version = "14.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, +] + [[package]] name = "rpds-py" version = "0.28.0" @@ -4212,6 +4520,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload-time = "2025-09-09T19:23:30.041Z" }, ] +[[package]] +name = "scikit-base" +version = "0.13.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/a8/610f99f01f326178b8a7347db2ede654b42548e9697b516480cc081e344d/scikit_base-0.13.1.tar.gz", hash = "sha256:169e5427233f7237b38c7d858bf07b8a86bbf59feccf0708e26dad4ac312c593", size = 134482, upload-time = "2026-01-25T11:31:38.814Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/55/c20d8319aab037e11f1d6403b6102d1041694abe24a3aa4a1e27f2cdb9f2/scikit_base-0.13.1-py3-none-any.whl", hash = "sha256:1aca86759435fd2d32d83a526ce11095119c0745e4e5dd91f2e5820023ca8e39", size = 159779, upload-time = "2026-01-25T11:31:36.759Z" }, +] + [[package]] name = "scikit-learn" version = "1.7.2" @@ -4486,6 +4803,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/60/868371b6482ccd9ef423c6f62650066cf8271fdb2ee84f192695ad6b7a96/streamlit-1.51.0-py3-none-any.whl", hash = "sha256:4008b029f71401ce54946bb09a6a3e36f4f7652cbb48db701224557738cfda38", size = 10171702, upload-time = "2025-10-29T17:07:35.97Z" }, ] +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = 
"sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + [[package]] name = "tables" version = "3.10.2" @@ -4660,6 +4989,103 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, ] +[[package]] +name = "torch" +version = "2.10.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'darwin'", + "python_full_version < '3.14' and sys_platform == 'darwin'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", marker = "sys_platform == 'darwin'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", marker = "sys_platform == 'darwin'" }, + { name = "setuptools", marker = "sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" }, + { url = "https://files.pythonhosted.org/packages/ec/23/2c9fe0c9c27f7f6cb865abcea8a4568f29f00acaeadfc6a37f6801f84cb4/torch-2.10.0-2-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e521c9f030a3774ed770a9c011751fb47c4d12029a3d6522116e48431f2ff89e", size = 79498254, upload-time = "2026-02-10T21:44:44.095Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload-time = "2026-01-21T16:24:34.704Z" }, + { url = "https://files.pythonhosted.org/packages/1a/0b/39929b148f4824bc3ad6f9f72a29d4ad865bcf7ebfc2fa67584773e083d2/torch-2.10.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:3202429f58309b9fa96a614885eace4b7995729f44beb54d3e4a47773649d382", size = 79851305, upload-time = "2026-01-21T16:24:09.209Z" }, + { url = "https://files.pythonhosted.org/packages/0e/13/e76b4d9c160e89fff48bf16b449ea324bda84745d2ab30294c37c2434c0d/torch-2.10.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:cdf2a523d699b70d613243211ecaac14fe9c5df8a0b0a9c02add60fb2a413e0f", size = 79498248, upload-time = "2026-01-21T16:23:09.315Z" }, + { url = "https://files.pythonhosted.org/packages/4f/93/716b5ac0155f1be70ed81bacc21269c3ece8dba0c249b9994094110bfc51/torch-2.10.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:bf0d9ff448b0218e0433aeb198805192346c4fd659c852370d5cc245f602a06a", size = 79464992, upload-time = "2026-01-21T16:23:05.162Z" }, + { url = "https://files.pythonhosted.org/packages/d8/94/71994e7d0d5238393df9732fdab607e37e2b56d26a746cb59fdb415f8966/torch-2.10.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:f5ab4ba32383061be0fb74bda772d470140a12c1c3b58a0cfbf3dae94d164c28", size = 79850324, upload-time = "2026-01-21T16:22:09.494Z" }, +] + +[[package]] +name = "torch" +version = "2.10.0+cu128" +source = { registry = "https://download.pytorch.org/whl/cu128" } +resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", + "python_full_version < '3.14' and sys_platform == 'linux'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", 
+] +dependencies = [ + { name = "cuda-bindings", marker = "sys_platform == 'linux'" }, + { name = "filelock", marker = "sys_platform != 'darwin'" }, + { name = "fsspec", marker = "sys_platform != 'darwin'" }, + { name = "jinja2", marker = "sys_platform != 'darwin'" }, + { name = "networkx", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" }, + { name = "setuptools", marker = "sys_platform != 'darwin'" }, + { name = "sympy", marker = "sys_platform != 'darwin'" }, + { name = "triton", marker = "sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6f09cdf2415516be028ae82e6b985bcfc3eac37bc52ab401142689f6224516ca" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = 
"sha256:628e89bd5110ced7debee2a57c69959725b7fbc64eab81a39dd70e46c7e28ba5" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:fbde8f6a9ec8c76979a0d14df21c10b9e5cab6f0d106a73ca73e2179bc597cae" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:bdbcc703382f948e951c063448c9406bf38ce66c41dd698d9e2733fcf96c037a" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7b4bd23ed63de97456fcc81c26fea9f02ee02ce1112111c4dac0d8cfe574b23e" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:4d1b0b49c54223c7c04050b49eac141d77b6edbc34aea1dfc74a6fdb661baa8c" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:f1f8b840c64b645a4bc61a393db48effb9c92b2dc26c8373873911f0750d1ea7" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:23f58258012bcf1c349cb22af387e33aadca7f83ea617b080e774eb41e4fe8ff" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:01b216e097b17a5277cfb47c383cdcacf06abeadcb0daca0c76b59e72854c3b6" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:c42377bc2607e3e1c60da71b792fb507c3938c87fd6edab8b21c59c91473c36d" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:37d71feea068776855686a1512058df3f19f6f040a151f055aa746601678744f" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314-win_amd64.whl", hash = "sha256:c57017ca29e62271e362fdeee7d20070e254755a5148b30b553d8a10fc83c7ef" }, + { url = 
"https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:777461f50b2daf77e4bdd8e2ad34bdfc5a993bf1bdf2ab9ef39f5edfe4e9c12b" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7bcba6a7c5f0987a13298b1ca843155dcceceac758fa3c7ccd5c7af4059a1080" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.10.0%2Bcu128-cp314-cp314t-win_amd64.whl", hash = "sha256:70d89143c956389d4806cb4e5fe0b1129fe0db280e1073288d17fa76c101cba4" }, +] + +[[package]] +name = "torchmetrics" +version = "1.8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lightning-utilities" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform != 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/2e/48a887a59ecc4a10ce9e8b35b3e3c5cef29d902c4eac143378526e7485cb/torchmetrics-1.8.2.tar.gz", hash = "sha256:cf64a901036bf107f17a524009eea7781c9c5315d130713aeca5747a686fe7a5", size = 580679, upload-time = "2025-09-03T14:00:54.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/21/aa0f434434c48490f91b65962b1ce863fdcce63febc166ca9fe9d706c2b6/torchmetrics-1.8.2-py3-none-any.whl", hash = "sha256:08382fd96b923e39e904c4d570f3d49e2cc71ccabd2a94e0f895d1f0dac86242", size = 983161, upload-time = "2025-09-03T14:00:51.921Z" }, +] + [[package]] name = "tornado" version = "6.5.2" @@ -4712,6 +5138,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/7a/f38385f1b2d5f54221baf1db3d6371dc6eef8041d95abff39576c694e9d9/transforms3d-0.4.2-py3-none-any.whl", hash = "sha256:1c70399d9e9473ecc23311fd947f727f7c69ed0b063244828c383aa1aefa5941", size = 
1376759, upload-time = "2024-06-20T11:09:19.43Z" }, ] +[[package]] +name = "triton" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/5d/08201db32823bdf77a0e2b9039540080b2e5c23a20706ddba942924ebcd6/triton-3.6.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:374f52c11a711fd062b4bfbb201fd9ac0a5febd28a96fb41b4a0f51dde3157f4", size = 176128243, upload-time = "2026-01-20T16:16:07.857Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, + { url = "https://files.pythonhosted.org/packages/3c/12/34d71b350e89a204c2c7777a9bba0dcf2f19a5bfdd70b57c4dbc5ffd7154/triton-3.6.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448e02fe6dc898e9e5aa89cf0ee5c371e99df5aa5e8ad976a80b93334f3494fd", size = 176133521, upload-time = "2026-01-20T16:16:13.321Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0b/37d991d8c130ce81a8728ae3c25b6e60935838e9be1b58791f5997b24a54/triton-3.6.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c7f76c6e72d2ef08df639e3d0d30729112f47a56b0c81672edc05ee5116ac9", size = 188289450, upload-time = "2026-01-20T16:00:49.136Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4e/41b0c8033b503fd3cfcd12392cdd256945026a91ff02452bef40ec34bee7/triton-3.6.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1722e172d34e32abc3eb7711d0025bb69d7959ebea84e3b7f7a341cd7ed694d6", size = 176276087, upload-time = "2026-01-20T16:16:18.989Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/f8/9c66bfc55361ec6d0e4040a0337fb5924ceb23de4648b8a81ae9d33b2b38/triton-3.6.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d002e07d7180fd65e622134fbd980c9a3d4211fb85224b56a0a0efbd422ab72f", size = 188400296, upload-time = "2026-01-20T16:00:56.042Z" }, + { url = "https://files.pythonhosted.org/packages/49/55/5ecf0dcaa0f2fbbd4420f7ef227ee3cb172e91e5fede9d0ecaddc43363b4/triton-3.6.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5523241e7d1abca00f1d240949eebdd7c673b005edbbce0aca95b8191f1d43", size = 176138577, upload-time = "2026-01-20T16:16:25.426Z" }, + { url = "https://files.pythonhosted.org/packages/df/3d/9e7eee57b37c80cec63322c0231bb6da3cfe535a91d7a4d64896fcb89357/triton-3.6.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a17a5d5985f0ac494ed8a8e54568f092f7057ef60e1b0fa09d3fd1512064e803", size = 188273063, upload-time = "2026-01-20T16:01:07.278Z" }, + { url = "https://files.pythonhosted.org/packages/48/db/56ee649cab5eaff4757541325aca81f52d02d4a7cd3506776cad2451e060/triton-3.6.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b3a97e8ed304dfa9bd23bb41ca04cdf6b2e617d5e782a8653d616037a5d537d", size = 176274804, upload-time = "2026-01-20T16:16:31.528Z" }, + { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" }, +] + [[package]] name = "tsam" version = "2.3.9"