Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
fc5db25
unified automl files
humaira-rf Nov 20, 2025
2f2aafc
mlflow logging for evals
humaira-rf Dec 2, 2025
9d6a5ab
trackio added to fit
humaira-rf Dec 12, 2025
5c820a8
trackio metrics for evals
humaira-rf Dec 12, 2025
39b7413
Merge update
david-rfai Dec 18, 2025
ab913c4
Update separations
david-rfai Dec 18, 2025
c8a89cc
remove merge tags
david-rfai Dec 18, 2025
16d1ea6
New RFLogger consolidation
david-rfai Dec 23, 2025
35cd92e
Circular
david-rfai Dec 23, 2025
277465e
add backends flag
david-rfai Dec 23, 2025
67c032a
Remove isinstance
david-rfai Dec 24, 2025
dde237c
Remove comma from experiment
david-rfai Dec 24, 2025
64af609
erroneous equal sign
david-rfai Dec 24, 2025
8b52830
Add epochs number in rf_db
david-rfai Dec 24, 2025
e8a354a
Add build
david-rfai Dec 24, 2025
9cced3f
Typo
david-rfai Dec 24, 2025
dd4deb6
Handle errors in metric loggers
david-rfai Dec 24, 2025
e252d6b
Add Any
david-rfai Dec 24, 2025
b7f48d4
Add None return
david-rfai Dec 24, 2025
9c82157
Fix typo
david-rfai Dec 24, 2025
929ab18
Add force
david-rfai Dec 24, 2025
4c37872
Debug force
david-rfai Dec 24, 2025
243dab9
Remove pins from dependencies
david-rfai Dec 24, 2025
3ee92d5
Remove pins from dependencies
david-rfai Dec 24, 2025
8a784c5
pin fsspec for datasets
david-rfai Dec 24, 2025
023f080
pin fsspec for datasets
david-rfai Dec 24, 2025
99dc25a
fix mixing fi in start.sh
david-rfai Dec 24, 2025
475ac42
Add trackio to pyproject.toml
david-rfai Dec 24, 2025
be812a4
add mlflow to pyproject.toml
david-rfai Dec 24, 2025
88c5b9e
Transformers to minimum
david-rfai Dec 24, 2025
93d745e
Handle db migration for metric_experiment_id
david-rfai Dec 24, 2025
f8d3c57
Handle tracking_backends flag
david-rfai Dec 24, 2025
5344cc5
Import MLFlowconfig to rfmetric
david-rfai Dec 24, 2025
6baeafd
Fix experiments_id table
david-rfai Dec 24, 2025
c9c99b1
Fix mlflow config import
david-rfai Dec 24, 2025
c7d3ca9
Add back training_args
david-rfai Dec 24, 2025
47422cd
Add metric_experiment_id migration
david-rfai Dec 24, 2025
af14b24
Correct mlflow import
david-rfai Dec 24, 2025
b4b9432
Fix MLflowMetricLogger importer
david-rfai Dec 24, 2025
380ad4e
Fix autocast error
david-rfai Dec 24, 2025
dd66745
Add more logging to metrics
david-rfai Dec 24, 2025
0ec195c
Fix logger
david-rfai Dec 24, 2025
0b08933
handle no experiment for logging
david-rfai Dec 24, 2025
91829db
handle no experiment for logging
david-rfai Dec 24, 2025
f7a1a25
Debug None logger for trackio
david-rfai Dec 24, 2025
36159f7
Debug None logger for trackio
david-rfai Dec 24, 2025
a334572
Debug None logger for trackio
david-rfai Dec 24, 2025
8a06f3b
Debug None logger for trackio
david-rfai Dec 24, 2025
c8781c3
Debug None logger for trackio
david-rfai Dec 24, 2025
ab12cba
Debug None logger for trackio
david-rfai Dec 24, 2025
79322f3
Debug None logger for trackio
david-rfai Dec 24, 2025
59b22d4
Debug None logger for trackio
david-rfai Dec 24, 2025
8c6737d
Debug None logger for trackio
david-rfai Dec 24, 2025
a3e3d03
Debug None logger for trackio
david-rfai Dec 24, 2025
26ff5e5
Debug None logger for trackio
david-rfai Dec 24, 2025
a780e03
Debug None logger for trackio
david-rfai Dec 24, 2025
267ef90
Latest
david-rfai Jan 5, 2026
5284564
Fix migration column
david-rfai Jan 5, 2026
34182a5
annotations fix
david-rfai Jan 5, 2026
70065de
annotations fix
david-rfai Jan 5, 2026
eb6153f
Remove build
david-rfai Jan 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,25 +161,26 @@ Built-in procedures for searching over configuration knob combinations, includin

```text
rapidfireai/
├── fit
├── automl/ # Search and AutoML algorithms for knob tuning
├── backend/ # Core backend components (controller, scheduler, worker)
├── db/ # Database interface and SQLite operations
├── dispatcher/ # Flask-based web API for UI communication
├── frontend/ # Frontend components (dashboard, IC Ops implementation)
├── ml/ # ML training utilities and trainer classes
└── utils/ # Utility functions and helper modules
├── automl/ # Search and AutoML algorithms for knob tuning
├── cli.py # CLI script
├── evals
├── actors/ # Ray-based workers for doc and query processing
├── automl/ # Search and AutoML algorithms for knob tuning
├── data/ # Data sharding and handling
├── db/ # Database interface and SQLite operations
├── dispatcher/ # Flask-based web API for UI communication
├── metrics/ # Online aggregation logic and metrics handling
├── rag/ # Stages of RAG pipeline
├── scheduling/ # Fair scheduler for multi-config resource sharing
└── utils/ # Utility functions and helper modules
└── experiment.py # Main experiment lifecycle management
├── experiment.py # Main experiment lifecycle management
├── fit
├── backend/ # Core backend components (controller, scheduler, worker)
├── db/ # Database interface and SQLite operations
├── dispatcher/ # Flask-based web API for UI communication
├── frontend/ # Frontend components (dashboard, IC Ops implementation)
├── ml/ # ML training utilities and trainer classes
└── utils/ # Utility functions and helper modules
└── utils.py # Utility functions and helper modules
```

## Architecture
Expand Down Expand Up @@ -327,7 +328,9 @@ used to overwrite the defaults.
- `RF_LOG_FILENAME` - Default log file name (default: rapidfire.log)
- `RF_TRAINING_LOG_FILENAME` - Default training log file name (default: training.log)
- `RF_DB_PATH` - Base directory for database files (default: ${RF_HOME}/db)
- `RF_TRACKING_BACKEND` - Tracking backend used (default: mlflow on Non-Google Colab and tensorboard on Google Colab)
- `RF_MLFLOW_ENABLED` - Enable MLFlow tracking backend
- `RF_TENSORBOARD_ENABLED` - Enable Tensorboard tracking backend
- `RF_TRACKIO_ENABLED` - Enable TrackIO tracking backend
- `RF_COLAB_MODE` - Whether running on colab (default: false on Non-Google Colab and true on Google Colab)
- `RF_TUTORIAL_PATH` - Location that `rapidfireai init` copies `tutorial_notebooks` to (default: ./tutorial_notebooks)
- `RF_TEST_PATH` - Location that `rapidfireai --test-notebooks` copies test notebooks to (default: ./tutorial_notebooks/tests)
Expand Down
35 changes: 27 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,40 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Software Development :: Libraries :: Application Frameworks",
]
# dependencies = [
# # REST API (Dispatcher)
# "flask>=3.1.1",
# "flask-cors>=6.0.1",
# "waitress>=3.0.2",

# # JSON Query Tool
# "jq>=1.10.0",
# # "protobuf==5.29.5",

# # Other
# "dill>=0.3.0,<0.3.9",
# "jedi>=0.16",
# # "pytest>=8.4.2",
# "uv>=0.8.14",
# ]
dependencies = [
# REST API (Dispatcher)
"flask>=3.1.1",
"flask-cors>=6.0.1",
"waitress>=3.0.2",
"flask",
"flask-cors",
"waitress",

# JSON Query Tool
"jq>=1.10.0",
"jq",
# "protobuf==5.29.5",

# Other
"dill>=0.3.0,<0.3.9",
"jedi>=0.16",
"dill",
"jedi",
# "pytest>=8.4.2",
"uv>=0.8.14",
"uv",
"trackio",
"mlflow",
"fsspec<=2025.10.0",
]

[project.optional-dependencies]
Expand Down Expand Up @@ -104,7 +123,7 @@ local_evals = [
"ray==2.44.1",

# LLM Inference
"transformers==4.56.1",
"transformers>=4.56.1",
"vllm==0.7.2",

# OpenAI API
Expand Down
10 changes: 0 additions & 10 deletions rapidfireai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,6 @@
__author__ = "RapidFire AI Inc."
__email__ = "support@rapidfire.ai"

# Core imports - always available
# from rapidfireai.experiment import Experiment

# Optional evals imports - gracefully handle missing dependencies
# get_dispatcher_url = None
# get_dispatcher_headers = None
# get_colab_auth_token = None

try:
from rapidfireai.experiment import Experiment
Expand Down Expand Up @@ -47,7 +40,4 @@ def __repr__(self):
"Experiment",
"__version__",
"__version_info__",
# "get_dispatcher_url",
# "get_dispatcher_headers",
# "get_colab_auth_token",
]
9 changes: 4 additions & 5 deletions rapidfireai/automl/model_config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Model configuration for AutoML training and evaluation."""

# from __future__ import annotations

from __future__ import annotations
import copy
import inspect
from abc import ABC, abstractmethod
Expand All @@ -11,6 +9,7 @@

from rapidfireai.automl.datatypes import List, Range


# Fit mode dependencies (peft, trl)
try:
from peft import LoraConfig
Expand Down Expand Up @@ -140,8 +139,8 @@ class RFModelConfig:
formatting_func: Callable | List | None = None
compute_metrics: Callable | List | None = None
peft_config: RFLoraConfig | List | None = None
# training_args: RFSFTConfig | RFDPOConfig | RFGRPOConfig | None = None
training_args = None
training_args: RFSFTConfig | RFDPOConfig | RFGRPOConfig | None = None
# training_args = None
model_type: str | None = "causal_lm"
model_kwargs: dict[str, Any] | None = None
ref_model_name: str | None = None
Expand Down
28 changes: 22 additions & 6 deletions rapidfireai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,12 @@ def main():
parser.add_argument("--version", action="version", version=f"RapidFire AI {__version__}")

parser.add_argument(
"--tracking-backend",
choices=["mlflow", "tensorboard", "both"],
default=os.getenv("RF_TRACKING_BACKEND", "mlflow" if not ColabConfig.ON_COLAB else "tensorboard"),
help="Tracking backend to use for metrics (default: mlflow)",
"--tracking-backends",
choices=["mlflow", "tensorboard", "trackio"],
default=["mlflow"] if not ColabConfig.ON_COLAB else ["tensorboard"],
help="Tracking backend to use for metrics (default: mlflow on Non-Google Colab and tensorboard on Google Colab)",
nargs="*",
action="extend"
)

parser.add_argument(
Expand All @@ -421,6 +423,8 @@ def main():
help="Copy test notebooks to the tutorial_notebooks directory",
)

parser.add_argument("--force", "-f", action="store_true", help="Force action without confirmation")

parser.add_argument("--evals", action="store_true", help="Initialize with evaluation dependencies")

parser.add_argument("--log-lines", type=int, default=10, help="Number of lines to log to the console")
Expand All @@ -429,14 +433,26 @@ def main():

# Set environment variables from CLI args

if args.tracking_backend:
os.environ["RF_TRACKING_BACKEND"] = args.tracking_backend
if args.tracking_backends:
os.environ["RF_MLFLOW_ENABLED"] = "false"
os.environ["RF_TENSORBOARD_ENABLED"] = "false"
os.environ["RF_TRACKIO_ENABLED"] = "false"
if "mlflow" in args.tracking_backends:
os.environ["RF_MLFLOW_ENABLED"] = "true"
if "tensorboard" in args.tracking_backends:
os.environ["RF_TENSORBOARD_ENABLED"] = "true"
if "trackio" in args.tracking_backends:
os.environ["RF_TRACKIO_ENABLED"] = "true"
if args.tensorboard_log_dir:
os.environ["RF_TENSORBOARD_LOG_DIR"] = args.tensorboard_log_dir
if args.colab:
os.environ["RF_COLAB_MODE"] = "true"
elif ColabConfig.ON_COLAB and os.getenv("RF_COLAB_MODE") is None:
os.environ["RF_COLAB_MODE"] = "true"

# Handle force command separately
if args.force:
os.environ["RF_FORCE"] = "true"

# Handle doctor command separately
if args.command == "doctor":
Expand Down
56 changes: 33 additions & 23 deletions rapidfireai/evals/db/rf_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,22 @@ def _initialize_schema(self):
self.db.conn.executescript(schema_sql)
self.db.conn.commit()

# Migration: Add mlflow_run_id column to pipelines table if it doesn't exist
# Migration: Add metric_run_id to pipelines table if they don't exist
try:
cursor = self.db.conn.execute("PRAGMA table_info(pipelines)")
columns = [row[1] for row in cursor.fetchall()]
if "mlflow_run_id" not in columns:
self.db.conn.execute("ALTER TABLE pipelines ADD COLUMN mlflow_run_id TEXT")
if "metric_run_id" not in columns:
self.db.conn.execute("ALTER TABLE pipelines ADD COLUMN metric_run_id TEXT")
self.db.conn.commit()
except Exception:
pass

# Migration: Add metric_experiment_id to experiments table if they don't exist
try:
cursor = self.db.conn.execute("PRAGMA table_info(experiments)")
columns = [row[1] for row in cursor.fetchall()]
if "metric_experiment_id" not in columns:
self.db.conn.execute("ALTER TABLE experiments ADD COLUMN metric_experiment_id TEXT")
self.db.conn.commit()
except Exception:
pass
Expand All @@ -67,7 +77,7 @@ def create_experiment(
num_actors: int,
num_cpus: int = None,
num_gpus: int = None,
mlflow_experiment_id: str = None,
metric_experiment_id: str = None,
status: ExperimentStatus = ExperimentStatus.RUNNING,
num_shards: int = 0,
) -> int:
Expand All @@ -79,7 +89,7 @@ def create_experiment(
num_actors: Number of query processing actors
num_cpus: Number of CPUs allocated
num_gpus: Number of GPUs allocated
mlflow_experiment_id: Optional MLflow experiment ID
metric_experiment_id: Optional MetricLogger experiment ID
status: Initial status (default: ExperimentStatus.RUNNING)
num_shards: Number of shards for the dataset (default: 0)

Expand All @@ -89,7 +99,7 @@ def create_experiment(
query = """
INSERT INTO experiments (
experiment_name, num_actors, num_shards, num_cpus, num_gpus,
mlflow_experiment_id, status, error
metric_experiment_id, status, error
) VALUES (?, ?, ?, ?, ?, ?, ?, '')
"""
self.db.execute(
Expand All @@ -100,7 +110,7 @@ def create_experiment(
num_shards,
num_cpus,
num_gpus,
mlflow_experiment_id,
metric_experiment_id,
status.value,
),
commit=True,
Expand Down Expand Up @@ -241,7 +251,7 @@ def get_experiment(self, experiment_id: int) -> dict[str, Any] | None:
"""
query = """
SELECT experiment_id, experiment_name, num_actors, num_cpus, num_gpus,
mlflow_experiment_id, status, num_shards, error, created_at
metric_experiment_id, status, num_shards, error, created_at
FROM experiments
WHERE experiment_id = ?
"""
Expand All @@ -254,7 +264,7 @@ def get_experiment(self, experiment_id: int) -> dict[str, Any] | None:
"num_actors": row[2],
"num_cpus": row[3],
"num_gpus": row[4],
"mlflow_experiment_id": row[5],
"metric_experiment_id": row[5],
"status": row[6],
"num_shards": row[7],
"error": row[8],
Expand Down Expand Up @@ -295,7 +305,7 @@ def get_running_experiment(self) -> dict[str, Any] | None:
Dictionary with all experiment fields, or None if no running experiment
"""
query = """
SELECT experiment_id, experiment_name, mlflow_experiment_id, num_shards,
SELECT experiment_id, experiment_name, metric_experiment_id, num_shards,
num_actors, num_cpus, num_gpus, status, error, created_at
FROM experiments
WHERE status = ?
Expand All @@ -308,7 +318,7 @@ def get_running_experiment(self) -> dict[str, Any] | None:
return {
"experiment_id": row[0],
"experiment_name": row[1],
"mlflow_experiment_id": row[2],
"metric_experiment_id": row[2],
"num_shards": row[3],
"num_actors": row[4],
"num_cpus": row[5],
Expand Down Expand Up @@ -509,7 +519,7 @@ def create_pipeline(
INSERT INTO pipelines (
context_id, pipeline_type,
pipeline_config, pipeline_config_json, status, error,
current_shard_id, shards_completed, total_samples_processed, mlflow_run_id
current_shard_id, shards_completed, total_samples_processed, metric_run_id
) VALUES (?, ?, ?, ?, ?, '', '', 0, 0, NULL)
"""
self.db.execute(
Expand Down Expand Up @@ -538,7 +548,7 @@ def set_pipeline_progress(self, pipeline_id: int) -> dict[str, Any] | None:
query = """
SELECT pipeline_id, context_id, pipeline_type,
pipeline_config, pipeline_config_json, status, current_shard_id,
shards_completed, total_samples_processed, mlflow_run_id, error, created_at
shards_completed, total_samples_processed, metric_run_id, error, created_at
FROM pipelines
WHERE pipeline_id = ?
"""
Expand All @@ -561,7 +571,7 @@ def set_pipeline_progress(self, pipeline_id: int) -> dict[str, Any] | None:
"current_shard_id": row[6],
"shards_completed": row[7],
"total_samples_processed": row[8],
"mlflow_run_id": row[9],
"metric_run_id": row[9],
"error": row[10],
"created_at": row[11],
}
Expand All @@ -580,7 +590,7 @@ def get_pipeline(self, pipeline_id: int) -> dict[str, Any] | None:
query = """
SELECT pipeline_id, context_id, pipeline_type,
pipeline_config, pipeline_config_json, status, current_shard_id,
shards_completed, total_samples_processed, mlflow_run_id, error, created_at
shards_completed, total_samples_processed, metric_run_id, error, created_at
FROM pipelines
WHERE pipeline_id = ?
"""
Expand All @@ -603,7 +613,7 @@ def get_pipeline(self, pipeline_id: int) -> dict[str, Any] | None:
"current_shard_id": row[6],
"shards_completed": row[7],
"total_samples_processed": row[8],
"mlflow_run_id": row[9],
"metric_run_id": row[9],
"error": row[10],
"created_at": row[11],
}
Expand Down Expand Up @@ -673,7 +683,7 @@ def get_all_pipelines(self) -> list[dict[str, Any]]:
query = """
SELECT pipeline_id, context_id, pipeline_type,
pipeline_config, pipeline_config_json, status, current_shard_id,
shards_completed, total_samples_processed, mlflow_run_id, error, created_at
shards_completed, total_samples_processed, metric_run_id, error, created_at
FROM pipelines
ORDER BY pipeline_id DESC
"""
Expand All @@ -698,7 +708,7 @@ def get_all_pipelines(self) -> list[dict[str, Any]]:
"current_shard_id": row[6],
"shards_completed": row[7],
"total_samples_processed": row[8],
"mlflow_run_id": row[9],
"metric_run_id": row[9],
"error": row[10],
"created_at": row[11],
}
Expand Down Expand Up @@ -765,16 +775,16 @@ def set_pipeline_error(self, pipeline_id: int, error: str):
query = "UPDATE pipelines SET error = ? WHERE pipeline_id = ?"
self.db.execute(query, (error, pipeline_id), commit=True)

def set_pipeline_mlflow_run_id(self, pipeline_id: int, mlflow_run_id: str):
def set_pipeline_metric_run_id(self, pipeline_id: int, metric_run_id: str):
"""
Set MLflow run ID for a pipeline.
Set MetricLogger run ID for a pipeline.

Args:
pipeline_id: ID of the pipeline
mlflow_run_id: MLflow run ID
metric_run_id: MetricLogger run ID
"""
query = "UPDATE pipelines SET mlflow_run_id = ? WHERE pipeline_id = ?"
self.db.execute(query, (mlflow_run_id, pipeline_id), commit=True)
query = "UPDATE pipelines SET metric_run_id = ? WHERE pipeline_id = ?"
self.db.execute(query, (metric_run_id, pipeline_id), commit=True)

# ============================================================================
# ACTOR_TASKS TABLE METHODS
Expand Down
Loading
Loading