From 5d951d40dd1a2963f03998eff337353174c5a4dd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:38:33 +0000
Subject: [PATCH 1/4] Initial plan

From 785234063201d364c4f34c5998832bd16da15784 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:45:52 +0000
Subject: [PATCH 2/4] Add comprehensive tests for GradientAccumulation callback

Co-authored-by: henrytsui000 <54672031+henrytsui000@users.noreply.github.com>
---
 tests/test_utils/test_model_utils.py | 296 +++++++++++++++++++++++++++
 1 file changed, 296 insertions(+)
 create mode 100644 tests/test_utils/test_model_utils.py

diff --git a/tests/test_utils/test_model_utils.py b/tests/test_utils/test_model_utils.py
new file mode 100644
index 000000000..8b9353384
--- /dev/null
+++ b/tests/test_utils/test_model_utils.py
@@ -0,0 +1,296 @@
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, Mock
+
+import pytest
+import torch
+
+project_root = Path(__file__).resolve().parent.parent.parent
+sys.path.append(str(project_root))
+
+from yolo.config.config import DataConfig, SchedulerConfig
+from yolo.utils.model_utils import GradientAccumulation, lerp
+
+
+class TestLerp:
+    """Test the lerp (linear interpolation) function."""
+
+    def test_lerp_basic(self):
+        """Test basic linear interpolation."""
+        assert lerp(0, 10, 0, 10) == 0
+        assert lerp(0, 10, 5, 10) == 5
+        assert lerp(0, 10, 10, 10) == 10
+
+    def test_lerp_fractional(self):
+        """Test linear interpolation with fractional steps."""
+        assert lerp(0, 100, 25, 100) == 25
+        assert lerp(0, 100, 50, 100) == 50
+        assert lerp(0, 100, 75, 100) == 75
+
+    def test_lerp_negative_values(self):
+        """Test linear interpolation with negative values."""
+        assert lerp(-10, 10, 0, 10) == -10
+        assert lerp(-10, 10, 5, 10) == 0
+        assert lerp(-10, 10, 10, 10) == 10
+
+    def test_lerp_reverse_range(self):
+        """Test linear interpolation from larger to smaller value."""
+        assert lerp(10, 0, 0, 10) == 10
+        assert lerp(10, 0, 5, 10) == 5
+        assert lerp(10, 0, 10, 10) == 0
+
+
+class TestGradientAccumulation:
+    """Test the GradientAccumulation callback."""
+
+    @pytest.fixture
+    def data_cfg(self):
+        """Create a mock DataConfig for testing."""
+        cfg = Mock(spec=DataConfig)
+        cfg.equivalent_batch_size = 64
+        cfg.batch_size = 16
+        return cfg
+
+    @pytest.fixture
+    def scheduler_cfg_with_warmup(self):
+        """Create a mock SchedulerConfig with warmup."""
+        cfg = Mock(spec=SchedulerConfig)
+        # Create a warmup object that supports attribute access
+        warmup = Mock()
+        warmup.epochs = 3
+        cfg.warmup = warmup
+        return cfg
+
+    @pytest.fixture
+    def scheduler_cfg_without_warmup(self):
+        """Create a mock SchedulerConfig without warmup."""
+        cfg = Mock(spec=SchedulerConfig)
+        # Create a warmup object without epochs attribute
+        warmup = Mock(spec=[])  # Empty spec means no attributes
+        cfg.warmup = warmup
+        return cfg
+
+    @pytest.fixture
+    def mock_trainer(self):
+        """Create a mock Trainer."""
+        trainer = Mock()
+        trainer.world_size = 1
+        trainer.global_step = 0
+        trainer.accumulate_grad_batches = 1
+        return trainer
+
+    @pytest.fixture
+    def mock_pl_module(self):
+        """Create a mock LightningModule."""
+        pl_module = Mock()
+        # Mock train_loader with 100 batches
+        pl_module.train_loader = list(range(100))
+        return pl_module
+
+    def test_init_with_warmup(self, data_cfg, scheduler_cfg_with_warmup):
+        """Test initialization with warmup configuration."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+
+        assert callback.equivalent_batch_size == 64
+        assert callback.actual_batch_size == 16
+        assert callback.warmup_epochs == 3
+        assert callback.current_batch == 0
+        assert callback.max_accumulation == 1
+        assert callback.warmup_batches == 0
+
+    def test_init_without_warmup(self, data_cfg, scheduler_cfg_without_warmup):
+        """Test initialization without warmup configuration."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_without_warmup)
+
+        assert callback.equivalent_batch_size == 64
+        assert callback.actual_batch_size == 16
+        assert callback.warmup_epochs == 0
+        assert callback.current_batch == 0
+        assert callback.max_accumulation == 1
+        assert callback.warmup_batches == 0
+
+    def test_setup_single_gpu(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test setup method with single GPU (world_size=1)."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 1
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=64, actual_batch_size=16, world_size=1
+        # effective_batch_size = 16 * 1 = 16
+        # max_accumulation = round(64 / 16) = 4
+        assert callback.max_accumulation == 4
+        # warmup_batches = warmup_epochs * batches_per_epoch
+        # warmup_batches = 3 * (100 / 1) = 300
+        assert callback.warmup_batches == 300
+
+    def test_setup_multi_gpu(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test setup method with multiple GPUs (world_size=4)."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 4
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=64, actual_batch_size=16, world_size=4
+        # effective_batch_size = 16 * 4 = 64
+        # max_accumulation = round(64 / 64) = 1
+        assert callback.max_accumulation == 1
+        # warmup_batches = warmup_epochs * batches_per_epoch
+        # warmup_batches = 3 * (100 / 4) = 75
+        assert callback.warmup_batches == 75
+
+    def test_setup_fractional_accumulation(self, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test setup with fractional accumulation (should round to nearest int)."""
+        data_cfg = Mock(spec=DataConfig)
+        data_cfg.equivalent_batch_size = 100
+        data_cfg.batch_size = 16
+
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 1
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=100, actual_batch_size=16, world_size=1
+        # effective_batch_size = 16 * 1 = 16
+        # max_accumulation = round(100 / 16) = round(6.25) = 6
+        assert callback.max_accumulation == 6
+
+    def test_setup_minimum_accumulation(self, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test that max_accumulation is at least 1."""
+        data_cfg = Mock(spec=DataConfig)
+        data_cfg.equivalent_batch_size = 16
+        data_cfg.batch_size = 32
+
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 2
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=16, actual_batch_size=32, world_size=2
+        # effective_batch_size = 32 * 2 = 64
+        # max_accumulation = max(1, round(16 / 64)) = max(1, 0) = 1
+        assert callback.max_accumulation == 1
+
+    def test_on_train_epoch_start(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test that current_batch is updated at epoch start."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.global_step = 150
+
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+
+        assert callback.current_batch == 150
+
+    def test_on_train_batch_start_before_warmup(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test gradient accumulation during warmup phase."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # warmup_batches = 300, max_accumulation = 4
+        assert callback.warmup_batches == 300
+        assert callback.max_accumulation == 4
+
+        # At batch 0 (start of warmup), should be 1
+        callback.current_batch = 0
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 1
+
+        # At batch 75 (25% through warmup), should interpolate
+        # lerp(1, 4, 75, 300) = 1 + (4-1) * 75/300 = 1 + 0.75 = 1.75 -> round to 2
+        callback.current_batch = 75
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 2
+
+        # At batch 150 (50% through warmup), should be halfway
+        # lerp(1, 4, 150, 300) = 1 + (4-1) * 150/300 = 1 + 1.5 = 2.5 -> round to 2 or 3
+        callback.current_batch = 150
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == round(1 + (4-1) * 150/300)
+
+        # At batch 225 (75% through warmup)
+        # lerp(1, 4, 225, 300) = 1 + (4-1) * 225/300 = 1 + 2.25 = 3.25 -> round to 3
+        callback.current_batch = 225
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 3
+
+    def test_on_train_batch_start_after_warmup(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test gradient accumulation after warmup phase."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # warmup_batches = 300, max_accumulation = 4
+        assert callback.warmup_batches == 300
+        assert callback.max_accumulation == 4
+
+        # At batch 300 (end of warmup), should be max_accumulation
+        callback.current_batch = 300
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+        # At batch 500 (well after warmup), should still be max_accumulation
+        callback.current_batch = 500
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+    def test_on_train_batch_start_no_warmup(self, data_cfg, scheduler_cfg_without_warmup, mock_trainer, mock_pl_module):
+        """Test gradient accumulation when warmup is disabled."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_without_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # warmup_batches = 0, max_accumulation = 4
+        assert callback.warmup_batches == 0
+        assert callback.max_accumulation == 4
+
+        # From the start, should use max_accumulation
+        callback.current_batch = 0
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+        callback.current_batch = 100
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+    def test_on_train_batch_end(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test that current_batch is incremented after each batch."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.current_batch = 0
+
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 1
+
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 2
+
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 3
+
+    def test_full_training_cycle(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test a complete training cycle with warmup."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # Start of epoch
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 0
+
+        # First batch - should start at accumulation of 1
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 1
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 1
+
+        # Simulate warmup progression
+        for _ in range(149):  # Complete up to batch 150
+            callback.on_train_batch_start(mock_trainer, mock_pl_module)
+            callback.on_train_batch_end(mock_trainer, mock_pl_module)
+
+        assert callback.current_batch == 150
+
+        # Continue past warmup (warmup_batches = 300)
+        for _ in range(200):  # Go to batch 350
+            callback.on_train_batch_start(mock_trainer, mock_pl_module)
+            callback.on_train_batch_end(mock_trainer, mock_pl_module)
+
+        assert callback.current_batch == 350
+        # After warmup, should be at max_accumulation
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4

From bc691ca8963e98fc07ab29e1173955b34a865023 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:47:19 +0000
Subject: [PATCH 3/4] Improve test clarity based on code review feedback

Co-authored-by: henrytsui000 <54672031+henrytsui000@users.noreply.github.com>
---
 tests/test_utils/test_model_utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_utils/test_model_utils.py b/tests/test_utils/test_model_utils.py
index 8b9353384..444144409 100644
--- a/tests/test_utils/test_model_utils.py
+++ b/tests/test_utils/test_model_utils.py
@@ -65,8 +65,9 @@
     def scheduler_cfg_without_warmup(self):
         """Create a mock SchedulerConfig without warmup."""
         cfg = Mock(spec=SchedulerConfig)
-        # Create a warmup object without epochs attribute
-        warmup = Mock(spec=[])  # Empty spec means no attributes
+        # Create a warmup object without epochs attribute to simulate missing warmup config
+        warmup = Mock()
+        del warmup.epochs
         cfg.warmup = warmup
         return cfg
 
@@ -203,8 +204,9 @@ def test_on_train_batch_start_before_warmup(self, data_cfg, scheduler_cfg_with_w
         # At batch 150 (50% through warmup), should be halfway
         # lerp(1, 4, 150, 300) = 1 + (4-1) * 150/300 = 1 + 1.5 = 2.5 -> round to 2 or 3
         callback.current_batch = 150
+        expected_accumulation = round(lerp(1, 4, 150, 300))
         callback.on_train_batch_start(mock_trainer, mock_pl_module)
-        assert mock_trainer.accumulate_grad_batches == round(1 + (4-1) * 150/300)
+        assert mock_trainer.accumulate_grad_batches == expected_accumulation
 
         # At batch 225 (75% through warmup)
         # lerp(1, 4, 225, 300) = 1 + (4-1) * 225/300 = 1 + 2.25 = 3.25 -> round to 3

From 5161ef4177ca84c0e84e7bc6f48910d192680fbb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:48:29 +0000
Subject: [PATCH 4/4] Remove unused imports from test file

Co-authored-by: henrytsui000 <54672031+henrytsui000@users.noreply.github.com>
---
 tests/test_utils/test_model_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_utils/test_model_utils.py b/tests/test_utils/test_model_utils.py
index 444144409..ddd52e5aa 100644
--- a/tests/test_utils/test_model_utils.py
+++ b/tests/test_utils/test_model_utils.py
@@ -1,9 +1,8 @@
 import sys
 from pathlib import Path
-from unittest.mock import MagicMock, Mock
+from unittest.mock import Mock
 
 import pytest
-import torch
 
 project_root = Path(__file__).resolve().parent.parent.parent
 sys.path.append(str(project_root))
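
For reference, the contract these tests encode can be restated as a small, self-contained sketch. This is an illustration only, not the actual implementation in yolo/utils/model_utils.py; the class name GradientAccumulationSketch and the simplified hook signatures (matching how the tests call the hooks, without Lightning's extra batch/batch_idx arguments) are assumptions made for the example. Under those assumptions, the warmup schedule the tests assert looks like this, and the suite itself can be run with "pytest tests/test_utils/test_model_utils.py":

# Illustrative sketch only: the real callback lives in yolo/utils/model_utils.py.
# Names and signatures below mirror what the tests assert, not the actual source.
def lerp(start, end, step, total_steps):
    # Linear interpolation: returns `start` at step 0 and `end` at step == total_steps.
    return start + (end - start) * (step / total_steps)


class GradientAccumulationSketch:
    def __init__(self, data_cfg, scheduler_cfg):
        self.equivalent_batch_size = data_cfg.equivalent_batch_size
        self.actual_batch_size = data_cfg.batch_size
        # Warmup is optional; a config whose warmup has no `epochs` means no warmup.
        self.warmup_epochs = getattr(scheduler_cfg.warmup, "epochs", 0)
        self.current_batch = 0
        self.max_accumulation = 1
        self.warmup_batches = 0

    def setup(self, trainer, pl_module, stage):
        # Accumulate enough micro-batches to emulate the equivalent batch size.
        effective_batch_size = self.actual_batch_size * trainer.world_size
        self.max_accumulation = max(1, round(self.equivalent_batch_size / effective_batch_size))
        batches_per_epoch = len(pl_module.train_loader) // trainer.world_size
        self.warmup_batches = self.warmup_epochs * batches_per_epoch

    def on_train_epoch_start(self, trainer, pl_module):
        self.current_batch = trainer.global_step

    def on_train_batch_start(self, trainer, pl_module):
        if self.current_batch < self.warmup_batches:
            # Ramp accumulation linearly from 1 up to max_accumulation during warmup.
            trainer.accumulate_grad_batches = round(
                lerp(1, self.max_accumulation, self.current_batch, self.warmup_batches)
            )
        else:
            trainer.accumulate_grad_batches = self.max_accumulation

    def on_train_batch_end(self, trainer, pl_module):
        self.current_batch += 1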