From 5d951d40dd1a2963f03998eff337353174c5a4dd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:38:33 +0000
Subject: [PATCH 1/4] Initial plan

From 785234063201d364c4f34c5998832bd16da15784 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:45:52 +0000
Subject: [PATCH 2/4] Add comprehensive tests for GradientAccumulation callback

Co-authored-by: henrytsui000 <54672031+henrytsui000@users.noreply.github.com>
---
 tests/test_utils/test_model_utils.py | 296 +++++++++++++++++++++++++++
 1 file changed, 296 insertions(+)
 create mode 100644 tests/test_utils/test_model_utils.py

diff --git a/tests/test_utils/test_model_utils.py b/tests/test_utils/test_model_utils.py
new file mode 100644
index 000000000..8b9353384
--- /dev/null
+++ b/tests/test_utils/test_model_utils.py
@@ -0,0 +1,296 @@
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, Mock
+
+import pytest
+import torch
+
+project_root = Path(__file__).resolve().parent.parent.parent
+sys.path.append(str(project_root))
+
+from yolo.config.config import DataConfig, SchedulerConfig
+from yolo.utils.model_utils import GradientAccumulation, lerp
+
+
+class TestLerp:
+    """Test the lerp (linear interpolation) function."""
+
+    def test_lerp_basic(self):
+        """Test basic linear interpolation."""
+        assert lerp(0, 10, 0, 10) == 0
+        assert lerp(0, 10, 5, 10) == 5
+        assert lerp(0, 10, 10, 10) == 10
+
+    def test_lerp_fractional(self):
+        """Test linear interpolation with fractional steps."""
+        assert lerp(0, 100, 25, 100) == 25
+        assert lerp(0, 100, 50, 100) == 50
+        assert lerp(0, 100, 75, 100) == 75
+
+    def test_lerp_negative_values(self):
+        """Test linear interpolation with negative values."""
+        assert lerp(-10, 10, 0, 10) == -10
+        assert lerp(-10, 10, 5, 10) == 0
+        assert lerp(-10, 10, 10, 10) == 10
+
+    def test_lerp_reverse_range(self):
+        """Test linear interpolation from larger to smaller value."""
+        assert lerp(10, 0, 0, 10) == 10
+        assert lerp(10, 0, 5, 10) == 5
+        assert lerp(10, 0, 10, 10) == 0
+
+
+class TestGradientAccumulation:
+    """Test the GradientAccumulation callback."""
+
+    @pytest.fixture
+    def data_cfg(self):
+        """Create a mock DataConfig for testing."""
+        cfg = Mock(spec=DataConfig)
+        cfg.equivalent_batch_size = 64
+        cfg.batch_size = 16
+        return cfg
+
+    @pytest.fixture
+    def scheduler_cfg_with_warmup(self):
+        """Create a mock SchedulerConfig with warmup."""
+        cfg = Mock(spec=SchedulerConfig)
+        # Create a warmup object that supports attribute access
+        warmup = Mock()
+        warmup.epochs = 3
+        cfg.warmup = warmup
+        return cfg
+
+    @pytest.fixture
+    def scheduler_cfg_without_warmup(self):
+        """Create a mock SchedulerConfig without warmup."""
+        cfg = Mock(spec=SchedulerConfig)
+        # Create a warmup object without epochs attribute
+        warmup = Mock(spec=[])  # Empty spec means no attributes
+        cfg.warmup = warmup
+        return cfg
+
+    @pytest.fixture
+    def mock_trainer(self):
+        """Create a mock Trainer."""
+        trainer = Mock()
+        trainer.world_size = 1
+        trainer.global_step = 0
+        trainer.accumulate_grad_batches = 1
+        return trainer
+
+    @pytest.fixture
+    def mock_pl_module(self):
+        """Create a mock LightningModule."""
+        pl_module = Mock()
+        # Mock train_loader with 100 batches
+        pl_module.train_loader = list(range(100))
+        return pl_module
+
+    def test_init_with_warmup(self, data_cfg, scheduler_cfg_with_warmup):
+        """Test initialization with warmup configuration."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+
+        assert callback.equivalent_batch_size == 64
+        assert callback.actual_batch_size == 16
+        assert callback.warmup_epochs == 3
+        assert callback.current_batch == 0
+        assert callback.max_accumulation == 1
+        assert callback.warmup_batches == 0
+
+    def test_init_without_warmup(self, data_cfg, scheduler_cfg_without_warmup):
+        """Test initialization without warmup configuration."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_without_warmup)
+
+        assert callback.equivalent_batch_size == 64
+        assert callback.actual_batch_size == 16
+        assert callback.warmup_epochs == 0
+        assert callback.current_batch == 0
+        assert callback.max_accumulation == 1
+        assert callback.warmup_batches == 0
+
+    def test_setup_single_gpu(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test setup method with single GPU (world_size=1)."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 1
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=64, actual_batch_size=16, world_size=1
+        # effective_batch_size = 16 * 1 = 16
+        # max_accumulation = round(64 / 16) = 4
+        assert callback.max_accumulation == 4
+        # warmup_batches = warmup_epochs * batches_per_epoch
+        # warmup_batches = 3 * (100 / 1) = 300
+        assert callback.warmup_batches == 300
+
+    def test_setup_multi_gpu(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test setup method with multiple GPUs (world_size=4)."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 4
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=64, actual_batch_size=16, world_size=4
+        # effective_batch_size = 16 * 4 = 64
+        # max_accumulation = round(64 / 64) = 1
+        assert callback.max_accumulation == 1
+        # warmup_batches = warmup_epochs * batches_per_epoch
+        # warmup_batches = 3 * (100 / 4) = 75
+        assert callback.warmup_batches == 75
+
+    def test_setup_fractional_accumulation(self, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test setup with fractional accumulation (should round to nearest int)."""
+        data_cfg = Mock(spec=DataConfig)
+        data_cfg.equivalent_batch_size = 100
+        data_cfg.batch_size = 16
+
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 1
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=100, actual_batch_size=16, world_size=1
+        # effective_batch_size = 16 * 1 = 16
+        # max_accumulation = round(100 / 16) = round(6.25) = 6
+        assert callback.max_accumulation == 6
+
+    def test_setup_minimum_accumulation(self, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test that max_accumulation is at least 1."""
+        data_cfg = Mock(spec=DataConfig)
+        data_cfg.equivalent_batch_size = 16
+        data_cfg.batch_size = 32
+
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.world_size = 2
+
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # equivalent_batch_size=16, actual_batch_size=32, world_size=2
+        # effective_batch_size = 32 * 2 = 64
+        # max_accumulation = max(1, round(16 / 64)) = max(1, 0) = 1
+        assert callback.max_accumulation == 1
+
+    def test_on_train_epoch_start(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test that current_batch is updated at epoch start."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        mock_trainer.global_step = 150
+
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+
+        assert callback.current_batch == 150
+
+    def test_on_train_batch_start_before_warmup(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test gradient accumulation during warmup phase."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # warmup_batches = 300, max_accumulation = 4
+        assert callback.warmup_batches == 300
+        assert callback.max_accumulation == 4
+
+        # At batch 0 (start of warmup), should be 1
+        callback.current_batch = 0
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 1
+
+        # At batch 75 (25% through warmup), should interpolate
+        # lerp(1, 4, 75, 300) = 1 + (4-1) * 75/300 = 1 + 0.75 = 1.75 -> round to 2
+        callback.current_batch = 75
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 2
+
+        # At batch 150 (50% through warmup), should be halfway
+        # lerp(1, 4, 150, 300) = 1 + (4-1) * 150/300 = 1 + 1.5 = 2.5 -> round to 2 or 3
+        callback.current_batch = 150
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == round(1 + (4-1) * 150/300)
+
+        # At batch 225 (75% through warmup)
+        # lerp(1, 4, 225, 300) = 1 + (4-1) * 225/300 = 1 + 2.25 = 3.25 -> round to 3
+        callback.current_batch = 225
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 3
+
+    def test_on_train_batch_start_after_warmup(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test gradient accumulation after warmup phase."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # warmup_batches = 300, max_accumulation = 4
+        assert callback.warmup_batches == 300
+        assert callback.max_accumulation == 4
+
+        # At batch 300 (end of warmup), should be max_accumulation
+        callback.current_batch = 300
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+        # At batch 500 (well after warmup), should still be max_accumulation
+        callback.current_batch = 500
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+    def test_on_train_batch_start_no_warmup(self, data_cfg, scheduler_cfg_without_warmup, mock_trainer, mock_pl_module):
+        """Test gradient accumulation when warmup is disabled."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_without_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # warmup_batches = 0, max_accumulation = 4
+        assert callback.warmup_batches == 0
+        assert callback.max_accumulation == 4
+
+        # From the start, should use max_accumulation
+        callback.current_batch = 0
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+        callback.current_batch = 100
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4
+
+    def test_on_train_batch_end(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test that current_batch is incremented after each batch."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.current_batch = 0
+
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 1
+
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 2
+
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 3
+
+    def test_full_training_cycle(self, data_cfg, scheduler_cfg_with_warmup, mock_trainer, mock_pl_module):
+        """Test a complete training cycle with warmup."""
+        callback = GradientAccumulation(data_cfg, scheduler_cfg_with_warmup)
+        callback.setup(mock_trainer, mock_pl_module, "fit")
+
+        # Start of epoch
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 0
+
+        # First batch - should start at accumulation of 1
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 1
+        callback.on_train_batch_end(mock_trainer, mock_pl_module)
+        assert callback.current_batch == 1
+
+        # Simulate warmup progression
+        for _ in range(149):  # Complete up to batch 150
+            callback.on_train_batch_start(mock_trainer, mock_pl_module)
+            callback.on_train_batch_end(mock_trainer, mock_pl_module)
+
+        assert callback.current_batch == 150
+
+        # Continue past warmup (warmup_batches = 300)
+        for _ in range(200):  # Go to batch 350
+            callback.on_train_batch_start(mock_trainer, mock_pl_module)
+            callback.on_train_batch_end(mock_trainer, mock_pl_module)
+
+        assert callback.current_batch == 350
+        # After warmup, should be at max_accumulation
+        callback.on_train_batch_start(mock_trainer, mock_pl_module)
+        assert mock_trainer.accumulate_grad_batches == 4

From bc691ca8963e98fc07ab29e1173955b34a865023 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:47:19 +0000
Subject: [PATCH 3/4] Improve test clarity based on code review feedback

Co-authored-by: henrytsui000 <54672031+henrytsui000@users.noreply.github.com>
---
 tests/test_utils/test_model_utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_utils/test_model_utils.py b/tests/test_utils/test_model_utils.py
index 8b9353384..444144409 100644
--- a/tests/test_utils/test_model_utils.py
+++ b/tests/test_utils/test_model_utils.py
@@ -65,8 +65,9 @@
     def scheduler_cfg_without_warmup(self):
         """Create a mock SchedulerConfig without warmup."""
         cfg = Mock(spec=SchedulerConfig)
-        # Create a warmup object without epochs attribute
-        warmup = Mock(spec=[])  # Empty spec means no attributes
+        # Create a warmup object without epochs attribute to simulate missing warmup config
+        warmup = Mock()
+        del warmup.epochs
         cfg.warmup = warmup
         return cfg
 
@@ -203,8 +204,9 @@ def test_on_train_batch_start_before_warmup(self, data_cfg, scheduler_cfg_with_w
         # At batch 150 (50% through warmup), should be halfway
         # lerp(1, 4, 150, 300) = 1 + (4-1) * 150/300 = 1 + 1.5 = 2.5 -> round to 2 or 3
         callback.current_batch = 150
+        expected_accumulation = round(lerp(1, 4, 150, 300))
         callback.on_train_batch_start(mock_trainer, mock_pl_module)
-        assert mock_trainer.accumulate_grad_batches == round(1 + (4-1) * 150/300)
+        assert mock_trainer.accumulate_grad_batches == expected_accumulation
 
         # At batch 225 (75% through warmup)
         # lerp(1, 4, 225, 300) = 1 + (4-1) * 225/300 = 1 + 2.25 = 3.25 -> round to 3

From 5161ef4177ca84c0e84e7bc6f48910d192680fbb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 07:48:29 +0000
Subject: [PATCH 4/4] Remove unused imports from test file

Co-authored-by: henrytsui000 <54672031+henrytsui000@users.noreply.github.com>
---
 tests/test_utils/test_model_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_utils/test_model_utils.py b/tests/test_utils/test_model_utils.py
index 444144409..ddd52e5aa 100644
--- a/tests/test_utils/test_model_utils.py
+++ b/tests/test_utils/test_model_utils.py
@@ -1,9 +1,8 @@
 import sys
 from pathlib import Path
-from unittest.mock import MagicMock, Mock
+from unittest.mock import Mock
 
 import pytest
-import torch
 
 project_root = Path(__file__).resolve().parent.parent.parent
 sys.path.append(str(project_root))
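
For reference, the contract these tests encode can be restated as a small, self-contained sketch. This is an illustration only, not the actual implementation in yolo/utils/model_utils.py; the class name GradientAccumulationSketch and the simplified hook signatures (matching how the tests call the hooks, without Lightning's extra batch/batch_idx arguments) are assumptions made for the example. Under those assumptions, the warmup schedule the tests assert looks like this, and the suite itself can be run with "pytest tests/test_utils/test_model_utils.py":

# Illustrative sketch only: the real callback lives in yolo/utils/model_utils.py.
# Names and signatures below mirror what the tests assert, not the actual source.
def lerp(start, end, step, total_steps):
    # Linear interpolation: returns `start` at step 0 and `end` at step == total_steps.
    return start + (end - start) * (step / total_steps)


class GradientAccumulationSketch:
    def __init__(self, data_cfg, scheduler_cfg):
        self.equivalent_batch_size = data_cfg.equivalent_batch_size
        self.actual_batch_size = data_cfg.batch_size
        # Warmup is optional; a config whose warmup has no `epochs` means no warmup.
        self.warmup_epochs = getattr(scheduler_cfg.warmup, "epochs", 0)
        self.current_batch = 0
        self.max_accumulation = 1
        self.warmup_batches = 0

    def setup(self, trainer, pl_module, stage):
        # Accumulate enough micro-batches to emulate the equivalent batch size.
        effective_batch_size = self.actual_batch_size * trainer.world_size
        self.max_accumulation = max(1, round(self.equivalent_batch_size / effective_batch_size))
        batches_per_epoch = len(pl_module.train_loader) // trainer.world_size
        self.warmup_batches = self.warmup_epochs * batches_per_epoch

    def on_train_epoch_start(self, trainer, pl_module):
        self.current_batch = trainer.global_step

    def on_train_batch_start(self, trainer, pl_module):
        if self.current_batch < self.warmup_batches:
            # Ramp accumulation linearly from 1 up to max_accumulation during warmup.
            trainer.accumulate_grad_batches = round(
                lerp(1, self.max_accumulation, self.current_batch, self.warmup_batches)
            )
        else:
            trainer.accumulate_grad_batches = self.max_accumulation

    def on_train_batch_end(self, trainer, pl_module):
        self.current_batch += 1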