From 1657c8c156d6fe395dad48413fede671893c3a31 Mon Sep 17 00:00:00 2001 From: svij Date: Mon, 24 Nov 2025 16:49:20 -0800 Subject: [PATCH 1/5] if using g4 default hyper disk --- python/gigl/common/services/vertex_ai.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/gigl/common/services/vertex_ai.py b/python/gigl/common/services/vertex_ai.py index 1e7b288e..bd16a292 100644 --- a/python/gigl/common/services/vertex_ai.py +++ b/python/gigl/common/services/vertex_ai.py @@ -98,7 +98,7 @@ class VertexAiJobConfig: accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED" accelerator_count: int = 0 replica_count: int = 1 - boot_disk_type: str = "pd-ssd" # Persistent Disk SSD + boot_disk_type: str = None # Persistent Disk SSD boot_disk_size_gb: int = 100 # Default disk size in GB labels: Optional[dict[str, str]] = None timeout_s: Optional[ @@ -108,6 +108,14 @@ class VertexAiJobConfig: scheduling_strategy: Optional[aiplatform.gapic.Scheduling.Strategy] = None + def __post_init__(self): + if self.boot_disk_type is None: + if self.machine_type.startswith("g4-"): + self.boot_disk_type = "hyperdisk-balanced" + else: + self.boot_disk_type = "pd-ssd" + + class VertexAIService: """ A class representing a Vertex AI service. 
From 867bb7407f81974f27a0060705f2e1e2120b2217 Mon Sep 17 00:00:00 2001 From: svij Date: Mon, 24 Nov 2025 17:24:44 -0800 Subject: [PATCH 2/5] test --- .../tests/integration/common/services/vertex_ai_test.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/tests/integration/common/services/vertex_ai_test.py b/python/tests/integration/common/services/vertex_ai_test.py index 43a4e2f9..b2ccad19 100644 --- a/python/tests/integration/common/services/vertex_ai_test.py +++ b/python/tests/integration/common/services/vertex_ai_test.py @@ -221,6 +221,15 @@ def test_run_pipeline_fails(self): job.resource_name, timeout=60 * 30, polling_period_s=10 ) + def default_boot_disk_for_g4_machine(self): + job_config = VertexAiJobConfig( + job_name="job_name", + container_uri="container_uri", + command=["command"], + machine_type="g4-standard-8", + ) + self.assertEqual(job_config.boot_disk_type, "hyperdisk-balanced") + if __name__ == "__main__": unittest.main() From 4caacea4474970d83093ad8dd204aab192963ee9 Mon Sep 17 00:00:00 2001 From: bve-loaner Date: Tue, 25 Nov 2025 02:06:28 +0000 Subject: [PATCH 3/5] fix --- python/gigl/common/services/vertex_ai.py | 6 ++-- .../common/services/vertex_ai_test.py | 9 ----- python/tests/unit/common/services/__init__.py | 0 .../unit/common/services/vertex_ai_test.py | 36 +++++++++++++++++++ 4 files changed, 39 insertions(+), 12 deletions(-) create mode 100644 python/tests/unit/common/services/__init__.py create mode 100644 python/tests/unit/common/services/vertex_ai_test.py diff --git a/python/gigl/common/services/vertex_ai.py b/python/gigl/common/services/vertex_ai.py index bd16a292..5e8e9c58 100644 --- a/python/gigl/common/services/vertex_ai.py +++ b/python/gigl/common/services/vertex_ai.py @@ -85,6 +85,7 @@ def get_pipeline() -> int: # NOTE: `get_pipeline` here is the Pipeline name DEFAULT_PIPELINE_TIMEOUT_S: Final[int] = 60 * 60 * 36 # 36 hours DEFAULT_CUSTOM_JOB_TIMEOUT_S: Final[int] = 60 * 60 * 24 # 24 hours +BOOT_DISK_PLACEHOLDER: 
Final[str] = "DISK_TYPE_UNSPECIFIED" @dataclass @@ -98,7 +99,7 @@ class VertexAiJobConfig: accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED" accelerator_count: int = 0 replica_count: int = 1 - boot_disk_type: str = None # Persistent Disk SSD + boot_disk_type: str = BOOT_DISK_PLACEHOLDER # Placeholder; __post_init__ resolves it to hyperdisk-balanced for g4-* machines, else pd-ssd boot_disk_size_gb: int = 100 # Default disk size in GB labels: Optional[dict[str, str]] = None timeout_s: Optional[ @@ -107,9 +108,8 @@ class VertexAiJobConfig: enable_web_access: bool = True scheduling_strategy: Optional[aiplatform.gapic.Scheduling.Strategy] = None - def __post_init__(self): - if self.boot_disk_type is None: + if self.boot_disk_type is BOOT_DISK_PLACEHOLDER: if self.machine_type.startswith("g4-"): self.boot_disk_type = "hyperdisk-balanced" else: diff --git a/python/tests/integration/common/services/vertex_ai_test.py b/python/tests/integration/common/services/vertex_ai_test.py index b2ccad19..43a4e2f9 100644 --- a/python/tests/integration/common/services/vertex_ai_test.py +++ b/python/tests/integration/common/services/vertex_ai_test.py @@ -221,15 +221,6 @@ def test_run_pipeline_fails(self): job.resource_name, timeout=60 * 30, polling_period_s=10 ) - def default_boot_disk_for_g4_machine(self): - job_config = VertexAiJobConfig( - job_name="job_name", - container_uri="container_uri", - command=["command"], - machine_type="g4-standard-8", - ) - self.assertEqual(job_config.boot_disk_type, "hyperdisk-balanced") - if __name__ == "__main__": unittest.main() diff --git a/python/tests/unit/common/services/__init__.py b/python/tests/unit/common/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/tests/unit/common/services/vertex_ai_test.py b/python/tests/unit/common/services/vertex_ai_test.py new file mode 100644 index 00000000..9e2bf6e3 --- /dev/null +++ b/python/tests/unit/common/services/vertex_ai_test.py @@ -0,0 +1,36 @@ +import unittest + +from parameterized import param, parameterized + +from 
gigl.common.services.vertex_ai import VertexAiJobConfig
+
+
+class VertexAIServiceTest(unittest.TestCase):
+    @parameterized.expand(
+        [
+            param(
+                "g4 machine ; should default to hyperdisk-balanced",
+                machine_type="g4-standard-8",
+                expected_boot_disk_type="hyperdisk-balanced",
+            ),
+            param(
+                "n1 machine ; should default to pd-ssd",
+                machine_type="n1-standard-4",
+                expected_boot_disk_type="pd-ssd",
+            ),
+        ]
+    )
+    def test_default_boot_disk_for_machine(
+        self, _, machine_type, expected_boot_disk_type
+    ):
+        job_config = VertexAiJobConfig(
+            job_name="job_name",
+            container_uri="container_uri",
+            command=["command"],
+            machine_type=machine_type,
+        )
+        self.assertEqual(job_config.boot_disk_type, expected_boot_disk_type)
+
+
+if __name__ == "__main__":
+    unittest.main()

From eb46801ff8a996907aea56e2202742f8e813f062 Mon Sep 17 00:00:00 2001
From: svij
Date: Tue, 25 Nov 2025 02:09:02 +0000
Subject: [PATCH 4/5] require use of this machine

---
 python/gigl/common/services/vertex_ai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/gigl/common/services/vertex_ai.py b/python/gigl/common/services/vertex_ai.py
index 5e8e9c58..d8aa413d 100644
--- a/python/gigl/common/services/vertex_ai.py
+++ b/python/gigl/common/services/vertex_ai.py
@@ -111,7 +111,7 @@ class VertexAiJobConfig:
     def __post_init__(self):
         if self.boot_disk_type is BOOT_DISK_PLACEHOLDER:
             if self.machine_type.startswith("g4-"):
-                self.boot_disk_type = "hyperdisk-balanced"
+                self.boot_disk_type = "hyperdisk-balanced"  # g4 machines require use of hyperdisk-balanced
             else:
                 self.boot_disk_type = "pd-ssd"
 

From 1a1a574606c890466e7dff2c159f29e3f1ae6884 Mon Sep 17 00:00:00 2001
From: svij
Date: Tue, 25 Nov 2025 05:25:20 +0000
Subject: [PATCH 5/5] adding logs

---
Review fix-up folded into this patch: BOOT_DISK_PLACEHOLDER is a plain str,
so it is now compared by value (==) instead of by identity (is); an equal
string that is a different object (e.g. read from a config file) would
otherwise skip the defaulting. The two log calls also drop the f prefix,
since neither message has a placeholder. The diffstat below reflects this;
the blob ids on the index line are carried over from the original patch.

 python/gigl/common/services/vertex_ai.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/gigl/common/services/vertex_ai.py b/python/gigl/common/services/vertex_ai.py
index d8aa413d..b3ce330c 100644
--- a/python/gigl/common/services/vertex_ai.py
+++ b/python/gigl/common/services/vertex_ai.py
@@ -111,8 +111,10 @@ class VertexAiJobConfig:
     def __post_init__(self):
-        if self.boot_disk_type is BOOT_DISK_PLACEHOLDER:
+        if self.boot_disk_type == BOOT_DISK_PLACEHOLDER:
             if self.machine_type.startswith("g4-"):
+                logger.info("No boot disk type set, and g4 machine detected, using hyperdisk-balanced")
                 self.boot_disk_type = "hyperdisk-balanced"  # g4 machines require use of hyperdisk-balanced
             else:
+                logger.info("No boot disk type set, using pd-ssd")
                 self.boot_disk_type = "pd-ssd"
 
 