From 3051a21bd8ac4d8327d2ca585b2b7b9effe2252d Mon Sep 17 00:00:00 2001 From: yansun1996 Date: Tue, 20 Jan 2026 00:40:25 +0000 Subject: [PATCH] [CI] Setup hourly build for operator utils image Signed-off-by: yansun1996 --- Makefile | 31 +++++++++++++++++++ hack/k8s-patch/metadata-patch/values.yaml | 8 ++--- .../metadata-patch/values.yaml | 9 ++++-- helm-charts-k8s/README.md | 8 ++--- helm-charts-k8s/values.yaml | 8 ++--- helm-charts-openshift/values.yaml | 9 ++++-- 6 files changed, 55 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 5232e72c..a4007f9d 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,17 @@ KMM_BUILDER_IMG ?= gcr.io/kaniko-project/executor:v1.23.2 KMM_WEBHOOK_IMG_NAME ?= $(DOCKER_REGISTRY)/kernel-module-management-webhook-server KMM_OPERATOR_IMG_NAME ?= $(DOCKER_REGISTRY)/kernel-module-management-operator +# Operand related images +EXPORTER_IMAGE_TAG ?= latest +METRICS_EXPORTER_IMG = $(DOCKER_REGISTRY)/device-metrics-exporter:$(EXPORTER_IMAGE_TAG) +DEVICE_CONFIG_MANAGER_IMAGE_TAG ?= latest +DEVICE_CONFIG_MANAGER_IMG = $(DOCKER_REGISTRY)/device-config-manager:$(DEVICE_CONFIG_MANAGER_IMAGE_TAG) +TEST_RUNNER_IMAGE_TAG ?= latest +TEST_RUNNER_IMG = $(DOCKER_REGISTRY)/test-runner:$(TEST_RUNNER_IMAGE_TAG) +UTILS_IMAGE_TAG ?= latest +UTILS_IMAGE_NAME ?= $(IMAGE_NAME)-utils +UTILS_IMG ?= $(DOCKER_REGISTRY)/$(UTILS_IMAGE_NAME):$(UTILS_IMAGE_TAG) + ####################### # Helm Charts variables YAML_FILES=bundle/manifests/amd-gpu-operator-node-metrics_rbac.authorization.k8s.io_v1_rolebinding.yaml bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml bundle/manifests/amd-gpu-operator-node-labeller_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml bundle/manifests/amd-gpu-operator-node-metrics_monitoring.coreos.com_v1_servicemonitor.yaml config/samples/amd.com_deviceconfigs.yaml config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml example/deviceconfig_example.yaml config/default/kustomization.yaml @@ -38,6 +49,7 @@ 
CRD_YAML_FILES = deviceconfig-crd.yaml remediationworkflowstatus-crd.yaml K8S_KMM_CRD_YAML_FILES=module-crd.yaml nodemodulesconfig-crd.yaml OPENSHIFT_KMM_CRD_YAML_FILES=module-crd.yaml nodemodulesconfig-crd.yaml OPENSHIFT_CLUSTER_NFD_CRD_YAML_FILES=nodefeature-crd.yaml nodefeaturediscovery-crd.yaml nodefeaturerule-crd.yaml +DEFAULT_VALUES_FILES=helm-charts-k8s/values.yaml helm-charts-openshift/values.yaml hack/k8s-patch/metadata-patch/values.yaml hack/openshift-patch/metadata-patch/values.yaml ifdef OPENSHIFT $(info selected openshift) @@ -199,6 +211,13 @@ update-registry: hack/k8s-patch/metadata-patch/values.yaml helm-charts-k8s/values.yaml \ hack/openshift-patch/metadata-patch/values.yaml helm-charts-openshift/values.yaml \ example/deviceconfig_example.yaml + # update operand image references in every default values file, aborting on the first yq failure + @set -e; for file in $(DEFAULT_VALUES_FILES); do \ + yq eval -i '.deviceConfig.spec.metricsExporter.image = "$(METRICS_EXPORTER_IMG)"' "$$file"; \ + yq eval -i '.deviceConfig.spec.configManager.image = "$(DEVICE_CONFIG_MANAGER_IMG)"' "$$file"; \ + yq eval -i '.deviceConfig.spec.testRunner.image = "$(TEST_RUNNER_IMG)"' "$$file"; \ + yq eval -i '.deviceConfig.spec.commonConfig.utilsContainer.image = "$(UTILS_IMG)"' "$$file"; \ + done sed -i -e 's|tag:.*$$|tag: ${KMM_IMAGE_TAG}|' \ -e 's|repository:.*operator.*$$|repository: ${KMM_OPERATOR_IMG_NAME}|' \ -e 's|repository:.*webhook.*$$|repository: ${KMM_WEBHOOK_IMG_NAME}|' \ @@ -298,6 +317,18 @@ docker-push: ## Push docker image with the manager. docker-save: ## Save the container image with the manager. docker save $(IMG) | gzip > $(DOCKER_CONTAINER_IMG).tar.gz +.PHONY: docker-build-utils +docker-build-utils: ## Build docker image for utils container. + DOCKER_BUILDKIT=1 docker build -t $(UTILS_IMG) --label HOURLY_TAG=$(HOURLY_TAG_LABEL) -f internal/utils_container/Dockerfile . + +.PHONY: docker-push-utils +docker-push-utils: ## Push docker image for utils container. 
+ docker push $(UTILS_IMG) + +.PHONY: docker-save-utils +docker-save-utils: ## Save the utils container image as tar.gz. + docker save $(UTILS_IMG) | gzip > $(UTILS_IMAGE_NAME)-$(IMAGE_TAG).tar.gz + .PHONY: docker-build-env docker-build-env: ## Build the docker shell container. @echo "Building the Docker environment..." diff --git a/hack/k8s-patch/metadata-patch/values.yaml b/hack/k8s-patch/metadata-patch/values.yaml index b08b2f32..6b14cebe 100644 --- a/hack/k8s-patch/metadata-patch/values.yaml +++ b/hack/k8s-patch/metadata-patch/values.yaml @@ -88,7 +88,7 @@ deviceConfig: initContainerImage: busybox:1.36 utilsContainer: # -- gpu operator utility container image - image: docker.io/rocm/gpu-operator-utils:v1.4.0 + image: docker.io/rocm/gpu-operator-utils:latest # -- utility container image pull policy imagePullPolicy: IfNotPresent # -- utility container image pull secret, e.g. {"name": "mySecretName"} @@ -129,7 +129,7 @@ deviceConfig: # -- external port for pulling metrics from outside the cluster for NodePort service, in the range 30000-32767 (assigned automatically by default) nodePort: 32500 # -- metrics exporter image - image: rocm/device-metrics-exporter:latest + image: docker.io/rocm/device-metrics-exporter:latest # -- metrics exporter image pull policy imagePullPolicy: "IfNotPresent" # -- name of the metrics exporter config map, e.g. {"name": "metricConfigMapName"} @@ -187,7 +187,7 @@ deviceConfig: # -- enable / disable test runner enable: false # -- test runner image - image: rocm/test-runner:latest + image: docker.io/rocm/test-runner:latest # -- test runner image pull policy imagePullPolicy: "IfNotPresent" # -- test runner config map, e.g. 
{"name": "myConfigMap"} @@ -214,7 +214,7 @@ deviceConfig: # -- enable/disable the config manager enable: false # -- config manager image - image: rocm/device-config-manager:latest + image: docker.io/rocm/device-config-manager:latest # -- image pull policy for config manager image imagePullPolicy: IfNotPresent # -- image pull secret for config manager image, e.g. {"name": "myPullSecret"} diff --git a/hack/openshift-patch/metadata-patch/values.yaml b/hack/openshift-patch/metadata-patch/values.yaml index 30dfc45c..82f247da 100644 --- a/hack/openshift-patch/metadata-patch/values.yaml +++ b/hack/openshift-patch/metadata-patch/values.yaml @@ -75,8 +75,11 @@ metricsExporter: deviceConfig: spec: metricsExporter: - image: rocm/device-metrics-exporter:latest + image: docker.io/rocm/device-metrics-exporter:latest configManager: - image: rocm/device-config-manager:latest + image: docker.io/rocm/device-config-manager:latest testRunner: - image: rocm/test-runner:latest + image: docker.io/rocm/test-runner:latest + commonConfig: + utilsContainer: + image: docker.io/rocm/gpu-operator-utils:latest diff --git a/helm-charts-k8s/README.md b/helm-charts-k8s/README.md index 216745a6..31355583 100644 --- a/helm-charts-k8s/README.md +++ b/helm-charts-k8s/README.md @@ -160,13 +160,13 @@ Kubernetes: `>= 1.29.0-0` | crds.defaultCR.install | bool | `true` | Deploy default DeviceConfig during helm chart installation | | crds.defaultCR.upgrade | bool | `false` | Deploy / Patch default DeviceConfig during helm chart upgrade. Be careful about this option: 1. Your customized change on default DeviceConfig may be overwritten 2. 
Your existing DeviceConfig may conflict with upgraded default DeviceConfig | | deviceConfig.spec.commonConfig.initContainerImage | string | `"busybox:1.36"` | init container image | -| deviceConfig.spec.commonConfig.utilsContainer.image | string | `"docker.io/rocm/gpu-operator-utils:v1.4.0"` | gpu operator utility container image | +| deviceConfig.spec.commonConfig.utilsContainer.image | string | `"docker.io/rocm/gpu-operator-utils:latest"` | gpu operator utility container image | | deviceConfig.spec.commonConfig.utilsContainer.imagePullPolicy | string | `"IfNotPresent"` | utility container image pull policy | | deviceConfig.spec.commonConfig.utilsContainer.imageRegistrySecret | object | `{}` | utility container image pull secret, e.g. {"name": "mySecretName"} | | deviceConfig.spec.configManager.config | object | `{}` | config map for config manager, e.g. {"name": "myConfigMap"} | | deviceConfig.spec.configManager.configManagerTolerations | list | `[]` | config manager tolerations | | deviceConfig.spec.configManager.enable | bool | `false` | enable/disable the config manager | -| deviceConfig.spec.configManager.image | string | `"rocm/device-config-manager:latest"` | config manager image | +| deviceConfig.spec.configManager.image | string | `"docker.io/rocm/device-config-manager:latest"` | config manager image | | deviceConfig.spec.configManager.imagePullPolicy | string | `"IfNotPresent"` | image pull policy for config manager image | | deviceConfig.spec.configManager.imageRegistrySecret | object | `{}` | image pull secret for config manager image, e.g. 
{"name": "myPullSecret"} | | deviceConfig.spec.configManager.selector | object | `{}` | node selector for config manager, if not specified it will reuse spec.selector | @@ -206,7 +206,7 @@ Kubernetes: `>= 1.29.0-0` | deviceConfig.spec.driver.version | string | `"30.20.1"` | specify an out-of-tree driver version to install | | deviceConfig.spec.metricsExporter.config | object | `{}` | name of the metrics exporter config map, e.g. {"name": "metricConfigMapName"} | | deviceConfig.spec.metricsExporter.enable | bool | `true` | enable / disable device metrics exporter | -| deviceConfig.spec.metricsExporter.image | string | `"rocm/device-metrics-exporter:latest"` | metrics exporter image | +| deviceConfig.spec.metricsExporter.image | string | `"docker.io/rocm/device-metrics-exporter:latest"` | metrics exporter image | | deviceConfig.spec.metricsExporter.imagePullPolicy | string | `"IfNotPresent"` | metrics exporter image pull policy | | deviceConfig.spec.metricsExporter.imageRegistrySecret | object | `{}` | metrics exporter image pull secret, e.g. {"name": "pullSecretName"} | | deviceConfig.spec.metricsExporter.nodePort | int | `32500` | external port for pulling metrics from outside the cluster for NodePort service, in the range 30000-32767 (assigned automatically by default) | @@ -238,7 +238,7 @@ Kubernetes: `>= 1.29.0-0` | deviceConfig.spec.selector | object | `{"feature.node.kubernetes.io/amd-gpu":"true"}` | Set node selector for the default DeviceConfig | | deviceConfig.spec.testRunner.config | object | `{}` | test runner config map, e.g. 
{"name": "myConfigMap"} | | deviceConfig.spec.testRunner.enable | bool | `false` | enable / disable test runner | -| deviceConfig.spec.testRunner.image | string | `"rocm/test-runner:latest"` | test runner image | +| deviceConfig.spec.testRunner.image | string | `"docker.io/rocm/test-runner:latest"` | test runner image | | deviceConfig.spec.testRunner.imagePullPolicy | string | `"IfNotPresent"` | test runner image pull policy | | deviceConfig.spec.testRunner.imageRegistrySecret | object | `{}` | test runner image pull secret | | deviceConfig.spec.testRunner.logsLocation.hostPath | string | `"/var/log/amd-test-runner"` | host directory to save test run logs | diff --git a/helm-charts-k8s/values.yaml b/helm-charts-k8s/values.yaml index b08b2f32..6b14cebe 100644 --- a/helm-charts-k8s/values.yaml +++ b/helm-charts-k8s/values.yaml @@ -88,7 +88,7 @@ deviceConfig: initContainerImage: busybox:1.36 utilsContainer: # -- gpu operator utility container image - image: docker.io/rocm/gpu-operator-utils:v1.4.0 + image: docker.io/rocm/gpu-operator-utils:latest # -- utility container image pull policy imagePullPolicy: IfNotPresent # -- utility container image pull secret, e.g. {"name": "mySecretName"} @@ -129,7 +129,7 @@ deviceConfig: # -- external port for pulling metrics from outside the cluster for NodePort service, in the range 30000-32767 (assigned automatically by default) nodePort: 32500 # -- metrics exporter image - image: rocm/device-metrics-exporter:latest + image: docker.io/rocm/device-metrics-exporter:latest # -- metrics exporter image pull policy imagePullPolicy: "IfNotPresent" # -- name of the metrics exporter config map, e.g. {"name": "metricConfigMapName"} @@ -187,7 +187,7 @@ deviceConfig: # -- enable / disable test runner enable: false # -- test runner image - image: rocm/test-runner:latest + image: docker.io/rocm/test-runner:latest # -- test runner image pull policy imagePullPolicy: "IfNotPresent" # -- test runner config map, e.g. 
{"name": "myConfigMap"} @@ -214,7 +214,7 @@ deviceConfig: # -- enable/disable the config manager enable: false # -- config manager image - image: rocm/device-config-manager:latest + image: docker.io/rocm/device-config-manager:latest # -- image pull policy for config manager image imagePullPolicy: IfNotPresent # -- image pull secret for config manager image, e.g. {"name": "myPullSecret"} diff --git a/helm-charts-openshift/values.yaml b/helm-charts-openshift/values.yaml index 30dfc45c..82f247da 100644 --- a/helm-charts-openshift/values.yaml +++ b/helm-charts-openshift/values.yaml @@ -75,8 +75,11 @@ metricsExporter: deviceConfig: spec: metricsExporter: - image: rocm/device-metrics-exporter:latest + image: docker.io/rocm/device-metrics-exporter:latest configManager: - image: rocm/device-config-manager:latest + image: docker.io/rocm/device-config-manager:latest testRunner: - image: rocm/test-runner:latest + image: docker.io/rocm/test-runner:latest + commonConfig: + utilsContainer: + image: docker.io/rocm/gpu-operator-utils:latest