From 5e84b41937e5e03edbd75ebcc5c928e81fee6708 Mon Sep 17 00:00:00 2001 From: Richa Shalom Gadagotti Date: Sun, 25 Jan 2026 08:42:47 +0000 Subject: [PATCH 1/3] Instance types CRD changes --- ...aker.aws.amazon.com_inferenceendpointconfigs.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml index 908fb9f8..674036d9 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml @@ -351,9 +351,19 @@ spec: pattern: ^$|^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$ type: string instanceType: - description: Instance Type to deploy the model on + description: |- + Single instance type to deploy the model on. + This field is mutually exclusive with instanceTypes. + Use this when you want to deploy on a specific instance type. pattern: ^ml\..* type: string + instanceTypes: + description: |- + List of instance types to deploy the model on, in order of preference. + Instance types are selected based on the order specified, selecting the first available type. + items: + type: string + type: array intelligentRoutingSpec: description: |- Configuration for intelligent routing From f97ab81d520ef8b95826e4c62583f656b8627298 Mon Sep 17 00:00:00 2001 From: Richa Shalom Gadagotti Date: Mon, 26 Jan 2026 18:20:27 +0000 Subject: [PATCH 2/3] Version change for Instance types --- .../HyperPodHelmChart/charts/inference-operator/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml index 86e81948..6a86d714 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.2.0 +version: 1.3.0 # This is the version number of the application being deployed. Keep this aligned # with operator image MAJOR.MINOR version. From 7956dc8a80a4888cf3cfb023b4176ec26e012c1b Mon Sep 17 00:00:00 2001 From: Richa Shalom Gadagotti Date: Mon, 26 Jan 2026 20:09:24 +0000 Subject: [PATCH 3/3] Instance types CRD and version changes --- helm_chart/HyperPodHelmChart/Chart.yaml | 2 +- .../charts/inference-operator/Chart.yaml | 2 +- ...s.amazon.com_inferenceendpointconfigs.yaml | 133 +++++++++++++++--- ...emaker.aws.amazon.com_jumpstartmodels.yaml | 36 +++-- .../charts/inference-operator/values.yaml | 2 +- 5 files changed, 141 insertions(+), 34 deletions(-) diff --git a/helm_chart/HyperPodHelmChart/Chart.yaml b/helm_chart/HyperPodHelmChart/Chart.yaml index e89e3cb3..b7e6b668 100644 --- a/helm_chart/HyperPodHelmChart/Chart.yaml +++ b/helm_chart/HyperPodHelmChart/Chart.yaml @@ -81,7 +81,7 @@ dependencies: repository: "file://charts/team-role-and-bindings" condition: team-role-and-bindings.enabled - name: hyperpod-inference-operator - version: "1.2.0" + version: "1.3.0" repository: "file://charts/inference-operator" condition: inferenceOperators.enabled - name: hyperpod-patching diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml index 6a86d714..9f6aa27e 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml @@ -19,7 +19,7 @@ version: 1.3.0 # This is the version number of the application being deployed. Keep this aligned # with operator image MAJOR.MINOR version. -appVersion: "2.2" +appVersion: "2.3" dependencies: - name: aws-mountpoint-s3-csi-driver diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml index 674036d9..0fa281b7 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml @@ -352,15 +352,15 @@ spec: type: string instanceType: description: |- - Single instance type to deploy the model on. - This field is mutually exclusive with instanceTypes. + Single instance type to deploy the model on. + This field is mutually exclusive with instanceTypes. Use this when you want to deploy on a specific instance type. pattern: ^ml\..* type: string instanceTypes: description: |- - List of instance types to deploy the model on, in order of preference. - Instance types are selected based on the order specified, selecting the first available type. + List of instance types to deploy the model on, in order of preference. + Instance types are selected based on the order specified, selecting the first available type. items: type: string type: array @@ -873,8 +873,9 @@ spec: in a Container. properties: name: - description: Name of the environment variable. Must be a - C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -932,6 +933,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1505,7 +1543,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -1566,7 +1604,6 @@ spec: - resources type: object required: - - instanceType - modelName - modelSourceConfig - worker @@ -1657,8 +1694,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -1711,15 +1748,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -1728,7 +1773,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer @@ -2226,8 +2271,9 @@ spec: in a Container. properties: name: - description: Name of the environment variable. Must be a - C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -2285,6 +2331,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -2390,7 +2473,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2539,8 +2622,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -2593,15 +2676,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -2610,7 +2701,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml index 4e1b5443..93118977 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml @@ -944,8 +944,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -998,15 +998,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -1015,7 +1023,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer @@ -1598,8 +1606,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -1652,15 +1660,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -1669,7 +1685,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml index 48171a40..4129d871 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml @@ -21,7 +21,7 @@ image: ap-southeast-4: 311141544681.dkr.ecr.ap-southeast-4.amazonaws.com ap-southeast-3: 158128612970.dkr.ecr.ap-southeast-3.amazonaws.com eu-south-2: 025050981094.dkr.ecr.eu-south-2.amazonaws.com - tag: v2.2 + tag: v2.3 pullPolicy: Always repository: hyperpodClusterArn: