diff --git a/helm_chart/HyperPodHelmChart/Chart.yaml b/helm_chart/HyperPodHelmChart/Chart.yaml index e89e3cb3..b7e6b668 100644 --- a/helm_chart/HyperPodHelmChart/Chart.yaml +++ b/helm_chart/HyperPodHelmChart/Chart.yaml @@ -81,7 +81,7 @@ dependencies: repository: "file://charts/team-role-and-bindings" condition: team-role-and-bindings.enabled - name: hyperpod-inference-operator - version: "1.2.0" + version: "1.3.0" repository: "file://charts/inference-operator" condition: inferenceOperators.enabled - name: hyperpod-patching diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml index 86e81948..9f6aa27e 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml @@ -15,11 +15,11 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 1.2.0 +version: 1.3.0 # This is the version number of the application being deployed. Keep this aligned # with operator image MAJOR.MINOR version. -appVersion: "2.2" +appVersion: "2.3" dependencies: - name: aws-mountpoint-s3-csi-driver diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml index 908fb9f8..0fa281b7 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml @@ -351,9 +351,19 @@ spec: pattern: ^$|^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$ type: string instanceType: - description: Instance Type to deploy the model on + description: |- + Single instance type to deploy the model on. + This field is mutually exclusive with instanceTypes. + Use this when you want to deploy on a specific instance type. pattern: ^ml\..* type: string + instanceTypes: + description: |- + List of instance types to deploy the model on, in order of preference. + Instance types are selected based on the order specified, selecting the first available type. + items: + type: string + type: array intelligentRoutingSpec: description: |- Configuration for intelligent routing @@ -863,8 +873,9 @@ spec: in a Container. properties: name: - description: Name of the environment variable. Must be a - C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -922,6 +933,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1495,7 +1543,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -1556,7 +1604,6 @@ spec: - resources type: object required: - - instanceType - modelName - modelSourceConfig - worker @@ -1647,8 +1694,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -1701,15 +1748,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -1718,7 +1773,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer @@ -2216,8 +2271,9 @@ spec: in a Container. properties: name: - description: Name of the environment variable. Must be a - C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -2275,6 +2331,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -2380,7 +2473,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2529,8 +2622,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -2583,15 +2676,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -2600,7 +2701,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml index 4e1b5443..93118977 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml @@ -944,8 +944,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -998,15 +998,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -1015,7 +1023,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer @@ -1598,8 +1606,8 @@ spec: description: Status of the Deployment Object properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available non-terminating pods + (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -1652,15 +1660,23 @@ spec: format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted + description: Total number of non-terminating pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment (their labels match the selector). format: int32 type: integer + terminatingReplicas: + description: |- + Total number of terminating pods targeted by this deployment. Terminating pods have a non-null + .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. + + This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + format: int32 + type: integer unavailableReplicas: description: |- Total number of unavailable pods targeted by this deployment. This is the total number of @@ -1669,7 +1685,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted + description: Total number of non-terminating pods targeted by this deployment that have the desired template spec. format: int32 type: integer diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml index 48171a40..4129d871 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml @@ -21,7 +21,7 @@ image: ap-southeast-4: 311141544681.dkr.ecr.ap-southeast-4.amazonaws.com ap-southeast-3: 158128612970.dkr.ecr.ap-southeast-3.amazonaws.com eu-south-2: 025050981094.dkr.ecr.eu-south-2.amazonaws.com - tag: v2.2 + tag: v2.3 pullPolicy: Always repository: hyperpodClusterArn: