From f02582a55077a6b59e89356af905fe4972f0fc94 Mon Sep 17 00:00:00 2001 From: Abhijith Ganesh Date: Tue, 29 Jul 2025 16:56:04 +0530 Subject: [PATCH 1/2] Initial commit Signed-off-by: Abhijith Ganesh --- helm/online-feature-store/Chart.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 helm/online-feature-store/Chart.yaml diff --git a/helm/online-feature-store/Chart.yaml b/helm/online-feature-store/Chart.yaml new file mode 100644 index 00000000..fdfa8e92 --- /dev/null +++ b/helm/online-feature-store/Chart.yaml @@ -0,0 +1,21 @@ +apiVersion: v2 +name: online-feature-store +description: BharatMLStack Online Feature Store - A hyper-scalable feature store for real-time ML +type: application +version: 1.0.0 +appVersion: "1.0.0" +home: https://github.com/AbhijithGanesh/BharatMLStack +sources: + - https://github.com/AbhijithGanesh/BharatMLStack/tree/main/online-feature-store +maintainers: + - name: BharatMLStack Team + email: contact@bharatmlstack.dev +keywords: + - machine-learning + - feature-store + - real-time + - ml-platform + - bharatmlstack +annotations: + category: Machine Learning + licenses: MIT From 14425213ec619cf1b99f2bab86c3b553b56d7117 Mon Sep 17 00:00:00 2001 From: Abhijith Ganesh Date: Tue, 29 Jul 2025 17:22:07 +0530 Subject: [PATCH 2/2] Interim commit Signed-off-by: Abhijith Ganesh --- helm/horizon/Chart.yaml | 21 ++ helm/horizon/values.yaml | 334 +++++++++++++++++ helm/online-feature-store/Chart.yaml | 4 +- .../templates/_helpers.tpl | 139 ++++++++ .../templates/deployment.yaml | 222 ++++++++++++ .../templates/gateway.yaml | 37 ++ helm/online-feature-store/templates/hpa.yaml | 50 +++ .../templates/httproute.yaml | 35 ++ .../templates/ingress.yaml | 60 ++++ .../templates/networkpolicy.yaml | 24 ++ helm/online-feature-store/templates/pdb.yaml | 19 + .../templates/service.yaml | 24 ++ .../templates/serviceaccount.yaml | 14 + .../templates/servicemonitor.yaml | 34 ++ .../templates/tests/api-test.yaml | 41 +++ 
.../templates/tests/latency-test.yaml | 46 +++ helm/online-feature-store/templates/vpa.yaml | 29 ++ helm/online-feature-store/values-dev.yaml | 88 +++++ helm/online-feature-store/values-prod.yaml | 171 +++++++++ helm/online-feature-store/values.yaml | 336 ++++++++++++++++++ helm/trufflebox-ui/Chart.yaml | 21 ++ helm/trufflebox-ui/values.yaml | 268 ++++++++++++++ 22 files changed, 2015 insertions(+), 2 deletions(-) create mode 100644 helm/horizon/Chart.yaml create mode 100644 helm/horizon/values.yaml create mode 100644 helm/online-feature-store/templates/_helpers.tpl create mode 100644 helm/online-feature-store/templates/deployment.yaml create mode 100644 helm/online-feature-store/templates/gateway.yaml create mode 100644 helm/online-feature-store/templates/hpa.yaml create mode 100644 helm/online-feature-store/templates/httproute.yaml create mode 100644 helm/online-feature-store/templates/ingress.yaml create mode 100644 helm/online-feature-store/templates/networkpolicy.yaml create mode 100644 helm/online-feature-store/templates/pdb.yaml create mode 100644 helm/online-feature-store/templates/service.yaml create mode 100644 helm/online-feature-store/templates/serviceaccount.yaml create mode 100644 helm/online-feature-store/templates/servicemonitor.yaml create mode 100644 helm/online-feature-store/templates/tests/api-test.yaml create mode 100644 helm/online-feature-store/templates/tests/latency-test.yaml create mode 100644 helm/online-feature-store/templates/vpa.yaml create mode 100644 helm/online-feature-store/values-dev.yaml create mode 100644 helm/online-feature-store/values-prod.yaml create mode 100644 helm/online-feature-store/values.yaml create mode 100644 helm/trufflebox-ui/Chart.yaml create mode 100644 helm/trufflebox-ui/values.yaml diff --git a/helm/horizon/Chart.yaml b/helm/horizon/Chart.yaml new file mode 100644 index 00000000..c6fd68c5 --- /dev/null +++ b/helm/horizon/Chart.yaml @@ -0,0 +1,21 @@ +apiVersion: v2 +name: horizon +description: BharatMLStack 
Horizon - Control plane backend for metadata and configuration management +type: application +version: 1.0.0 +appVersion: "1.0.0" +home: https://github.com/Meesho/BharatMLStack +sources: + - https://github.com/Meesho/BharatMLStack/tree/main/horizon +maintainers: + - name: BharatMLStack Team + email: contact@bharatmlstack.dev +keywords: + - machine-learning + - feature-store + - control-plane + - metadata-management + - bharatmlstack +annotations: + category: Machine Learning + licenses: MIT diff --git a/helm/horizon/values.yaml b/helm/horizon/values.yaml new file mode 100644 index 00000000..fd082909 --- /dev/null +++ b/helm/horizon/values.yaml @@ -0,0 +1,334 @@ +# Default values for horizon +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Global settings +global: + imageRegistry: "ghcr.io/meesho" + imagePullSecrets: [] + storageClass: "" + +# Container image settings +image: + registry: "" + repository: "horizon" + tag: "latest" + pullPolicy: IfNotPresent + +# Service account settings +serviceAccount: + create: true + automount: true + annotations: {} + name: "" + +# Pod Security Context +podSecurityContext: + fsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + +# Container Security Context +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 65532 + +# Deployment settings +replicaCount: 2 + +# Resource limits and requests +resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 250m + memory: 512Mi + +# Horizontal Pod Autoscaler +autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 8 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + +# Vertical Pod Autoscaler +verticalPodAutoscaler: + enabled: false + updateMode: "Auto" + minAllowed: + cpu: 50m + memory: 128Mi + maxAllowed: + cpu: 1 + memory: 2Gi + +# Pod Disruption Budget
+podDisruptionBudget: + enabled: true + minAvailable: 1 + +# Service settings +service: + type: ClusterIP + port: 8082 + targetPort: 8082 + annotations: {} + +# Ingress settings (NGINX default) +ingress: + enabled: true + className: "nginx" + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + cert-manager.io/cluster-issuer: "letsencrypt-prod" + hosts: + - host: horizon.bharatmlstack.local + paths: + - path: / + pathType: Prefix + tls: + - secretName: horizon-tls + hosts: + - horizon.bharatmlstack.local + +# Gateway API support (production-ready routing) +gateway: + enabled: false + className: "istio" + gatewayName: "bharatmlstack-gateway" + namespace: "bharatml-system" + hosts: + - "horizon.bharatml.prod.com" + tls: + enabled: true + certificateRefs: + - name: "bharatml-tls" + namespace: "bharatml-system" + +# HTTP Route for Gateway API +httpRoute: + enabled: false + parentRefs: + - name: "bharatmlstack-gateway" + namespace: "bharatml-system" + hostnames: + - "horizon.bharatml.prod.com" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: "horizon" + port: 8082 + +# Network Policies +networkPolicy: + enabled: true + ingress: + - from: + - namespaceSelector: + matchLabels: + name: bharatml-system + - podSelector: + matchLabels: + app.kubernetes.io/name: trufflebox-ui + - podSelector: + matchLabels: + app.kubernetes.io/name: online-feature-store + ports: + - protocol: TCP + port: 8082 + egress: + - to: + - namespaceSelector: + matchLabels: + name: bharatml-infra + ports: + - protocol: TCP + port: 3306 # MySQL + - protocol: TCP + port: 9042 # ScyllaDB + - protocol: TCP + port: 2379 # etcd + - to: [] + ports: + - protocol: TCP + port: 53 + - protocol: UDP + port: 53 + +# Service Monitor for Prometheus +serviceMonitor: + enabled: true + namespace: "" + interval: 30s + scrapeTimeout: 10s + labels: {} + annotations: {} + 
path: /metrics + honorLabels: false + +# Liveness and readiness probes +probes: + liveness: + enabled: true + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + httpGet: + path: /health + port: http + readiness: + enabled: true + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + httpGet: + path: /health + port: http + +# Application configuration +config: + # Application settings + app: + name: "horizon" + environment: "PROD" + env: "production" + port: 8082 + logLevel: "INFO" + metricSamplingRate: 1 + gcPercentage: 1 + + # MySQL database configuration + mysql: + master: + host: "mysql.bharatml-infra.svc.cluster.local" + port: 3306 + username: "root" + password: "" # Set via secret + dbName: "testdb" + maxPoolSize: 10 + minPoolSize: 2 + slave: + host: "mysql.bharatml-infra.svc.cluster.local" + port: 3306 + username: "root" + password: "" # Set via secret + maxPoolSize: 10 + minPoolSize: 2 + activeConfigIds: "2" + + # etcd configuration + etcd: + server: "etcd.bharatml-infra.svc.cluster.local:2379" + watcherEnabled: true + + # CORS configuration + cors: + origins: "http://localhost:3000,http://localhost:8080" + + # Online Feature Store integration + onlineFeatureStore: + appName: "onfs" + + # ScyllaDB configuration (for metadata) + scylla: + contactPoints: "scylla.bharatml-infra.svc.cluster.local" + keyspace: "onfs" + port: 9042 + numConns: 5 + timeoutMs: 10000 + username: "" + password: "" + activeConfigIds: "1" + + # Redis configuration (for caching) + redis: + failover: + activeConfigIds: "4" + +# Secrets (externally managed) +secrets: + # Database credentials + database: + secretName: "horizon-db-credentials" + mysqlPasswordKey: "mysql-password" + scyllaUsernameKey: "scylla-username" + scyllaPasswordKey: "scylla-password" + +# Node selector +nodeSelector: {} + +# Tolerations +tolerations: [] + +# Affinity and anti-affinity +affinity: + podAntiAffinity: + 
preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - horizon + topologyKey: kubernetes.io/hostname + +# Additional labels +labels: {} + +# Additional annotations +annotations: {} + +# Pod annotations +podAnnotations: {} + +# Pod labels +podLabels: {} + +# Volume mounts for temporary storage +volumeMounts: + - name: tmp + mountPath: /tmp + - name: var-run + mountPath: /var/run + +# Volumes +volumes: + - name: tmp + emptyDir: {} + - name: var-run + emptyDir: {} + +# Extra environment variables +extraEnvVars: [] + +# Extra environment variables from ConfigMaps/Secrets +extraEnvVarsFrom: [] + +# Init containers +initContainers: [] + +# Sidecar containers +sidecars: [] + +# Additional volumes +extraVolumes: [] + +# Additional volume mounts +extraVolumeMounts: [] diff --git a/helm/online-feature-store/Chart.yaml b/helm/online-feature-store/Chart.yaml index fdfa8e92..f64d1d2f 100644 --- a/helm/online-feature-store/Chart.yaml +++ b/helm/online-feature-store/Chart.yaml @@ -4,9 +4,9 @@ description: BharatMLStack Online Feature Store - A hyper-scalable feature store type: application version: 1.0.0 appVersion: "1.0.0" -home: https://github.com/AbhijithGanesh/BharatMLStack +home: https://github.com/Meesho/BharatMLStack sources: - - https://github.com/AbhijithGanesh/BharatMLStack/tree/main/online-feature-store + - https://github.com/Meesho/BharatMLStack/tree/main/online-feature-store maintainers: - name: BharatMLStack Team email: contact@bharatmlstack.dev diff --git a/helm/online-feature-store/templates/_helpers.tpl b/helm/online-feature-store/templates/_helpers.tpl new file mode 100644 index 00000000..ebb1af86 --- /dev/null +++ b/helm/online-feature-store/templates/_helpers.tpl @@ -0,0 +1,139 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "online-feature-store.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "online-feature-store.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "online-feature-store.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "online-feature-store.labels" -}} +helm.sh/chart: {{ include "online-feature-store.chart" . }} +{{ include "online-feature-store.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: feature-store +app.kubernetes.io/part-of: bharatml-stack +{{- with .Values.labels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "online-feature-store.selectorLabels" -}} +app.kubernetes.io/name: {{ include "online-feature-store.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "online-feature-store.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "online-feature-store.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Create the image pull secret names +*/}} +{{- define "online-feature-store.imagePullSecrets" -}} +{{- with .Values.global.imagePullSecrets }} +{{- range . }} +- name: {{ . }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create a default network policy name +*/}} +{{- define "online-feature-store.networkPolicyName" -}} +{{- printf "%s-netpol" (include "online-feature-store.fullname" .) }} +{{- end }} + +{{/* +Create a default service monitor name +*/}} +{{- define "online-feature-store.serviceMonitorName" -}} +{{- printf "%s-metrics" (include "online-feature-store.fullname" .) }} +{{- end }} + +{{/* +Create a default HPA name +*/}} +{{- define "online-feature-store.hpaName" -}} +{{- printf "%s-hpa" (include "online-feature-store.fullname" .) }} +{{- end }} + +{{/* +Create a default VPA name +*/}} +{{- define "online-feature-store.vpaName" -}} +{{- printf "%s-vpa" (include "online-feature-store.fullname" .) }} +{{- end }} + +{{/* +Create a default PDB name +*/}} +{{- define "online-feature-store.pdbName" -}} +{{- printf "%s-pdb" (include "online-feature-store.fullname" .) }} +{{- end }} + +{{/* +Create ingress hostname +*/}} +{{- define "online-feature-store.ingressHost" -}} +{{- if .Values.ingress.hosts }} +{{- range .Values.ingress.hosts }} +{{- .host }} +{{- end }} +{{- else }} +{{- printf "%s.%s" (include "online-feature-store.name" .) "local" }} +{{- end }} +{{- end }} + +{{/* +Create gateway hostname +*/}} +{{- define "online-feature-store.gatewayHost" -}} +{{- if .Values.gateway.hosts }} +{{- range .Values.gateway.hosts }} +{{- . }} +{{- end }} +{{- else }} +{{- printf "%s.%s" (include "online-feature-store.name" .) 
"local" }} +{{- end }} +{{- end }} diff --git a/helm/online-feature-store/templates/deployment.yaml b/helm/online-feature-store/templates/deployment.yaml new file mode 100644 index 00000000..df5c0e66 --- /dev/null +++ b/helm/online-feature-store/templates/deployment.yaml @@ -0,0 +1,222 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "online-feature-store.fullname" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "online-feature-store.labels" . | nindent 4 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "online-feature-store.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "online-feature-store.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "online-feature-store.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- if .Values.initContainers }} + initContainers: + {{- toYaml .Values.initContainers | nindent 8 }} + {{- end }} + containers: + - name: online-feature-store + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.registry | default .Values.global.imageRegistry }}/{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.config.app.port }} + protocol: TCP + - name: metrics + containerPort: 8080 + protocol: TCP + {{- if .Values.probes.liveness.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.probes.liveness.httpGet.path }} + port: {{ .Values.probes.liveness.httpGet.port }} + initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.liveness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.liveness.failureThreshold }} + {{- end }} + {{- if .Values.probes.readiness.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.probes.readiness.httpGet.path }} + port: {{ .Values.probes.readiness.httpGet.port }} + initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.readiness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.readiness.failureThreshold }} + {{- end }} + env: + # Application configuration + - name: APP_ENV + value: {{ .Values.config.app.env | quote }} + - name: APP_LOG_LEVEL + value: {{ .Values.config.app.logLevel | quote }} + - name: APP_METRIC_SAMPLING_RATE + value: {{ .Values.config.app.metricSamplingRate | quote }} + - name: APP_NAME + value: {{ .Values.config.app.name | quote }} + - name: APP_PORT + value: {{ .Values.config.app.port | quote }} + {{- if 
.Values.secrets.authToken.secretName }} + - name: AUTH_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.authToken.secretName }} + key: {{ .Values.secrets.authToken.key }} + {{- end }} + + # Pod identification + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + + # etcd configuration + - name: ETCD_SERVER + value: {{ .Values.config.etcd.server | quote }} + - name: ETCD_WATCHER_ENABLED + value: {{ .Values.config.etcd.watcherEnabled | quote }} + + # ScyllaDB configuration + - name: STORAGE_SCYLLA_1_CONTACT_POINTS + value: {{ .Values.config.storage.scylla.contactPoints | quote }} + - name: STORAGE_SCYLLA_1_KEYSPACE + value: {{ .Values.config.storage.scylla.keyspace | quote }} + - name: STORAGE_SCYLLA_1_PORT + value: {{ .Values.config.storage.scylla.port | quote }} + - name: STORAGE_SCYLLA_1_NUM_CONNS + value: {{ .Values.config.storage.scylla.numConns | quote }} + - name: STORAGE_SCYLLA_1_TIMEOUT_IN_MS + value: {{ .Values.config.storage.scylla.timeoutMs | quote }} + {{- if .Values.secrets.database.secretName }} + - name: STORAGE_SCYLLA_1_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.database.secretName }} + key: {{ .Values.secrets.database.scyllaUsernameKey }} + - name: STORAGE_SCYLLA_1_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.secrets.database.secretName }} + key: {{ .Values.secrets.database.scyllaPasswordKey }} + {{- end }} + - name: STORAGE_SCYLLA_ACTIVE_CONFIG_IDS + value: {{ .Values.config.storage.scylla.activeConfigIds | quote }} + + # Redis configuration + - name: STORAGE_REDIS_STANDALONE_2_ADDR + value: {{ .Values.config.storage.redis.addr | quote }} + - name: STORAGE_REDIS_STANDALONE_2_DB + value: {{ .Values.config.storage.redis.db | quote }} + - name: 
STORAGE_REDIS_STANDALONE_2_MAX_IDLE_CONN + value: {{ .Values.config.storage.redis.maxIdleConn | quote }} + - name: STORAGE_REDIS_STANDALONE_2_MIN_IDLE_CONN + value: {{ .Values.config.storage.redis.minIdleConn | quote }} + - name: STORAGE_REDIS_STANDALONE_2_MAX_ACTIVE_CONN + value: {{ .Values.config.storage.redis.maxActiveConn | quote }} + - name: STORAGE_REDIS_STANDALONE_2_POOL_TIMEOUT_IN_MS + value: {{ .Values.config.storage.redis.poolTimeout | quote }} + - name: STORAGE_REDIS_STANDALONE_2_READ_TIMEOUT_IN_MS + value: {{ .Values.config.storage.redis.readTimeout | quote }} + - name: STORAGE_REDIS_STANDALONE_2_WRITE_TIMEOUT_IN_MS + value: {{ .Values.config.storage.redis.writeTimeout | quote }} + - name: STORAGE_REDIS_STANDALONE_ACTIVE_CONFIG_IDS + value: {{ .Values.config.storage.redis.activeConfigIds | quote }} + + # Cache configuration + - name: IN_MEM_CACHE_3_ENABLED + value: {{ .Values.config.cache.inMemory.enabled | quote }} + - name: IN_MEM_CACHE_3_NAME + value: {{ .Values.config.cache.inMemory.name | quote }} + - name: IN_MEM_CACHE_3_SIZE_IN_BYTES + value: {{ .Values.config.cache.inMemory.sizeInBytes | quote }} + - name: IN_MEM_CACHE_ACTIVE_CONFIG_IDS + value: {{ .Values.config.cache.inMemory.activeConfigIds | quote }} + - name: DISTRIBUTED_CACHE_CONF_IDS + value: {{ .Values.config.cache.distributed.confIds | quote }} + + # P2P Cache configuration + - name: P2P_CACHE_5_ENABLED + value: {{ .Values.config.cache.p2p.enabled | quote }} + - name: P2P_CACHE_5_NAME + value: {{ .Values.config.cache.p2p.name | quote }} + - name: P2P_CACHE_5_OWN_PARTITION_SIZE_IN_BYTES + value: {{ .Values.config.cache.p2p.ownPartitionSizeInBytes | quote }} + - name: P2P_CACHE_5_GLOBAL_SIZE_IN_BYTES + value: {{ .Values.config.cache.p2p.globalSizeInBytes | quote }} + - name: P2P_CACHE_ACTIVE_CONFIG_IDS + value: {{ .Values.config.cache.p2p.activeConfigIds | quote }} + + {{- with .Values.extraEnvVars }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.extraEnvVarsFrom }} + envFrom: + {{- toYaml .Values.extraEnvVarsFrom | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + {{- toYaml .Values.volumeMounts | nindent 12 }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.sidecars }} + {{- toYaml .Values.sidecars | nindent 8 }} + {{- end }} + volumes: + {{- toYaml .Values.volumes | nindent 8 }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/helm/online-feature-store/templates/gateway.yaml b/helm/online-feature-store/templates/gateway.yaml new file mode 100644 index 00000000..ca21ecfd --- /dev/null +++ b/helm/online-feature-store/templates/gateway.yaml @@ -0,0 +1,37 @@ +{{- if .Values.gateway.enabled -}} +apiVersion: gateway.networking.k8s.io/v1beta1 +kind: Gateway +metadata: + name: {{ include "online-feature-store.fullname" . }}-gateway + namespace: {{ .Values.gateway.namespace | quote }} + labels: + {{- include "online-feature-store.labels" . | nindent 4 }} +spec: + gatewayClassName: {{ .Values.gateway.className }} + listeners: + - name: http + port: 80 + protocol: HTTP + hostname: {{ .Values.gateway.hosts | default (list) | first | quote }} # a listener takes one hostname; only the first gateway.hosts entry is used + {{- if not .Values.gateway.tls.enabled }} + allowedRoutes: + namespaces: + from: Same + {{- end }} + {{- if .Values.gateway.tls.enabled }} + - name: https + port: 443 + protocol: HTTPS + hostname:
{{ .Values.gateway.hosts | default (list) | first | quote }} # a listener takes one hostname; only the first gateway.hosts entry is used + tls: + mode: Terminate + certificateRefs: + {{- range .Values.gateway.tls.certificateRefs }} + - name: {{ .name }} + namespace: {{ .namespace }} + {{- end }} + allowedRoutes: + namespaces: + from: Same + {{- end }} +{{- end }} diff --git a/helm/online-feature-store/templates/hpa.yaml b/helm/online-feature-store/templates/hpa.yaml new file mode 100644 index 00000000..71c7d0ab --- /dev/null +++ b/helm/online-feature-store/templates/hpa.yaml @@ -0,0 +1,50 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "online-feature-store.hpaName" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "online-feature-store.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "online-feature-store.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 10 + periodSeconds: 60 + scaleUp: + stabilizationWindowSeconds: 60 + policies: + - type: Percent + value: 100 + periodSeconds: 15 + - type: Pods + value: 4 + periodSeconds: 15 + selectPolicy: Max +{{- end }} diff --git a/helm/online-feature-store/templates/httproute.yaml b/helm/online-feature-store/templates/httproute.yaml new file mode 100644 index 00000000..fe3608a3 --- /dev/null +++ b/helm/online-feature-store/templates/httproute.yaml
@@ -0,0 +1,35 @@ +{{- if .Values.httpRoute.enabled -}} +apiVersion: gateway.networking.k8s.io/v1beta1 +kind: HTTPRoute +metadata: + name: {{ include "online-feature-store.fullname" . }}-route + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "online-feature-store.labels" . | nindent 4 }} +spec: + parentRefs: + {{- range .Values.httpRoute.parentRefs }} + - name: {{ .name }} + namespace: {{ .namespace }} + {{- end }} + {{- if .Values.httpRoute.hostnames }} + hostnames: + {{- range .Values.httpRoute.hostnames }} + - {{ . | quote }} + {{- end }} + {{- end }} + rules: + {{- range .Values.httpRoute.rules }} + - matches: + {{- range .matches }} + - path: + type: {{ .path.type }} + value: {{ .path.value }} + {{- end }} + backendRefs: + {{- range .backendRefs }} + - name: {{ .name }} + port: {{ .port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/online-feature-store/templates/ingress.yaml b/helm/online-feature-store/templates/ingress.yaml new file mode 100644 index 00000000..0f41db24 --- /dev/null +++ b/helm/online-feature-store/templates/ingress.yaml @@ -0,0 +1,60 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "online-feature-store.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class")) }} +{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "online-feature-store.labels" . 
| nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/online-feature-store/templates/networkpolicy.yaml b/helm/online-feature-store/templates/networkpolicy.yaml new file mode 100644 index 00000000..6242465c --- /dev/null +++ b/helm/online-feature-store/templates/networkpolicy.yaml @@ -0,0 +1,24 @@ +{{- if .Values.networkPolicy.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "online-feature-store.networkPolicyName" . }} + namespace: {{ .Release.Namespace | quote }} + labels: + {{- include "online-feature-store.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "online-feature-store.selectorLabels" . 
| nindent 6 }}
+  policyTypes:
+    - Ingress
+    - Egress
+  {{- if .Values.networkPolicy.ingress }}
+  ingress:
+    {{- toYaml .Values.networkPolicy.ingress | nindent 2 }}
+  {{- end }}
+  {{- if .Values.networkPolicy.egress }}
+  egress:
+    {{- toYaml .Values.networkPolicy.egress | nindent 2 }}
+  {{- end }}
+{{- end }}
diff --git a/helm/online-feature-store/templates/pdb.yaml b/helm/online-feature-store/templates/pdb.yaml
new file mode 100644
index 00000000..7f121f4d
--- /dev/null
+++ b/helm/online-feature-store/templates/pdb.yaml
@@ -0,0 +1,19 @@
+{{- if .Values.podDisruptionBudget.enabled }}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "online-feature-store.pdbName" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "online-feature-store.labels" . | nindent 4 }}
+spec:
+  {{- /* A PDB accepts only one of the two fields; an explicit maxUnavailable wins, and 0 is honoured. */}}
+  {{- if not (kindIs "invalid" .Values.podDisruptionBudget.maxUnavailable) }}
+  maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }}
+  {{- else if not (kindIs "invalid" .Values.podDisruptionBudget.minAvailable) }}
+  minAvailable: {{ .Values.podDisruptionBudget.minAvailable }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "online-feature-store.selectorLabels" . | nindent 6 }}
+{{- end }}
diff --git a/helm/online-feature-store/templates/service.yaml b/helm/online-feature-store/templates/service.yaml
new file mode 100644
index 00000000..c8ce52b4
--- /dev/null
+++ b/helm/online-feature-store/templates/service.yaml
@@ -0,0 +1,24 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "online-feature-store.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "online-feature-store.labels" . | nindent 4 }}
+  {{- with .Values.service.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: {{ .Values.service.targetPort }}
+      protocol: TCP
+      name: http
+    - port: 8080
+      targetPort: 8080
+      protocol: TCP
+      name: metrics
+  selector:
+    {{- include "online-feature-store.selectorLabels" . | nindent 4 }}
diff --git a/helm/online-feature-store/templates/serviceaccount.yaml b/helm/online-feature-store/templates/serviceaccount.yaml
new file mode 100644
index 00000000..9bbb67a6
--- /dev/null
+++ b/helm/online-feature-store/templates/serviceaccount.yaml
@@ -0,0 +1,14 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "online-feature-store.serviceAccountName" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "online-feature-store.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
+{{- end }}
diff --git a/helm/online-feature-store/templates/servicemonitor.yaml b/helm/online-feature-store/templates/servicemonitor.yaml
new file mode 100644
index 00000000..28d76d60
--- /dev/null
+++ b/helm/online-feature-store/templates/servicemonitor.yaml
@@ -0,0 +1,39 @@
+{{- if .Values.serviceMonitor.enabled -}}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "online-feature-store.serviceMonitorName" . }}
+  namespace: {{ .Values.serviceMonitor.namespace | default .Release.Namespace | quote }}
+  labels:
+    {{- include "online-feature-store.labels" . | nindent 4 }}
+    {{- with .Values.serviceMonitor.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.serviceMonitor.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  # The ServiceMonitor may be created in another namespace (serviceMonitor.namespace),
+  # so point it explicitly at the namespace that holds the Service.
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace | quote }}
+  selector:
+    matchLabels:
+      {{- include "online-feature-store.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: metrics
+    path: {{ .Values.serviceMonitor.path }}
+    interval: {{ .Values.serviceMonitor.interval }}
+    scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
+    honorLabels: {{ .Values.serviceMonitor.honorLabels }}
+    {{- if .Values.serviceMonitor.metricRelabelings }}
+    metricRelabelings:
+    {{- toYaml .Values.serviceMonitor.metricRelabelings | nindent 4 }}
+    {{- end }}
+    {{- if .Values.serviceMonitor.relabelings }}
+    relabelings:
+    {{- toYaml .Values.serviceMonitor.relabelings | nindent 4 }}
+    {{- end }}
+{{- end }}
diff --git a/helm/online-feature-store/templates/tests/api-test.yaml b/helm/online-feature-store/templates/tests/api-test.yaml
new file mode 100644
index 00000000..e2f65085
--- /dev/null
+++ b/helm/online-feature-store/templates/tests/api-test.yaml
@@ -0,0 +1,43 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "online-feature-store.fullname" . }}-api-test"
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "online-feature-store.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    "helm.sh/hook-weight": "2"
+spec:
+  restartPolicy: Never
+  containers:
+    - name: api-test
+      image: curlimages/curl:latest
+      command: ['sh', '-c']
+      args:
+        - |
+          set -e
+          echo "Testing Online Feature Store API endpoints..."
+
+          SERVICE_URL="http://{{ include "online-feature-store.fullname" . }}:{{ .Values.service.port }}"
+          HEALTH_ENDPOINT="$SERVICE_URL/health/self"
+          METRICS_ENDPOINT="http://{{ include "online-feature-store.fullname" . }}:8080/metrics"
+
+          echo "Testing health endpoint: $HEALTH_ENDPOINT"
+          curl -f "$HEALTH_ENDPOINT"
+          echo "✅ Health endpoint test PASSED"
+
+          echo "Testing metrics endpoint: $METRICS_ENDPOINT"
+          # Fetch first, then preview: in a pipeline the exit status is head's,
+          # which would hide a curl failure from set -e.
+          METRICS_OUTPUT=$(curl -fsS "$METRICS_ENDPOINT")
+          echo "$METRICS_OUTPUT" | head -10
+          echo "✅ Metrics endpoint test PASSED"
+
+          echo "✅ All API tests passed!"
+      resources:
+        limits:
+          cpu: 100m
+          memory: 64Mi
+        requests:
+          cpu: 10m
+          memory: 32Mi
diff --git a/helm/online-feature-store/templates/tests/latency-test.yaml b/helm/online-feature-store/templates/tests/latency-test.yaml
new file mode 100644
index 00000000..f8bc7970
--- /dev/null
+++ b/helm/online-feature-store/templates/tests/latency-test.yaml
@@ -0,0 +1,46 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "online-feature-store.fullname" . }}-latency-test"
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "online-feature-store.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    "helm.sh/hook-weight": "1"
+spec:
+  restartPolicy: Never
+  containers:
+    - name: latency-test
+      image: curlimages/curl:latest
+      command: ['sh', '-c']
+      args:
+        - |
+          set -e
+          echo "Testing Online Feature Store latency..."
+
+          SERVICE_URL="http://{{ include "online-feature-store.fullname" . }}:{{ .Values.service.port }}"
+          HEALTH_ENDPOINT="$SERVICE_URL/health/self"
+
+          echo "Testing health endpoint: $HEALTH_ENDPOINT"
+
+          # Time the health check; -f turns HTTP errors into failures and set -e aborts on any curl error
+          RESPONSE_TIME=$(curl -f -sS -w "%{time_total}" -o /dev/null "$HEALTH_ENDPOINT")
+          echo "Health endpoint response time: ${RESPONSE_TIME}s"
+
+          # Check if response time is acceptable (< 1 second); awk avoids relying on bc being in the image
+          if awk -v t="$RESPONSE_TIME" 'BEGIN { exit !(t < 1.0) }'; then
+            echo "✅ Latency test PASSED: Response time is acceptable ($RESPONSE_TIME seconds)"
+          else
+            echo "❌ Latency test FAILED: Response time too high ($RESPONSE_TIME seconds)"
+            exit 1
+          fi
+
+          echo "✅ All latency tests passed!"
+      resources:
+        limits:
+          cpu: 100m
+          memory: 64Mi
+        requests:
+          cpu: 10m
+          memory: 32Mi
diff --git a/helm/online-feature-store/templates/vpa.yaml b/helm/online-feature-store/templates/vpa.yaml
new file mode 100644
index 00000000..a567dc48
--- /dev/null
+++ b/helm/online-feature-store/templates/vpa.yaml
@@ -0,0 +1,30 @@
+{{- if .Values.verticalPodAutoscaler.enabled }}
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+  name: {{ include "online-feature-store.vpaName" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "online-feature-store.labels" . | nindent 4 }}
+spec:
+  targetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "online-feature-store.fullname" . }}
+  updatePolicy:
+    {{- /* Quoted so that the mode "Off" is not parsed as the YAML boolean false. */}}
+    updateMode: {{ .Values.verticalPodAutoscaler.updateMode | quote }}
+  resourcePolicy:
+    containerPolicies:
+    - containerName: online-feature-store
+      {{- if .Values.verticalPodAutoscaler.minAllowed }}
+      minAllowed:
+        {{- toYaml .Values.verticalPodAutoscaler.minAllowed | nindent 8 }}
+      {{- end }}
+      {{- if .Values.verticalPodAutoscaler.maxAllowed }}
+      maxAllowed:
+        {{- toYaml .Values.verticalPodAutoscaler.maxAllowed | nindent 8 }}
+      {{- end }}
+      controlledResources: ["cpu", "memory"]
+      controlledValues: RequestsAndLimits
+{{- end }}
diff --git a/helm/online-feature-store/values-dev.yaml b/helm/online-feature-store/values-dev.yaml
new file mode 100644
index 00000000..a6772ea6
--- /dev/null
+++ b/helm/online-feature-store/values-dev.yaml
@@ -0,0 +1,88 @@
+# Development environment overrides
+# This file overrides values.yaml for development environments
+
+replicaCount: 1
+
+image:
+  tag: "latest"
+  pullPolicy: Always
+
+resources:
+  limits:
+    cpu: 500m
+    memory: 1Gi
+  requests:
+    cpu: 100m
+    memory: 256Mi
+
+autoscaling:
+  enabled: false
+
+verticalPodAutoscaler:
+  enabled: false
+
+podDisruptionBudget:
+  enabled: false
+
+ingress:
+  enabled: true
+  className: "nginx"
+  annotations:
+    nginx.ingress.kubernetes.io/rewrite-target: /
+  hosts:
+    - host: onfs.bharatml.dev
+      paths:
+        - path: /
+          pathType: Prefix
+  tls: []
+
+gateway:
+  enabled: false
+
+networkPolicy:
+  enabled: false
+
+serviceMonitor:
+  enabled: false
+
+config:
+  app:
+    env: "development"
+    logLevel: "DEBUG"
+    metricSamplingRate: 1
+
+  etcd:
+    server: "etcd.default.svc.cluster.local:2379"
+
+  storage:
+    scylla:
+      contactPoints: "scylla.default.svc.cluster.local"
+      numConns: 1
+      timeoutMs: 10000
+
+    redis:
+      addr: "redis.default.svc.cluster.local:6379"
+      maxIdleConn: 8
+      minIdleConn: 2
+      maxActiveConn: 8
+
+  cache:
+    inMemory:
+      sizeInBytes: 100000
+
+    p2p:
+      ownPartitionSizeInBytes: 100000
+      globalSizeInBytes: 1000
+
+probes:
+  liveness:
+    initialDelaySeconds: 10
+  readiness:
+    initialDelaySeconds: 5
+
+# Development-specific environment variables
+extraEnvVars:
+  - name: APP_ENV
+    value: "development"
+  - name: DEBUG_MODE
+    value: "true"
diff --git a/helm/online-feature-store/values-prod.yaml b/helm/online-feature-store/values-prod.yaml
new file mode 100644
index 00000000..a5050d91
--- /dev/null
+++ b/helm/online-feature-store/values-prod.yaml
@@ -0,0 +1,171 @@
+# Production environment overrides
+# This file overrides values.yaml for production environments
+
+replicaCount: 5
+
+image:
+  tag: "v1.0.0"  # Use stable version in production
+  pullPolicy: IfNotPresent
+
+resources:
+  limits:
+    cpu: 2000m
+    memory: 4Gi
+  requests:
+    cpu: 1000m
+    memory: 2Gi
+
+autoscaling:
+  enabled: true
+  minReplicas: 5
+  maxReplicas: 20
+  targetCPUUtilizationPercentage: 60
+  targetMemoryUtilizationPercentage: 70
+
+verticalPodAutoscaler:
+  enabled: true
+  updateMode: "Auto"  # NOTE(review): HPA above also targets CPU/memory; VPA docs advise against combining both - confirm
+  minAllowed:
+    cpu: 500m
+    memory: 1Gi
+  maxAllowed:
+    cpu: 4
+    memory: 8Gi
+
+podDisruptionBudget:
+  enabled: true
+  minAvailable: 3
+
+# Use Gateway API for production
+ingress:
+  enabled: false
+
+gateway:
+  enabled: true
+  className: "istio"
+  gatewayName: "bharatmlstack-gateway"
+  namespace: "bharatml-system"
+  hosts:
+    - "onfs.bharatml.prod.com"
+  tls:
+    enabled: true
+    certificateRefs:
+      - name: "bharatml-prod-tls"
+        namespace: "bharatml-system"
+
+httpRoute:
+  enabled: true
+  parentRefs:
+    - name: "bharatmlstack-gateway"
+      namespace: "bharatml-system"
+  hostnames:
+    - "onfs.bharatml.prod.com"
+
+networkPolicy:
+  enabled: true
+  ingress:
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              name: bharatml-system
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: horizon
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: trufflebox-ui
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: istio-proxy
+      ports:
+        - protocol: TCP
+          port: 8089
+
+serviceMonitor:
+  enabled: true
+  namespace: "bharatml-monitoring"
+  interval: 15s
+  scrapeTimeout: 10s
+
+config:
+  app:
+    env: "production"
+    logLevel: "INFO"
+    metricSamplingRate: 0.1
+
+  etcd:
+    server: "etcd.bharatml-infra.svc.cluster.local:2379"
+
+  storage:
+    scylla:
+      contactPoints: "scylla-cluster.bharatml-infra.svc.cluster.local"
+      numConns: 20
+      timeoutMs: 30000
+
+    redis:
+      addr: "redis-cluster.bharatml-infra.svc.cluster.local:6379"
+      maxIdleConn: 64
+      minIdleConn: 32
+      maxActiveConn: 128
+
+  cache:
+    inMemory:
+      sizeInBytes: 10000000
+
+    p2p:
+      ownPartitionSizeInBytes: 10000000
+      globalSizeInBytes: 100000
+
+# Production affinity rules
+affinity:
+  podAntiAffinity:
+    requiredDuringSchedulingIgnoredDuringExecution:
+      - labelSelector:
+          matchExpressions:
+            - key: app.kubernetes.io/name
+              operator: In
+              values:
+                - online-feature-store
+        topologyKey: kubernetes.io/hostname
+  nodeAffinity:
+    preferredDuringSchedulingIgnoredDuringExecution:
+      - weight: 100
+        preference:
+          matchExpressions:
+            - key: node-type
+              operator: In
+              values:
+                - compute-optimized
+
+# Production node selector
+nodeSelector:
+  bharatml.io/workload-type: "feature-store"
+
+# Production tolerations
+tolerations:
+  - key: "bharatml.io/feature-store"
+    operator: "Equal"
+    value: "true"
+    effect: "NoSchedule"
+
+# Enhanced probes for production
+probes:
+  liveness:
+    initialDelaySeconds: 60
+    periodSeconds: 15
+    timeoutSeconds: 10
+    failureThreshold: 3
+  readiness:
+    initialDelaySeconds: 30
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 2
+
+# Production-specific environment variables
+extraEnvVars:
+  - name: APP_ENV
+    value: "production"
+  - name: GOMAXPROCS
+    value: "4"
+  - name: GOGC
+    value: "100"
diff --git a/helm/online-feature-store/values.yaml b/helm/online-feature-store/values.yaml
new file mode 100644
index 00000000..b3708841
--- /dev/null
+++ b/helm/online-feature-store/values.yaml
@@ -0,0 +1,336 @@
+# Default values for online-feature-store
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# Global settings
+global:
+  imageRegistry: "ghcr.io/meesho"
+  imagePullSecrets: []
+  storageClass: ""
+
+# Container image settings
+image:
+  registry: ""
+  repository: "onfs-api-server"
+  tag: "latest"
+  pullPolicy: IfNotPresent
+
+# Service account settings
+serviceAccount:
+  create: true
+  automount: true
+  annotations: {}
+  name: ""
+
+# Pod Security Context
+podSecurityContext:
+  fsGroup: 65532
+  runAsNonRoot: true
+  runAsUser: 65532
+  runAsGroup: 65532
+
+# Container Security Context
+securityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop:
+      - ALL
+  readOnlyRootFilesystem: true
+  runAsNonRoot: true
+  runAsUser: 65532
+
+# Deployment settings
+replicaCount: 3
+
+# Resource limits and requests
+resources:
+  limits:
+    cpu: 1000m
+    memory: 2Gi
+  requests:
+    cpu: 500m
+    memory: 1Gi
+
+# Horizontal Pod Autoscaler
+autoscaling:
+  enabled: true
+  minReplicas: 3
+  maxReplicas: 10
+  targetCPUUtilizationPercentage: 70
+  targetMemoryUtilizationPercentage: 80
+
+# Vertical Pod Autoscaler
+verticalPodAutoscaler:
+  enabled: false
+  updateMode: "Auto"  # One of: Off, Initial, Recreate, Auto
+  minAllowed:
+    cpu: 100m
+    memory: 256Mi
+  maxAllowed:
+    cpu: 2
+    memory: 4Gi
+
+# Pod Disruption Budget
+podDisruptionBudget:
+  enabled: true
+  minAvailable: 2
+
+# Service settings
+service:
+  type: ClusterIP
+  port: 8089
+  targetPort: 8089
+  annotations: {}
+
+# Ingress (NGINX ingress class by default)
+ingress:
+  enabled: true
+  className: "nginx"
+  annotations:
+    nginx.ingress.kubernetes.io/rewrite-target: /
+    nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
+    cert-manager.io/cluster-issuer: "letsencrypt-prod"
+  hosts:
+    - host: onfs.bharatmlstack.local
+      paths:
+        - path: /
+          pathType: Prefix
+  tls:
+    - secretName: onfs-tls
+      hosts:
+        - onfs.bharatmlstack.local
+
+# Gateway API routing (alternative to Ingress; disabled by default)
+gateway:
+  enabled: false
+  className: "istio"
+  gatewayName: "bharatmlstack-gateway"
+  namespace: "bharatml-system"
+  hosts:
+    - "onfs.bharatml.prod.com"
+  tls:
+    enabled: true
+    certificateRefs:
+      - name: "bharatml-tls"
+        namespace: "bharatml-system"
+
+# HTTPRoute attached to the Gateway above
+httpRoute:
+  enabled: false
+  parentRefs:
+    - name: "bharatmlstack-gateway"
+      namespace: "bharatml-system"
+  hostnames:
+    - "onfs.bharatml.prod.com"
+  rules:
+    - matches:
+        - path:
+            type: PathPrefix
+            value: /
+      backendRefs:
+        - name: "online-feature-store"
+          port: 8089
+
+# NetworkPolicy ingress/egress rules
+networkPolicy:
+  enabled: true
+  ingress:
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              name: bharatml-system
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: horizon
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: trufflebox-ui
+      ports:
+        - protocol: TCP
+          port: 8089
+  egress:
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              name: bharatml-infra
+      ports:
+        - protocol: TCP
+          port: 9042  # ScyllaDB
+        - protocol: TCP
+          port: 6379  # Redis
+        - protocol: TCP
+          port: 2379  # etcd
+    - to: []
+      ports:
+        - protocol: TCP
+          port: 53
+        - protocol: UDP
+          port: 53
+
+# Prometheus Operator ServiceMonitor
+serviceMonitor:
+  enabled: true
+  namespace: ""
+  interval: 30s
+  scrapeTimeout: 10s
+  labels: {}
+  annotations: {}
+  path: /metrics
+  honorLabels: false
+
+# Liveness / readiness probe settings
+probes:
+  liveness:
+    enabled: true
+    initialDelaySeconds: 30
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /health/self
+      port: http
+  readiness:
+    enabled: true
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 3
+    failureThreshold: 3
+    httpGet:
+      path: /health/self
+      port: http
+
+# Application configuration
+config:
+  # General application settings
+  app:
+    env: "production"
+    logLevel: "INFO"
+    metricSamplingRate: 1
+    name: "onfs"
+    port: 8089
+    authToken: ""  # Set via secret
+
+  # etcd connection
+  etcd:
+    server: "etcd.bharatml-infra.svc.cluster.local:2379"
+    watcherEnabled: true
+
+  # Backing stores
+  storage:
+    scylla:
+      contactPoints: "scylla.bharatml-infra.svc.cluster.local"
+      keyspace: "onfs"
+      port: 9042
+      numConns: 10
+      timeoutMs: 30000
+      username: ""
+      password: ""
+      activeConfigIds: "1"
+
+    redis:
+      addr: "redis.bharatml-infra.svc.cluster.local:6379"
+      db: 0
+      maxIdleConn: 32
+      minIdleConn: 20
+      maxActiveConn: 32
+      poolTimeout: 300
+      readTimeout: 300
+      writeTimeout: 300
+      activeConfigIds: "2"
+
+  # Cache layers
+  cache:
+    inMemory:
+      enabled: true
+      name: "onfs"
+      sizeInBytes: 1000000
+      activeConfigIds: "3"
+
+    distributed:
+      confIds: "2"
+
+    p2p:
+      enabled: true
+      name: "p2p-onfs"
+      ownPartitionSizeInBytes: 1000000
+      globalSizeInBytes: 10000
+      activeConfigIds: "5"
+
+# References to externally managed Secrets
+secrets:
+  # Secret and key holding the auth token
+  authToken:
+    secretName: "onfs-auth"
+    key: "token"
+
+  # Secret and keys holding database credentials
+  database:
+    secretName: "onfs-db-credentials"
+    scyllaUsernameKey: "scylla-username"
+    scyllaPasswordKey: "scylla-password"
+    redisPasswordKey: "redis-password"
+
+# Node selector
+nodeSelector: {}
+
+# Tolerations
+tolerations: []
+
+# Pod affinity / anti-affinity rules
+affinity:
+  podAntiAffinity:
+    preferredDuringSchedulingIgnoredDuringExecution:
+      - weight: 100
+        podAffinityTerm:
+          labelSelector:
+            matchExpressions:
+              - key: app.kubernetes.io/name
+                operator: In
+                values:
+                  - online-feature-store
+          topologyKey: kubernetes.io/hostname
+
+# Extra labels
+labels: {}
+
+# Extra annotations
+annotations: {}
+
+# Annotations applied to pods
+podAnnotations: {}
+
+# Labels applied to pods
+podLabels: {}
+
+# Writable scratch mounts
+volumeMounts:
+  - name: tmp
+    mountPath: /tmp
+  - name: var-run
+    mountPath: /var/run
+
+# Volumes backing the mounts above
+volumes:
+  - name: tmp
+    emptyDir: {}
+  - name: var-run
+    emptyDir: {}
+
+# Additional environment variables
+extraEnvVars: []
+
+# Additional environment sources (ConfigMaps/Secrets)
+extraEnvVarsFrom: []
+
+# Init containers
+initContainers: []
+
+# Sidecar containers
+sidecars: []
+
+# Extra volumes
+extraVolumes: []
+
+# Extra volume mounts
+extraVolumeMounts: []
diff --git a/helm/trufflebox-ui/Chart.yaml b/helm/trufflebox-ui/Chart.yaml
new file mode 100644
index 00000000..67f720ea
--- /dev/null
+++ b/helm/trufflebox-ui/Chart.yaml
@@ -0,0 +1,21 @@
+apiVersion: v2
+name: trufflebox-ui
+description: BharatMLStack TruffleBox UI - Management console for the feature store ecosystem
+type: application
+version: 1.0.0
+appVersion: "1.0.0"
+home: https://github.com/AbhijithGanesh/BharatMLStack
+sources:
+  - https://github.com/AbhijithGanesh/BharatMLStack/tree/main/trufflebox-ui
+maintainers:
+  - name: BharatMLStack Team
+    email: contact@bharatmlstack.dev
+keywords:
+  - machine-learning
+  - feature-store
+  - web-ui
+  - management-console
+  - bharatmlstack
+annotations:
+  category: Machine Learning
+  licenses: MIT
diff --git a/helm/trufflebox-ui/values.yaml b/helm/trufflebox-ui/values.yaml
new file mode 100644
index 00000000..e4ba8c9a
--- /dev/null
+++ b/helm/trufflebox-ui/values.yaml
@@ -0,0 +1,268 @@
+# Default values for trufflebox-ui
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# Global settings
+global:
+  imageRegistry: "ghcr.io/meesho"
+  imagePullSecrets: []
+  storageClass: ""
+
+# Container image
+image:
+  registry: ""
+  repository: "trufflebox-ui"
+  tag: "latest"
+  pullPolicy: IfNotPresent
+
+# Service account
+serviceAccount:
+  create: true
+  automount: true
+  annotations: {}
+  name: ""
+
+# Pod-level security context
+podSecurityContext:
+  fsGroup: 101
+  runAsNonRoot: true
+  runAsUser: 101
+  runAsGroup: 101
+
+# Container-level security context
+securityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop:
+      - ALL
+    add:
+      - NET_BIND_SERVICE
+  readOnlyRootFilesystem: true
+  runAsNonRoot: true
+  runAsUser: 101
+
+# Deployment settings
+replicaCount: 2
+
+# Resource requests and limits
+resources:
+  limits:
+    cpu: 200m
+    memory: 256Mi
+  requests:
+    cpu: 50m
+    memory: 128Mi
+
+# Horizontal Pod Autoscaler
+autoscaling:
+  enabled: true
+  minReplicas: 2
+  maxReplicas: 6
+  targetCPUUtilizationPercentage: 70
+  targetMemoryUtilizationPercentage: 80
+
+# Vertical Pod Autoscaler
+verticalPodAutoscaler:
+  enabled: false
+  updateMode: "Auto"
+  minAllowed:
+    cpu: 20m
+    memory: 64Mi
+  maxAllowed:
+    cpu: 500m
+    memory: 512Mi
+
+# Pod Disruption Budget
+podDisruptionBudget:
+  enabled: true
+  minAvailable: 1
+
+# Service settings
+service:
+  type: ClusterIP
+  port: 80
+  targetPort: 80
+  annotations: {}
+
+# Ingress (NGINX ingress class by default)
+ingress:
+  enabled: true
+  className: "nginx"
+  annotations:
+    nginx.ingress.kubernetes.io/rewrite-target: /
+    nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
+    cert-manager.io/cluster-issuer: "letsencrypt-prod"
+  hosts:
+    - host: trufflebox.bharatmlstack.local
+      paths:
+        - path: /
+          pathType: Prefix
+  tls:
+    - secretName: trufflebox-tls
+      hosts:
+        - trufflebox.bharatmlstack.local
+
+# Gateway API routing (alternative to Ingress; disabled by default)
+gateway:
+  enabled: false
+  className: "istio"
+  gatewayName: "bharatmlstack-gateway"
+  namespace: "bharatml-system"
+  hosts:
+    - "trufflebox.bharatml.prod.com"
+  tls:
+    enabled: true
+    certificateRefs:
+      - name: "bharatml-tls"
+        namespace: "bharatml-system"
+
+# HTTPRoute attached to the Gateway above
+httpRoute:
+  enabled: false
+  parentRefs:
+    - name: "bharatmlstack-gateway"
+      namespace: "bharatml-system"
+  hostnames:
+    - "trufflebox.bharatml.prod.com"
+  rules:
+    - matches:
+        - path:
+            type: PathPrefix
+            value: /
+      backendRefs:
+        - name: "trufflebox-ui"
+          port: 80
+
+# NetworkPolicy ingress/egress rules
+networkPolicy:
+  enabled: true
+  ingress:
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              name: bharatml-system
+        - podSelector: {}  # Allow from all pods in same namespace
+      ports:
+        - protocol: TCP
+          port: 80
+  egress:
+    - to:
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: horizon
+      ports:
+        - protocol: TCP
+          port: 8082
+    - to: []
+      ports:
+        - protocol: TCP
+          port: 53
+        - protocol: UDP
+          port: 53
+
+# Prometheus Operator ServiceMonitor
+serviceMonitor:
+  enabled: true
+  namespace: ""
+  interval: 30s
+  scrapeTimeout: 10s
+  labels: {}
+  annotations: {}
+  path: /metrics
+  honorLabels: false
+
+# Liveness / readiness probe settings
+probes:
+  liveness:
+    enabled: true
+    initialDelaySeconds: 30
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 3
+    httpGet:
+      path: /
+      port: http
+  readiness:
+    enabled: true
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 3
+    failureThreshold: 3
+    httpGet:
+      path: /
+      port: http
+
+# Application configuration
+config:
+  # Base URL of the Horizon backend
+  horizon:
+    baseUrl: "http://horizon.bharatml-system.svc.cluster.local:8082"
+
+# Node selector
+nodeSelector: {}
+
+# Tolerations
+tolerations: []
+
+# Pod affinity / anti-affinity rules
+affinity:
+  podAntiAffinity:
+    preferredDuringSchedulingIgnoredDuringExecution:
+      - weight: 100
+        podAffinityTerm:
+          labelSelector:
+            matchExpressions:
+              - key: app.kubernetes.io/name
+                operator: In
+                values:
+                  - trufflebox-ui
+          topologyKey: kubernetes.io/hostname
+
+# Extra labels
+labels: {}
+
+# Extra annotations
+annotations: {}
+
+# Annotations applied to pods
+podAnnotations: {}
+
+# Labels applied to pods
+podLabels: {}
+
+# Writable mounts for nginx and scratch space
+volumeMounts:
+  - name: tmp
+    mountPath: /tmp
+  - name: var-cache-nginx
+    mountPath: /var/cache/nginx
+  - name: var-run
+    mountPath: /var/run
+
+# Volumes backing the mounts above
+volumes:
+  - name: tmp
+    emptyDir: {}
+  - name: var-cache-nginx
+    emptyDir: {}
+  - name: var-run
+    emptyDir: {}
+
+# Additional environment variables
+extraEnvVars: []
+
+# Additional environment sources (ConfigMaps/Secrets)
+extraEnvVarsFrom: []
+
+# Init containers
+initContainers: []
+
+# Sidecar containers
+sidecars: []
+
+# Extra volumes
+extraVolumes: []
+
+# Extra volume mounts
+extraVolumeMounts: []