diff --git a/example/kubernetes/Dockerfile b/example/kubernetes/Dockerfile
new file mode 100644
index 00000000..95f4ec3a
--- /dev/null
+++ b/example/kubernetes/Dockerfile
@@ -0,0 +1,11 @@
+FROM almalinux:8
+
+ADD rocm.repo /etc/yum.repos.d/
+ADD protobuf.repo /etc/yum.repos.d/
+RUN yum install net-tools procps-ng gcc-c++ rdc protobuf python3-devel -y
+ENV LD_LIBRARY_PATH /opt/rocm/rdc/lib:/opt/rocm/rdc/grpc/lib/
+ENV PATH /root/.local/bin:$PATH
+RUN pip3 install --user -U pip
+RUN pip3 install --user protobuf==3.11.3
+RUN pip3 install --user prometheus_client kuryr_kubernetes
+RUN ln -s /lib64/libprotobuf.so.22 /lib64/libprotobuf.so.3.11.2.0
\ No newline at end of file
diff --git a/example/kubernetes/protobuf.repo b/example/kubernetes/protobuf.repo
new file mode 100644
index 00000000..70f50e5d
--- /dev/null
+++ b/example/kubernetes/protobuf.repo
@@ -0,0 +1,7 @@
+[ussuri]
+name=protobuf
+baseurl=https://buildlogs.centos.org/centos/8/cloud/x86_64/openstack-ussuri
+enabled=1
+fastestmirror_enabled=0
+gpgcheck=0
+priority=1
diff --git a/example/kubernetes/rdc.yaml b/example/kubernetes/rdc.yaml
new file mode 100644
index 00000000..77e2d596
--- /dev/null
+++ b/example/kubernetes/rdc.yaml
@@ -0,0 +1,161 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: rdc
+  labels:
+    app: rdc
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      app: rdc
+  template:
+    metadata:
+      labels:
+        app: rdc
+    spec:
+      priorityClassName: system-node-critical
+      tolerations:
+      - key: CriticalAddonsOnly
+        operator: Exists
+      - effect: NoSchedule
+        key: amd.com/gpu
+        operator: Exists
+      nodeSelector:
+        gpu: "amd"
+      containers:
+      - name: rdc
+        image: your_container_repository/rdc:5.1
+        imagePullPolicy: Always
+        command: ["/opt/rocm/rdc/bin/rdcd"]
+        args: ["-u"]
+        securityContext:
+          privileged: true
+          capabilities:
+            drop:
+            - all
+        lifecycle:
+          postStart:
+            exec:
+              command:
+              - /bin/bash
+              - -c
+              - while [ $(netstat -lntp |grep rdcd| wc -l) -ne 1 ]; do sleep 1; done
+          preStop:
+            exec:
+              command:
+              - /bin/bash
+              - -c
+              - while [ $(netstat -lntp |grep 5000| wc -l) -ne 0 ]; do sleep 1; done
+        readinessProbe:
+          tcpSocket:
+            port: 50051
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        livenessProbe:
+          tcpSocket:
+            port: 50051
+          initialDelaySeconds: 15
+          periodSeconds: 20
+        resources:
+          limits:
+            cpu: 100m
+            memory: 30Mi
+          requests:
+            cpu: 20m
+            memory: 30Mi
+        ports:
+        - containerPort: 50051
+          name: rdc
+          protocol: TCP
+        volumeMounts:
+        - name: sys
+          mountPath: /sys
+      - name: prometheus-rdc
+        command: ["python3"]
+        args: ["/opt/rocm/rdc/python_binding/rdc_prometheus.py", "--rdc_unauth", "--enable_kubernetes_integration"]
+        image: your_container_repository/rdc:5.1
+        imagePullPolicy: Always
+        securityContext:
+          privileged: true
+          capabilities:
+            drop:
+            - all
+        readinessProbe:
+          httpGet:
+            path: /metrics
+            port: metrics
+            scheme: HTTP
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        livenessProbe:
+          httpGet:
+            path: /metrics
+            port: metrics
+            scheme: HTTP
+          initialDelaySeconds: 15
+          periodSeconds: 20
+        resources:
+          limits:
+            cpu: 100m
+            memory: 100Mi
+          requests:
+            cpu: 20m
+            memory: 100Mi
+        ports:
+        - containerPort: 5000
+          name: metrics
+          protocol: TCP
+        volumeMounts:
+        - name: sys
+          mountPath: /sys
+        - name: podresources-api
+          mountPath: /var/lib/kubelet/pod-resources
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: podresources-api
+        hostPath:
+          path: /var/lib/kubelet/pod-resources
+      - name: sys
+        hostPath:
+          path: /sys
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: rdc
+  name: rdc
+  namespace: kube-system
+spec:
+  ports:
+  - port: 5000
+    protocol: TCP
+    targetPort: 5000
+    name: metrics
+  selector:
+    app: rdc
+  type: ClusterIP
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: rdc
+  namespace: kube-system
+spec:
+  endpoints:
+  - interval: 30s
+    path: /metrics
+    port: metrics
+    relabelings:
+    - sourceLabels: [__meta_kubernetes_pod_node_name]
+      targetLabel: node
+  namespaceSelector:
+    matchNames:
+    - kube-system
+  selector:
+    matchLabels:
+      app: rdc
+
+
+
diff --git a/example/kubernetes/rocm.repo b/example/kubernetes/rocm.repo
new file mode 100644
index 00000000..ad4b7b31
--- /dev/null
+++ b/example/kubernetes/rocm.repo
@@ -0,0 +1,7 @@
+[rocm]
+name=rocm
+baseurl=https://repo.radeon.com/rocm/centos8/latest/main
+enabled=1
+fastestmirror_enabled=0
+gpgcheck=0
+priority=1
diff --git a/python_binding/rdc_prometheus.py b/python_binding/rdc_prometheus.py
index ec9e733e..c248c35e 100644
--- a/python_binding/rdc_prometheus.py
+++ b/python_binding/rdc_prometheus.py
@@ -15,7 +15,7 @@ class PrometheusReader(RdcReader):
     def __init__(self, rdc_ip_port, field_ids, update_freq, max_keep_age, max_keep_samples,
-            gpu_indexes, rdc_unauth, enable_plugin_monitoring):
+            gpu_indexes, rdc_unauth, enable_plugin_monitoring, enable_kubernetes_integration):
         group_name = "rdc_prometheus_plugin_group"
         field_group_name = "rdc_prometheus_plugin_fieldgroup"
         if rdc_unauth:
@@ -32,15 +32,75 @@ def __init__(self, rdc_ip_port, field_ids, update_freq, max_keep_age, max_keep_s
             REGISTRY.unregister(PROCESS_COLLECTOR)
             REGISTRY.unregister(PLATFORM_COLLECTOR)
 
-        # Create the guages
-        self.guages = {}
+        self.enable_kubernetes_integration = enable_kubernetes_integration
+
+        # Create the gauges
+        self.gauges = {}
         for fid in self.field_ids:
             field_name = self.rdc_util.field_id_string(fid).lower()
-            self.guages[fid] = Gauge(field_name, field_name, labelnames=['gpu_index'])
+            if enable_kubernetes_integration:
+                self.gauges[fid] = Gauge(field_name, field_name, labelnames=['gpu_index', 'pod', 'namespace', 'container'])
+            else:
+                self.gauges[fid] = Gauge(field_name, field_name, labelnames=['gpu_index'])
+
+        if enable_kubernetes_integration:
+            import sys, os
+            sys.path.append('/opt/rocm/bin')
+            from rocm_smi import getBus, initializeRsmi
+            from kuryr_kubernetes.pod_resources.client import PodResourcesClient
+
+            # Create kubelet client for podresources api to get pcie bus address of attached gpu
+            self.pr_client = PodResourcesClient(os.getenv('RDC_KUBERNETES_KUBELET_PATH','/var/lib/kubelet'))
+
+            self.empty_label_value = os.getenv('RDC_KUBERNETES_EMPTY_LABEL_VALUE','')
+
+            initializeRsmi()
+
+            # Cache mapping between gpu indexes and PCIe bus addresses, assumes no hotplug of gpus
+            self.index_to_bus_addr = {}
+            for item in self.gpu_indexes:
+                self.index_to_bus_addr[item] = getBus(item)
+
+    def process(self):
+        # Make sure no other thread collects metrics before we are fully finished with them
+        with REGISTRY._lock:
+            if self.enable_kubernetes_integration:
+                from google.protobuf.json_format import MessageToDict
+                # Get list of all pods and their containers with devices attached to them
+                self.pod_list = MessageToDict(self.pr_client.list())
+                # Clear the labels and populate them later again
+                for fid in self.field_ids:
+                    self.gauges[fid].clear()
+            RdcReader.process(self)
 
     def handle_field(self, gpu_index, value):
-        if value.field_id.value in self.guages:
-            self.guages[value.field_id.value].labels(gpu_index).set(value.value.l_int)
+        fid = value.field_id.value
+        if fid in self.gauges:
+            if self.enable_kubernetes_integration:
+                gpu_bus_addr = self.index_to_bus_addr[gpu_index]
+                # Check if currently processed gpu is attached to any container, single gpu can only be attached to a single container
+                container_data = self.findContainer(gpu_bus_addr)
+                if container_data:
+                    self.gauges[fid].labels(gpu_index=gpu_index, pod=container_data['pod'], namespace=container_data['namespace'], container=container_data['container']).set(value.value.l_int)
+                else:
+                    self.gauges[fid].labels(gpu_index=gpu_index, pod=self.empty_label_value, namespace=self.empty_label_value, container=self.empty_label_value).set(value.value.l_int)
+            else:
+                self.gauges[fid].labels(gpu_index).set(value.value.l_int)
+
+    def findContainer(self, dev_id):
+        container_dict = {}
+        for pod in self.pod_list['podResources']:
+            for container in pod['containers']:
+                if 'devices' in container:
+                    for device in container['devices']:
+                        if device['resourceName'] == 'amd.com/gpu':
+                            if device['deviceIds'][0] == dev_id:
+                                container_dict['container'] = container['name']
+                                container_dict['pod'] = pod['name']
+                                container_dict['namespace'] = pod['namespace']
+                                return container_dict
+        return container_dict
+
 
 def get_field_ids(args):
     field_ids = []
@@ -81,7 +141,8 @@ def get_field_ids(args):
     parser.add_argument('--rdc_fields_file', default=None, help='The list of fields name can also be read from a file with each field name in a separated line (default: None)')
     parser.add_argument('--rdc_gpu_indexes', default=None, nargs='+', help='The list of GPUs to be watched (default: All GPUs)')
     parser.add_argument('--enable_plugin_monitoring', default=False, action='store_true', help = 'Set this option to collect process metrics of the plugin itself (default: false)')
-
+    parser.add_argument('--enable_kubernetes_integration', default=False, action='store_true', help='Set this option if you want per pod gpu monitoring in kubernetes (default: false)')
+
     args = parser.parse_args()
 
     field_ids = get_field_ids(args)
@@ -94,7 +155,7 @@ def get_field_ids(args):
     reader = PrometheusReader(rdc_ip_port, field_ids,
             args.rdc_update_freq*1000000, args.rdc_max_keep_age, args.rdc_max_keep_samples,
-            args.rdc_gpu_indexes, args.rdc_unauth, args.enable_plugin_monitoring)
+            args.rdc_gpu_indexes, args.rdc_unauth, args.enable_plugin_monitoring, args.enable_kubernetes_integration)
 
     start_http_server(args.listen_port)
     print("The RDC Prometheus plugin listen at port %d" % (args.listen_port))
     time.sleep(3)
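Note for reviewers: the following is a minimal standalone sketch of the lookup that the new findContainer() helper performs, assuming the dict shape produced by MessageToDict(pr_client.list()) from the kubelet pod-resources API. The pod name, container name, and PCIe bus addresses below are hypothetical examples, not real cluster data.

# Standalone sketch of the findContainer() lookup added in this change.
# Assumes the pod-resources List() response has been converted with
# google.protobuf.json_format.MessageToDict(); names/IDs are made up.
sample_pod_list = {
    'podResources': [
        {'name': 'gpu-workload-0', 'namespace': 'default',
         'containers': [
             {'name': 'trainer',
              'devices': [{'resourceName': 'amd.com/gpu',
                           'deviceIds': ['0000:03:00.0']}]}]}
    ]
}

def find_container(pod_list, dev_id):
    # Walk every container of every pod; return the owner of dev_id, or {} if the GPU is unallocated.
    for pod in pod_list['podResources']:
        for container in pod['containers']:
            for device in container.get('devices', []):
                if device['resourceName'] == 'amd.com/gpu' and device['deviceIds'][0] == dev_id:
                    return {'pod': pod['name'],
                            'namespace': pod['namespace'],
                            'container': container['name']}
    return {}

print(find_container(sample_pod_list, '0000:03:00.0'))
# -> {'pod': 'gpu-workload-0', 'namespace': 'default', 'container': 'trainer'}
print(find_container(sample_pod_list, '0000:63:00.0'))
# -> {} : GPU not attached to any container

When the lookup comes back empty, handle_field() falls back to the value of RDC_KUBERNETES_EMPTY_LABEL_VALUE for the pod/namespace/container labels, so metrics for unallocated GPUs are still exported.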