Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/graph-proxy/src/graphql/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use axum_extra::{
use lazy_static::lazy_static;
use opentelemetry::KeyValue;
use std::fmt::Display;
use tracing::instrument;
use workflow_templates::WorkflowTemplatesMutation;

/// The root schema of the service
Expand All @@ -60,6 +61,7 @@ pub struct NodeQuery;

#[Object]
impl NodeQuery {
#[instrument(name = "graph_proxy_node_query", skip(self, ctx))]
async fn node(&self, ctx: &Context<'_>, id: ID) -> Option<NodeValue> {
let id_str = id.to_string();
let parts: Vec<&str> = id_str.split(':').collect();
Expand Down
8 changes: 8 additions & 0 deletions charts/monitoring/staging-values.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
cluster: pollux

grafana:
# Provisions a single Grafana datasource named "Prometheus" and marks it as the default.
# NOTE(review): the URL embeds a Helm template expression inside a values file; it only
# resolves if the consuming chart renders this value through `tpl` — confirm.
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: "http://{{ .Release.Name }}-prometheus-server:80"
isDefault: true
ingress:
enabled: true
path: "/"
Expand Down
2 changes: 1 addition & 1 deletion charts/otel-collector/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ metadata:
name: opentelemetry-collector
rules:
- apiGroups: [""]
resources: ["pods", "services", "endpoints"]
resources: ["pods", "services", "endpoints", "namespaces", "nodes", "resourcequotas"]
verbs: ["get", "list", "watch"]
46 changes: 46 additions & 0 deletions charts/otel-collector/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,20 @@ opentelemetry-collector:
presets:
kubernetesAttributes:
enabled: true
# Extra read-only (get/list/watch) RBAC rules for the collector's cluster role,
# spanning the core, apps, batch and autoscaling API groups.
# NOTE(review): presumably these back the k8s_cluster receiver configured in this file — confirm.
# NOTE(review): "nodes/proxy" is not listed here, although the kubelet-resource-metrics
# scrape job goes through the API-server node proxy — confirm it is granted elsewhere.
clusterRole:
rules:
# Core API group.
- apiGroups: [""]
resources: ["replicationcontrollers", "resourcequotas", "services", "endpoints"]
verbs: ["get", "list", "watch"]
# Workload controllers.
- apiGroups: ["apps"]
resources: ["deployments", "replicasets", "statefulsets", "daemonsets"]
verbs: ["get", "list", "watch"]
# Batch workloads.
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["get", "list", "watch"]
# Autoscalers.
- apiGroups: ["autoscaling"]
resources: ["horizontalpodautoscalers"]
verbs: ["get", "list", "watch"]
ports:
prometheus:
enabled: true
Expand Down Expand Up @@ -44,6 +58,9 @@ opentelemetry-collector:
memory: 24Gi
config:
processors:
groupbyattrs:
keys:
- k8s.namespace.name
batch:
send_batch_size: 512
k8sattributes:
Expand Down Expand Up @@ -119,6 +136,33 @@ opentelemetry-collector:
- source_labels: [__meta_kubernetes_pod_phase]
regex: Pending|Succeeded|Failed|Completed
action: drop
# Scrapes each node's kubelet resource metrics (/metrics/resource) through the
# API-server node proxy rather than contacting the kubelets directly.
# NOTE(review): this path needs "get" on "nodes/proxy" for the collector's service
# account — confirm the RBAC rules grant it.
- job_name: 'kubelet-resource-metrics'
scheme: https
tls_config:
# Every request is sent to the API server (see the __address__ rewrite below), so its
# certificate can be verified against the mounted service-account CA instead of
# disabling verification while a bearer token is being sent.
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
# Copy every Kubernetes node label onto the target.
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
# Route all scrapes to the in-cluster API server.
- target_label: __address__
replacement: kubernetes.default.svc:443
# Build the per-node proxy path. "$$" is the collector's escape for a literal "$";
# a bare "$1" is subject to the collector's config variable expansion before the
# Prometheus relabel engine ever sees the capture-group reference.
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/$$1/proxy/metrics/resource
# Expose the node name as a plain "node" label.
- source_labels: [__meta_kubernetes_node_name]
target_label: node
# Cluster receiver: collects every 30s and authenticates with the pod's service account.
# NOTE(review): this receiver reads cluster-wide state; if the collector runs as more than
# one replica, each instance would presumably emit the same metrics — confirm deployment mode.
k8s_cluster:
collection_interval: 30s
auth_type: serviceAccount
resource_attributes:
# Explicitly enable the namespace-name resource attribute.
k8s.namespace.name:
enabled: true
# Allocatable resource types to report: CPU and memory only.
allocatable_types_to_report:
- cpu
- memory
otlp:
protocols:
grpc:
Expand Down Expand Up @@ -147,8 +191,10 @@ opentelemetry-collector:
receivers:
- prometheus
- otlp
- k8s_cluster
processors:
- k8sattributes
- groupbyattrs
- memory_limiter
- batch
exporters:
Expand Down
3 changes: 3 additions & 0 deletions charts/workflows-cluster/staging-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ vcluster:
deployment:
replicas: 2

serviceMonitor:
enabled: true

experimental:
deploy:
vcluster:
Expand Down
Loading