From a51d8f73fa5bf85742ce454761980b123e347f7e Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Fri, 19 Dec 2025 18:04:32 -0300 Subject: [PATCH 01/14] feat(aggregation-mode): add grafana dashboard --- aggregation_mode/Cargo.lock | 265 +++++++++++++- aggregation_mode/Cargo.toml | 3 + aggregation_mode/gateway/Cargo.toml | 4 + aggregation_mode/gateway/src/config.rs | 1 + aggregation_mode/gateway/src/http.rs | 19 + aggregation_mode/gateway/src/lib.rs | 1 + aggregation_mode/gateway/src/metrics.rs | 72 ++++ ...fig-agg-mode-gateway-ethereum-package.yaml | 4 + config-files/config-agg-mode-gateway.yaml | 3 + .../aligned/aggregation_mode_gateway.json | 325 ++++++++++++++++++ prometheus/prometheus.yaml | 7 + 11 files changed, 693 insertions(+), 11 deletions(-) create mode 100644 aggregation_mode/gateway/src/metrics.rs create mode 100644 grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json diff --git a/aggregation_mode/Cargo.lock b/aggregation_mode/Cargo.lock index 6e424297a6..845d0fbb30 100644 --- a/aggregation_mode/Cargo.lock +++ b/aggregation_mode/Cargo.lock @@ -233,6 +233,21 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "actix-web-prometheus" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5228fd1a6b5d0f60d636776c2a70acc9fc667034bb4ac02ec4259f0eeeab6c" +dependencies = [ + "actix-service", + "actix-web", + "futures-lite", + "pin-project", + "prometheus", + "quanta", + "thiserror 1.0.69", +] + [[package]] name = "addchain" version = "0.2.0" @@ -1588,7 +1603,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "hex", "http 1.4.0", "ring 0.17.14", @@ -1648,7 +1663,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http-body 0.4.6", "percent-encoding", @@ -1673,7 +1688,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", 
"regex-lite", "tracing", @@ -1695,7 +1710,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -1717,7 +1732,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -1740,7 +1755,7 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -1871,7 +1886,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -2023,7 +2038,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" dependencies = [ - "fastrand", + "fastrand 2.3.0", "gloo-timers 0.3.0", "tokio", ] @@ -4132,6 +4147,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -4409,6 +4433,21 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-locks" version = "0.7.1" @@ -4491,11 
+4530,14 @@ version = "0.1.0" dependencies = [ "actix-multipart", "actix-web", + "actix-web-prometheus", "agg_mode_sdk", "aligned-sdk", "alloy", + "anyhow", "bincode", "hex", + "prometheus", "serde", "serde_json", "serde_yaml", @@ -4504,6 +4546,7 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber 0.3.22", + "warp", ] [[package]] @@ -4572,7 +4615,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -4807,6 +4850,30 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "headers" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270" +dependencies = [ + "base64 0.21.7", + "bytes", + "headers-core", + "http 0.2.12", + "httpdate", + "mime", + "sha1", +] + +[[package]] +name = "headers-core" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" +dependencies = [ + "http 0.2.12", +] + [[package]] name = "heapless" version = "0.7.17" @@ -5725,6 +5792,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "macro-string" version = "0.1.4" @@ -5949,10 +6025,28 @@ checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" dependencies = [ "libc", "log", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] +[[package]] +name = "multer" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http 0.2.12", + "httparse", + "log", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + [[package]] name = "multimap" version = "0.10.1" @@ -7150,6 +7244,46 @@ dependencies = [ "version_check", ] +[[package]] +name = "procfs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" +dependencies = [ + "bitflags 2.10.0", + "hex", + "lazy_static", + "procfs-core", + "rustix 0.38.44", +] + +[[package]] +name = "procfs-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" +dependencies = [ + "bitflags 2.10.0", + "hex", +] + +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "libc", + "memchr", + "parking_lot", + "procfs", + "protobuf", + "thiserror 1.0.69", +] + [[package]] name = "proof_aggregator" version = "0.1.0" @@ -7252,6 +7386,12 @@ dependencies = [ "prost", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "puffin" version = "0.19.1" @@ -7266,6 +7406,22 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "quanta" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e31331286705f455e56cca62e0e717158474ff02b7936c1fa596d983f4ae27" +dependencies = [ + "crossbeam-utils", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.2+wasi-snapshot-preview1", + "web-sys", + 
"winapi", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -7431,6 +7587,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -8684,6 +8849,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -10218,7 +10389,7 @@ version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ - "fastrand", + "fastrand 2.3.0", "getrandom 0.3.4", "once_cell", "rustix 1.1.2", @@ -10444,6 +10615,18 @@ dependencies = [ "webpki-roots 0.25.4", ] +[[package]] +name = "tokio-tungstenite" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.21.0", +] + [[package]] name = "tokio-tungstenite" version = "0.23.1" @@ -10784,6 +10967,25 @@ dependencies = [ "utf-8", ] +[[package]] +name = "tungstenite" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http 1.4.0", + "httparse", + "log", + "rand 0.8.5", + "sha1", + "thiserror 1.0.69", + "url", + "utf-8", +] + [[package]] name = "tungstenite" version = "0.23.0" @@ -11074,6 +11276,12 @@ dependencies = [ "libc", ] +[[package]] +name = "waker-fn" +version = 
"1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "walkdir" version = "2.5.0" @@ -11093,6 +11301,41 @@ dependencies = [ "try-lock", ] +[[package]] +name = "warp" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4378d202ff965b011c64817db11d5829506d3404edeadb61f190d111da3f231c" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "headers", + "http 0.2.12", + "hyper 0.14.32", + "log", + "mime", + "mime_guess", + "multer", + "percent-encoding", + "pin-project", + "scoped-tls", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-tungstenite 0.21.0", + "tokio-util", + "tower-service", + "tracing", +] + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" diff --git a/aggregation_mode/Cargo.toml b/aggregation_mode/Cargo.toml index 2ab51157c9..b61eb7cde1 100644 --- a/aggregation_mode/Cargo.toml +++ b/aggregation_mode/Cargo.toml @@ -16,6 +16,9 @@ aligned-sdk = { path = "../crates/sdk/" } db = { path = "./db" } sp1-sdk = "5.0.0" risc0-zkvm = { version = "3.0.3" } +prometheus = { version = "0.13.4", features = ["process"] } +anyhow = { version = "1.0" } +warp = "0.3.7" [profile.release] opt-level = 3 diff --git a/aggregation_mode/gateway/Cargo.toml b/aggregation_mode/gateway/Cargo.toml index 79ef53351c..e4347c50b2 100644 --- a/aggregation_mode/gateway/Cargo.toml +++ b/aggregation_mode/gateway/Cargo.toml @@ -7,6 +7,9 @@ edition = "2021" serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { workspace = true } +prometheus = { workspace = true } +anyhow = { workspace = true } +warp = { workspace = true } agg_mode_sdk = { 
path = "../sdk"} aligned-sdk = { workspace = true } sp1-sdk = { workspace = true } @@ -15,6 +18,7 @@ tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } bincode = "1.3.3" actix-web = "4" actix-multipart = "0.7.2" +actix-web-prometheus = "0.1.0-beta.8" alloy = { workspace = true } tokio = { version = "1", features = ["time"]} # TODO: enable tls diff --git a/aggregation_mode/gateway/src/config.rs b/aggregation_mode/gateway/src/config.rs index 444882ab56..e6b7947679 100644 --- a/aggregation_mode/gateway/src/config.rs +++ b/aggregation_mode/gateway/src/config.rs @@ -8,6 +8,7 @@ pub struct Config { pub db_connection_url: String, pub network: String, pub max_daily_proofs_per_user: i64, + pub metrics_port: u16, } impl Config { diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index dabd367344..285fe5e17f 100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -1,4 +1,5 @@ use std::{ + collections::HashMap, str::FromStr, time::{SystemTime, UNIX_EPOCH}, }; @@ -8,6 +9,7 @@ use actix_web::{ web::{self, Data}, App, HttpRequest, HttpResponse, HttpServer, Responder, }; +use actix_web_prometheus::PrometheusMetricsBuilder; use agg_mode_sdk::types::Network; use aligned_sdk::aggregation_layer::AggregationModeProvingSystem; use alloy::signers::Signature; @@ -23,6 +25,7 @@ use crate::{ config::Config, db::Db, helpers::get_time_left_day_formatted, + metrics::GatewayMetrics, types::{GetReceiptsResponse, SubmitProofRequestRisc0, SubmitProofRequestSP1}, verifiers::{verify_sp1_proof, VerificationError}, }; @@ -32,15 +35,22 @@ pub struct GatewayServer { db: Db, config: Config, network: Network, + metrics: GatewayMetrics, } impl GatewayServer { pub fn new(db: Db, config: Config) -> Self { let network = Network::from_str(&config.network).expect("A valid network in config file"); + + tracing::info!("Starting metrics server on port {}", config.metrics_port); + let metrics = + 
GatewayMetrics::start(config.metrics_port).expect("Failed to start metrics server"); + Self { db, config, network, + metrics, } } @@ -49,10 +59,19 @@ impl GatewayServer { let port = self.config.port; let state = self.clone(); + let mut labels = HashMap::new(); + labels.insert("label1".to_string(), "value1".to_string()); + let prometheus = PrometheusMetricsBuilder::new("api") + .endpoint("/metrics") + .const_labels(labels) + .build() + .unwrap(); + tracing::info!("Starting server at port {}", self.config.port); HttpServer::new(move || { App::new() .app_data(Data::new(state.clone())) + .wrap(prometheus.clone()) .route("/nonce/{address}", web::get().to(Self::get_nonce)) .route("/receipts", web::get().to(Self::get_receipts)) .route("/proof/sp1", web::post().to(Self::post_proof_sp1)) diff --git a/aggregation_mode/gateway/src/lib.rs b/aggregation_mode/gateway/src/lib.rs index 84693d8cd6..0df12ac02d 100644 --- a/aggregation_mode/gateway/src/lib.rs +++ b/aggregation_mode/gateway/src/lib.rs @@ -2,5 +2,6 @@ pub mod config; pub mod db; mod helpers; pub mod http; +mod metrics; mod types; mod verifiers; diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs new file mode 100644 index 0000000000..6bff5a767c --- /dev/null +++ b/aggregation_mode/gateway/src/metrics.rs @@ -0,0 +1,72 @@ +use prometheus::{self, opts, register_int_counter, IntCounter}; +use warp::{reject::Rejection, reply::Reply, Filter}; + +#[derive(Clone, Debug)] +pub struct GatewayMetrics { + pub success_response: IntCounter, + pub server_error_response: IntCounter, + pub user_error_response: IntCounter, +} + +impl GatewayMetrics { + pub fn start(metrics_port: u16) -> anyhow::Result<Self> { + let registry = prometheus::Registry::new(); + + let success_response = + register_int_counter!(opts!("success_response_count", "Success Responses"))?; + + let server_error_response = + register_int_counter!(opts!("server_error_response_count", "Success Responses"))?; + + let
user_error_response = + register_int_counter!(opts!("user_error_response_count", "Success Responses"))?; + + registry.register(Box::new(success_response.clone()))?; + registry.register(Box::new(server_error_response.clone()))?; + registry.register(Box::new(user_error_response.clone()))?; + + let metrics_route = warp::path!("metrics") + .and(warp::any().map(move || registry.clone())) + .and_then(GatewayMetrics::metrics_handler); + + tokio::task::spawn(async move { + warp::serve(metrics_route) + .run(([0, 0, 0, 0], metrics_port)) + .await; + }); + + Ok(Self { + success_response, + server_error_response, + user_error_response, + }) + } + + pub async fn metrics_handler(registry: prometheus::Registry) -> Result<impl Reply, Rejection> { + use prometheus::Encoder; + let encoder = prometheus::TextEncoder::new(); + + let mut buffer = Vec::new(); + if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) { + eprintln!("could not encode prometheus metrics: {}", e); + }; + let res = String::from_utf8(buffer.clone()) + .inspect_err(|e| eprintln!("prometheus metrics could not be parsed correctly: {e}")) + .unwrap_or_default(); + buffer.clear(); + + Ok(res) + } + + pub fn inc_success_response(&self) { + self.success_response.inc(); + } + + pub fn inc_server_error_response(&self) { + self.server_error_response.inc(); + } + + pub fn inc_user_error_response(&self) { + self.user_error_response.inc(); + } +} diff --git a/config-files/config-agg-mode-gateway-ethereum-package.yaml b/config-files/config-agg-mode-gateway-ethereum-package.yaml index 21a8438268..63b37a412a 100644 --- a/config-files/config-agg-mode-gateway-ethereum-package.yaml +++ b/config-files/config-agg-mode-gateway-ethereum-package.yaml @@ -4,3 +4,7 @@ eth_rpc_url: "http://localhost:8545" payment_service_address: "0x922D6956C99E12DFeB3224DEA977D0939758A1Fe" network: "devnet" max_daily_proofs_per_user: 32 +last_block_fetched_filepath: "config-files/proof-aggregator.last_block_fetched.json" + +# Metrics +metrics_port: 9093 diff --git
a/config-files/config-agg-mode-gateway.yaml b/config-files/config-agg-mode-gateway.yaml index a2112f4cf4..379418889d 100644 --- a/config-files/config-agg-mode-gateway.yaml +++ b/config-files/config-agg-mode-gateway.yaml @@ -5,3 +5,6 @@ payment_service_address: "0x922D6956C99E12DFeB3224DEA977D0939758A1Fe" network: "devnet" max_daily_proofs_per_user: 4 last_block_fetched_filepath: "config-files/proof-aggregator.last_block_fetched.json" + +# Metrics +metrics_port: 9093 diff --git a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json new file mode 100644 index 0000000000..cf7da161ab --- /dev/null +++ b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json @@ -0,0 +1,325 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", 
+ "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(api_response_code{statuscode=~\"400\"}[100m])\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "User error response count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": 
"api_response_code{statuscode=\"500\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Server Error response count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "success_response_count{}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Success response count", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "New dashboard", + "uid": "a66a5480-6a60-4b87-9d29-4f0f446edafd", + "version": 1, + "weekStart": "" +} diff 
--git a/prometheus/prometheus.yaml b/prometheus/prometheus.yaml index 7b84e52e39..d7ce023aa9 100644 --- a/prometheus/prometheus.yaml +++ b/prometheus/prometheus.yaml @@ -37,3 +37,10 @@ scrape_configs: - targets: ["host.docker.internal:9100"] labels: bot: "node-exporter" + + - job_name: "gateway-api" + scrape_interval: 60s + static_configs: + - targets: ["host.docker.internal:8089"] + labels: + bot: "gateway-exporter" From 94c04046083f60df9fa5e20f025a7e226b84b2ee Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Fri, 19 Dec 2025 18:36:06 -0300 Subject: [PATCH 02/14] fix: return BadRequest for the request denied exams --- aggregation_mode/gateway/src/http.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index 285fe5e17f..a715e061d1 100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -170,7 +170,7 @@ impl GatewayServer { if daily_tasks_by_address >= state.config.max_daily_proofs_per_user { let formatted_time_left = get_time_left_day_formatted(); - return HttpResponse::InternalServerError().json(AppResponse::new_unsucessfull( + return HttpResponse::BadRequest().json(AppResponse::new_unsucessfull( format!( "Request denied: Query limit exceeded. 
Quotas renew in {formatted_time_left}" ) From 9ce4dde39dd7243fa012c3376ecaf379ed9a3c28 Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 10:36:12 -0300 Subject: [PATCH 03/14] Update the dashboard to include code ranges (2xx, 4xx, 5xx) --- .../aligned/aggregation_mode_gateway.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json index cf7da161ab..e516e099fe 100644 --- a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json +++ b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json @@ -106,7 +106,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\n rate(api_response_code{statuscode=~\"400\"}[100m])\n)", + "expr": "sum(\n rate(api_response_code{statuscode=~\"4..\"}[100m])\n)", "instant": false, "legendFormat": "__auto", "range": true, @@ -200,7 +200,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "api_response_code{statuscode=\"500\"}", + "expr": "sum(\n rate(api_response_code{statuscode=~\"5..\"}[100m])\n)", "instant": false, "legendFormat": "__auto", "range": true, @@ -294,7 +294,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "success_response_count{}", + "expr": "sum(\n rate(api_response_code{statuscode=~\"2..\"}[100m])\n)", "instant": false, "legendFormat": "__auto", "range": true, @@ -305,7 +305,7 @@ "type": "timeseries" } ], - "refresh": false, + "refresh": "", "schemaVersion": 38, "style": "dark", "tags": [], @@ -313,13 +313,13 @@ "list": [] }, "time": { - "from": "now-30m", + "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "", - "title": "New dashboard", + "title": "Aggregation Mode", "uid": "a66a5480-6a60-4b87-9d29-4f0f446edafd", - "version": 1, + "version": 3, "weekStart": "" } From 531e9291f1fba8682512f625d06d269f06ee4f8c Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 
Dec 2025 11:03:10 -0300 Subject: [PATCH 04/14] Filter by range in dashboards and remove the /metrics reqs in 2xx one --- .../dashboards/aligned/aggregation_mode_gateway.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json index e516e099fe..2057298a9c 100644 --- a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json +++ b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json @@ -106,7 +106,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\n rate(api_response_code{statuscode=~\"4..\"}[100m])\n)", + "expr": "sum(\n rate(api_response_code{statuscode=~\"4..\"}[$__range])\n)", "instant": false, "legendFormat": "__auto", "range": true, @@ -200,7 +200,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\n rate(api_response_code{statuscode=~\"5..\"}[100m])\n)", + "expr": "sum(\n rate(api_response_code{statuscode=~\"5..\"}[$__range])\n)", "instant": false, "legendFormat": "__auto", "range": true, @@ -294,7 +294,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\n rate(api_response_code{statuscode=~\"2..\"}[100m])\n)", + "expr": "sum(\n rate(api_response_code{statuscode=~\"2..\",endpoint!=\"/metrics\"}[$__range])\n)", "instant": false, "legendFormat": "__auto", "range": true, From 61f889ab5e2c4b8ece536b34530b1f75d7248786 Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 12:01:11 -0300 Subject: [PATCH 05/14] Add time_elapsed_db_post metric to have a metric in use --- aggregation_mode/gateway/src/http.rs | 17 +- aggregation_mode/gateway/src/metrics.rs | 38 +--- .../aligned/aggregation_mode_gateway.json | 196 +++++++++++++++++- 3 files changed, 215 insertions(+), 36 deletions(-) diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index a715e061d1..ddd1aa97e7 100644 --- 
a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -1,7 +1,7 @@ use std::{ collections::HashMap, str::FromStr, - time::{SystemTime, UNIX_EPOCH}, + time::{Instant, SystemTime, UNIX_EPOCH}, }; use actix_multipart::form::MultipartForm; @@ -240,6 +240,8 @@ impl GatewayServer { return HttpResponse::BadRequest().json(AppResponse::new_unsucessfull(message, 400)); }; + let start = Instant::now(); + match state .db .insert_task( @@ -252,9 +254,16 @@ impl GatewayServer { ) .await { - Ok(task_id) => HttpResponse::Ok().json(AppResponse::new_sucessfull( - serde_json::json!({ "task_id": task_id.to_string() }), - )), + Ok(task_id) => { + let duration = start.elapsed(); + state + .metrics + .register_db_response_time_post(duration.as_secs_f64()); + + HttpResponse::Ok().json(AppResponse::new_sucessfull( + serde_json::json!({ "task_id": task_id.to_string() }), + )) + } Err(_) => HttpResponse::InternalServerError() .json(AppResponse::new_unsucessfull("Internal server error", 500)), } diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs index 6bff5a767c..e31f70a0ec 100644 --- a/aggregation_mode/gateway/src/metrics.rs +++ b/aggregation_mode/gateway/src/metrics.rs @@ -1,29 +1,21 @@ -use prometheus::{self, opts, register_int_counter, IntCounter}; +use prometheus::{self, histogram_opts, register_histogram}; use warp::{reject::Rejection, reply::Reply, Filter}; #[derive(Clone, Debug)] pub struct GatewayMetrics { - pub success_response: IntCounter, - pub server_error_response: IntCounter, - pub user_error_response: IntCounter, + pub time_elapsed_db_post: prometheus::Histogram, } impl GatewayMetrics { pub fn start(metrics_port: u16) -> anyhow::Result<Self> { let registry = prometheus::Registry::new(); - let success_response = - register_int_counter!(opts!("success_response_count", "Success Responses"))?; + let time_elapsed_db_post = register_histogram!(histogram_opts!( + "time_elapsed_db_post", + "Time elapsed in DB posts"
+ ))?; - let server_error_response = - register_int_counter!(opts!("server_error_response_count", "Success Responses"))?; - - let user_error_response = - register_int_counter!(opts!("user_error_response_count", "Success Responses"))?; - - registry.register(Box::new(success_response.clone()))?; - registry.register(Box::new(server_error_response.clone()))?; - registry.register(Box::new(user_error_response.clone()))?; + registry.register(Box::new(time_elapsed_db_post.clone()))?; let metrics_route = warp::path!("metrics") .and(warp::any().map(move || registry.clone())) @@ -36,9 +28,7 @@ impl GatewayMetrics { }); Ok(Self { - success_response, - server_error_response, - user_error_response, + time_elapsed_db_post, }) } @@ -58,15 +48,7 @@ impl GatewayMetrics { Ok(res) } - pub fn inc_success_response(&self) { - self.success_response.inc(); - } - - pub fn inc_server_error_response(&self) { - self.server_error_response.inc(); - } - - pub fn inc_user_error_response(&self) { - self.user_error_response.inc(); + pub fn register_db_response_time_post(&self, value: f64) { + self.time_elapsed_db_post.observe(value); } } diff --git a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json index 2057298a9c..2b88ffb74b 100644 --- a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json +++ b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json @@ -106,7 +106,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\n rate(api_response_code{statuscode=~\"4..\"}[$__range])\n)", + "expr": "sum(\n rate(api_response_code{statuscode=~\"4..\"}[5m])\n)", "instant": false, "legendFormat": "__auto", "range": true, @@ -200,7 +200,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(\n rate(api_response_code{statuscode=~\"5..\"}[$__range])\n)", + "expr": "sum(\n rate(api_response_code{statuscode=~\"5..\"}[100m])\n)", "instant": false, "legendFormat": "__auto", 
"range": true, @@ -303,6 +303,194 @@ ], "title": "Success response count", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (le) (rate(time_elapsed_db_post_bucket[$__rate_interval])))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DB posts latency (p50)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": 
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(time_elapsed_db_post_count[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DB posts throughput", + "type": "timeseries" } ], "refresh": "", @@ -313,8 +501,8 @@ "list": [] }, "time": { - "from": "now-5m", - "to": "now" + "from": "now-19m", + "to": "now-15m" }, "timepicker": {}, "timezone": "", From 00d533835953859ce0473337eaf491717eba898e Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 12:41:39 -0300 Subject: [PATCH 06/14] Add the payments poller metrics integration --- aggregation_mode/Cargo.lock | 3 + aggregation_mode/gateway/src/config.rs | 2 +- aggregation_mode/gateway/src/http.rs | 9 +- aggregation_mode/payments_poller/Cargo.toml | 3 + .../payments_poller/src/config.rs | 1 + aggregation_mode/payments_poller/src/lib.rs | 1 + .../payments_poller/src/metrics.rs | 54 ++++++++++ .../payments_poller/src/payments.rs | 12 +++ ...fig-agg-mode-gateway-ethereum-package.yaml | 5 +- config-files/config-agg-mode-gateway.yaml | 5 +- 
.../aligned/aggregation_mode_gateway.json | 100 +++++++++++++++++- prometheus/prometheus.yaml | 14 +++ 12 files changed, 198 insertions(+), 11 deletions(-) create mode 100644 aggregation_mode/payments_poller/src/metrics.rs diff --git a/aggregation_mode/Cargo.lock b/aggregation_mode/Cargo.lock index 845d0fbb30..33a8937f92 100644 --- a/aggregation_mode/Cargo.lock +++ b/aggregation_mode/Cargo.lock @@ -6894,7 +6894,9 @@ dependencies = [ "actix-web", "aligned-sdk", "alloy", + "anyhow", "hex", + "prometheus", "serde", "serde_json", "serde_yaml", @@ -6902,6 +6904,7 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber 0.3.22", + "warp", ] [[package]] diff --git a/aggregation_mode/gateway/src/config.rs b/aggregation_mode/gateway/src/config.rs index e6b7947679..dfce0b0b29 100644 --- a/aggregation_mode/gateway/src/config.rs +++ b/aggregation_mode/gateway/src/config.rs @@ -8,7 +8,7 @@ pub struct Config { pub db_connection_url: String, pub network: String, pub max_daily_proofs_per_user: i64, - pub metrics_port: u16, + pub gateway_metrics_port: u16, } impl Config { diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index ddd1aa97e7..7eca4a49ae 100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -42,9 +42,12 @@ impl GatewayServer { pub fn new(db: Db, config: Config) -> Self { let network = Network::from_str(&config.network).expect("A valid network in config file"); - tracing::info!("Starting metrics server on port {}", config.metrics_port); - let metrics = - GatewayMetrics::start(config.metrics_port).expect("Failed to start metrics server"); + tracing::info!( + "Starting metrics server on port {}", + config.gateway_metrics_port + ); + let metrics = GatewayMetrics::start(config.gateway_metrics_port) + .expect("Failed to start metrics server"); Self { db, diff --git a/aggregation_mode/payments_poller/Cargo.toml b/aggregation_mode/payments_poller/Cargo.toml index c145fd02e5..108b428ba8 100644 
--- a/aggregation_mode/payments_poller/Cargo.toml +++ b/aggregation_mode/payments_poller/Cargo.toml @@ -8,6 +8,9 @@ serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { workspace = true } aligned-sdk = { workspace = true } +prometheus = { workspace = true } +anyhow = { workspace = true } +warp = { workspace = true } tracing = { version = "0.1", features = ["log"] } tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } actix-web = "4" diff --git a/aggregation_mode/payments_poller/src/config.rs b/aggregation_mode/payments_poller/src/config.rs index 8b51181470..c84362ae8f 100644 --- a/aggregation_mode/payments_poller/src/config.rs +++ b/aggregation_mode/payments_poller/src/config.rs @@ -8,6 +8,7 @@ pub struct Config { pub eth_rpc_url: String, pub payment_service_address: String, pub last_block_fetched_filepath: String, + pub poller_metrics_port: u16, } #[derive(Debug, Deserialize, Serialize)] diff --git a/aggregation_mode/payments_poller/src/lib.rs b/aggregation_mode/payments_poller/src/lib.rs index 84bee5b7ba..654c3e2378 100644 --- a/aggregation_mode/payments_poller/src/lib.rs +++ b/aggregation_mode/payments_poller/src/lib.rs @@ -1,4 +1,5 @@ pub mod config; pub mod db; +pub mod metrics; pub mod payments; pub mod types; diff --git a/aggregation_mode/payments_poller/src/metrics.rs b/aggregation_mode/payments_poller/src/metrics.rs new file mode 100644 index 0000000000..ed7b7c8eb3 --- /dev/null +++ b/aggregation_mode/payments_poller/src/metrics.rs @@ -0,0 +1,54 @@ +use prometheus::{self, opts, register_gauge}; +use warp::{reject::Rejection, reply::Reply, Filter}; + +#[derive(Clone, Debug)] +pub struct PaymentsPollerMetrics { + pub last_processed_block: prometheus::Gauge, +} + +impl PaymentsPollerMetrics { + pub fn start(metrics_port: u16) -> anyhow::Result { + let registry = prometheus::Registry::new(); + + let last_processed_block = register_gauge!(opts!( + "last_processed_block", + "Last processed block by poller" + ))?; + + 
registry.register(Box::new(last_processed_block.clone()))?; + + let metrics_route = warp::path!("metrics") + .and(warp::any().map(move || registry.clone())) + .and_then(PaymentsPollerMetrics::metrics_handler); + + tokio::task::spawn(async move { + warp::serve(metrics_route) + .run(([0, 0, 0, 0], metrics_port)) + .await; + }); + + Ok(Self { + last_processed_block, + }) + } + + pub async fn metrics_handler(registry: prometheus::Registry) -> Result { + use prometheus::Encoder; + let encoder = prometheus::TextEncoder::new(); + + let mut buffer = Vec::new(); + if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) { + eprintln!("could not encode prometheus metrics: {}", e); + }; + let res = String::from_utf8(buffer.clone()) + .inspect_err(|e| eprintln!("prometheus metrics could not be parsed correctly: {e}")) + .unwrap_or_default(); + buffer.clear(); + + Ok(res) + } + + pub fn register_last_processed_block(&self, value: u64) { + self.last_processed_block.set(value as f64); + } +} diff --git a/aggregation_mode/payments_poller/src/payments.rs b/aggregation_mode/payments_poller/src/payments.rs index 78639e6176..ac72dd54a4 100644 --- a/aggregation_mode/payments_poller/src/payments.rs +++ b/aggregation_mode/payments_poller/src/payments.rs @@ -3,6 +3,7 @@ use std::str::FromStr; use crate::{ config::Config, db::Db, + metrics::PaymentsPollerMetrics, types::{AggregationModePaymentService, AggregationModePaymentServiceContract, RpcProvider}, }; use alloy::{ @@ -21,6 +22,7 @@ pub struct PaymentsPoller { proof_aggregation_service: AggregationModePaymentServiceContract, rpc_provider: RpcProvider, config: Config, + metrics: PaymentsPollerMetrics, } impl PaymentsPoller { @@ -38,11 +40,19 @@ impl PaymentsPoller { .get_last_block_fetched() .map_err(|err| PaymentsPollerError::ReadLastBlockError(err.to_string())); + tracing::info!( + "Starting metrics server on port {}", + config.poller_metrics_port + ); + let metrics = PaymentsPollerMetrics::start(config.poller_metrics_port) +
.expect("Failed to start metrics server"); + Ok(Self { db, proof_aggregation_service, rpc_provider, config, + metrics, }) } @@ -121,6 +131,8 @@ impl PaymentsPoller { continue; }; + self.metrics.register_last_processed_block(current_block); + tokio::time::sleep(std::time::Duration::from_secs( seconds_to_wait_between_polls, )) diff --git a/config-files/config-agg-mode-gateway-ethereum-package.yaml b/config-files/config-agg-mode-gateway-ethereum-package.yaml index 63b37a412a..e4f4ce7b41 100644 --- a/config-files/config-agg-mode-gateway-ethereum-package.yaml +++ b/config-files/config-agg-mode-gateway-ethereum-package.yaml @@ -3,8 +3,9 @@ db_connection_url: "postgres://postgres:postgres@localhost:5435/" eth_rpc_url: "http://localhost:8545" payment_service_address: "0x922D6956C99E12DFeB3224DEA977D0939758A1Fe" network: "devnet" -max_daily_proofs_per_user: 32 +max_daily_proofs_per_user: 100 last_block_fetched_filepath: "config-files/proof-aggregator.last_block_fetched.json" # Metrics -metrics_port: 9093 +gateway_metrics_port: 9094 +poller_metrics_port: 9095 diff --git a/config-files/config-agg-mode-gateway.yaml b/config-files/config-agg-mode-gateway.yaml index 379418889d..e4f4ce7b41 100644 --- a/config-files/config-agg-mode-gateway.yaml +++ b/config-files/config-agg-mode-gateway.yaml @@ -3,8 +3,9 @@ db_connection_url: "postgres://postgres:postgres@localhost:5435/" eth_rpc_url: "http://localhost:8545" payment_service_address: "0x922D6956C99E12DFeB3224DEA977D0939758A1Fe" network: "devnet" -max_daily_proofs_per_user: 4 +max_daily_proofs_per_user: 100 last_block_fetched_filepath: "config-files/proof-aggregator.last_block_fetched.json" # Metrics -metrics_port: 9093 +gateway_metrics_port: 9094 +poller_metrics_port: 9095 diff --git a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json index 2b88ffb74b..7e34382519 100644 --- 
a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json +++ b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json @@ -462,6 +462,100 @@ "x": 12, "y": 8 }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "last_processed_block{}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Payments Poller Last Processed Block", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, "id": 5, "options": { "legend": { @@ -501,13 +595,13 @@ "list": [] }, "time": { - "from": "now-19m", - "to": "now-15m" + "from": "now-5m", + "to": "now" }, "timepicker": {}, "timezone": "", "title": "Aggregation Mode", "uid": "a66a5480-6a60-4b87-9d29-4f0f446edafd", - "version": 3, + "version": 12, "weekStart": "" } diff 
--git a/prometheus/prometheus.yaml b/prometheus/prometheus.yaml index d7ce023aa9..3de7b120f9 100644 --- a/prometheus/prometheus.yaml +++ b/prometheus/prometheus.yaml @@ -44,3 +44,17 @@ scrape_configs: - targets: ["host.docker.internal:8089"] labels: bot: "gateway-exporter" + + - job_name: "aggregation-mode-gateway" + scrape_interval: 1s + static_configs: + - targets: ["host.docker.internal:9094"] + labels: + bot: "aggregation-gateway" + + - job_name: "aggregation-mode-payments-poller" + scrape_interval: 1s + static_configs: + - targets: ["host.docker.internal:9095"] + labels: + bot: "aggregation-poller" From 77fc789118e6d9d338192b2c946aea6e9f0c4137 Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 12:43:02 -0300 Subject: [PATCH 07/14] fix clippy lints on agg mode --- aggregation_mode/gateway/src/metrics.rs | 2 +- aggregation_mode/payments_poller/src/metrics.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs index e31f70a0ec..d9d4ddb0d9 100644 --- a/aggregation_mode/gateway/src/metrics.rs +++ b/aggregation_mode/gateway/src/metrics.rs @@ -38,7 +38,7 @@ impl GatewayMetrics { let mut buffer = Vec::new(); if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) { - eprintln!("could not encode prometheus metrics: {}", e); + eprintln!("could not encode prometheus metrics: {e}"); }; let res = String::from_utf8(buffer.clone()) .inspect_err(|e| eprintln!("prometheus metrics could not be parsed correctly: {e}")) diff --git a/aggregation_mode/payments_poller/src/metrics.rs b/aggregation_mode/payments_poller/src/metrics.rs index ed7b7c8eb3..4b0c921abe 100644 --- a/aggregation_mode/payments_poller/src/metrics.rs +++ b/aggregation_mode/payments_poller/src/metrics.rs @@ -38,7 +38,7 @@ impl PaymentsPollerMetrics { let mut buffer = Vec::new(); if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) { - eprintln!("could not encode prometheus metrics: {}",
e); + eprintln!("could not encode prometheus metrics: {e}"); }; let res = String::from_utf8(buffer.clone()) .inspect_err(|e| eprintln!("prometheus metrics could not be parsed correctly: {e}")) From 710ff46831f13e6070167f773dab4dfe71d057da Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 15:22:11 -0300 Subject: [PATCH 08/14] Remove unnecessary labeling on actix prometheus metrics builder --- aggregation_mode/gateway/src/http.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index 7eca4a49ae..a55d930bed 100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -1,5 +1,4 @@ use std::{ - collections::HashMap, str::FromStr, time::{Instant, SystemTime, UNIX_EPOCH}, }; @@ -62,11 +61,8 @@ impl GatewayServer { let port = self.config.port; let state = self.clone(); - let mut labels = HashMap::new(); - labels.insert("label1".to_string(), "value1".to_string()); let prometheus = PrometheusMetricsBuilder::new("api") .endpoint("/metrics") - .const_labels(labels) .build() .unwrap(); From 3aeb64a76c6ffc70ba361070db1accf6bc6cc059 Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 15:29:01 -0300 Subject: [PATCH 09/14] use tracing::error instead of eprintln --- aggregation_mode/gateway/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs index d9d4ddb0d9..28d5cbb5a5 100644 --- a/aggregation_mode/gateway/src/metrics.rs +++ b/aggregation_mode/gateway/src/metrics.rs @@ -38,7 +38,7 @@ impl GatewayMetrics { let mut buffer = Vec::new(); if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) { - eprintln!("could not encode prometheus metrics: {e}"); + tracing::error!("could not encode prometheus metrics: {e}"); }; let res = String::from_utf8(buffer.clone()) .inspect_err(|e| eprintln!("prometheus metrics could not be parsed
correctly: {e}")) From 07eb91b9449b570739975792b2b9cae032b893fd Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 15:38:46 -0300 Subject: [PATCH 10/14] Remove anyhow dependency --- aggregation_mode/Cargo.lock | 2 -- aggregation_mode/Cargo.toml | 1 - aggregation_mode/gateway/Cargo.toml | 1 - aggregation_mode/gateway/src/metrics.rs | 2 +- aggregation_mode/payments_poller/Cargo.toml | 1 - aggregation_mode/payments_poller/src/metrics.rs | 2 +- 6 files changed, 2 insertions(+), 7 deletions(-) diff --git a/aggregation_mode/Cargo.lock b/aggregation_mode/Cargo.lock index 33a8937f92..d17757dfb2 100644 --- a/aggregation_mode/Cargo.lock +++ b/aggregation_mode/Cargo.lock @@ -4534,7 +4534,6 @@ dependencies = [ "agg_mode_sdk", "aligned-sdk", "alloy", - "anyhow", "bincode", "hex", "prometheus", @@ -6894,7 +6893,6 @@ dependencies = [ "actix-web", "aligned-sdk", "alloy", - "anyhow", "hex", "prometheus", "serde", diff --git a/aggregation_mode/Cargo.toml b/aggregation_mode/Cargo.toml index b61eb7cde1..6187dc3099 100644 --- a/aggregation_mode/Cargo.toml +++ b/aggregation_mode/Cargo.toml @@ -17,7 +17,6 @@ db = { path = "./db" } sp1-sdk = "5.0.0" risc0-zkvm = { version = "3.0.3" } prometheus = { version = "0.13.4", features = ["process"] } -anyhow = { version = "1.0" } warp = "0.3.7" [profile.release] diff --git a/aggregation_mode/gateway/Cargo.toml b/aggregation_mode/gateway/Cargo.toml index e4347c50b2..253800676e 100644 --- a/aggregation_mode/gateway/Cargo.toml +++ b/aggregation_mode/gateway/Cargo.toml @@ -8,7 +8,6 @@ serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { workspace = true } prometheus = { workspace = true } -anyhow = { workspace = true } warp = { workspace = true } agg_mode_sdk = { path = "../sdk"} aligned-sdk = { workspace = true } diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs index 28d5cbb5a5..6ae72db7d9 100644 --- a/aggregation_mode/gateway/src/metrics.rs +++ 
b/aggregation_mode/gateway/src/metrics.rs @@ -7,7 +7,7 @@ pub struct GatewayMetrics { } impl GatewayMetrics { - pub fn start(metrics_port: u16) -> anyhow::Result { + pub fn start(metrics_port: u16) -> Result { let registry = prometheus::Registry::new(); let time_elapsed_db_post = register_histogram!(histogram_opts!( diff --git a/aggregation_mode/payments_poller/Cargo.toml b/aggregation_mode/payments_poller/Cargo.toml index 108b428ba8..93fbe9b6ed 100644 --- a/aggregation_mode/payments_poller/Cargo.toml +++ b/aggregation_mode/payments_poller/Cargo.toml @@ -9,7 +9,6 @@ serde_json = { workspace = true } serde_yaml = { workspace = true } aligned-sdk = { workspace = true } prometheus = { workspace = true } -anyhow = { workspace = true } warp = { workspace = true } tracing = { version = "0.1", features = ["log"] } tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } diff --git a/aggregation_mode/payments_poller/src/metrics.rs b/aggregation_mode/payments_poller/src/metrics.rs index 4b0c921abe..0283b32752 100644 --- a/aggregation_mode/payments_poller/src/metrics.rs +++ b/aggregation_mode/payments_poller/src/metrics.rs @@ -7,7 +7,7 @@ pub struct PaymentsPollerMetrics { } impl PaymentsPollerMetrics { - pub fn start(metrics_port: u16) -> anyhow::Result { + pub fn start(metrics_port: u16) -> Result { let registry = prometheus::Registry::new(); let last_processed_block = register_gauge!(opts!( From dad51de8c303e27e5be188b9db50cd011911ae8e Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Mon, 22 Dec 2025 18:02:08 -0300 Subject: [PATCH 11/14] Convert the warp servers into --- aggregation_mode/Cargo.lock | 110 ------------------ aggregation_mode/Cargo.toml | 1 - aggregation_mode/gateway/Cargo.toml | 1 - aggregation_mode/gateway/src/metrics.rs | 59 ++++++---- aggregation_mode/payments_poller/Cargo.toml | 1 - .../payments_poller/src/metrics.rs | 66 ++++++----- 6 files changed, 73 insertions(+), 165 deletions(-) diff --git a/aggregation_mode/Cargo.lock 
b/aggregation_mode/Cargo.lock index d17757dfb2..b291acd46b 100644 --- a/aggregation_mode/Cargo.lock +++ b/aggregation_mode/Cargo.lock @@ -4545,7 +4545,6 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber 0.3.22", - "warp", ] [[package]] @@ -4849,30 +4848,6 @@ dependencies = [ "hashbrown 0.15.5", ] -[[package]] -name = "headers" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270" -dependencies = [ - "base64 0.21.7", - "bytes", - "headers-core", - "http 0.2.12", - "httpdate", - "mime", - "sha1", -] - -[[package]] -name = "headers-core" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" -dependencies = [ - "http 0.2.12", -] - [[package]] name = "heapless" version = "0.7.17" @@ -6028,24 +6003,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "multer" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" -dependencies = [ - "bytes", - "encoding_rs", - "futures-util", - "http 0.2.12", - "httparse", - "log", - "memchr", - "mime", - "spin 0.9.8", - "version_check", -] - [[package]] name = "multimap" version = "0.10.1" @@ -6902,7 +6859,6 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber 0.3.22", - "warp", ] [[package]] @@ -8850,12 +8806,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "scopeguard" version = "1.2.0" @@ -10616,18 +10566,6 @@ dependencies = [ "webpki-roots 0.25.4", ] -[[package]] -name = "tokio-tungstenite" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite 0.21.0", -] - [[package]] name = "tokio-tungstenite" version = "0.23.1" @@ -10968,25 +10906,6 @@ dependencies = [ "utf-8", ] -[[package]] -name = "tungstenite" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1" -dependencies = [ - "byteorder", - "bytes", - "data-encoding", - "http 1.4.0", - "httparse", - "log", - "rand 0.8.5", - "sha1", - "thiserror 1.0.69", - "url", - "utf-8", -] - [[package]] name = "tungstenite" version = "0.23.0" @@ -11302,35 +11221,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "warp" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4378d202ff965b011c64817db11d5829506d3404edeadb61f190d111da3f231c" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "headers", - "http 0.2.12", - "hyper 0.14.32", - "log", - "mime", - "mime_guess", - "multer", - "percent-encoding", - "pin-project", - "scoped-tls", - "serde", - "serde_json", - "serde_urlencoded", - "tokio", - "tokio-tungstenite 0.21.0", - "tokio-util", - "tower-service", - "tracing", -] - [[package]] name = "wasi" version = "0.10.2+wasi-snapshot-preview1" diff --git a/aggregation_mode/Cargo.toml b/aggregation_mode/Cargo.toml index 6187dc3099..67bd14f395 100644 --- a/aggregation_mode/Cargo.toml +++ b/aggregation_mode/Cargo.toml @@ -17,7 +17,6 @@ db = { path = "./db" } sp1-sdk = "5.0.0" risc0-zkvm = { version = "3.0.3" } prometheus = { version = "0.13.4", features = ["process"] } -warp = "0.3.7" [profile.release] opt-level = 3 diff --git a/aggregation_mode/gateway/Cargo.toml b/aggregation_mode/gateway/Cargo.toml index 253800676e..9ff8ddb023 100644 --- a/aggregation_mode/gateway/Cargo.toml +++ b/aggregation_mode/gateway/Cargo.toml @@ -8,7 +8,6 @@ serde = { workspace 
= true } serde_json = { workspace = true } serde_yaml = { workspace = true } prometheus = { workspace = true } -warp = { workspace = true } agg_mode_sdk = { path = "../sdk"} aligned-sdk = { workspace = true } sp1-sdk = { workspace = true } diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs index 6ae72db7d9..451541c755 100644 --- a/aggregation_mode/gateway/src/metrics.rs +++ b/aggregation_mode/gateway/src/metrics.rs @@ -1,51 +1,60 @@ -use prometheus::{self, histogram_opts, register_histogram}; -use warp::{reject::Rejection, reply::Reply, Filter}; +use actix_web::{web, App, HttpResponse, HttpServer, Responder}; +use prometheus::{self, histogram_opts, Encoder, Histogram, Registry, TextEncoder}; +use std::sync::Arc; #[derive(Clone, Debug)] pub struct GatewayMetrics { - pub time_elapsed_db_post: prometheus::Histogram, + pub registry: Registry, + pub time_elapsed_db_post: Histogram, } impl GatewayMetrics { pub fn start(metrics_port: u16) -> Result { - let registry = prometheus::Registry::new(); + let registry = Registry::new(); - let time_elapsed_db_post = register_histogram!(histogram_opts!( + let time_elapsed_db_post = Histogram::with_opts(histogram_opts!( "time_elapsed_db_post", "Time elapsed in DB posts" ))?; registry.register(Box::new(time_elapsed_db_post.clone()))?; - let metrics_route = warp::path!("metrics") - .and(warp::any().map(move || registry.clone())) - .and_then(GatewayMetrics::metrics_handler); + // Arc is used because metrics are a shared resource accessed by both the background and metrics HTTP + // server and the application code, across multiple Actix worker threads. The server outlives start(), + // so the data must be static and safely shared between threads. 
+ let metrics = Arc::new(Self { + registry, + time_elapsed_db_post, + }); - tokio::task::spawn(async move { - warp::serve(metrics_route) - .run(([0, 0, 0, 0], metrics_port)) - .await; + let server_metrics = metrics.clone(); + tokio::spawn(async move { + let _ = HttpServer::new(move || { + App::new() + .app_data(web::Data::new(server_metrics.clone())) + .route("/metrics", web::get().to(GatewayMetrics::metrics_handler)) + }) + .bind(("0.0.0.0", metrics_port)) + .expect("failed to bind metrics server") + .run() + .await; }); - Ok(Self { - time_elapsed_db_post, - }) + Ok(Arc::try_unwrap(metrics).unwrap_or_else(|arc| (*arc).clone())) } - pub async fn metrics_handler(registry: prometheus::Registry) -> Result { - use prometheus::Encoder; - let encoder = prometheus::TextEncoder::new(); + async fn metrics_handler(metrics: web::Data>) -> impl Responder { + let encoder = TextEncoder::new(); + let metric_families = metrics.registry.gather(); let mut buffer = Vec::new(); - if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) { + if let Err(e) = encoder.encode(&metric_families, &mut buffer) { tracing::error!("could not encode prometheus metrics: {e}"); - }; - let res = String::from_utf8(buffer.clone()) - .inspect_err(|e| eprintln!("prometheus metrics could not be parsed correctly: {e}")) - .unwrap_or_default(); - buffer.clear(); + } - Ok(res) + HttpResponse::Ok() + .insert_header(("Content-Type", encoder.format_type())) + .body(buffer) } pub fn register_db_response_time_post(&self, value: f64) { diff --git a/aggregation_mode/payments_poller/Cargo.toml b/aggregation_mode/payments_poller/Cargo.toml index 93fbe9b6ed..d6273c3247 100644 --- a/aggregation_mode/payments_poller/Cargo.toml +++ b/aggregation_mode/payments_poller/Cargo.toml @@ -9,7 +9,6 @@ serde_json = { workspace = true } serde_yaml = { workspace = true } aligned-sdk = { workspace = true } prometheus = { workspace = true } -warp = { workspace = true } tracing = { version = "0.1", features = ["log"] }
tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } actix-web = "4" diff --git a/aggregation_mode/payments_poller/src/metrics.rs b/aggregation_mode/payments_poller/src/metrics.rs index 0283b32752..2f41701d45 100644 --- a/aggregation_mode/payments_poller/src/metrics.rs +++ b/aggregation_mode/payments_poller/src/metrics.rs @@ -1,51 +1,63 @@ -use prometheus::{self, opts, register_gauge}; -use warp::{reject::Rejection, reply::Reply, Filter}; +use actix_web::{web, App, HttpResponse, HttpServer, Responder}; +use prometheus::{self, opts, Encoder, Gauge, Registry, TextEncoder}; +use std::sync::Arc; #[derive(Clone, Debug)] pub struct PaymentsPollerMetrics { - pub last_processed_block: prometheus::Gauge, + pub registry: Registry, + pub last_processed_block: Gauge, } impl PaymentsPollerMetrics { pub fn start(metrics_port: u16) -> Result { - let registry = prometheus::Registry::new(); + let registry = Registry::new(); - let last_processed_block = register_gauge!(opts!( + let last_processed_block = Gauge::with_opts(opts!( "last_processed_block", "Last processed block by poller" ))?; registry.register(Box::new(last_processed_block.clone()))?; - let metrics_route = warp::path!("metrics") - .and(warp::any().map(move || registry.clone())) - .and_then(PaymentsPollerMetrics::metrics_handler); + // Arc is used because metrics are a shared resource accessed by both the background and metrics HTTP + // server and the application code, across multiple Actix worker threads. The server outlives start(), + // so the data must be static and safely shared between threads. 
+ let metrics = Arc::new(Self { + registry, + last_processed_block, + }); - tokio::task::spawn(async move { - warp::serve(metrics_route) - .run(([0, 0, 0, 0], metrics_port)) - .await; + let server_metrics = metrics.clone(); + tokio::spawn(async move { + let _ = HttpServer::new(move || { + App::new() + .app_data(web::Data::new(server_metrics.clone())) + .route( + "/metrics", + web::get().to(PaymentsPollerMetrics::metrics_handler), + ) + }) + .bind(("0.0.0.0", metrics_port)) + .expect("failed to bind metrics server") + .run() + .await; }); - Ok(Self { - last_processed_block, - }) + Ok(Arc::try_unwrap(metrics).unwrap_or_else(|arc| (*arc).clone())) } - pub async fn metrics_handler(registry: prometheus::Registry) -> Result { - use prometheus::Encoder; - let encoder = prometheus::TextEncoder::new(); + async fn metrics_handler(metrics: web::Data>) -> impl Responder { + let encoder = TextEncoder::new(); + let metric_families = metrics.registry.gather(); let mut buffer = Vec::new(); - if let Err(e) = encoder.encode(&registry.gather(), &mut buffer) { - eprintln!("could not encode prometheus metrics: {e}"); - }; - let res = String::from_utf8(buffer.clone()) - .inspect_err(|e| eprintln!("prometheus metrics could not be parsed correctly: {e}")) - .unwrap_or_default(); - buffer.clear(); - - Ok(res) + if let Err(e) = encoder.encode(&metric_families, &mut buffer) { + tracing::error!("could not encode prometheus metrics: {e}"); + } + + HttpResponse::Ok() + .insert_header(("Content-Type", encoder.format_type())) + .body(buffer) } pub fn register_last_processed_block(&self, value: u64) { From deaa77a90dc0bb0c4a48ea89a79cebb87e658779 Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Tue, 23 Dec 2025 15:03:26 -0300 Subject: [PATCH 12/14] Add comment about the actix http prometheus server --- aggregation_mode/gateway/src/http.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index d051b402f9..d6dfe6e797
100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -61,6 +61,8 @@ impl GatewayServer { let port = self.config.port; let state = self.clone(); + // Note: This creates a new Prometheus server different from the one created in GatewayServer::new. The created + // server exposes metrics related to the actix HTTP server, like response codes and response times let prometheus = PrometheusMetricsBuilder::new("api") .endpoint("/metrics") .build() From 09eb0154f991c9cc4c5359507fc9778aa6b738dd Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Tue, 23 Dec 2025 15:27:18 -0300 Subject: [PATCH 13/14] Return an Arc in the Metrics structs start method --- aggregation_mode/gateway/src/http.rs | 3 ++- aggregation_mode/gateway/src/metrics.rs | 4 ++-- aggregation_mode/payments_poller/src/metrics.rs | 4 ++-- aggregation_mode/payments_poller/src/payments.rs | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index d6dfe6e797..500f06fab4 100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -1,5 +1,6 @@ use std::{ str::FromStr, + sync::Arc, time::{Instant, SystemTime, UNIX_EPOCH}, }; @@ -34,7 +35,7 @@ pub struct GatewayServer { db: Db, config: Config, network: Network, - metrics: GatewayMetrics, + metrics: Arc, } impl GatewayServer { diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs index 451541c755..1e3a06286a 100644 --- a/aggregation_mode/gateway/src/metrics.rs +++ b/aggregation_mode/gateway/src/metrics.rs @@ -9,7 +9,7 @@ pub struct GatewayMetrics { } impl GatewayMetrics { - pub fn start(metrics_port: u16) -> Result { + pub fn start(metrics_port: u16) -> Result, prometheus::Error> { let registry = Registry::new(); let time_elapsed_db_post = Histogram::with_opts(histogram_opts!( @@ -40,7 +40,7 @@ impl GatewayMetrics { .await; }); - 
Ok(Arc::try_unwrap(metrics).unwrap_or_else(|arc| (*arc).clone())) + Ok(metrics) } async fn metrics_handler(metrics: web::Data>) -> impl Responder { diff --git a/aggregation_mode/payments_poller/src/metrics.rs b/aggregation_mode/payments_poller/src/metrics.rs index 2f41701d45..aabdc5e5ae 100644 --- a/aggregation_mode/payments_poller/src/metrics.rs +++ b/aggregation_mode/payments_poller/src/metrics.rs @@ -9,7 +9,7 @@ pub struct PaymentsPollerMetrics { } impl PaymentsPollerMetrics { - pub fn start(metrics_port: u16) -> Result { + pub fn start(metrics_port: u16) -> Result, prometheus::Error> { let registry = Registry::new(); let last_processed_block = Gauge::with_opts(opts!( @@ -43,7 +43,7 @@ impl PaymentsPollerMetrics { .await; }); - Ok(Arc::try_unwrap(metrics).unwrap_or_else(|arc| (*arc).clone())) + Ok(metrics) } async fn metrics_handler(metrics: web::Data>) -> impl Responder { diff --git a/aggregation_mode/payments_poller/src/payments.rs b/aggregation_mode/payments_poller/src/payments.rs index ac72dd54a4..005c56e7dd 100644 --- a/aggregation_mode/payments_poller/src/payments.rs +++ b/aggregation_mode/payments_poller/src/payments.rs @@ -1,4 +1,4 @@ -use std::str::FromStr; +use std::{str::FromStr, sync::Arc}; use crate::{ config::Config, @@ -22,7 +22,7 @@ pub struct PaymentsPoller { proof_aggregation_service: AggregationModePaymentServiceContract, rpc_provider: RpcProvider, config: Config, - metrics: PaymentsPollerMetrics, + metrics: Arc, } impl PaymentsPoller { From 7ee82013a76a6dd7c4d845bc2f02ddfcce88605d Mon Sep 17 00:00:00 2001 From: maximopalopoli Date: Tue, 23 Dec 2025 16:16:17 -0300 Subject: [PATCH 14/14] Make the time_elapsed_db_query to be a histogram vec --- aggregation_mode/gateway/src/http.rs | 2 +- aggregation_mode/gateway/src/metrics.rs | 22 ++++++++++--------- .../aligned/aggregation_mode_gateway.json | 8 +++---- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs 
index 500f06fab4..ee4fdff5b0 100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -260,7 +260,7 @@ impl GatewayServer { let duration = start.elapsed(); state .metrics - .register_db_response_time_post(duration.as_secs_f64()); + .register_db_response_time_post("sp1-post", duration.as_secs_f64()); HttpResponse::Ok().json(AppResponse::new_sucessfull( serde_json::json!({ "task_id": task_id.to_string() }), diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs index 1e3a06286a..f2621ffc2c 100644 --- a/aggregation_mode/gateway/src/metrics.rs +++ b/aggregation_mode/gateway/src/metrics.rs @@ -1,30 +1,30 @@ use actix_web::{web, App, HttpResponse, HttpServer, Responder}; -use prometheus::{self, histogram_opts, Encoder, Histogram, Registry, TextEncoder}; +use prometheus::{self, histogram_opts, Encoder, HistogramVec, Registry, TextEncoder}; use std::sync::Arc; #[derive(Clone, Debug)] pub struct GatewayMetrics { pub registry: Registry, - pub time_elapsed_db_post: Histogram, + pub time_elapsed_db_query: HistogramVec, } impl GatewayMetrics { pub fn start(metrics_port: u16) -> Result, prometheus::Error> { let registry = Registry::new(); - let time_elapsed_db_post = Histogram::with_opts(histogram_opts!( - "time_elapsed_db_post", - "Time elapsed in DB posts" - ))?; + let time_elapsed_db_query = HistogramVec::new( + histogram_opts!("time_elapsed_db_query", "Time elapsed in DB posts"), + &["query"], + )?; - registry.register(Box::new(time_elapsed_db_post.clone()))?; + registry.register(Box::new(time_elapsed_db_query.clone()))?; // Arc is used because metrics are a shared resource accessed by both the background and metrics HTTP // server and the application code, across multiple Actix worker threads. The server outlives start(), // so the data must be static and safely shared between threads. 
let metrics = Arc::new(Self { registry, - time_elapsed_db_post, + time_elapsed_db_query, }); let server_metrics = metrics.clone(); @@ -57,7 +57,9 @@ impl GatewayMetrics { .body(buffer) } - pub fn register_db_response_time_post(&self, value: f64) { - self.time_elapsed_db_post.observe(value); + pub fn register_db_response_time_post(&self, query: &str, value: f64) { + self.time_elapsed_db_query + .with_label_values(&[query]) + .observe(value); } } diff --git a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json index 7e34382519..951d2179b9 100644 --- a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json +++ b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 2, + "id": 8, "links": [], "liveNow": false, "panels": [ @@ -388,7 +388,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "histogram_quantile(0.50, sum by (le) (rate(time_elapsed_db_post_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.50, sum by (le) (rate(time_elapsed_db_query_bucket{query=\"sp1-post\"}[$__rate_interval])))", "instant": false, "legendFormat": "__auto", "range": true, @@ -576,7 +576,7 @@ "uid": "prometheus" }, "editorMode": "code", - "expr": "sum(rate(time_elapsed_db_post_count[$__rate_interval]))", + "expr": "sum(rate(time_elapsed_db_query_count[$__rate_interval]))", "instant": false, "legendFormat": "__auto", "range": true, @@ -602,6 +602,6 @@ "timezone": "", "title": "Aggregation Mode", "uid": "a66a5480-6a60-4b87-9d29-4f0f446edafd", - "version": 12, + "version": 1, "weekStart": "" }