diff --git a/aggregation_mode/Cargo.lock b/aggregation_mode/Cargo.lock index 6e424297a6..b291acd46b 100644 --- a/aggregation_mode/Cargo.lock +++ b/aggregation_mode/Cargo.lock @@ -233,6 +233,21 @@ dependencies = [ "syn 2.0.111", ] +[[package]] +name = "actix-web-prometheus" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5228fd1a6b5d0f60d636776c2a70acc9fc667034bb4ac02ec4259f0eeeab6c" +dependencies = [ + "actix-service", + "actix-web", + "futures-lite", + "pin-project", + "prometheus", + "quanta", + "thiserror 1.0.69", +] + [[package]] name = "addchain" version = "0.2.0" @@ -1588,7 +1603,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "hex", "http 1.4.0", "ring 0.17.14", @@ -1648,7 +1663,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http-body 0.4.6", "percent-encoding", @@ -1673,7 +1688,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -1695,7 +1710,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -1717,7 +1732,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -1740,7 +1755,7 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "regex-lite", "tracing", @@ -1871,7 +1886,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -2023,7 +2038,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" dependencies = [ - "fastrand", + "fastrand 2.3.0", "gloo-timers 0.3.0", "tokio", ] @@ -4132,6 +4147,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -4409,6 +4433,21 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-locks" version = "0.7.1" @@ -4491,11 +4530,13 @@ version = "0.1.0" dependencies = [ "actix-multipart", "actix-web", + "actix-web-prometheus", "agg_mode_sdk", "aligned-sdk", "alloy", "bincode", "hex", + "prometheus", "serde", "serde_json", "serde_yaml", @@ -4572,7 +4613,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -5725,6 +5766,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "macro-string" version = "0.1.4" @@ -5949,7 +5999,7 @@ checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" dependencies = [ "libc", "log", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] @@ -6801,6 +6851,7 @@ dependencies = [ "aligned-sdk", "alloy", "hex", + "prometheus", "serde", "serde_json", "serde_yaml", @@ -7150,6 +7201,46 @@ dependencies = [ "version_check", ] +[[package]] +name = "procfs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" +dependencies = [ + "bitflags 2.10.0", + "hex", + "lazy_static", + "procfs-core", + "rustix 0.38.44", +] + +[[package]] +name = "procfs-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" +dependencies = [ + "bitflags 2.10.0", + "hex", +] + +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "libc", + "memchr", + "parking_lot", + "procfs", + "protobuf", + "thiserror 1.0.69", +] + [[package]] name = "proof_aggregator" version = "0.1.0" @@ -7252,6 +7343,12 @@ dependencies = [ "prost", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "puffin" version = "0.19.1" @@ -7266,6 +7363,22 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "quanta" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e31331286705f455e56cca62e0e717158474ff02b7936c1fa596d983f4ae27" +dependencies = [ + "crossbeam-utils", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.2+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -7431,6 +7544,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -10218,7 +10340,7 @@ version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ - "fastrand", + "fastrand 2.3.0", "getrandom 0.3.4", "once_cell", "rustix 1.1.2", @@ -11074,6 +11196,12 @@ dependencies = [ "libc", ] +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "walkdir" version = "2.5.0" @@ -11093,6 +11221,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" diff --git a/aggregation_mode/Cargo.toml b/aggregation_mode/Cargo.toml index 2ab51157c9..67bd14f395 100644 --- a/aggregation_mode/Cargo.toml +++ b/aggregation_mode/Cargo.toml @@ -16,6 +16,7 @@ aligned-sdk = { path = "../crates/sdk/" } db = { path = "./db" } sp1-sdk = "5.0.0" risc0-zkvm = { version = "3.0.3" } +prometheus = { version = "0.13.4", features = ["process"] } [profile.release] opt-level = 3 diff --git a/aggregation_mode/gateway/Cargo.toml b/aggregation_mode/gateway/Cargo.toml index 79ef53351c..9ff8ddb023 100644 --- a/aggregation_mode/gateway/Cargo.toml +++ b/aggregation_mode/gateway/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { workspace = true } +prometheus = { workspace = true } agg_mode_sdk = { path = "../sdk"} aligned-sdk = { workspace = true } sp1-sdk = { workspace = true } @@ -15,6 +16,7 @@ tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } bincode = "1.3.3" actix-web = "4" actix-multipart = "0.7.2" +actix-web-prometheus = "0.1.0-beta.8" alloy = { workspace = true } tokio = { version = "1", features = ["time"]} # TODO: enable tls diff --git a/aggregation_mode/gateway/src/config.rs b/aggregation_mode/gateway/src/config.rs index 8af91a2a97..616ef67a59 100644 --- a/aggregation_mode/gateway/src/config.rs +++ b/aggregation_mode/gateway/src/config.rs @@ -9,6 +9,7 @@ pub struct Config { pub db_connection_url: String, pub network: String, pub max_daily_proofs_per_user: i64, + pub gateway_metrics_port: u16, } impl Config { diff --git a/aggregation_mode/gateway/src/http.rs b/aggregation_mode/gateway/src/http.rs index 2a88116a02..ee4fdff5b0 100644 --- a/aggregation_mode/gateway/src/http.rs +++ b/aggregation_mode/gateway/src/http.rs @@ -1,6 +1,7 @@ use std::{ str::FromStr, - time::{SystemTime, UNIX_EPOCH}, + sync::Arc, + time::{Instant, SystemTime, UNIX_EPOCH}, }; use actix_multipart::form::MultipartForm; @@ -8,6 +9,7 @@ use actix_web::{ web::{self, Data}, App, HttpRequest, HttpResponse, HttpServer, Responder, }; +use actix_web_prometheus::PrometheusMetricsBuilder; use agg_mode_sdk::types::Network; use aligned_sdk::aggregation_layer::AggregationModeProvingSystem; use alloy::signers::Signature; @@ -23,6 +25,7 @@ use crate::{ config::Config, db::Db, helpers::get_time_left_day_formatted, + metrics::GatewayMetrics, types::{GetReceiptsResponse, SubmitProofRequestRisc0, SubmitProofRequestSP1}, verifiers::{verify_sp1_proof, VerificationError}, }; @@ -32,15 +35,25 @@ pub struct GatewayServer { db: Db, config: Config, network: Network, + metrics: Arc, } impl GatewayServer { pub fn new(db: Db, config: Config) -> Self { let network = Network::from_str(&config.network).expect("A valid network in config file"); + + tracing::info!( + "Starting metrics server on port {}", + config.gateway_metrics_port + ); + let metrics = GatewayMetrics::start(config.gateway_metrics_port) + .expect("Failed to start metrics server"); + Self { db, config, network, + metrics, } } @@ -49,10 +62,18 @@ impl GatewayServer { let port = self.config.port; let state = self.clone(); + // Note: This creates a new Prometheus server different from the one created in GatewayServer::new. The created + // server exposes metrics related to the actix HTTP server, like response codes and response times + let prometheus = PrometheusMetricsBuilder::new("api") + .endpoint("/metrics") + .build() + .unwrap(); + tracing::info!("Starting server at port {}", self.config.port); HttpServer::new(move || { App::new() .app_data(Data::new(state.clone())) + .wrap(prometheus.clone()) .route("/nonce/{address}", web::get().to(Self::get_nonce)) .route("/receipts", web::get().to(Self::get_receipts)) .route("/proof/sp1", web::post().to(Self::post_proof_sp1)) @@ -151,7 +172,7 @@ impl GatewayServer { if daily_tasks_by_address >= state.config.max_daily_proofs_per_user { let formatted_time_left = get_time_left_day_formatted(); - return HttpResponse::InternalServerError().json(AppResponse::new_unsucessfull( + return HttpResponse::BadRequest().json(AppResponse::new_unsucessfull( format!( "Request denied: Query limit exceeded. Quotas renew in {formatted_time_left}" ) @@ -221,6 +242,8 @@ impl GatewayServer { return HttpResponse::BadRequest().json(AppResponse::new_unsucessfull(message, 400)); }; + let start = Instant::now(); + match state .db .insert_task( @@ -233,9 +256,16 @@ impl GatewayServer { ) .await { - Ok(task_id) => HttpResponse::Ok().json(AppResponse::new_sucessfull( - serde_json::json!({ "task_id": task_id.to_string() }), - )), + Ok(task_id) => { + let duration = start.elapsed(); + state + .metrics + .register_db_response_time_post("sp1-post", duration.as_secs_f64()); + + HttpResponse::Ok().json(AppResponse::new_sucessfull( + serde_json::json!({ "task_id": task_id.to_string() }), + )) + } Err(_) => HttpResponse::InternalServerError() .json(AppResponse::new_unsucessfull("Internal server error", 500)), } diff --git a/aggregation_mode/gateway/src/lib.rs b/aggregation_mode/gateway/src/lib.rs index 84693d8cd6..0df12ac02d 100644 --- a/aggregation_mode/gateway/src/lib.rs +++ b/aggregation_mode/gateway/src/lib.rs @@ -2,5 +2,6 @@ pub mod config; pub mod db; mod helpers; pub mod http; +mod metrics; mod types; mod verifiers; diff --git a/aggregation_mode/gateway/src/metrics.rs b/aggregation_mode/gateway/src/metrics.rs new file mode 100644 index 0000000000..f2621ffc2c --- /dev/null +++ b/aggregation_mode/gateway/src/metrics.rs @@ -0,0 +1,65 @@ +use actix_web::{web, App, HttpResponse, HttpServer, Responder}; +use prometheus::{self, histogram_opts, Encoder, HistogramVec, Registry, TextEncoder}; +use std::sync::Arc; + +#[derive(Clone, Debug)] +pub struct GatewayMetrics { + pub registry: Registry, + pub time_elapsed_db_query: HistogramVec, +} + +impl GatewayMetrics { + pub fn start(metrics_port: u16) -> Result, prometheus::Error> { + let registry = Registry::new(); + + let time_elapsed_db_query = HistogramVec::new( + histogram_opts!("time_elapsed_db_query", "Time elapsed in DB posts"), + &["query"], + )?; + + registry.register(Box::new(time_elapsed_db_query.clone()))?; + + // Arc is used because metrics are a shared resource accessed by both the background and metrics HTTP + // server and the application code, across multiple Actix worker threads. The server outlives start(), + // so the data must be static and safely shared between threads. + let metrics = Arc::new(Self { + registry, + time_elapsed_db_query, + }); + + let server_metrics = metrics.clone(); + tokio::spawn(async move { + let _ = HttpServer::new(move || { + App::new() + .app_data(web::Data::new(server_metrics.clone())) + .route("/metrics", web::get().to(GatewayMetrics::metrics_handler)) + }) + .bind(("0.0.0.0", metrics_port)) + .expect("failed to bind metrics server") + .run() + .await; + }); + + Ok(metrics) + } + + async fn metrics_handler(metrics: web::Data>) -> impl Responder { + let encoder = TextEncoder::new(); + let metric_families = metrics.registry.gather(); + + let mut buffer = Vec::new(); + if let Err(e) = encoder.encode(&metric_families, &mut buffer) { + tracing::error!("could not encode prometheus metrics: {e}"); + } + + HttpResponse::Ok() + .insert_header(("Content-Type", encoder.format_type())) + .body(buffer) + } + + pub fn register_db_response_time_post(&self, query: &str, value: f64) { + self.time_elapsed_db_query + .with_label_values(&[query]) + .observe(value); + } +} diff --git a/aggregation_mode/payments_poller/Cargo.toml b/aggregation_mode/payments_poller/Cargo.toml index c145fd02e5..d6273c3247 100644 --- a/aggregation_mode/payments_poller/Cargo.toml +++ b/aggregation_mode/payments_poller/Cargo.toml @@ -8,6 +8,7 @@ serde = { workspace = true } serde_json = { workspace = true } serde_yaml = { workspace = true } aligned-sdk = { workspace = true } +prometheus = { workspace = true } tracing = { version = "0.1", features = ["log"] } tracing-subscriber = { version = "0.3.0", features = ["env-filter"] } actix-web = "4" diff --git a/aggregation_mode/payments_poller/src/config.rs b/aggregation_mode/payments_poller/src/config.rs index 8b51181470..c84362ae8f 100644 --- a/aggregation_mode/payments_poller/src/config.rs +++ b/aggregation_mode/payments_poller/src/config.rs @@ -8,6 +8,7 @@ pub struct Config { pub eth_rpc_url: String, pub payment_service_address: String, pub last_block_fetched_filepath: String, + pub poller_metrics_port: u16, } #[derive(Debug, Deserialize, Serialize)] diff --git a/aggregation_mode/payments_poller/src/lib.rs b/aggregation_mode/payments_poller/src/lib.rs index 84bee5b7ba..654c3e2378 100644 --- a/aggregation_mode/payments_poller/src/lib.rs +++ b/aggregation_mode/payments_poller/src/lib.rs @@ -1,4 +1,5 @@ pub mod config; pub mod db; +pub mod metrics; pub mod payments; pub mod types; diff --git a/aggregation_mode/payments_poller/src/metrics.rs b/aggregation_mode/payments_poller/src/metrics.rs new file mode 100644 index 0000000000..aabdc5e5ae --- /dev/null +++ b/aggregation_mode/payments_poller/src/metrics.rs @@ -0,0 +1,66 @@ +use actix_web::{web, App, HttpResponse, HttpServer, Responder}; +use prometheus::{self, opts, Encoder, Gauge, Registry, TextEncoder}; +use std::sync::Arc; + +#[derive(Clone, Debug)] +pub struct PaymentsPollerMetrics { + pub registry: Registry, + pub last_processed_block: Gauge, +} + +impl PaymentsPollerMetrics { + pub fn start(metrics_port: u16) -> Result, prometheus::Error> { + let registry = Registry::new(); + + let last_processed_block = Gauge::with_opts(opts!( + "last_processed_block", + "Last processed block by poller" + ))?; + + registry.register(Box::new(last_processed_block.clone()))?; + + // Arc is used because metrics are a shared resource accessed by both the background and metrics HTTP + // server and the application code, across multiple Actix worker threads. The server outlives start(), + // so the data must be static and safely shared between threads. + let metrics = Arc::new(Self { + registry, + last_processed_block, + }); + + let server_metrics = metrics.clone(); + tokio::spawn(async move { + let _ = HttpServer::new(move || { + App::new() + .app_data(web::Data::new(server_metrics.clone())) + .route( + "/metrics", + web::get().to(PaymentsPollerMetrics::metrics_handler), + ) + }) + .bind(("0.0.0.0", metrics_port)) + .expect("failed to bind metrics server") + .run() + .await; + }); + + Ok(metrics) + } + + async fn metrics_handler(metrics: web::Data>) -> impl Responder { + let encoder = TextEncoder::new(); + let metric_families = metrics.registry.gather(); + + let mut buffer = Vec::new(); + if let Err(e) = encoder.encode(&metric_families, &mut buffer) { + tracing::error!("could not encode prometheus metrics: {e}"); + } + + HttpResponse::Ok() + .insert_header(("Content-Type", encoder.format_type())) + .body(buffer) + } + + pub fn register_last_processed_block(&self, value: u64) { + self.last_processed_block.set(value as f64); + } +} diff --git a/aggregation_mode/payments_poller/src/payments.rs b/aggregation_mode/payments_poller/src/payments.rs index 78639e6176..005c56e7dd 100644 --- a/aggregation_mode/payments_poller/src/payments.rs +++ b/aggregation_mode/payments_poller/src/payments.rs @@ -1,8 +1,9 @@ -use std::str::FromStr; +use std::{str::FromStr, sync::Arc}; use crate::{ config::Config, db::Db, + metrics::PaymentsPollerMetrics, types::{AggregationModePaymentService, AggregationModePaymentServiceContract, RpcProvider}, }; use alloy::{ @@ -21,6 +22,7 @@ pub struct PaymentsPoller { proof_aggregation_service: AggregationModePaymentServiceContract, rpc_provider: RpcProvider, config: Config, + metrics: Arc, } impl PaymentsPoller { @@ -38,11 +40,19 @@ impl PaymentsPoller { .get_last_block_fetched() .map_err(|err| PaymentsPollerError::ReadLastBlockError(err.to_string())); + tracing::info!( + "Starting metrics server on port {}", + config.poller_metrics_port + ); + let metrics = PaymentsPollerMetrics::start(config.poller_metrics_port) + .expect("Failed to start metrics server"); + Ok(Self { db, proof_aggregation_service, rpc_provider, config, + metrics, }) } @@ -121,6 +131,8 @@ impl PaymentsPoller { continue; }; + self.metrics.register_last_processed_block(current_block); + tokio::time::sleep(std::time::Duration::from_secs( seconds_to_wait_between_polls, )) diff --git a/config-files/config-agg-mode-gateway-ethereum-package.yaml b/config-files/config-agg-mode-gateway-ethereum-package.yaml index a0a58b8758..045e1de830 100644 --- a/config-files/config-agg-mode-gateway-ethereum-package.yaml +++ b/config-files/config-agg-mode-gateway-ethereum-package.yaml @@ -6,3 +6,7 @@ payment_service_address: "0x922D6956C99E12DFeB3224DEA977D0939758A1Fe" network: "devnet" max_daily_proofs_per_user: 32 last_block_fetched_filepath: "config-files/proof-aggregator.last_block_fetched.json" + +# Metrics +gateway_metrics_port: 9094 +poller_metrics_port: 9095 diff --git a/config-files/config-agg-mode-gateway.yaml b/config-files/config-agg-mode-gateway.yaml index cab03d9793..014c6cc278 100644 --- a/config-files/config-agg-mode-gateway.yaml +++ b/config-files/config-agg-mode-gateway.yaml @@ -4,5 +4,9 @@ db_connection_url: "postgres://postgres:postgres@localhost:5435/" eth_rpc_url: "http://localhost:8545" payment_service_address: "0x922D6956C99E12DFeB3224DEA977D0939758A1Fe" network: "devnet" -max_daily_proofs_per_user: 4 +max_daily_proofs_per_user: 100 last_block_fetched_filepath: "config-files/proof-aggregator.last_block_fetched.json" + +# Metrics +gateway_metrics_port: 9094 +poller_metrics_port: 9095 diff --git a/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json new file mode 100644 index 0000000000..951d2179b9 --- /dev/null +++ b/grafana/provisioning/dashboards/aligned/aggregation_mode_gateway.json @@ -0,0 +1,607 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 8, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(api_response_code{statuscode=~\"4..\"}[5m])\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "User error response count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(api_response_code{statuscode=~\"5..\"}[100m])\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Server Error response count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n rate(api_response_code{statuscode=~\"2..\",endpoint!=\"/metrics\"}[$__range])\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Success response count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (le) (rate(time_elapsed_db_query_bucket{query=\"sp1-post\"}[$__rate_interval])))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DB posts latency (p50)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "last_processed_block{}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Payments Poller Last Processed Block", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(time_elapsed_db_query_count[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DB posts throughput", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Aggregation Mode", + "uid": "a66a5480-6a60-4b87-9d29-4f0f446edafd", + "version": 1, + "weekStart": "" +} diff --git a/prometheus/prometheus.yaml b/prometheus/prometheus.yaml index 7b84e52e39..3de7b120f9 100644 --- a/prometheus/prometheus.yaml +++ b/prometheus/prometheus.yaml @@ -37,3 +37,24 @@ scrape_configs: - targets: ["host.docker.internal:9100"] labels: bot: "node-exporter" + + - job_name: "gateway-api" + scrape_interval: 60s + static_configs: + - targets: ["host.docker.internal:8089"] + labels: + bot: "gateway-exporter" + + - job_name: "aggregation-mode-gateway" + scrape_interval: 1s + static_configs: + - targets: ["host.docker.internal:9094"] + labels: + bot: "aggregation-gateway" + + - job_name: "aggregation-mode-payments-poller" + scrape_interval: 1s + static_configs: + - targets: ["host.docker.internal:9095"] + labels: + bot: "aggregation-poller"