Skip to main content

fedimint_server/
metrics.rs

1pub(crate) mod jsonrpsee;
2
3use std::sync::LazyLock;
4use std::time::Duration;
5
6use fedimint_core::backup::ClientBackupKeyPrefix;
7use fedimint_core::db::{Database, IDatabaseTransactionOpsCoreTyped};
8use fedimint_core::task::{TaskGroup, sleep};
9use fedimint_metrics::prometheus::{
10    HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, register_histogram_vec_with_registry,
11    register_int_gauge_vec_with_registry, register_int_gauge_with_registry,
12};
13use fedimint_metrics::{
14    Histogram, REGISTRY, histogram_opts, opts, register_histogram_with_registry,
15    register_int_counter_vec_with_registry,
16};
17use futures::StreamExt as _;
18use tokio::sync::OnceCell;
19
20use crate::consensus::api::backup_statistics_static;
21
/// Pause between two consecutive refreshes of the backup statistics gauges
/// performed by the background task spawned in `initialize_gauge_metrics`.
const BACKUP_STATS_REFRESH_INTERVAL: Duration = Duration::from_mins(1);
23
/// Histogram bucket boundaries used for per-transaction element counts
/// (see the transaction input/output histograms in this module).
///
/// The boundaries follow a 1-2-5 progression from 1 up to 5000.
pub static TX_ELEMS_BUCKETS: LazyLock<Vec<f64>> = LazyLock::new(|| {
    const BOUNDARIES: [f64; 12] = [
        1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, 200.0, 500.0, 1000.0, 2000.0, 5000.0,
    ];
    BOUNDARIES.to_vec()
});
29pub(crate) static CONSENSUS_TX_PROCESSED_INPUTS: LazyLock<Histogram> = LazyLock::new(|| {
30    register_histogram_with_registry!(
31        histogram_opts!(
32            "consensus_tx_processed_inputs",
33            "Number of inputs processed in a transaction",
34            TX_ELEMS_BUCKETS.clone()
35        ),
36        REGISTRY
37    )
38    .unwrap()
39});
40pub(crate) static CONSENSUS_TX_PROCESSED_OUTPUTS: LazyLock<Histogram> = LazyLock::new(|| {
41    register_histogram_with_registry!(
42        histogram_opts!(
43            "consensus_tx_processed_outputs",
44            "Number of outputs processed in a transaction",
45            TX_ELEMS_BUCKETS.clone()
46        ),
47        REGISTRY
48    )
49    .unwrap()
50});
51pub(crate) static CONSENSUS_ITEMS_PROCESSED_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
52    register_int_counter_vec_with_registry!(
53        opts!(
54            "consensus_items_processed_total",
55            "Number of consensus items processed in the consensus",
56        ),
57        &["peer_id"],
58        REGISTRY
59    )
60    .unwrap()
61});
62pub(crate) static CONSENSUS_ITEM_PROCESSING_DURATION_SECONDS: LazyLock<HistogramVec> =
63    LazyLock::new(|| {
64        register_histogram_vec_with_registry!(
65            histogram_opts!(
66                "consensus_item_processing_duration_seconds",
67                "Duration of processing a consensus item",
68            ),
69            &["peer_id"],
70            REGISTRY
71        )
72        .unwrap()
73    });
74pub(crate) static CONSENSUS_ITEM_PROCESSING_MODULE_AUDIT_DURATION_SECONDS: LazyLock<HistogramVec> =
75    LazyLock::new(|| {
76        register_histogram_vec_with_registry!(
77            histogram_opts!(
78                "consensus_item_processing_module_audit_duration_seconds",
79                "Duration of processing a consensus item",
80            ),
81            &["module_id", "module_kind"],
82            REGISTRY
83        )
84        .unwrap()
85    });
86
87pub(crate) static CONSENSUS_ORDERING_LATENCY_SECONDS: LazyLock<Histogram> = LazyLock::new(|| {
88    register_histogram_with_registry!(
89        histogram_opts!(
90            "consensus_ordering_latency_seconds",
91            "Duration of ordering a batch of consensus items",
92        ),
93        REGISTRY
94    )
95    .unwrap()
96});
97
98pub(crate) static IROH_API_CONNECTIONS_ACTIVE: LazyLock<IntGauge> = LazyLock::new(|| {
99    register_int_gauge_with_registry!(
100        opts!(
101            "iroh_api_connections_active",
102            "Number of currently active iroh API connections",
103        ),
104        REGISTRY
105    )
106    .unwrap()
107});
108
109pub(crate) static IROH_API_CONNECTION_DURATION_SECONDS: LazyLock<Histogram> = LazyLock::new(|| {
110    register_histogram_with_registry!(
111        histogram_opts!(
112            "iroh_api_connection_duration_seconds",
113            "Duration of iroh API connections",
114        ),
115        REGISTRY
116    )
117    .unwrap()
118});
119
120pub(crate) static IROH_API_REQUEST_DURATION_SECONDS: LazyLock<HistogramVec> = LazyLock::new(|| {
121    register_histogram_vec_with_registry!(
122        histogram_opts!(
123            "iroh_api_request_duration_seconds",
124            "Duration of processing an iroh API request",
125        ),
126        &["method"],
127        REGISTRY
128    )
129    .unwrap()
130});
131
132pub(crate) static JSONRPC_API_REQUEST_DURATION_SECONDS: LazyLock<HistogramVec> =
133    LazyLock::new(|| {
134        register_histogram_vec_with_registry!(
135            histogram_opts!(
136                "jsonrpc_api_request_duration_seconds",
137                "Duration of processing an rpc request",
138            ),
139            &["method"],
140            REGISTRY
141        )
142        .unwrap()
143    });
144pub(crate) static JSONRPC_API_REQUEST_RESPONSE_CODE: LazyLock<IntCounterVec> =
145    LazyLock::new(|| {
146        register_int_counter_vec_with_registry!(
147            opts!(
148                "jsonrpc_api_request_response_code_total",
149                "Count of response counts and types",
150            ),
151            &["method", "code", "type"],
152            REGISTRY
153        )
154        .unwrap()
155    });
156pub(crate) static CONSENSUS_SESSION_COUNT: LazyLock<IntGauge> = LazyLock::new(|| {
157    register_int_gauge_with_registry!(
158        opts!(
159            "consensus_session_count",
160            "Fedimint consensus session count",
161        ),
162        REGISTRY
163    )
164    .unwrap()
165});
166pub(crate) static CONSENSUS_PEER_CONTRIBUTION_SESSION_IDX: LazyLock<IntGaugeVec> =
167    LazyLock::new(|| {
168        register_int_gauge_vec_with_registry!(
169            opts!(
170                "consensus_peer_contribution_session_idx",
171                "Latest contribution session idx by peer_id",
172            ),
173            &["self_id", "peer_id"],
174            REGISTRY
175        )
176        .unwrap()
177    });
178pub(crate) static BACKUP_WRITE_SIZE_BYTES: LazyLock<Histogram> = LazyLock::new(|| {
179    register_histogram_with_registry!(
180        histogram_opts!(
181            "backup_write_size_bytes",
182            "Size of every backup being written",
183            vec![
184                1.0, 10., 100., 1_000., 5_000., 10_000., 50_000., 100_000., 1_000_000.
185            ]
186        ),
187        REGISTRY
188    )
189    .unwrap()
190});
191pub(crate) static STORED_BACKUPS_COUNT: LazyLock<IntGauge> = LazyLock::new(|| {
192    register_int_gauge_with_registry!(
193        opts!("stored_backups_count", "Total amount of backups stored",),
194        REGISTRY
195    )
196    .unwrap()
197});
198
199pub(crate) static BACKUP_COUNTS: LazyLock<IntGaugeVec> = LazyLock::new(|| {
200    register_int_gauge_vec_with_registry!(
201        opts!(
202            "backup_counts",
203            "Backups refreshed at least once in a given timeframe",
204        ),
205        &["timeframe"],
206        REGISTRY
207    )
208    .unwrap()
209});
210
211pub(crate) static TOTAL_BACKUP_SIZE: LazyLock<IntGauge> = LazyLock::new(|| {
212    register_int_gauge_with_registry!(
213        opts!("total_backup_size", "Total size og backups in the DB",),
214        REGISTRY
215    )
216    .unwrap()
217});
218
/// Guard ensuring that exactly one task is spawned for updating the
/// backup-related gauges, which are recomputed from the DB at regular
/// intervals instead of being updated incrementally.
///
/// `initialize_gauge_metrics` spawns the task inside this cell's
/// `get_or_init`, so repeated calls do not spawn it again.
static BACKUP_COUNTS_UPDATE_TASK: OnceCell<()> = OnceCell::const_new();
222
223pub(crate) static PEER_CONNECT_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
224    register_int_counter_vec_with_registry!(
225        opts!("peer_connect_total", "Number of times peer (re/)connected",),
226        &["self_id", "peer_id", "direction"],
227        REGISTRY
228    )
229    .unwrap()
230});
231pub(crate) static PEER_DISCONNECT_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
232    register_int_counter_vec_with_registry!(
233        opts!(
234            "peer_disconnect_total",
235            "Number of times peer (re/)connected",
236        ),
237        &["self_id", "peer_id"],
238        REGISTRY
239    )
240    .unwrap()
241});
242pub(crate) static PEER_MESSAGES_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
243    register_int_counter_vec_with_registry!(
244        opts!("peer_messages_total", "Messages with the peer",),
245        &["self_id", "peer_id", "direction"],
246        REGISTRY
247    )
248    .unwrap()
249});
250
251/// Initialize gauges or other metrics that need eager initialization on start,
252/// e.g. because they are triggered infrequently.
253pub(crate) async fn initialize_gauge_metrics(tg: &TaskGroup, db: &Database) {
254    STORED_BACKUPS_COUNT.set(
255        db.begin_transaction_nc()
256            .await
257            .find_by_prefix(&ClientBackupKeyPrefix)
258            .await
259            .count()
260            .await as i64,
261    );
262
263    let db_inner = db.clone();
264    BACKUP_COUNTS_UPDATE_TASK
265        .get_or_init(move || async move {
266            tg.spawn_cancellable("prometheus_backup_stats", async move {
267                loop {
268                    let backup_counts =
269                        backup_statistics_static(&mut db_inner.begin_transaction_nc().await).await;
270
271                    BACKUP_COUNTS.with_label_values(&["1d"]).set(
272                        backup_counts
273                            .refreshed_1d
274                            .try_into()
275                            .expect("u64 to i64 overflow"),
276                    );
277                    BACKUP_COUNTS.with_label_values(&["1w"]).set(
278                        backup_counts
279                            .refreshed_1w
280                            .try_into()
281                            .expect("u64 to i64 overflow"),
282                    );
283                    BACKUP_COUNTS.with_label_values(&["1m"]).set(
284                        backup_counts
285                            .refreshed_1m
286                            .try_into()
287                            .expect("u64 to i64 overflow"),
288                    );
289                    BACKUP_COUNTS.with_label_values(&["3m"]).set(
290                        backup_counts
291                            .refreshed_3m
292                            .try_into()
293                            .expect("u64 to i64 overflow"),
294                    );
295                    BACKUP_COUNTS.with_label_values(&["all_time"]).set(
296                        backup_counts
297                            .num_backups
298                            .try_into()
299                            .expect("u64 to i64 overflow"),
300                    );
301
302                    TOTAL_BACKUP_SIZE.set(
303                        backup_counts
304                            .total_size
305                            .try_into()
306                            .expect("u64 to i64 overflow"),
307                    );
308
309                    sleep(BACKUP_STATS_REFRESH_INTERVAL).await;
310                }
311            });
312        })
313        .await;
314}