fedimint_server/
metrics.rs

1pub(crate) mod jsonrpsee;
2
3use std::sync::LazyLock;
4use std::time::Duration;
5
6use fedimint_core::backup::ClientBackupKeyPrefix;
7use fedimint_core::db::{Database, IDatabaseTransactionOpsCoreTyped};
8use fedimint_core::task::{TaskGroup, sleep};
9use fedimint_metrics::prometheus::{
10    HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, register_histogram_vec_with_registry,
11    register_int_gauge_vec_with_registry, register_int_gauge_with_registry,
12};
13use fedimint_metrics::{
14    Histogram, REGISTRY, histogram_opts, opts, register_histogram_with_registry,
15    register_int_counter_vec_with_registry,
16};
17use futures::StreamExt as _;
18use tokio::sync::OnceCell;
19
20use crate::consensus::api::backup_statistics_static;
21
/// Pause between two consecutive refreshes of the backup statistics gauges.
const BACKUP_STATS_REFRESH_INTERVAL: Duration = Duration::from_secs(60);
23
/// Histogram bucket boundaries for per-transaction element counts.
///
/// The boundaries follow a 1-2-5 progression over four decades, i.e.
/// `1, 2, 5, 10, 20, 50, ..., 1000, 2000, 5000`.
pub static TX_ELEMS_BUCKETS: LazyLock<Vec<f64>> = LazyLock::new(|| {
    // All products are small integers, so they are represented exactly as `f64`.
    [1.0_f64, 10.0, 100.0, 1000.0]
        .iter()
        .flat_map(|&decade| [1.0, 2.0, 5.0].map(|step| step * decade))
        .collect()
});
29pub(crate) static CONSENSUS_TX_PROCESSED_INPUTS: LazyLock<Histogram> = LazyLock::new(|| {
30    register_histogram_with_registry!(
31        histogram_opts!(
32            "consensus_tx_processed_inputs",
33            "Number of inputs processed in a transaction",
34            TX_ELEMS_BUCKETS.clone()
35        ),
36        REGISTRY
37    )
38    .unwrap()
39});
40pub(crate) static CONSENSUS_TX_PROCESSED_OUTPUTS: LazyLock<Histogram> = LazyLock::new(|| {
41    register_histogram_with_registry!(
42        histogram_opts!(
43            "consensus_tx_processed_outputs",
44            "Number of outputs processed in a transaction",
45            TX_ELEMS_BUCKETS.clone()
46        ),
47        REGISTRY
48    )
49    .unwrap()
50});
51pub(crate) static CONSENSUS_ITEMS_PROCESSED_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
52    register_int_counter_vec_with_registry!(
53        opts!(
54            "consensus_items_processed_total",
55            "Number of consensus items processed in the consensus",
56        ),
57        &["peer_id"],
58        REGISTRY
59    )
60    .unwrap()
61});
62pub(crate) static CONSENSUS_ITEM_PROCESSING_DURATION_SECONDS: LazyLock<HistogramVec> =
63    LazyLock::new(|| {
64        register_histogram_vec_with_registry!(
65            histogram_opts!(
66                "consensus_item_processing_duration_seconds",
67                "Duration of processing a consensus item",
68            ),
69            &["peer_id"],
70            REGISTRY
71        )
72        .unwrap()
73    });
74pub(crate) static CONSENSUS_ITEM_PROCESSING_MODULE_AUDIT_DURATION_SECONDS: LazyLock<HistogramVec> =
75    LazyLock::new(|| {
76        register_histogram_vec_with_registry!(
77            histogram_opts!(
78                "consensus_item_processing_module_audit_duration_seconds",
79                "Duration of processing a consensus item",
80            ),
81            &["module_id", "module_kind"],
82            REGISTRY
83        )
84        .unwrap()
85    });
86
87pub(crate) static CONSENSUS_ORDERING_LATENCY_SECONDS: LazyLock<Histogram> = LazyLock::new(|| {
88    register_histogram_with_registry!(
89        histogram_opts!(
90            "consensus_ordering_latency_seconds",
91            "Duration of ordering a batch of consensus items",
92        ),
93        REGISTRY
94    )
95    .unwrap()
96});
97
98pub(crate) static JSONRPC_API_REQUEST_DURATION_SECONDS: LazyLock<HistogramVec> =
99    LazyLock::new(|| {
100        register_histogram_vec_with_registry!(
101            histogram_opts!(
102                "jsonrpc_api_request_duration_seconds",
103                "Duration of processing an rpc request",
104            ),
105            &["method"],
106            REGISTRY
107        )
108        .unwrap()
109    });
110pub(crate) static JSONRPC_API_REQUEST_RESPONSE_CODE: LazyLock<IntCounterVec> =
111    LazyLock::new(|| {
112        register_int_counter_vec_with_registry!(
113            opts!(
114                "jsonrpc_api_request_response_code_total",
115                "Count of response counts and types",
116            ),
117            &["method", "code", "type"],
118            REGISTRY
119        )
120        .unwrap()
121    });
122pub(crate) static CONSENSUS_SESSION_COUNT: LazyLock<IntGauge> = LazyLock::new(|| {
123    register_int_gauge_with_registry!(
124        opts!(
125            "consensus_session_count",
126            "Fedimint consensus session count",
127        ),
128        REGISTRY
129    )
130    .unwrap()
131});
132pub(crate) static CONSENSUS_PEER_CONTRIBUTION_SESSION_IDX: LazyLock<IntGaugeVec> =
133    LazyLock::new(|| {
134        register_int_gauge_vec_with_registry!(
135            opts!(
136                "consensus_peer_contribution_session_idx",
137                "Latest contribution session idx by peer_id",
138            ),
139            &["self_id", "peer_id"],
140            REGISTRY
141        )
142        .unwrap()
143    });
144pub(crate) static BACKUP_WRITE_SIZE_BYTES: LazyLock<Histogram> = LazyLock::new(|| {
145    register_histogram_with_registry!(
146        histogram_opts!(
147            "backup_write_size_bytes",
148            "Size of every backup being written",
149            vec![
150                1.0, 10., 100., 1_000., 5_000., 10_000., 50_000., 100_000., 1_000_000.
151            ]
152        ),
153        REGISTRY
154    )
155    .unwrap()
156});
157pub(crate) static STORED_BACKUPS_COUNT: LazyLock<IntGauge> = LazyLock::new(|| {
158    register_int_gauge_with_registry!(
159        opts!("stored_backups_count", "Total amount of backups stored",),
160        REGISTRY
161    )
162    .unwrap()
163});
164
165pub(crate) static BACKUP_COUNTS: LazyLock<IntGaugeVec> = LazyLock::new(|| {
166    register_int_gauge_vec_with_registry!(
167        opts!(
168            "backup_counts",
169            "Backups refreshed at least once in a given timeframe",
170        ),
171        &["timeframe"],
172        REGISTRY
173    )
174    .unwrap()
175});
176
177pub(crate) static TOTAL_BACKUP_SIZE: LazyLock<IntGauge> = LazyLock::new(|| {
178    register_int_gauge_with_registry!(
179        opts!("total_backup_size", "Total size og backups in the DB",),
180        REGISTRY
181    )
182    .unwrap()
183});
184
185/// Lock for spawning exactly one task for updating backup related gauges that
186/// are computed fresh from DB regularly instead of being updated incrementally.
187static BACKUP_COUNTS_UPDATE_TASK: OnceCell<()> = OnceCell::const_new();
188
189pub(crate) static PEER_CONNECT_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
190    register_int_counter_vec_with_registry!(
191        opts!("peer_connect_total", "Number of times peer (re/)connected",),
192        &["self_id", "peer_id", "direction"],
193        REGISTRY
194    )
195    .unwrap()
196});
197pub(crate) static PEER_DISCONNECT_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
198    register_int_counter_vec_with_registry!(
199        opts!(
200            "peer_disconnect_total",
201            "Number of times peer (re/)connected",
202        ),
203        &["self_id", "peer_id"],
204        REGISTRY
205    )
206    .unwrap()
207});
208pub(crate) static PEER_MESSAGES_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
209    register_int_counter_vec_with_registry!(
210        opts!("peer_messages_total", "Messages with the peer",),
211        &["self_id", "peer_id", "direction"],
212        REGISTRY
213    )
214    .unwrap()
215});
216
217/// Initialize gauges or other metrics that need eager initialization on start,
218/// e.g. because they are triggered infrequently.
219pub(crate) async fn initialize_gauge_metrics(tg: &TaskGroup, db: &Database) {
220    STORED_BACKUPS_COUNT.set(
221        db.begin_transaction_nc()
222            .await
223            .find_by_prefix(&ClientBackupKeyPrefix)
224            .await
225            .count()
226            .await as i64,
227    );
228
229    let db_inner = db.clone();
230    BACKUP_COUNTS_UPDATE_TASK
231        .get_or_init(move || async move {
232            tg.spawn_cancellable("prometheus_backup_stats", async move {
233                loop {
234                    let backup_counts =
235                        backup_statistics_static(&mut db_inner.begin_transaction_nc().await).await;
236
237                    BACKUP_COUNTS.with_label_values(&["1d"]).set(
238                        backup_counts
239                            .refreshed_1d
240                            .try_into()
241                            .expect("u64 to i64 overflow"),
242                    );
243                    BACKUP_COUNTS.with_label_values(&["1w"]).set(
244                        backup_counts
245                            .refreshed_1w
246                            .try_into()
247                            .expect("u64 to i64 overflow"),
248                    );
249                    BACKUP_COUNTS.with_label_values(&["1m"]).set(
250                        backup_counts
251                            .refreshed_1m
252                            .try_into()
253                            .expect("u64 to i64 overflow"),
254                    );
255                    BACKUP_COUNTS.with_label_values(&["3m"]).set(
256                        backup_counts
257                            .refreshed_3m
258                            .try_into()
259                            .expect("u64 to i64 overflow"),
260                    );
261                    BACKUP_COUNTS.with_label_values(&["all_time"]).set(
262                        backup_counts
263                            .num_backups
264                            .try_into()
265                            .expect("u64 to i64 overflow"),
266                    );
267
268                    TOTAL_BACKUP_SIZE.set(
269                        backup_counts
270                            .total_size
271                            .try_into()
272                            .expect("u64 to i64 overflow"),
273                    );
274
275                    sleep(BACKUP_STATS_REFRESH_INTERVAL).await;
276                }
277            });
278        })
279        .await;
280}