1pub(crate) mod jsonrpsee;
2
3use std::sync::LazyLock;
4use std::time::Duration;
5
6use fedimint_core::backup::ClientBackupKeyPrefix;
7use fedimint_core::db::{Database, IDatabaseTransactionOpsCoreTyped};
8use fedimint_core::task::{TaskGroup, sleep};
9use fedimint_metrics::prometheus::{
10 HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, register_histogram_vec_with_registry,
11 register_int_gauge_vec_with_registry, register_int_gauge_with_registry,
12};
13use fedimint_metrics::{
14 Histogram, REGISTRY, histogram_opts, opts, register_histogram_with_registry,
15 register_int_counter_vec_with_registry,
16};
17use futures::StreamExt as _;
18use tokio::sync::OnceCell;
19
20use crate::consensus::api::backup_statistics_static;
21
/// Pause between two consecutive refreshes of the backup statistics gauges
/// performed by the task spawned in `initialize_gauge_metrics`.
const BACKUP_STATS_REFRESH_INTERVAL: Duration = Duration::from_mins(1);
23
/// Histogram bucket boundaries shared by the per-transaction element count
/// metrics (see the `consensus_tx_processed_*` histograms in this module).
///
/// The boundaries follow a 1-2-5 progression from 1 up to 5000.
pub static TX_ELEMS_BUCKETS: LazyLock<Vec<f64>> = LazyLock::new(|| {
    const BOUNDS: [f64; 12] = [
        1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, 200.0, 500.0, 1000.0, 2000.0, 5000.0,
    ];
    BOUNDS.to_vec()
});
29pub(crate) static CONSENSUS_TX_PROCESSED_INPUTS: LazyLock<Histogram> = LazyLock::new(|| {
30 register_histogram_with_registry!(
31 histogram_opts!(
32 "consensus_tx_processed_inputs",
33 "Number of inputs processed in a transaction",
34 TX_ELEMS_BUCKETS.clone()
35 ),
36 REGISTRY
37 )
38 .unwrap()
39});
40pub(crate) static CONSENSUS_TX_PROCESSED_OUTPUTS: LazyLock<Histogram> = LazyLock::new(|| {
41 register_histogram_with_registry!(
42 histogram_opts!(
43 "consensus_tx_processed_outputs",
44 "Number of outputs processed in a transaction",
45 TX_ELEMS_BUCKETS.clone()
46 ),
47 REGISTRY
48 )
49 .unwrap()
50});
51pub(crate) static CONSENSUS_ITEMS_PROCESSED_TOTAL: LazyLock<IntCounterVec> = LazyLock::new(|| {
52 register_int_counter_vec_with_registry!(
53 opts!(
54 "consensus_items_processed_total",
55 "Number of consensus items processed in the consensus",
56 ),
57 &["peer_id"],
58 REGISTRY
59 )
60 .unwrap()
61});
62pub(crate) static CONSENSUS_ITEM_PROCESSING_DURATION_SECONDS: LazyLock<HistogramVec> =
63 LazyLock::new(|| {
64 register_histogram_vec_with_registry!(
65 histogram_opts!(
66 "consensus_item_processing_duration_seconds",
67 "Duration of processing a consensus item",
68 ),
69 &["peer_id"],
70 REGISTRY
71 )
72 .unwrap()
73 });
74pub(crate) static CONSENSUS_ITEM_PROCESSING_MODULE_AUDIT_DURATION_SECONDS: LazyLock<HistogramVec> =
75 LazyLock::new(|| {
76 register_histogram_vec_with_registry!(
77 histogram_opts!(
78 "consensus_item_processing_module_audit_duration_seconds",
79 "Duration of processing a consensus item",
80 ),
81 &["module_id", "module_kind"],
82 REGISTRY
83 )
84 .unwrap()
85 });
86
87pub(crate) static CONSENSUS_ORDERING_LATENCY_SECONDS: LazyLock<Histogram> = LazyLock::new(|| {
88 register_histogram_with_registry!(
89 histogram_opts!(
90 "consensus_ordering_latency_seconds",
91 "Duration of ordering a batch of consensus items",
92 ),
93 REGISTRY
94 )
95 .unwrap()
96});
97
98pub(crate) static IROH_API_CONNECTIONS_ACTIVE: LazyLock<IntGauge> = LazyLock::new(|| {
99 register_int_gauge_with_registry!(
100 opts!(
101 "iroh_api_connections_active",
102 "Number of currently active iroh API connections",
103 ),
104 REGISTRY
105 )
106 .unwrap()
107});
108
109pub(crate) static IROH_API_CONNECTION_DURATION_SECONDS: LazyLock<Histogram> = LazyLock::new(|| {
110 register_histogram_with_registry!(
111 histogram_opts!(
112 "iroh_api_connection_duration_seconds",
113 "Duration of iroh API connections",
114 ),
115 REGISTRY
116 )
117 .unwrap()
118});
119
120pub(crate) static IROH_API_REQUEST_DURATION_SECONDS: LazyLock<HistogramVec> = LazyLock::new(|| {
121 register_histogram_vec_with_registry!(
122 histogram_opts!(
123 "iroh_api_request_duration_seconds",
124 "Duration of processing an iroh API request",
125 ),
126 &["method"],
127 REGISTRY
128 )
129 .unwrap()
130});
131
132pub(crate) static JSONRPC_API_REQUEST_DURATION_SECONDS: LazyLock<HistogramVec> =
133 LazyLock::new(|| {
134 register_histogram_vec_with_registry!(
135 histogram_opts!(
136 "jsonrpc_api_request_duration_seconds",
137 "Duration of processing an rpc request",
138 ),
139 &["method"],
140 REGISTRY
141 )
142 .unwrap()
143 });
144pub(crate) static JSONRPC_API_REQUEST_RESPONSE_CODE: LazyLock<IntCounterVec> =
145 LazyLock::new(|| {
146 register_int_counter_vec_with_registry!(
147 opts!(
148 "jsonrpc_api_request_response_code_total",
149 "Count of response counts and types",
150 ),
151 &["method", "code", "type"],
152 REGISTRY
153 )
154 .unwrap()
155 });
156pub(crate) static CONSENSUS_SESSION_COUNT: LazyLock<IntGauge> = LazyLock::new(|| {
157 register_int_gauge_with_registry!(
158 opts!(
159 "consensus_session_count",
160 "Fedimint consensus session count",
161 ),
162 REGISTRY
163 )
164 .unwrap()
165});
166pub(crate) static CONSENSUS_PEER_CONTRIBUTION_SESSION_IDX: LazyLock<IntGaugeVec> =
167 LazyLock::new(|| {
168 register_int_gauge_vec_with_registry!(
169 opts!(
170 "consensus_peer_contribution_session_idx",
171 "Latest contribution session idx by peer_id",
172 ),
173 &["self_id", "peer_id"],
174 REGISTRY
175 )
176 .unwrap()
177 });
178pub(crate) static BACKUP_WRITE_SIZE_BYTES: LazyLock<Histogram> = LazyLock::new(|| {
179 register_histogram_with_registry!(
180 histogram_opts!(
181 "backup_write_size_bytes",
182 "Size of every backup being written",
183 vec![
184 1.0, 10., 100., 1_000., 5_000., 10_000., 50_000., 100_000., 1_000_000.
185 ]
186 ),
187 REGISTRY
188 )
189 .unwrap()
190});
191pub(crate) static STORED_BACKUPS_COUNT: LazyLock<IntGauge> = LazyLock::new(|| {
192 register_int_gauge_with_registry!(
193 opts!("stored_backups_count", "Total amount of backups stored",),
194 REGISTRY
195 )
196 .unwrap()
197});
198
199pub(crate) static BACKUP_COUNTS: LazyLock<IntGaugeVec> = LazyLock::new(|| {
200 register_int_gauge_vec_with_registry!(
201 opts!(
202 "backup_counts",
203 "Backups refreshed at least once in a given timeframe",
204 ),
205 &["timeframe"],
206 REGISTRY
207 )
208 .unwrap()
209});
210
211pub(crate) static TOTAL_BACKUP_SIZE: LazyLock<IntGauge> = LazyLock::new(|| {
212 register_int_gauge_with_registry!(
213 opts!("total_backup_size", "Total size og backups in the DB",),
214 REGISTRY
215 )
216 .unwrap()
217});
218
/// Cell initialized by `initialize_gauge_metrics` (via `get_or_init`) when it
/// spawns the backup statistics refresh task; it carries no value of its own.
static BACKUP_COUNTS_UPDATE_TASK: OnceCell<()> = OnceCell::const_new();
222
223pub(crate) static PEER_CONNECT_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
224 register_int_counter_vec_with_registry!(
225 opts!("peer_connect_total", "Number of times peer (re/)connected",),
226 &["self_id", "peer_id", "direction"],
227 REGISTRY
228 )
229 .unwrap()
230});
231pub(crate) static PEER_DISCONNECT_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
232 register_int_counter_vec_with_registry!(
233 opts!(
234 "peer_disconnect_total",
235 "Number of times peer (re/)connected",
236 ),
237 &["self_id", "peer_id"],
238 REGISTRY
239 )
240 .unwrap()
241});
242pub(crate) static PEER_MESSAGES_COUNT: LazyLock<IntCounterVec> = LazyLock::new(|| {
243 register_int_counter_vec_with_registry!(
244 opts!("peer_messages_total", "Messages with the peer",),
245 &["self_id", "peer_id", "direction"],
246 REGISTRY
247 )
248 .unwrap()
249});
250
251pub(crate) async fn initialize_gauge_metrics(tg: &TaskGroup, db: &Database) {
254 STORED_BACKUPS_COUNT.set(
255 db.begin_transaction_nc()
256 .await
257 .find_by_prefix(&ClientBackupKeyPrefix)
258 .await
259 .count()
260 .await as i64,
261 );
262
263 let db_inner = db.clone();
264 BACKUP_COUNTS_UPDATE_TASK
265 .get_or_init(move || async move {
266 tg.spawn_cancellable("prometheus_backup_stats", async move {
267 loop {
268 let backup_counts =
269 backup_statistics_static(&mut db_inner.begin_transaction_nc().await).await;
270
271 BACKUP_COUNTS.with_label_values(&["1d"]).set(
272 backup_counts
273 .refreshed_1d
274 .try_into()
275 .expect("u64 to i64 overflow"),
276 );
277 BACKUP_COUNTS.with_label_values(&["1w"]).set(
278 backup_counts
279 .refreshed_1w
280 .try_into()
281 .expect("u64 to i64 overflow"),
282 );
283 BACKUP_COUNTS.with_label_values(&["1m"]).set(
284 backup_counts
285 .refreshed_1m
286 .try_into()
287 .expect("u64 to i64 overflow"),
288 );
289 BACKUP_COUNTS.with_label_values(&["3m"]).set(
290 backup_counts
291 .refreshed_3m
292 .try_into()
293 .expect("u64 to i64 overflow"),
294 );
295 BACKUP_COUNTS.with_label_values(&["all_time"]).set(
296 backup_counts
297 .num_backups
298 .try_into()
299 .expect("u64 to i64 overflow"),
300 );
301
302 TOTAL_BACKUP_SIZE.set(
303 backup_counts
304 .total_size
305 .try_into()
306 .expect("u64 to i64 overflow"),
307 );
308
309 sleep(BACKUP_STATS_REFRESH_INTERVAL).await;
310 }
311 });
312 })
313 .await;
314}