Skip to content

Commit 0cf2482

Browse files
committed
gui: overview network stats
1 parent f214a04 commit 0cf2482

22 files changed

+555
-221
lines changed

book/api/metrics-generated.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@
194194
|--------|------|-------------|
195195
| <span class="metrics-name">bundle_&#8203;transaction_&#8203;received</span> | counter | Total count of transactions received, including transactions within bundles |
196196
| <span class="metrics-name">bundle_&#8203;packet_&#8203;received</span> | counter | Total count of packets received |
197+
| <span class="metrics-name">bundle_&#8203;proto_&#8203;received_&#8203;bytes</span> | counter | Total count of bytes from received grpc protobuf payloads |
197198
| <span class="metrics-name">bundle_&#8203;bundle_&#8203;received</span> | counter | Total count of bundles received |
198199
| <span class="metrics-name">bundle_&#8203;errors</span><br/>{bundle_&#8203;error="<span class="metrics-enum">protobuf</span>"} | counter | Number of gRPC errors encountered (Protobuf decode/encode error) |
199200
| <span class="metrics-name">bundle_&#8203;errors</span><br/>{bundle_&#8203;error="<span class="metrics-enum">transport</span>"} | counter | Number of gRPC errors encountered (Transport error) |
@@ -450,7 +451,9 @@
450451
| <span class="metrics-name">shred_&#8203;force_&#8203;complete_&#8203;failure</span> | counter | The number of times we failed to force complete a FEC set on request |
451452
| <span class="metrics-name">shred_&#8203;force_&#8203;complete_&#8203;success</span> | counter | The number of times we successfully force completed a FEC set on request |
452453
| <span class="metrics-name">shred_&#8203;shred_&#8203;out_&#8203;rcv</span> | counter | The number of times we received a repair shred |
453-
| <span class="metrics-name">shred_&#8203;shred_&#8203;turbine_&#8203;rcv</span> | counter | The number of times we received a turbine shred |
454+
| <span class="metrics-name">shred_&#8203;shred_&#8203;out_&#8203;rcv_&#8203;bytes</span> | counter | The number of bytes received from network packets with repair shreds. Bytes include network headers. |
455+
| <span class="metrics-name">shred_&#8203;shred_&#8203;turbine_&#8203;rcv</span> | counter | The number of times we received a turbine shred |
456+
| <span class="metrics-name">shred_&#8203;shred_&#8203;turbine_&#8203;rcv_&#8203;bytes</span> | counter | The number of bytes received from network packets with turbine shreds. Bytes include network headers. |
454457
| <span class="metrics-name">shred_&#8203;store_&#8203;insert_&#8203;wait</span> | histogram | Time in seconds spent waiting for the store to insert a new FEC set |
455458
| <span class="metrics-name">shred_&#8203;store_&#8203;insert_&#8203;work</span> | histogram | Time in seconds spent on inserting a new FEC set |
456459

@@ -483,6 +486,9 @@
483486
| Metric | Type | Description |
484487
|--------|------|-------------|
485488
| <span class="metrics-name">metric_&#8203;boot_&#8203;timestamp_&#8203;nanos</span> | gauge | Timestamp when validator was started (nanoseconds since epoch) |
489+
| <span class="metrics-name">metric_&#8203;connection_&#8203;count</span> | gauge | The number of active http connections to the Prometheus endpoint |
490+
| <span class="metrics-name">metric_&#8203;bytes_&#8203;written</span> | counter | The total number of bytes written to all responses on the Prometheus endpoint |
491+
| <span class="metrics-name">metric_&#8203;bytes_&#8203;read</span> | counter | The total number of bytes read from all requests to the Prometheus endpoint |
486492

487493
</div>
488494

@@ -492,7 +498,7 @@
492498

493499
| Metric | Type | Description |
494500
|--------|------|-------------|
495-
| <span class="metrics-name">gui_&#8203;connection_&#8203;count</span> | gauge | The number of active connections to the GUI service |
501+
| <span class="metrics-name">gui_&#8203;connection_&#8203;count</span> | gauge | The number of active http connections to the GUI service, excluding connections that have been upgraded to a WebSocket connection |
496502
| <span class="metrics-name">gui_&#8203;websocket_&#8203;connection_&#8203;count</span> | gauge | The number of active websocket connections to the GUI service |
497503
| <span class="metrics-name">gui_&#8203;websocket_&#8203;frames_&#8203;sent</span> | counter | The total number of websocket frames sent to all connections to the GUI service |
498504
| <span class="metrics-name">gui_&#8203;websocket_&#8203;frames_&#8203;received</span> | counter | The total number of websocket frames received from all connections to the GUI service |

book/api/websocket.md

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,56 @@ transactions per second.
748748
}
749749
```
750750

751+
#### `summary.live_network_metrics`
752+
| frequency | type | example |
753+
|-----------------|------------------|---------|
754+
| *Once* + *10ms* | `NetworkMetrics` | below |
755+
756+
Live network metrics provides a live view of network bandwidth
757+
utilization across the various protocols used in the client.
758+
759+
The `protocols` list contains the different protocols the client
760+
uses to communicate with the internet.
761+
762+
```json
763+
[
764+
"turbine",
765+
"gossip",
766+
"tpu",
767+
"repair",
768+
"metrics"
769+
]
770+
```
771+
772+
- turbine: the protocol used to disseminate blockchain data, which
773+
contains primarily executable transactions.
774+
- gossip: the protocol used to disseminate node metadata, including node
775+
IP addresses used to help nodes find each other on the network
776+
- tpu: "transaction processing unit", refers to the various subsystems
777+
in a client used to consume and forward incoming Solana transactions for
778+
their next leader slot.
779+
- repair: a client subsystem which requests any missing block data
780+
needed by the replay pipeline which may have been lost over the network
781+
- metrics: refers to the Firedancer metrics tile, which serves an http
782+
Prometheus metrics endpoint
783+
784+
```json
785+
{
786+
"topic": "summary",
787+
"key": "live_network_metrics",
788+
"value": {
789+
"ingress": [12345432, 5431234, 92345, ...],
790+
"egress": [12345432, 5431234, 92345, ...]
791+
}
792+
}
793+
```
794+
795+
**`NetworkMetrics`**
796+
| Field | Type | Description |
797+
|---------|------------|-------------|
798+
| ingress | `number[]` | `ingress[i]` is the total number of ingress network bytes for `protocols[i]` |
799+
| egress | `number[]` | `egress[i]` is the total number of egress network bytes for `protocols[i]` |
800+
751801
#### `summary.live_txn_waterfall`
752802
| frequency | type | example |
753803
|------------------|--------------------|---------|

src/disco/bundle/fd_bundle_client.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,6 +799,7 @@ fd_bundle_client_grpc_rx_msg(
799799
ulong request_ctx
800800
) {
801801
fd_bundle_tile_t * ctx = app_ctx;
802+
ctx->metrics.proto_received_bytes += protobuf_sz;
802803
pb_istream_t istream = pb_istream_from_buffer( protobuf, protobuf_sz );
803804
switch( request_ctx ) {
804805
case FD_BUNDLE_CLIENT_REQ_Auth_GenerateAuthChallenge:

src/disco/bundle/fd_bundle_tile.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ metrics_write( fd_bundle_tile_t * ctx ) {
4949
FD_MCNT_SET( BUNDLE, TRANSACTION_RECEIVED, ctx->metrics.txn_received_cnt );
5050
FD_MCNT_SET( BUNDLE, BUNDLE_RECEIVED, ctx->metrics.bundle_received_cnt );
5151
FD_MCNT_SET( BUNDLE, PACKET_RECEIVED, ctx->metrics.packet_received_cnt );
52+
FD_MCNT_SET( BUNDLE, PROTO_RECEIVED_BYTES, ctx->metrics.proto_received_bytes );
5253
FD_MCNT_SET( BUNDLE, SHREDSTREAM_HEARTBEATS, ctx->metrics.shredstream_heartbeat_cnt );
5354
FD_MCNT_SET( BUNDLE, KEEPALIVES, ctx->metrics.ping_ack_cnt );
5455
FD_MCNT_SET( BUNDLE, ERRORS_PROTOBUF, ctx->metrics.decode_fail_cnt );

src/disco/bundle/fd_bundle_tile_private.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ struct fd_bundle_metrics {
3333
ulong txn_received_cnt;
3434
ulong bundle_received_cnt;
3535
ulong packet_received_cnt;
36+
ulong proto_received_bytes;
3637
ulong shredstream_heartbeat_cnt;
3738
ulong ping_ack_cnt;
3839

src/disco/gui/Local.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ifdef FD_HAS_INT128
2-
$(call add-hdrs,fd_gui.h fd_gui_printf.h fd_gui_peers.h fd_gui_config_parse.h)
2+
$(call add-hdrs,fd_gui.h fd_gui_printf.h fd_gui_peers.h fd_gui_config_parse.h fd_gui_metrics.h)
33
$(call add-objs,fd_gui fd_gui_printf fd_gui_peers fd_gui_config_parse fd_gui_tile generated/http_import_dist,fd_disco)
44
$(OBJDIR)/obj/disco/gui/fd_gui_tile.o: book/public/fire.svg
55
$(call make-unit-test,test_live_table,test_live_table,fd_disco fd_util)

src/disco/gui/fd_gui.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "fd_gui.h"
22
#include "fd_gui_peers.h"
33
#include "fd_gui_printf.h"
4+
#include "fd_gui_metrics.h"
45

56
#include "../metrics/fd_metrics.h"
67
#include "../plugin/fd_plugin.h"
@@ -397,6 +398,28 @@ fd_gui_estimated_tps_snap( fd_gui_t * gui ) {
397398
gui->summary.estimated_tps_history_idx = (gui->summary.estimated_tps_history_idx+1UL) % FD_GUI_TPS_HISTORY_SAMPLE_CNT;
398399
}
399400

401+
/* fd_gui_network_stats_snap fills *cur with one byte total per protocol
   (turbine, repair, gossip, tpu, metric) for each direction (in, out).
   Every field is assigned from the matching
   fd_gui_metrics_*_total_{ingress,egress}_bytes helper; nothing in *cur
   is read.

   gui  supplies the topology (gui->topo) handed to every helper.
   cur  is the snapshot to overwrite. */
static void
402+
fd_gui_network_stats_snap( fd_gui_t * gui,
403+
fd_gui_network_stats_t * cur ) {
404+
fd_topo_t * topo = gui->topo;
405+
/* Tile counts by name, passed to the helpers below alongside topo. */
ulong gossvf_tile_cnt = fd_topo_tile_name_cnt( topo, "gossvf" );
406+
ulong gossip_tile_cnt = fd_topo_tile_name_cnt( topo, "gossip" );
407+
ulong shred_tile_cnt = fd_topo_tile_name_cnt( topo, "shred" );
408+
ulong net_tile_cnt = fd_topo_tile_name_cnt( topo, "net" );
409+
ulong quic_tile_cnt = fd_topo_tile_name_cnt( topo, "quic" );
410+
411+
/* Turbine, repair and tpu egress helpers take the net tile count,
   while their ingress helpers take the shred / quic tile counts.
   Gossip uses gossvf for ingress and gossip for egress.  The metric
   helpers take only the topology. */
cur->in.turbine = fd_gui_metrics_turbine_total_ingress_bytes( topo, shred_tile_cnt );
412+
cur->out.turbine = fd_gui_metrics_turbine_total_egress_bytes( topo, net_tile_cnt );
413+
cur->in.repair = fd_gui_metrics_repair_total_ingress_bytes( topo, shred_tile_cnt );
414+
cur->out.repair = fd_gui_metrics_repair_total_egress_bytes( topo, net_tile_cnt );
415+
cur->in.gossip = fd_gui_metrics_gossip_total_ingress_bytes( topo, gossvf_tile_cnt );
416+
/* NOTE(review): "gosip" (single s) differs from the sibling helper
   names; confirm it matches the declaration in fd_gui_metrics.h. */
cur->out.gossip = fd_gui_metrics_gosip_total_egress_bytes( topo, gossip_tile_cnt );
417+
cur->in.tpu = fd_gui_metrics_tpu_total_ingress_bytes( topo, quic_tile_cnt );
418+
cur->out.tpu = fd_gui_metrics_tpu_total_egress_bytes( topo, net_tile_cnt );
419+
cur->in.metric = fd_gui_metrics_metric_total_ingress_bytes( topo );
420+
cur->out.metric = fd_gui_metrics_metric_total_egress_bytes( topo );
421+
}
422+
400423
/* Snapshot all of the data from metrics to construct a view of the
401424
transaction waterfall.
402425
@@ -828,6 +851,10 @@ fd_gui_poll( fd_gui_t * gui, long now ) {
828851
fd_gui_printf_live_txn_waterfall( gui, gui->summary.txn_waterfall_reference, gui->summary.txn_waterfall_current, 0UL /* TODO: REAL NEXT LEADER SLOT */ );
829852
fd_http_server_ws_broadcast( gui->http );
830853

854+
fd_gui_network_stats_snap( gui, gui->summary.network_stats_current );
855+
fd_gui_printf_live_network_metrics( gui, gui->summary.network_stats_current );
856+
fd_http_server_ws_broadcast( gui->http );
857+
831858
*gui->summary.tile_stats_reference = *gui->summary.tile_stats_current;
832859
fd_gui_tile_stats_snap( gui, gui->summary.txn_waterfall_current, gui->summary.tile_stats_current, now );
833860
fd_gui_printf_live_tile_stats( gui, gui->summary.tile_stats_reference, gui->summary.tile_stats_current );

src/disco/gui/fd_gui.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,19 @@ struct fd_gui_scheduler_counts {
257257

258258
typedef struct fd_gui_scheduler_counts fd_gui_scheduler_counts_t;
259259

260+
/* fd_gui_network_stats holds per-protocol byte counters in two
   identically shaped groups: `in` and `out`.  Each group has one ulong
   per protocol (turbine, gossip, tpu, repair, metric). */
struct fd_gui_network_stats {
261+
/* total bytes accumulated */
262+
struct {
263+
ulong turbine;
264+
ulong gossip;
265+
ulong tpu;
266+
ulong repair;
267+
ulong metric;
268+
} in, out;
269+
};
270+
271+
typedef struct fd_gui_network_stats fd_gui_network_stats_t;
272+
260273
struct fd_gui_leader_slot {
261274
ulong slot;
262275
long leader_start_time; /* UNIX timestamp of when we first became leader in this slot */
@@ -633,6 +646,8 @@ struct fd_gui {
633646
ulong estimated_tps_history_idx;
634647
ulong estimated_tps_history[ FD_GUI_TPS_HISTORY_SAMPLE_CNT ][ 3UL ];
635648

649+
fd_gui_network_stats_t network_stats_current[ 1 ];
650+
636651
fd_gui_txn_waterfall_t txn_waterfall_reference[ 1 ];
637652
fd_gui_txn_waterfall_t txn_waterfall_current[ 1 ];
638653

0 commit comments

Comments
 (0)