Skip to content

Commit 958c1f8

Browse files
committed
feat(tower): txns from replay / resolv, rm verify in tower, stem burst
1 parent 2f14dfc commit 958c1f8

File tree

7 files changed

+190
-173
lines changed

7 files changed

+190
-173
lines changed

src/app/firedancer/config/default.toml

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,24 +1367,19 @@ user = ""
13671367

13681368
# The tower tile runs the fork choice and tower rules to determine
13691369
# both what block to vote on and what block to build our own leader
1370-
# blocks on top of.
1370+
# blocks on top of. It also confirms blocks based on vote txns.
13711371
[tiles.tower]
1372-
# Firedancer can process at most this many slots without rooting
1373-
# in the consensus rules before it must begin evicting.
1372+
# Solana reaches consensus via replay, but can "cluster confirm"
1373+
# slots ahead of the replay tip by listening to vote txns from
1374+
# gossip or TPU. The larger max_lookahead_conf is, the further
1375+
# ahead slots can be cluster confirmed before they are replayed.
13741376
#
1375-
# This is an estimate and should be set as generously as
1376-
# possible to allow for temporary outages such as network
1377-
# partitions. For example, the validator might get disconnected
1378-
# from part of the cluster due to data center issues. Roughly,
1379-
# the default of 4096 allows for 30 minutes without rooting.
1377+
# Specifically, tower will ignore gossip or TPU votes that are
1378+
# more than max_lookahead_conf slots ahead of the root.
13801379
#
1381-
# Specifically, tower will ignore gossip votes that exceed max
1382-
# unrooted slots ahead of the current root. Additionally, both
1383-
# fork choice and tower structures will OOM and cause Firedancer
1384-
# to exit if it needs to maintain more than max unrooted slots
1385-
# tower forks (TODO in the future Firedancer will instead
1386-
# gracefully degrade by evicting forks).
1387-
max_unrooted_slots = 4096
1380+
# Note: max_lookahead_conf must be >= max_live_slots. Firedancer
1381+
# will ignore a configured value that does not satisfy this.
1382+
max_lookahead_conf = 4096
13881383

13891384
[tiles.send]
13901385
# The port the send tile uses for QUIC, to send votes and other

src/app/firedancer/topology.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,8 @@ fd_topo_initialize( config_t * config ) {
371371

372372
fd_topob_wksp( topo, "cswtch" );
373373

374+
fd_topob_wksp( topo, "exec_replay" );
375+
374376
if( FD_LIKELY( snapshots_enabled ) ) {
375377
fd_topob_wksp( topo, "snapct" );
376378
fd_topob_wksp( topo, "snapld" );
@@ -407,9 +409,7 @@ fd_topo_initialize( config_t * config ) {
407409

408410
#define FOR(cnt) for( ulong i=0UL; i<cnt; i++ )
409411

410-
/* TODO: Explain this .... USHORT_MAX is not dcache max */
411-
ulong pending_fec_shreds_depth = fd_ulong_min( fd_ulong_pow2_up( config->tiles.shred.max_pending_shred_sets * FD_REEDSOL_DATA_SHREDS_MAX ), USHORT_MAX + 1 /* dcache max */ );
412-
ulong max_unrooted_slots = config->tiles.tower.max_unrooted_slots;
412+
ulong shred_depth = 65536UL; /* from fdctl/topology.c shred_store link. MAKE SURE TO KEEP IN SYNC. */
413413

414414
/* topo, link_name, wksp_name, depth, mtu, burst */
415415
/**/ fd_topob_link( topo, "gossip_net", "net_gossip", 32768UL, FD_NET_MTU, 1UL );
@@ -481,14 +481,15 @@ fd_topo_initialize( config_t * config ) {
481481
/**/ fd_topob_link( topo, "send_sign", "send_sign", 128UL, FD_TXN_MTU, 1UL ); /* TODO: Depth probably doesn't need to be 128 */
482482
/**/ fd_topob_link( topo, "sign_send", "sign_send", 128UL, sizeof(fd_ed25519_sig_t), 1UL ); /* TODO: Depth probably doesn't need to be 128 */
483483

484-
FOR(shred_tile_cnt) fd_topob_link( topo, "shred_out", "shred_out", pending_fec_shreds_depth, FD_SHRED_OUT_MTU, 3UL ); /* TODO: Pretty sure burst of 3 is incorrect here */
485-
FOR(shred_tile_cnt) fd_topob_link( topo, "repair_shred", "shred_out", pending_fec_shreds_depth, sizeof(fd_ed25519_sig_t), 1UL ); /* TODO: Also pending_fec_shreds_depth? Seems wrong */
486-
/**/ fd_topob_link( topo, "tower_out", "tower_out", max_unrooted_slots, sizeof(fd_tower_msg_t), 1UL );
484+
FOR(shred_tile_cnt) fd_topob_link( topo, "shred_out", "shred_out", shred_depth, FD_SHRED_OUT_MTU, 3UL ); /* TODO: Pretty sure burst of 3 is incorrect here */
485+
FOR(shred_tile_cnt) fd_topob_link( topo, "repair_shred", "shred_out", shred_depth, sizeof(fd_ed25519_sig_t), 1UL );
486+
/**/ fd_topob_link( topo, "tower_out", "tower_out", 128UL, sizeof(fd_tower_msg_t), 3UL ); /* dup conf + cluster conf + slot_done */
487487
/**/ fd_topob_link( topo, "send_out", "send_out", 128UL, FD_TPU_RAW_MTU, 1UL );
488488

489489
fd_topob_link( topo, "replay_exec", "replay_exec", 16384UL, sizeof(fd_exec_task_msg_t), 1UL );
490490

491491
FOR(exec_tile_cnt) fd_topob_link( topo, "exec_sig", "exec_sig", 16384UL, 64UL, 1UL );
492+
FOR(exec_tile_cnt) fd_topob_link( topo, "exec_replay", "exec_replay", 16384UL, sizeof(fd_exec_task_done_msg_t), 1UL );
492493

493494
ushort parsed_tile_to_cpu[ FD_TILE_MAX ];
494495
/* Unassigned tiles will be floating, unless auto topology is enabled. */
@@ -670,8 +671,9 @@ fd_topo_initialize( config_t * config ) {
670671
/**/ fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "poh_replay", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
671672
FOR(exec_tile_cnt) fd_topob_tile_in ( topo, "exec", i, "metric_in", "replay_exec", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
672673
674+
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "dedup_resolv", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
675+
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "replay_exec", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
673676
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "genesi_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
674-
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "gossip_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
675677
/**/ fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "replay_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
676678
if( snapshots_enabled ) {
677679
fd_topob_tile_in ( topo, "tower", 0UL, "metric_in", "snapin_manif", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
@@ -726,6 +728,9 @@ fd_topo_initialize( config_t * config ) {
726728
FOR(exec_tile_cnt) fd_topob_tile_in ( topo, "dedup", 0UL, "metric_in", "exec_sig", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
727729
FOR(exec_tile_cnt) fd_topob_tile_in ( topo, "pack", 0UL, "metric_in", "exec_sig", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
728730
FOR(exec_tile_cnt) fd_topob_tile_out( topo, "exec", i, "exec_sig", i );
731+
FOR(exec_tile_cnt) fd_topob_tile_out( topo, "exec", i, "exec_replay", i );
732+
FOR(exec_tile_cnt) fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "exec_replay", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
733+
729734

730735
if( FD_UNLIKELY( config->tiles.bundle.enabled ) ) {
731736
fd_topob_wksp( topo, "bundle_verif" );
@@ -838,11 +843,6 @@ fd_topo_initialize( config_t * config ) {
838843
fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "rpc_replay", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
839844
}
840845

841-
fd_topob_wksp( topo, "exec_replay" );
842-
FOR(exec_tile_cnt) fd_topob_link( topo, "exec_replay", "exec_replay", 16384UL, sizeof(fd_exec_task_done_msg_t), 1UL );
843-
FOR(exec_tile_cnt) fd_topob_tile_out( topo, "exec", i, "exec_replay", i );
844-
FOR(exec_tile_cnt) fd_topob_tile_in( topo, "replay", 0UL, "metric_in", "exec_replay", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
845-
846846
if( FD_LIKELY( !is_auto_affinity ) ) {
847847
if( FD_UNLIKELY( affinity_tile_cnt<topo->tile_cnt ) )
848848
FD_LOG_ERR(( "The topology you are using has %lu tiles, but the CPU affinity specified in the config tile as [layout.affinity] only provides for %lu cores. "
@@ -941,7 +941,6 @@ fd_topo_initialize( config_t * config ) {
941941
FOR(exec_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], bank_hash_cmp_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
942942
FD_TEST( fd_pod_insertf_ulong( topo->props, bank_hash_cmp_obj->id, "bh_cmp" ) );
943943

944-
ulong shred_depth = 65536UL; /* from fdctl/topology.c shred_store link. MAKE SURE TO KEEP IN SYNC. */
945944
ulong fec_set_cnt = shred_depth + config->tiles.shred.max_pending_shred_sets + 4UL;
946945
ulong fec_sets_sz = fec_set_cnt*sizeof(fd_shred34_t)*4; /* mirrors # of dcache entries in frankendancer */
947946
fd_topo_obj_t * fec_sets_obj = setup_topo_fec_sets( topo, "fec_sets", shred_tile_cnt*fec_sets_sz );
@@ -1209,7 +1208,8 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,
12091208

12101209
} else if( FD_UNLIKELY( !strcmp( tile->name, "tower" ) ) ) {
12111210

1212-
tile->tower.slot_max = config->tiles.tower.max_unrooted_slots;
1211+
tile->tower.max_live_slots = config->firedancer.runtime.max_live_slots;
1212+
tile->tower.max_lookahead_conf = config->tiles.tower.max_lookahead_conf;
12131213
strncpy( tile->tower.identity_key, config->paths.identity_key, sizeof(tile->tower.identity_key) );
12141214
strncpy( tile->tower.vote_account, config->paths.vote_account, sizeof(tile->tower.vote_account) );
12151215
strncpy( tile->tower.base_path, config->paths.base, sizeof(tile->tower.base_path) );

src/app/shared/fd_config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ struct fd_config {
482482
} shredcap;
483483

484484
struct {
485-
ulong max_unrooted_slots;
485+
ulong max_lookahead_conf;
486486
} tower;
487487

488488
} tiles;

src/app/shared/fd_config_parse.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ fd_config_extract_pod( uchar * pod,
255255

256256
CFG_POP ( ushort, tiles.send.send_src_port );
257257

258-
CFG_POP ( ulong, tiles.tower.max_unrooted_slots );
258+
CFG_POP ( ulong, tiles.tower.max_lookahead_conf );
259259

260260
CFG_POP ( bool, tiles.archiver.enabled );
261261
CFG_POP ( ulong, tiles.archiver.end_slot );

src/disco/topo/fd_topo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,8 @@ struct fd_topo_tile {
491491
} archiver;
492492

493493
struct {
494-
ulong slot_max;
494+
ulong max_live_slots;
495+
ulong max_lookahead_conf;
495496
char identity_key[ PATH_MAX ];
496497
char vote_account[ PATH_MAX ];
497498
char base_path[PATH_MAX];
@@ -575,7 +576,6 @@ struct fd_topo_tile {
575576

576577
struct {
577578
ulong max_live_slots;
578-
579579
ulong txncache_obj_id;
580580
ulong funk_obj_id;
581581
ulong progcache_obj_id;

src/discof/replay/fd_replay_tile.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2392,10 +2392,8 @@ returnable_frag( fd_replay_tile_t * ctx,
23922392
/* Implement replay plugin API here */
23932393

23942394
switch( msg->kind ) {
2395-
case FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC:
2396-
break;
2397-
case FD_TOWER_SLOT_CONFIRMED_ROOTED:
2398-
break;
2395+
case FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC: break;
2396+
case FD_TOWER_SLOT_CONFIRMED_ROOTED: break;
23992397
}
24002398
};
24012399
break;

0 commit comments

Comments
 (0)