Skip to content

Commit dd00bbf

Browse files
authored
Optimize SONIC configuration generation with global caches (#1779)
Further optimize SONIC configuration generation by implementing global caching strategies for metalbox devices and VIP addresses, and simplifying peer type calculation.

Changes in config_generator.py:

1. Metalbox Global Cache:
   - Add _metalbox_devices_cache global variable
   - Create _load_metalbox_devices_cache() to bulk fetch all metalbox devices, interfaces, and IPs at sync start
   - Modify _get_metalbox_ip_for_device() to use pre-loaded cache
   - Performance: O(N × M × K) → O(1) lookups
2. Peer Type Optimization:
   - Simplify _determine_peer_type() to use existing device_as_mapping
   - Eliminate redundant spine/superspine device queries and group calculations
   - Performance: Removed O(N) queries per peer type check

Changes in connections.py:

3. VIP Addresses Global Cache:
   - Add _vip_addresses_cache global variable
   - Create load_vip_addresses_cache() to bulk fetch all VIP addresses at sync start
   - Create clear_vip_addresses_cache() for cache cleanup
   - Modify get_connected_interface_ipv4_address() to use cached VIPs
   - Performance: O(N × P) → O(1) lookups

Changes in sync.py:

4. Cache Integration:
   - Call _load_metalbox_devices_cache() after cache clearing
   - Call load_vip_addresses_cache() after cache clearing
   - Add clear_vip_addresses_cache() to cleanup process

All function signatures preserved. Error handling and logging patterns maintained.

AI-assisted: Claude Code
Signed-off-by: Christian Berendt <[email protected]>
1 parent b5a8757 commit dd00bbf

File tree

3 files changed

+190
-91
lines changed

3 files changed

+190
-91
lines changed

osism/tasks/conductor/sonic/config_generator.py

Lines changed: 132 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
# Global cache for metalbox IPs per device to avoid duplicate lookups
4242
_metalbox_ip_cache: dict[int, Optional[str]] = {}
4343

44+
# Global cache for all metalbox devices with their interfaces and IPs
45+
_metalbox_devices_cache: Optional[dict] = None
46+
4447

4548
def natural_sort_key(port_name):
4649
"""Extract numeric part from port name for natural sorting."""
@@ -1076,40 +1079,12 @@ def _determine_peer_type(local_device, connected_device, device_as_mapping=None)
10761079
connected_as = None
10771080
if device_as_mapping and connected_device.id in device_as_mapping:
10781081
connected_as = device_as_mapping[connected_device.id]
1079-
else:
1080-
# If connected device is not in device_as_mapping, check if it's a spine/superspine
1081-
# and calculate AS for its group
1082-
if connected_device.role and connected_device.role.slug in [
1083-
"spine",
1084-
"superspine",
1085-
]:
1086-
# Import here to avoid circular imports
1087-
from .bgp import calculate_minimum_as_for_group
1088-
from .connections import find_interconnected_devices
1089-
1090-
# Get all devices to find the group
1091-
all_devices = list(
1092-
utils.nb.dcim.devices.filter(role=["spine", "superspine"])
1093-
)
1094-
spine_groups = find_interconnected_devices(
1095-
all_devices, ["spine", "superspine"]
1096-
)
1097-
1098-
# Find which group the connected device belongs to
1099-
for group in spine_groups:
1100-
if any(dev.id == connected_device.id for dev in group):
1101-
connected_as = calculate_minimum_as_for_group(group)
1102-
if connected_as:
1103-
logger.debug(
1104-
f"Calculated AS {connected_as} for connected spine/superspine device {connected_device.name}"
1105-
)
1106-
break
1107-
1108-
# Fallback to calculating from IPv4 if still no AS
1109-
if not connected_as and connected_device.primary_ip4:
1110-
connected_as = calculate_local_asn_from_ipv4(
1111-
str(connected_device.primary_ip4.address)
1112-
)
1082+
elif connected_device.primary_ip4:
1083+
# If not in mapping (e.g., not in a spine/superspine group),
1084+
# calculate AS directly from IPv4 address
1085+
connected_as = calculate_local_asn_from_ipv4(
1086+
str(connected_device.primary_ip4.address)
1087+
)
11131088

11141089
# Compare AS numbers
11151090
if local_as and connected_as and local_as == connected_as:
@@ -1124,6 +1099,81 @@ def _determine_peer_type(local_device, connected_device, device_as_mapping=None)
11241099
return "external" # Default to external on error
11251100

11261101

1102+
def _load_metalbox_devices_cache():
    """Populate the module-level metalbox devices cache from NetBox.

    Fetches every device with role ``metalbox``, then the interfaces of each
    of those devices, then the IP addresses assigned to each interface that
    is not management-only. This costs one query for the devices, one per
    device for its interfaces and one per interface for its IPs; the point
    is that it happens once per sync rather than once per lookup.

    On return ``_metalbox_devices_cache`` maps a metalbox device id to a
    dict with the keys:

    * ``"device"`` - the device record
    * ``"interfaces"`` - dict keyed by interface id, each value holding
      ``"interface"`` (the record), ``"is_vlan"`` (True for a virtual
      interface whose name starts with ``Vlan``) and ``"ips"`` (the IP
      address records that have a non-empty ``address``)

    Errors are logged and never raised. If fetching the interfaces or IPs of
    one metalbox fails, that device is still cached with whatever interfaces
    were collected before the failure. If the device query itself fails, the
    cache is left as an empty dict.
    """
    global _metalbox_devices_cache

    logger.debug("Loading metalbox devices cache...")
    _metalbox_devices_cache = {}

    try:
        metalbox_devices = list(utils.nb.dcim.devices.filter(role="metalbox"))
        logger.debug(f"Found {len(metalbox_devices)} metalbox devices")

        for metalbox in metalbox_devices:
            metalbox_data = {
                "device": metalbox,
                "interfaces": {},
            }

            try:
                interfaces = list(
                    utils.nb.dcim.interfaces.filter(device_id=metalbox.id)
                )
                logger.debug(
                    f"Metalbox {metalbox.name} has {len(interfaces)} interfaces"
                )

                for interface in interfaces:
                    # Skip management-only interfaces
                    if getattr(interface, "mgmt_only", False):
                        continue

                    # A VLAN interface (SVI) is a virtual interface named "Vlan<id>"
                    interface_type = getattr(interface, "type", None)
                    is_vlan_interface = bool(
                        interface_type
                        and interface_type.value == "virtual"
                        and interface.name.startswith("Vlan")
                    )

                    # IP assignment in NetBox is a generic relation, so the
                    # object id alone is ambiguous: an IP bound to a different
                    # object type with the same numeric id would match too.
                    # Pin the lookup to device interfaces.
                    ip_addresses = utils.nb.ipam.ip_addresses.filter(
                        assigned_object_type="dcim.interface",
                        assigned_object_id=interface.id,
                    )

                    metalbox_data["interfaces"][interface.id] = {
                        "interface": interface,
                        "is_vlan": is_vlan_interface,
                        "ips": [ip_addr for ip_addr in ip_addresses if ip_addr.address],
                    }

            except Exception as e:
                # Best effort: keep the interfaces gathered so far
                logger.warning(
                    f"Could not fetch interfaces for metalbox {metalbox.name}: {e}"
                )

            _metalbox_devices_cache[metalbox.id] = metalbox_data

        logger.info(
            f"Loaded metalbox cache with {len(_metalbox_devices_cache)} devices"
        )

    except Exception as e:
        logger.warning(f"Could not load metalbox devices cache: {e}")
        # Leave a loaded-but-empty cache rather than a partially filled one
        _metalbox_devices_cache = {}
_metalbox_devices_cache = {}
1175+
1176+
11271177
def _get_metalbox_ip_for_device(device):
11281178
"""Get Metalbox IP for a SONiC device based on OOB connection.
11291179
@@ -1133,6 +1183,8 @@ def _get_metalbox_ip_for_device(device):
11331183
11341184
This IP is used for both NTP and DNS services.
11351185
1186+
Uses the pre-loaded metalbox devices cache for optimal performance.
1187+
11361188
Args:
11371189
device: SONiC device object
11381190
@@ -1149,6 +1201,7 @@ def _get_metalbox_ip_for_device(device):
11491201
oob_ip_result = get_device_oob_ip(device)
11501202
if not oob_ip_result:
11511203
logger.debug(f"No OOB IP found for device {device.name}")
1204+
_metalbox_ip_cache[device.id] = None
11521205
return None
11531206

11541207
oob_ip, prefix_len = oob_ip_result
@@ -1159,58 +1212,46 @@ def _get_metalbox_ip_for_device(device):
11591212

11601213
device_network = IPv4Network(f"{oob_ip}/{prefix_len}", strict=False)
11611214

1162-
# Get all metalbox devices
1163-
metalbox_devices = utils.nb.dcim.devices.filter(role="metalbox")
1215+
# Use the pre-loaded metalbox devices cache
1216+
if _metalbox_devices_cache is None:
1217+
logger.warning(
1218+
"Metalbox devices cache not loaded - call _load_metalbox_devices_cache() first"
1219+
)
1220+
_metalbox_ip_cache[device.id] = None
1221+
return None
11641222

1165-
for metalbox in metalbox_devices:
1223+
# Iterate through cached metalbox devices
1224+
for metalbox_id, metalbox_data in _metalbox_devices_cache.items():
1225+
metalbox = metalbox_data["device"]
11661226
logger.debug(f"Checking metalbox device {metalbox.name} for services")
11671227

1168-
# Get all interfaces on this metalbox
1169-
interfaces = utils.nb.dcim.interfaces.filter(device_id=metalbox.id)
1170-
1171-
for interface in interfaces:
1172-
# Skip management-only interfaces
1173-
if hasattr(interface, "mgmt_only") and interface.mgmt_only:
1174-
continue
1175-
1176-
# Check both physical interfaces and VLAN interfaces (SVIs)
1177-
# VLAN interfaces are typically named "Vlan123" for VLAN ID 123
1178-
is_vlan_interface = (
1179-
hasattr(interface, "type")
1180-
and interface.type
1181-
and interface.type.value == "virtual"
1182-
and interface.name.startswith("Vlan")
1183-
)
1184-
1185-
# Get IP addresses for this interface
1186-
ip_addresses = utils.nb.ipam.ip_addresses.filter(
1187-
assigned_object_id=interface.id,
1188-
)
1189-
1190-
for ip_addr in ip_addresses:
1191-
if ip_addr.address:
1192-
# Extract IP address without prefix
1193-
ip_only = ip_addr.address.split("/")[0]
1194-
1195-
# Check if it's IPv4 and in the same network as the SONiC device
1196-
try:
1197-
metalbox_ip = IPv4Address(ip_only)
1198-
if metalbox_ip in device_network:
1199-
interface_type = (
1200-
"VLAN interface"
1201-
if is_vlan_interface
1202-
else "interface"
1203-
)
1204-
logger.info(
1205-
f"Found Metalbox {ip_only} on {metalbox.name} "
1206-
f"{interface_type} {interface.name} for SONiC device {device.name}"
1207-
)
1208-
# Cache the result
1209-
_metalbox_ip_cache[device.id] = ip_only
1210-
return ip_only
1211-
except ValueError:
1212-
# Skip non-IPv4 addresses
1213-
continue
1228+
# Iterate through cached interfaces for this metalbox
1229+
for interface_id, interface_data in metalbox_data["interfaces"].items():
1230+
interface = interface_data["interface"]
1231+
is_vlan_interface = interface_data["is_vlan"]
1232+
1233+
# Check all cached IP addresses for this interface
1234+
for ip_addr in interface_data["ips"]:
1235+
# Extract IP address without prefix
1236+
ip_only = ip_addr.address.split("/")[0]
1237+
1238+
# Check if it's IPv4 and in the same network as the SONiC device
1239+
try:
1240+
metalbox_ip = IPv4Address(ip_only)
1241+
if metalbox_ip in device_network:
1242+
interface_type = (
1243+
"VLAN interface" if is_vlan_interface else "interface"
1244+
)
1245+
logger.info(
1246+
f"Found Metalbox {ip_only} on {metalbox.name} "
1247+
f"{interface_type} {interface.name} for SONiC device {device.name}"
1248+
)
1249+
# Cache the result
1250+
_metalbox_ip_cache[device.id] = ip_only
1251+
return ip_only
1252+
except ValueError:
1253+
# Skip non-IPv4 addresses
1254+
continue
12141255

12151256
logger.warning(f"No suitable Metalbox found for SONiC device {device.name}")
12161257
# Cache None result to avoid repeated lookups
@@ -1321,6 +1362,13 @@ def clear_metalbox_ip_cache():
13211362
logger.debug("Cleared metalbox IP cache")
13221363

13231364

1365+
def clear_metalbox_devices_cache():
    """Reset the metalbox devices cache to its unloaded state (``None``).

    Intended to run at the start of sync_sonic so that stale device data
    from an earlier run is discarded before the cache is loaded again.
    """
    global _metalbox_devices_cache

    # None (rather than an empty dict) marks the cache as "not loaded"
    _metalbox_devices_cache = None
    logger.debug("Cleared metalbox devices cache")
1370+
1371+
13241372
def _add_dns_configuration(config, device):
13251373
"""Add DNS_NAMESERVER configuration to device config.
13261374
@@ -1346,6 +1394,7 @@ def clear_all_caches():
13461394
"""Clear all caches in config_generator module."""
13471395
clear_ntp_cache()
13481396
clear_metalbox_ip_cache()
1397+
clear_metalbox_devices_cache()
13491398
clear_port_config_cache()
13501399
logger.debug("Cleared all config_generator caches")
13511400

osism/tasks/conductor/sonic/connections.py

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
)
1717
from .cache import get_cached_device_interfaces
1818

19+
# Global cache for VIP addresses to avoid repeated queries
20+
_vip_addresses_cache = None
21+
1922

2023
def get_connected_device_via_interface(
2124
interface: Any, source_device_id: int
@@ -293,6 +296,31 @@ def find_interconnected_devices(
293296
return all_groups
294297

295298

299+
def load_vip_addresses_cache():
    """Fetch every IP address with role ``vip`` once and keep it in the cache.

    Meant to be called at the start of a sync so that connected-interface
    IP lookups can reuse the list instead of querying for VIPs each time.
    A failed query is logged as a warning and leaves an empty list cached.
    """
    global _vip_addresses_cache

    logger.debug("Loading VIP addresses cache...")

    try:
        fetched_vips = list(utils.nb.ipam.ip_addresses.filter(role="vip"))
        logger.info(f"Loaded {len(fetched_vips)} VIP addresses into cache")
    except Exception as e:
        logger.warning(f"Could not load VIP addresses cache: {e}")
        fetched_vips = []

    _vip_addresses_cache = fetched_vips
315+
316+
317+
def clear_vip_addresses_cache():
    """Drop the cached VIP addresses by resetting the cache to ``None``.

    While the cache is ``None``, get_connected_interface_ipv4_address()
    queries the VIP addresses itself instead of using cached data.
    """
    global _vip_addresses_cache

    _vip_addresses_cache = None
    logger.debug("Cleared VIP addresses cache")
322+
323+
296324
def get_device_bgp_neighbors_via_loopback(
297325
device: Any,
298326
portchannel_info: dict,
@@ -457,12 +485,17 @@ def get_connected_interface_ipv4_address(device, sonic_port_name, netbox):
457485
interface_type="dcim.interface", interface_id=connected_interface.id
458486
)
459487

460-
# Get all VIP addresses once to avoid repeated API calls
461-
try:
462-
all_vip_addresses = netbox.ipam.ip_addresses.filter(role="vip")
463-
except Exception as vip_e:
464-
logger.debug(f"Could not query VIP addresses: {vip_e}")
465-
all_vip_addresses = []
488+
# Use cached VIP addresses if available, otherwise query
489+
if _vip_addresses_cache is not None:
490+
all_vip_addresses = _vip_addresses_cache
491+
logger.debug("Using cached VIP addresses")
492+
else:
493+
try:
494+
all_vip_addresses = list(netbox.ipam.ip_addresses.filter(role="vip"))
495+
logger.debug("VIP cache not loaded, querying VIP addresses")
496+
except Exception as vip_e:
497+
logger.debug(f"Could not query VIP addresses: {vip_e}")
498+
all_vip_addresses = []
466499

467500
# Collect all VIP IPv4 addresses from all FHRP groups this interface belongs to
468501
vip_addresses_found = []

osism/tasks/conductor/sonic/sync.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,16 @@
77
from osism import utils
88
from osism.tasks.conductor.netbox import get_nb_device_query_list_sonic
99
from .bgp import calculate_minimum_as_for_group
10-
from .connections import find_interconnected_devices
11-
from .config_generator import generate_sonic_config, clear_all_caches
10+
from .connections import (
11+
find_interconnected_devices,
12+
load_vip_addresses_cache,
13+
clear_vip_addresses_cache,
14+
)
15+
from .config_generator import (
16+
generate_sonic_config,
17+
clear_all_caches,
18+
_load_metalbox_devices_cache,
19+
)
1220
from .constants import DEFAULT_SONIC_ROLES, SUPPORTED_HWSKUS
1321
from .exporter import save_config_to_netbox, export_config_to_file
1422
from .cache import clear_interface_cache, get_interface_cache_stats
@@ -35,6 +43,14 @@ def sync_sonic(device_name=None, task_id=None, show_diff=True):
3543
clear_all_caches()
3644
logger.debug("Initialized all caches for sync_sonic task")
3745

46+
# Load metalbox devices cache for optimal performance
47+
_load_metalbox_devices_cache()
48+
logger.debug("Loaded metalbox devices cache")
49+
50+
# Load VIP addresses cache for optimal performance
51+
load_vip_addresses_cache()
52+
logger.debug("Loaded VIP addresses cache")
53+
3854
# Dictionary to store configurations for all devices
3955
device_configs = {}
4056

@@ -220,6 +236,7 @@ def sync_sonic(device_name=None, task_id=None, show_diff=True):
220236

221237
clear_interface_cache()
222238
clear_all_caches()
239+
clear_vip_addresses_cache()
223240
logger.debug("Cleared all caches after sync_sonic task completion")
224241

225242
# Finish task output if task_id is available

0 commit comments

Comments
 (0)