diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index f6f24b6b..1824a9c2 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -13,11 +13,14 @@ from rich.table import Table from cratedb_toolkit.admin.xmover.model import ( + ActiveShardActivity, + ActiveShardSnapshot, DistributionStats, NodeInfo, ShardInfo, ShardRelocationConstraints, ShardRelocationResponse, + TableStatsType, ) from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.format import format_percentage, format_size @@ -36,7 +39,7 @@ def __init__(self, client: CrateDBClient): self.shards: List[ShardInfo] = [] # Initialize session-based caches for performance. - self._zone_conflict_cache: Dict[Tuple[str, int, str], Union[str, None]] = {} + self._zone_conflict_cache: Dict[Tuple[str, str, int, str], Union[str, None]] = {} self._node_lookup_cache: Dict[str, Union[NodeInfo, None]] = {} self._target_nodes_cache: Dict[Tuple[float, frozenset[Any], float, float], List[NodeInfo]] = {} self._cache_hits = 0 @@ -181,8 +184,6 @@ def find_nodes_with_capacity( free_space_gb = node.available_space_gb if free_space_gb >= (required_space_gb + min_free_space_gb): available_nodes.append(node) - else: - continue # Sort by available space (most space first) - prioritize nodes with more free space available_nodes.sort(key=lambda n: n.available_space_gb, reverse=True) @@ -204,7 +205,7 @@ def generate_rebalancing_recommendations( # Get moveable shards (only healthy ones for actual operations) moveable_shards = self.find_moveable_shards(constraints.min_size, constraints.max_size, constraints.table_name) - print( + logger.info( f"Analyzing {len(moveable_shards)} candidate shards " f"in size range {constraints.min_size}-{constraints.max_size}GB..." 
        )
@@ -237,12 +238,11 @@ def generate_rebalancing_recommendations(
         # Optimize processing: if filtering by source node, only process those shards
         if constraints.source_node:
             processing_shards = [s for s in moveable_shards if s.node_name == constraints.source_node]
-            print(f"Focusing on {len(processing_shards)} shards from node {constraints.source_node}")
+            logger.info(f"Focusing on {len(processing_shards)} shards from node {constraints.source_node}")
         else:
             processing_shards = moveable_shards
 
         # Generate move recommendations
-        safe_recommendations = 0  # noqa: F841
         total_evaluated = 0
 
         for i, shard in enumerate(processing_shards):
@@ -366,12 +366,12 @@ def generate_rebalancing_recommendations(
         if len(processing_shards) > 100:
             print()  # New line after progress dots
 
-        print(f"Generated {len(recommendations)} move recommendations (evaluated {total_evaluated} shards)")
-        print(f"Performance: {self.get_cache_stats()}")
+        logger.info(f"Generated {len(recommendations)} move recommendations (evaluated {total_evaluated} shards)")
+        logger.info(f"Performance: {self.get_cache_stats()}")
 
         return recommendations
 
     def validate_move_safety(
-        self, recommendation: ShardRelocationResponse, max_disk_usage_percent: float = 90.0
+        self, recommendation: ShardRelocationResponse, max_disk_usage_percent: float = 90.0, buffer_gb: float = 50.0
     ) -> Tuple[bool, str]:
         """Validate that a move recommendation is safe to execute"""
         # Find target node (with caching)
@@ -386,7 +386,7 @@ def validate_move_safety(
             return False, zone_conflict
 
         # Check available space
-        required_space_gb = recommendation.size_gb + 50  # 50GB buffer
+        required_space_gb = recommendation.size_gb + buffer_gb
        if target_node.available_space_gb < required_space_gb:
            return (
                False,
@@ -421,7 +421,7 @@ def _check_zone_conflict_cached(self, recommendation: ShardRelocationResponse) -
         """Check zone conflicts with caching"""
-        # Create cache key: table, shard, target zone
+        # Create cache key: schema, table, shard, target zone
         target_zone = self._get_node_zone(recommendation.to_node)
-        cache_key = (recommendation.table_name, recommendation.shard_id, target_zone)
+        cache_key = (recommendation.schema_name, recommendation.table_name, recommendation.shard_id, target_zone)
 
         if cache_key in self._zone_conflict_cache:
             self._cache_hits += 1
@@ -615,6 +615,200 @@ def _check_zone_conflict(self, recommendation: ShardRelocationResponse) -> Optio
         # If we can't check, err on the side of caution
         return f"Cannot verify zone safety: {str(e)}"
 
+    def get_shard_size_overview(self) -> Dict[str, Any]:
+        """Get shard size distribution analysis"""
+        # Only analyze STARTED shards
+        started_shards = [s for s in self.shards if s.state == "STARTED"]
+
+        # Define size buckets (in GB)
+        size_buckets = {
+            "<1GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            "1GB-5GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            "5GB-10GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            "10GB-50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            ">=50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+        }
+
+        if not started_shards:
+            return {
+                "total_shards": 0,
+                "total_size_gb": 0.0,
+                "avg_shard_size_gb": 0.0,
+                "size_buckets": size_buckets,
+                "large_shards_count": 0,
+                "very_small_shards_percentage": 0.0,
+            }
+
+        total_shards = len(started_shards)
+        total_size_gb = sum(s.size_gb for s in started_shards)
+        avg_size_gb = total_size_gb / total_shards if total_shards > 0 else 0.0
+
+        # Categorize shards by size
+        large_shards_count = 0  # >=50GB
shards + very_small_shards = 0 # <1GB shards (for percentage calculation) + + for shard in started_shards: + size_gb = shard.size_gb + + if size_gb >= 50: + size_buckets[">=50GB"]["count"] += 1 + size_buckets[">=50GB"]["total_size"] += size_gb + size_buckets[">=50GB"]["max_size"] = max(size_buckets[">=50GB"]["max_size"], size_gb) + large_shards_count += 1 + elif size_gb >= 10: + size_buckets["10GB-50GB"]["count"] += 1 + size_buckets["10GB-50GB"]["total_size"] += size_gb + size_buckets["10GB-50GB"]["max_size"] = max(size_buckets["10GB-50GB"]["max_size"], size_gb) + elif size_gb >= 5: + size_buckets["5GB-10GB"]["count"] += 1 + size_buckets["5GB-10GB"]["total_size"] += size_gb + size_buckets["5GB-10GB"]["max_size"] = max(size_buckets["5GB-10GB"]["max_size"], size_gb) + elif size_gb >= 1: + size_buckets["1GB-5GB"]["count"] += 1 + size_buckets["1GB-5GB"]["total_size"] += size_gb + size_buckets["1GB-5GB"]["max_size"] = max(size_buckets["1GB-5GB"]["max_size"], size_gb) + else: + size_buckets["<1GB"]["count"] += 1 + size_buckets["<1GB"]["total_size"] += size_gb + size_buckets["<1GB"]["max_size"] = max(size_buckets["<1GB"]["max_size"], size_gb) + very_small_shards += 1 + + # Calculate the average size for each bucket + for _, bucket_data in size_buckets.items(): + if bucket_data["count"] > 0: + bucket_data["avg_size_gb"] = bucket_data["total_size"] / bucket_data["count"] + else: + bucket_data["avg_size_gb"] = 0.0 + + # Calculate the percentage of very small shards (<1GB) + very_small_percentage = (very_small_shards / total_shards * 100) if total_shards > 0 else 0.0 + + return { + "total_shards": total_shards, + "total_size_gb": total_size_gb, + "avg_shard_size_gb": avg_size_gb, + "size_buckets": size_buckets, + "large_shards_count": large_shards_count, + "very_small_shards_percentage": very_small_percentage, + } + + def get_large_shards_details(self) -> List[Dict[str, Any]]: + """Get detailed information about large shards (>=50GB) including partition values""" + # Optimized query to fetch only large shards directly from database + query = """ + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + s.id as shard_id, + s.size / 1024^3 as size_gb, + s."primary" as is_primary, + s.node['name'] as node_name, + s.node['id'] as node_id + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + WHERE s.state = 'STARTED' + AND s.size >= 50 * 1024^3 -- 50GB in bytes + ORDER BY s.size DESC + """ + + result = self.client.execute_query(query) + + large_shards = [] + for row in result.get("rows", []): + # Get zone information from our nodes data + node_id = row[7] + zone = next((node.zone for node in self.nodes if node.id == node_id), "unknown") + + large_shards.append( + { + "schema_name": row[0] or "doc", + "table_name": row[1], + "partition_values": row[2], + "shard_id": row[3], + "size_gb": float(row[4]) if row[4] else 0.0, + "is_primary": row[5] or False, + "node_name": row[6], + "zone": zone, + } + ) + + return large_shards + + def get_small_shards_details(self, limit: int = 10) -> List[Dict[str, Any]]: + """Get detailed information about the smallest shards, grouped by table/partition""" + # Query to get all shards, ordered by size ascending to get the smallest + query = """ + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + s.id as shard_id, + s.size / 1024^3 as size_gb, + 
s."primary" as is_primary, + s.node['name'] as node_name, + s.node['id'] as node_id + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + WHERE s.state = 'STARTED' + ORDER BY s.size ASC + """ + + result = self.client.execute_query(query) + + # Group by table/partition to get aggregated stats + table_partition_stats: TableStatsType = {} + for row in result.get("rows", []): + # Get zone information from our nodes data + node_id = row[7] + + # FIXME: `zone` does not get used. + zone = next((node.zone for node in self.nodes if node.id == node_id), "unknown") # noqa: F841 + + # Create table key with schema + schema_name = row[0] or "doc" + table_name = row[1] + table_display = table_name + if schema_name and schema_name != "doc": + table_display = f"{schema_name}.{table_name}" + + # Create partition key + partition_key = row[2] or "N/A" + + # Create combined key + key = (table_display, partition_key) + + if key not in table_partition_stats: + table_partition_stats[key] = {"sizes": [], "primary_count": 0, "replica_count": 0, "total_size": 0.0} + + # Aggregate stats + stats = table_partition_stats[key] + size_gb = float(row[4]) if row[4] else 0.0 + stats["sizes"].append(size_gb) + stats["total_size"] += size_gb + if row[5]: # is_primary + stats["primary_count"] += 1 + else: + stats["replica_count"] += 1 + + # Sort by average size ascending (smallest first) and return top tables/partitions + sorted_stats: List[Dict[str, Any]] = [] + for (table_name, partition_key), stats in table_partition_stats.items(): + avg_size = sum(stats["sizes"]) / len(stats["sizes"]) if stats["sizes"] else 0 + sorted_stats.append( + {"table_name": table_name, "partition_key": partition_key, "stats": stats, "avg_size": avg_size} + ) + + # Sort by average size and take the top 'limit' entries + sorted_stats.sort(key=lambda x: x["avg_size"]) + return sorted_stats[:limit] + def get_cluster_overview(self) -> Dict[str, Any]: """Get a comprehensive overview of the cluster""" # Get cluster watermark settings @@ -811,11 +1005,14 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. # Determine feasibility feasible = len(infeasible_moves) == 0 + # Safety margin for cluster capacity after decommission + capacity_safety_margin = 1.2 # 20 % buffer + # Add capacity warnings if feasible: - # Check if remaining cluster capacity is sufficient after decommission + # Check if the remaining cluster capacity is sufficient after decommission remaining_capacity = sum(n.available_space_gb for n in self.nodes if n.name != node_name) - if remaining_capacity < total_size_gb * 1.2: # 20% safety margin + if remaining_capacity < total_size_gb * capacity_safety_margin: warnings.append( f"Low remaining capacity after decommission. " f"Only {remaining_capacity:.1f}GB available for {total_size_gb:.1f}GB of data" @@ -831,11 +1028,111 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. 
"recommendations": move_plan, "infeasible_moves": infeasible_moves, "warnings": warnings, - "estimated_time_hours": len(move_plan) * 0.1, # Rough estimate: 6 minutes per move + "estimated_time_hours": len(move_plan) * 0.1, # Rough estimate: 0.1 hours (6 minutes) per move "message": "Decommission plan generated" if feasible else "Decommission not currently feasible", } +class TranslogReporter: + def __init__(self, client: CrateDBClient): + self.client = client + + def problematic_translogs(self, size_mb: int) -> List[str]: + """Find and optionally cancel shards with problematic translog sizes.""" + console.print(Panel.fit("[bold blue]Problematic Translog Analysis[/bold blue]")) + console.print(f"[dim]Looking for replica shards with translog uncommitted size > {size_mb}MB[/dim]") + console.print() + + # Query to find problematic replica shards + query = """ + SELECT sh.schema_name, \ + sh.table_name, \ + translate(p.values::text, ':{}', '=()') as partition_values, \ + sh.id AS shard_id, \ + node['name'] as node_name, \ + sh.translog_stats['uncommitted_size'] / 1024^2 AS translog_uncommitted_mb + FROM + sys.shards AS sh + LEFT JOIN information_schema.table_partitions p + ON sh.table_name = p.table_name + AND sh.schema_name = p.table_schema + AND sh.partition_ident = p.partition_ident + WHERE + sh.state = 'STARTED' + AND sh.translog_stats['uncommitted_size'] \ + > ? * 1024^2 + AND primary = FALSE + ORDER BY + 6 DESC \ + """ + + try: + result = self.client.execute_query(query, [size_mb]) + rows = result.get("rows", []) + + if not rows: + console.print(f"[green]โœ“ No replica shards found with translog uncommitted size > {size_mb}MB[/green]") + return [] + + console.print(f"Found {len(rows)} shards with problematic translogs:") + console.print() + + # Display query results table + results_table = Table(title=f"Problematic Replica Shards (translog > {size_mb}MB)", box=box.ROUNDED) + results_table.add_column("Schema", style="cyan") + results_table.add_column("Table", style="blue") + results_table.add_column("Partition", style="magenta") + results_table.add_column("Shard ID", justify="right", style="yellow") + results_table.add_column("Node", style="green") + results_table.add_column("Translog MB", justify="right", style="red") + + for row in rows: + schema_name, table_name, partition_values, shard_id, node_name, translog_mb = row + partition_display = ( + partition_values if partition_values and partition_values != "NULL" else "[dim]none[/dim]" + ) + results_table.add_row( + schema_name, table_name, partition_display, str(shard_id), node_name, f"{translog_mb:.1f}" + ) + + console.print(results_table) + console.print() + console.print("[bold]Generated ALTER Commands:[/bold]") + console.print() + + # Generate ALTER commands + alter_commands = [] + for row in rows: + schema_name, table_name, partition_values, shard_id, node_name, translog_mb = row + + # Build the ALTER command based on whether it's partitioned + if partition_values and partition_values != "NULL": + # partition_values already formatted like ("sync_day"=1757376000000) from the translate function + alter_cmd = ( + f'ALTER TABLE "{schema_name}"."{table_name}" partition {partition_values} ' + f"REROUTE CANCEL SHARD {shard_id} on '{node_name}' WITH (allow_primary=False);" + ) + else: + alter_cmd = ( + f'ALTER TABLE "{schema_name}"."{table_name}" ' + f"REROUTE CANCEL SHARD {shard_id} on '{node_name}' WITH (allow_primary=False);" + ) + + alter_commands.append(alter_cmd) + console.print(alter_cmd) + + console.print() + 
console.print(f"[bold]Total: {len(alter_commands)} ALTER commands generated[/bold]") + return alter_commands + + except Exception as e: + console.print(f"[red]Error analyzing problematic translogs: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + return [] + + class ShardReporter: def __init__(self, analyzer: ShardAnalyzer): self.analyzer = analyzer @@ -930,20 +1227,428 @@ def distribution(self, table: str = None): console.print(node_table) + console.print() + + # Shard Size Overview + size_overview = self.analyzer.get_shard_size_overview() + + size_table = Table(title="Shard Size Distribution", box=box.ROUNDED) + size_table.add_column("Size Range", style="cyan") + size_table.add_column("Count", justify="right", style="magenta") + size_table.add_column("Percentage", justify="right", style="green") + size_table.add_column("Avg Size", justify="right", style="blue") + size_table.add_column("Max Size", justify="right", style="red") + size_table.add_column("Total Size", justify="right", style="yellow") + + total_shards = size_overview["total_shards"] + + # Define color coding thresholds + large_shards_threshold = 0 # warn if ANY shards >=50GB (red flag) + small_shards_percentage_threshold = 40 # warn if >40% of shards are small (<1GB) + + for bucket_name, bucket_data in size_overview["size_buckets"].items(): + count = bucket_data["count"] + avg_size = bucket_data["avg_size_gb"] + total_size = bucket_data["total_size"] + percentage = (count / total_shards * 100) if total_shards > 0 else 0 + + # Apply color coding + count_str = str(count) + percentage_str = f"{percentage:.1f}%" + + # Color code large shards (>=50GB) - ANY large shard is a red flag + if bucket_name == ">=50GB" and count > large_shards_threshold: + count_str = f"[red]{count}[/red]" + percentage_str = f"[red]{percentage:.1f}%[/red]" + + # Color code if too many very small shards (<1GB) + if bucket_name == "<1GB" and percentage > small_shards_percentage_threshold: + count_str = f"[yellow]{count}[/yellow]" + percentage_str = f"[yellow]{percentage:.1f}%[/yellow]" + + size_table.add_row( + bucket_name, + count_str, + percentage_str, + f"{avg_size:.2f}GB" if avg_size > 0 else "0GB", + f"{bucket_data['max_size']:.2f}GB" if bucket_data["max_size"] > 0 else "0GB", + format_size(total_size), + ) + + console.print(size_table) + + # Add warnings if thresholds are exceeded + warnings = [] + if size_overview["large_shards_count"] > large_shards_threshold: + warnings.append( + f"[red]๐Ÿ”ฅ CRITICAL: {size_overview['large_shards_count']} " + f"large shards (>=50GB) detected - IMMEDIATE ACTION REQUIRED![/red]" + ) + warnings.append("[red] Large shards cause slow recovery, memory pressure, and performance issues[/red]") + + # Calculate the percentage of very small shards (<1GB) + very_small_count = size_overview["size_buckets"]["<1GB"]["count"] + very_small_percentage = (very_small_count / total_shards * 100) if total_shards > 0 else 0 + + if very_small_percentage > small_shards_percentage_threshold: + warnings.append( + f"[yellow]โš ๏ธ {very_small_percentage:.1f}% of shards are very small (<1GB) - " + f"consider optimizing shard allocation[/yellow]" + ) + warnings.append("[yellow] Too many small shards create metadata overhead and reduce efficiency[/yellow]") + + if warnings: + console.print() + for warning in warnings: + console.print(warning) + + # Show compact table/partition breakdown of large shards if any exist + if size_overview["large_shards_count"] > 0: + console.print() + large_shards_details = 
self.analyzer.get_large_shards_details() + + # Aggregate by table/partition + table_partition_stats: TableStatsType = {} + for shard in large_shards_details: + # Create table key with schema + table_display = shard["table_name"] + if shard["schema_name"] and shard["schema_name"] != "doc": + table_display = f"{shard['schema_name']}.{shard['table_name']}" + + # Create partition key + partition_key = shard["partition_values"] or "N/A" + + # Create combined key + key = (table_display, partition_key) + + if key not in table_partition_stats: + table_partition_stats[key] = { + "sizes": [], + "primary_count": 0, + "replica_count": 0, + "total_size": 0.0, + } + + # Aggregate stats + stats = table_partition_stats[key] + stats["sizes"].append(shard["size_gb"]) + stats["total_size"] += shard["size_gb"] + if shard["is_primary"]: + stats["primary_count"] += 1 + else: + stats["replica_count"] += 1 + + # Create compact table + large_shards_table = Table(title="Large Shards Breakdown by Table/Partition (>=50GB)", box=box.ROUNDED) + large_shards_table.add_column("Table", style="cyan") + large_shards_table.add_column("Partition", style="blue") + large_shards_table.add_column("Shards", justify="right", style="magenta") + large_shards_table.add_column("P/R", justify="center", style="yellow") + large_shards_table.add_column("Min Size", justify="right", style="green") + large_shards_table.add_column("Avg Size", justify="right", style="red") + large_shards_table.add_column("Max Size", justify="right", style="red") + large_shards_table.add_column("Total Size", justify="right", style="red") + + # Sort by total size descending (most problematic first) + sorted_stats = sorted(table_partition_stats.items(), key=lambda x: x[1]["total_size"], reverse=True) + + for (table_name, partition_key), stats in sorted_stats: + # Format partition display + partition_display = partition_key + if partition_display != "N/A" and len(partition_display) > 25: + partition_display = partition_display[:22] + "..." 
+ + # Calculate size stats + sizes = stats["sizes"] + min_size = min(sizes) + avg_size = sum(sizes) / len(sizes) + max_size = max(sizes) + total_size = stats["total_size"] + total_shards = len(sizes) + + # Format primary/replica ratio + p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R" + + large_shards_table.add_row( + table_name, + partition_display, + str(total_shards), + p_r_display, + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB", + ) + + console.print(large_shards_table) + + # Add summary stats + total_primary = sum(stats["primary_count"] for stats in table_partition_stats.values()) + total_replica = sum(stats["replica_count"] for stats in table_partition_stats.values()) + affected_table_partitions = len(table_partition_stats) + + console.print() + console.print( + f"[dim]๐Ÿ“Š Summary: {total_primary} primary, {total_replica} replica shards " + f"across {affected_table_partitions} table/partition(s)[/dim]" + ) + + # Show compact table/partition breakdown of smallest shards (top 10) + console.print() + small_shards_details = self.analyzer.get_small_shards_details(limit=10) + + if small_shards_details: + # Create compact table + small_shards_table = Table(title="Smallest Shards Breakdown by Table/Partition (Top 10)", box=box.ROUNDED) + small_shards_table.add_column("Table", style="cyan") + small_shards_table.add_column("Partition", style="blue") + small_shards_table.add_column("Shards", justify="right", style="magenta") + small_shards_table.add_column("P/R", justify="center", style="yellow") + small_shards_table.add_column("Min Size", justify="right", style="green") + small_shards_table.add_column("Avg Size", justify="right", style="red") + small_shards_table.add_column("Max Size", justify="right", style="red") + small_shards_table.add_column("Total Size", justify="right", style="red") + + for entry in small_shards_details: + table_name = entry["table_name"] + partition_key = entry["partition_key"] + stats = entry["stats"] + + # Format partition display + partition_display = partition_key + if partition_display != "N/A" and len(partition_display) > 25: + partition_display = partition_display[:22] + "..." 
+ + # Calculate size stats + sizes = stats["sizes"] + min_size = min(sizes) + avg_size = sum(sizes) / len(sizes) + max_size = max(sizes) + total_size = stats["total_size"] + total_shards = len(sizes) + + # Format primary/replica ratio + p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R" + + small_shards_table.add_row( + table_name, + partition_display, + str(total_shards), + p_r_display, + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB", + ) + + console.print(small_shards_table) + + # Add summary stats for smallest shards + total_small_primary = sum(entry["stats"]["primary_count"] for entry in small_shards_details) + total_small_replica = sum(entry["stats"]["replica_count"] for entry in small_shards_details) + small_table_partitions = len(small_shards_details) + + console.print() + console.print( + f"[dim]๐Ÿ“Š Summary: {total_small_primary} primary, " + f"{total_small_replica} replica shards across {small_table_partitions} table/partition(s) " + f"with smallest average sizes[/dim]" + ) + + console.print() + # Table-specific analysis if requested if table: console.print() console.print(Panel.fit(f"[bold blue]Analysis for table: {table}[/bold blue]")) - stats = self.analyzer.analyze_distribution(table) + distribution_stats = self.analyzer.analyze_distribution(table) table_summary = Table(title=f"Table {table} Distribution", box=box.ROUNDED) table_summary.add_column("Metric", style="cyan") table_summary.add_column("Value", style="magenta") - table_summary.add_row("Total Shards", str(stats.total_shards)) - table_summary.add_row("Total Size", format_size(stats.total_size_gb)) - table_summary.add_row("Zone Balance Score", f"{stats.zone_balance_score:.1f}/100") - table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") + table_summary.add_row("Total Shards", str(distribution_stats.total_shards)) + table_summary.add_row("Total Size", format_size(distribution_stats.total_size_gb)) + table_summary.add_row("Zone Balance Score", f"{distribution_stats.zone_balance_score:.1f}/100") + table_summary.add_row("Node Balance Score", f"{distribution_stats.node_balance_score:.1f}/100") console.print(table_summary) + + +class ActiveShardMonitor: + """Monitor active shard checkpoint progression over time""" + + def __init__(self, client: CrateDBClient): + self.client = client + + def compare_snapshots( + self, + snapshot1: List[ActiveShardSnapshot], + snapshot2: List[ActiveShardSnapshot], + min_activity_threshold: int = 0, + ) -> List["ActiveShardActivity"]: + """Compare two snapshots and return activity data for shards present in both + + Args: + snapshot1: First snapshot (baseline) + snapshot2: Second snapshot (comparison) + min_activity_threshold: Minimum checkpoint delta to consider active (default: 0) + """ + + # Create lookup dict for snapshot1 + snapshot1_dict = {snap.shard_identifier: snap for snap in snapshot1} + + activities = [] + + for snap2 in snapshot2: + snap1 = snapshot1_dict.get(snap2.shard_identifier) + if snap1: + # Calculate local checkpoint delta + local_checkpoint_delta = snap2.local_checkpoint - snap1.local_checkpoint + time_diff = snap2.timestamp - snap1.timestamp + + # Filter based on actual activity between snapshots + if local_checkpoint_delta >= min_activity_threshold: + activity = ActiveShardActivity( + schema_name=snap2.schema_name, + table_name=snap2.table_name, + shard_id=snap2.shard_id, + node_name=snap2.node_name, + is_primary=snap2.is_primary, + partition_ident=snap2.partition_ident, + 
local_checkpoint_delta=local_checkpoint_delta, + snapshot1=snap1, + snapshot2=snap2, + time_diff_seconds=time_diff, + ) + activities.append(activity) + + # Sort by activity (highest checkpoint delta first) + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + return activities + + def format_activity_display( + self, activities: List["ActiveShardActivity"], show_count: int = 10, watch_mode: bool = False + ) -> str: + """Format activity data for console display""" + if not activities: + return "โœ… No active shards with significant checkpoint progression found" + + # Limit to requested count + activities = activities[:show_count] + + # Calculate observation period for context + if activities: + observation_period = activities[0].time_diff_seconds + output = [ + f"\n๐Ÿ”ฅ Most Active Shards ({len(activities)} shown, {observation_period:.0f}s observation period)" + ] + else: + output = [f"\n๐Ÿ”ฅ Most Active Shards ({len(activities)} shown, sorted by checkpoint activity)"] + + output.append("") + + # Add activity rate context + if activities: + total_activity = sum(a.local_checkpoint_delta for a in activities) + avg_rate = sum(a.activity_rate for a in activities) / len(activities) + output.append( + f"[dim]Total checkpoint activity: {total_activity:,} changes, Average rate: {avg_rate:.1f}/sec[/dim]" + ) + output.append("") + + # Create table headers + headers = ["Rank", "Schema.Table", "Shard", "Partition", "Node", "Type", "Checkpoint ฮ”", "Rate/sec", "Trend"] + + # Calculate column widths + col_widths = [len(h) for h in headers] + + # Prepare rows + rows = [] + for i, activity in enumerate(activities, 1): + # Format values + rank = str(i) + table_id = activity.table_identifier + shard_id = str(activity.shard_id) + partition = ( + activity.partition_ident[:14] + "..." 
+ if len(activity.partition_ident) > 14 + else activity.partition_ident or "-" + ) + node = activity.node_name + shard_type = "P" if activity.is_primary else "R" + checkpoint_delta = f"{activity.local_checkpoint_delta:,}" + rate = f"{activity.activity_rate:.1f}" if activity.activity_rate >= 0.1 else "<0.1" + + # Calculate activity trend indicator + if activity.activity_rate >= 100: + trend = "๐Ÿ”ฅ HOT" + elif activity.activity_rate >= 50: + trend = "๐Ÿ“ˆ HIGH" + elif activity.activity_rate >= 10: + trend = "๐Ÿ“Š MED" + else: + trend = "๐Ÿ“‰ LOW" + + row = [rank, table_id, shard_id, partition, node, shard_type, checkpoint_delta, rate, trend] + rows.append(row) + + # Update column widths + for j, cell in enumerate(row): + col_widths[j] = max(col_widths[j], len(cell)) + + # Format table + header_row = " " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths)) + output.append(header_row) + output.append(" " + "-" * (len(header_row) - 3)) + + # Data rows + for row in rows: + data_row = " " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths)) + output.append(data_row) + + # Only show legend and insights in non-watch mode + if not watch_mode: + output.append("") + output.append("Legend:") + output.append(" โ€ข Checkpoint ฮ”: Write operations during observation period") + output.append(" โ€ข Rate/sec: Checkpoint changes per second") + output.append(" โ€ข Partition: partition_ident (truncated if >14 chars, '-' if none)") + output.append(" โ€ข Type: P=Primary, R=Replica") + output.append(" โ€ข Trend: ๐Ÿ”ฅ HOT (โ‰ฅ100/s), ๐Ÿ“ˆ HIGH (โ‰ฅ50/s), ๐Ÿ“Š MED (โ‰ฅ10/s), ๐Ÿ“‰ LOW (<10/s)") + + # Add insights about activity patterns + if activities: + output.append("") + output.append("Insights:") + + # Count by trend + hot_count = len([a for a in activities if a.activity_rate >= 100]) + high_count = len([a for a in activities if 50 <= a.activity_rate < 100]) + med_count = len([a for a in activities if 10 <= a.activity_rate < 50]) + low_count = len([a for a in activities if a.activity_rate < 10]) + + if hot_count > 0: + output.append(f" โ€ข {hot_count} HOT shards (โ‰ฅ100 changes/sec) - consider load balancing") + if high_count > 0: + output.append(f" โ€ข {high_count} HIGH activity shards - monitor capacity") + if med_count > 0: + output.append(f" โ€ข {med_count} MEDIUM activity shards - normal operation") + if low_count > 0: + output.append(f" โ€ข {low_count} LOW activity shards - occasional writes") + + # Identify patterns + primary_activities = [a for a in activities if a.is_primary] + if len(primary_activities) == len(activities): + output.append(" โ€ข All active shards are PRIMARY - normal write pattern") + elif len(primary_activities) < len(activities) * 0.5: + output.append(" โ€ข Many REPLICA shards active - possible recovery/replication activity") + + # Node concentration + nodes = {a.node_name for a in activities} + if len(nodes) <= 2: + output.append(f" โ€ข Activity concentrated on {len(nodes)} node(s) - consider redistribution") + + return "\n".join(output) diff --git a/cratedb_toolkit/admin/xmover/analysis/table.py b/cratedb_toolkit/admin/xmover/analysis/table.py new file mode 100644 index 00000000..b3322cd9 --- /dev/null +++ b/cratedb_toolkit/admin/xmover/analysis/table.py @@ -0,0 +1,794 @@ +""" +Shard Distribution Analysis for CrateDB Clusters + +This module analyzes shard distribution across nodes to detect imbalances +and provide recommendations for optimization. 
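+
+Usage sketch (illustrative only; assumes CrateDB connection settings are
+provided via the environment, as consumed by ``CrateDBClient``)::
+
+    from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer
+    from cratedb_toolkit.admin.xmover.util.database import CrateDBClient
+
+    # Rank anomalies across the ten largest tables and print the report.
+    analyzer = DistributionAnalyzer(CrateDBClient())
+    anomalies, tables_analyzed = analyzer.analyze_distribution(top_tables=10)
+    analyzer.format_distribution_report(anomalies, tables_analyzed)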
+""" + +import logging +import statistics +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple + +from rich import print as rprint +from rich.console import Console +from rich.table import Table + +from cratedb_toolkit.admin.xmover.model import NodeInfo +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + +logger = logging.getLogger(__name__) + + +def format_storage_size(size_gb: float) -> str: + """Format storage size with appropriate units and spacing""" + if size_gb < 0.001: + return "0 B" + elif size_gb < 1.0: + size_mb = size_gb * 1024 + return f"{size_mb:.0f} MB" + elif size_gb < 1024: + return f"{size_gb:.1f} GB" + else: + size_tb = size_gb / 1024 + return f"{size_tb:.2f} TB" + + +@dataclass +class TableDistribution: + """Represents shard distribution for a single table""" + + schema_name: str + table_name: str + total_primary_size_gb: float + node_distributions: Dict[str, Dict[str, Any]] # node_name -> metrics + + @property + def full_table_name(self) -> str: + return f"{self.schema_name}.{self.table_name}" if self.schema_name != "doc" else self.table_name + + +@dataclass +class DistributionAnomaly: + """Represents a detected distribution anomaly""" + + table: TableDistribution + anomaly_type: str + severity_score: float + impact_score: float + combined_score: float + description: str + details: Dict[str, Any] + recommendations: List[str] + + +class DistributionAnalyzer: + """Analyzes shard distribution across cluster nodes""" + + def __init__(self, client: CrateDBClient): + self.client = client + self.console = Console() + + def find_table_by_name(self, table_name: str) -> Optional[str]: + """Find table by name and resolve schema ambiguity""" + + query = """ + SELECT DISTINCT schema_name, table_name + FROM sys.shards + WHERE table_name = ? + AND schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') + AND routing_state = 'STARTED' + ORDER BY schema_name \ + """ + + result = self.client.execute_query(query, [table_name]) + rows = result.get("rows", []) + + if not rows: + return None + elif len(rows) == 1: + schema, table = rows[0] + return f"{schema}.{table}" if schema != "doc" else table + else: + # Multiple schemas have this table - ask user + rprint(f"[yellow]Multiple schemas contain table '{table_name}':[/yellow]") + for i, (schema, table) in enumerate(rows, 1): + full_name = f"{schema}.{table}" if schema != "doc" else table + rprint(f" {i}. {full_name}") + + try: + choice = input("\nSelect table (enter number): ").strip() + if not choice: + rprint("[yellow]No selection made[/yellow]") + return None + idx = int(choice) - 1 + if 0 <= idx < len(rows): + schema, table = rows[idx] + return f"{schema}.{table}" if schema != "doc" else table + else: + rprint("[red]Invalid selection[/red]") + return None + except (ValueError, KeyboardInterrupt): + rprint("\n[yellow]Selection cancelled[/yellow]") + return None + + def get_table_distribution_detailed(self, table_identifier: str) -> Optional[TableDistribution]: + """Get detailed distribution data for a specific table""" + + # Parse schema and table name + if "." 
in table_identifier: + schema_name, table_name = table_identifier.split(".", 1) + else: + schema_name = "doc" + table_name = table_identifier + + query = """ + SELECT s.schema_name, \ + s.table_name, \ + s.node['name'] as node_name, \ + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, \ + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, \ + COUNT(*) as total_shards, \ + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as primary_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as replica_size_gb, \ + SUM(s.num_docs) as total_documents + FROM sys.shards s + WHERE s.schema_name = ? \ + AND s.table_name = ? + AND s.routing_state = 'STARTED' + GROUP BY s.schema_name, s.table_name, s.node['name'] + ORDER BY s.node['name'] \ + """ # noqa: E501 + + result = self.client.execute_query(query, [schema_name, table_name]) + rows = result.get("rows", []) + + if not rows: + return None + + # Build node distributions + node_distributions = {} + for row in rows: + node_distributions[row[2]] = { + "primary_shards": row[3], + "replica_shards": row[4], + "total_shards": row[5], + "total_size_gb": row[6], + "primary_size_gb": row[7], + "replica_size_gb": row[8], + "total_documents": row[9], + } + + # Calculate total primary size + total_primary_size = sum(node["primary_size_gb"] for node in node_distributions.values()) + + return TableDistribution( + schema_name=rows[0][0], + table_name=rows[0][1], + total_primary_size_gb=total_primary_size, + node_distributions=node_distributions, + ) + + def format_table_health_report(self, table_dist: TableDistribution) -> None: + """Format and display comprehensive table health report""" + + rprint(f"\n[bold blue]๐Ÿ“‹ Table Health Report: {table_dist.full_table_name}[/bold blue]") + rprint("=" * 80) + + # Calculate overview stats + all_nodes_info = self.client.get_nodes_info() + cluster_nodes = {node.name for node in all_nodes_info if node.name} + table_nodes = set(table_dist.node_distributions.keys()) + missing_nodes = cluster_nodes - table_nodes + + total_shards = sum(node["total_shards"] for node in table_dist.node_distributions.values()) + total_primary_shards = sum(node["primary_shards"] for node in table_dist.node_distributions.values()) + total_replica_shards = sum(node["replica_shards"] for node in table_dist.node_distributions.values()) + total_size_gb = sum(node["total_size_gb"] for node in table_dist.node_distributions.values()) + total_documents = sum(node["total_documents"] for node in table_dist.node_distributions.values()) + + # Table Overview + rprint("\n[bold]๐ŸŽฏ Overview[/bold]") + rprint(f"โ€ข Primary Data Size: {format_storage_size(table_dist.total_primary_size_gb)}") + rprint(f"โ€ข Total Size (with replicas): {format_storage_size(total_size_gb)}") + rprint(f"โ€ข Total Shards: {total_shards} ({total_primary_shards} primary + {total_replica_shards} replica)") + rprint(f"โ€ข Total Documents: {total_documents:,}") + rprint( + f"โ€ข Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes " + f"({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)" + ) + + if missing_nodes: + rprint(f"โ€ข [yellow]Missing from nodes: {', '.join(sorted(missing_nodes))}[/yellow]") + + # Shard Distribution Table + rprint("\n[bold]๐Ÿ“Š Shard Distribution by Node[/bold]") + + shard_table = Table(show_header=True) + shard_table.add_column("Node", width=15) + 
shard_table.add_column("Primary", width=8, justify="right") + shard_table.add_column("Replica", width=8, justify="right") + shard_table.add_column("Total", width=8, justify="right") + shard_table.add_column("Primary Size", width=12, justify="right") + shard_table.add_column("Replica Size", width=12, justify="right") + shard_table.add_column("Total Size", width=12, justify="right") + shard_table.add_column("Documents", width=12, justify="right") + + for node_name in sorted(table_dist.node_distributions.keys()): + node_data = table_dist.node_distributions[node_name] + + # Color coding based on shard count compared to average + avg_total_shards = total_shards / len(table_dist.node_distributions) + if node_data["total_shards"] > avg_total_shards * 1.5: + node_color = "red" + elif node_data["total_shards"] < avg_total_shards * 0.5: + node_color = "yellow" + else: + node_color = "white" + + shard_table.add_row( + f"[{node_color}]{node_name}[/{node_color}]", + str(node_data["primary_shards"]), + str(node_data["replica_shards"]), + f"[{node_color}]{node_data['total_shards']}[/{node_color}]", + format_storage_size(node_data["primary_size_gb"]), + format_storage_size(node_data["replica_size_gb"]), + f"[{node_color}]{format_storage_size(node_data['total_size_gb'])}[/{node_color}]", + f"{node_data['total_documents']:,}", + ) + + self.console.print(shard_table) + + # Distribution Analysis + rprint("\n[bold]๐Ÿ” Distribution Analysis[/bold]") + + # Calculate statistics + shard_counts = [node["total_shards"] for node in table_dist.node_distributions.values()] + storage_sizes = [node["total_size_gb"] for node in table_dist.node_distributions.values()] + doc_counts = [node["total_documents"] for node in table_dist.node_distributions.values()] + + shard_cv = self.calculate_coefficient_of_variation(shard_counts) + storage_cv = self.calculate_coefficient_of_variation(storage_sizes) + doc_cv = self.calculate_coefficient_of_variation(doc_counts) + + min_shards, max_shards = min(shard_counts), max(shard_counts) + min_storage, max_storage = min(storage_sizes), max(storage_sizes) + min_docs, max_docs = min(doc_counts), max(doc_counts) + + # Shard distribution analysis + if shard_cv > 0.3: + rprint( + f"โ€ข [red]โš  Shard Imbalance:[/red] Range {min_shards}-{max_shards} shards per node (CV: {shard_cv:.2f})" + ) + else: + rprint(f"โ€ข [green]โœ“ Shard Balance:[/green] Well distributed (CV: {shard_cv:.2f})") + + # Storage distribution analysis + if storage_cv > 0.4: + rprint( + f"โ€ข [red]โš  Storage Imbalance:[/red] Range " + f"{format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})" + ) + else: + rprint(f"โ€ข [green]โœ“ Storage Balance:[/green] Well distributed (CV: {storage_cv:.2f})") + + # Document distribution analysis + if doc_cv > 0.5: + rprint(f"โ€ข [red]โš  Document Skew:[/red] Range {min_docs:,}-{max_docs:,} docs per node (CV: {doc_cv:.2f})") + else: + rprint(f"โ€ข [green]โœ“ Document Distribution:[/green] Well balanced (CV: {doc_cv:.2f})") + + # Node coverage analysis + coverage_ratio = len(table_nodes) / len(cluster_nodes) + if coverage_ratio < 0.7: + missing_list = ", ".join(sorted(missing_nodes)[:5]) # Show up to 5 nodes + if len(missing_nodes) > 5: + missing_list += f", +{len(missing_nodes) - 5} more" + rprint(f"โ€ข [red]โš  Limited Coverage:[/red] {coverage_ratio:.0%} cluster coverage, missing: {missing_list}") + else: + rprint(f"โ€ข [green]โœ“ Good Coverage:[/green] {coverage_ratio:.0%} of cluster nodes have this table") + + # Zone analysis if available + 
try: + zone_distribution = {} + for node_name, node_data in table_dist.node_distributions.items(): + # Try to get zone info for each node + node_info: Optional[NodeInfo] = next((n for n in all_nodes_info if n.name == node_name), None) + if node_info and node_info.zone: + zone = node_info.zone + if zone not in zone_distribution: + zone_distribution[zone] = {"nodes": 0, "shards": 0, "size": 0} + zone_distribution[zone]["nodes"] += 1 + zone_distribution[zone]["shards"] += node_data["total_shards"] + zone_distribution[zone]["size"] += node_data["total_size_gb"] + + if zone_distribution: + rprint("\n[bold]๐ŸŒ Zone Distribution[/bold]") + for zone in sorted(zone_distribution.keys()): + zone_data = zone_distribution[zone] + rprint( + f"โ€ข {zone}: {zone_data['nodes']} nodes, " + f"{zone_data['shards']} shards, {format_storage_size(zone_data['size'])}" + ) + + except Exception: + # Zone info not available + logger.exception("Zone info not available") + + # Health Summary + rprint("\n[bold]๐Ÿ’Š Health Summary[/bold]") + issues = [] + recommendations = [] + + if shard_cv > 0.3: + issues.append("Shard imbalance") + recommendations.append("Consider moving shards between nodes for better distribution") + + if storage_cv > 0.4: + issues.append("Storage imbalance") + recommendations.append("Rebalance shards to distribute storage more evenly") + + if doc_cv > 0.5: + issues.append("Document skew") + recommendations.append("Review routing configuration - data may not be evenly distributed") + + if coverage_ratio < 0.7: + issues.append("Limited node coverage") + recommendations.append("Consider adding replicas to improve availability and distribution") + + if not issues: + rprint("โ€ข [green]โœ… Table appears healthy with good distribution[/green]") + else: + rprint(f"โ€ข [yellow]โš  Issues found: {', '.join(issues)}[/yellow]") + rprint("\n[bold]๐Ÿ’ก Recommendations:[/bold]") + for rec in recommendations: + rprint(f" โ€ข {rec}") + + rprint() + + def get_largest_tables_distribution(self, top_n: int = 10) -> List[TableDistribution]: + """Get distribution data for the largest tables using BIGDUDES query""" + + query = """ + WITH largest_tables AS (SELECT schema_name, \ + table_name, \ + SUM(CASE WHEN "primary" = true THEN size ELSE 0 END) as total_primary_size \ + FROM sys.shards \ + WHERE schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') \ + AND routing_state = 'STARTED' \ + GROUP BY schema_name, table_name \ + ORDER BY total_primary_size DESC + LIMIT ? 
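+                                  -- the CTE pre-selects the N largest tables by primary data size;
+                                  -- the outer query below then breaks those tables down per node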
+ ) + SELECT s.schema_name, \ + s.table_name, \ + s.node['name'] as node_name, \ + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, \ + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, \ + COUNT(*) as total_shards, \ + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as primary_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as replica_size_gb, \ + SUM(s.num_docs) as total_documents + FROM sys.shards s + INNER JOIN largest_tables lt \ + ON (s.schema_name = lt.schema_name AND s.table_name = lt.table_name) + WHERE s.routing_state = 'STARTED' + GROUP BY s.schema_name, s.table_name, s.node['name'] + ORDER BY s.schema_name, s.table_name, s.node['name'] \ + """ # noqa: E501 + + result = self.client.execute_query(query, [top_n]) + + # Extract rows from the result dictionary + rows = result.get("rows", []) + + if not rows: + return [] + + # Group results by table + tables_data = {} + for row in rows: + # Ensure we have enough columns + if len(row) < 10: + continue + + table_key = f"{row[0]}.{row[1]}" + if table_key not in tables_data: + tables_data[table_key] = {"schema_name": row[0], "table_name": row[1], "nodes": {}} + + tables_data[table_key]["nodes"][row[2]] = { + "primary_shards": row[3], + "replica_shards": row[4], + "total_shards": row[5], + "total_size_gb": row[6], + "primary_size_gb": row[7], + "replica_size_gb": row[8], + "total_documents": row[9], + } + + # Calculate total primary sizes and create TableDistribution objects + distributions = [] + for table_data in tables_data.values(): + total_primary_size = sum(node["primary_size_gb"] for node in table_data["nodes"].values()) + + distribution = TableDistribution( + schema_name=table_data["schema_name"], + table_name=table_data["table_name"], + total_primary_size_gb=total_primary_size, + node_distributions=table_data["nodes"], + ) + distributions.append(distribution) + + # Sort by primary size (descending) + return sorted(distributions, key=lambda x: x.total_primary_size_gb, reverse=True) + + def calculate_coefficient_of_variation(self, values: List[float]) -> float: + """Calculate coefficient of variation (std dev / mean)""" + if not values or len(values) < 2: + return 0.0 + + mean_val = statistics.mean(values) + if mean_val == 0: + return 0.0 + + try: + std_dev = statistics.stdev(values) + return std_dev / mean_val + except statistics.StatisticsError: + return 0.0 + + def detect_shard_count_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in shard count distribution""" + if not table.node_distributions: + return None + + # Get shard counts per node + total_shards = [node["total_shards"] for node in table.node_distributions.values()] + primary_shards = [node["primary_shards"] for node in table.node_distributions.values()] + replica_shards = [node["replica_shards"] for node in table.node_distributions.values()] + + # Calculate coefficient of variation + total_cv = self.calculate_coefficient_of_variation(total_shards) + primary_cv = self.calculate_coefficient_of_variation(primary_shards) + replica_cv = self.calculate_coefficient_of_variation(replica_shards) + + # Severity based on highest CV (higher CV = more imbalanced) + max_cv = max(total_cv, primary_cv, replica_cv) + + # Consider it an anomaly if CV > 0.3 (30% variation) + if max_cv < 0.3: + return None + + # Impact 
based on table size + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) # Cap at 10 + severity_score = min(max_cv * 10, 10.0) # Scale to 0-10 + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = [] + min_shards = min(total_shards) + max_shards = max(total_shards) + + if max_shards - min_shards > 1: + overloaded_nodes = [ + node for node, data in table.node_distributions.items() if data["total_shards"] == max_shards + ] + underloaded_nodes = [ + node for node, data in table.node_distributions.items() if data["total_shards"] == min_shards + ] + + if overloaded_nodes and underloaded_nodes: + recommendations.append(f"Move shards from {overloaded_nodes[0]} to {underloaded_nodes[0]}") + + return DistributionAnomaly( + table=table, + anomaly_type="Shard Count Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven shard distribution (CV: {max_cv:.2f})", + details={ + "total_cv": total_cv, + "primary_cv": primary_cv, + "replica_cv": replica_cv, + "shard_counts": {node: data["total_shards"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def detect_storage_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in storage distribution""" + if not table.node_distributions: + return None + + storage_sizes = [node["total_size_gb"] for node in table.node_distributions.values()] + + # Skip if all sizes are very small (< 1GB total) + if sum(storage_sizes) < 1.0: + return None + + cv = self.calculate_coefficient_of_variation(storage_sizes) + + # Consider it an anomaly if CV > 0.4 (40% variation) for storage + if cv < 0.4: + return None + + impact_score = min(table.total_primary_size_gb / 50.0, 10.0) + severity_score = min(cv * 8, 10.0) + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = [] + min_size = min(storage_sizes) + max_size = max(storage_sizes) + + if max_size > min_size * 2: # If difference is > 2x + overloaded_node = None + underloaded_node = None + + for node, data in table.node_distributions.items(): + if data["total_size_gb"] == max_size: + overloaded_node = node + elif data["total_size_gb"] == min_size: + underloaded_node = node + + if overloaded_node and underloaded_node: + recommendations.append( + f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) " + f"to {underloaded_node} ({format_storage_size(min_size)})" + ) + + return DistributionAnomaly( + table=table, + anomaly_type="Storage Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven storage distribution (CV: {cv:.2f})", + details={ + "storage_cv": cv, + "storage_sizes": {node: data["total_size_gb"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def detect_node_coverage_issues(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect nodes with missing shard coverage""" + if not table.node_distributions: + return None + + # Get all cluster nodes + all_nodes = set() + try: + nodes_info = self.client.get_nodes_info() + all_nodes = {node.name for node in nodes_info if node.name} + except Exception: + # If we can't get node info, use nodes that have shards + all_nodes = set(table.node_distributions.keys()) + + nodes_with_shards = set(table.node_distributions.keys()) + nodes_without_shards = 
all_nodes - nodes_with_shards + + # Only flag as an anomaly if we have missing nodes and the table is significant + if not nodes_without_shards or table.total_primary_size_gb < 10.0: + return None + + coverage_ratio = len(nodes_with_shards) / len(all_nodes) + + # Consider it an anomaly if coverage < 70% + if coverage_ratio >= 0.7: + return None + + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) + severity_score = (1 - coverage_ratio) * 10 # Higher severity for lower coverage + combined_score = impact_score * severity_score + + recommendations = [f"Consider adding replicas to nodes: {', '.join(sorted(nodes_without_shards))}"] + + return DistributionAnomaly( + table=table, + anomaly_type="Node Coverage Issue", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Limited node coverage ({len(nodes_with_shards)}/{len(all_nodes)} nodes)", + details={ + "coverage_ratio": coverage_ratio, + "nodes_with_shards": sorted(nodes_with_shards), + "nodes_without_shards": sorted(nodes_without_shards), + }, + recommendations=recommendations, + ) + + def detect_document_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in document distribution""" + if not table.node_distributions: + return None + + document_counts = [node["total_documents"] for node in table.node_distributions.values()] + + # Skip if total documents is very low + if sum(document_counts) < 10000: + return None + + cv = self.calculate_coefficient_of_variation(document_counts) + + # Consider it an anomaly if CV > 0.5 (50% variation) for documents + if cv < 0.5: + return None + + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) + severity_score = min(cv * 6, 10.0) + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = ["Document imbalance may indicate data skew - consider reviewing shard routing"] + + min_docs = min(document_counts) + max_docs = max(document_counts) + + if max_docs > min_docs * 3: # If difference is > 3x + recommendations.append(f"Significant document skew detected ({min_docs:,} to {max_docs:,} docs per node)") + + return DistributionAnomaly( + table=table, + anomaly_type="Document Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven document distribution (CV: {cv:.2f})", + details={ + "document_cv": cv, + "document_counts": {node: data["total_documents"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def analyze_distribution(self, top_tables: int = 10) -> Tuple[List[DistributionAnomaly], int]: + """Analyze shard distribution and return ranked anomalies""" + + # Get table distributions + distributions = self.get_largest_tables_distribution(top_tables) + + # Detect all anomalies + anomalies = [] + + for table_dist in distributions: + # Check each type of anomaly + for detector in [ + self.detect_shard_count_imbalance, + self.detect_storage_imbalance, + self.detect_node_coverage_issues, + self.detect_document_imbalance, + ]: + anomaly = detector(table_dist) + if anomaly: + anomalies.append(anomaly) + + # Sort by combined score (highest first) + return sorted(anomalies, key=lambda x: x.combined_score, reverse=True), len(distributions) + + def format_distribution_report(self, anomalies: List[DistributionAnomaly], tables_analyzed: int) -> None: + """Format and display the distribution analysis report""" + + if not anomalies: + 
rprint( + f"[green]โœ“ No significant shard distribution anomalies " + f"detected in top {tables_analyzed} tables![/green]" + ) + return + + # Show analysis scope + unique_tables = {anomaly.table.full_table_name for anomaly in anomalies} + rprint( + f"[blue]๐Ÿ“‹ Analyzed {tables_analyzed} largest tables, found issues in {len(unique_tables)} tables[/blue]" + ) + rprint() + + # Summary table + table = Table(title="๐ŸŽฏ Shard Distribution Anomalies", show_header=True) + table.add_column("Rank", width=4) + table.add_column("Table", min_width=20) + table.add_column("Issue Type", min_width=15) + table.add_column("Score", width=8) + table.add_column("Primary Size", width=12) + table.add_column("Description", min_width=25) + + for i, anomaly in enumerate(anomalies[:10], 1): # Top 10 + # Color coding by severity + if anomaly.combined_score >= 50: + rank_color = "red" + elif anomaly.combined_score >= 25: + rank_color = "yellow" + else: + rank_color = "blue" + + table.add_row( + f"[{rank_color}]{i}[/{rank_color}]", + anomaly.table.full_table_name, + anomaly.anomaly_type, + f"[{rank_color}]{anomaly.combined_score:.1f}[/{rank_color}]", + format_storage_size(anomaly.table.total_primary_size_gb), + anomaly.description, + ) + + self.console.print(table) + + # Detailed recommendations for top issues + if anomalies: + rprint("\n[bold]๐Ÿ”ง Top Recommendations:[/bold]") + + for i, anomaly in enumerate(anomalies[:5], 1): # Top 5 recommendations + rprint(f"\n[bold]{i}. {anomaly.table.full_table_name}[/bold] - {anomaly.anomaly_type}") + + # Show the problem analysis first + rprint(f" [yellow]๐Ÿ” Problem:[/yellow] {anomaly.description}") + + # Add specific details about what's wrong + if anomaly.anomaly_type == "Shard Count Imbalance": + if "shard_counts" in anomaly.details: + counts = anomaly.details["shard_counts"] + min_count = min(counts.values()) + max_count = max(counts.values()) + overloaded = [node for node, count in counts.items() if count == max_count] + underloaded = [node for node, count in counts.items() if count == min_count] + rprint( + f" [red]โš  Issue:[/red] {overloaded[0]} has {max_count} shards " + f"while {underloaded[0]} has only {min_count} shards" + ) + + elif anomaly.anomaly_type == "Storage Imbalance": + if "storage_sizes" in anomaly.details: + sizes = anomaly.details["storage_sizes"] + min_size = min(sizes.values()) + max_size = max(sizes.values()) + overloaded = [node for node, size in sizes.items() if size == max_size][0] + underloaded = [node for node, size in sizes.items() if size == min_size][0] + rprint( + f" [red]โš  Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) " # noqa: E501 + f"to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference" + ) + + elif anomaly.anomaly_type == "Node Coverage Issue": + if "nodes_without_shards" in anomaly.details: + missing_nodes = anomaly.details["nodes_without_shards"] + coverage_ratio = anomaly.details["coverage_ratio"] + rprint( + f" [red]โš  Issue:[/red] Table missing from {len(missing_nodes)} nodes " + f"({coverage_ratio:.0%} cluster coverage)" + ) + ellipsis = "..." 
if len(missing_nodes) > 3 else "" + rprint(f" [dim] Missing from: {', '.join(missing_nodes[:3])}{ellipsis}[/dim]") + + elif anomaly.anomaly_type == "Document Imbalance": + if "document_counts" in anomaly.details: + doc_counts = anomaly.details["document_counts"] + min_docs = min(doc_counts.values()) + max_docs = max(doc_counts.values()) + ratio = max_docs / min_docs if min_docs > 0 else float("inf") + rprint( + f" [red]โš  Issue:[/red] Document counts range " + f"from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)" + ) + + # Show recommendations + rprint(" [green]๐Ÿ’ก Solutions:[/green]") + for rec in anomaly.recommendations: + rprint(f" โ€ข {rec}") + + # Summary statistics + unique_tables = {anomaly.table.full_table_name for anomaly in anomalies} + rprint("\n[dim]๐Ÿ“Š Analysis Summary:[/dim]") + rprint(f"[dim]โ€ข Tables analyzed: {tables_analyzed}[/dim]") + rprint(f"[dim]โ€ข Tables with issues: {len(unique_tables)}[/dim]") + rprint(f"[dim]โ€ข Total anomalies found: {len(anomalies)}[/dim]") + rprint(f"[dim]โ€ข Critical issues (score >50): {len([a for a in anomalies if a.combined_score >= 50])}[/dim]") + rprint( + f"[dim]โ€ข Warning issues (score 25-50): {len([a for a in anomalies if 25 <= a.combined_score < 50])}[/dim]" + ) diff --git a/cratedb_toolkit/admin/xmover/analysis/zone.py b/cratedb_toolkit/admin/xmover/analysis/zone.py index 718d88f0..07e67803 100644 --- a/cratedb_toolkit/admin/xmover/analysis/zone.py +++ b/cratedb_toolkit/admin/xmover/analysis/zone.py @@ -135,7 +135,8 @@ def distribution_conflicts(self, shard_details: bool = False, table: Optional[st health_indicator = "โœ“" if shard_copy.routing_state == "STARTED" else "โš " console.print( f" {health_indicator} {shard_copy.shard_type} " - f"on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}" + f"on {shard_copy.node_name} ({shard_copy.zone}) - " + f"{shard_copy.state}/{shard_copy.routing_state}" ) console.print(analysis_table) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 339f9e7f..bff94f20 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -1,16 +1,25 @@ """ XMover - CrateDB Shard Analyzer and Movement Tool -Command Line Interface. +A tool for analyzing CrateDB shard distribution across +nodes and availability zones, and for generating safe +SQL commands for shard rebalancing. """ -import sys +import time from typing import Optional import click from rich.console import Console +from rich.panel import Panel -from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer, ShardReporter +from cratedb_toolkit.admin.xmover.analysis.shard import ( + ActiveShardMonitor, + ShardAnalyzer, + ShardReporter, + TranslogReporter, +) +from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( ShardRelocationConstraints, @@ -26,28 +35,25 @@ console = Console() -@click.group() +@click.group(help=__doc__) @click.version_option() @click.pass_context def main(ctx): - """XMover - CrateDB Shard Analyzer and Movement Tool - - A tool for analyzing CrateDB shard distribution across nodes and availability zones, - and generating safe SQL commands for shard rebalancing. 
- """ ctx.ensure_object(dict) - # Test connection on startup - try: - client = CrateDBClient() - if not client.test_connection(): - console.print("[red]Error: Could not connect to CrateDB[/red]") - console.print("Please check your CRATE_CONNECTION_STRING in .env file") - sys.exit(1) - ctx.obj["client"] = client - except Exception as e: - console.print(f"[red]Error connecting to CrateDB: {e}[/red]") - sys.exit(1) + # Test connection on startup. + client = CrateDBClient() + if not client.test_connection(): + console.print("[red]Error: Failed connecting to CrateDB[/red]") + console.print( + "Please check your database connection string, " + "i.e. the CRATE_CONNECTION_STRING environment variable, " + "possibly stored within an .env file" + ) + raise click.Abort() + + # Propagate the client handle. + ctx.obj["client"] = client @main.command() @@ -167,11 +173,11 @@ def test_connection(ctx, connection_string: Optional[str]): console.print(f" โ€ข {node.name} (zone: {node.zone})") else: console.print("[red]โœ— Connection failed[/red]") - sys.exit(1) + raise click.Abort() except Exception as e: console.print(f"[red]โœ— Connection error: {e}[/red]") - sys.exit(1) + raise click.Abort() from e @main.command() @@ -185,6 +191,325 @@ def check_balance(ctx, table: Optional[str], tolerance: float): report.shard_balance(tolerance=tolerance, table=table) +@main.command() +@click.option("--top-tables", default=10, help="Number of largest tables to analyze (default: 10)") +@click.option("--table", help='Analyze specific table only (e.g., "my_table" or "schema.table")') +@click.pass_context +def shard_distribution(ctx, top_tables: int, table: Optional[str]): + """Analyze shard distribution anomalies across cluster nodes + + This command analyzes the largest tables in your cluster to detect: + โ€ข Uneven shard count distribution between nodes + โ€ข Storage imbalances across nodes + โ€ข Missing node coverage for tables + โ€ข Document count imbalances indicating data skew + + Results are ranked by impact and severity to help prioritize fixes. 
+ + Examples: + xmover shard-distribution # Analyze top 10 tables + xmover shard-distribution --top-tables 20 # Analyze top 20 tables + xmover shard-distribution --table my_table # Detailed report for specific table + """ + try: + client = ctx.obj["client"] + analyzer = DistributionAnalyzer(client) + + if table: + # Focused table analysis mode + console.print(f"[blue]๐Ÿ” Analyzing table: {table}...[/blue]") + + # Find table (handles schema auto-detection) + table_identifier = analyzer.find_table_by_name(table) + if not table_identifier: + console.print(f"[red]โŒ Table '{table}' not found[/red]") + return + + # Get detailed distribution + table_dist = analyzer.get_table_distribution_detailed(table_identifier) + if not table_dist: + console.print(f"[red]โŒ No shard data found for table '{table_identifier}'[/red]") + return + + # Display comprehensive health report + analyzer.format_table_health_report(table_dist) + + else: + # General anomaly detection mode + console.print(f"[blue]๐Ÿ” Analyzing shard distribution for top {top_tables} tables...[/blue]") + console.print() + + # Perform analysis + anomalies, tables_analyzed = analyzer.analyze_distribution(top_tables) + + # Display results + analyzer.format_distribution_report(anomalies, tables_analyzed) + + except KeyboardInterrupt: + console.print("\n[yellow]Analysis interrupted by user[/yellow]") + except Exception as e: + console.print(f"[red]Error during distribution analysis: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + +@main.command() +@click.option("--count", default=10, help="Number of most active shards to show (default: 10)") +@click.option("--interval", default=30, help="Observation interval in seconds (default: 30)") +@click.option( + "--min-checkpoint-delta", + default=1000, + help="Minimum checkpoint progression between snapshots to show shard (default: 1000)", +) +@click.option("--table", "-t", help="Monitor specific table only") +@click.option("--node", "-n", help="Monitor specific node only") +@click.option("--watch", "-w", is_flag=True, help="Continuously monitor (refresh every interval)") +@click.option("--exclude-system", is_flag=True, help="Exclude system tables (gc.*, information_schema.*)") +@click.option("--min-rate", type=float, help="Minimum activity rate (changes/sec) to show") +@click.option("--show-replicas/--hide-replicas", default=True, help="Show replica shards (default: True)") +@click.pass_context +def active_shards( + ctx, + count: int, + interval: int, + min_checkpoint_delta: int, + table: Optional[str], + node: Optional[str], + watch: bool, + exclude_system: bool, + min_rate: Optional[float], + show_replicas: bool, +): + """Monitor most active shards by checkpoint progression + + This command takes two snapshots of ALL started shards separated by the + observation interval, then shows the shards with the highest checkpoint + progression (activity) between the snapshots. + + Unlike other commands, this tracks ALL shards and filters based on actual + activity between snapshots, not current state. This captures shards that + become active during the observation period. + + Useful for identifying which shards are receiving the most write activity + in your cluster and understanding write patterns. 
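+
+    As a rough illustration (hypothetical numbers): a shard whose local
+    checkpoint advances from 1,500,000 to 1,545,000 during a 30 second
+    interval has a checkpoint delta of 45,000 and an activity rate of
+    1,500 changes/sec, well above the default threshold of 1,000.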
+ + Examples: + xmover active-shards --count 20 --interval 60 # Top 20 over 60 seconds + xmover active-shards --watch --interval 30 # Continuous monitoring + xmover active-shards --table my_table --watch # Monitor specific table + xmover active-shards --node data-hot-1 --count 5 # Top 5 on specific node + xmover active-shards --min-checkpoint-delta 500 # Lower activity threshold + xmover active-shards --exclude-system --min-rate 50 # Skip system tables, min 50/sec + xmover active-shards --hide-replicas --count 20 # Only primary shards + """ + client = ctx.obj["client"] + monitor = ActiveShardMonitor(client) + + def get_filtered_snapshot(): + """Get snapshot with optional filtering""" + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=min_checkpoint_delta) + + # Apply table filter if specified + if table: + snapshots = [s for s in snapshots if s.table_name == table or f"{s.schema_name}.{s.table_name}" == table] + + # Apply node filter if specified + if node: + snapshots = [s for s in snapshots if s.node_name == node] + + # Exclude system tables if requested + if exclude_system: + snapshots = [ + s + for s in snapshots + if not ( + s.schema_name.startswith("gc.") + or s.schema_name == "information_schema" + or s.schema_name == "sys" + or s.table_name.endswith("_events") + or s.table_name.endswith("_log") + ) + ] + + return snapshots + + def run_single_analysis(): + """Run a single analysis cycle""" + if not watch: + console.print(Panel.fit("[bold blue]Active Shards Monitor[/bold blue]")) + + # Show configuration - simplified for watch mode + if watch: + config_parts = [f"{interval}s interval", f"threshold: {min_checkpoint_delta:,}", f"top {count}"] + if table: + config_parts.append(f"table: {table}") + if node: + config_parts.append(f"node: {node}") + console.print(f"[dim]{' | '.join(config_parts)}[/dim]") + else: + config_info = [ + f"Observation interval: {interval}s", + f"Min checkpoint delta: {min_checkpoint_delta:,}", + f"Show count: {count}", + ] + if table: + config_info.append(f"Table filter: {table}") + if node: + config_info.append(f"Node filter: {node}") + if exclude_system: + config_info.append("Excluding system tables") + if min_rate: + config_info.append(f"Min rate: {min_rate}/sec") + if not show_replicas: + config_info.append("Primary shards only") + + console.print("[dim]" + " | ".join(config_info) + "[/dim]") + console.print() + + # Take first snapshot + if not watch: + console.print("๐Ÿ“ท Taking first snapshot...") + snapshot1 = get_filtered_snapshot() + + if not snapshot1: + console.print("[yellow]No started shards found matching criteria[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot1)} started shards for activity") + console.print(f"โฑ๏ธ Waiting {interval} seconds for activity...") + + # Wait for observation interval + if watch: + # Simplified countdown for watch mode + for remaining in range(interval, 0, -1): + if remaining % 5 == 0 or remaining <= 3: # Show fewer updates + console.print(f"[dim]โฑ๏ธ {remaining}s...[/dim]", end="\r") + time.sleep(1) + console.print(" " * 15, end="\r") # Clear countdown + else: + time.sleep(interval) + + # Take second snapshot + if not watch: + console.print("๐Ÿ“ท Taking second snapshot...") + snapshot2 = get_filtered_snapshot() + + if not snapshot2: + console.print("[yellow]No started shards found in second snapshot[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot2)} started shards for activity") + + # Compare snapshots and show results + activities = 
monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=min_checkpoint_delta) + + # Apply additional filters + if not show_replicas: + activities = [a for a in activities if a.is_primary] + + if min_rate: + activities = [a for a in activities if a.activity_rate >= min_rate] + + if not activities: + console.print( + f"[green]โœ… No shards exceeded activity threshold ({min_checkpoint_delta:,} checkpoint changes)[/green]" + ) + if min_rate: + console.print(f"[dim]Also filtered by minimum rate: {min_rate}/sec[/dim]") + else: + if not watch: + overlap_count = len({s.shard_identifier for s in snapshot1} & {s.shard_identifier for s in snapshot2}) + console.print(f"[dim]Analyzed {overlap_count} shards present in both snapshots[/dim]") + console.print(monitor.format_activity_display(activities, show_count=count, watch_mode=watch)) + + try: + if watch: + console.print("[dim]Press Ctrl+C to stop monitoring[/dim]") + console.print() + + while True: + run_single_analysis() + if watch: + console.print(f"\n[dim]โ”โ”โ” Next update in {interval}s โ”โ”โ”[/dim]\n") + time.sleep(interval) + else: + run_single_analysis() + + except KeyboardInterrupt: + console.print("\n[yellow]Monitoring stopped by user[/yellow]") + except Exception as e: + console.print(f"[red]Error during active shards monitoring: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + +@main.command() +@click.option("--size-mb", default=300, help="Minimum translog uncommitted size in MB (default: 300)") +@click.option("--cancel", is_flag=True, help="Execute the cancel commands after confirmation") +@click.pass_context +def problematic_translogs(ctx, size_mb: int, cancel: bool): + """ + Find and optionally cancel shards with problematic translog sizes. + + This command identifies replica shards with large uncommitted translog sizes + that may indicate replication issues. By default, it shows the ALTER commands + that would cancel these shards. With --cancel, it executes them after confirmation. 
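+
+    Examples:
+        xmover problematic-translogs                 # Show commands for translogs > 300MB
+        xmover problematic-translogs --size-mb 1000  # Only consider translogs above 1GB
+        xmover problematic-translogs --cancel        # Execute the cancel commands after confirmation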
+ """ + client = ctx.obj["client"] + report = TranslogReporter(client=client) + alter_commands = report.problematic_translogs(size_mb=size_mb) + + try: + if cancel and alter_commands: + console.print() + console.print("[yellow]โš ๏ธ WARNING: This will cancel the specified shards![/yellow]") + console.print("[yellow]This may cause temporary data unavailability for these shards.[/yellow]") + console.print() + + if click.confirm("Are you sure you want to execute these ALTER commands?"): + console.print() + console.print("[bold blue]Executing ALTER commands...[/bold blue]") + + executed = 0 + failed = 0 + + for i, cmd in enumerate(alter_commands, 1): + if cmd.startswith("--"): + console.print(f"[yellow]Skipping command {i} (parse error): {cmd}[/yellow]") + continue + + try: + console.print(f"[dim]({i}/{len(alter_commands)}) Executing...[/dim]") + client.execute_query(cmd) + console.print(f"[green]โœ“ Command {i} executed successfully[/green]") + executed += 1 + except Exception as e: + console.print(f"[red]โœ— Command {i} failed: {e}[/red]") + failed += 1 + + # Small delay between commands to avoid overwhelming the cluster + if i < len(alter_commands): + time.sleep(1) + + console.print() + console.print("[bold]Execution Summary:[/bold]") + console.print(f"[green]โœ“ Successful: {executed}[/green]") + if failed > 0: + console.print(f"[red]โœ— Failed: {failed}[/red]") + else: + console.print("[yellow]Operation cancelled by user[/yellow]") + + except Exception as e: + console.print(f"[red]Error analyzing problematic translogs: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + @main.command() @click.option("--table", "-t", help="Analyze zones for specific table only") @click.option("--show-shards/--no-show-shards", default=False, help="Show individual shard details (default: False)") @@ -266,13 +591,14 @@ def monitor_recovery( xmover monitor-recovery --watch # Continuous monitoring xmover monitor-recovery --recovery-type PEER # Only PEER recoveries """ + effective_recovery_type = None if recovery_type == "all" else recovery_type recovery_monitor = RecoveryMonitor( client=ctx.obj["client"], options=RecoveryOptions( table=table, node=node, refresh_interval=refresh_interval, - recovery_type=recovery_type, + recovery_type=effective_recovery_type, include_transitioning=include_transitioning, ), ) diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index 34e43f77..2f57129b 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -1,6 +1,7 @@ -import dataclasses from dataclasses import dataclass -from typing import Dict, Optional +from typing import Any, Dict, Optional, Tuple + +TableStatsType = Dict[Tuple[str, str], Dict[str, Any]] @dataclass @@ -57,6 +58,7 @@ class RecoveryInfo: schema_name: str table_name: str + partition_values: Optional[str] # Partition values for partitioned tables shard_id: int node_name: str node_id: str @@ -149,6 +151,12 @@ def safety_score(self) -> float: if "rebalancing" in self.reason.lower(): score += 0.2 + # Consider shard size - smaller shards are safer to move + if self.size_gb < 10: + score += 0.1 + elif self.size_gb > 100: + score -= 0.2 + # Ensure score stays in valid range return max(0.0, min(1.0, score)) @@ -165,7 +173,7 @@ class DistributionStats: node_balance_score: float # 0-100, higher is better -@dataclasses.dataclass +@dataclass class SizeCriteria: min_size: float = 40.0 max_size: float = 60.0 @@ -173,7 +181,7 @@ class SizeCriteria: 
source_node: Optional[str] = None -@dataclasses.dataclass +@dataclass class ShardRelocationConstraints: min_size: float = SizeCriteria().min_size max_size: float = SizeCriteria().max_size @@ -184,3 +192,67 @@ class ShardRelocationConstraints: max_recommendations: int = 10 max_disk_usage: float = 90.0 prioritize_space: bool = False + + +@dataclass +class ActiveShardSnapshot: + """Snapshot of active shard checkpoint data for tracking activity""" + + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint: int + global_checkpoint: int + translog_uncommitted_bytes: int + timestamp: float # Unix timestamp when snapshot was taken + + @property + def checkpoint_delta(self) -> int: + """Current checkpoint delta (local - global)""" + return self.local_checkpoint - self.global_checkpoint + + @property + def translog_uncommitted_mb(self) -> float: + """Translog uncommitted size in MB""" + return self.translog_uncommitted_bytes / (1024 * 1024) + + @property + def shard_identifier(self) -> str: + """Unique identifier for this shard including partition""" + shard_type = "P" if self.is_primary else "R" + partition = f":{self.partition_ident}" if self.partition_ident else "" + return f"{self.schema_name}.{self.table_name}:{self.shard_id}:{self.node_name}:{shard_type}{partition}" + + +@dataclass +class ActiveShardActivity: + """Activity comparison between two snapshots of the same shard""" + + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint_delta: int # Change in local checkpoint between snapshots + snapshot1: ActiveShardSnapshot + snapshot2: ActiveShardSnapshot + time_diff_seconds: float + + @property + def activity_rate(self) -> float: + """Activity rate as checkpoint changes per second""" + if self.time_diff_seconds > 0: + return self.local_checkpoint_delta / self.time_diff_seconds + return 0.0 + + @property + def shard_type(self) -> str: + return "PRIMARY" if self.is_primary else "REPLICA" + + @property + def table_identifier(self) -> str: + return f"{self.schema_name}.{self.table_name}" diff --git a/cratedb_toolkit/admin/xmover/operational/candidates.py b/cratedb_toolkit/admin/xmover/operational/candidates.py index dd7d4930..9841624b 100644 --- a/cratedb_toolkit/admin/xmover/operational/candidates.py +++ b/cratedb_toolkit/admin/xmover/operational/candidates.py @@ -14,7 +14,7 @@ class CandidateFinder: def __init__(self, analyzer: ShardAnalyzer): self.analyzer = analyzer - def movement_candidates(self, criteria: SizeCriteria, limit: int): + def movement_candidates(self, criteria: SizeCriteria, limit: int) -> int: """ Find shard candidates for movement based on size criteria @@ -23,7 +23,7 @@ def movement_candidates(self, criteria: SizeCriteria, limit: int): """ console.print( - Panel.fit(f"[bold blue]Finding Moveable Shards ({criteria.min_size}-{criteria.max_size}GB)[/bold blue]") + Panel.fit(f"[bold blue]Finding Movable Shards ({criteria.min_size}-{criteria.max_size}GB)[/bold blue]") ) if criteria.source_node: @@ -45,7 +45,7 @@ def movement_candidates(self, criteria: SizeCriteria, limit: int): console.print("[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") else: console.print("[yellow]No moveable shards found in the specified size range.[/yellow]") - return + return 0 # Show limited results shown_candidates = candidates[:limit] @@ -82,3 +82,5 @@ def movement_candidates(self, criteria: SizeCriteria, limit: int): 
if len(candidates) > limit: console.print(f"\n[dim]... and {len(candidates) - limit} more candidates[/dim]") + + return len(candidates) diff --git a/cratedb_toolkit/admin/xmover/operational/monitor.py b/cratedb_toolkit/admin/xmover/operational/monitor.py index d88a295f..f9acf0e5 100644 --- a/cratedb_toolkit/admin/xmover/operational/monitor.py +++ b/cratedb_toolkit/admin/xmover/operational/monitor.py @@ -7,7 +7,7 @@ from cratedb_toolkit.admin.xmover.model import RecoveryInfo from cratedb_toolkit.admin.xmover.util.database import CrateDBClient -from cratedb_toolkit.admin.xmover.util.format import format_translog_info +from cratedb_toolkit.admin.xmover.util.format import format_table_display_with_partition, format_translog_info console = Console() @@ -24,9 +24,9 @@ class RecoveryOptions: class RecoveryMonitor: """Monitor shard recovery operations""" - def __init__(self, client: CrateDBClient, options: RecoveryOptions): + def __init__(self, client: CrateDBClient, options: Optional[RecoveryOptions] = None): self.client = client - self.options = options + self.options = options or RecoveryOptions() def get_cluster_recovery_status(self) -> List[RecoveryInfo]: """Get comprehensive recovery status with minimal cluster impact""" @@ -37,11 +37,15 @@ def get_cluster_recovery_status(self) -> List[RecoveryInfo]: ) # Apply recovery type filter - if self.options.recovery_type is not None: + if self.options.recovery_type is not None and self.options.recovery_type.lower() != "all": recoveries = [r for r in recoveries if r.recovery_type.upper() == self.options.recovery_type.upper()] return recoveries + def get_problematic_shards(self) -> List[Dict[str, Any]]: + """Get shards that need attention but aren't actively recovering""" + return self.client.get_problematic_shards(self.options.table, self.options.node) + def get_recovery_summary(self, recoveries: List[RecoveryInfo]) -> Dict[str, Any]: """Generate a summary of recovery operations""" @@ -126,18 +130,22 @@ def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str: return " No recoveries of this type" # Table headers - headers = ["Table", "Shard", "Node", "Type", "Stage", "Progress", "Size(GB)", "Time(s)"] + headers = ["Table", "Shard", "Node", "Recovery", "Stage", "Progress", "Size(GB)", "Time(s)"] # Calculate column widths col_widths = [len(h) for h in headers] rows = [] for recovery in recoveries: + # Format table name with partition values if available + table_display = f"{recovery.schema_name}.{recovery.table_name}" + if recovery.partition_values: + table_display = f"{table_display} {recovery.partition_values}" row = [ - f"{recovery.schema_name}.{recovery.table_name}", + table_display, str(recovery.shard_id), recovery.node_name, - recovery.shard_type, + recovery.recovery_type, recovery.stage, f"{recovery.overall_progress:.1f}%", f"{recovery.size_gb:.1f}", @@ -178,7 +186,6 @@ def start(self, watch: bool, debug: bool = False): # Track previous state for change detection previous_recoveries: Dict[str, Dict[str, Any]] = {} - previous_timestamp = None first_run = True while True: @@ -199,10 +206,9 @@ def start(self, watch: bool, debug: bool = False): ) # Create complete table name - if recovery.schema_name == "doc": - table_display = recovery.table_name - else: - table_display = f"{recovery.schema_name}.{recovery.table_name}" + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) # Count active vs completed if recovery.stage == "DONE" and recovery.overall_progress >= 
100.0: @@ -226,16 +232,22 @@ def start(self, watch: bool, debug: bool = False): translog_info = format_translog_info(recovery) if diff > 0: + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"[green]๐Ÿ“ˆ[/green] {table_display} S{recovery.shard_id} " - f"{recovery.overall_progress:.1f}% (+{diff:.1f}%) " - f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + f"{recovery.recovery_type} {recovery.overall_progress:.1f}% " + f"(+{diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}" ) else: + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"[yellow]๐Ÿ“‰[/yellow] {table_display} S{recovery.shard_id} " - f"{recovery.overall_progress:.1f}% ({diff:.1f}%) " - f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + f"{recovery.recovery_type} {recovery.overall_progress:.1f}% " + f"({diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}" ) elif prev["stage"] != recovery.stage: # Create node route display @@ -247,10 +259,12 @@ def start(self, watch: bool, debug: bool = False): # Add translog info translog_info = format_translog_info(recovery) - + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"[blue]๐Ÿ”„[/blue] {table_display} S{recovery.shard_id} " - f"{prev['stage']}โ†’{recovery.stage} " + f"{recovery.recovery_type} {prev['stage']}โ†’{recovery.stage} " f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" ) else: @@ -268,12 +282,15 @@ def start(self, watch: bool, debug: bool = False): node_route = f" disk โ†’ {recovery.node_name}" status_icon = "[cyan]๐Ÿ†•[/cyan]" if not first_run else "[blue]๐Ÿ“‹[/blue]" + # Add translog info translog_info = format_translog_info(recovery) - + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"{status_icon} {table_display} S{recovery.shard_id} " - f"{recovery.stage} {recovery.overall_progress:.1f}% " + f"{recovery.recovery_type} {recovery.stage} {recovery.overall_progress:.1f}% " f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" ) @@ -283,31 +300,82 @@ def start(self, watch: bool, debug: bool = False): "stage": recovery.stage, } - # Always show a status line - if not recoveries: - console.print(f"{current_time} | [green]No recoveries - cluster stable[/green]") + # Get problematic shards for comprehensive status + problematic_shards = self.get_problematic_shards() + + # Filter out shards that are already being recovered + non_recovering_shards = [] + if problematic_shards: + for shard in problematic_shards: + # Check if this shard is already in our recoveries list + is_recovering = any( + r.shard_id == shard["shard_id"] + and r.table_name == shard["table_name"] + and r.schema_name == shard["schema_name"] + for r in recoveries + ) + if not is_recovering: + non_recovering_shards.append(shard) + + # Always show a comprehensive status line + if not recoveries and not non_recovering_shards: + console.print(f"{current_time} | [green]No issues - cluster stable[/green]") + previous_recoveries.clear() + elif not recoveries and non_recovering_shards: + console.print( + f"{current_time} | [yellow]{len(non_recovering_shards)} shards " + f"need attention (not recovering)[/yellow]" + ) + # Show first few problematic shards + for shard in non_recovering_shards[:5]: + 
table_display = format_table_display_with_partition( + shard["schema_name"], shard["table_name"], shard.get("partition_values") + ) + primary_indicator = "P" if shard.get("primary") else "R" + console.print( + f" | [red]โš [/red] {table_display} " + f"S{shard['shard_id']}{primary_indicator} {shard['state']}" + ) + if len(non_recovering_shards) > 5: + console.print(f" | [dim]... and {len(non_recovering_shards) - 5} more[/dim]") previous_recoveries.clear() else: - # Build status message - status = "" + # Build status message for active recoveries + status_parts = [] if active_count > 0: - status = f"{active_count} active" + status_parts.append(f"{active_count} recovering") if completed_count > 0: - status += f", {completed_count} done" if status else f"{completed_count} done" + status_parts.append(f"{completed_count} done") + if non_recovering_shards: + status_parts.append(f"[yellow]{len(non_recovering_shards)} awaiting recovery[/yellow]") + + status = " | ".join(status_parts) # Show status line with changes or periodic update if changes: console.print(f"{current_time} | {status}") for change in changes: console.print(f" | {change}") + # Show some problematic shards if there are any + if non_recovering_shards and len(changes) < 3: # Don't overwhelm the output + for shard in non_recovering_shards[:2]: + table_display = format_table_display_with_partition( + shard["schema_name"], shard["table_name"], shard.get("partition_values") + ) + primary_indicator = "P" if shard.get("primary") else "R" + console.print( + f" | [red]โš [/red] {table_display} " + f"S{shard['shard_id']}{primary_indicator} {shard['state']}" + ) else: # Show periodic status even without changes if self.options.include_transitioning and completed_count > 0: console.print(f"{current_time} | {status} (transitioning)") elif active_count > 0: console.print(f"{current_time} | {status} (no changes)") + elif non_recovering_shards: + console.print(f"{current_time} | {status} (issues persist)") - previous_timestamp = current_time # noqa: F841 first_run = False time.sleep(self.options.refresh_interval) @@ -317,30 +385,63 @@ def start(self, watch: bool, debug: bool = False): # Show final summary final_recoveries = self.get_cluster_recovery_status() - if final_recoveries: - console.print("\n๐Ÿ“Š [bold]Final Recovery Summary:[/bold]") - summary = self.get_recovery_summary(final_recoveries) + final_problematic_shards = self.get_problematic_shards() + + # Filter out shards that are already being recovered + final_non_recovering_shards = [] + if final_problematic_shards: + for shard in final_problematic_shards: + is_recovering = any( + r.shard_id == shard["shard_id"] + and r.table_name == shard["table_name"] + and r.schema_name == shard["schema_name"] + for r in final_recoveries + ) + if not is_recovering: + final_non_recovering_shards.append(shard) - # Count active vs completed - active_count = len( - [r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"] - ) - completed_count = len(final_recoveries) - active_count + if final_recoveries or final_non_recovering_shards: + console.print("\n๐Ÿ“Š [bold]Final Cluster Status Summary:[/bold]") - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Active: {active_count}, Completed: {completed_count}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") - console.print(f" Average progress: {summary['avg_progress']:.1f}%") + if final_recoveries: + summary = self.get_recovery_summary(final_recoveries) + # Count active vs completed 
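+                    # A recovery counts as active while its progress is below 100% or
+                    # its stage is not DONE, e.g. a shard at 100.0% that is still in
+                    # the TRANSLOG stage is counted as active rather than completed.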
+ active_count = len( + [r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"] + ) + completed_count = len(final_recoveries) - active_count + + console.print(f" Total recoveries: {summary['total_recoveries']}") + console.print(f" Active: {active_count}, Completed: {completed_count}") + console.print(f" Total size: {summary['total_size_gb']:.1f} GB") + console.print(f" Average progress: {summary['avg_progress']:.1f}%") + + if summary["by_type"]: + console.print(" By recovery type:") + for rec_type, stats in summary["by_type"].items(): + console.print( + f" {rec_type}: {stats['count']} recoveries, " + f"{stats['avg_progress']:.1f}% avg progress" + ) - if summary["by_type"]: - console.print(" By recovery type:") - for rec_type, stats in summary["by_type"].items(): - console.print( - f" {rec_type}: {stats['count']} recoveries, " - f"{stats['avg_progress']:.1f}% avg progress" - ) + if final_non_recovering_shards: + console.print( + f" [yellow]Problematic shards needing attention: " + f"{len(final_non_recovering_shards)}[/yellow]" + ) + # Group by state for summary + by_state = {} + for shard in final_non_recovering_shards: + state = shard["state"] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") else: console.print("\n[green]โœ… No active recoveries at exit[/green]") + console.print("\n[green]โœ… Cluster stable - no issues detected[/green]") return @@ -351,20 +452,66 @@ def start(self, watch: bool, debug: bool = False): display_output = self.format_recovery_display(recoveries) console.print(display_output) - if not recoveries: + # Get problematic shards for comprehensive status + problematic_shards = self.get_problematic_shards() + + # Filter out shards that are already being recovered + non_recovering_shards = [] + if problematic_shards: + for shard in problematic_shards: + is_recovering = any( + r.shard_id == shard["shard_id"] + and r.table_name == shard["table_name"] + and r.schema_name == shard["schema_name"] + for r in recoveries + ) + if not is_recovering: + non_recovering_shards.append(shard) + + if not recoveries and not non_recovering_shards: if self.options.include_transitioning: - console.print("\n[green]โœ… No recoveries found (active or transitioning)[/green]") + console.print("\n[green]โœ… No issues found - cluster stable[/green]") else: console.print("\n[green]โœ… No active recoveries found[/green]") console.print( "[dim]๐Ÿ’ก Use --include-transitioning to see completed recoveries still transitioning[/dim]" ) + + elif not recoveries and non_recovering_shards: + console.print( + f"\n[yellow]โš ๏ธ {len(non_recovering_shards)} shards need attention (not recovering)[/yellow]" + ) + # Group by state for summary + by_state = {} + for shard in non_recovering_shards: + state = shard["state"] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") + + # Show first few examples + console.print("\nExamples:") + for shard in non_recovering_shards[:5]: + table_display = format_table_display_with_partition( + shard["schema_name"], shard["table_name"], shard.get("partition_values") + ) + primary_indicator = "P" if shard.get("primary") else "R" + console.print( + f" [red]โš [/red] {table_display} S{shard['shard_id']}{primary_indicator} {shard['state']}" + ) + + if len(non_recovering_shards) > 5: + console.print(f" [dim]... 
and {len(non_recovering_shards) - 5} more[/dim]") + else: - # Show summary + # Show recovery summary summary = self.get_recovery_summary(recoveries) - console.print("\n๐Ÿ“Š [bold]Recovery Summary:[/bold]") - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") + console.print("\n๐Ÿ“Š [bold]Cluster Status Summary:[/bold]") + console.print(f" Active recoveries: {summary['total_recoveries']}") + console.print(f" Total recovery size: {summary['total_size_gb']:.1f} GB") console.print(f" Average progress: {summary['avg_progress']:.1f}%") # Show breakdown by type @@ -376,6 +523,21 @@ def start(self, watch: bool, debug: bool = False): f"{stats['avg_progress']:.1f}% avg progress" ) + # Show problematic shards if any + if non_recovering_shards: + console.print( + f"\n [yellow]Problematic shards needing attention: {len(non_recovering_shards)}[/yellow]" + ) + by_state = {} + for shard in non_recovering_shards: + state = shard["state"] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") + console.print("\n[dim]๐Ÿ’ก Use --watch flag for continuous monitoring[/dim]") except Exception as e: diff --git a/cratedb_toolkit/admin/xmover/operational/recommend.py b/cratedb_toolkit/admin/xmover/operational/recommend.py index ab5156e6..f7f9e3ea 100644 --- a/cratedb_toolkit/admin/xmover/operational/recommend.py +++ b/cratedb_toolkit/admin/xmover/operational/recommend.py @@ -123,8 +123,8 @@ def validate(self, request: ShardRelocationRequest): console.print() console.print("[dim]# Monitor shard health after execution[/dim]") console.print( - "[dim]# Check with: SELECT * FROM sys.shards " - "WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]" + "[dim]# Check with: SELECT * FROM sys.shards " # noqa: S608 + f"WHERE table_name = '{table_name}' AND id = {request.shard_id};[/dim]" ) else: console.print("[red]โœ— VALIDATION FAILED - Move not safe[/red]") @@ -323,7 +323,7 @@ def execute( rec, max_disk_usage_percent=constraints.max_disk_usage ) if not is_safe: - if "Zone conflict" in safety_msg: + if "zone conflict" in safety_msg.lower(): zone_conflicts += 1 console.print(f"-- Move {i}: SKIPPED - {safety_msg}") console.print( @@ -340,7 +340,7 @@ def execute( # Auto-execution if requested if auto_execute: - self._execute_recommendations_safely(recommendations, validate) + self._execute_recommendations_safely(constraints, recommendations, validate) if validate and safe_moves < len(recommendations): if zone_conflicts > 0: @@ -352,14 +352,16 @@ def execute( f"[yellow]Warning: Only {safe_moves} of {len(recommendations)} moves passed safety validation[/yellow]" ) - def _execute_recommendations_safely(self, recommendations, validate: bool): + def _execute_recommendations_safely(self, constraints, recommendations, validate: bool): """Execute recommendations with extensive safety measures""" # Filter to only safe recommendations safe_recommendations = [] if validate: for rec in recommendations: - is_safe, safety_msg = self.analyzer.validate_move_safety(rec, max_disk_usage_percent=95.0) + is_safe, safety_msg = self.analyzer.validate_move_safety( + rec, max_disk_usage_percent=constraints.max_disk_usage + ) if is_safe: safe_recommendations.append(rec) else: @@ -423,7 +425,8 @@ def _execute_recommendations_safely(self, recommendations, validate: bool): # Execute the SQL command result = self.client.execute_query(sql_command) - if 
result.get("rowcount", 0) >= 0: # Success indicator for ALTER statements + # ALTER TABLE REROUTE commands don't return rowcount, check for no error instead. + if "error" not in result: console.print(" [green]โœ… SUCCESS[/green] - Move initiated") successful_moves += 1 @@ -482,7 +485,8 @@ def _wait_for_recovery_capacity(self, max_concurrent_recoveries: int = 5): while True: # Check active recoveries (including transitioning) recoveries = recovery_monitor.get_cluster_recovery_status() - active_count = len([r for r in recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) + # Count recoveries that are actively running (not completed) + active_count = len([r for r in recoveries if r.overall_progress < 100.0]) status = f"{active_count}/{max_concurrent_recoveries}" if active_count < max_concurrent_recoveries: if wait_time > 0: diff --git a/cratedb_toolkit/admin/xmover/util/database.py b/cratedb_toolkit/admin/xmover/util/database.py index 21950ab0..d1a0e14e 100644 --- a/cratedb_toolkit/admin/xmover/util/database.py +++ b/cratedb_toolkit/admin/xmover/util/database.py @@ -10,7 +10,7 @@ import urllib3 from dotenv import load_dotenv -from cratedb_toolkit.admin.xmover.model import NodeInfo, RecoveryInfo, ShardInfo +from cratedb_toolkit.admin.xmover.model import ActiveShardSnapshot, NodeInfo, RecoveryInfo, ShardInfo logger = logging.getLogger(__name__) @@ -39,6 +39,8 @@ def __init__(self, connection_string: Optional[str] = None): if not self.connection_string.endswith("/_sql"): self.connection_string = self.connection_string.rstrip("/") + "/_sql" + self.session = requests.Session() + def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[str, Any]: """Execute a SQL query against CrateDB""" payload: Dict[str, Any] = {"stmt": query} @@ -51,11 +53,18 @@ def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[s auth = (self.username, self.password) try: - response = requests.post( + response = self.session.post( self.connection_string, json=payload, auth=auth, verify=self.ssl_verify, timeout=30 ) response.raise_for_status() - return response.json() + data = response.json() + # CrateDB may include an "error" field even with 200 OK + if isinstance(data, dict) and "error" in data and data["error"]: + # Best-effort message extraction + err = data["error"] + msg = err.get("message") if isinstance(err, dict) else str(err) + raise Exception(f"CrateDB error: {msg}") + return data except requests.exceptions.RequestException as e: raise Exception(f"Failed to execute query: {e}") from e @@ -325,6 +334,7 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) SELECT s.table_name, s.schema_name, + translate(p.values::text, ':{}', '=()') as partition_values, s.id as shard_id, s.node['name'] as node_name, s.node['id'] as node_id, @@ -335,13 +345,17 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) s."primary", s.translog_stats['size'] as translog_size FROM sys.shards s - WHERE s.table_name = ? AND s.id = ? + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + WHERE s.schema_name = ? AND s.table_name = ? AND s.id = ? 
AND (s.state = 'RECOVERING' OR s.routing_state IN ('INITIALIZING', 'RELOCATING')) ORDER BY s.schema_name LIMIT 1 """ - result = self.execute_query(query, [table_name, shard_id]) + result = self.execute_query(query, [schema_name, table_name, shard_id]) if not result.get("rows"): return None @@ -350,15 +364,16 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) return { "table_name": row[0], "schema_name": row[1], - "shard_id": row[2], - "node_name": row[3], - "node_id": row[4], - "routing_state": row[5], - "state": row[6], - "recovery": row[7], - "size": row[8], - "primary": row[9], - "translog_size": row[10] or 0, + "partition_values": row[2], + "shard_id": row[3], + "node_name": row[4], + "node_id": row[5], + "routing_state": row[6], + "state": row[7], + "recovery": row[8], + "size": row[9], + "primary": row[10], + "translog_size": row[11] or 0, } def get_all_recovering_shards( @@ -433,6 +448,7 @@ def _parse_recovery_info(self, allocation: Dict[str, Any], shard_detail: Dict[st return RecoveryInfo( schema_name=shard_detail["schema_name"], table_name=shard_detail["table_name"], + partition_values=shard_detail.get("partition_values"), shard_id=shard_detail["shard_id"], node_name=shard_detail["node_name"], node_id=shard_detail["node_id"], @@ -496,3 +512,118 @@ def _is_recovery_completed(self, recovery_info: RecoveryInfo) -> bool: and recovery_info.files_percent >= 100.0 and recovery_info.bytes_percent >= 100.0 ) + + def get_problematic_shards( + self, table_name: Optional[str] = None, node_name: Optional[str] = None + ) -> List[Dict[str, Any]]: + """Get shards that need attention but aren't actively recovering""" + + where_conditions = ["s.state != 'STARTED'"] + parameters = [] + + if table_name: + where_conditions.append("s.table_name = ?") + parameters.append(table_name) + + if node_name: + where_conditions.append("s.node['name'] = ?") + parameters.append(node_name) + + where_clause = f"WHERE {' AND '.join(where_conditions)}" + + query = f""" + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{{}}', '=()') as partition_values, + s.id as shard_id, + s.state, + s.routing_state, + s.node['name'] as node_name, + s.node['id'] as node_id, + s."primary" + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + {where_clause} + ORDER BY s.state, s.table_name, s.id + """ # noqa: S608 + + result = self.execute_query(query, parameters) + + problematic_shards = [] + for row in result.get("rows", []): + problematic_shards.append( + { + "schema_name": row[0] or "doc", + "table_name": row[1], + "partition_values": row[2], + "shard_id": row[3], + "state": row[4], + "routing_state": row[5], + "node_name": row[6], + "node_id": row[7], + "primary": row[8], + } + ) + + return problematic_shards + + def get_active_shards_snapshot(self, min_checkpoint_delta: int = 1000) -> List[ActiveShardSnapshot]: + """Get a snapshot of all started shards for activity monitoring + + Note: This captures ALL started shards regardless of current activity level. + The min_checkpoint_delta parameter is kept for backwards compatibility but + filtering is now done during snapshot comparison to catch shards that + become active between observations. 
+
+        Args:
+            min_checkpoint_delta: Kept for compatibility - filtering now done in comparison
+
+        Returns:
+            List of ActiveShardSnapshot objects for all started shards
+        """
+        import time
+
+        query = """
+                SELECT sh.schema_name, \
+                       sh.table_name, \
+                       sh.id AS shard_id, \
+                       sh."primary", \
+                       node['name'] as node_name, \
+                       sh.partition_ident, \
+                       sh.translog_stats['uncommitted_size'] AS translog_uncommitted_bytes, \
+                       sh.seq_no_stats['local_checkpoint'] AS local_checkpoint, \
+                       sh.seq_no_stats['global_checkpoint'] AS global_checkpoint
+                FROM sys.shards AS sh
+                WHERE sh.state = 'STARTED'
+                ORDER BY sh.schema_name, sh.table_name, sh.id, sh.node['name'] \
+                """

+        try:
+            result = self.execute_query(query)
+            snapshots = []
+            current_time = time.time()
+
+            for row in result.get("rows", []):
+                snapshot = ActiveShardSnapshot(
+                    schema_name=row[0],
+                    table_name=row[1],
+                    shard_id=row[2],
+                    is_primary=row[3],
+                    node_name=row[4],
+                    partition_ident=row[5] or "",
+                    translog_uncommitted_bytes=row[6] or 0,
+                    local_checkpoint=row[7] or 0,
+                    global_checkpoint=row[8] or 0,
+                    timestamp=current_time,
+                )
+                snapshots.append(snapshot)
+
+            return snapshots
+
+        except Exception as e:
+            logger.error(f"Error getting active shards snapshot: {e}")
+            return []
diff --git a/cratedb_toolkit/admin/xmover/util/error.py b/cratedb_toolkit/admin/xmover/util/error.py
index 11dd5f39..22494098 100644
--- a/cratedb_toolkit/admin/xmover/util/error.py
+++ b/cratedb_toolkit/admin/xmover/util/error.py
@@ -1,12 +1,21 @@
-from typing import List, Optional, cast
+from typing import List, Optional

-from rich.console import Console
+from rich import get_console
 from rich.panel import Panel

-console = Console()
+console = get_console()

 def explain_cratedb_error(error_message: Optional[str]):
+    """
+    Decode and troubleshoot common CrateDB shard allocation errors.
+
+    Parameters
+    ----------
+    error_message:
+        Raw CrateDB error message. If None, the user is prompted to paste the
+        message interactively (finish the input with two blank lines).
+ """ console.print(Panel.fit("[bold blue]CrateDB Error Message Decoder[/bold blue]")) console.print("[dim]Helps decode and troubleshoot CrateDB shard allocation errors[/dim]") console.print() @@ -24,7 +35,7 @@ def explain_cratedb_error(error_message: Optional[str]): break error_message = "\n".join(lines) - if not error_message.strip(): + if not (error_message or "").strip(): console.print("[yellow]No error message provided[/yellow]") return @@ -96,7 +107,7 @@ def explain_cratedb_error(error_message: Optional[str]): error_lower = error_message.lower() for pattern_info in error_patterns: - if cast(str, pattern_info["pattern"]).lower() in error_lower: + if pattern_info["pattern"].lower() in error_lower: # type: ignore[attr-defined] matches.append(pattern_info) if matches: diff --git a/cratedb_toolkit/admin/xmover/util/format.py b/cratedb_toolkit/admin/xmover/util/format.py index 82c8a3d0..37b2a2cf 100644 --- a/cratedb_toolkit/admin/xmover/util/format.py +++ b/cratedb_toolkit/admin/xmover/util/format.py @@ -1,11 +1,14 @@ +TL_MIN_BYTES = 10 * 1024 * 1024 # 10MiB threshold for visibility + + def format_size(size_gb: float) -> str: """Format size in GB with appropriate precision""" - if size_gb >= 1000: - return f"{size_gb / 1000:.1f}TB" + if size_gb >= 1024: + return f"{size_gb / 1024:.1f}TB" elif size_gb >= 1: return f"{size_gb:.1f}GB" else: - return f"{size_gb * 1000:.0f}MB" + return f"{size_gb * 1024:.0f}MB" def format_percentage(value: float) -> str: @@ -22,8 +25,8 @@ def format_translog_info(recovery_info) -> str: """Format translog size information with color coding""" tl_bytes = recovery_info.translog_size_bytes - # Only show if significant (>10MB for production) - if tl_bytes < 10 * 1024 * 1024: # 10MB for production + # Only show if significant (>10MB for production), ignore others. + if tl_bytes < TL_MIN_BYTES: return "" tl_gb = recovery_info.translog_size_gb @@ -36,10 +39,20 @@ def format_translog_info(recovery_info) -> str: else: color = "green" - # Format size - if tl_gb >= 1.0: - size_str = f"{tl_gb:.1f}GB" + size_str = format_size(tl_gb) + return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" + + +def format_table_display_with_partition(schema_name: str, table_name: str, partition_values: str = None) -> str: + """Format table display with partition values if available""" + # Create base table name + if schema_name and schema_name != "doc": + base_display = f"{schema_name}.{table_name}" else: - size_str = f"{tl_gb * 1000:.0f}MB" + base_display = table_name - return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" + # Add partition values if available + if partition_values: + return f"{base_display} {partition_values}" + else: + return base_display diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index cf9b4abe..22428976 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -56,6 +56,19 @@ xmover recommend --execute xmover recommend --prioritize-space ``` +### Shard Distribution Analysis +This view focuses on large tables. +```bash +# Analyze distribution anomalies for top 10 largest tables +xmover shard-distribution + +# Analyze more tables +xmover shard-distribution --top-tables 20 + +# Detailed health report for specific table +xmover shard-distribution --table my_table +``` + ### Zone Analysis ```bash # Check zone balance @@ -115,11 +128,12 @@ Generates intelligent shard movement recommendations for cluster rebalancing. 
- `--zone-tolerance`: Zone balance tolerance percentage (default: 10) - `--min-free-space`: Minimum free space required on target nodes in GB (default: 100) - `--max-moves`: Maximum number of move recommendations (default: 10) -- `--max-disk-usage`: Maximum disk usage percentage for target nodes (default: 85) +- `--max-disk-usage`: Maximum disk usage percentage for target nodes (default: 90) - `--validate/--no-validate`: Validate move safety (default: True) - `--prioritize-space/--prioritize-zones`: Prioritize available space over zone balancing (default: False) - `--dry-run/--execute`: Show what would be done without generating SQL commands (default: True) - `--node`: Only recommend moves from this specific source node (e.g., data-hot-4) +- `--auto-execute`: Automatically execute the SQL commands (requires `--execute`, asks for confirmation) (default: False) **Examples:** ```bash @@ -231,6 +245,173 @@ xmover monitor-recovery --watch --include-transitioning - **PEER**: Copying shard data from another node (replication/relocation) - **DISK**: Rebuilding shard from local data (after restart/disk issues) + +### `problematic-translogs` +Find and optionally cancel replica shards with problematic translog sizes. + +**Options:** +- `--size-mb INTEGER`: Minimum translog uncommitted size in MB (default: 300) +- `--cancel`: Execute the cancel commands after confirmation + +**Description:** +This command identifies replica shards with large uncommitted translog sizes that may indicate replication issues. By default, it shows the ALTER commands that would cancel these shards. With `--cancel`, it executes them after confirmation. + +**Examples:** +```bash +# Show problematic shards with translog > 300MB (default) +xmover problematic-translogs + +# Show shards with translog > 500MB +xmover problematic-translogs --size-mb 500 + +# Execute cancel commands for shards > 1GB after confirmation +xmover problematic-translogs --size-mb 1000 --cancel +``` + +**Sample Output:** +``` +Found 3 shards with problematic translogs: + Problematic Replica Shards (translog > 300MB) +โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ Schema โ”‚ Table โ”‚ Partition โ”‚ Shard ID โ”‚ Node โ”‚ Translog MB โ”‚ +โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค +โ”‚ TURVO โ”‚ shipmentFormFieldData โ”‚ none โ”‚ 14 โ”‚ data-hot-6 โ”‚ 7040.9 โ”‚ +โ”‚ TURVO โ”‚ shipmentFormFieldData_events โ”‚ ("sync_day"=1757376000000) โ”‚ 3 โ”‚ data-hot-2 โ”‚ 481.2 โ”‚ +โ”‚ TURVO โ”‚ orderFormFieldData โ”‚ none โ”‚ 5 โ”‚ data-hot-1 โ”‚ 469.5 โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +Generated ALTER Commands: +ALTER TABLE "TURVO"."shipmentFormFieldData" REROUTE CANCEL SHARD 14 on 'data-hot-6' WITH 
(allow_primary=False); +ALTER TABLE "TURVO"."shipmentFormFieldData_events" partition ("sync_day"=1757376000000) REROUTE CANCEL SHARD 3 on 'data-hot-2' WITH (allow_primary=False); +ALTER TABLE "TURVO"."orderFormFieldData" REROUTE CANCEL SHARD 5 on 'data-hot-1' WITH (allow_primary=False); +Total: 3 ALTER commands generated +``` + + +### `active-shards` +Monitor the most active shards by tracking checkpoint progression over time. +This command helps identify which shards are receiving the most write activity +by measuring local checkpoint progression between two snapshots. + +**Options:** +- `--count`: Number of most active shards to show (default: 10) +- `--interval`: Observation interval in seconds (default: 30) +- `--min-checkpoint-delta`: Minimum checkpoint progression between snapshots to show shard (default: 1000) +- `--table, -t`: Monitor specific table only +- `--node, -n`: Monitor specific node only +- `--watch, -w`: Continuously monitor (refresh every interval) +- `--exclude-system`: Exclude system tables (gc.*, information_schema.*, *_events, *_log) +- `--min-rate`: Minimum activity rate (changes/sec) to show +- `--show-replicas/--hide-replicas`: Show replica shards (default: True) + +**How it works:** +1. **Takes snapshot of ALL started shards** (not just currently active ones) +2. **Waits for observation interval** (configurable, default: 30 seconds) +3. **Takes second snapshot** of all started shards +4. **Compares snapshots** to find shards with checkpoint progression โ‰ฅ threshold +5. **Shows ranked results** with activity trends and insights + +**Enhanced output features:** +- **Checkpoint visibility**: Shows actual `local_checkpoint` values (CP Start โ†’ CP End โ†’ Delta) +- **Partition awareness**: Separate tracking for partitioned tables (different partition_ident values) +- **Activity trends**: ๐Ÿ”ฅ HOT (โ‰ฅ100/s), ๐Ÿ“ˆ HIGH (โ‰ฅ50/s), ๐Ÿ“Š MED (โ‰ฅ10/s), ๐Ÿ“‰ LOW (<10/s) +- **Smart insights**: Identifies concentration patterns and load distribution (non-watch mode) +- **Flexible filtering**: Exclude system tables, set minimum rates, hide replicas +- **Context information**: Total activity, average rates, observation period +- **Clean watch mode**: Streamlined output without legend/insights for continuous monitoring + +This approach captures shards that become active during the observation period, providing a complete view of cluster write patterns and identifying hot spots. The enhanced filtering helps focus on business-critical activity patterns. 
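+
+Conceptually, the comparison step boils down to a few lines of Python. The sketch
+below is illustrative only; the attribute names follow the snapshot objects used
+internally (`shard_identifier`, `local_checkpoint`, `timestamp`):
+
+```python
+def most_active(snap1, snap2, min_delta=1000, top=10):
+    """Rank shards by local checkpoint progression between two snapshots."""
+    first = {s.shard_identifier: s for s in snap1}
+    ranked = []
+    for s2 in snap2:
+        s1 = first.get(s2.shard_identifier)
+        if s1 is None:
+            continue  # Shard appeared between snapshots; no baseline to compare.
+        delta = s2.local_checkpoint - s1.local_checkpoint
+        elapsed = s2.timestamp - s1.timestamp
+        if delta >= min_delta:
+            ranked.append((delta, delta / elapsed if elapsed > 0 else 0.0, s2))
+    return sorted(ranked, key=lambda item: item[0], reverse=True)[:top]
+```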
+
+**Sample output (single run):**
+```
+🔥 Most Active Shards (3 shown, 30s observation period)
+Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec
+ Rank | Schema.Table          | Shard | Partition      | Node       | Type | Checkpoint Δ | Rate/sec | Trend
+ -----------------------------------------------------------------------------------------------------------
+    1 | gc.scheduled_jobs_log |     0 | -              | data-hot-8 | P    |      113,744 |  3,791.5 | 🔥 HOT
+    2 | TURVO.events          |     0 | 04732dpl6osj8d | data-hot-0 | P    |       45,837 |  1,527.9 | 🔥 HOT
+    3 | doc.user_actions      |     1 | 04732dpk70rj6d | data-hot-2 | P    |       30,733 |  1,024.4 | 🔥 HOT
+Legend:
+  • Checkpoint Δ: Write operations during observation period
+  • Partition: partition_ident (truncated if >14 chars, '-' if none)
+Insights:
+  • 3 HOT shards (≥100 changes/sec) - consider load balancing
+  • All active shards are PRIMARY - normal write pattern
+```
+
+**Sample output (watch mode - cleaner):**
+```
+30s interval | threshold: 1,000 | top 5
+🔥 Most Active Shards (3 shown, 30s observation period)
+Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec
+ Rank | Schema.Table          | Shard | Partition      | Node       | Type | Checkpoint Δ | Rate/sec | Trend
+ -----------------------------------------------------------------------------------------------------------
+    1 | gc.scheduled_jobs_log |     0 | -              | data-hot-8 | P    |      113,744 |  3,791.5 | 🔥 HOT
+    2 | TURVO.events          |     0 | 04732dpl6osj8d | data-hot-0 | P    |       45,837 |  1,527.9 | 🔥 HOT
+    3 | doc.user_actions      |     1 | 04732dpk70rj6d | data-hot-2 | P    |       30,733 |  1,024.4 | 🔥 HOT
+━━━ Next update in 30s ━━━
+```
+
+#### Examples
+```bash
+# Show top 10 most active shards over 30 seconds
+xmover active-shards
+
+# Top 20 shards with 60-second observation period
+xmover active-shards --count 20 --interval 60
+
+# Continuous monitoring with 30-second intervals
+xmover active-shards --watch --interval 30
+
+# Monitor specific table activity
+xmover active-shards --table my_table --watch
+
+# Monitor specific node with custom threshold
+xmover active-shards --node data-hot-1 --min-checkpoint-delta 500
+
+# Exclude system tables and event logs for business data focus
+xmover active-shards --exclude-system --count 20
+
+# Only show high-activity shards (≥50 changes/sec)
+xmover active-shards --min-rate 50 --count 15
+
+# Focus on primary shards only
+xmover active-shards --hide-replicas --count 20
+```
+
+#### Monitoring Active Shards and Write Patterns
+
+Identify which shards are receiving the most write activity:
+
+1. Quick snapshot of most active shards:
+```bash
+# Show top 10 most active shards over 30 seconds
+xmover active-shards
+
+# Longer observation period for more accurate results
+xmover active-shards --count 15 --interval 60
+```
+
+2. Continuous monitoring for real-time insights:
+```bash
+# Continuous monitoring with 30-second intervals
+xmover active-shards --watch --interval 30
+
+# Monitor specific table for focused analysis
+xmover active-shards --table critical_table --watch
+```
+
+3. Integration with rebalancing workflow:
+```bash
+# Identify hot shards first
+xmover active-shards --count 20 --interval 60
+
+# Move hot shards away from overloaded nodes
+xmover recommend --table hot_table --prioritize-space --execute
+
+# Monitor the impact
+xmover active-shards --table hot_table --watch
+```
+
 ### `test-connection`
 Tests the connection to CrateDB and displays basic cluster information.
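 
 A quick way to verify connectivity, for example before running `analyze` or `recommend`:
 
 ```bash
 xmover test-connection
 ```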
diff --git a/doc/admin/xmover/index.md b/doc/admin/xmover/index.md
index affa4825..d1aead79 100644
--- a/doc/admin/xmover/index.md
+++ b/doc/admin/xmover/index.md
@@ -11,6 +11,7 @@ SQL commands for shard rebalancing and node decommissioning.
 ## Features
 - **Cluster Analysis**: Complete overview of shard distribution across nodes and zones
+- **Shard Distribution Analysis**: Detect and rank distribution anomalies across the largest tables
 - **Shard Movement Recommendations**: Intelligent suggestions for rebalancing with safety validation
 - **Recovery Monitoring**: Track ongoing shard recovery operations with progress details
 - **Zone Conflict Detection**: Prevents moves that would violate CrateDB's zone awareness
diff --git a/doc/admin/xmover/queries.md b/doc/admin/xmover/queries.md
index 27bd89e6..17af71c9 100644
--- a/doc/admin/xmover/queries.md
+++ b/doc/admin/xmover/queries.md
@@ -69,7 +69,7 @@ ORDER BY name;
 +------------+--------------------+-----------------------------------------------+
 ```
-## List biggest SHARDS on a particular Nodes
+## List biggest shards on a particular node
 ```sql
 select node['name'], table_name, schema_name, id, sum(size) / 1024^3 from sys.shards
@@ -216,3 +216,34 @@ SELECT
 WHERE current_state != 'STARTED' and table_name = 'dispatchio' and shard_id = 19
 ORDER BY current_state, table_name, shard_id;
 ```
+
+## "BIGDUDES": focus on your biggest storage consumers
+
+This query shows how the shards of your biggest storage consumers are distributed across nodes.
+
+```sql
+WITH largest_tables AS (
+    SELECT
+        schema_name,
+        table_name,
+        SUM(CASE WHEN "primary" = true THEN size ELSE 0 END) as total_primary_size
+    FROM sys.shards
+    WHERE schema_name NOT IN ('sys', 'information_schema', 'pg_catalog')
+    GROUP BY schema_name, table_name
+    ORDER BY total_primary_size DESC
+    LIMIT 10
+)
+SELECT
+    s.schema_name,
+    s.table_name,
+    s.node['name'] as node_name,
+    COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards,
+    COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards,
+    COUNT(*) as total_shards,
+    ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb,
+    ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, 2) as primary_size_gb,
+    ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, 2) as replica_size_gb,
+    SUM(s.num_docs) as total_documents
+FROM sys.shards s
+INNER JOIN largest_tables lt ON (s.schema_name = lt.schema_name AND s.table_name = lt.table_name)
+GROUP BY s.schema_name, s.table_name, s.node['name']
+ORDER BY s.schema_name, s.table_name, s.node['name'];
+```
diff --git a/pyproject.toml b/pyproject.toml
index f6614eb8..6770d234 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -331,6 +331,7 @@ lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ]
 lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint`
 lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`.
 lint.per-file-ignores."tests/adapter/test_rockset.py" = [ "E402" ]
+lint.per-file-ignores."tests/admin/*" = [ "T201" ] # Allow use of `print`.
lint.per-file-ignores."tests/info/test_http.py" = [ "E402" ] [tool.pytest.ini_options] diff --git a/tests/admin/test_active_shard_monitor.py b/tests/admin/test_active_shard_monitor.py new file mode 100644 index 00000000..55268b15 --- /dev/null +++ b/tests/admin/test_active_shard_monitor.py @@ -0,0 +1,472 @@ +""" +Tests for ActiveShardMonitor functionality +""" + +import time +from unittest.mock import Mock, patch + +from cratedb_toolkit.admin.xmover.analysis.shard import ActiveShardMonitor +from cratedb_toolkit.admin.xmover.model import ActiveShardActivity, ActiveShardSnapshot +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +class TestActiveShardSnapshot: + """Test ActiveShardSnapshot dataclass""" + + def test_checkpoint_delta(self): + """Test checkpoint delta calculation""" + snapshot = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=time.time(), + ) + + assert snapshot.checkpoint_delta == 1000 + assert snapshot.translog_uncommitted_mb == 10.0 + assert snapshot.shard_identifier == "test_schema.test_table:1:node1:P" + + +class TestActiveShardActivity: + """Test ActiveShardActivity dataclass""" + + def test_activity_calculations(self): + """Test activity rate and property calculations""" + snapshot1 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1000, + global_checkpoint=500, + translog_uncommitted_bytes=5242880, # 5MB + timestamp=100.0, + ) + + snapshot2 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=130.0, # 30 seconds later + ) + + activity = ActiveShardActivity( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + assert activity.activity_rate == 500 / 30.0 # ~16.67 changes/sec + assert activity.shard_type == "PRIMARY" + assert activity.table_identifier == "test_schema.test_table" + + +class TestCrateDBClientActiveShards: + """Test CrateDB client active shards functionality""" + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_success(self, mock_execute): + """Test successful snapshot retrieval""" + mock_execute.return_value = { + "rows": [ + ["schema1", "table1", 1, True, "node1", "", 10485760, 1500, 500], + ["schema1", "table2", 2, False, "node2", "part1", 20971520, 2000, 800], + ] + } + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert len(snapshots) == 2 + + # Check first snapshot + snap1 = snapshots[0] + assert snap1.schema_name == "schema1" + assert snap1.table_name == "table1" + assert snap1.shard_id == 1 + assert snap1.is_primary is True + assert snap1.node_name == "node1" + assert snap1.local_checkpoint == 1500 + assert snap1.global_checkpoint == 500 + assert snap1.checkpoint_delta == 1000 + assert snap1.translog_uncommitted_mb == 10.0 + + # Check second snapshot + snap2 = snapshots[1] + assert 
snap2.schema_name == "schema1" + assert snap2.table_name == "table2" + assert snap2.shard_id == 2 + assert snap2.is_primary is False + assert snap2.node_name == "node2" + assert snap2.partition_ident == "part1" + assert snap2.checkpoint_delta == 1200 + assert snap2.translog_uncommitted_mb == 20.0 + + # Verify query was called without checkpoint delta filter (new behavior) + mock_execute.assert_called_once() + args = mock_execute.call_args[0] + # No longer passes min_checkpoint_delta parameter + assert len(args) == 1 # Only the query, no parameters + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_empty(self, mock_execute): + """Test snapshot retrieval with no results""" + mock_execute.return_value = {"rows": []} + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_error(self, mock_execute): + """Test snapshot retrieval with database error""" + mock_execute.side_effect = Exception("Database connection failed") + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + +class TestActiveShardMonitor: + """Test ActiveShardMonitor class""" + + def setup_method(self): + """Set up test fixtures""" + self.mock_client = Mock(spec=CrateDBClient) + self.monitor = ActiveShardMonitor(self.mock_client) + + def create_test_snapshot( + self, + schema: str, + table: str, + shard_id: int, + node: str, + is_primary: bool, + local_checkpoint: int, + timestamp: float, + ): + """Helper to create test snapshots""" + return ActiveShardSnapshot( + schema_name=schema, + table_name=table, + shard_id=shard_id, + node_name=node, + is_primary=is_primary, + partition_ident="", + local_checkpoint=local_checkpoint, + global_checkpoint=500, # Fixed for simplicity + translog_uncommitted_bytes=10485760, # 10MB + timestamp=timestamp, + ) + + def test_compare_snapshots_with_activity(self): + """Test comparing snapshots with active shards""" + # Create first snapshot + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), + ] + + # Create second snapshot (30 seconds later with activity) + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0), # +500 + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2200, 130.0), # +200 + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 130.0), # No change + self.create_test_snapshot("schema1", "table4", 1, "node3", True, 1000, 130.0), # New shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 activities (table3 had no change, table4 is new) + assert len(activities) == 2 + + # Check activities are sorted by checkpoint delta (highest first) + assert activities[0].local_checkpoint_delta == 500 # table1 + assert activities[0].schema_name == "schema1" + assert activities[0].table_name == "table1" + + assert activities[1].local_checkpoint_delta == 200 # table2 + assert activities[1].schema_name == "schema1" + assert activities[1].table_name == "table2" + + # Check activity rate calculation + assert activities[0].activity_rate == 500 / 30.0 # 
~16.67/sec + assert activities[1].activity_rate == 200 / 30.0 # ~6.67/sec + + def test_compare_snapshots_no_activity(self): + """Test comparing snapshots with no activity""" + # Create identical snapshots + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 130.0), # No change + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_compare_snapshots_no_overlap(self): + """Test comparing snapshots with no overlapping shards""" + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table2", 1, "node2", True, 1500, 130.0), # Different shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_format_activity_display_with_activities(self): + """Test formatting activity display with data""" + # Create test activities + snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name="table1", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False) + + # Check that output contains expected elements + assert "Most Active Shards" in display + assert "schema1.table1" in display + assert "500" in display # checkpoint delta + assert "16.7" in display # activity rate + assert "P" in display # primary indicator + assert "Legend:" in display + assert "Trend:" in display # new trend column explanation + assert "Partition:" in display # new partition column explanation + + def test_format_activity_display_empty(self): + """Test formatting activity display with no data""" + display = self.monitor.format_activity_display([], show_count=10, watch_mode=False) + + assert "No active shards with significant checkpoint progression found" in display + + def test_format_activity_display_count_limit(self): + """Test that display respects show_count limit""" + # Create multiple activities + activities = [] + for i in range(15): + snapshot1 = self.create_test_snapshot("schema1", f"table{i}", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", f"table{i}", 1, "node1", True, 1000 + (i + 1) * 100, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name=f"table{i}", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=(i + 1) * 100, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + activities.append(activity) + + # Sort activities by checkpoint delta (highest first) - same as compare_snapshots does + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + # Should only show top 5 + display = self.monitor.format_activity_display(activities, show_count=5, watch_mode=False) + + # Count number of table entries in display + table_count = display.count("schema1.table") + assert table_count == 5 # Should only show 5 entries + + # Should show highest activity first 
(table14 has highest checkpoint delta) + assert "schema1.table14" in display + + def test_compare_snapshots_with_activity_threshold(self): + """Test filtering activities by minimum threshold""" + # Create snapshots with various activity levels + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), # Will have +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), # Will have +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), # Will have +100 delta + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 3000, 130.0), # +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2500, 130.0), # +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3100, 130.0), # +100 delta + ] + + # Test with threshold of 1000 - should only show table1 (2000 delta) + activities_high_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1000) + assert len(activities_high_threshold) == 1 + assert activities_high_threshold[0].table_name == "table1" + assert activities_high_threshold[0].local_checkpoint_delta == 2000 + + # Test with threshold of 200 - should show table1 and table2 + activities_medium_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=200) + assert len(activities_medium_threshold) == 2 + assert activities_medium_threshold[0].local_checkpoint_delta == 2000 # table1 first (highest) + assert activities_medium_threshold[1].local_checkpoint_delta == 500 # table2 second + + # Test with threshold of 0 - should show all three + activities_low_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=0) + assert len(activities_low_threshold) == 3 + assert activities_low_threshold[0].local_checkpoint_delta == 2000 # Sorted by activity + assert activities_low_threshold[1].local_checkpoint_delta == 500 + assert activities_low_threshold[2].local_checkpoint_delta == 100 + + def test_primary_replica_separation(self): + """Test that primary and replica shards are tracked separately""" + # Create snapshots with same table/shard but different primary/replica + snapshot1 = [ + # Primary shard + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 15876, 100.0), + # Replica shard (same table/shard/node but different type) + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129434, 100.0), + ] + + snapshot2 = [ + # Primary shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 16000, 130.0), # +124 delta + # Replica shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129500, 130.0), # +66 delta + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 separate activities (primary and replica tracked separately) + assert len(activities) == 2 + + # Find primary and replica activities + primary_activity = next(a for a in activities if a.is_primary) + replica_activity = next(a for a in activities if not a.is_primary) + + # Verify deltas are calculated correctly for each type + assert primary_activity.local_checkpoint_delta == 124 # 16000 - 15876 + assert replica_activity.local_checkpoint_delta == 66 # 129500 - 129434 + + # Verify they have different shard identifiers + assert 
primary_activity.snapshot1.shard_identifier != replica_activity.snapshot1.shard_identifier
+        assert "data-hot-8:P" in primary_activity.snapshot1.shard_identifier
+        assert "data-hot-8:R" in replica_activity.snapshot1.shard_identifier
+
+        # This test prevents the bug where we mixed primary CP End with replica CP Start,
+        # which created fake deltas like 129434 - 15876 = 113558
+
+    def test_partition_separation(self):
+        """Test that partitions within the same table/shard are tracked separately"""
+        # Create snapshots with same table/shard but different partitions
+        snapshot1 = [
+            # Partition 1
+            self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 32684, 100.0),
+            # Partition 2 (same table/shard/node/type but different partition)
+            self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54289, 100.0),
+        ]
+
+        # Modify partition_ident for the snapshots to simulate different partitions
+        snapshot1[0].partition_ident = "04732dpl6osj8d1g60o30c1g"
+        snapshot1[1].partition_ident = "04732dpl6os3adpm60o30c1g"
+
+        snapshot2 = [
+            # Partition 1 progresses (+116 delta)
+            self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 32800, 130.0),
+            # Partition 2 progresses (+111 delta)
+            self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54400, 130.0),
+        ]
+
+        # Set partition_ident for second snapshot
+        snapshot2[0].partition_ident = "04732dpl6osj8d1g60o30c1g"
+        snapshot2[1].partition_ident = "04732dpl6os3adpm60o30c1g"
+
+        activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1)
+
+        # Should have 2 separate activities (partitions tracked separately)
+        assert len(activities) == 2
+
+        # Verify deltas are calculated correctly for each partition
+        partition1_activity = next(a for a in activities if "04732dpl6osj8d1g60o30c1g" in a.snapshot1.shard_identifier)
+        partition2_activity = next(a for a in activities if "04732dpl6os3adpm60o30c1g" in a.snapshot1.shard_identifier)
+
+        assert partition1_activity.local_checkpoint_delta == 116  # 32800 - 32684
+        assert partition2_activity.local_checkpoint_delta == 111  # 54400 - 54289
+
+        # Verify they have different shard identifiers due to partition
+        assert partition1_activity.snapshot1.shard_identifier != partition2_activity.snapshot1.shard_identifier
+        assert ":04732dpl6osj8d1g60o30c1g" in partition1_activity.snapshot1.shard_identifier
+        assert ":04732dpl6os3adpm60o30c1g" in partition2_activity.snapshot1.shard_identifier
+
+        # This test prevents mixing partitions which would create fake activity measurements
+
+    def test_format_activity_display_watch_mode(self):
+        """Test that watch mode excludes legend and insights"""
+        snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0)
+        snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0)
+
+        activity = ActiveShardActivity(
+            schema_name="schema1",
+            table_name="table1",
+            shard_id=1,
+            node_name="node1",
+            is_primary=True,
+            partition_ident="",
+            local_checkpoint_delta=500,
+            snapshot1=snapshot1,
+            snapshot2=snapshot2,
+            time_diff_seconds=30.0,
+        )
+
+        # Test non-watch mode (should include legend and insights)
+        normal_display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False)
+        assert "Legend:" in normal_display
+        assert "Insights:" in normal_display
+        assert "Checkpoint Δ:" in normal_display
+
+        # Test watch mode (should exclude legend and insights)
+        watch_display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=True)
+        assert "Legend:" not in watch_display
+        assert "Insights:" not in watch_display
+        assert "Checkpoint Δ" in watch_display  # Core data should still be present
+
+        # But should still contain the core data
+        assert "Most Active Shards" in watch_display
+        assert "schema1.table1" in watch_display
+        assert "500" in watch_display  # checkpoint delta
diff --git a/tests/admin/test_cli.py b/tests/admin/test_cli.py
index 60e8d810..c90086bb 100644
--- a/tests/admin/test_cli.py
+++ b/tests/admin/test_cli.py
@@ -7,12 +7,15 @@
 @pytest.mark.parametrize(
     "subcommand",
     [
+        "active-shards",
         "analyze",
         "check-balance",
         "explain-error",
         "find-candidates",
         "monitor-recovery",
+        "problematic-translogs",
         "recommend",
+        "shard-distribution",
         "test-connection",
         "zone-analysis",
     ],
diff --git a/tests/admin/test_distribution_analyzer.py b/tests/admin/test_distribution_analyzer.py
new file mode 100644
index 00000000..92b4f580
--- /dev/null
+++ b/tests/admin/test_distribution_analyzer.py
@@ -0,0 +1,338 @@
+"""
+Tests for distribution analyzer functionality
+"""
+
+from unittest.mock import Mock, patch
+
+from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer, DistributionAnomaly, TableDistribution
+from cratedb_toolkit.admin.xmover.model import NodeInfo
+from cratedb_toolkit.admin.xmover.util.database import CrateDBClient
+
+
+class TestDistributionAnalyzer:
+    def setup_method(self):
+        """Set up test fixtures"""
+        self.mock_client = Mock(spec=CrateDBClient)
+        self.analyzer = DistributionAnalyzer(self.mock_client)
+
+    def test_coefficient_of_variation_calculation(self):
+        """Test CV calculation with different scenarios"""
+
+        # Normal case
+        values = [10, 12, 8, 14, 6]
+        cv = self.analyzer.calculate_coefficient_of_variation(values)
+        assert cv > 0
+
+        # All equal values (should return 0)
+        equal_values = [10, 10, 10, 10]
+        cv_equal = self.analyzer.calculate_coefficient_of_variation(equal_values)
+        assert cv_equal == 0.0
+
+        # Empty list
+        empty_values = []
+        cv_empty = self.analyzer.calculate_coefficient_of_variation(empty_values)
+        assert cv_empty == 0.0
+
+        # Single value
+        single_value = [10]
+        cv_single = self.analyzer.calculate_coefficient_of_variation(single_value)
+        assert cv_single == 0.0
+
+    def test_get_largest_tables_distribution(self):
+        """Test fetching table distribution data"""
+
+        # Mock query results
+        mock_results = {
+            "rows": [
+                # schema, table, node, primary_shards, replica_shards, total_shards, total_size, primary_size, replica_size, docs  # noqa: E501, ERA001
+                ["doc", "large_table", "node1", 5, 2, 7, 100.5, 80.2, 20.3, 1000000],
+                ["doc", "large_table", "node2", 4, 3, 7, 95.1, 75.8, 19.3, 950000],
+                ["doc", "large_table", "node3", 6, 1, 7, 110.2, 85.9, 24.3, 1100000],
+                ["custom", "another_table", "node1", 3, 2, 5, 50.1, 40.2, 9.9, 500000],
+                ["custom", "another_table", "node2", 2, 3, 5, 45.8, 35.1, 10.7, 480000],
+            ]
+        }
+
+        self.mock_client.execute_query.return_value = mock_results
+
+        distributions = self.analyzer.get_largest_tables_distribution(top_n=10)
+
+        # Verify query was called with correct parameters
+        self.mock_client.execute_query.assert_called_once()
+        call_args = self.mock_client.execute_query.call_args
+        assert call_args[0][1] == [10]  # top_n parameter
+
+        # Verify we got the expected number of tables
+        assert len(distributions) == 2
+
+        # Verify table data structure
+        large_table = next(d for d in distributions if d.table_name == "large_table")
+        
assert large_table.schema_name == "doc" + assert large_table.full_table_name == "large_table" # Should omit 'doc' schema + assert len(large_table.node_distributions) == 3 + + another_table = next(d for d in distributions if d.table_name == "another_table") + assert another_table.schema_name == "custom" + assert another_table.full_table_name == "custom.another_table" + assert len(another_table.node_distributions) == 2 + + # Verify sorting by primary size (descending) + assert distributions[0].total_primary_size_gb >= distributions[1].total_primary_size_gb + + def test_detect_shard_count_imbalance(self): + """Test shard count imbalance detection""" + + # Create test table with imbalanced shard distribution + imbalanced_table = TableDistribution( + schema_name="doc", + table_name="imbalanced_table", + total_primary_size_gb=500.0, + node_distributions={ + "node1": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + "node2": {"total_shards": 15, "primary_shards": 8, "replica_shards": 7}, + "node3": {"total_shards": 5, "primary_shards": 2, "replica_shards": 3}, + }, + ) + + anomaly = self.analyzer.detect_shard_count_imbalance(imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Shard Count Imbalance" + assert anomaly.combined_score > 0 + assert len(anomaly.recommendations) > 0 + + # Create balanced table (should not detect anomaly) + balanced_table = TableDistribution( + schema_name="doc", + table_name="balanced_table", + total_primary_size_gb=100.0, + node_distributions={ + "node1": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + "node2": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + "node3": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + }, + ) + + no_anomaly = self.analyzer.detect_shard_count_imbalance(balanced_table) + assert no_anomaly is None + + def test_detect_storage_imbalance(self): + """Test storage imbalance detection""" + + # Create test table with storage imbalance + storage_imbalanced_table = TableDistribution( + schema_name="doc", + table_name="storage_imbalanced", + total_primary_size_gb=300.0, + node_distributions={ + "node1": {"total_size_gb": 150.0, "primary_size_gb": 100.0, "replica_size_gb": 50.0}, + "node2": {"total_size_gb": 50.0, "primary_size_gb": 30.0, "replica_size_gb": 20.0}, + "node3": {"total_size_gb": 100.0, "primary_size_gb": 70.0, "replica_size_gb": 30.0}, + }, + ) + + anomaly = self.analyzer.detect_storage_imbalance(storage_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Storage Imbalance" + assert anomaly.combined_score > 0 + + # Small table (should be ignored) + small_table = TableDistribution( + schema_name="doc", + table_name="small_table", + total_primary_size_gb=0.1, + node_distributions={ + "node1": {"total_size_gb": 0.5, "primary_size_gb": 0.05, "replica_size_gb": 0.05}, + "node2": {"total_size_gb": 0.1, "primary_size_gb": 0.03, "replica_size_gb": 0.02}, + }, + ) + + no_anomaly = self.analyzer.detect_storage_imbalance(small_table) + assert no_anomaly is None + + def test_detect_node_coverage_issues(self): + """Test node coverage issue detection""" + + # Mock nodes_info to simulate cluster with 4 nodes + mock_nodes = [ + NodeInfo( + id="node1", + name="node1", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + NodeInfo( + id="node2", + name="node2", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + NodeInfo( 
+ id="node3", + name="node3", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + NodeInfo( + id="node4", + name="node4", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + ] + self.mock_client.get_nodes_info.return_value = mock_nodes + + # Table with limited coverage (only on 2 out of 4 nodes) + limited_coverage_table = TableDistribution( + schema_name="doc", + table_name="limited_coverage", + total_primary_size_gb=100.0, # Significant size + node_distributions={ + "node1": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + "node2": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + # node3 and node4 missing + }, + ) + + anomaly = self.analyzer.detect_node_coverage_issues(limited_coverage_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Node Coverage Issue" + assert "node3" in anomaly.details["nodes_without_shards"] + assert "node4" in anomaly.details["nodes_without_shards"] + assert len(anomaly.recommendations) > 0 + + def test_detect_document_imbalance(self): + """Test document imbalance detection""" + + # Table with document imbalance + doc_imbalanced_table = TableDistribution( + schema_name="doc", + table_name="doc_imbalanced", + total_primary_size_gb=200.0, + node_distributions={ + "node1": {"total_documents": 1000000}, # 1M docs + "node2": {"total_documents": 500000}, # 500K docs + "node3": {"total_documents": 100000}, # 100K docs (5x imbalance) + }, + ) + + anomaly = self.analyzer.detect_document_imbalance(doc_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Document Imbalance" + assert "data skew" in anomaly.recommendations[0].lower() + + # Table with very few documents (should be ignored) + low_doc_table = TableDistribution( + schema_name="doc", + table_name="low_docs", + total_primary_size_gb=100.0, + node_distributions={ + "node1": {"total_documents": 1000}, + "node2": {"total_documents": 500}, + }, + ) + + no_anomaly = self.analyzer.detect_document_imbalance(low_doc_table) + assert no_anomaly is None + + def test_analyze_distribution_integration(self): + """Test the full analysis workflow""" + + # Mock the get_largest_tables_distribution method + mock_table = TableDistribution( + schema_name="doc", + table_name="test_table", + total_primary_size_gb=500.0, + node_distributions={ + "node1": { + "total_shards": 15, + "primary_shards": 8, + "replica_shards": 7, + "total_size_gb": 200.0, + "primary_size_gb": 120.0, + "replica_size_gb": 80.0, + "total_documents": 2000000, + }, + "node2": { + "total_shards": 8, + "primary_shards": 4, + "replica_shards": 4, + "total_size_gb": 100.0, + "primary_size_gb": 60.0, + "replica_size_gb": 40.0, + "total_documents": 1000000, + }, + "node3": { + "total_shards": 5, + "primary_shards": 3, + "replica_shards": 2, + "total_size_gb": 50.0, + "primary_size_gb": 30.0, + "replica_size_gb": 20.0, + "total_documents": 500000, + }, + }, + ) + + with patch.object(self.analyzer, "get_largest_tables_distribution", return_value=[mock_table]): + anomalies, tables_analyzed = self.analyzer.analyze_distribution(top_tables=10) + + # Should detect multiple types of anomalies + assert len(anomalies) > 0 + assert tables_analyzed == 1 # We provided 1 mock table + + # Anomalies should be sorted by combined score (descending) + if len(anomalies) > 1: + for i in range(len(anomalies) - 1): + assert anomalies[i].combined_score >= anomalies[i + 1].combined_score + + # Each 
anomaly should have required fields + for anomaly in anomalies: + assert anomaly.table is not None + assert anomaly.anomaly_type is not None + assert anomaly.combined_score >= 0 + assert isinstance(anomaly.recommendations, list) + + def test_format_distribution_report_no_anomalies(self): + """Test report formatting when no anomalies found""" + + # This should not raise an exception + with patch("builtins.print"): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([], 5) + + def test_format_distribution_report_with_anomalies(self): + """Test report formatting with anomalies""" + + mock_anomaly = DistributionAnomaly( + table=TableDistribution("doc", "test_table", 100.0, {}), + anomaly_type="Test Anomaly", + severity_score=7.5, + impact_score=8.0, + combined_score=60.0, + description="Test description", + details={}, + recommendations=["Test recommendation"], + ) + + # This should not raise an exception + with patch("builtins.print"): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([mock_anomaly], 3) diff --git a/tests/admin/test_problematic_translogs.py b/tests/admin/test_problematic_translogs.py new file mode 100644 index 00000000..f63fd4cb --- /dev/null +++ b/tests/admin/test_problematic_translogs.py @@ -0,0 +1,213 @@ +""" +Tests for problematic translogs functionality. +""" + +from unittest.mock import Mock, patch + +from click.testing import CliRunner + +from cratedb_toolkit.admin.xmover.cli import main as cli +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +class TestXMoverProblematicTranslogs: + def setup_method(self): + """Set up test fixtures""" + self.runner = CliRunner() + self.mock_client = Mock(spec=CrateDBClient) + + def test_no_problematic_shards(self): + """Test when no shards meet the criteria""" + self.mock_client.execute_query.return_value = {"rows": []} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"], catch_exceptions=False) + + assert result.exit_code == 0, result.output + assert "No replica shards found" in result.output + assert "300MB" in result.output + + def test_non_partitioned_table_command_generation(self): + """Test ALTER command generation for non-partitioned tables""" + mock_rows = [ + ["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8], + ["TURVO", "orderFormFieldData", "NULL", 5, "data-hot-1", 469.5], + ] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"]) + + assert result.exit_code == 0, result.output + assert "Found 2 shards with problematic translogs" in result.output + # Check that the query results table is shown + assert "Problematic Replica Shards" in result.output + assert "Generated ALTER Commands:" in result.output + # Check that key parts of the ALTER commands are present (Rich may wrap lines) + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 14" in result.output + assert "data-hot-6" in result.output + assert 'ALTER TABLE "TURVO"."orderFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 5" in result.output + assert "data-hot-1" in 
result.output + assert "Total: 2 ALTER commands generated" in result.output + + def test_partitioned_table_command_generation(self): + """Test ALTER command generation for partitioned tables""" + mock_rows = [ + ["TURVO", "shipmentFormFieldData_events", '("sync_day"=1757376000000)', 3, "data-hot-2", 481.2], + ] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "400"]) + + assert result.exit_code == 0, result.output + assert "Found 1 shards with problematic translogs" in result.output + # Check that the query results table is shown + assert "Problematic Replica Shards" in result.output + assert "Generated ALTER Commands:" in result.output + # Check that key parts of the partitioned ALTER command are present + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData_events"' in result.output + assert '("sync_day"=1757376000000)' in result.output + assert "REROUTE CANCEL SHARD 3" in result.output + assert "data-hot-2" in result.output + + def test_mixed_partitioned_non_partitioned(self): + """Test handling of both partitioned and non-partitioned tables""" + mock_rows = [ + ["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8], + ["TURVO", "shipmentFormFieldData_events", '("sync_day"=1757376000000)', 3, "data-hot-2", 481.2], + ["TURVO", "orderFormFieldData", "NULL", 5, "data-hot-1", 469.5], + ] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "200"]) + + assert result.exit_code == 0, result.output + assert "Found 3 shards with problematic translogs" in result.output + # Check that the query results table is shown + assert "Problematic Replica Shards" in result.output + assert "Generated ALTER Commands:" in result.output + + # Check non-partitioned command + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 14" in result.output + assert "data-hot-6" in result.output + + # Check partitioned command + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData_events"' in result.output + assert '("sync_day"=1757376000000)' in result.output + assert "REROUTE CANCEL SHARD 3" in result.output + assert "data-hot-2" in result.output + + # Check NULL partition handled as non-partitioned + assert 'ALTER TABLE "TURVO"."orderFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 5" in result.output + assert "data-hot-1" in result.output + + def test_query_parameters(self): + """Test that the query is called with correct parameters""" + self.mock_client.execute_query.return_value = {"rows": []} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "500"]) + + # Verify the query was called with the correct threshold + self.mock_client.execute_query.assert_called_once() + call_args = self.mock_client.execute_query.call_args + query = call_args[0][0] + parameters = call_args[0][1] + + assert "sh.translog_stats['uncommitted_size']" in query + assert "1024^2" in query + assert "primary = FALSE" in query + assert "6 
DESC" in query # More flexible whitespace matching + assert parameters == [500] + + def test_cancel_flag_user_confirmation_no(self): + """Test --cancel flag with user declining confirmation""" + mock_rows = [["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8]] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client), patch( + "click.confirm", return_value=False + ): + result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) + + assert result.exit_code == 0, result.output + assert "Operation cancelled by user" in result.output + # Should only be called once for the initial query, not for execution + assert self.mock_client.execute_query.call_count == 1 + + def test_cancel_flag_user_confirmation_yes(self): + """Test --cancel flag with user confirming execution""" + mock_rows = [["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8]] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client), patch( + "click.confirm", return_value=True + ), patch("time.sleep"): # Mock sleep to speed up test + result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) + + assert result.exit_code == 0, result.output + assert "Executing ALTER commands" in result.output + assert "Command 1 executed successfully" in result.output + assert "Successful: 1" in result.output + + # Should be called twice: once for query, once for execution + assert self.mock_client.execute_query.call_count == 2 + + def test_execution_failure_handling(self): + """Test handling of failed command execution""" + mock_rows = [["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8]] + + # First call returns rows, second call (execution) raises exception + self.mock_client.execute_query.side_effect = [{"rows": mock_rows}, Exception("Shard not found")] + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client), patch( + "click.confirm", return_value=True + ), patch("time.sleep"): + result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) + + assert result.exit_code == 0, result.output + assert "Command 1 failed: Shard not found" in result.output + assert "Failed: 1" in result.output + assert "Successful: 0" in result.output + + def test_database_error_handling(self): + """Test handling of database connection errors""" + self.mock_client.execute_query.side_effect = Exception("Connection failed") + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs"]) + + assert result.exit_code == 0, result.output + assert "Error analyzing problematic translogs" in result.output + assert "Connection failed" in result.output + + def test_default_size_mb(self): + """Test that default sizeMB is 300""" + self.mock_client.execute_query.return_value = {"rows": []} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs"]) + + assert result.exit_code == 0, result.output + 
assert "300MB" in result.output + + # Verify query was called with default value + call_args = self.mock_client.execute_query.call_args + parameters = call_args[0][1] + assert parameters == [300] diff --git a/tests/admin/test_recovery_monitor.py b/tests/admin/test_recovery_monitor.py new file mode 100644 index 00000000..e8ee0a26 --- /dev/null +++ b/tests/admin/test_recovery_monitor.py @@ -0,0 +1,303 @@ +""" +Test script for XMover recovery monitoring functionality + +This script tests the recovery monitoring features by creating mock recovery scenarios +and verifying the output formatting and data parsing. +""" + +import sys +from typing import Any, Dict +from unittest.mock import Mock + +from cratedb_toolkit.admin.xmover.model import RecoveryInfo +from cratedb_toolkit.admin.xmover.operational.monitor import RecoveryMonitor, RecoveryOptions +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +from cratedb_toolkit.model import DatabaseAddress + + +def create_mock_allocation( + schema_name: str, table_name: str, shard_id: int, current_state: str, node_id: str +) -> Dict[str, Any]: + """Create a mock allocation response""" + return { + "schema_name": schema_name, + "table_name": table_name, + "shard_id": shard_id, + "current_state": current_state, + "node_id": node_id, + "explanation": None, + } + + +def create_mock_shard_detail( + schema_name: str, + table_name: str, + shard_id: int, + node_name: str, + node_id: str, + recovery_type: str, + stage: str, + files_percent: float, + bytes_percent: float, + total_time: int, + size: int, + is_primary: bool, +) -> Dict[str, Any]: + """Create a mock shard detail response""" + return { + "schema_name": schema_name, + "table_name": table_name, + "shard_id": shard_id, + "node_name": node_name, + "node_id": node_id, + "routing_state": "RELOCATING", + "state": "RECOVERING", + "recovery": { + "type": recovery_type, + "stage": stage, + "files": {"percent": files_percent}, + "size": {"percent": bytes_percent}, + "total_time": total_time, + }, + "size": size, + "primary": is_primary, + } + + +def test_recovery_info_parsing(): + """Test RecoveryInfo dataclass and its properties""" + print("Testing RecoveryInfo parsing...") + + recovery = RecoveryInfo( + schema_name="CURVO", + table_name="PartioffD", + partition_values="NULL", + shard_id=19, + node_name="data-hot-1", + node_id="ZH6fBanGSjanGqeSh-sw0A", + recovery_type="PEER", + stage="DONE", + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1555907, + routing_state="RELOCATING", + current_state="RELOCATING", + is_primary=False, + size_bytes=56565284209, + ) + + # Test properties + assert recovery.overall_progress == 100.0, f"Expected 100.0, got {recovery.overall_progress}" + assert abs(recovery.size_gb - 52.681) < 0.01, f"Expected ~52.681, got {recovery.size_gb:.3f}" + assert recovery.shard_type == "REPLICA", f"Expected REPLICA, got {recovery.shard_type}" + assert recovery.total_time_seconds == 1555.907, f"Expected 1555.907, got {recovery.total_time_seconds}" + + print("โœ… RecoveryInfo parsing tests passed") + + +def test_database_client_parsing(cratedb): + """Test database client recovery parsing logic""" + print("Testing database client recovery parsing...") + + # Create a real client instance to test the parsing method + client = CrateDBClient.__new__(CrateDBClient) # Create without calling __init__ + client.username = None + client.password = None + client.connection_string = DatabaseAddress.from_string(cratedb.database.dburi).httpuri + client.ssl_verify = False + + # Create 
+    allocation = create_mock_allocation("CURVO", "PartioffD", 19, "RELOCATING", "node1")
+    shard_detail = create_mock_shard_detail(
+        "CURVO", "PartioffD", 19, "data-hot-1", "node1", "PEER", "DONE", 100.0, 100.0, 1555907, 56565284209, False
+    )
+
+    # Test the parsing method directly
+    recovery_info = client._parse_recovery_info(allocation, shard_detail)
+
+    assert recovery_info.recovery_type == "PEER"
+    assert recovery_info.stage == "DONE"
+    assert recovery_info.overall_progress == 0.0
+
+    print("✅ Database client parsing tests passed")
+
+
+def test_recovery_monitor_formatting():
+    """Test recovery monitor display formatting"""
+    print("Testing recovery monitor formatting...")
+
+    # Create mock client
+    mock_client = Mock(spec=CrateDBClient)
+    monitor = RecoveryMonitor(mock_client)
+
+    # Create test recovery data
+    recoveries = [
+        RecoveryInfo(
+            schema_name="CURVO",
+            table_name="PartioffD",
+            partition_values="NULL",
+            shard_id=19,
+            node_name="data-hot-1",
+            node_id="node1",
+            recovery_type="PEER",
+            stage="DONE",
+            files_percent=100.0,
+            bytes_percent=100.0,
+            total_time_ms=1555907,
+            routing_state="RELOCATING",
+            current_state="RELOCATING",
+            is_primary=False,
+            size_bytes=56565284209,
+        ),
+        RecoveryInfo(
+            schema_name="CURVO",
+            table_name="orderTracking",
+            partition_values="NULL",
+            shard_id=7,
+            node_name="data-hot-2",
+            node_id="node2",
+            recovery_type="DISK",
+            stage="INDEX",
+            files_percent=75.5,
+            bytes_percent=67.8,
+            total_time_ms=890234,
+            routing_state="INITIALIZING",
+            current_state="INITIALIZING",
+            is_primary=True,
+            size_bytes=25120456789,
+        ),
+    ]
+
+    # Test summary generation
+    summary = monitor.get_recovery_summary(recoveries)
+
+    assert summary["total_recoveries"] == 2
+    assert "PEER" in summary["by_type"]
+    assert "DISK" in summary["by_type"]
+    assert summary["by_type"]["PEER"]["count"] == 1
+    assert summary["by_type"]["DISK"]["count"] == 1
+
+    # Test display formatting
+    display_output = monitor.format_recovery_display(recoveries)
+
+    assert "Active Shard Recoveries (2 total)" in display_output
+    assert "PEER Recoveries (1)" in display_output
+    assert "DISK Recoveries (1)" in display_output
+    assert "PartioffD" in display_output
+    assert "orderTracking" in display_output
+
+    print("✅ Recovery monitor formatting tests passed")
+
+
+def test_empty_recovery_handling():
+    """Test handling of no active recoveries"""
+    print("Testing empty recovery handling...")
+
+    mock_client = Mock(spec=CrateDBClient)
+    monitor = RecoveryMonitor(mock_client)
+
+    # Test empty list
+    empty_recoveries = []
+
+    summary = monitor.get_recovery_summary(empty_recoveries)
+    assert summary["total_recoveries"] == 0
+    assert summary["by_type"] == {}
+
+    display_output = monitor.format_recovery_display(empty_recoveries)
+    assert "No active shard recoveries found" in display_output
+
+    print("✅ Empty recovery handling tests passed")
+
+
+def test_recovery_type_filtering():
+    """Test filtering by recovery type"""
+    print("Testing recovery type filtering...")
+
+    mock_client = Mock(spec=CrateDBClient)
+
+    # Mock the get_all_recovering_shards method
+    mock_recoveries = [
+        RecoveryInfo(
+            schema_name="test",
+            table_name="table1",
+            partition_values="NULL",
+            shard_id=1,
+            node_name="node1",
+            node_id="n1",
+            recovery_type="PEER",
+            stage="DONE",
+            files_percent=100.0,
+            bytes_percent=100.0,
+            total_time_ms=1000,
+            routing_state="RELOCATING",
+            current_state="RELOCATING",
+            is_primary=True,
+            size_bytes=1000000,
+        ),
+        RecoveryInfo(
+            schema_name="test",
+            table_name="table2",
+            partition_values="NULL",
+            shard_id=2,
+            node_name="node2",
+            node_id="n2",
+            recovery_type="DISK",
+            stage="INDEX",
+            files_percent=50.0,
+            bytes_percent=45.0,
+            total_time_ms=2000,
+            routing_state="INITIALIZING",
+            current_state="INITIALIZING",
+            is_primary=False,
+            size_bytes=2000000,
+        ),
+    ]
+
+    mock_client.get_all_recovering_shards.return_value = mock_recoveries
+
+    # Test filtering
+    monitor = RecoveryMonitor(mock_client, options=RecoveryOptions(recovery_type="PEER"))
+    peer_only = monitor.get_cluster_recovery_status()
+    assert len(peer_only) == 1
+    assert peer_only[0].recovery_type == "PEER"
+
+    monitor = RecoveryMonitor(mock_client, options=RecoveryOptions(recovery_type="DISK"))
+    disk_only = monitor.get_cluster_recovery_status()
+    assert len(disk_only) == 1
+    assert disk_only[0].recovery_type == "DISK"
+
+    monitor = RecoveryMonitor(mock_client, options=RecoveryOptions(recovery_type="all"))
+    all_recoveries = monitor.get_cluster_recovery_status()
+    assert len(all_recoveries) == 2
+
+    print("✅ Recovery type filtering tests passed")
+
+
+def main():
+    """Run all tests"""
+    print("🧪 Running XMover Recovery Monitor Tests")
+    print("=" * 50)
+
+    try:
+        test_recovery_info_parsing()
+        # Note: test_database_client_parsing() requires the `cratedb` pytest fixture
+        # and is therefore only exercised by the pytest runner, not from main().
+        test_recovery_monitor_formatting()
+        test_empty_recovery_handling()
+        test_recovery_type_filtering()
+
+        print("\n🎉 All tests passed successfully!")
+        print("\n📋 Test Summary:")
+        print("  ✅ RecoveryInfo data class and properties")
+        print("  ✅ Database client parsing logic")
+        print("  ✅ Recovery monitor display formatting")
+        print("  ✅ Empty recovery state handling")
+        print("  ✅ Recovery type filtering")
+
+        print("\n🚀 Recovery monitoring feature is ready for use!")
+
+    except Exception as e:
+        print(f"\n❌ Test failed: {e}")
+        import traceback
+
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
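
To exercise the new test modules locally, a standard pytest invocation should suffice (a sketch; note that `test_database_client_parsing` additionally expects a `cratedb` fixture from the project's test harness):

```bash
pytest tests/admin/test_active_shard_monitor.py \
    tests/admin/test_problematic_translogs.py \
    tests/admin/test_distribution_analyzer.py \
    tests/admin/test_recovery_monitor.py
```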