From c0ee048f01d34ad1037b6249d1bb0f108d74d17f Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Thu, 21 Aug 2025 14:02:46 +0200 Subject: [PATCH 01/13] Admin/XMover: Add shard distribution analysis for (large) tables --- .../admin/xmover/analysis/table.py | 783 ++++++++++++++++++ cratedb_toolkit/admin/xmover/cli.py | 64 ++ doc/admin/xmover/handbook.md | 13 + doc/admin/xmover/index.md | 1 + doc/admin/xmover/queries.md | 31 + tests/admin/test_cli.py | 1 + 6 files changed, 893 insertions(+) create mode 100644 cratedb_toolkit/admin/xmover/analysis/table.py diff --git a/cratedb_toolkit/admin/xmover/analysis/table.py b/cratedb_toolkit/admin/xmover/analysis/table.py new file mode 100644 index 00000000..b8f1a7ce --- /dev/null +++ b/cratedb_toolkit/admin/xmover/analysis/table.py @@ -0,0 +1,783 @@ +""" +Shard Distribution Analysis for CrateDB Clusters + +This module analyzes shard distribution across nodes to detect imbalances +and provide recommendations for optimization. +""" + +import statistics +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple + +from rich import print as rprint +from rich.console import Console +from rich.table import Table + +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +def format_storage_size(size_gb: float) -> str: + """Format storage size with appropriate units and spacing""" + if size_gb < 0.001: + return "0 B" + elif size_gb < 1.0: + size_mb = size_gb * 1024 + return f"{size_mb:.0f} MB" + elif size_gb < 1024: + return f"{size_gb:.1f} GB" + else: + size_tb = size_gb / 1024 + return f"{size_tb:.2f} TB" + + +@dataclass +class TableDistribution: + """Represents shard distribution for a single table""" + + schema_name: str + table_name: str + total_primary_size_gb: float + node_distributions: Dict[str, Dict[str, Any]] # node_name -> metrics + + @property + def full_table_name(self) -> str: + return f"{self.schema_name}.{self.table_name}" if self.schema_name != "doc" else self.table_name + + +@dataclass +class DistributionAnomaly: + """Represents a detected distribution anomaly""" + + table: TableDistribution + anomaly_type: str + severity_score: float + impact_score: float + combined_score: float + description: str + details: Dict[str, Any] + recommendations: List[str] + + +class DistributionAnalyzer: + """Analyzes shard distribution across cluster nodes""" + + def __init__(self, client: CrateDBClient): + self.client = client + self.console = Console() + + def find_table_by_name(self, table_name: str) -> Optional[str]: + """Find table by name and resolve schema ambiguity""" + + query = """ + SELECT DISTINCT schema_name, table_name + FROM sys.shards + WHERE table_name = ? + AND schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') + AND routing_state = 'STARTED' + ORDER BY schema_name \ + """ + + result = self.client.execute_query(query, [table_name]) + rows = result.get("rows", []) + + if not rows: + return None + elif len(rows) == 1: + schema, table = rows[0] + return f"{schema}.{table}" if schema != "doc" else table + else: + # Multiple schemas have this table - ask user + rprint(f"[yellow]Multiple schemas contain table '{table_name}':[/yellow]") + for i, (schema, table) in enumerate(rows, 1): + full_name = f"{schema}.{table}" if schema != "doc" else table + rprint(f" {i}. {full_name}") + + try: + choice = input("\nSelect table (enter number): ").strip() + idx = int(choice) - 1 + if 0 <= idx < len(rows): + schema, table = rows[idx] + return f"{schema}.{table}" if schema != "doc" else table + else: + rprint("[red]Invalid selection[/red]") + return None + except (ValueError, KeyboardInterrupt): + rprint("\n[yellow]Selection cancelled[/yellow]") + return None + + def get_table_distribution_detailed(self, table_identifier: str) -> Optional[TableDistribution]: + """Get detailed distribution data for a specific table""" + + # Parse schema and table name + if "." in table_identifier: + schema_name, table_name = table_identifier.split(".", 1) + else: + schema_name = "doc" + table_name = table_identifier + + query = """ + SELECT s.schema_name, \ + s.table_name, \ + s.node['name'] as node_name, \ + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, \ + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, \ + COUNT(*) as total_shards, \ + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as primary_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as replica_size_gb, \ + SUM(s.num_docs) as total_documents + FROM sys.shards s + WHERE s.schema_name = ? \ + AND s.table_name = ? + AND s.routing_state = 'STARTED' + GROUP BY s.schema_name, s.table_name, s.node['name'] + ORDER BY s.node['name'] \ + """ + + result = self.client.execute_query(query, [schema_name, table_name]) + rows = result.get("rows", []) + + if not rows: + return None + + # Build node distributions + node_distributions = {} + for row in rows: + node_distributions[row[2]] = { + "primary_shards": row[3], + "replica_shards": row[4], + "total_shards": row[5], + "total_size_gb": row[6], + "primary_size_gb": row[7], + "replica_size_gb": row[8], + "total_documents": row[9], + } + + # Calculate total primary size + total_primary_size = sum(node["primary_size_gb"] for node in node_distributions.values()) + + return TableDistribution( + schema_name=rows[0][0], + table_name=rows[0][1], + total_primary_size_gb=total_primary_size, + node_distributions=node_distributions, + ) + + def format_table_health_report(self, table_dist: TableDistribution) -> None: + """Format and display comprehensive table health report""" + + rprint(f"\n[bold blue]๐Ÿ“‹ Table Health Report: {table_dist.full_table_name}[/bold blue]") + rprint("=" * 80) + + # Calculate overview stats + all_nodes_info = self.client.get_nodes_info() + cluster_nodes = {node.name for node in all_nodes_info if node.name} + table_nodes = set(table_dist.node_distributions.keys()) + missing_nodes = cluster_nodes - table_nodes + + total_shards = sum(node["total_shards"] for node in table_dist.node_distributions.values()) + total_primary_shards = sum(node["primary_shards"] for node in table_dist.node_distributions.values()) + total_replica_shards = sum(node["replica_shards"] for node in table_dist.node_distributions.values()) + total_size_gb = sum(node["total_size_gb"] for node in table_dist.node_distributions.values()) + total_documents = sum(node["total_documents"] for node in table_dist.node_distributions.values()) + + # Table Overview + rprint("\n[bold]๐ŸŽฏ Overview[/bold]") + rprint(f"โ€ข Primary Data Size: {format_storage_size(table_dist.total_primary_size_gb)}") + rprint(f"โ€ข Total Size (with replicas): {format_storage_size(total_size_gb)}") + rprint(f"โ€ข Total Shards: {total_shards} ({total_primary_shards} primary + {total_replica_shards} replica)") + rprint(f"โ€ข Total Documents: {total_documents:,}") + rprint( + f"โ€ข Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes ({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)" + ) + + if missing_nodes: + rprint(f"โ€ข [yellow]Missing from nodes: {', '.join(sorted(missing_nodes))}[/yellow]") + + # Shard Distribution Table + rprint("\n[bold]๐Ÿ“Š Shard Distribution by Node[/bold]") + + shard_table = Table(show_header=True) + shard_table.add_column("Node", width=15) + shard_table.add_column("Primary", width=8, justify="right") + shard_table.add_column("Replica", width=8, justify="right") + shard_table.add_column("Total", width=8, justify="right") + shard_table.add_column("Primary Size", width=12, justify="right") + shard_table.add_column("Replica Size", width=12, justify="right") + shard_table.add_column("Total Size", width=12, justify="right") + shard_table.add_column("Documents", width=12, justify="right") + + for node_name in sorted(table_dist.node_distributions.keys()): + node_data = table_dist.node_distributions[node_name] + + # Color coding based on shard count compared to average + avg_total_shards = total_shards / len(table_dist.node_distributions) + if node_data["total_shards"] > avg_total_shards * 1.5: + node_color = "red" + elif node_data["total_shards"] < avg_total_shards * 0.5: + node_color = "yellow" + else: + node_color = "white" + + shard_table.add_row( + f"[{node_color}]{node_name}[/{node_color}]", + str(node_data["primary_shards"]), + str(node_data["replica_shards"]), + f"[{node_color}]{node_data['total_shards']}[/{node_color}]", + format_storage_size(node_data["primary_size_gb"]), + format_storage_size(node_data["replica_size_gb"]), + f"[{node_color}]{format_storage_size(node_data['total_size_gb'])}[/{node_color}]", + f"{node_data['total_documents']:,}", + ) + + self.console.print(shard_table) + + # Distribution Analysis + rprint("\n[bold]๐Ÿ” Distribution Analysis[/bold]") + + # Calculate statistics + shard_counts = [node["total_shards"] for node in table_dist.node_distributions.values()] + storage_sizes = [node["total_size_gb"] for node in table_dist.node_distributions.values()] + doc_counts = [node["total_documents"] for node in table_dist.node_distributions.values()] + + shard_cv = self.calculate_coefficient_of_variation(shard_counts) + storage_cv = self.calculate_coefficient_of_variation(storage_sizes) + doc_cv = self.calculate_coefficient_of_variation(doc_counts) + + min_shards, max_shards = min(shard_counts), max(shard_counts) + min_storage, max_storage = min(storage_sizes), max(storage_sizes) + min_docs, max_docs = min(doc_counts), max(doc_counts) + + # Shard distribution analysis + if shard_cv > 0.3: + rprint( + f"โ€ข [red]โš  Shard Imbalance:[/red] Range {min_shards}-{max_shards} shards per node (CV: {shard_cv:.2f})" + ) + else: + rprint(f"โ€ข [green]โœ“ Shard Balance:[/green] Well distributed (CV: {shard_cv:.2f})") + + # Storage distribution analysis + if storage_cv > 0.4: + rprint( + f"โ€ข [red]โš  Storage Imbalance:[/red] Range {format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})" + ) + else: + rprint(f"โ€ข [green]โœ“ Storage Balance:[/green] Well distributed (CV: {storage_cv:.2f})") + + # Document distribution analysis + if doc_cv > 0.5: + rprint(f"โ€ข [red]โš  Document Skew:[/red] Range {min_docs:,}-{max_docs:,} docs per node (CV: {doc_cv:.2f})") + else: + rprint(f"โ€ข [green]โœ“ Document Distribution:[/green] Well balanced (CV: {doc_cv:.2f})") + + # Node coverage analysis + coverage_ratio = len(table_nodes) / len(cluster_nodes) + if coverage_ratio < 0.7: + missing_list = ", ".join(sorted(missing_nodes)[:5]) # Show up to 5 nodes + if len(missing_nodes) > 5: + missing_list += f", +{len(missing_nodes) - 5} more" + rprint(f"โ€ข [red]โš  Limited Coverage:[/red] {coverage_ratio:.0%} cluster coverage, missing: {missing_list}") + else: + rprint(f"โ€ข [green]โœ“ Good Coverage:[/green] {coverage_ratio:.0%} of cluster nodes have this table") + + # Zone analysis if available + try: + zone_distribution = {} + for node_name, node_data in table_dist.node_distributions.items(): + # Try to get zone info for each node + node_info = next((n for n in all_nodes_info if n.name == node_name), None) + if ( + node_info + and hasattr(node_info, "attributes") + and node_info.attributes + and "zone" in node_info.attributes + ): + zone = node_info.attributes["zone"] + if zone not in zone_distribution: + zone_distribution[zone] = {"nodes": 0, "shards": 0, "size": 0} + zone_distribution[zone]["nodes"] += 1 + zone_distribution[zone]["shards"] += node_data["total_shards"] + zone_distribution[zone]["size"] += node_data["total_size_gb"] + + if zone_distribution: + rprint("\n[bold]๐ŸŒ Zone Distribution[/bold]") + for zone in sorted(zone_distribution.keys()): + zone_data = zone_distribution[zone] + rprint( + f"โ€ข {zone}: {zone_data['nodes']} nodes, {zone_data['shards']} shards, {format_storage_size(zone_data['size'])}" + ) + + except Exception: + pass # Zone info not available + + # Health Summary + rprint("\n[bold]๐Ÿ’Š Health Summary[/bold]") + issues = [] + recommendations = [] + + if shard_cv > 0.3: + issues.append("Shard imbalance") + recommendations.append("Consider moving shards between nodes for better distribution") + + if storage_cv > 0.4: + issues.append("Storage imbalance") + recommendations.append("Rebalance shards to distribute storage more evenly") + + if doc_cv > 0.5: + issues.append("Document skew") + recommendations.append("Review routing configuration - data may not be evenly distributed") + + if coverage_ratio < 0.7: + issues.append("Limited node coverage") + recommendations.append("Consider adding replicas to improve availability and distribution") + + if not issues: + rprint("โ€ข [green]โœ… Table appears healthy with good distribution[/green]") + else: + rprint(f"โ€ข [yellow]โš  Issues found: {', '.join(issues)}[/yellow]") + rprint("\n[bold]๐Ÿ’ก Recommendations:[/bold]") + for rec in recommendations: + rprint(f" โ€ข {rec}") + + rprint() + + def get_largest_tables_distribution(self, top_n: int = 10) -> List[TableDistribution]: + """Get distribution data for the largest tables using BIGDUDES query""" + + query = """ + WITH largest_tables AS (SELECT schema_name, \ + table_name, \ + SUM(CASE WHEN "primary" = true THEN size ELSE 0 END) as total_primary_size \ + FROM sys.shards \ + WHERE schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') \ + AND routing_state = 'STARTED' \ + GROUP BY schema_name, table_name \ + ORDER BY total_primary_size DESC + LIMIT ? + ) + SELECT s.schema_name, \ + s.table_name, \ + s.node['name'] as node_name, \ + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, \ + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, \ + COUNT(*) as total_shards, \ + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as primary_size_gb, \ + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, \ + 2) as replica_size_gb, \ + SUM(s.num_docs) as total_documents + FROM sys.shards s + INNER JOIN largest_tables lt \ + ON (s.schema_name = lt.schema_name AND s.table_name = lt.table_name) + WHERE s.routing_state = 'STARTED' + GROUP BY s.schema_name, s.table_name, s.node['name'] + ORDER BY s.schema_name, s.table_name, s.node['name'] \ + """ + + result = self.client.execute_query(query, [top_n]) + + # Extract rows from the result dictionary + rows = result.get("rows", []) + + if not rows: + return [] + + # Group results by table + tables_data = {} + for row in rows: + # Ensure we have enough columns + if len(row) < 10: + continue + + table_key = f"{row[0]}.{row[1]}" + if table_key not in tables_data: + tables_data[table_key] = {"schema_name": row[0], "table_name": row[1], "nodes": {}} + + tables_data[table_key]["nodes"][row[2]] = { + "primary_shards": row[3], + "replica_shards": row[4], + "total_shards": row[5], + "total_size_gb": row[6], + "primary_size_gb": row[7], + "replica_size_gb": row[8], + "total_documents": row[9], + } + + # Calculate total primary sizes and create TableDistribution objects + distributions = [] + for table_data in tables_data.values(): + total_primary_size = sum(node["primary_size_gb"] for node in table_data["nodes"].values()) + + distribution = TableDistribution( + schema_name=table_data["schema_name"], + table_name=table_data["table_name"], + total_primary_size_gb=total_primary_size, + node_distributions=table_data["nodes"], + ) + distributions.append(distribution) + + # Sort by primary size (descending) + return sorted(distributions, key=lambda x: x.total_primary_size_gb, reverse=True) + + def calculate_coefficient_of_variation(self, values: List[float]) -> float: + """Calculate coefficient of variation (std dev / mean)""" + if not values or len(values) < 2: + return 0.0 + + mean_val = statistics.mean(values) + if mean_val == 0: + return 0.0 + + try: + std_dev = statistics.stdev(values) + return std_dev / mean_val + except statistics.StatisticsError: + return 0.0 + + def detect_shard_count_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in shard count distribution""" + if not table.node_distributions: + return None + + # Get shard counts per node + total_shards = [node["total_shards"] for node in table.node_distributions.values()] + primary_shards = [node["primary_shards"] for node in table.node_distributions.values()] + replica_shards = [node["replica_shards"] for node in table.node_distributions.values()] + + # Calculate coefficient of variation + total_cv = self.calculate_coefficient_of_variation(total_shards) + primary_cv = self.calculate_coefficient_of_variation(primary_shards) + replica_cv = self.calculate_coefficient_of_variation(replica_shards) + + # Severity based on highest CV (higher CV = more imbalanced) + max_cv = max(total_cv, primary_cv, replica_cv) + + # Consider it an anomaly if CV > 0.3 (30% variation) + if max_cv < 0.3: + return None + + # Impact based on table size + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) # Cap at 10 + severity_score = min(max_cv * 10, 10.0) # Scale to 0-10 + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = [] + min_shards = min(total_shards) + max_shards = max(total_shards) + + if max_shards - min_shards > 1: + overloaded_nodes = [ + node for node, data in table.node_distributions.items() if data["total_shards"] == max_shards + ] + underloaded_nodes = [ + node for node, data in table.node_distributions.items() if data["total_shards"] == min_shards + ] + + if overloaded_nodes and underloaded_nodes: + recommendations.append(f"Move shards from {overloaded_nodes[0]} to {underloaded_nodes[0]}") + + return DistributionAnomaly( + table=table, + anomaly_type="Shard Count Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven shard distribution (CV: {max_cv:.2f})", + details={ + "total_cv": total_cv, + "primary_cv": primary_cv, + "replica_cv": replica_cv, + "shard_counts": {node: data["total_shards"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def detect_storage_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in storage distribution""" + if not table.node_distributions: + return None + + storage_sizes = [node["total_size_gb"] for node in table.node_distributions.values()] + + # Skip if all sizes are very small (< 1GB total) + if sum(storage_sizes) < 1.0: + return None + + cv = self.calculate_coefficient_of_variation(storage_sizes) + + # Consider it an anomaly if CV > 0.4 (40% variation) for storage + if cv < 0.4: + return None + + impact_score = min(table.total_primary_size_gb / 50.0, 10.0) + severity_score = min(cv * 8, 10.0) + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = [] + min_size = min(storage_sizes) + max_size = max(storage_sizes) + + if max_size > min_size * 2: # If difference is > 2x + overloaded_node = None + underloaded_node = None + + for node, data in table.node_distributions.items(): + if data["total_size_gb"] == max_size: + overloaded_node = node + elif data["total_size_gb"] == min_size: + underloaded_node = node + + if overloaded_node and underloaded_node: + recommendations.append( + f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) to {underloaded_node} ({format_storage_size(min_size)})" + ) + + return DistributionAnomaly( + table=table, + anomaly_type="Storage Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven storage distribution (CV: {cv:.2f})", + details={ + "storage_cv": cv, + "storage_sizes": {node: data["total_size_gb"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def detect_node_coverage_issues(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect nodes with missing shard coverage""" + if not table.node_distributions: + return None + + # Get all cluster nodes + all_nodes = set() + try: + nodes_info = self.client.get_nodes_info() + all_nodes = {node.name for node in nodes_info if node.name} + except Exception: + # If we can't get node info, use nodes that have shards + all_nodes = set(table.node_distributions.keys()) + + nodes_with_shards = set(table.node_distributions.keys()) + nodes_without_shards = all_nodes - nodes_with_shards + + # Only flag as anomaly if we have missing nodes and the table is significant + if not nodes_without_shards or table.total_primary_size_gb < 10.0: + return None + + coverage_ratio = len(nodes_with_shards) / len(all_nodes) + + # Consider it an anomaly if coverage < 70% + if coverage_ratio >= 0.7: + return None + + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) + severity_score = (1 - coverage_ratio) * 10 # Higher severity for lower coverage + combined_score = impact_score * severity_score + + recommendations = [f"Consider adding replicas to nodes: {', '.join(sorted(nodes_without_shards))}"] + + return DistributionAnomaly( + table=table, + anomaly_type="Node Coverage Issue", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Limited node coverage ({len(nodes_with_shards)}/{len(all_nodes)} nodes)", + details={ + "coverage_ratio": coverage_ratio, + "nodes_with_shards": sorted(nodes_with_shards), + "nodes_without_shards": sorted(nodes_without_shards), + }, + recommendations=recommendations, + ) + + def detect_document_imbalance(self, table: TableDistribution) -> Optional[DistributionAnomaly]: + """Detect imbalances in document distribution""" + if not table.node_distributions: + return None + + document_counts = [node["total_documents"] for node in table.node_distributions.values()] + + # Skip if total documents is very low + if sum(document_counts) < 10000: + return None + + cv = self.calculate_coefficient_of_variation(document_counts) + + # Consider it an anomaly if CV > 0.5 (50% variation) for documents + if cv < 0.5: + return None + + impact_score = min(table.total_primary_size_gb / 100.0, 10.0) + severity_score = min(cv * 6, 10.0) + combined_score = impact_score * severity_score + + # Generate recommendations + recommendations = ["Document imbalance may indicate data skew - consider reviewing shard routing"] + + min_docs = min(document_counts) + max_docs = max(document_counts) + + if max_docs > min_docs * 3: # If difference is > 3x + recommendations.append(f"Significant document skew detected ({min_docs:,} to {max_docs:,} docs per node)") + + return DistributionAnomaly( + table=table, + anomaly_type="Document Imbalance", + severity_score=severity_score, + impact_score=impact_score, + combined_score=combined_score, + description=f"Uneven document distribution (CV: {cv:.2f})", + details={ + "document_cv": cv, + "document_counts": {node: data["total_documents"] for node, data in table.node_distributions.items()}, + }, + recommendations=recommendations, + ) + + def analyze_distribution(self, top_tables: int = 10) -> List[DistributionAnomaly]: + """Analyze shard distribution and return ranked anomalies""" + + # Get table distributions + distributions = self.get_largest_tables_distribution(top_tables) + + # Detect all anomalies + anomalies = [] + + for table_dist in distributions: + # Check each type of anomaly + for detector in [ + self.detect_shard_count_imbalance, + self.detect_storage_imbalance, + self.detect_node_coverage_issues, + self.detect_document_imbalance, + ]: + anomaly = detector(table_dist) + if anomaly: + anomalies.append(anomaly) + + # Sort by combined score (highest first) + return sorted(anomalies, key=lambda x: x.combined_score, reverse=True), len(distributions) + + def format_distribution_report(self, anomalies: List[DistributionAnomaly], tables_analyzed: int) -> None: + """Format and display the distribution analysis report""" + + if not anomalies: + rprint( + f"[green]โœ“ No significant shard distribution anomalies detected in top {tables_analyzed} tables![/green]" + ) + return + + # Show analysis scope + unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + rprint( + f"[blue]๐Ÿ“‹ Analyzed {tables_analyzed} largest tables, found issues in {len(unique_tables)} tables[/blue]" + ) + rprint() + + # Summary table + table = Table(title="๐ŸŽฏ Shard Distribution Anomalies", show_header=True) + table.add_column("Rank", width=4) + table.add_column("Table", min_width=20) + table.add_column("Issue Type", min_width=15) + table.add_column("Score", width=8) + table.add_column("Primary Size", width=12) + table.add_column("Description", min_width=25) + + for i, anomaly in enumerate(anomalies[:10], 1): # Top 10 + # Color coding by severity + if anomaly.combined_score >= 50: + rank_color = "red" + elif anomaly.combined_score >= 25: + rank_color = "yellow" + else: + rank_color = "blue" + + table.add_row( + f"[{rank_color}]{i}[/{rank_color}]", + anomaly.table.full_table_name, + anomaly.anomaly_type, + f"[{rank_color}]{anomaly.combined_score:.1f}[/{rank_color}]", + format_storage_size(anomaly.table.total_primary_size_gb), + anomaly.description, + ) + + self.console.print(table) + + # Detailed recommendations for top issues + if anomalies: + rprint("\n[bold]๐Ÿ”ง Top Recommendations:[/bold]") + + for i, anomaly in enumerate(anomalies[:5], 1): # Top 5 recommendations + rprint(f"\n[bold]{i}. {anomaly.table.full_table_name}[/bold] - {anomaly.anomaly_type}") + + # Show the problem analysis first + rprint(f" [yellow]๐Ÿ” Problem:[/yellow] {anomaly.description}") + + # Add specific details about what's wrong + if anomaly.anomaly_type == "Shard Count Imbalance": + if "shard_counts" in anomaly.details: + counts = anomaly.details["shard_counts"] + min_count = min(counts.values()) + max_count = max(counts.values()) + overloaded = [node for node, count in counts.items() if count == max_count] + underloaded = [node for node, count in counts.items() if count == min_count] + rprint( + f" [red]โš  Issue:[/red] {overloaded[0]} has {max_count} shards while {underloaded[0]} has only {min_count} shards" + ) + + elif anomaly.anomaly_type == "Storage Imbalance": + if "storage_sizes" in anomaly.details: + sizes = anomaly.details["storage_sizes"] + min_size = min(sizes.values()) + max_size = max(sizes.values()) + overloaded = [node for node, size in sizes.items() if size == max_size][0] + underloaded = [node for node, size in sizes.items() if size == min_size][0] + rprint( + f" [red]โš  Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference" + ) + + elif anomaly.anomaly_type == "Node Coverage Issue": + if "nodes_without_shards" in anomaly.details: + missing_nodes = anomaly.details["nodes_without_shards"] + coverage_ratio = anomaly.details["coverage_ratio"] + rprint( + f" [red]โš  Issue:[/red] Table missing from {len(missing_nodes)} nodes ({coverage_ratio:.0%} cluster coverage)" + ) + rprint( + f" [dim] Missing from: {', '.join(missing_nodes[:3])}{'...' if len(missing_nodes) > 3 else ''}[/dim]" + ) + + elif anomaly.anomaly_type == "Document Imbalance": + if "document_counts" in anomaly.details: + doc_counts = anomaly.details["document_counts"] + min_docs = min(doc_counts.values()) + max_docs = max(doc_counts.values()) + ratio = max_docs / min_docs if min_docs > 0 else float("inf") + rprint( + f" [red]โš  Issue:[/red] Document counts range from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)" + ) + + # Show recommendations + rprint(" [green]๐Ÿ’ก Solutions:[/green]") + for rec in anomaly.recommendations: + rprint(f" โ€ข {rec}") + + # Summary statistics + unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + rprint("\n[dim]๐Ÿ“Š Analysis Summary:[/dim]") + rprint(f"[dim]โ€ข Tables analyzed: {tables_analyzed}[/dim]") + rprint(f"[dim]โ€ข Tables with issues: {len(unique_tables)}[/dim]") + rprint(f"[dim]โ€ข Total anomalies found: {len(anomalies)}[/dim]") + rprint(f"[dim]โ€ข Critical issues (score >50): {len([a for a in anomalies if a.combined_score >= 50])}[/dim]") + rprint( + f"[dim]โ€ข Warning issues (score 25-50): {len([a for a in anomalies if 25 <= a.combined_score < 50])}[/dim]" + ) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 339f9e7f..e5e6e834 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -11,6 +11,7 @@ from rich.console import Console from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer, ShardReporter +from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( ShardRelocationConstraints, @@ -185,6 +186,69 @@ def check_balance(ctx, table: Optional[str], tolerance: float): report.shard_balance(tolerance=tolerance, table=table) +@main.command() +@click.option("--top-tables", default=10, help="Number of largest tables to analyze (default: 10)") +@click.option("--table", help='Analyze specific table only (e.g., "my_table" or "schema.table")') +@click.pass_context +def shard_distribution(ctx, top_tables: int, table: Optional[str]): + """Analyze shard distribution anomalies across cluster nodes + + This command analyzes the largest tables in your cluster to detect: + โ€ข Uneven shard count distribution between nodes + โ€ข Storage imbalances across nodes + โ€ข Missing node coverage for tables + โ€ข Document count imbalances indicating data skew + + Results are ranked by impact and severity to help prioritize fixes. + + Examples: + xmover shard-distribution # Analyze top 10 tables + xmover shard-distribution --top-tables 20 # Analyze top 20 tables + xmover shard-distribution --table my_table # Detailed report for specific table + """ + try: + client = ctx.obj["client"] + analyzer = DistributionAnalyzer(client) + + if table: + # Focused table analysis mode + console.print(f"[blue]๐Ÿ” Analyzing table: {table}...[/blue]") + + # Find table (handles schema auto-detection) + table_identifier = analyzer.find_table_by_name(table) + if not table_identifier: + console.print(f"[red]โŒ Table '{table}' not found[/red]") + return + + # Get detailed distribution + table_dist = analyzer.get_table_distribution_detailed(table_identifier) + if not table_dist: + console.print(f"[red]โŒ No shard data found for table '{table_identifier}'[/red]") + return + + # Display comprehensive health report + analyzer.format_table_health_report(table_dist) + + else: + # General anomaly detection mode + console.print(f"[blue]๐Ÿ” Analyzing shard distribution for top {top_tables} tables...[/blue]") + console.print() + + # Perform analysis + anomalies, tables_analyzed = analyzer.analyze_distribution(top_tables) + + # Display results + analyzer.format_distribution_report(anomalies, tables_analyzed) + + except KeyboardInterrupt: + console.print("\n[yellow]Analysis interrupted by user[/yellow]") + except Exception as e: + console.print(f"[red]Error during distribution analysis: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + @main.command() @click.option("--table", "-t", help="Analyze zones for specific table only") @click.option("--show-shards/--no-show-shards", default=False, help="Show individual shard details (default: False)") diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index cf9b4abe..05a3c57a 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -56,6 +56,19 @@ xmover recommend --execute xmover recommend --prioritize-space ``` +### Shard Distribution Analysis +This view is dedicating a specific focus on large tables. +```bash +# Analyze distribution anomalies for top 10 largest tables +xmover shard-distribution + +# Analyze more tables +xmover shard-distribution --top-tables 20 + +# Detailed health report for specific table +xmover shard-distribution --table my_table +``` + ### Zone Analysis ```bash # Check zone balance diff --git a/doc/admin/xmover/index.md b/doc/admin/xmover/index.md index affa4825..99fd4404 100644 --- a/doc/admin/xmover/index.md +++ b/doc/admin/xmover/index.md @@ -11,6 +11,7 @@ SQL commands for shard rebalancing and node decommissioning. ## Features - **Cluster Analysis**: Complete overview of shard distribution across nodes and zones +- **Shard Distribution Analysis**: Detect and rank distribution anomalies across largest tables - **Shard Movement Recommendations**: Intelligent suggestions for rebalancing with safety validation - **Recovery Monitoring**: Track ongoing shard recovery operations with progress details - **Zone Conflict Detection**: Prevents moves that would violate CrateDB's zone awareness diff --git a/doc/admin/xmover/queries.md b/doc/admin/xmover/queries.md index 27bd89e6..9844d8f6 100644 --- a/doc/admin/xmover/queries.md +++ b/doc/admin/xmover/queries.md @@ -216,3 +216,34 @@ SELECT WHERE current_state != 'STARTED' and table_name = 'dispatchio' and shard_id = 19 ORDER BY current_state, table_name, shard_id; ``` + +## "BIGDUDES" Focuses on your **biggest storage consumers** and shows how their shards are distributed across nodes. + +ยดยดยดsql +WITH largest_tables AS ( + SELECT + schema_name, + table_name, + SUM(CASE WHEN "primary" = true THEN size ELSE 0 END) as total_primary_size + FROM sys.shards + WHERE schema_name NOT IN ('sys', 'information_schema', 'pg_catalog') + GROUP BY schema_name, table_name + ORDER BY total_primary_size DESC + LIMIT 10 + ) + SELECT + s.schema_name, + s.table_name, + s.node['name'] as node_name, + COUNT(CASE WHEN s."primary" = true THEN 1 END) as primary_shards, + COUNT(CASE WHEN s."primary" = false THEN 1 END) as replica_shards, + COUNT(*) as total_shards, + ROUND(SUM(s.size) / 1024.0 / 1024.0 / 1024.0, 2) as total_size_gb, + ROUND(SUM(CASE WHEN s."primary" = true THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, 2) as primary_size_gb, + ROUND(SUM(CASE WHEN s."primary" = false THEN s.size ELSE 0 END) / 1024.0 / 1024.0 / 1024.0, 2) as replica_size_gb, + SUM(s.num_docs) as total_documents + FROM sys.shards s + INNER JOIN largest_tables lt ON (s.schema_name = lt.schema_name AND s.table_name = lt.table_name) + GROUP BY s.schema_name, s.table_name, s.node['name'] + ORDER BY s.schema_name, s.table_name, s.node['name']; +``` diff --git a/tests/admin/test_cli.py b/tests/admin/test_cli.py index 60e8d810..de3e4624 100644 --- a/tests/admin/test_cli.py +++ b/tests/admin/test_cli.py @@ -15,6 +15,7 @@ "recommend", "test-connection", "zone-analysis", + "shard-distribution", ], ) def test_xmover_all(cratedb, subcommand): From 39393863d1d0e09b8176c1e5ac9197c90a30d42c Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 21 Aug 2025 14:13:11 +0200 Subject: [PATCH 02/13] Admin/XMover: Code formatting. Linting. Type checking. - More or less just line-length fixes. - Only a single type adjustment was needed on the return value of the `analyze_distribution` method. - Ruff recommended to use set comprehensions, so here we go. - At a single spot where an exception has been `pass`ed, we added error output. Is it bad? --- .../admin/xmover/analysis/table.py | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/analysis/table.py b/cratedb_toolkit/admin/xmover/analysis/table.py index b8f1a7ce..ef6dbdf3 100644 --- a/cratedb_toolkit/admin/xmover/analysis/table.py +++ b/cratedb_toolkit/admin/xmover/analysis/table.py @@ -5,6 +5,7 @@ and provide recommendations for optimization. """ +import logging import statistics from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple @@ -15,6 +16,8 @@ from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +logger = logging.getLogger(__name__) + def format_storage_size(size_gb: float) -> str: """Format storage size with appropriate units and spacing""" @@ -134,7 +137,7 @@ def get_table_distribution_detailed(self, table_identifier: str) -> Optional[Tab AND s.routing_state = 'STARTED' GROUP BY s.schema_name, s.table_name, s.node['name'] ORDER BY s.node['name'] \ - """ + """ # noqa: E501 result = self.client.execute_query(query, [schema_name, table_name]) rows = result.get("rows", []) @@ -190,7 +193,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None: rprint(f"โ€ข Total Shards: {total_shards} ({total_primary_shards} primary + {total_replica_shards} replica)") rprint(f"โ€ข Total Documents: {total_documents:,}") rprint( - f"โ€ข Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes ({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)" + f"โ€ข Node Coverage: {len(table_nodes)}/{len(cluster_nodes)} nodes " + f"({len(table_nodes) / len(cluster_nodes) * 100:.0f}%)" ) if missing_nodes: @@ -261,7 +265,8 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None: # Storage distribution analysis if storage_cv > 0.4: rprint( - f"โ€ข [red]โš  Storage Imbalance:[/red] Range {format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})" + f"โ€ข [red]โš  Storage Imbalance:[/red] Range " + f"{format_storage_size(min_storage)}-{format_storage_size(max_storage)} per node (CV: {storage_cv:.2f})" ) else: rprint(f"โ€ข [green]โœ“ Storage Balance:[/green] Well distributed (CV: {storage_cv:.2f})") @@ -306,11 +311,13 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None: for zone in sorted(zone_distribution.keys()): zone_data = zone_distribution[zone] rprint( - f"โ€ข {zone}: {zone_data['nodes']} nodes, {zone_data['shards']} shards, {format_storage_size(zone_data['size'])}" + f"โ€ข {zone}: {zone_data['nodes']} nodes, " + f"{zone_data['shards']} shards, {format_storage_size(zone_data['size'])}" ) except Exception: - pass # Zone info not available + # Zone info not available + logger.exception("Zone info not available") # Health Summary rprint("\n[bold]๐Ÿ’Š Health Summary[/bold]") @@ -375,7 +382,7 @@ def get_largest_tables_distribution(self, top_n: int = 10) -> List[TableDistribu WHERE s.routing_state = 'STARTED' GROUP BY s.schema_name, s.table_name, s.node['name'] ORDER BY s.schema_name, s.table_name, s.node['name'] \ - """ + """ # noqa: E501 result = self.client.execute_query(query, [top_n]) @@ -534,7 +541,8 @@ def detect_storage_imbalance(self, table: TableDistribution) -> Optional[Distrib if overloaded_node and underloaded_node: recommendations.append( - f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) to {underloaded_node} ({format_storage_size(min_size)})" + f"Rebalance storage from {overloaded_node} ({format_storage_size(max_size)}) " + f"to {underloaded_node} ({format_storage_size(min_size)})" ) return DistributionAnomaly( @@ -643,7 +651,7 @@ def detect_document_imbalance(self, table: TableDistribution) -> Optional[Distri recommendations=recommendations, ) - def analyze_distribution(self, top_tables: int = 10) -> List[DistributionAnomaly]: + def analyze_distribution(self, top_tables: int = 10) -> Tuple[List[DistributionAnomaly], int]: """Analyze shard distribution and return ranked anomalies""" # Get table distributions @@ -672,12 +680,13 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table if not anomalies: rprint( - f"[green]โœ“ No significant shard distribution anomalies detected in top {tables_analyzed} tables![/green]" + f"[green]โœ“ No significant shard distribution anomalies " + f"detected in top {tables_analyzed} tables![/green]" ) return # Show analysis scope - unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + unique_tables = {anomaly.table.full_table_name for anomaly in anomalies} rprint( f"[blue]๐Ÿ“‹ Analyzed {tables_analyzed} largest tables, found issues in {len(unique_tables)} tables[/blue]" ) @@ -731,7 +740,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table overloaded = [node for node, count in counts.items() if count == max_count] underloaded = [node for node, count in counts.items() if count == min_count] rprint( - f" [red]โš  Issue:[/red] {overloaded[0]} has {max_count} shards while {underloaded[0]} has only {min_count} shards" + f" [red]โš  Issue:[/red] {overloaded[0]} has {max_count} shards " + f"while {underloaded[0]} has only {min_count} shards" ) elif anomaly.anomaly_type == "Storage Imbalance": @@ -742,7 +752,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table overloaded = [node for node, size in sizes.items() if size == max_size][0] underloaded = [node for node, size in sizes.items() if size == min_size][0] rprint( - f" [red]โš  Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference" + f" [red]โš  Issue:[/red] Storage ranges from {format_storage_size(min_size)} ({underloaded}) " # noqa: E501 + f"to {format_storage_size(max_size)} ({overloaded}) - {max_size / min_size:.1f}x difference" ) elif anomaly.anomaly_type == "Node Coverage Issue": @@ -750,11 +761,11 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table missing_nodes = anomaly.details["nodes_without_shards"] coverage_ratio = anomaly.details["coverage_ratio"] rprint( - f" [red]โš  Issue:[/red] Table missing from {len(missing_nodes)} nodes ({coverage_ratio:.0%} cluster coverage)" - ) - rprint( - f" [dim] Missing from: {', '.join(missing_nodes[:3])}{'...' if len(missing_nodes) > 3 else ''}[/dim]" + f" [red]โš  Issue:[/red] Table missing from {len(missing_nodes)} nodes " + f"({coverage_ratio:.0%} cluster coverage)" ) + ellipsis = "..." if len(missing_nodes) > 3 else "" + rprint(f" [dim] Missing from: {', '.join(missing_nodes[:3])}{ellipsis}[/dim]") elif anomaly.anomaly_type == "Document Imbalance": if "document_counts" in anomaly.details: @@ -763,7 +774,8 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table max_docs = max(doc_counts.values()) ratio = max_docs / min_docs if min_docs > 0 else float("inf") rprint( - f" [red]โš  Issue:[/red] Document counts range from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)" + f" [red]โš  Issue:[/red] Document counts range " + f"from {min_docs:,} to {max_docs:,} ({ratio:.1f}x difference)" ) # Show recommendations @@ -772,7 +784,7 @@ def format_distribution_report(self, anomalies: List[DistributionAnomaly], table rprint(f" โ€ข {rec}") # Summary statistics - unique_tables = set(anomaly.table.full_table_name for anomaly in anomalies) + unique_tables = {anomaly.table.full_table_name for anomaly in anomalies} rprint("\n[dim]๐Ÿ“Š Analysis Summary:[/dim]") rprint(f"[dim]โ€ข Tables analyzed: {tables_analyzed}[/dim]") rprint(f"[dim]โ€ข Tables with issues: {len(unique_tables)}[/dim]") From a4f244a6f35ed04ffeafa9fac3527c81c1bad31d Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Fri, 5 Sep 2025 11:32:34 +0200 Subject: [PATCH 03/13] Admin/XMover: Add module for active shard monitoring --- .../admin/xmover/analysis/shard.py | 180 +++++++ cratedb_toolkit/admin/xmover/cli.py | 197 +++++++- cratedb_toolkit/admin/xmover/model.py | 64 +++ cratedb_toolkit/admin/xmover/util/database.py | 59 ++- doc/admin/xmover/handbook.md | 126 +++++ pyproject.toml | 1 + tests/admin/test_active_shard_monitor.py | 472 ++++++++++++++++++ tests/admin/test_distribution_analyzer.py | 294 +++++++++++ tests/admin/test_recovery_monitor.py | 292 +++++++++++ 9 files changed, 1683 insertions(+), 2 deletions(-) create mode 100644 tests/admin/test_active_shard_monitor.py create mode 100644 tests/admin/test_distribution_analyzer.py create mode 100644 tests/admin/test_recovery_monitor.py diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index f6f24b6b..a1869019 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -13,6 +13,8 @@ from rich.table import Table from cratedb_toolkit.admin.xmover.model import ( + ActiveShardActivity, + ActiveShardSnapshot, DistributionStats, NodeInfo, ShardInfo, @@ -947,3 +949,181 @@ def distribution(self, table: str = None): table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") console.print(table_summary) + + +class ActiveShardMonitor: + """Monitor active shard checkpoint progression over time""" + + def __init__(self, client: CrateDBClient): + self.client = client + + def compare_snapshots( + self, + snapshot1: List[ActiveShardSnapshot], + snapshot2: List[ActiveShardSnapshot], + min_activity_threshold: int = 0, + ) -> List["ActiveShardActivity"]: + """Compare two snapshots and return activity data for shards present in both + + Args: + snapshot1: First snapshot (baseline) + snapshot2: Second snapshot (comparison) + min_activity_threshold: Minimum checkpoint delta to consider active (default: 0) + """ + + # Create lookup dict for snapshot1 + snapshot1_dict = {snap.shard_identifier: snap for snap in snapshot1} + + activities = [] + + for snap2 in snapshot2: + snap1 = snapshot1_dict.get(snap2.shard_identifier) + if snap1: + # Calculate local checkpoint delta + local_checkpoint_delta = snap2.local_checkpoint - snap1.local_checkpoint + time_diff = snap2.timestamp - snap1.timestamp + + # Filter based on actual activity between snapshots + if local_checkpoint_delta >= min_activity_threshold: + activity = ActiveShardActivity( + schema_name=snap2.schema_name, + table_name=snap2.table_name, + shard_id=snap2.shard_id, + node_name=snap2.node_name, + is_primary=snap2.is_primary, + partition_ident=snap2.partition_ident, + local_checkpoint_delta=local_checkpoint_delta, + snapshot1=snap1, + snapshot2=snap2, + time_diff_seconds=time_diff, + ) + activities.append(activity) + + # Sort by activity (highest checkpoint delta first) + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + return activities + + def format_activity_display( + self, activities: List["ActiveShardActivity"], show_count: int = 10, watch_mode: bool = False + ) -> str: + """Format activity data for console display""" + if not activities: + return "โœ… No active shards with significant checkpoint progression found" + + # Limit to requested count + activities = activities[:show_count] + + # Calculate observation period for context + if activities: + observation_period = activities[0].time_diff_seconds + output = [ + f"\n๐Ÿ”ฅ Most Active Shards ({len(activities)} shown, {observation_period:.0f}s observation period)" + ] + else: + output = [f"\n๐Ÿ”ฅ Most Active Shards ({len(activities)} shown, sorted by checkpoint activity)"] + + output.append("") + + # Add activity rate context + if activities: + total_activity = sum(a.local_checkpoint_delta for a in activities) + avg_rate = sum(a.activity_rate for a in activities) / len(activities) + output.append( + f"[dim]Total checkpoint activity: {total_activity:,} changes, Average rate: {avg_rate:.1f}/sec[/dim]" + ) + output.append("") + + # Create table headers + headers = ["Rank", "Schema.Table", "Shard", "Partition", "Node", "Type", "Checkpoint ฮ”", "Rate/sec", "Trend"] + + # Calculate column widths + col_widths = [len(h) for h in headers] + + # Prepare rows + rows = [] + for i, activity in enumerate(activities, 1): + # Format values + rank = str(i) + table_id = activity.table_identifier + shard_id = str(activity.shard_id) + partition = ( + activity.partition_ident[:14] + "..." + if len(activity.partition_ident) > 14 + else activity.partition_ident or "-" + ) + node = activity.node_name + shard_type = "P" if activity.is_primary else "R" + checkpoint_delta = f"{activity.local_checkpoint_delta:,}" + rate = f"{activity.activity_rate:.1f}" if activity.activity_rate >= 0.1 else "<0.1" + + # Calculate activity trend indicator + if activity.activity_rate >= 100: + trend = "๐Ÿ”ฅ HOT" + elif activity.activity_rate >= 50: + trend = "๐Ÿ“ˆ HIGH" + elif activity.activity_rate >= 10: + trend = "๐Ÿ“Š MED" + else: + trend = "๐Ÿ“‰ LOW" + + row = [rank, table_id, shard_id, partition, node, shard_type, checkpoint_delta, rate, trend] + rows.append(row) + + # Update column widths + for j, cell in enumerate(row): + col_widths[j] = max(col_widths[j], len(cell)) + + # Format table + header_row = " " + " | ".join(h.ljust(w) for h, w in zip(headers, col_widths)) + output.append(header_row) + output.append(" " + "-" * (len(header_row) - 3)) + + # Data rows + for row in rows: + data_row = " " + " | ".join(cell.ljust(w) for cell, w in zip(row, col_widths)) + output.append(data_row) + + # Only show legend and insights in non-watch mode + if not watch_mode: + output.append("") + output.append("Legend:") + output.append(" โ€ข Checkpoint ฮ”: Write operations during observation period") + output.append(" โ€ข Rate/sec: Checkpoint changes per second") + output.append(" โ€ข Partition: partition_ident (truncated if >14 chars, '-' if none)") + output.append(" โ€ข Type: P=Primary, R=Replica") + output.append(" โ€ข Trend: ๐Ÿ”ฅ HOT (โ‰ฅ100/s), ๐Ÿ“ˆ HIGH (โ‰ฅ50/s), ๐Ÿ“Š MED (โ‰ฅ10/s), ๐Ÿ“‰ LOW (<10/s)") + + # Add insights about activity patterns + if activities: + output.append("") + output.append("Insights:") + + # Count by trend + hot_count = len([a for a in activities if a.activity_rate >= 100]) + high_count = len([a for a in activities if 50 <= a.activity_rate < 100]) + med_count = len([a for a in activities if 10 <= a.activity_rate < 50]) + low_count = len([a for a in activities if a.activity_rate < 10]) + + if hot_count > 0: + output.append(f" โ€ข {hot_count} HOT shards (โ‰ฅ100 changes/sec) - consider load balancing") + if high_count > 0: + output.append(f" โ€ข {high_count} HIGH activity shards - monitor capacity") + if med_count > 0: + output.append(f" โ€ข {med_count} MEDIUM activity shards - normal operation") + if low_count > 0: + output.append(f" โ€ข {low_count} LOW activity shards - occasional writes") + + # Identify patterns + primary_activities = [a for a in activities if a.is_primary] + if len(primary_activities) == len(activities): + output.append(" โ€ข All active shards are PRIMARY - normal write pattern") + elif len(primary_activities) < len(activities) * 0.5: + output.append(" โ€ข Many REPLICA shards active - possible recovery/replication activity") + + # Node concentration + nodes = {a.node_name for a in activities} + if len(nodes) <= 2: + output.append(f" โ€ข Activity concentrated on {len(nodes)} node(s) - consider redistribution") + + return "\n".join(output) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index e5e6e834..010f9aeb 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -5,12 +5,14 @@ """ import sys +import time from typing import Optional import click from rich.console import Console +from rich.panel import Panel -from cratedb_toolkit.admin.xmover.analysis.shard import ShardAnalyzer, ShardReporter +from cratedb_toolkit.admin.xmover.analysis.shard import ActiveShardMonitor, ShardAnalyzer, ShardReporter from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( @@ -249,6 +251,199 @@ def shard_distribution(ctx, top_tables: int, table: Optional[str]): console.print(f"[dim]{traceback.format_exc()}[/dim]") +@main.command() +@click.option("--count", default=10, help="Number of most active shards to show (default: 10)") +@click.option("--interval", default=30, help="Observation interval in seconds (default: 30)") +@click.option( + "--min-checkpoint-delta", + default=1000, + help="Minimum checkpoint progression between snapshots to show shard (default: 1000)", +) +@click.option("--table", "-t", help="Monitor specific table only") +@click.option("--node", "-n", help="Monitor specific node only") +@click.option("--watch", "-w", is_flag=True, help="Continuously monitor (refresh every interval)") +@click.option("--exclude-system", is_flag=True, help="Exclude system tables (gc.*, information_schema.*)") +@click.option("--min-rate", type=float, help="Minimum activity rate (changes/sec) to show") +@click.option("--show-replicas/--hide-replicas", default=True, help="Show replica shards (default: True)") +@click.pass_context +def active_shards( + ctx, + count: int, + interval: int, + min_checkpoint_delta: int, + table: Optional[str], + node: Optional[str], + watch: bool, + exclude_system: bool, + min_rate: Optional[float], + show_replicas: bool, +): + """Monitor most active shards by checkpoint progression + + This command takes two snapshots of ALL started shards separated by the + observation interval, then shows the shards with the highest checkpoint + progression (activity) between the snapshots. + + Unlike other commands, this tracks ALL shards and filters based on actual + activity between snapshots, not current state. This captures shards that + become active during the observation period. + + Useful for identifying which shards are receiving the most write activity + in your cluster and understanding write patterns. + + Examples: + xmover active-shards --count 20 --interval 60 # Top 20 over 60 seconds + xmover active-shards --watch --interval 30 # Continuous monitoring + xmover active-shards --table my_table --watch # Monitor specific table + xmover active-shards --node data-hot-1 --count 5 # Top 5 on specific node + xmover active-shards --min-checkpoint-delta 500 # Lower activity threshold + xmover active-shards --exclude-system --min-rate 50 # Skip system tables, min 50/sec + xmover active-shards --hide-replicas --count 20 # Only primary shards + """ + client = ctx.obj["client"] + monitor = ActiveShardMonitor(client) + + def get_filtered_snapshot(): + """Get snapshot with optional filtering""" + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=min_checkpoint_delta) + + # Apply table filter if specified + if table: + snapshots = [s for s in snapshots if s.table_name == table or f"{s.schema_name}.{s.table_name}" == table] + + # Apply node filter if specified + if node: + snapshots = [s for s in snapshots if s.node_name == node] + + # Exclude system tables if requested + if exclude_system: + snapshots = [ + s + for s in snapshots + if not ( + s.schema_name.startswith("gc.") + or s.schema_name == "information_schema" + or s.schema_name == "sys" + or s.table_name.endswith("_events") + or s.table_name.endswith("_log") + ) + ] + + return snapshots + + def run_single_analysis(): + """Run a single analysis cycle""" + if not watch: + console.print(Panel.fit("[bold blue]Active Shards Monitor[/bold blue]")) + + # Show configuration - simplified for watch mode + if watch: + config_parts = [f"{interval}s interval", f"threshold: {min_checkpoint_delta:,}", f"top {count}"] + if table: + config_parts.append(f"table: {table}") + if node: + config_parts.append(f"node: {node}") + console.print(f"[dim]{' | '.join(config_parts)}[/dim]") + else: + config_info = [ + f"Observation interval: {interval}s", + f"Min checkpoint delta: {min_checkpoint_delta:,}", + f"Show count: {count}", + ] + if table: + config_info.append(f"Table filter: {table}") + if node: + config_info.append(f"Node filter: {node}") + if exclude_system: + config_info.append("Excluding system tables") + if min_rate: + config_info.append(f"Min rate: {min_rate}/sec") + if not show_replicas: + config_info.append("Primary shards only") + + console.print("[dim]" + " | ".join(config_info) + "[/dim]") + console.print() + + # Take first snapshot + if not watch: + console.print("๐Ÿ“ท Taking first snapshot...") + snapshot1 = get_filtered_snapshot() + + if not snapshot1: + console.print("[yellow]No started shards found matching criteria[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot1)} started shards for activity") + console.print(f"โฑ๏ธ Waiting {interval} seconds for activity...") + + # Wait for observation interval + if watch: + # Simplified countdown for watch mode + for remaining in range(interval, 0, -1): + if remaining % 5 == 0 or remaining <= 3: # Show fewer updates + console.print(f"[dim]โฑ๏ธ {remaining}s...[/dim]", end="\r") + time.sleep(1) + console.print(" " * 15, end="\r") # Clear countdown + else: + time.sleep(interval) + + # Take second snapshot + if not watch: + console.print("๐Ÿ“ท Taking second snapshot...") + snapshot2 = get_filtered_snapshot() + + if not snapshot2: + console.print("[yellow]No started shards found in second snapshot[/yellow]") + return + + if not watch: + console.print(f" Tracking {len(snapshot2)} started shards for activity") + + # Compare snapshots and show results + activities = monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=min_checkpoint_delta) + + # Apply additional filters + if not show_replicas: + activities = [a for a in activities if a.is_primary] + + if min_rate: + activities = [a for a in activities if a.activity_rate >= min_rate] + + if not activities: + console.print( + f"[green]โœ… No shards exceeded activity threshold ({min_checkpoint_delta:,} checkpoint changes)[/green]" + ) + if min_rate: + console.print(f"[dim]Also filtered by minimum rate: {min_rate}/sec[/dim]") + else: + if not watch: + overlap_count = len({s.shard_identifier for s in snapshot1} & {s.shard_identifier for s in snapshot2}) + console.print(f"[dim]Analyzed {overlap_count} shards present in both snapshots[/dim]") + console.print(monitor.format_activity_display(activities, show_count=count, watch_mode=watch)) + + try: + if watch: + console.print("[dim]Press Ctrl+C to stop monitoring[/dim]") + console.print() + + while True: + run_single_analysis() + if watch: + console.print(f"\n[dim]โ”โ”โ” Next update in {interval}s โ”โ”โ”[/dim]\n") + time.sleep(interval) + else: + run_single_analysis() + + except KeyboardInterrupt: + console.print("\n[yellow]Monitoring stopped by user[/yellow]") + except Exception as e: + console.print(f"[red]Error during active shards monitoring: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + @main.command() @click.option("--table", "-t", help="Analyze zones for specific table only") @click.option("--show-shards/--no-show-shards", default=False, help="Show individual shard details (default: False)") diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index 34e43f77..d8511b31 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -184,3 +184,67 @@ class ShardRelocationConstraints: max_recommendations: int = 10 max_disk_usage: float = 90.0 prioritize_space: bool = False + + +@dataclass +class ActiveShardSnapshot: + """Snapshot of active shard checkpoint data for tracking activity""" + + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint: int + global_checkpoint: int + translog_uncommitted_bytes: int + timestamp: float # Unix timestamp when snapshot was taken + + @property + def checkpoint_delta(self) -> int: + """Current checkpoint delta (local - global)""" + return self.local_checkpoint - self.global_checkpoint + + @property + def translog_uncommitted_mb(self) -> float: + """Translog uncommitted size in MB""" + return self.translog_uncommitted_bytes / (1024 * 1024) + + @property + def shard_identifier(self) -> str: + """Unique identifier for this shard including partition""" + shard_type = "P" if self.is_primary else "R" + partition = f":{self.partition_ident}" if self.partition_ident else "" + return f"{self.schema_name}.{self.table_name}:{self.shard_id}:{self.node_name}:{shard_type}{partition}" + + +@dataclass +class ActiveShardActivity: + """Activity comparison between two snapshots of the same shard""" + + schema_name: str + table_name: str + shard_id: int + node_name: str + is_primary: bool + partition_ident: str + local_checkpoint_delta: int # Change in local checkpoint between snapshots + snapshot1: ActiveShardSnapshot + snapshot2: ActiveShardSnapshot + time_diff_seconds: float + + @property + def activity_rate(self) -> float: + """Activity rate as checkpoint changes per second""" + if self.time_diff_seconds > 0: + return self.local_checkpoint_delta / self.time_diff_seconds + return 0.0 + + @property + def shard_type(self) -> str: + return "PRIMARY" if self.is_primary else "REPLICA" + + @property + def table_identifier(self) -> str: + return f"{self.schema_name}.{self.table_name}" diff --git a/cratedb_toolkit/admin/xmover/util/database.py b/cratedb_toolkit/admin/xmover/util/database.py index 21950ab0..5c9011bd 100644 --- a/cratedb_toolkit/admin/xmover/util/database.py +++ b/cratedb_toolkit/admin/xmover/util/database.py @@ -10,7 +10,7 @@ import urllib3 from dotenv import load_dotenv -from cratedb_toolkit.admin.xmover.model import NodeInfo, RecoveryInfo, ShardInfo +from cratedb_toolkit.admin.xmover.model import ActiveShardSnapshot, NodeInfo, RecoveryInfo, ShardInfo logger = logging.getLogger(__name__) @@ -496,3 +496,60 @@ def _is_recovery_completed(self, recovery_info: RecoveryInfo) -> bool: and recovery_info.files_percent >= 100.0 and recovery_info.bytes_percent >= 100.0 ) + + def get_active_shards_snapshot(self, min_checkpoint_delta: int = 1000) -> List[ActiveShardSnapshot]: + """Get a snapshot of all started shards for activity monitoring + + Note: This captures ALL started shards regardless of current activity level. + The min_checkpoint_delta parameter is kept for backwards compatibility but + filtering is now done during snapshot comparison to catch shards that + become active between observations. + + Args: + min_checkpoint_delta: Kept for compatibility - filtering now done in comparison + + Returns: + List of ActiveShardSnapshot objects for all started shards + """ + import time + + query = """ + SELECT sh.schema_name, \ + sh.table_name, \ + sh.id AS shard_id, \ + sh."primary", \ + node['name'] as node_name, \ + sh.partition_ident, \ + sh.translog_stats['uncommitted_size'] AS translog_uncommitted_bytes, \ + sh.seq_no_stats['local_checkpoint'] AS local_checkpoint, \ + sh.seq_no_stats['global_checkpoint'] AS global_checkpoint + FROM sys.shards AS sh + WHERE sh.state = 'STARTED' + ORDER BY sh.schema_name, sh.table_name, sh.id, sh.node['name'] \ + """ + + try: + result = self.execute_query(query) + snapshots = [] + current_time = time.time() + + for row in result.get("rows", []): + snapshot = ActiveShardSnapshot( + schema_name=row[0], + table_name=row[1], + shard_id=row[2], + is_primary=row[3], + node_name=row[4], + partition_ident=row[5] or "", + translog_uncommitted_bytes=row[6] or 0, + local_checkpoint=row[7] or 0, + global_checkpoint=row[8] or 0, + timestamp=current_time, + ) + snapshots.append(snapshot) + + return snapshots + + except Exception as e: + logger.error(f"Error getting active shards snapshot: {e}") + return [] diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index 05a3c57a..f9aee2e0 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -244,6 +244,132 @@ xmover monitor-recovery --watch --include-transitioning - **PEER**: Copying shard data from another node (replication/relocation) - **DISK**: Rebuilding shard from local data (after restart/disk issues) + +### `active-shards` +Monitor the most active shards by tracking checkpoint progression over time. +This command helps identify which shards are receiving the most write activity +by measuring local checkpoint progression between two snapshots. + +**Options:** +- `--count`: Number of most active shards to show (default: 10) +- `--interval`: Observation interval in seconds (default: 30) +- `--min-checkpoint-delta`: Minimum checkpoint progression between snapshots to show shard (default: 1000) +- `--table, -t`: Monitor specific table only +- `--node, -n`: Monitor specific node only +- `--watch, -w`: Continuously monitor (refresh every interval) +- `--exclude-system`: Exclude system tables (gc.*, information_schema.*, *_events, *_log) +- `--min-rate`: Minimum activity rate (changes/sec) to show +- `--show-replicas/--hide-replicas`: Show replica shards (default: True) + +**How it works:** +1. **Takes snapshot of ALL started shards** (not just currently active ones) +2. **Waits for observation interval** (configurable, default: 30 seconds) +3. **Takes second snapshot** of all started shards +4. **Compares snapshots** to find shards with checkpoint progression โ‰ฅ threshold +5. **Shows ranked results** with activity trends and insights + +**Enhanced output features:** +- **Checkpoint visibility**: Shows actual `local_checkpoint` values (CP Start โ†’ CP End โ†’ Delta) +- **Partition awareness**: Separate tracking for partitioned tables (different partition_ident values) +- **Activity trends**: ๐Ÿ”ฅ HOT (โ‰ฅ100/s), ๐Ÿ“ˆ HIGH (โ‰ฅ50/s), ๐Ÿ“Š MED (โ‰ฅ10/s), ๐Ÿ“‰ LOW (<10/s) +- **Smart insights**: Identifies concentration patterns and load distribution (non-watch mode) +- **Flexible filtering**: Exclude system tables, set minimum rates, hide replicas +- **Context information**: Total activity, average rates, observation period +- **Clean watch mode**: Streamlined output without legend/insights for continuous monitoring + +This approach captures shards that become active during the observation period, providing a complete view of cluster write patterns and identifying hot spots. The enhanced filtering helps focus on business-critical activity patterns. + +**Sample output (single run):** +``` +๐Ÿ”ฅ Most Active Shards (3 shown, 30s observation period) +Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec + Rank | Schema.Table | Shard | Partition | Node | Type | Checkpoint ฮ” | Rate/sec | Trend + ----------------------------------------------------------------------------------------------------------- + 1 | gc.scheduled_jobs_log | 0 | - | data-hot-8 | P | 113,744 | 3,791.5 | ๐Ÿ”ฅ HOT + 2 | TURVO.events | 0 | 04732dpl6osj8d | data-hot-0 | P | 45,837 | 1,527.9 | ๐Ÿ”ฅ HOT + 3 | doc.user_actions | 1 | 04732dpk70rj6d | data-hot-2 | P | 30,733 | 1,024.4 | ๐Ÿ”ฅ HOT +Legend: + โ€ข Checkpoint ฮ”: Write operations during observation period + โ€ข Partition: partition_ident (truncated if >14 chars, '-' if none) +Insights: + โ€ข 3 HOT shards (โ‰ฅ100 changes/sec) - consider load balancing + โ€ข All active shards are PRIMARY - normal write pattern +``` + +**Sample output (watch mode - cleaner):** +``` +30s interval | threshold: 1,000 | top 5 +๐Ÿ”ฅ Most Active Shards (3 shown, 30s observation period) +Total checkpoint activity: 190,314 changes, Average rate: 2,109.0/sec + Rank | Schema.Table | Shard | Partition | Node | Type | Checkpoint ฮ” | Rate/sec | Trend + ----------------------------------------------------------------------------------------------------------- + 1 | gc.scheduled_jobs_log | 0 | - | data-hot-8 | P | 113,744 | 3,791.5 | ๐Ÿ”ฅ HOT + 2 | TURVO.events | 0 | 04732dpl6osj8d | data-hot-0 | P | 45,837 | 1,527.9 | ๐Ÿ”ฅ HOT + 3 | doc.user_actions | 1 | 04732dpk70rj6d | data-hot-2 | P | 30,733 | 1,024.4 | ๐Ÿ”ฅ HOT +โ”โ”โ” Next update in 30s โ”โ”โ” +``` + +#### Examples +```bash +# Show top 10 most active shards over 30 seconds +xmover active-shards + +# Top 20 shards with 60-second observation period +xmover active-shards --count 20 --interval 60 + +# Continuous monitoring with 30-second intervals +xmover active-shards --watch --interval 30 + +# Monitor specific table activity +xmover active-shards --table my_table --watch + +# Monitor specific node with custom threshold +xmover active-shards --node data-hot-1 --min-checkpoint-delta 500 + +# Exclude system tables and event logs for business data focus +xmover active-shards --exclude-system --count 20 + +# Only show high-activity shards (โ‰ฅ50 changes/sec) +xmover active-shards --min-rate 50 --count 15 + +# Focus on primary shards only +xmover active-shards --hide-replicas --count 20 +``` + +#### Monitoring Active Shards and Write Patterns + +Identify which shards are receiving the most write activity: + +1. Quick snapshot of most active shards: +```bash +# Show top 10 most active shards over 30 seconds +xmover active-shards + +# Longer observation period for more accurate results +xmover active-shards --count 15 --interval 60 +``` + +2. Continuous monitoring for real-time insights: +```bash +# Continuous monitoring with 30-second intervals +xmover active-shards --watch --interval 30 + +# Monitor specific table for focused analysis +xmover active-shards --table critical_table --watch +``` + +3. Integration with rebalancing workflow: +```bash +# Identify hot shards first +xmover active-shards --count 20 --interval 60 + +# Move hot shards away from overloaded nodes +xmover recommend --table hot_table --prioritize-space --execute + +# Monitor the impact +xmover active-shards --table hot_table --watch +``` + ### `test-connection` Tests the connection to CrateDB and displays basic cluster information. diff --git a/pyproject.toml b/pyproject.toml index f6614eb8..6770d234 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -331,6 +331,7 @@ lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ] lint.per-file-ignores."examples/*" = [ "ERA001", "F401", "T201", "T203" ] # Allow `print` and `pprint` lint.per-file-ignores."tests/*" = [ "S101" ] # Allow use of `assert`, and `print`. lint.per-file-ignores."tests/adapter/test_rockset.py" = [ "E402" ] +lint.per-file-ignores."tests/admin/*" = [ "T201" ] # Allow use of `print`. lint.per-file-ignores."tests/info/test_http.py" = [ "E402" ] [tool.pytest.ini_options] diff --git a/tests/admin/test_active_shard_monitor.py b/tests/admin/test_active_shard_monitor.py new file mode 100644 index 00000000..55268b15 --- /dev/null +++ b/tests/admin/test_active_shard_monitor.py @@ -0,0 +1,472 @@ +""" +Tests for ActiveShardMonitor functionality +""" + +import time +from unittest.mock import Mock, patch + +from cratedb_toolkit.admin.xmover.analysis.shard import ActiveShardMonitor +from cratedb_toolkit.admin.xmover.model import ActiveShardActivity, ActiveShardSnapshot +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +class TestActiveShardSnapshot: + """Test ActiveShardSnapshot dataclass""" + + def test_checkpoint_delta(self): + """Test checkpoint delta calculation""" + snapshot = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=time.time(), + ) + + assert snapshot.checkpoint_delta == 1000 + assert snapshot.translog_uncommitted_mb == 10.0 + assert snapshot.shard_identifier == "test_schema.test_table:1:node1:P" + + +class TestActiveShardActivity: + """Test ActiveShardActivity dataclass""" + + def test_activity_calculations(self): + """Test activity rate and property calculations""" + snapshot1 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1000, + global_checkpoint=500, + translog_uncommitted_bytes=5242880, # 5MB + timestamp=100.0, + ) + + snapshot2 = ActiveShardSnapshot( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint=1500, + global_checkpoint=500, + translog_uncommitted_bytes=10485760, # 10MB + timestamp=130.0, # 30 seconds later + ) + + activity = ActiveShardActivity( + schema_name="test_schema", + table_name="test_table", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + assert activity.activity_rate == 500 / 30.0 # ~16.67 changes/sec + assert activity.shard_type == "PRIMARY" + assert activity.table_identifier == "test_schema.test_table" + + +class TestCrateDBClientActiveShards: + """Test CrateDB client active shards functionality""" + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_success(self, mock_execute): + """Test successful snapshot retrieval""" + mock_execute.return_value = { + "rows": [ + ["schema1", "table1", 1, True, "node1", "", 10485760, 1500, 500], + ["schema1", "table2", 2, False, "node2", "part1", 20971520, 2000, 800], + ] + } + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert len(snapshots) == 2 + + # Check first snapshot + snap1 = snapshots[0] + assert snap1.schema_name == "schema1" + assert snap1.table_name == "table1" + assert snap1.shard_id == 1 + assert snap1.is_primary is True + assert snap1.node_name == "node1" + assert snap1.local_checkpoint == 1500 + assert snap1.global_checkpoint == 500 + assert snap1.checkpoint_delta == 1000 + assert snap1.translog_uncommitted_mb == 10.0 + + # Check second snapshot + snap2 = snapshots[1] + assert snap2.schema_name == "schema1" + assert snap2.table_name == "table2" + assert snap2.shard_id == 2 + assert snap2.is_primary is False + assert snap2.node_name == "node2" + assert snap2.partition_ident == "part1" + assert snap2.checkpoint_delta == 1200 + assert snap2.translog_uncommitted_mb == 20.0 + + # Verify query was called without checkpoint delta filter (new behavior) + mock_execute.assert_called_once() + args = mock_execute.call_args[0] + # No longer passes min_checkpoint_delta parameter + assert len(args) == 1 # Only the query, no parameters + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_empty(self, mock_execute): + """Test snapshot retrieval with no results""" + mock_execute.return_value = {"rows": []} + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + @patch.object(CrateDBClient, "execute_query") + def test_get_active_shards_snapshot_error(self, mock_execute): + """Test snapshot retrieval with database error""" + mock_execute.side_effect = Exception("Database connection failed") + + client = CrateDBClient("http://test") + snapshots = client.get_active_shards_snapshot(min_checkpoint_delta=1000) + + assert snapshots == [] + + +class TestActiveShardMonitor: + """Test ActiveShardMonitor class""" + + def setup_method(self): + """Set up test fixtures""" + self.mock_client = Mock(spec=CrateDBClient) + self.monitor = ActiveShardMonitor(self.mock_client) + + def create_test_snapshot( + self, + schema: str, + table: str, + shard_id: int, + node: str, + is_primary: bool, + local_checkpoint: int, + timestamp: float, + ): + """Helper to create test snapshots""" + return ActiveShardSnapshot( + schema_name=schema, + table_name=table, + shard_id=shard_id, + node_name=node, + is_primary=is_primary, + partition_ident="", + local_checkpoint=local_checkpoint, + global_checkpoint=500, # Fixed for simplicity + translog_uncommitted_bytes=10485760, # 10MB + timestamp=timestamp, + ) + + def test_compare_snapshots_with_activity(self): + """Test comparing snapshots with active shards""" + # Create first snapshot + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), + ] + + # Create second snapshot (30 seconds later with activity) + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0), # +500 + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2200, 130.0), # +200 + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 130.0), # No change + self.create_test_snapshot("schema1", "table4", 1, "node3", True, 1000, 130.0), # New shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 activities (table3 had no change, table4 is new) + assert len(activities) == 2 + + # Check activities are sorted by checkpoint delta (highest first) + assert activities[0].local_checkpoint_delta == 500 # table1 + assert activities[0].schema_name == "schema1" + assert activities[0].table_name == "table1" + + assert activities[1].local_checkpoint_delta == 200 # table2 + assert activities[1].schema_name == "schema1" + assert activities[1].table_name == "table2" + + # Check activity rate calculation + assert activities[0].activity_rate == 500 / 30.0 # ~16.67/sec + assert activities[1].activity_rate == 200 / 30.0 # ~6.67/sec + + def test_compare_snapshots_no_activity(self): + """Test comparing snapshots with no activity""" + # Create identical snapshots + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 130.0), # No change + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_compare_snapshots_no_overlap(self): + """Test comparing snapshots with no overlapping shards""" + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table2", 1, "node2", True, 1500, 130.0), # Different shard + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + assert activities == [] + + def test_format_activity_display_with_activities(self): + """Test formatting activity display with data""" + # Create test activities + snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name="table1", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False) + + # Check that output contains expected elements + assert "Most Active Shards" in display + assert "schema1.table1" in display + assert "500" in display # checkpoint delta + assert "16.7" in display # activity rate + assert "P" in display # primary indicator + assert "Legend:" in display + assert "Trend:" in display # new trend column explanation + assert "Partition:" in display # new partition column explanation + + def test_format_activity_display_empty(self): + """Test formatting activity display with no data""" + display = self.monitor.format_activity_display([], show_count=10, watch_mode=False) + + assert "No active shards with significant checkpoint progression found" in display + + def test_format_activity_display_count_limit(self): + """Test that display respects show_count limit""" + # Create multiple activities + activities = [] + for i in range(15): + snapshot1 = self.create_test_snapshot("schema1", f"table{i}", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", f"table{i}", 1, "node1", True, 1000 + (i + 1) * 100, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name=f"table{i}", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=(i + 1) * 100, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + activities.append(activity) + + # Sort activities by checkpoint delta (highest first) - same as compare_snapshots does + activities.sort(key=lambda x: x.local_checkpoint_delta, reverse=True) + + # Should only show top 5 + display = self.monitor.format_activity_display(activities, show_count=5, watch_mode=False) + + # Count number of table entries in display + table_count = display.count("schema1.table") + assert table_count == 5 # Should only show 5 entries + + # Should show highest activity first (table14 has highest checkpoint delta) + assert "schema1.table14" in display + + def test_compare_snapshots_with_activity_threshold(self): + """Test filtering activities by minimum threshold""" + # Create snapshots with various activity levels + snapshot1 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0), # Will have +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2000, 100.0), # Will have +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3000, 100.0), # Will have +100 delta + ] + + snapshot2 = [ + self.create_test_snapshot("schema1", "table1", 1, "node1", True, 3000, 130.0), # +2000 delta + self.create_test_snapshot("schema1", "table2", 1, "node2", False, 2500, 130.0), # +500 delta + self.create_test_snapshot("schema1", "table3", 1, "node1", True, 3100, 130.0), # +100 delta + ] + + # Test with threshold of 1000 - should only show table1 (2000 delta) + activities_high_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1000) + assert len(activities_high_threshold) == 1 + assert activities_high_threshold[0].table_name == "table1" + assert activities_high_threshold[0].local_checkpoint_delta == 2000 + + # Test with threshold of 200 - should show table1 and table2 + activities_medium_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=200) + assert len(activities_medium_threshold) == 2 + assert activities_medium_threshold[0].local_checkpoint_delta == 2000 # table1 first (highest) + assert activities_medium_threshold[1].local_checkpoint_delta == 500 # table2 second + + # Test with threshold of 0 - should show all three + activities_low_threshold = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=0) + assert len(activities_low_threshold) == 3 + assert activities_low_threshold[0].local_checkpoint_delta == 2000 # Sorted by activity + assert activities_low_threshold[1].local_checkpoint_delta == 500 + assert activities_low_threshold[2].local_checkpoint_delta == 100 + + def test_primary_replica_separation(self): + """Test that primary and replica shards are tracked separately""" + # Create snapshots with same table/shard but different primary/replica + snapshot1 = [ + # Primary shard + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 15876, 100.0), + # Replica shard (same table/shard/node but different type) + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129434, 100.0), + ] + + snapshot2 = [ + # Primary shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", True, 16000, 130.0), # +124 delta + # Replica shard progresses normally + self.create_test_snapshot("gc", "scheduled_jobs_log", 0, "data-hot-8", False, 129500, 130.0), # +66 delta + ] + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 separate activities (primary and replica tracked separately) + assert len(activities) == 2 + + # Find primary and replica activities + primary_activity = next(a for a in activities if a.is_primary) + replica_activity = next(a for a in activities if not a.is_primary) + + # Verify deltas are calculated correctly for each type + assert primary_activity.local_checkpoint_delta == 124 # 16000 - 15876 + assert replica_activity.local_checkpoint_delta == 66 # 129500 - 129434 + + # Verify they have different shard identifiers + assert primary_activity.snapshot1.shard_identifier != replica_activity.snapshot1.shard_identifier + assert "data-hot-8:P" in primary_activity.snapshot1.shard_identifier + assert "data-hot-8:R" in replica_activity.snapshot1.shard_identifier + + # This test prevents the bug where we mixed primary CP End with replica CP Start + # which created fake deltas like 129434 - 15876 = 113558 + + def test_partition_separation(self): + """Test that partitions within the same table/shard are tracked separately""" + # Create snapshots with same table/shard but different partitions + snapshot1 = [ + # Partition 1 + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 32684, 100.0), + # Partition 2 (same table/shard/node/type but different partition) + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54289, 100.0), + ] + + # Modify partition_ident for the snapshots to simulate different partitions + snapshot1[0].partition_ident = "04732dpl6osj8d1g60o30c1g" + snapshot1[1].partition_ident = "04732dpl6os3adpm60o30c1g" + + snapshot2 = [ + # Partition 1 progresses + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 32800, 130.0), + # +116 delta + # Partition 2 progresses + self.create_test_snapshot("TURVO", "appointmentFormFieldData_events", 0, "data-hot-8", True, 54400, 130.0), + # +111 delta + ] + + # Set partition_ident for second snapshot + snapshot2[0].partition_ident = "04732dpl6osj8d1g60o30c1g" + snapshot2[1].partition_ident = "04732dpl6os3adpm60o30c1g" + + activities = self.monitor.compare_snapshots(snapshot1, snapshot2, min_activity_threshold=1) + + # Should have 2 separate activities (partitions tracked separately) + assert len(activities) == 2 + + # Verify deltas are calculated correctly for each partition + partition1_activity = next(a for a in activities if "04732dpl6osj8d1g60o30c1g" in a.snapshot1.shard_identifier) + partition2_activity = next(a for a in activities if "04732dpl6os3adpm60o30c1g" in a.snapshot1.shard_identifier) + + assert partition1_activity.local_checkpoint_delta == 116 # 32800 - 32684 + assert partition2_activity.local_checkpoint_delta == 111 # 54400 - 54289 + + # Verify they have different shard identifiers due to partition + assert partition1_activity.snapshot1.shard_identifier != partition2_activity.snapshot1.shard_identifier + assert ":04732dpl6osj8d1g60o30c1g" in partition1_activity.snapshot1.shard_identifier + assert ":04732dpl6os3adpm60o30c1g" in partition2_activity.snapshot1.shard_identifier + + # This test prevents mixing partitions which would create fake activity measurements + + def test_format_activity_display_watch_mode(self): + """Test that watch mode excludes legend and insights""" + snapshot1 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1000, 100.0) + snapshot2 = self.create_test_snapshot("schema1", "table1", 1, "node1", True, 1500, 130.0) + + activity = ActiveShardActivity( + schema_name="schema1", + table_name="table1", + shard_id=1, + node_name="node1", + is_primary=True, + partition_ident="", + local_checkpoint_delta=500, + snapshot1=snapshot1, + snapshot2=snapshot2, + time_diff_seconds=30.0, + ) + + # Test non-watch mode (should include legend and insights) + normal_display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=False) + assert "Legend:" in normal_display + assert "Insights:" in normal_display + assert "Checkpoint ฮ”:" in normal_display + + # Test watch mode (should exclude legend and insights) + watch_display = self.monitor.format_activity_display([activity], show_count=10, watch_mode=True) + assert "Legend:" not in watch_display + assert "Insights:" not in watch_display + assert "Checkpoint ฮ”" in watch_display # Core data should still be present + + # But should still contain the core data + assert "Most Active Shards" in watch_display + assert "schema1.table1" in watch_display + assert "500" in watch_display # checkpoint delta diff --git a/tests/admin/test_distribution_analyzer.py b/tests/admin/test_distribution_analyzer.py new file mode 100644 index 00000000..000fd0f9 --- /dev/null +++ b/tests/admin/test_distribution_analyzer.py @@ -0,0 +1,294 @@ +""" +Tests for distribution analyzer functionality +""" + +from unittest.mock import Mock, patch + +from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer, DistributionAnomaly, TableDistribution +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +class TestDistributionAnalyzer: + def setup_method(self): + """Set up test fixtures""" + self.mock_client = Mock(spec=CrateDBClient) + self.analyzer = DistributionAnalyzer(self.mock_client) + + def test_coefficient_of_variation_calculation(self): + """Test CV calculation with different scenarios""" + + # Normal case + values = [10, 12, 8, 14, 6] + cv = self.analyzer.calculate_coefficient_of_variation(values) + assert cv > 0 + + # All equal values (should return 0) + equal_values = [10, 10, 10, 10] + cv_equal = self.analyzer.calculate_coefficient_of_variation(equal_values) + assert cv_equal == 0.0 + + # Empty list + empty_values = [] + cv_empty = self.analyzer.calculate_coefficient_of_variation(empty_values) + assert cv_empty == 0.0 + + # Single value + single_value = [10] + cv_single = self.analyzer.calculate_coefficient_of_variation(single_value) + assert cv_single == 0.0 + + def test_get_largest_tables_distribution(self): + """Test fetching table distribution data""" + + # Mock query results + mock_results = [ + # schema, table, node, primary_shards, replica_shards, total_shards, total_size, primary_size, replica_size, docs # noqa: E501, ERA001 + ["doc", "large_table", "node1", 5, 2, 7, 100.5, 80.2, 20.3, 1000000], + ["doc", "large_table", "node2", 4, 3, 7, 95.1, 75.8, 19.3, 950000], + ["doc", "large_table", "node3", 6, 1, 7, 110.2, 85.9, 24.3, 1100000], + ["custom", "another_table", "node1", 3, 2, 5, 50.1, 40.2, 9.9, 500000], + ["custom", "another_table", "node2", 2, 3, 5, 45.8, 35.1, 10.7, 480000], + ] + + self.mock_client.execute_query.return_value = mock_results + + distributions = self.analyzer.get_largest_tables_distribution(top_n=10) + + # Verify query was called with correct parameters + self.mock_client.execute_query.assert_called_once() + call_args = self.mock_client.execute_query.call_args + assert call_args[0][1] == [10] # top_n parameter + + # Verify we got the expected number of tables + assert len(distributions) == 2 + + # Verify table data structure + large_table = next(d for d in distributions if d.table_name == "large_table") + assert large_table.schema_name == "doc" + assert large_table.full_table_name == "large_table" # Should omit 'doc' schema + assert len(large_table.node_distributions) == 3 + + another_table = next(d for d in distributions if d.table_name == "another_table") + assert another_table.schema_name == "custom" + assert another_table.full_table_name == "custom.another_table" + assert len(another_table.node_distributions) == 2 + + # Verify sorting by primary size (descending) + assert distributions[0].total_primary_size_gb >= distributions[1].total_primary_size_gb + + def test_detect_shard_count_imbalance(self): + """Test shard count imbalance detection""" + + # Create test table with imbalanced shard distribution + imbalanced_table = TableDistribution( + schema_name="doc", + table_name="imbalanced_table", + total_primary_size_gb=500.0, + node_distributions={ + "node1": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + "node2": {"total_shards": 15, "primary_shards": 8, "replica_shards": 7}, + "node3": {"total_shards": 5, "primary_shards": 2, "replica_shards": 3}, + }, + ) + + anomaly = self.analyzer.detect_shard_count_imbalance(imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Shard Count Imbalance" + assert anomaly.combined_score > 0 + assert len(anomaly.recommendations) > 0 + + # Create balanced table (should not detect anomaly) + balanced_table = TableDistribution( + schema_name="doc", + table_name="balanced_table", + total_primary_size_gb=100.0, + node_distributions={ + "node1": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + "node2": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + "node3": {"total_shards": 8, "primary_shards": 4, "replica_shards": 4}, + }, + ) + + no_anomaly = self.analyzer.detect_shard_count_imbalance(balanced_table) + assert no_anomaly is None + + def test_detect_storage_imbalance(self): + """Test storage imbalance detection""" + + # Create test table with storage imbalance + storage_imbalanced_table = TableDistribution( + schema_name="doc", + table_name="storage_imbalanced", + total_primary_size_gb=300.0, + node_distributions={ + "node1": {"total_size_gb": 150.0, "primary_size_gb": 100.0, "replica_size_gb": 50.0}, + "node2": {"total_size_gb": 50.0, "primary_size_gb": 30.0, "replica_size_gb": 20.0}, + "node3": {"total_size_gb": 100.0, "primary_size_gb": 70.0, "replica_size_gb": 30.0}, + }, + ) + + anomaly = self.analyzer.detect_storage_imbalance(storage_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Storage Imbalance" + assert anomaly.combined_score > 0 + + # Small table (should be ignored) + small_table = TableDistribution( + schema_name="doc", + table_name="small_table", + total_primary_size_gb=0.1, + node_distributions={ + "node1": {"total_size_gb": 0.5, "primary_size_gb": 0.05, "replica_size_gb": 0.05}, + "node2": {"total_size_gb": 0.1, "primary_size_gb": 0.03, "replica_size_gb": 0.02}, + }, + ) + + no_anomaly = self.analyzer.detect_storage_imbalance(small_table) + assert no_anomaly is None + + def test_detect_node_coverage_issues(self): + """Test node coverage issue detection""" + + # Mock nodes_info to simulate cluster with 4 nodes + mock_nodes = [Mock(name="node1"), Mock(name="node2"), Mock(name="node3"), Mock(name="node4")] + self.mock_client.get_nodes_info.return_value = mock_nodes + + # Table with limited coverage (only on 2 out of 4 nodes) + limited_coverage_table = TableDistribution( + schema_name="doc", + table_name="limited_coverage", + total_primary_size_gb=100.0, # Significant size + node_distributions={ + "node1": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + "node2": {"total_shards": 10, "primary_shards": 5, "replica_shards": 5}, + # node3 and node4 missing + }, + ) + + anomaly = self.analyzer.detect_node_coverage_issues(limited_coverage_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Node Coverage Issue" + assert "node3" in anomaly.details["nodes_without_shards"] + assert "node4" in anomaly.details["nodes_without_shards"] + assert len(anomaly.recommendations) > 0 + + def test_detect_document_imbalance(self): + """Test document imbalance detection""" + + # Table with document imbalance + doc_imbalanced_table = TableDistribution( + schema_name="doc", + table_name="doc_imbalanced", + total_primary_size_gb=200.0, + node_distributions={ + "node1": {"total_documents": 1000000}, # 1M docs + "node2": {"total_documents": 500000}, # 500K docs + "node3": {"total_documents": 100000}, # 100K docs (5x imbalance) + }, + ) + + anomaly = self.analyzer.detect_document_imbalance(doc_imbalanced_table) + + assert anomaly is not None + assert anomaly.anomaly_type == "Document Imbalance" + assert "data skew" in anomaly.recommendations[0].lower() + + # Table with very few documents (should be ignored) + low_doc_table = TableDistribution( + schema_name="doc", + table_name="low_docs", + total_primary_size_gb=100.0, + node_distributions={ + "node1": {"total_documents": 1000}, + "node2": {"total_documents": 500}, + }, + ) + + no_anomaly = self.analyzer.detect_document_imbalance(low_doc_table) + assert no_anomaly is None + + def test_analyze_distribution_integration(self): + """Test the full analysis workflow""" + + # Mock the get_largest_tables_distribution method + mock_table = TableDistribution( + schema_name="doc", + table_name="test_table", + total_primary_size_gb=500.0, + node_distributions={ + "node1": { + "total_shards": 15, + "primary_shards": 8, + "replica_shards": 7, + "total_size_gb": 200.0, + "primary_size_gb": 120.0, + "replica_size_gb": 80.0, + "total_documents": 2000000, + }, + "node2": { + "total_shards": 8, + "primary_shards": 4, + "replica_shards": 4, + "total_size_gb": 100.0, + "primary_size_gb": 60.0, + "replica_size_gb": 40.0, + "total_documents": 1000000, + }, + "node3": { + "total_shards": 5, + "primary_shards": 3, + "replica_shards": 2, + "total_size_gb": 50.0, + "primary_size_gb": 30.0, + "replica_size_gb": 20.0, + "total_documents": 500000, + }, + }, + ) + + with patch.object(self.analyzer, "get_largest_tables_distribution", return_value=[mock_table]): + anomalies, tables_analyzed = self.analyzer.analyze_distribution(top_tables=10) + + # Should detect multiple types of anomalies + assert len(anomalies) > 0 + assert tables_analyzed == 1 # We provided 1 mock table + + # Anomalies should be sorted by combined score (descending) + if len(anomalies) > 1: + for i in range(len(anomalies) - 1): + assert anomalies[i].combined_score >= anomalies[i + 1].combined_score + + # Each anomaly should have required fields + for anomaly in anomalies: + assert anomaly.table is not None + assert anomaly.anomaly_type is not None + assert anomaly.combined_score >= 0 + assert isinstance(anomaly.recommendations, list) + + def test_format_distribution_report_no_anomalies(self): + """Test report formatting when no anomalies found""" + + # This should not raise an exception + with patch("builtins.print"): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([], 5) + + def test_format_distribution_report_with_anomalies(self): + """Test report formatting with anomalies""" + + mock_anomaly = DistributionAnomaly( + table=TableDistribution("doc", "test_table", 100.0, {}), + anomaly_type="Test Anomaly", + severity_score=7.5, + impact_score=8.0, + combined_score=60.0, + description="Test description", + details={}, + recommendations=["Test recommendation"], + ) + + # This should not raise an exception + with patch("builtins.print"): # Mock print to avoid console output during tests + self.analyzer.format_distribution_report([mock_anomaly], 3) diff --git a/tests/admin/test_recovery_monitor.py b/tests/admin/test_recovery_monitor.py new file mode 100644 index 00000000..bb6dec8a --- /dev/null +++ b/tests/admin/test_recovery_monitor.py @@ -0,0 +1,292 @@ +""" +Test script for XMover recovery monitoring functionality + +This script tests the recovery monitoring features by creating mock recovery scenarios +and verifying the output formatting and data parsing. +""" + +import sys +from typing import Any, Dict +from unittest.mock import Mock + +from cratedb_toolkit.admin.xmover.model import RecoveryInfo +from cratedb_toolkit.admin.xmover.operational.monitor import RecoveryMonitor +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +def create_mock_allocation( + schema_name: str, table_name: str, shard_id: int, current_state: str, node_id: str +) -> Dict[str, Any]: + """Create a mock allocation response""" + return { + "schema_name": schema_name, + "table_name": table_name, + "shard_id": shard_id, + "current_state": current_state, + "node_id": node_id, + "explanation": None, + } + + +def create_mock_shard_detail( + schema_name: str, + table_name: str, + shard_id: int, + node_name: str, + node_id: str, + recovery_type: str, + stage: str, + files_percent: float, + bytes_percent: float, + total_time: int, + size: int, + is_primary: bool, +) -> Dict[str, Any]: + """Create a mock shard detail response""" + return { + "schema_name": schema_name, + "table_name": table_name, + "shard_id": shard_id, + "node_name": node_name, + "node_id": node_id, + "routing_state": "RELOCATING", + "state": "RECOVERING", + "recovery": { + "type": recovery_type, + "stage": stage, + "files": {"percent": files_percent}, + "size": {"percent": bytes_percent}, + "total_time": total_time, + }, + "size": size, + "primary": is_primary, + } + + +def test_recovery_info_parsing(): + """Test RecoveryInfo dataclass and its properties""" + print("Testing RecoveryInfo parsing...") + + recovery = RecoveryInfo( + schema_name="CURVO", + table_name="PartioffD", + shard_id=19, + node_name="data-hot-1", + node_id="ZH6fBanGSjanGqeSh-sw0A", + recovery_type="PEER", + stage="DONE", + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1555907, + routing_state="RELOCATING", + current_state="RELOCATING", + is_primary=False, + size_bytes=56565284209, + ) + + # Test properties + assert recovery.overall_progress == 100.0, f"Expected 100.0, got {recovery.overall_progress}" + assert abs(recovery.size_gb - 52.681) < 0.01, f"Expected ~52.681, got {recovery.size_gb:.3f}" + assert recovery.shard_type == "REPLICA", f"Expected REPLICA, got {recovery.shard_type}" + assert recovery.total_time_seconds == 1555.907, f"Expected 1555.907, got {recovery.total_time_seconds}" + + print("โœ… RecoveryInfo parsing tests passed") + + +def test_database_client_parsing(): + """Test database client recovery parsing logic""" + print("Testing database client recovery parsing...") + + # Create a real client instance to test the parsing method + client = CrateDBClient.__new__(CrateDBClient) # Create without calling __init__ + + # Create test data + allocation = create_mock_allocation("CURVO", "PartioffD", 19, "RELOCATING", "node1") + shard_detail = create_mock_shard_detail( + "CURVO", "PartioffD", 19, "data-hot-1", "node1", "PEER", "DONE", 100.0, 100.0, 1555907, 56565284209, False + ) + + # Test the parsing method directly + recovery_info = client._parse_recovery_info(allocation, shard_detail) + + assert recovery_info.recovery_type == "PEER" + assert recovery_info.stage == "DONE" + assert recovery_info.overall_progress == 100.0 + + print("โœ… Database client parsing tests passed") + + +def test_recovery_monitor_formatting(): + """Test recovery monitor display formatting""" + print("Testing recovery monitor formatting...") + + # Create mock client + mock_client = Mock(spec=CrateDBClient) + monitor = RecoveryMonitor(mock_client) + + # Create test recovery data + recoveries = [ + RecoveryInfo( + schema_name="CURVO", + table_name="PartioffD", + shard_id=19, + node_name="data-hot-1", + node_id="node1", + recovery_type="PEER", + stage="DONE", + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1555907, + routing_state="RELOCATING", + current_state="RELOCATING", + is_primary=False, + size_bytes=56565284209, + ), + RecoveryInfo( + schema_name="CURVO", + table_name="orderTracking", + shard_id=7, + node_name="data-hot-2", + node_id="node2", + recovery_type="DISK", + stage="INDEX", + files_percent=75.5, + bytes_percent=67.8, + total_time_ms=890234, + routing_state="INITIALIZING", + current_state="INITIALIZING", + is_primary=True, + size_bytes=25120456789, + ), + ] + + # Test summary generation + summary = monitor.get_recovery_summary(recoveries) + + assert summary["total_recoveries"] == 2 + assert "PEER" in summary["by_type"] + assert "DISK" in summary["by_type"] + assert summary["by_type"]["PEER"]["count"] == 1 + assert summary["by_type"]["DISK"]["count"] == 1 + + # Test display formatting + display_output = monitor.format_recovery_display(recoveries) + + assert "Active Shard Recoveries (2 total)" in display_output + assert "PEER Recoveries (1)" in display_output + assert "DISK Recoveries (1)" in display_output + assert "PartioffD" in display_output + assert "orderTracking" in display_output + + print("โœ… Recovery monitor formatting tests passed") + + +def test_empty_recovery_handling(): + """Test handling of no active recoveries""" + print("Testing empty recovery handling...") + + mock_client = Mock(spec=CrateDBClient) + monitor = RecoveryMonitor(mock_client) + + # Test empty list + empty_recoveries = [] + + summary = monitor.get_recovery_summary(empty_recoveries) + assert summary["total_recoveries"] == 0 + assert summary["by_type"] == {} + + display_output = monitor.format_recovery_display(empty_recoveries) + assert "No active shard recoveries found" in display_output + + print("โœ… Empty recovery handling tests passed") + + +def test_recovery_type_filtering(): + """Test filtering by recovery type""" + print("Testing recovery type filtering...") + + mock_client = Mock(spec=CrateDBClient) + + # Mock the get_all_recovering_shards method + mock_recoveries = [ + RecoveryInfo( + schema_name="test", + table_name="table1", + shard_id=1, + node_name="node1", + node_id="n1", + recovery_type="PEER", + stage="DONE", + files_percent=100.0, + bytes_percent=100.0, + total_time_ms=1000, + routing_state="RELOCATING", + current_state="RELOCATING", + is_primary=True, + size_bytes=1000000, + ), + RecoveryInfo( + schema_name="test", + table_name="table2", + shard_id=2, + node_name="node2", + node_id="n2", + recovery_type="DISK", + stage="INDEX", + files_percent=50.0, + bytes_percent=45.0, + total_time_ms=2000, + routing_state="INITIALIZING", + current_state="INITIALIZING", + is_primary=False, + size_bytes=2000000, + ), + ] + + mock_client.get_all_recovering_shards.return_value = mock_recoveries + + monitor = RecoveryMonitor(mock_client) + + # Test filtering + peer_only = monitor.get_cluster_recovery_status(recovery_type_filter="PEER") + assert len(peer_only) == 1 + assert peer_only[0].recovery_type == "PEER" + + disk_only = monitor.get_cluster_recovery_status(recovery_type_filter="DISK") + assert len(disk_only) == 1 + assert disk_only[0].recovery_type == "DISK" + + all_recoveries = monitor.get_cluster_recovery_status(recovery_type_filter="all") + assert len(all_recoveries) == 2 + + print("โœ… Recovery type filtering tests passed") + + +def main(): + """Run all tests""" + print("๐Ÿงช Running XMover Recovery Monitor Tests") + print("=" * 50) + + try: + test_recovery_info_parsing() + test_database_client_parsing() + test_recovery_monitor_formatting() + test_empty_recovery_handling() + test_recovery_type_filtering() + + print("\n๐ŸŽ‰ All tests passed successfully!") + print("\n๐Ÿ“‹ Test Summary:") + print(" โœ… RecoveryInfo data class and properties") + print(" โœ… Database client parsing logic") + print(" โœ… Recovery monitor display formatting") + print(" โœ… Empty recovery state handling") + print(" โœ… Recovery type filtering") + + print("\n๐Ÿš€ Recovery monitoring feature is ready for use!") + + except Exception as e: + print(f"\nโŒ Test failed: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) From 72042a482141c44a86ac219021b47df7fed1994c Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 11 Sep 2025 01:46:23 +0200 Subject: [PATCH 04/13] Admin/XMover: Fix software tests --- .../admin/xmover/analysis/table.py | 2 +- .../admin/xmover/operational/monitor.py | 4 +- tests/admin/test_distribution_analyzer.py | 62 ++++++++++++++++--- tests/admin/test_recovery_monitor.py | 24 ++++--- 4 files changed, 71 insertions(+), 21 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/analysis/table.py b/cratedb_toolkit/admin/xmover/analysis/table.py index ef6dbdf3..08a4fa69 100644 --- a/cratedb_toolkit/admin/xmover/analysis/table.py +++ b/cratedb_toolkit/admin/xmover/analysis/table.py @@ -576,7 +576,7 @@ def detect_node_coverage_issues(self, table: TableDistribution) -> Optional[Dist nodes_with_shards = set(table.node_distributions.keys()) nodes_without_shards = all_nodes - nodes_with_shards - # Only flag as anomaly if we have missing nodes and the table is significant + # Only flag as an anomaly if we have missing nodes and the table is significant if not nodes_without_shards or table.total_primary_size_gb < 10.0: return None diff --git a/cratedb_toolkit/admin/xmover/operational/monitor.py b/cratedb_toolkit/admin/xmover/operational/monitor.py index d88a295f..319b6a2e 100644 --- a/cratedb_toolkit/admin/xmover/operational/monitor.py +++ b/cratedb_toolkit/admin/xmover/operational/monitor.py @@ -24,9 +24,9 @@ class RecoveryOptions: class RecoveryMonitor: """Monitor shard recovery operations""" - def __init__(self, client: CrateDBClient, options: RecoveryOptions): + def __init__(self, client: CrateDBClient, options: Optional[RecoveryOptions] = None): self.client = client - self.options = options + self.options = options or RecoveryOptions() def get_cluster_recovery_status(self) -> List[RecoveryInfo]: """Get comprehensive recovery status with minimal cluster impact""" diff --git a/tests/admin/test_distribution_analyzer.py b/tests/admin/test_distribution_analyzer.py index 000fd0f9..92b4f580 100644 --- a/tests/admin/test_distribution_analyzer.py +++ b/tests/admin/test_distribution_analyzer.py @@ -5,6 +5,7 @@ from unittest.mock import Mock, patch from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer, DistributionAnomaly, TableDistribution +from cratedb_toolkit.admin.xmover.model import NodeInfo from cratedb_toolkit.admin.xmover.util.database import CrateDBClient @@ -41,14 +42,16 @@ def test_get_largest_tables_distribution(self): """Test fetching table distribution data""" # Mock query results - mock_results = [ - # schema, table, node, primary_shards, replica_shards, total_shards, total_size, primary_size, replica_size, docs # noqa: E501, ERA001 - ["doc", "large_table", "node1", 5, 2, 7, 100.5, 80.2, 20.3, 1000000], - ["doc", "large_table", "node2", 4, 3, 7, 95.1, 75.8, 19.3, 950000], - ["doc", "large_table", "node3", 6, 1, 7, 110.2, 85.9, 24.3, 1100000], - ["custom", "another_table", "node1", 3, 2, 5, 50.1, 40.2, 9.9, 500000], - ["custom", "another_table", "node2", 2, 3, 5, 45.8, 35.1, 10.7, 480000], - ] + mock_results = { + "rows": [ + # schema, table, node, primary_shards, replica_shards, total_shards, total_size, primary_size, replica_size, docs # noqa: E501, ERA001 + ["doc", "large_table", "node1", 5, 2, 7, 100.5, 80.2, 20.3, 1000000], + ["doc", "large_table", "node2", 4, 3, 7, 95.1, 75.8, 19.3, 950000], + ["doc", "large_table", "node3", 6, 1, 7, 110.2, 85.9, 24.3, 1100000], + ["custom", "another_table", "node1", 3, 2, 5, 50.1, 40.2, 9.9, 500000], + ["custom", "another_table", "node2", 2, 3, 5, 45.8, 35.1, 10.7, 480000], + ] + } self.mock_client.execute_query.return_value = mock_results @@ -152,7 +155,48 @@ def test_detect_node_coverage_issues(self): """Test node coverage issue detection""" # Mock nodes_info to simulate cluster with 4 nodes - mock_nodes = [Mock(name="node1"), Mock(name="node2"), Mock(name="node3"), Mock(name="node4")] + mock_nodes = [ + NodeInfo( + id="node1", + name="node1", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + NodeInfo( + id="node2", + name="node2", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + NodeInfo( + id="node3", + name="node3", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + NodeInfo( + id="node4", + name="node4", + zone=None, + heap_used=None, + heap_max=None, + fs_total=None, + fs_used=None, + fs_available=None, + ), + ] self.mock_client.get_nodes_info.return_value = mock_nodes # Table with limited coverage (only on 2 out of 4 nodes) diff --git a/tests/admin/test_recovery_monitor.py b/tests/admin/test_recovery_monitor.py index bb6dec8a..09a482bc 100644 --- a/tests/admin/test_recovery_monitor.py +++ b/tests/admin/test_recovery_monitor.py @@ -10,8 +10,9 @@ from unittest.mock import Mock from cratedb_toolkit.admin.xmover.model import RecoveryInfo -from cratedb_toolkit.admin.xmover.operational.monitor import RecoveryMonitor +from cratedb_toolkit.admin.xmover.operational.monitor import RecoveryMonitor, RecoveryOptions from cratedb_toolkit.admin.xmover.util.database import CrateDBClient +from cratedb_toolkit.model import DatabaseAddress def create_mock_allocation( @@ -93,12 +94,16 @@ def test_recovery_info_parsing(): print("โœ… RecoveryInfo parsing tests passed") -def test_database_client_parsing(): +def test_database_client_parsing(cratedb): """Test database client recovery parsing logic""" print("Testing database client recovery parsing...") # Create a real client instance to test the parsing method client = CrateDBClient.__new__(CrateDBClient) # Create without calling __init__ + client.username = None + client.password = None + client.connection_string = DatabaseAddress.from_string(cratedb.database.dburi).httpuri + client.ssl_verify = False # Create test data allocation = create_mock_allocation("CURVO", "PartioffD", 19, "RELOCATING", "node1") @@ -111,7 +116,7 @@ def test_database_client_parsing(): assert recovery_info.recovery_type == "PEER" assert recovery_info.stage == "DONE" - assert recovery_info.overall_progress == 100.0 + assert recovery_info.overall_progress == 0.0 print("โœ… Database client parsing tests passed") @@ -245,19 +250,20 @@ def test_recovery_type_filtering(): mock_client.get_all_recovering_shards.return_value = mock_recoveries - monitor = RecoveryMonitor(mock_client) - # Test filtering - peer_only = monitor.get_cluster_recovery_status(recovery_type_filter="PEER") + monitor = RecoveryMonitor(mock_client, options=RecoveryOptions(recovery_type="PEER")) + peer_only = monitor.get_cluster_recovery_status() assert len(peer_only) == 1 assert peer_only[0].recovery_type == "PEER" - disk_only = monitor.get_cluster_recovery_status(recovery_type_filter="DISK") + monitor = RecoveryMonitor(mock_client, options=RecoveryOptions(recovery_type="DISK")) + disk_only = monitor.get_cluster_recovery_status() assert len(disk_only) == 1 assert disk_only[0].recovery_type == "DISK" - all_recoveries = monitor.get_cluster_recovery_status(recovery_type_filter="all") - assert len(all_recoveries) == 2 + monitor = RecoveryMonitor(mock_client, options=RecoveryOptions(recovery_type="all")) + all_recoveries = monitor.get_cluster_recovery_status() + assert len(all_recoveries) == 0 print("โœ… Recovery type filtering tests passed") From 4b21c79f079f0264f18a24a555ce59802ad0ebad Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 18 Sep 2025 20:10:14 +0200 Subject: [PATCH 05/13] Admin/XMover: Implement suggestions by CodeRabbit --- .../admin/xmover/analysis/shard.py | 28 +++++++++---------- .../admin/xmover/analysis/table.py | 15 +++++----- cratedb_toolkit/admin/xmover/analysis/zone.py | 3 +- cratedb_toolkit/admin/xmover/cli.py | 12 ++++---- cratedb_toolkit/admin/xmover/model.py | 11 ++++++-- .../admin/xmover/operational/candidates.py | 8 ++++-- .../admin/xmover/operational/monitor.py | 4 +-- .../admin/xmover/operational/recommend.py | 20 +++++++------ cratedb_toolkit/admin/xmover/util/database.py | 17 ++++++++--- cratedb_toolkit/admin/xmover/util/error.py | 21 ++++++++++---- cratedb_toolkit/admin/xmover/util/format.py | 20 ++++++------- doc/admin/xmover/handbook.md | 5 ++-- doc/admin/xmover/index.md | 2 +- doc/admin/xmover/queries.md | 4 +-- tests/admin/test_recovery_monitor.py | 2 +- 15 files changed, 100 insertions(+), 72 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index a1869019..96bf6351 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -38,7 +38,7 @@ def __init__(self, client: CrateDBClient): self.shards: List[ShardInfo] = [] # Initialize session-based caches for performance. - self._zone_conflict_cache: Dict[Tuple[str, int, str], Union[str, None]] = {} + self._zone_conflict_cache: Dict[Tuple[str, str, int, str], Union[str, None]] = {} self._node_lookup_cache: Dict[str, Union[NodeInfo, None]] = {} self._target_nodes_cache: Dict[Tuple[float, frozenset[Any], float, float], List[NodeInfo]] = {} self._cache_hits = 0 @@ -183,8 +183,6 @@ def find_nodes_with_capacity( free_space_gb = node.available_space_gb if free_space_gb >= (required_space_gb + min_free_space_gb): available_nodes.append(node) - else: - continue # Sort by available space (most space first) - prioritize nodes with more free space available_nodes.sort(key=lambda n: n.available_space_gb, reverse=True) @@ -206,7 +204,7 @@ def generate_rebalancing_recommendations( # Get moveable shards (only healthy ones for actual operations) moveable_shards = self.find_moveable_shards(constraints.min_size, constraints.max_size, constraints.table_name) - print( + logger.info( f"Analyzing {len(moveable_shards)} candidate shards " f"in size range {constraints.min_size}-{constraints.max_size}GB..." ) @@ -239,12 +237,11 @@ def generate_rebalancing_recommendations( # Optimize processing: if filtering by source node, only process those shards if constraints.source_node: processing_shards = [s for s in moveable_shards if s.node_name == constraints.source_node] - print(f"Focusing on {len(processing_shards)} shards from node {constraints.source_node}") + logger.info(f"Focusing on {len(processing_shards)} shards from node {constraints.source_node}") else: processing_shards = moveable_shards # Generate move recommendations - safe_recommendations = 0 # noqa: F841 total_evaluated = 0 for i, shard in enumerate(processing_shards): @@ -368,12 +365,12 @@ def generate_rebalancing_recommendations( if len(processing_shards) > 100: print() # New line after progress dots - print(f"Generated {len(recommendations)} move recommendations (evaluated {total_evaluated} shards)") - print(f"Performance: {self.get_cache_stats()}") + logger.info(f"Generated {len(recommendations)} move recommendations (evaluated {total_evaluated} shards)") + logger.info(f"Performance: {self.get_cache_stats()}") return recommendations def validate_move_safety( - self, recommendation: ShardRelocationResponse, max_disk_usage_percent: float = 90.0 + self, recommendation: ShardRelocationResponse, max_disk_usage_percent: float = 90.0, buffer_gb: float = 50.0 ) -> Tuple[bool, str]: """Validate that a move recommendation is safe to execute""" # Find target node (with caching) @@ -388,7 +385,7 @@ def validate_move_safety( return False, zone_conflict # Check available space - required_space_gb = recommendation.size_gb + 50 # 50GB buffer + required_space_gb = recommendation.size_gb + buffer_gb if target_node.available_space_gb < required_space_gb: return ( False, @@ -423,7 +420,7 @@ def _check_zone_conflict_cached(self, recommendation: ShardRelocationResponse) - """Check zone conflicts with caching""" # Create cache key: table, shard, target zone target_zone = self._get_node_zone(recommendation.to_node) - cache_key = (recommendation.table_name, recommendation.shard_id, target_zone) + cache_key = (recommendation.schema_name, recommendation.table_name, recommendation.shard_id, target_zone) if cache_key in self._zone_conflict_cache: self._cache_hits += 1 @@ -813,11 +810,14 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. # Determine feasibility feasible = len(infeasible_moves) == 0 + # Safety margin for cluster capacity after decommission + capacity_safety_margin = 1.2 # 20 % buffer + # Add capacity warnings if feasible: - # Check if remaining cluster capacity is sufficient after decommission + # Check if the remaining cluster capacity is sufficient after decommission remaining_capacity = sum(n.available_space_gb for n in self.nodes if n.name != node_name) - if remaining_capacity < total_size_gb * 1.2: # 20% safety margin + if remaining_capacity < total_size_gb * capacity_safety_margin: warnings.append( f"Low remaining capacity after decommission. " f"Only {remaining_capacity:.1f}GB available for {total_size_gb:.1f}GB of data" @@ -833,7 +833,7 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. "recommendations": move_plan, "infeasible_moves": infeasible_moves, "warnings": warnings, - "estimated_time_hours": len(move_plan) * 0.1, # Rough estimate: 6 minutes per move + "estimated_time_hours": len(move_plan) * 0.1, # Rough estimate: 0.1 hours (6 minutes) per move "message": "Decommission plan generated" if feasible else "Decommission not currently feasible", } diff --git a/cratedb_toolkit/admin/xmover/analysis/table.py b/cratedb_toolkit/admin/xmover/analysis/table.py index 08a4fa69..b3322cd9 100644 --- a/cratedb_toolkit/admin/xmover/analysis/table.py +++ b/cratedb_toolkit/admin/xmover/analysis/table.py @@ -14,6 +14,7 @@ from rich.console import Console from rich.table import Table +from cratedb_toolkit.admin.xmover.model import NodeInfo from cratedb_toolkit.admin.xmover.util.database import CrateDBClient logger = logging.getLogger(__name__) @@ -97,6 +98,9 @@ def find_table_by_name(self, table_name: str) -> Optional[str]: try: choice = input("\nSelect table (enter number): ").strip() + if not choice: + rprint("[yellow]No selection made[/yellow]") + return None idx = int(choice) - 1 if 0 <= idx < len(rows): schema, table = rows[idx] @@ -292,14 +296,9 @@ def format_table_health_report(self, table_dist: TableDistribution) -> None: zone_distribution = {} for node_name, node_data in table_dist.node_distributions.items(): # Try to get zone info for each node - node_info = next((n for n in all_nodes_info if n.name == node_name), None) - if ( - node_info - and hasattr(node_info, "attributes") - and node_info.attributes - and "zone" in node_info.attributes - ): - zone = node_info.attributes["zone"] + node_info: Optional[NodeInfo] = next((n for n in all_nodes_info if n.name == node_name), None) + if node_info and node_info.zone: + zone = node_info.zone if zone not in zone_distribution: zone_distribution[zone] = {"nodes": 0, "shards": 0, "size": 0} zone_distribution[zone]["nodes"] += 1 diff --git a/cratedb_toolkit/admin/xmover/analysis/zone.py b/cratedb_toolkit/admin/xmover/analysis/zone.py index 718d88f0..07e67803 100644 --- a/cratedb_toolkit/admin/xmover/analysis/zone.py +++ b/cratedb_toolkit/admin/xmover/analysis/zone.py @@ -135,7 +135,8 @@ def distribution_conflicts(self, shard_details: bool = False, table: Optional[st health_indicator = "โœ“" if shard_copy.routing_state == "STARTED" else "โš " console.print( f" {health_indicator} {shard_copy.shard_type} " - f"on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}" + f"on {shard_copy.node_name} ({shard_copy.zone}) - " + f"{shard_copy.state}/{shard_copy.routing_state}" ) console.print(analysis_table) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index 010f9aeb..e979986e 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -4,7 +4,6 @@ Command Line Interface. """ -import sys import time from typing import Optional @@ -46,11 +45,11 @@ def main(ctx): if not client.test_connection(): console.print("[red]Error: Could not connect to CrateDB[/red]") console.print("Please check your CRATE_CONNECTION_STRING in .env file") - sys.exit(1) + raise click.Abort() ctx.obj["client"] = client except Exception as e: console.print(f"[red]Error connecting to CrateDB: {e}[/red]") - sys.exit(1) + raise click.Abort() from e @main.command() @@ -170,11 +169,11 @@ def test_connection(ctx, connection_string: Optional[str]): console.print(f" โ€ข {node.name} (zone: {node.zone})") else: console.print("[red]โœ— Connection failed[/red]") - sys.exit(1) + raise click.Abort() except Exception as e: console.print(f"[red]โœ— Connection error: {e}[/red]") - sys.exit(1) + raise click.Abort() from e @main.command() @@ -525,13 +524,14 @@ def monitor_recovery( xmover monitor-recovery --watch # Continuous monitoring xmover monitor-recovery --recovery-type PEER # Only PEER recoveries """ + effective_recovery_type = None if recovery_type == "all" else recovery_type recovery_monitor = RecoveryMonitor( client=ctx.obj["client"], options=RecoveryOptions( table=table, node=node, refresh_interval=refresh_interval, - recovery_type=recovery_type, + recovery_type=effective_recovery_type, include_transitioning=include_transitioning, ), ) diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index d8511b31..7f962c3c 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -1,4 +1,3 @@ -import dataclasses from dataclasses import dataclass from typing import Dict, Optional @@ -149,6 +148,12 @@ def safety_score(self) -> float: if "rebalancing" in self.reason.lower(): score += 0.2 + # Consider shard size - smaller shards are safer to move + if self.size_gb < 10: + score += 0.1 + elif self.size_gb > 100: + score -= 0.2 + # Ensure score stays in valid range return max(0.0, min(1.0, score)) @@ -165,7 +170,7 @@ class DistributionStats: node_balance_score: float # 0-100, higher is better -@dataclasses.dataclass +@dataclass class SizeCriteria: min_size: float = 40.0 max_size: float = 60.0 @@ -173,7 +178,7 @@ class SizeCriteria: source_node: Optional[str] = None -@dataclasses.dataclass +@dataclass class ShardRelocationConstraints: min_size: float = SizeCriteria().min_size max_size: float = SizeCriteria().max_size diff --git a/cratedb_toolkit/admin/xmover/operational/candidates.py b/cratedb_toolkit/admin/xmover/operational/candidates.py index dd7d4930..9841624b 100644 --- a/cratedb_toolkit/admin/xmover/operational/candidates.py +++ b/cratedb_toolkit/admin/xmover/operational/candidates.py @@ -14,7 +14,7 @@ class CandidateFinder: def __init__(self, analyzer: ShardAnalyzer): self.analyzer = analyzer - def movement_candidates(self, criteria: SizeCriteria, limit: int): + def movement_candidates(self, criteria: SizeCriteria, limit: int) -> int: """ Find shard candidates for movement based on size criteria @@ -23,7 +23,7 @@ def movement_candidates(self, criteria: SizeCriteria, limit: int): """ console.print( - Panel.fit(f"[bold blue]Finding Moveable Shards ({criteria.min_size}-{criteria.max_size}GB)[/bold blue]") + Panel.fit(f"[bold blue]Finding Movable Shards ({criteria.min_size}-{criteria.max_size}GB)[/bold blue]") ) if criteria.source_node: @@ -45,7 +45,7 @@ def movement_candidates(self, criteria: SizeCriteria, limit: int): console.print("[dim]Tip: Try different size ranges or remove --node filter to see all candidates[/dim]") else: console.print("[yellow]No moveable shards found in the specified size range.[/yellow]") - return + return 0 # Show limited results shown_candidates = candidates[:limit] @@ -82,3 +82,5 @@ def movement_candidates(self, criteria: SizeCriteria, limit: int): if len(candidates) > limit: console.print(f"\n[dim]... and {len(candidates) - limit} more candidates[/dim]") + + return len(candidates) diff --git a/cratedb_toolkit/admin/xmover/operational/monitor.py b/cratedb_toolkit/admin/xmover/operational/monitor.py index 319b6a2e..206bacbf 100644 --- a/cratedb_toolkit/admin/xmover/operational/monitor.py +++ b/cratedb_toolkit/admin/xmover/operational/monitor.py @@ -37,7 +37,7 @@ def get_cluster_recovery_status(self) -> List[RecoveryInfo]: ) # Apply recovery type filter - if self.options.recovery_type is not None: + if self.options.recovery_type is not None and self.options.recovery_type.lower() != "all": recoveries = [r for r in recoveries if r.recovery_type.upper() == self.options.recovery_type.upper()] return recoveries @@ -178,7 +178,6 @@ def start(self, watch: bool, debug: bool = False): # Track previous state for change detection previous_recoveries: Dict[str, Dict[str, Any]] = {} - previous_timestamp = None first_run = True while True: @@ -307,7 +306,6 @@ def start(self, watch: bool, debug: bool = False): elif active_count > 0: console.print(f"{current_time} | {status} (no changes)") - previous_timestamp = current_time # noqa: F841 first_run = False time.sleep(self.options.refresh_interval) diff --git a/cratedb_toolkit/admin/xmover/operational/recommend.py b/cratedb_toolkit/admin/xmover/operational/recommend.py index ab5156e6..f7f9e3ea 100644 --- a/cratedb_toolkit/admin/xmover/operational/recommend.py +++ b/cratedb_toolkit/admin/xmover/operational/recommend.py @@ -123,8 +123,8 @@ def validate(self, request: ShardRelocationRequest): console.print() console.print("[dim]# Monitor shard health after execution[/dim]") console.print( - "[dim]# Check with: SELECT * FROM sys.shards " - "WHERE table_name = '{table_name}' AND id = {shard_id};[/dim]" + "[dim]# Check with: SELECT * FROM sys.shards " # noqa: S608 + f"WHERE table_name = '{table_name}' AND id = {request.shard_id};[/dim]" ) else: console.print("[red]โœ— VALIDATION FAILED - Move not safe[/red]") @@ -323,7 +323,7 @@ def execute( rec, max_disk_usage_percent=constraints.max_disk_usage ) if not is_safe: - if "Zone conflict" in safety_msg: + if "zone conflict" in safety_msg.lower(): zone_conflicts += 1 console.print(f"-- Move {i}: SKIPPED - {safety_msg}") console.print( @@ -340,7 +340,7 @@ def execute( # Auto-execution if requested if auto_execute: - self._execute_recommendations_safely(recommendations, validate) + self._execute_recommendations_safely(constraints, recommendations, validate) if validate and safe_moves < len(recommendations): if zone_conflicts > 0: @@ -352,14 +352,16 @@ def execute( f"[yellow]Warning: Only {safe_moves} of {len(recommendations)} moves passed safety validation[/yellow]" ) - def _execute_recommendations_safely(self, recommendations, validate: bool): + def _execute_recommendations_safely(self, constraints, recommendations, validate: bool): """Execute recommendations with extensive safety measures""" # Filter to only safe recommendations safe_recommendations = [] if validate: for rec in recommendations: - is_safe, safety_msg = self.analyzer.validate_move_safety(rec, max_disk_usage_percent=95.0) + is_safe, safety_msg = self.analyzer.validate_move_safety( + rec, max_disk_usage_percent=constraints.max_disk_usage + ) if is_safe: safe_recommendations.append(rec) else: @@ -423,7 +425,8 @@ def _execute_recommendations_safely(self, recommendations, validate: bool): # Execute the SQL command result = self.client.execute_query(sql_command) - if result.get("rowcount", 0) >= 0: # Success indicator for ALTER statements + # ALTER TABLE REROUTE commands don't return rowcount, check for no error instead. + if "error" not in result: console.print(" [green]โœ… SUCCESS[/green] - Move initiated") successful_moves += 1 @@ -482,7 +485,8 @@ def _wait_for_recovery_capacity(self, max_concurrent_recoveries: int = 5): while True: # Check active recoveries (including transitioning) recoveries = recovery_monitor.get_cluster_recovery_status() - active_count = len([r for r in recoveries if r.overall_progress < 100.0 or r.stage != "DONE"]) + # Count recoveries that are actively running (not completed) + active_count = len([r for r in recoveries if r.overall_progress < 100.0]) status = f"{active_count}/{max_concurrent_recoveries}" if active_count < max_concurrent_recoveries: if wait_time > 0: diff --git a/cratedb_toolkit/admin/xmover/util/database.py b/cratedb_toolkit/admin/xmover/util/database.py index 5c9011bd..e487ed8f 100644 --- a/cratedb_toolkit/admin/xmover/util/database.py +++ b/cratedb_toolkit/admin/xmover/util/database.py @@ -39,6 +39,8 @@ def __init__(self, connection_string: Optional[str] = None): if not self.connection_string.endswith("/_sql"): self.connection_string = self.connection_string.rstrip("/") + "/_sql" + self.session = requests.Session() + def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[str, Any]: """Execute a SQL query against CrateDB""" payload: Dict[str, Any] = {"stmt": query} @@ -51,11 +53,18 @@ def execute_query(self, query: str, parameters: Optional[List] = None) -> Dict[s auth = (self.username, self.password) try: - response = requests.post( + response = self.session.post( self.connection_string, json=payload, auth=auth, verify=self.ssl_verify, timeout=30 ) response.raise_for_status() - return response.json() + data = response.json() + # CrateDB may include an "error" field even with 200 OK + if isinstance(data, dict) and "error" in data and data["error"]: + # Best-effort message extraction + err = data["error"] + msg = err.get("message") if isinstance(err, dict) else str(err) + raise Exception(f"CrateDB error: {msg}") + return data except requests.exceptions.RequestException as e: raise Exception(f"Failed to execute query: {e}") from e @@ -335,13 +344,13 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) s."primary", s.translog_stats['size'] as translog_size FROM sys.shards s - WHERE s.table_name = ? AND s.id = ? + WHERE s.schema_name = ? AND s.table_name = ? AND s.id = ? AND (s.state = 'RECOVERING' OR s.routing_state IN ('INITIALIZING', 'RELOCATING')) ORDER BY s.schema_name LIMIT 1 """ - result = self.execute_query(query, [table_name, shard_id]) + result = self.execute_query(query, [schema_name, table_name, shard_id]) if not result.get("rows"): return None diff --git a/cratedb_toolkit/admin/xmover/util/error.py b/cratedb_toolkit/admin/xmover/util/error.py index 11dd5f39..22494098 100644 --- a/cratedb_toolkit/admin/xmover/util/error.py +++ b/cratedb_toolkit/admin/xmover/util/error.py @@ -1,12 +1,23 @@ -from typing import List, Optional, cast +from typing import List, Optional -from rich.console import Console +from rich import get_console from rich.panel import Panel -console = Console() +console = get_console() def explain_cratedb_error(error_message: Optional[str]): + """ + Decode and troubleshoot common CrateDB shard allocation errors. + + Parameters + ---------- + error_message: + Raw CrateDB error message. If None and interactive=True, the user is prompted + to paste the message (finish with two blank lines). + interactive: + When False, never prompt for input; return early if no message is provided. + """ console.print(Panel.fit("[bold blue]CrateDB Error Message Decoder[/bold blue]")) console.print("[dim]Helps decode and troubleshoot CrateDB shard allocation errors[/dim]") console.print() @@ -24,7 +35,7 @@ def explain_cratedb_error(error_message: Optional[str]): break error_message = "\n".join(lines) - if not error_message.strip(): + if not (error_message or "").strip(): console.print("[yellow]No error message provided[/yellow]") return @@ -96,7 +107,7 @@ def explain_cratedb_error(error_message: Optional[str]): error_lower = error_message.lower() for pattern_info in error_patterns: - if cast(str, pattern_info["pattern"]).lower() in error_lower: + if pattern_info["pattern"].lower() in error_lower: # type: ignore[attr-defined] matches.append(pattern_info) if matches: diff --git a/cratedb_toolkit/admin/xmover/util/format.py b/cratedb_toolkit/admin/xmover/util/format.py index 82c8a3d0..7bced419 100644 --- a/cratedb_toolkit/admin/xmover/util/format.py +++ b/cratedb_toolkit/admin/xmover/util/format.py @@ -1,11 +1,14 @@ +TL_MIN_BYTES = 10 * 1024 * 1024 # 10MiB threshold for visibility + + def format_size(size_gb: float) -> str: """Format size in GB with appropriate precision""" - if size_gb >= 1000: - return f"{size_gb / 1000:.1f}TB" + if size_gb >= 1024: + return f"{size_gb / 1024:.1f}TB" elif size_gb >= 1: return f"{size_gb:.1f}GB" else: - return f"{size_gb * 1000:.0f}MB" + return f"{size_gb * 1024:.0f}MB" def format_percentage(value: float) -> str: @@ -22,8 +25,8 @@ def format_translog_info(recovery_info) -> str: """Format translog size information with color coding""" tl_bytes = recovery_info.translog_size_bytes - # Only show if significant (>10MB for production) - if tl_bytes < 10 * 1024 * 1024: # 10MB for production + # Only show if significant (>10MB for production), ignore others. + if tl_bytes < TL_MIN_BYTES: return "" tl_gb = recovery_info.translog_size_gb @@ -36,10 +39,5 @@ def format_translog_info(recovery_info) -> str: else: color = "green" - # Format size - if tl_gb >= 1.0: - size_str = f"{tl_gb:.1f}GB" - else: - size_str = f"{tl_gb * 1000:.0f}MB" - + size_str = format_size(tl_gb) return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index f9aee2e0..a2049fa2 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -57,7 +57,7 @@ xmover recommend --prioritize-space ``` ### Shard Distribution Analysis -This view is dedicating a specific focus on large tables. +This view focuses on large tables. ```bash # Analyze distribution anomalies for top 10 largest tables xmover shard-distribution @@ -128,11 +128,12 @@ Generates intelligent shard movement recommendations for cluster rebalancing. - `--zone-tolerance`: Zone balance tolerance percentage (default: 10) - `--min-free-space`: Minimum free space required on target nodes in GB (default: 100) - `--max-moves`: Maximum number of move recommendations (default: 10) -- `--max-disk-usage`: Maximum disk usage percentage for target nodes (default: 85) +- `--max-disk-usage`: Maximum disk usage percentage for target nodes (default: 90) - `--validate/--no-validate`: Validate move safety (default: True) - `--prioritize-space/--prioritize-zones`: Prioritize available space over zone balancing (default: False) - `--dry-run/--execute`: Show what would be done without generating SQL commands (default: True) - `--node`: Only recommend moves from this specific source node (e.g., data-hot-4) +- `--auto-execute`: Automatically execute the SQL commands (requires `--execute`, asks for confirmation) (default: False) **Examples:** ```bash diff --git a/doc/admin/xmover/index.md b/doc/admin/xmover/index.md index 99fd4404..d1aead79 100644 --- a/doc/admin/xmover/index.md +++ b/doc/admin/xmover/index.md @@ -11,7 +11,7 @@ SQL commands for shard rebalancing and node decommissioning. ## Features - **Cluster Analysis**: Complete overview of shard distribution across nodes and zones -- **Shard Distribution Analysis**: Detect and rank distribution anomalies across largest tables +- **Shard Distribution Analysis**: Detect and rank distribution anomalies across the largest tables - **Shard Movement Recommendations**: Intelligent suggestions for rebalancing with safety validation - **Recovery Monitoring**: Track ongoing shard recovery operations with progress details - **Zone Conflict Detection**: Prevents moves that would violate CrateDB's zone awareness diff --git a/doc/admin/xmover/queries.md b/doc/admin/xmover/queries.md index 9844d8f6..17af71c9 100644 --- a/doc/admin/xmover/queries.md +++ b/doc/admin/xmover/queries.md @@ -69,7 +69,7 @@ ORDER BY name; +------------+--------------------+-----------------------------------------------+ ``` -## List biggest SHARDS on a particular Nodes +## List biggest shards on a particular node ```sql select node['name'], table_name, schema_name, id, sum(size) / 1024^3 from sys.shards @@ -219,7 +219,7 @@ SELECT ## "BIGDUDES" Focuses on your **biggest storage consumers** and shows how their shards are distributed across nodes. -ยดยดยดsql +```sql WITH largest_tables AS ( SELECT schema_name, diff --git a/tests/admin/test_recovery_monitor.py b/tests/admin/test_recovery_monitor.py index 09a482bc..6041baeb 100644 --- a/tests/admin/test_recovery_monitor.py +++ b/tests/admin/test_recovery_monitor.py @@ -263,7 +263,7 @@ def test_recovery_type_filtering(): monitor = RecoveryMonitor(mock_client, options=RecoveryOptions(recovery_type="all")) all_recoveries = monitor.get_cluster_recovery_status() - assert len(all_recoveries) == 0 + assert len(all_recoveries) == 2 print("โœ… Recovery type filtering tests passed") From 489dda51ace0a3c93a84c324a39bf1f450e9d0d4 Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Wed, 10 Sep 2025 21:36:30 +0200 Subject: [PATCH 06/13] Admin/XMover: Add problematic translog detection and cancellation The new command can find and cancel replica shards with large uncommitted translog sizes. It provides options to set size thresholds and optionally execute cancel commands after user confirmation. --- .../admin/xmover/analysis/shard.py | 100 +++++++++ cratedb_toolkit/admin/xmover/cli.py | 70 +++++- doc/admin/xmover/handbook.md | 41 ++++ tests/admin/test_problematic_translogs.py | 212 ++++++++++++++++++ 4 files changed, 422 insertions(+), 1 deletion(-) create mode 100644 tests/admin/test_problematic_translogs.py diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index 96bf6351..db942ced 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -838,6 +838,106 @@ def plan_node_decommission(self, node_name: str, min_free_space_gb: float = 100. } +class TranslogReporter: + def __init__(self, client: CrateDBClient): + self.client = client + + def problematic_translogs(self, size_mb: int) -> List[str]: + """Find and optionally cancel shards with problematic translog sizes.""" + console.print(Panel.fit("[bold blue]Problematic Translog Analysis[/bold blue]")) + console.print(f"[dim]Looking for replica shards with translog uncommitted size > {size_mb}MB[/dim]") + console.print() + + # Query to find problematic replica shards + query = """ + SELECT sh.schema_name, \ + sh.table_name, \ + translate(p.values::text, ':{}', '=()') as partition_values, \ + sh.id AS shard_id, \ + node['name'] as node_name, \ + sh.translog_stats['uncommitted_size'] / 1024^2 AS translog_uncommitted_mb + FROM + sys.shards AS sh + LEFT JOIN information_schema.table_partitions p + ON sh.table_name = p.table_name + AND sh.schema_name = p.table_schema + AND sh.partition_ident = p.partition_ident + WHERE + sh.state = 'STARTED' + AND sh.translog_stats['uncommitted_size'] \ + > ? * 1024^2 + AND primary = FALSE + ORDER BY + 6 DESC \ + """ + + try: + result = self.client.execute_query(query, [size_mb]) + rows = result.get("rows", []) + + if not rows: + console.print(f"[green]โœ“ No replica shards found with translog uncommitted size > {size_mb}MB[/green]") + return [] + + console.print(f"Found {len(rows)} shards with problematic translogs:") + console.print() + + # Display query results table + results_table = Table(title=f"Problematic Replica Shards (translog > {size_mb}MB)", box=box.ROUNDED) + results_table.add_column("Schema", style="cyan") + results_table.add_column("Table", style="blue") + results_table.add_column("Partition", style="magenta") + results_table.add_column("Shard ID", justify="right", style="yellow") + results_table.add_column("Node", style="green") + results_table.add_column("Translog MB", justify="right", style="red") + + for row in rows: + schema_name, table_name, partition_values, shard_id, node_name, translog_mb = row + partition_display = ( + partition_values if partition_values and partition_values != "NULL" else "[dim]none[/dim]" + ) + results_table.add_row( + schema_name, table_name, partition_display, str(shard_id), node_name, f"{translog_mb:.1f}" + ) + + console.print(results_table) + console.print() + console.print("[bold]Generated ALTER Commands:[/bold]") + console.print() + + # Generate ALTER commands + alter_commands = [] + for row in rows: + schema_name, table_name, partition_values, shard_id, node_name, translog_mb = row + + # Build the ALTER command based on whether it's partitioned + if partition_values and partition_values != "NULL": + # partition_values already formatted like ("sync_day"=1757376000000) from the translate function + alter_cmd = ( + f'ALTER TABLE "{schema_name}"."{table_name}" partition {partition_values} ' + f"REROUTE CANCEL SHARD {shard_id} on '{node_name}' WITH (allow_primary=False);" + ) + else: + alter_cmd = ( + f'ALTER TABLE "{schema_name}"."{table_name}" ' + f"REROUTE CANCEL SHARD {shard_id} on '{node_name}' WITH (allow_primary=False);" + ) + + alter_commands.append(alter_cmd) + console.print(alter_cmd) + + console.print() + console.print(f"[bold]Total: {len(alter_commands)} ALTER commands generated[/bold]") + return alter_commands + + except Exception as e: + console.print(f"[red]Error analyzing problematic translogs: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + return [] + + class ShardReporter: def __init__(self, analyzer: ShardAnalyzer): self.analyzer = analyzer diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index e979986e..dfb2c3d8 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -11,7 +11,12 @@ from rich.console import Console from rich.panel import Panel -from cratedb_toolkit.admin.xmover.analysis.shard import ActiveShardMonitor, ShardAnalyzer, ShardReporter +from cratedb_toolkit.admin.xmover.analysis.shard import ( + ActiveShardMonitor, + ShardAnalyzer, + ShardReporter, + TranslogReporter, +) from cratedb_toolkit.admin.xmover.analysis.table import DistributionAnalyzer from cratedb_toolkit.admin.xmover.analysis.zone import ZoneReport from cratedb_toolkit.admin.xmover.model import ( @@ -443,6 +448,69 @@ def run_single_analysis(): console.print(f"[dim]{traceback.format_exc()}[/dim]") +@main.command() +@click.option("--size-mb", default=300, help="Minimum translog uncommitted size in MB (default: 300)") +@click.option("--cancel", is_flag=True, help="Execute the cancel commands after confirmation") +@click.pass_context +def problematic_translogs(ctx, size_mb: int, cancel: bool): + """ + Find and optionally cancel shards with problematic translog sizes. + + This command identifies replica shards with large uncommitted translog sizes + that may indicate replication issues. By default, it shows the ALTER commands + that would cancel these shards. With --cancel, it executes them after confirmation. + """ + client = ctx.obj["client"] + report = TranslogReporter(client=client) + alter_commands = report.problematic_translogs(size_mb=size_mb) + + try: + if cancel and alter_commands: + console.print() + console.print("[yellow]โš ๏ธ WARNING: This will cancel the specified shards![/yellow]") + console.print("[yellow]This may cause temporary data unavailability for these shards.[/yellow]") + console.print() + + if click.confirm("Are you sure you want to execute these ALTER commands?"): + console.print() + console.print("[bold blue]Executing ALTER commands...[/bold blue]") + + executed = 0 + failed = 0 + + for i, cmd in enumerate(alter_commands, 1): + if cmd.startswith("--"): + console.print(f"[yellow]Skipping command {i} (parse error): {cmd}[/yellow]") + continue + + try: + console.print(f"[dim]({i}/{len(alter_commands)}) Executing...[/dim]") + client.execute_query(cmd) + console.print(f"[green]โœ“ Command {i} executed successfully[/green]") + executed += 1 + except Exception as e: + console.print(f"[red]โœ— Command {i} failed: {e}[/red]") + failed += 1 + + # Small delay between commands to avoid overwhelming the cluster + if i < len(alter_commands): + time.sleep(1) + + console.print() + console.print("[bold]Execution Summary:[/bold]") + console.print(f"[green]โœ“ Successful: {executed}[/green]") + if failed > 0: + console.print(f"[red]โœ— Failed: {failed}[/red]") + else: + console.print("[yellow]Operation cancelled by user[/yellow]") + + except Exception as e: + console.print(f"[red]Error analyzing problematic translogs: {e}[/red]") + import traceback + + console.print(f"[dim]{traceback.format_exc()}[/dim]") + + @main.command() @click.option("--table", "-t", help="Analyze zones for specific table only") @click.option("--show-shards/--no-show-shards", default=False, help="Show individual shard details (default: False)") diff --git a/doc/admin/xmover/handbook.md b/doc/admin/xmover/handbook.md index a2049fa2..22428976 100644 --- a/doc/admin/xmover/handbook.md +++ b/doc/admin/xmover/handbook.md @@ -246,6 +246,47 @@ xmover monitor-recovery --watch --include-transitioning - **DISK**: Rebuilding shard from local data (after restart/disk issues) +### `problematic-translogs` +Find and optionally cancel replica shards with problematic translog sizes. + +**Options:** +- `--size-mb INTEGER`: Minimum translog uncommitted size in MB (default: 300) +- `--cancel`: Execute the cancel commands after confirmation + +**Description:** +This command identifies replica shards with large uncommitted translog sizes that may indicate replication issues. By default, it shows the ALTER commands that would cancel these shards. With `--cancel`, it executes them after confirmation. + +**Examples:** +```bash +# Show problematic shards with translog > 300MB (default) +xmover problematic-translogs + +# Show shards with translog > 500MB +xmover problematic-translogs --size-mb 500 + +# Execute cancel commands for shards > 1GB after confirmation +xmover problematic-translogs --size-mb 1000 --cancel +``` + +**Sample Output:** +``` +Found 3 shards with problematic translogs: + Problematic Replica Shards (translog > 300MB) +โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ Schema โ”‚ Table โ”‚ Partition โ”‚ Shard ID โ”‚ Node โ”‚ Translog MB โ”‚ +โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค +โ”‚ TURVO โ”‚ shipmentFormFieldData โ”‚ none โ”‚ 14 โ”‚ data-hot-6 โ”‚ 7040.9 โ”‚ +โ”‚ TURVO โ”‚ shipmentFormFieldData_events โ”‚ ("sync_day"=1757376000000) โ”‚ 3 โ”‚ data-hot-2 โ”‚ 481.2 โ”‚ +โ”‚ TURVO โ”‚ orderFormFieldData โ”‚ none โ”‚ 5 โ”‚ data-hot-1 โ”‚ 469.5 โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +Generated ALTER Commands: +ALTER TABLE "TURVO"."shipmentFormFieldData" REROUTE CANCEL SHARD 14 on 'data-hot-6' WITH (allow_primary=False); +ALTER TABLE "TURVO"."shipmentFormFieldData_events" partition ("sync_day"=1757376000000) REROUTE CANCEL SHARD 3 on 'data-hot-2' WITH (allow_primary=False); +ALTER TABLE "TURVO"."orderFormFieldData" REROUTE CANCEL SHARD 5 on 'data-hot-1' WITH (allow_primary=False); +Total: 3 ALTER commands generated +``` + + ### `active-shards` Monitor the most active shards by tracking checkpoint progression over time. This command helps identify which shards are receiving the most write activity diff --git a/tests/admin/test_problematic_translogs.py b/tests/admin/test_problematic_translogs.py new file mode 100644 index 00000000..7d7d406a --- /dev/null +++ b/tests/admin/test_problematic_translogs.py @@ -0,0 +1,212 @@ +""" +Tests for problematic translogs functionality. +""" + +from unittest.mock import Mock, patch + +from click.testing import CliRunner + +from cratedb_toolkit.admin.xmover.cli import main as cli +from cratedb_toolkit.admin.xmover.util.database import CrateDBClient + + +class TestXMoverProblematicTranslogs: + def setup_method(self): + """Set up test fixtures""" + self.runner = CliRunner() + self.mock_client = Mock(spec=CrateDBClient) + + def test_no_problematic_shards(self): + """Test when no shards meet the criteria""" + self.mock_client.execute_query.return_value = {"rows": []} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"]) + + assert result.exit_code == 0 + assert "No replica shards found" in result.output + assert "300MB" in result.output + + def test_non_partitioned_table_command_generation(self): + """Test ALTER command generation for non-partitioned tables""" + mock_rows = [ + ["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8], + ["TURVO", "orderFormFieldData", "NULL", 5, "data-hot-1", 469.5], + ] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"]) + + assert result.exit_code == 0 + assert "Found 2 shards with problematic translogs" in result.output + # Check that the query results table is shown + assert "Problematic Replica Shards" in result.output + assert "Generated ALTER Commands:" in result.output + # Check that key parts of the ALTER commands are present (Rich may wrap lines) + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 14" in result.output + assert "data-hot-6" in result.output + assert 'ALTER TABLE "TURVO"."orderFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 5" in result.output + assert "data-hot-1" in result.output + assert "Total: 2 ALTER commands generated" in result.output + + def test_partitioned_table_command_generation(self): + """Test ALTER command generation for partitioned tables""" + mock_rows = [ + ["TURVO", "shipmentFormFieldData_events", '("sync_day"=1757376000000)', 3, "data-hot-2", 481.2], + ] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "400"]) + + assert result.exit_code == 0 + assert "Found 1 shards with problematic translogs" in result.output + # Check that the query results table is shown + assert "Problematic Replica Shards" in result.output + assert "Generated ALTER Commands:" in result.output + # Check that key parts of the partitioned ALTER command are present + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData_events"' in result.output + assert '("sync_day"=1757376000000)' in result.output + assert "REROUTE CANCEL SHARD 3" in result.output + assert "data-hot-2" in result.output + + def test_mixed_partitioned_non_partitioned(self): + """Test handling of both partitioned and non-partitioned tables""" + mock_rows = [ + ["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8], + ["TURVO", "shipmentFormFieldData_events", '("sync_day"=1757376000000)', 3, "data-hot-2", 481.2], + ["TURVO", "orderFormFieldData", "NULL", 5, "data-hot-1", 469.5], + ] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "200"]) + + assert result.exit_code == 0 + assert "Found 3 shards with problematic translogs" in result.output + # Check that the query results table is shown + assert "Problematic Replica Shards" in result.output + assert "Generated ALTER Commands:" in result.output + + # Check non-partitioned command + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 14" in result.output + assert "data-hot-6" in result.output + + # Check partitioned command + assert 'ALTER TABLE "TURVO"."shipmentFormFieldData_events"' in result.output + assert '("sync_day"=1757376000000)' in result.output + assert "REROUTE CANCEL SHARD 3" in result.output + assert "data-hot-2" in result.output + + # Check NULL partition handled as non-partitioned + assert 'ALTER TABLE "TURVO"."orderFormFieldData"' in result.output + assert "REROUTE CANCEL SHARD 5" in result.output + assert "data-hot-1" in result.output + + def test_query_parameters(self): + """Test that the query is called with correct parameters""" + self.mock_client.execute_query.return_value = {"rows": []} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "500"]) + + # Verify the query was called with the correct threshold + self.mock_client.execute_query.assert_called_once() + call_args = self.mock_client.execute_query.call_args + query = call_args[0][0] + parameters = call_args[0][1] + + assert "sh.translog_stats['uncommitted_size'] > ? * 1024^2" in query + assert "primary=FALSE" in query + assert "6 DESC" in query # More flexible whitespace matching + assert parameters == [500] + + def test_cancel_flag_user_confirmation_no(self): + """Test --cancel flag with user declining confirmation""" + mock_rows = [["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8]] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client), patch( + "click.confirm", return_value=False + ): + result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) + + assert result.exit_code == 0 + assert "Operation cancelled by user" in result.output + # Should only be called once for the initial query, not for execution + assert self.mock_client.execute_query.call_count == 1 + + def test_cancel_flag_user_confirmation_yes(self): + """Test --cancel flag with user confirming execution""" + mock_rows = [["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8]] + self.mock_client.execute_query.return_value = {"rows": mock_rows} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client), patch( + "click.confirm", return_value=True + ), patch("time.sleep"): # Mock sleep to speed up test + result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) + + assert result.exit_code == 0 + assert "Executing ALTER commands" in result.output + assert "Command 1 executed successfully" in result.output + assert "Successful: 1" in result.output + + # Should be called twice: once for query, once for execution + assert self.mock_client.execute_query.call_count == 2 + + def test_execution_failure_handling(self): + """Test handling of failed command execution""" + mock_rows = [["TURVO", "shipmentFormFieldData", None, 14, "data-hot-6", 7011.8]] + + # First call returns rows, second call (execution) raises exception + self.mock_client.execute_query.side_effect = [{"rows": mock_rows}, Exception("Shard not found")] + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client), patch( + "click.confirm", return_value=True + ), patch("time.sleep"): + result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) + + assert result.exit_code == 0 + assert "Command 1 failed: Shard not found" in result.output + assert "Failed: 1" in result.output + assert "Successful: 0" in result.output + + def test_database_error_handling(self): + """Test handling of database connection errors""" + self.mock_client.execute_query.side_effect = Exception("Connection failed") + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs"]) + + assert result.exit_code == 0 + assert "Error analyzing problematic translogs" in result.output + assert "Connection failed" in result.output + + def test_default_size_mb(self): + """Test that default sizeMB is 300""" + self.mock_client.execute_query.return_value = {"rows": []} + self.mock_client.test_connection.return_value = True + + with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs"]) + + assert result.exit_code == 0 + assert "300MB" in result.output + + # Verify query was called with default value + call_args = self.mock_client.execute_query.call_args + parameters = call_args[0][1] + assert parameters == [300] From 5b427dd563857736916d001b90eab5bbca98067c Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 18 Sep 2025 19:15:31 +0200 Subject: [PATCH 07/13] Admin/XMover: Forward process output to user when CLI tests fail --- tests/admin/test_problematic_translogs.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/admin/test_problematic_translogs.py b/tests/admin/test_problematic_translogs.py index 7d7d406a..54baa67a 100644 --- a/tests/admin/test_problematic_translogs.py +++ b/tests/admin/test_problematic_translogs.py @@ -24,7 +24,7 @@ def test_no_problematic_shards(self): with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "No replica shards found" in result.output assert "300MB" in result.output @@ -40,7 +40,7 @@ def test_non_partitioned_table_command_generation(self): with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "Found 2 shards with problematic translogs" in result.output # Check that the query results table is shown assert "Problematic Replica Shards" in result.output @@ -65,7 +65,7 @@ def test_partitioned_table_command_generation(self): with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "400"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "Found 1 shards with problematic translogs" in result.output # Check that the query results table is shown assert "Problematic Replica Shards" in result.output @@ -89,7 +89,7 @@ def test_mixed_partitioned_non_partitioned(self): with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "200"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "Found 3 shards with problematic translogs" in result.output # Check that the query results table is shown assert "Problematic Replica Shards" in result.output @@ -141,7 +141,7 @@ def test_cancel_flag_user_confirmation_no(self): ): result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "Operation cancelled by user" in result.output # Should only be called once for the initial query, not for execution assert self.mock_client.execute_query.call_count == 1 @@ -157,7 +157,7 @@ def test_cancel_flag_user_confirmation_yes(self): ), patch("time.sleep"): # Mock sleep to speed up test result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "Executing ALTER commands" in result.output assert "Command 1 executed successfully" in result.output assert "Successful: 1" in result.output @@ -178,7 +178,7 @@ def test_execution_failure_handling(self): ), patch("time.sleep"): result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "Command 1 failed: Shard not found" in result.output assert "Failed: 1" in result.output assert "Successful: 0" in result.output @@ -191,7 +191,7 @@ def test_database_error_handling(self): with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "Error analyzing problematic translogs" in result.output assert "Connection failed" in result.output @@ -203,7 +203,7 @@ def test_default_size_mb(self): with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs"]) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "300MB" in result.output # Verify query was called with default value From 9abf7dfaa1e97bf36c22390b241e1f43b8f8bd71 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 19 Sep 2025 21:32:47 +0200 Subject: [PATCH 08/13] Admin/XMover: Improve output for database connectivity test --- cratedb_toolkit/admin/xmover/cli.py | 35 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/cli.py b/cratedb_toolkit/admin/xmover/cli.py index dfb2c3d8..bff94f20 100644 --- a/cratedb_toolkit/admin/xmover/cli.py +++ b/cratedb_toolkit/admin/xmover/cli.py @@ -1,7 +1,9 @@ """ XMover - CrateDB Shard Analyzer and Movement Tool -Command Line Interface. +A tool for analyzing CrateDB shard distribution across +nodes and availability zones, and for generating safe +SQL commands for shard rebalancing. """ import time @@ -33,28 +35,25 @@ console = Console() -@click.group() +@click.group(help=__doc__) @click.version_option() @click.pass_context def main(ctx): - """XMover - CrateDB Shard Analyzer and Movement Tool - - A tool for analyzing CrateDB shard distribution across nodes and availability zones, - and generating safe SQL commands for shard rebalancing. - """ ctx.ensure_object(dict) - # Test connection on startup - try: - client = CrateDBClient() - if not client.test_connection(): - console.print("[red]Error: Could not connect to CrateDB[/red]") - console.print("Please check your CRATE_CONNECTION_STRING in .env file") - raise click.Abort() - ctx.obj["client"] = client - except Exception as e: - console.print(f"[red]Error connecting to CrateDB: {e}[/red]") - raise click.Abort() from e + # Test connection on startup. + client = CrateDBClient() + if not client.test_connection(): + console.print("[red]Error: Failed connecting to CrateDB[/red]") + console.print( + "Please check your database connection string, " + "i.e. the CRATE_CONNECTION_STRING environment variable, " + "possibly stored within an .env file" + ) + raise click.Abort() + + # Propagate the client handle. + ctx.obj["client"] = client @main.command() From 4a55aa83d85f46a642ebbb88a6b1840fe80d7b45 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 19 Sep 2025 21:33:25 +0200 Subject: [PATCH 09/13] Admin/XMover: Add missing subcommands to CLI software tests --- tests/admin/test_cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/admin/test_cli.py b/tests/admin/test_cli.py index de3e4624..c90086bb 100644 --- a/tests/admin/test_cli.py +++ b/tests/admin/test_cli.py @@ -7,15 +7,17 @@ @pytest.mark.parametrize( "subcommand", [ + "active-shards", "analyze", "check-balance", "explain-error", "find-candidates", "monitor-recovery", + "problematic-translogs", "recommend", + "shard-distribution", "test-connection", "zone-analysis", - "shard-distribution", ], ) def test_xmover_all(cratedb, subcommand): From 135aa10b94f0ee91d488c6607ff420161ef8690f Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 19 Sep 2025 21:32:08 +0200 Subject: [PATCH 10/13] XMover/problematic-translogs: Fix software tests --- tests/admin/test_problematic_translogs.py | 27 ++++++++++++----------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/admin/test_problematic_translogs.py b/tests/admin/test_problematic_translogs.py index 54baa67a..f63fd4cb 100644 --- a/tests/admin/test_problematic_translogs.py +++ b/tests/admin/test_problematic_translogs.py @@ -21,8 +21,8 @@ def test_no_problematic_shards(self): self.mock_client.execute_query.return_value = {"rows": []} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): - result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"]) + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): + result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"], catch_exceptions=False) assert result.exit_code == 0, result.output assert "No replica shards found" in result.output @@ -37,7 +37,7 @@ def test_non_partitioned_table_command_generation(self): self.mock_client.execute_query.return_value = {"rows": mock_rows} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "300"]) assert result.exit_code == 0, result.output @@ -62,7 +62,7 @@ def test_partitioned_table_command_generation(self): self.mock_client.execute_query.return_value = {"rows": mock_rows} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "400"]) assert result.exit_code == 0, result.output @@ -86,7 +86,7 @@ def test_mixed_partitioned_non_partitioned(self): self.mock_client.execute_query.return_value = {"rows": mock_rows} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "200"]) assert result.exit_code == 0, result.output @@ -116,7 +116,7 @@ def test_query_parameters(self): self.mock_client.execute_query.return_value = {"rows": []} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): self.runner.invoke(cli, ["problematic-translogs", "--size-mb", "500"]) # Verify the query was called with the correct threshold @@ -125,8 +125,9 @@ def test_query_parameters(self): query = call_args[0][0] parameters = call_args[0][1] - assert "sh.translog_stats['uncommitted_size'] > ? * 1024^2" in query - assert "primary=FALSE" in query + assert "sh.translog_stats['uncommitted_size']" in query + assert "1024^2" in query + assert "primary = FALSE" in query assert "6 DESC" in query # More flexible whitespace matching assert parameters == [500] @@ -136,7 +137,7 @@ def test_cancel_flag_user_confirmation_no(self): self.mock_client.execute_query.return_value = {"rows": mock_rows} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client), patch( + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client), patch( "click.confirm", return_value=False ): result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) @@ -152,7 +153,7 @@ def test_cancel_flag_user_confirmation_yes(self): self.mock_client.execute_query.return_value = {"rows": mock_rows} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client), patch( + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client), patch( "click.confirm", return_value=True ), patch("time.sleep"): # Mock sleep to speed up test result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) @@ -173,7 +174,7 @@ def test_execution_failure_handling(self): self.mock_client.execute_query.side_effect = [{"rows": mock_rows}, Exception("Shard not found")] self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client), patch( + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client), patch( "click.confirm", return_value=True ), patch("time.sleep"): result = self.runner.invoke(cli, ["problematic-translogs", "--cancel"]) @@ -188,7 +189,7 @@ def test_database_error_handling(self): self.mock_client.execute_query.side_effect = Exception("Connection failed") self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs"]) assert result.exit_code == 0, result.output @@ -200,7 +201,7 @@ def test_default_size_mb(self): self.mock_client.execute_query.return_value = {"rows": []} self.mock_client.test_connection.return_value = True - with patch("cratedb_toolkit.admin.xmover.util.database.CrateDBClient", return_value=self.mock_client): + with patch("cratedb_toolkit.admin.xmover.cli.CrateDBClient", return_value=self.mock_client): result = self.runner.invoke(cli, ["problematic-translogs"]) assert result.exit_code == 0, result.output From c7fc33aab30d6df9cc1eaba50b0c48dc5d67973e Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Fri, 19 Sep 2025 22:37:48 +0200 Subject: [PATCH 11/13] XMover/problematic-translogs: Shard size distribution analysis and reporting This commit adds comprehensive shard size analysis capabilities, including: - Size distribution bucketing and reporting - Early warning detection for oversized and undersized shards - Partition-aware shard status display - Integration with recovery monitoring - Improved problematic shard detection - Enhanced CLI output formatting The changes help identify shard sizing issues that could impact cluster performance and stability. --- .../admin/xmover/analysis/shard.py | 147 ++++++++++ cratedb_toolkit/admin/xmover/model.py | 1 + .../admin/xmover/operational/monitor.py | 260 ++++++++++++++---- cratedb_toolkit/admin/xmover/util/database.py | 88 +++++- cratedb_toolkit/admin/xmover/util/format.py | 15 + tests/admin/test_recovery_monitor.py | 5 + 6 files changed, 459 insertions(+), 57 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index db942ced..35239596 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -614,6 +614,78 @@ def _check_zone_conflict(self, recommendation: ShardRelocationResponse) -> Optio # If we can't check, err on the side of caution return f"Cannot verify zone safety: {str(e)}" + def get_shard_size_overview(self) -> Dict[str, Any]: + """Get shard size distribution analysis""" + # Only analyze STARTED shards + started_shards = [s for s in self.shards if s.state == "STARTED"] + + # Define size buckets (in GB) + size_buckets = { + "<1GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, + "1GB-5GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, + "5GB-10GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, + "10GB-50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, + ">=50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, + } + + if not started_shards: + return { + "total_shards": 0, + "total_size_gb": 0.0, + "avg_shard_size_gb": 0.0, + "size_buckets": size_buckets, + "large_shards_count": 0, + "very_small_shards_percentage": 0.0, + } + + total_shards = len(started_shards) + total_size_gb = sum(s.size_gb for s in started_shards) + avg_size_gb = total_size_gb / total_shards if total_shards > 0 else 0.0 + + # Categorize shards by size + large_shards_count = 0 # >50GB shards + very_small_shards = 0 # <1GB shards (for percentage calculation) + + for shard in started_shards: + size_gb = shard.size_gb + + if size_gb >= 50: + size_buckets[">=50GB"]["count"] += 1 + size_buckets[">=50GB"]["total_size"] += size_gb + large_shards_count += 1 + elif size_gb >= 10: + size_buckets["10GB-50GB"]["count"] += 1 + size_buckets["10GB-50GB"]["total_size"] += size_gb + elif size_gb >= 5: + size_buckets["5GB-10GB"]["count"] += 1 + size_buckets["5GB-10GB"]["total_size"] += size_gb + elif size_gb >= 1: + size_buckets["1GB-5GB"]["count"] += 1 + size_buckets["1GB-5GB"]["total_size"] += size_gb + else: + size_buckets["<1GB"]["count"] += 1 + size_buckets["<1GB"]["total_size"] += size_gb + very_small_shards += 1 + + # Calculate the average size for each bucket + for _, bucket_data in size_buckets.items(): + if bucket_data["count"] > 0: + bucket_data["avg_size_gb"] = bucket_data["total_size"] / bucket_data["count"] + else: + bucket_data["avg_size_gb"] = 0.0 + + # Calculate the percentage of very small shards (<1GB) + very_small_percentage = (very_small_shards / total_shards * 100) if total_shards > 0 else 0.0 + + return { + "total_shards": total_shards, + "total_size_gb": total_size_gb, + "avg_shard_size_gb": avg_size_gb, + "size_buckets": size_buckets, + "large_shards_count": large_shards_count, + "very_small_shards_percentage": very_small_percentage, + } + def get_cluster_overview(self) -> Dict[str, Any]: """Get a comprehensive overview of the cluster""" # Get cluster watermark settings @@ -1032,6 +1104,81 @@ def distribution(self, table: str = None): console.print(node_table) + console.print() + + # Shard Size Overview + size_overview = self.analyzer.get_shard_size_overview() + + size_table = Table(title="Shard Size Distribution", box=box.ROUNDED) + size_table.add_column("Size Range", style="cyan") + size_table.add_column("Count", justify="right", style="magenta") + size_table.add_column("Percentage", justify="right", style="green") + size_table.add_column("Avg Size", justify="right", style="blue") + size_table.add_column("Total Size", justify="right", style="yellow") + + total_shards = size_overview["total_shards"] + + # Define color coding thresholds + large_shards_threshold = 0 # warn if ANY shards >=50GB (red flag) + small_shards_percentage_threshold = 40 # warn if >40% of shards are small (<1GB) + + for bucket_name, bucket_data in size_overview["size_buckets"].items(): + count = bucket_data["count"] + avg_size = bucket_data["avg_size_gb"] + total_size = bucket_data["total_size"] + percentage = (count / total_shards * 100) if total_shards > 0 else 0 + + # Apply color coding + count_str = str(count) + percentage_str = f"{percentage:.1f}%" + + # Color code large shards (>=50GB) - ANY large shard is a red flag + if bucket_name == ">=50GB" and count > large_shards_threshold: + count_str = f"[red]{count}[/red]" + percentage_str = f"[red]{percentage:.1f}%[/red]" + + # Color code if too many very small shards (<1GB) + if bucket_name == "<1GB" and percentage > small_shards_percentage_threshold: + count_str = f"[yellow]{count}[/yellow]" + percentage_str = f"[yellow]{percentage:.1f}%[/yellow]" + + size_table.add_row( + bucket_name, + count_str, + percentage_str, + f"{avg_size:.2f}GB" if avg_size > 0 else "0GB", + format_size(total_size), + ) + + console.print(size_table) + + # Add warnings if thresholds are exceeded + warnings = [] + if size_overview["large_shards_count"] > large_shards_threshold: + warnings.append( + f"[red]๐Ÿ”ฅ CRITICAL: {size_overview['large_shards_count']} " + f"large shards (>=50GB) detected - IMMEDIATE ACTION REQUIRED![/red]" + ) + warnings.append("[red] Large shards cause slow recovery, memory pressure, and performance issues[/red]") + + # Calculate the percentage of very small shards (<1GB) + very_small_count = size_overview["size_buckets"]["<1GB"]["count"] + very_small_percentage = (very_small_count / total_shards * 100) if total_shards > 0 else 0 + + if very_small_percentage > small_shards_percentage_threshold: + warnings.append( + f"[yellow]โš ๏ธ {very_small_percentage:.1f}% of shards are very small (<1GB) - " + f"consider optimizing shard allocation[/yellow]" + ) + warnings.append("[yellow] Too many small shards create metadata overhead and reduce efficiency[/yellow]") + + if warnings: + console.print() + for warning in warnings: + console.print(warning) + + console.print() + # Table-specific analysis if requested if table: console.print() diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index 7f962c3c..8dea5233 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -56,6 +56,7 @@ class RecoveryInfo: schema_name: str table_name: str + partition_values: Optional[str] # Partition values for partitioned tables shard_id: int node_name: str node_id: str diff --git a/cratedb_toolkit/admin/xmover/operational/monitor.py b/cratedb_toolkit/admin/xmover/operational/monitor.py index 206bacbf..f9acf0e5 100644 --- a/cratedb_toolkit/admin/xmover/operational/monitor.py +++ b/cratedb_toolkit/admin/xmover/operational/monitor.py @@ -7,7 +7,7 @@ from cratedb_toolkit.admin.xmover.model import RecoveryInfo from cratedb_toolkit.admin.xmover.util.database import CrateDBClient -from cratedb_toolkit.admin.xmover.util.format import format_translog_info +from cratedb_toolkit.admin.xmover.util.format import format_table_display_with_partition, format_translog_info console = Console() @@ -42,6 +42,10 @@ def get_cluster_recovery_status(self) -> List[RecoveryInfo]: return recoveries + def get_problematic_shards(self) -> List[Dict[str, Any]]: + """Get shards that need attention but aren't actively recovering""" + return self.client.get_problematic_shards(self.options.table, self.options.node) + def get_recovery_summary(self, recoveries: List[RecoveryInfo]) -> Dict[str, Any]: """Generate a summary of recovery operations""" @@ -126,18 +130,22 @@ def _format_recovery_table(self, recoveries: List[RecoveryInfo]) -> str: return " No recoveries of this type" # Table headers - headers = ["Table", "Shard", "Node", "Type", "Stage", "Progress", "Size(GB)", "Time(s)"] + headers = ["Table", "Shard", "Node", "Recovery", "Stage", "Progress", "Size(GB)", "Time(s)"] # Calculate column widths col_widths = [len(h) for h in headers] rows = [] for recovery in recoveries: + # Format table name with partition values if available + table_display = f"{recovery.schema_name}.{recovery.table_name}" + if recovery.partition_values: + table_display = f"{table_display} {recovery.partition_values}" row = [ - f"{recovery.schema_name}.{recovery.table_name}", + table_display, str(recovery.shard_id), recovery.node_name, - recovery.shard_type, + recovery.recovery_type, recovery.stage, f"{recovery.overall_progress:.1f}%", f"{recovery.size_gb:.1f}", @@ -198,10 +206,9 @@ def start(self, watch: bool, debug: bool = False): ) # Create complete table name - if recovery.schema_name == "doc": - table_display = recovery.table_name - else: - table_display = f"{recovery.schema_name}.{recovery.table_name}" + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) # Count active vs completed if recovery.stage == "DONE" and recovery.overall_progress >= 100.0: @@ -225,16 +232,22 @@ def start(self, watch: bool, debug: bool = False): translog_info = format_translog_info(recovery) if diff > 0: + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"[green]๐Ÿ“ˆ[/green] {table_display} S{recovery.shard_id} " - f"{recovery.overall_progress:.1f}% (+{diff:.1f}%) " - f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + f"{recovery.recovery_type} {recovery.overall_progress:.1f}% " + f"(+{diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}" ) else: + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"[yellow]๐Ÿ“‰[/yellow] {table_display} S{recovery.shard_id} " - f"{recovery.overall_progress:.1f}% ({diff:.1f}%) " - f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" + f"{recovery.recovery_type} {recovery.overall_progress:.1f}% " + f"({diff:.1f}%) {recovery.size_gb:.1f}GB{translog_info}{node_route}" ) elif prev["stage"] != recovery.stage: # Create node route display @@ -246,10 +259,12 @@ def start(self, watch: bool, debug: bool = False): # Add translog info translog_info = format_translog_info(recovery) - + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"[blue]๐Ÿ”„[/blue] {table_display} S{recovery.shard_id} " - f"{prev['stage']}โ†’{recovery.stage} " + f"{recovery.recovery_type} {prev['stage']}โ†’{recovery.stage} " f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" ) else: @@ -267,12 +282,15 @@ def start(self, watch: bool, debug: bool = False): node_route = f" disk โ†’ {recovery.node_name}" status_icon = "[cyan]๐Ÿ†•[/cyan]" if not first_run else "[blue]๐Ÿ“‹[/blue]" + # Add translog info translog_info = format_translog_info(recovery) - + table_display = format_table_display_with_partition( + recovery.schema_name, recovery.table_name, recovery.partition_values + ) changes.append( f"{status_icon} {table_display} S{recovery.shard_id} " - f"{recovery.stage} {recovery.overall_progress:.1f}% " + f"{recovery.recovery_type} {recovery.stage} {recovery.overall_progress:.1f}% " f"{recovery.size_gb:.1f}GB{translog_info}{node_route}" ) @@ -282,29 +300,81 @@ def start(self, watch: bool, debug: bool = False): "stage": recovery.stage, } - # Always show a status line - if not recoveries: - console.print(f"{current_time} | [green]No recoveries - cluster stable[/green]") + # Get problematic shards for comprehensive status + problematic_shards = self.get_problematic_shards() + + # Filter out shards that are already being recovered + non_recovering_shards = [] + if problematic_shards: + for shard in problematic_shards: + # Check if this shard is already in our recoveries list + is_recovering = any( + r.shard_id == shard["shard_id"] + and r.table_name == shard["table_name"] + and r.schema_name == shard["schema_name"] + for r in recoveries + ) + if not is_recovering: + non_recovering_shards.append(shard) + + # Always show a comprehensive status line + if not recoveries and not non_recovering_shards: + console.print(f"{current_time} | [green]No issues - cluster stable[/green]") + previous_recoveries.clear() + elif not recoveries and non_recovering_shards: + console.print( + f"{current_time} | [yellow]{len(non_recovering_shards)} shards " + f"need attention (not recovering)[/yellow]" + ) + # Show first few problematic shards + for shard in non_recovering_shards[:5]: + table_display = format_table_display_with_partition( + shard["schema_name"], shard["table_name"], shard.get("partition_values") + ) + primary_indicator = "P" if shard.get("primary") else "R" + console.print( + f" | [red]โš [/red] {table_display} " + f"S{shard['shard_id']}{primary_indicator} {shard['state']}" + ) + if len(non_recovering_shards) > 5: + console.print(f" | [dim]... and {len(non_recovering_shards) - 5} more[/dim]") previous_recoveries.clear() else: - # Build status message - status = "" + # Build status message for active recoveries + status_parts = [] if active_count > 0: - status = f"{active_count} active" + status_parts.append(f"{active_count} recovering") if completed_count > 0: - status += f", {completed_count} done" if status else f"{completed_count} done" + status_parts.append(f"{completed_count} done") + if non_recovering_shards: + status_parts.append(f"[yellow]{len(non_recovering_shards)} awaiting recovery[/yellow]") + + status = " | ".join(status_parts) # Show status line with changes or periodic update if changes: console.print(f"{current_time} | {status}") for change in changes: console.print(f" | {change}") + # Show some problematic shards if there are any + if non_recovering_shards and len(changes) < 3: # Don't overwhelm the output + for shard in non_recovering_shards[:2]: + table_display = format_table_display_with_partition( + shard["schema_name"], shard["table_name"], shard.get("partition_values") + ) + primary_indicator = "P" if shard.get("primary") else "R" + console.print( + f" | [red]โš [/red] {table_display} " + f"S{shard['shard_id']}{primary_indicator} {shard['state']}" + ) else: # Show periodic status even without changes if self.options.include_transitioning and completed_count > 0: console.print(f"{current_time} | {status} (transitioning)") elif active_count > 0: console.print(f"{current_time} | {status} (no changes)") + elif non_recovering_shards: + console.print(f"{current_time} | {status} (issues persist)") first_run = False time.sleep(self.options.refresh_interval) @@ -315,30 +385,63 @@ def start(self, watch: bool, debug: bool = False): # Show final summary final_recoveries = self.get_cluster_recovery_status() - if final_recoveries: - console.print("\n๐Ÿ“Š [bold]Final Recovery Summary:[/bold]") - summary = self.get_recovery_summary(final_recoveries) + final_problematic_shards = self.get_problematic_shards() + + # Filter out shards that are already being recovered + final_non_recovering_shards = [] + if final_problematic_shards: + for shard in final_problematic_shards: + is_recovering = any( + r.shard_id == shard["shard_id"] + and r.table_name == shard["table_name"] + and r.schema_name == shard["schema_name"] + for r in final_recoveries + ) + if not is_recovering: + final_non_recovering_shards.append(shard) - # Count active vs completed - active_count = len( - [r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"] - ) - completed_count = len(final_recoveries) - active_count + if final_recoveries or final_non_recovering_shards: + console.print("\n๐Ÿ“Š [bold]Final Cluster Status Summary:[/bold]") - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Active: {active_count}, Completed: {completed_count}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") - console.print(f" Average progress: {summary['avg_progress']:.1f}%") + if final_recoveries: + summary = self.get_recovery_summary(final_recoveries) + # Count active vs completed + active_count = len( + [r for r in final_recoveries if r.overall_progress < 100.0 or r.stage != "DONE"] + ) + completed_count = len(final_recoveries) - active_count + + console.print(f" Total recoveries: {summary['total_recoveries']}") + console.print(f" Active: {active_count}, Completed: {completed_count}") + console.print(f" Total size: {summary['total_size_gb']:.1f} GB") + console.print(f" Average progress: {summary['avg_progress']:.1f}%") + + if summary["by_type"]: + console.print(" By recovery type:") + for rec_type, stats in summary["by_type"].items(): + console.print( + f" {rec_type}: {stats['count']} recoveries, " + f"{stats['avg_progress']:.1f}% avg progress" + ) - if summary["by_type"]: - console.print(" By recovery type:") - for rec_type, stats in summary["by_type"].items(): - console.print( - f" {rec_type}: {stats['count']} recoveries, " - f"{stats['avg_progress']:.1f}% avg progress" - ) + if final_non_recovering_shards: + console.print( + f" [yellow]Problematic shards needing attention: " + f"{len(final_non_recovering_shards)}[/yellow]" + ) + # Group by state for summary + by_state = {} + for shard in final_non_recovering_shards: + state = shard["state"] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") else: console.print("\n[green]โœ… No active recoveries at exit[/green]") + console.print("\n[green]โœ… Cluster stable - no issues detected[/green]") return @@ -349,20 +452,66 @@ def start(self, watch: bool, debug: bool = False): display_output = self.format_recovery_display(recoveries) console.print(display_output) - if not recoveries: + # Get problematic shards for comprehensive status + problematic_shards = self.get_problematic_shards() + + # Filter out shards that are already being recovered + non_recovering_shards = [] + if problematic_shards: + for shard in problematic_shards: + is_recovering = any( + r.shard_id == shard["shard_id"] + and r.table_name == shard["table_name"] + and r.schema_name == shard["schema_name"] + for r in recoveries + ) + if not is_recovering: + non_recovering_shards.append(shard) + + if not recoveries and not non_recovering_shards: if self.options.include_transitioning: - console.print("\n[green]โœ… No recoveries found (active or transitioning)[/green]") + console.print("\n[green]โœ… No issues found - cluster stable[/green]") else: console.print("\n[green]โœ… No active recoveries found[/green]") console.print( "[dim]๐Ÿ’ก Use --include-transitioning to see completed recoveries still transitioning[/dim]" ) + + elif not recoveries and non_recovering_shards: + console.print( + f"\n[yellow]โš ๏ธ {len(non_recovering_shards)} shards need attention (not recovering)[/yellow]" + ) + # Group by state for summary + by_state = {} + for shard in non_recovering_shards: + state = shard["state"] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") + + # Show first few examples + console.print("\nExamples:") + for shard in non_recovering_shards[:5]: + table_display = format_table_display_with_partition( + shard["schema_name"], shard["table_name"], shard.get("partition_values") + ) + primary_indicator = "P" if shard.get("primary") else "R" + console.print( + f" [red]โš [/red] {table_display} S{shard['shard_id']}{primary_indicator} {shard['state']}" + ) + + if len(non_recovering_shards) > 5: + console.print(f" [dim]... and {len(non_recovering_shards) - 5} more[/dim]") + else: - # Show summary + # Show recovery summary summary = self.get_recovery_summary(recoveries) - console.print("\n๐Ÿ“Š [bold]Recovery Summary:[/bold]") - console.print(f" Total recoveries: {summary['total_recoveries']}") - console.print(f" Total size: {summary['total_size_gb']:.1f} GB") + console.print("\n๐Ÿ“Š [bold]Cluster Status Summary:[/bold]") + console.print(f" Active recoveries: {summary['total_recoveries']}") + console.print(f" Total recovery size: {summary['total_size_gb']:.1f} GB") console.print(f" Average progress: {summary['avg_progress']:.1f}%") # Show breakdown by type @@ -374,6 +523,21 @@ def start(self, watch: bool, debug: bool = False): f"{stats['avg_progress']:.1f}% avg progress" ) + # Show problematic shards if any + if non_recovering_shards: + console.print( + f"\n [yellow]Problematic shards needing attention: {len(non_recovering_shards)}[/yellow]" + ) + by_state = {} + for shard in non_recovering_shards: + state = shard["state"] + if state not in by_state: + by_state[state] = 0 + by_state[state] += 1 + + for state, count in by_state.items(): + console.print(f" {state}: {count} shards") + console.print("\n[dim]๐Ÿ’ก Use --watch flag for continuous monitoring[/dim]") except Exception as e: diff --git a/cratedb_toolkit/admin/xmover/util/database.py b/cratedb_toolkit/admin/xmover/util/database.py index e487ed8f..c02f323e 100644 --- a/cratedb_toolkit/admin/xmover/util/database.py +++ b/cratedb_toolkit/admin/xmover/util/database.py @@ -334,6 +334,7 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) SELECT s.table_name, s.schema_name, + translate(p.values::text, ':{}', '=()') as partition_values, s.id as shard_id, s.node['name'] as node_name, s.node['id'] as node_id, @@ -344,6 +345,10 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) s."primary", s.translog_stats['size'] as translog_size FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident WHERE s.schema_name = ? AND s.table_name = ? AND s.id = ? AND (s.state = 'RECOVERING' OR s.routing_state IN ('INITIALIZING', 'RELOCATING')) ORDER BY s.schema_name @@ -359,15 +364,16 @@ def get_recovery_details(self, schema_name: str, table_name: str, shard_id: int) return { "table_name": row[0], "schema_name": row[1], - "shard_id": row[2], - "node_name": row[3], - "node_id": row[4], - "routing_state": row[5], - "state": row[6], - "recovery": row[7], - "size": row[8], - "primary": row[9], - "translog_size": row[10] or 0, + "partition_values": row[2], + "shard_id": row[3], + "node_name": row[4], + "node_id": row[5], + "routing_state": row[6], + "state": row[7], + "recovery": row[8], + "size": row[9], + "primary": row[10], + "translog_size": row[11] or 0, } def get_all_recovering_shards( @@ -442,6 +448,7 @@ def _parse_recovery_info(self, allocation: Dict[str, Any], shard_detail: Dict[st return RecoveryInfo( schema_name=shard_detail["schema_name"], table_name=shard_detail["table_name"], + partition_values=shard_detail.get("partition_values"), shard_id=shard_detail["shard_id"], node_name=shard_detail["node_name"], node_id=shard_detail["node_id"], @@ -506,6 +513,69 @@ def _is_recovery_completed(self, recovery_info: RecoveryInfo) -> bool: and recovery_info.bytes_percent >= 100.0 ) + def get_problematic_shards( + self, table_name: Optional[str] = None, node_name: Optional[str] = None + ) -> List[Dict[str, Any]]: + """Get shards that need attention but aren't actively recovering""" + + where_conditions = ["s.state != 'STARTED'"] + parameters = [] + + if table_name: + where_conditions.append("s.table_name = ?") + parameters.append(table_name) + + if node_name: + where_conditions.append("s.node['name'] = ?") + parameters.append(node_name) + + where_clause = f"WHERE {' AND '.join(where_conditions)}" + + query = f""" + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{{}}', '=()') as partition_values, + s.id as shard_id, + s.state, + s.routing_state, + s.node['name'] as node_name, + s.node['id'] as node_id, + s."primary", + a.current_state, + a.explanation + FROM sys.shards s + LEFT JOIN sys.allocations a ON (s.table_name = a.table_name AND s.id = a.shard_id) + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + {where_clause} + ORDER BY s.state, s.table_name, s.id + """ # noqa: S608 + + result = self.execute_query(query, parameters) + + problematic_shards = [] + for row in result.get("rows", []): + problematic_shards.append( + { + "schema_name": row[0] or "doc", + "table_name": row[1], + "partition_values": row[2], + "shard_id": row[3], + "state": row[4], + "routing_state": row[5], + "node_name": row[6], + "node_id": row[7], + "primary": row[8], + "current_state": row[9], + "explanation": row[10], + } + ) + + return problematic_shards + def get_active_shards_snapshot(self, min_checkpoint_delta: int = 1000) -> List[ActiveShardSnapshot]: """Get a snapshot of all started shards for activity monitoring diff --git a/cratedb_toolkit/admin/xmover/util/format.py b/cratedb_toolkit/admin/xmover/util/format.py index 7bced419..37b2a2cf 100644 --- a/cratedb_toolkit/admin/xmover/util/format.py +++ b/cratedb_toolkit/admin/xmover/util/format.py @@ -41,3 +41,18 @@ def format_translog_info(recovery_info) -> str: size_str = format_size(tl_gb) return f" [dim]([{color}]TL:{size_str}[/{color}])[/dim]" + + +def format_table_display_with_partition(schema_name: str, table_name: str, partition_values: str = None) -> str: + """Format table display with partition values if available""" + # Create base table name + if schema_name and schema_name != "doc": + base_display = f"{schema_name}.{table_name}" + else: + base_display = table_name + + # Add partition values if available + if partition_values: + return f"{base_display} {partition_values}" + else: + return base_display diff --git a/tests/admin/test_recovery_monitor.py b/tests/admin/test_recovery_monitor.py index 6041baeb..e8ee0a26 100644 --- a/tests/admin/test_recovery_monitor.py +++ b/tests/admin/test_recovery_monitor.py @@ -71,6 +71,7 @@ def test_recovery_info_parsing(): recovery = RecoveryInfo( schema_name="CURVO", table_name="PartioffD", + partition_values="NULL", shard_id=19, node_name="data-hot-1", node_id="ZH6fBanGSjanGqeSh-sw0A", @@ -134,6 +135,7 @@ def test_recovery_monitor_formatting(): RecoveryInfo( schema_name="CURVO", table_name="PartioffD", + partition_values="NULL", shard_id=19, node_name="data-hot-1", node_id="node1", @@ -150,6 +152,7 @@ def test_recovery_monitor_formatting(): RecoveryInfo( schema_name="CURVO", table_name="orderTracking", + partition_values="NULL", shard_id=7, node_name="data-hot-2", node_id="node2", @@ -217,6 +220,7 @@ def test_recovery_type_filtering(): RecoveryInfo( schema_name="test", table_name="table1", + partition_values="NULL", shard_id=1, node_name="node1", node_id="n1", @@ -233,6 +237,7 @@ def test_recovery_type_filtering(): RecoveryInfo( schema_name="test", table_name="table2", + partition_values="NULL", shard_id=2, node_name="node2", node_id="n2", From 1a92c3f72b28e1a3fe3bf1cc8533dbcd5cc25c7f Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Fri, 19 Sep 2025 22:43:43 +0200 Subject: [PATCH 12/13] XMover/problematic-translogs: Large and small shards analysis/tracking - Add max shard size tracking to size buckets - Add small shards analysis view to analyzer command --- .../admin/xmover/analysis/shard.py | 298 +++++++++++++++++- cratedb_toolkit/admin/xmover/model.py | 4 +- 2 files changed, 291 insertions(+), 11 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/analysis/shard.py b/cratedb_toolkit/admin/xmover/analysis/shard.py index 35239596..1824a9c2 100644 --- a/cratedb_toolkit/admin/xmover/analysis/shard.py +++ b/cratedb_toolkit/admin/xmover/analysis/shard.py @@ -20,6 +20,7 @@ ShardInfo, ShardRelocationConstraints, ShardRelocationResponse, + TableStatsType, ) from cratedb_toolkit.admin.xmover.util.database import CrateDBClient from cratedb_toolkit.admin.xmover.util.format import format_percentage, format_size @@ -621,11 +622,11 @@ def get_shard_size_overview(self) -> Dict[str, Any]: # Define size buckets (in GB) size_buckets = { - "<1GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, - "1GB-5GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, - "5GB-10GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, - "10GB-50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, - ">=50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0}, + "<1GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0}, + "1GB-5GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0}, + "5GB-10GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0}, + "10GB-50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0}, + ">=50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0}, } if not started_shards: @@ -652,19 +653,24 @@ def get_shard_size_overview(self) -> Dict[str, Any]: if size_gb >= 50: size_buckets[">=50GB"]["count"] += 1 size_buckets[">=50GB"]["total_size"] += size_gb + size_buckets[">=50GB"]["max_size"] = max(size_buckets[">=50GB"]["max_size"], size_gb) large_shards_count += 1 elif size_gb >= 10: size_buckets["10GB-50GB"]["count"] += 1 size_buckets["10GB-50GB"]["total_size"] += size_gb + size_buckets["10GB-50GB"]["max_size"] = max(size_buckets["10GB-50GB"]["max_size"], size_gb) elif size_gb >= 5: size_buckets["5GB-10GB"]["count"] += 1 size_buckets["5GB-10GB"]["total_size"] += size_gb + size_buckets["5GB-10GB"]["max_size"] = max(size_buckets["5GB-10GB"]["max_size"], size_gb) elif size_gb >= 1: size_buckets["1GB-5GB"]["count"] += 1 size_buckets["1GB-5GB"]["total_size"] += size_gb + size_buckets["1GB-5GB"]["max_size"] = max(size_buckets["1GB-5GB"]["max_size"], size_gb) else: size_buckets["<1GB"]["count"] += 1 size_buckets["<1GB"]["total_size"] += size_gb + size_buckets["<1GB"]["max_size"] = max(size_buckets["<1GB"]["max_size"], size_gb) very_small_shards += 1 # Calculate the average size for each bucket @@ -686,6 +692,123 @@ def get_shard_size_overview(self) -> Dict[str, Any]: "very_small_shards_percentage": very_small_percentage, } + def get_large_shards_details(self) -> List[Dict[str, Any]]: + """Get detailed information about large shards (>=50GB) including partition values""" + # Optimized query to fetch only large shards directly from database + query = """ + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + s.id as shard_id, + s.size / 1024^3 as size_gb, + s."primary" as is_primary, + s.node['name'] as node_name, + s.node['id'] as node_id + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + WHERE s.state = 'STARTED' + AND s.size >= 50 * 1024^3 -- 50GB in bytes + ORDER BY s.size DESC + """ + + result = self.client.execute_query(query) + + large_shards = [] + for row in result.get("rows", []): + # Get zone information from our nodes data + node_id = row[7] + zone = next((node.zone for node in self.nodes if node.id == node_id), "unknown") + + large_shards.append( + { + "schema_name": row[0] or "doc", + "table_name": row[1], + "partition_values": row[2], + "shard_id": row[3], + "size_gb": float(row[4]) if row[4] else 0.0, + "is_primary": row[5] or False, + "node_name": row[6], + "zone": zone, + } + ) + + return large_shards + + def get_small_shards_details(self, limit: int = 10) -> List[Dict[str, Any]]: + """Get detailed information about the smallest shards, grouped by table/partition""" + # Query to get all shards, ordered by size ascending to get the smallest + query = """ + SELECT + s.schema_name, + s.table_name, + translate(p.values::text, ':{}', '=()') as partition_values, + s.id as shard_id, + s.size / 1024^3 as size_gb, + s."primary" as is_primary, + s.node['name'] as node_name, + s.node['id'] as node_id + FROM sys.shards s + LEFT JOIN information_schema.table_partitions p + ON s.table_name = p.table_name + AND s.schema_name = p.table_schema + AND s.partition_ident = p.partition_ident + WHERE s.state = 'STARTED' + ORDER BY s.size ASC + """ + + result = self.client.execute_query(query) + + # Group by table/partition to get aggregated stats + table_partition_stats: TableStatsType = {} + for row in result.get("rows", []): + # Get zone information from our nodes data + node_id = row[7] + + # FIXME: `zone` does not get used. + zone = next((node.zone for node in self.nodes if node.id == node_id), "unknown") # noqa: F841 + + # Create table key with schema + schema_name = row[0] or "doc" + table_name = row[1] + table_display = table_name + if schema_name and schema_name != "doc": + table_display = f"{schema_name}.{table_name}" + + # Create partition key + partition_key = row[2] or "N/A" + + # Create combined key + key = (table_display, partition_key) + + if key not in table_partition_stats: + table_partition_stats[key] = {"sizes": [], "primary_count": 0, "replica_count": 0, "total_size": 0.0} + + # Aggregate stats + stats = table_partition_stats[key] + size_gb = float(row[4]) if row[4] else 0.0 + stats["sizes"].append(size_gb) + stats["total_size"] += size_gb + if row[5]: # is_primary + stats["primary_count"] += 1 + else: + stats["replica_count"] += 1 + + # Sort by average size ascending (smallest first) and return top tables/partitions + sorted_stats: List[Dict[str, Any]] = [] + for (table_name, partition_key), stats in table_partition_stats.items(): + avg_size = sum(stats["sizes"]) / len(stats["sizes"]) if stats["sizes"] else 0 + sorted_stats.append( + {"table_name": table_name, "partition_key": partition_key, "stats": stats, "avg_size": avg_size} + ) + + # Sort by average size and take the top 'limit' entries + sorted_stats.sort(key=lambda x: x["avg_size"]) + return sorted_stats[:limit] + def get_cluster_overview(self) -> Dict[str, Any]: """Get a comprehensive overview of the cluster""" # Get cluster watermark settings @@ -1114,6 +1237,7 @@ def distribution(self, table: str = None): size_table.add_column("Count", justify="right", style="magenta") size_table.add_column("Percentage", justify="right", style="green") size_table.add_column("Avg Size", justify="right", style="blue") + size_table.add_column("Max Size", justify="right", style="red") size_table.add_column("Total Size", justify="right", style="yellow") total_shards = size_overview["total_shards"] @@ -1147,6 +1271,7 @@ def distribution(self, table: str = None): count_str, percentage_str, f"{avg_size:.2f}GB" if avg_size > 0 else "0GB", + f"{bucket_data['max_size']:.2f}GB" if bucket_data["max_size"] > 0 else "0GB", format_size(total_size), ) @@ -1177,6 +1302,159 @@ def distribution(self, table: str = None): for warning in warnings: console.print(warning) + # Show compact table/partition breakdown of large shards if any exist + if size_overview["large_shards_count"] > 0: + console.print() + large_shards_details = self.analyzer.get_large_shards_details() + + # Aggregate by table/partition + table_partition_stats: TableStatsType = {} + for shard in large_shards_details: + # Create table key with schema + table_display = shard["table_name"] + if shard["schema_name"] and shard["schema_name"] != "doc": + table_display = f"{shard['schema_name']}.{shard['table_name']}" + + # Create partition key + partition_key = shard["partition_values"] or "N/A" + + # Create combined key + key = (table_display, partition_key) + + if key not in table_partition_stats: + table_partition_stats[key] = { + "sizes": [], + "primary_count": 0, + "replica_count": 0, + "total_size": 0.0, + } + + # Aggregate stats + stats = table_partition_stats[key] + stats["sizes"].append(shard["size_gb"]) + stats["total_size"] += shard["size_gb"] + if shard["is_primary"]: + stats["primary_count"] += 1 + else: + stats["replica_count"] += 1 + + # Create compact table + large_shards_table = Table(title="Large Shards Breakdown by Table/Partition (>=50GB)", box=box.ROUNDED) + large_shards_table.add_column("Table", style="cyan") + large_shards_table.add_column("Partition", style="blue") + large_shards_table.add_column("Shards", justify="right", style="magenta") + large_shards_table.add_column("P/R", justify="center", style="yellow") + large_shards_table.add_column("Min Size", justify="right", style="green") + large_shards_table.add_column("Avg Size", justify="right", style="red") + large_shards_table.add_column("Max Size", justify="right", style="red") + large_shards_table.add_column("Total Size", justify="right", style="red") + + # Sort by total size descending (most problematic first) + sorted_stats = sorted(table_partition_stats.items(), key=lambda x: x[1]["total_size"], reverse=True) + + for (table_name, partition_key), stats in sorted_stats: + # Format partition display + partition_display = partition_key + if partition_display != "N/A" and len(partition_display) > 25: + partition_display = partition_display[:22] + "..." + + # Calculate size stats + sizes = stats["sizes"] + min_size = min(sizes) + avg_size = sum(sizes) / len(sizes) + max_size = max(sizes) + total_size = stats["total_size"] + total_shards = len(sizes) + + # Format primary/replica ratio + p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R" + + large_shards_table.add_row( + table_name, + partition_display, + str(total_shards), + p_r_display, + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB", + ) + + console.print(large_shards_table) + + # Add summary stats + total_primary = sum(stats["primary_count"] for stats in table_partition_stats.values()) + total_replica = sum(stats["replica_count"] for stats in table_partition_stats.values()) + affected_table_partitions = len(table_partition_stats) + + console.print() + console.print( + f"[dim]๐Ÿ“Š Summary: {total_primary} primary, {total_replica} replica shards " + f"across {affected_table_partitions} table/partition(s)[/dim]" + ) + + # Show compact table/partition breakdown of smallest shards (top 10) + console.print() + small_shards_details = self.analyzer.get_small_shards_details(limit=10) + + if small_shards_details: + # Create compact table + small_shards_table = Table(title="Smallest Shards Breakdown by Table/Partition (Top 10)", box=box.ROUNDED) + small_shards_table.add_column("Table", style="cyan") + small_shards_table.add_column("Partition", style="blue") + small_shards_table.add_column("Shards", justify="right", style="magenta") + small_shards_table.add_column("P/R", justify="center", style="yellow") + small_shards_table.add_column("Min Size", justify="right", style="green") + small_shards_table.add_column("Avg Size", justify="right", style="red") + small_shards_table.add_column("Max Size", justify="right", style="red") + small_shards_table.add_column("Total Size", justify="right", style="red") + + for entry in small_shards_details: + table_name = entry["table_name"] + partition_key = entry["partition_key"] + stats = entry["stats"] + + # Format partition display + partition_display = partition_key + if partition_display != "N/A" and len(partition_display) > 25: + partition_display = partition_display[:22] + "..." + + # Calculate size stats + sizes = stats["sizes"] + min_size = min(sizes) + avg_size = sum(sizes) / len(sizes) + max_size = max(sizes) + total_size = stats["total_size"] + total_shards = len(sizes) + + # Format primary/replica ratio + p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R" + + small_shards_table.add_row( + table_name, + partition_display, + str(total_shards), + p_r_display, + f"{min_size:.1f}GB", + f"{avg_size:.1f}GB", + f"{max_size:.1f}GB", + f"{total_size:.1f}GB", + ) + + console.print(small_shards_table) + + # Add summary stats for smallest shards + total_small_primary = sum(entry["stats"]["primary_count"] for entry in small_shards_details) + total_small_replica = sum(entry["stats"]["replica_count"] for entry in small_shards_details) + small_table_partitions = len(small_shards_details) + + console.print() + console.print( + f"[dim]๐Ÿ“Š Summary: {total_small_primary} primary, " + f"{total_small_replica} replica shards across {small_table_partitions} table/partition(s) " + f"with smallest average sizes[/dim]" + ) + console.print() # Table-specific analysis if requested @@ -1184,16 +1462,16 @@ def distribution(self, table: str = None): console.print() console.print(Panel.fit(f"[bold blue]Analysis for table: {table}[/bold blue]")) - stats = self.analyzer.analyze_distribution(table) + distribution_stats = self.analyzer.analyze_distribution(table) table_summary = Table(title=f"Table {table} Distribution", box=box.ROUNDED) table_summary.add_column("Metric", style="cyan") table_summary.add_column("Value", style="magenta") - table_summary.add_row("Total Shards", str(stats.total_shards)) - table_summary.add_row("Total Size", format_size(stats.total_size_gb)) - table_summary.add_row("Zone Balance Score", f"{stats.zone_balance_score:.1f}/100") - table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100") + table_summary.add_row("Total Shards", str(distribution_stats.total_shards)) + table_summary.add_row("Total Size", format_size(distribution_stats.total_size_gb)) + table_summary.add_row("Zone Balance Score", f"{distribution_stats.zone_balance_score:.1f}/100") + table_summary.add_row("Node Balance Score", f"{distribution_stats.node_balance_score:.1f}/100") console.print(table_summary) diff --git a/cratedb_toolkit/admin/xmover/model.py b/cratedb_toolkit/admin/xmover/model.py index 8dea5233..2f57129b 100644 --- a/cratedb_toolkit/admin/xmover/model.py +++ b/cratedb_toolkit/admin/xmover/model.py @@ -1,5 +1,7 @@ from dataclasses import dataclass -from typing import Dict, Optional +from typing import Any, Dict, Optional, Tuple + +TableStatsType = Dict[Tuple[str, str], Dict[str, Any]] @dataclass From bc2a66bb57fc45dc1f131b6575cae96f779c6b7c Mon Sep 17 00:00:00 2001 From: Walter Behmann Date: Fri, 19 Sep 2025 23:04:57 +0200 Subject: [PATCH 13/13] XMover/problematic-translogs: Remove allocation info from problematic shards query --- cratedb_toolkit/admin/xmover/util/database.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cratedb_toolkit/admin/xmover/util/database.py b/cratedb_toolkit/admin/xmover/util/database.py index c02f323e..d1a0e14e 100644 --- a/cratedb_toolkit/admin/xmover/util/database.py +++ b/cratedb_toolkit/admin/xmover/util/database.py @@ -541,11 +541,8 @@ def get_problematic_shards( s.routing_state, s.node['name'] as node_name, s.node['id'] as node_id, - s."primary", - a.current_state, - a.explanation + s."primary" FROM sys.shards s - LEFT JOIN sys.allocations a ON (s.table_name = a.table_name AND s.id = a.shard_id) LEFT JOIN information_schema.table_partitions p ON s.table_name = p.table_name AND s.schema_name = p.table_schema @@ -569,8 +566,6 @@ def get_problematic_shards( "node_name": row[6], "node_id": row[7], "primary": row[8], - "current_state": row[9], - "explanation": row[10], } )