diff --git a/.env.example b/.env.example index 4820cb155..cc03c40cb 100644 --- a/.env.example +++ b/.env.example @@ -510,13 +510,21 @@ RETRY_JITTER_MAX=0.5 ##################################### # Logging verbosity level -# Options: DEBUG, INFO (default), WARNING, ERROR, CRITICAL +# Options: DEBUG, INFO, WARNING, ERROR (default), CRITICAL # DEBUG: Detailed diagnostic info (verbose) # INFO: General operational messages # WARNING: Warning messages for potential issues -# ERROR: Error messages for failures +# ERROR: Error messages for failures (recommended for production) # CRITICAL: Only critical failures -LOG_LEVEL=INFO +# PRODUCTION: Use ERROR to minimize I/O overhead and improve performance +LOG_LEVEL=ERROR + +# Disable access logging for performance +# Options: true, false (default) +# When true: Disables both gunicorn and uvicorn access logs +# PRODUCTION: Set to true for high-performance deployments +# Access logs create massive I/O overhead under high concurrency +# DISABLE_ACCESS_LOG=true # Log output format # Options: json (default), text diff --git a/docker-compose.yml b/docker-compose.yml index d2c7dc78a..333e48c41 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -76,6 +76,13 @@ services: # Uncomment to enable catalog - MCPGATEWAY_CATALOG_ENABLED=true - MCPGATEWAY_CATALOG_FILE=/app/mcp-catalog.yml + # Authentication configuration + - AUTH_REQUIRED=true + - MCP_CLIENT_AUTH_ENABLED=true + - TRUST_PROXY_AUTH=false + # Logging configuration + - LOG_LEVEL=ERROR # Default to ERROR for production performance + - DISABLE_ACCESS_LOG=true # Disable uvicorn access logs for performance (massive I/O overhead) # Phoenix Observability Integration (uncomment when using Phoenix) # - PHOENIX_ENDPOINT=${PHOENIX_ENDPOINT:-http://phoenix:6006} diff --git a/mcpgateway/admin.py b/mcpgateway/admin.py index 5f22df961..0da274759 100644 --- a/mcpgateway/admin.py +++ b/mcpgateway/admin.py @@ -9910,7 +9910,7 @@ async def admin_generate_support_bundle( LOGGER.info(f"Support bundle generation requested by user: {user}") # First-Party - from mcpgateway.services.support_bundle_service import SupportBundleConfig, SupportBundleService + from mcpgateway.services.support_bundle_service import SupportBundleConfig, SupportBundleService # pylint: disable=import-outside-toplevel # Create configuration config = SupportBundleConfig( diff --git a/mcpgateway/config.py b/mcpgateway/config.py index b5546edb7..f957869d4 100644 --- a/mcpgateway/config.py +++ b/mcpgateway/config.py @@ -626,7 +626,7 @@ def _parse_allowed_origins(cls, v): return set(v) # Logging - log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default="INFO", env="LOG_LEVEL") + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default="ERROR", env="LOG_LEVEL") log_format: Literal["json", "text"] = "json" # json or text log_to_file: bool = False # Enable file logging (default: stdout/stderr only) log_filemode: str = "a+" # append or overwrite diff --git a/mcpgateway/main.py b/mcpgateway/main.py index 9b970e55e..3825948e1 100644 --- a/mcpgateway/main.py +++ b/mcpgateway/main.py @@ -3410,11 +3410,19 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen request_id = params.get("requestId", None) if not uri: raise JSONRPCError(-32602, "Missing resource URI in parameters", params) - result = await resource_service.read_resource(db, uri, request_id=request_id, user=get_user_email(user)) - if hasattr(result, "model_dump"): - result = {"contents": [result.model_dump(by_alias=True, 
exclude_none=True)]} - else: - result = {"contents": [result]} + # Get user email for OAuth token selection + user_email = get_user_email(user) + try: + result = await resource_service.read_resource(db, uri, request_id=request_id, user=user_email) + if hasattr(result, "model_dump"): + result = {"contents": [result.model_dump(by_alias=True, exclude_none=True)]} + else: + result = {"contents": [result]} + except ValueError: + # Resource has no local content, forward to upstream MCP server + result = await gateway_service.forward_request(db, method, params, app_user_email=user_email) + if hasattr(result, "model_dump"): + result = result.model_dump(by_alias=True, exclude_none=True) elif method == "prompts/list": if server_id: prompts = await prompt_service.list_server_prompts(db, server_id, cursor=cursor) diff --git a/mcpgateway/middleware/token_scoping.py b/mcpgateway/middleware/token_scoping.py index 63c5ed8e3..16f7abde1 100644 --- a/mcpgateway/middleware/token_scoping.py +++ b/mcpgateway/middleware/token_scoping.py @@ -370,7 +370,10 @@ def _check_team_membership(self, payload: dict) -> bool: db = next(get_db()) try: - for team_id in teams: + for team in teams: + # Extract team ID from dict or use string directly (backward compatibility) + team_id = team["id"] if isinstance(team, dict) else team + membership = db.execute( select(EmailTeamMember).where(and_(EmailTeamMember.team_id == team_id, EmailTeamMember.user_email == user_email, EmailTeamMember.is_active)) ).scalar_one_or_none() @@ -383,7 +386,7 @@ def _check_team_membership(self, payload: dict) -> bool: finally: db.close() - def _check_resource_team_ownership(self, request_path: str, token_teams: list) -> bool: + def _check_resource_team_ownership(self, request_path: str, token_teams: list) -> bool: # pylint: disable=too-many-return-statements """ Check if the requested resource is accessible by the token. 
@@ -412,9 +415,16 @@ def _check_resource_team_ownership(self, request_path: str, token_teams: list) - Returns: bool: True if resource access is allowed, False otherwise """ + # Normalize token_teams: extract team IDs from dict objects (backward compatibility) + token_team_ids = [] + for team in token_teams: + if isinstance(team, dict): + token_team_ids.append(team["id"]) + else: + token_team_ids.append(team) # Determine token type - is_public_token = not token_teams or len(token_teams) == 0 + is_public_token = not token_team_ids or len(token_team_ids) == 0 if is_public_token: logger.debug("Processing request with PUBLIC-ONLY token") @@ -442,7 +452,7 @@ def _check_resource_team_ownership(self, request_path: str, token_teams: list) - # If no resource ID in path, allow (general endpoints like /health, /tokens, /metrics) if not resource_id or not resource_type: - logger.info(f"No resource ID found in path {request_path}, allowing access") + logger.debug(f"No resource ID found in path {request_path}, allowing access") return True # Import database models @@ -477,16 +487,16 @@ def _check_resource_team_ownership(self, request_path: str, token_teams: list) - # TEAM-SCOPED SERVERS: Check if server belongs to token's teams if server_visibility == "team": - if server.team_id in token_teams: + if server.team_id in token_team_ids: logger.debug(f"Access granted: Team server {resource_id} belongs to token's team {server.team_id}") return True - logger.warning(f"Access denied: Server {resource_id} is team-scoped to '{server.team_id}', " f"token is scoped to teams {token_teams}") + logger.warning(f"Access denied: Server {resource_id} is team-scoped to '{server.team_id}', " f"token is scoped to teams {token_team_ids}") return False # PRIVATE SERVERS: Check if server belongs to token's teams if server_visibility == "private": - if server.team_id in token_teams: + if server.team_id in token_team_ids: logger.debug(f"Access granted: Private server {resource_id} in token's team {server.team_id}") return True @@ -521,17 +531,17 @@ def _check_resource_team_ownership(self, request_path: str, token_teams: list) - # TEAM TOOLS: Check if tool's team matches token's teams if tool_visibility == "team": tool_team_id = getattr(tool, "team_id", None) - if tool_team_id and tool_team_id in token_teams: + if tool_team_id and tool_team_id in token_team_ids: logger.debug(f"Access granted: Team tool {resource_id} belongs to token's team {tool_team_id}") return True - logger.warning(f"Access denied: Tool {resource_id} is team-scoped to '{tool_team_id}', " f"token is scoped to teams {token_teams}") + logger.warning(f"Access denied: Tool {resource_id} is team-scoped to '{tool_team_id}', " f"token is scoped to teams {token_team_ids}") return False # PRIVATE TOOLS: Check if tool is in token's team context if tool_visibility in ["private", "user"]: tool_team_id = getattr(tool, "team_id", None) - if tool_team_id and tool_team_id in token_teams: + if tool_team_id and tool_team_id in token_team_ids: logger.debug(f"Access granted: Private tool {resource_id} in token's team {tool_team_id}") return True @@ -566,17 +576,17 @@ def _check_resource_team_ownership(self, request_path: str, token_teams: list) - # TEAM RESOURCES: Check if resource's team matches token's teams if resource_visibility == "team": resource_team_id = getattr(resource, "team_id", None) - if resource_team_id and resource_team_id in token_teams: + if resource_team_id and resource_team_id in token_team_ids: logger.debug(f"Access granted: Team resource {resource_id} belongs 
to token's team {resource_team_id}") return True - logger.warning(f"Access denied: Resource {resource_id} is team-scoped to '{resource_team_id}', " f"token is scoped to teams {token_teams}") + logger.warning(f"Access denied: Resource {resource_id} is team-scoped to '{resource_team_id}', " f"token is scoped to teams {token_team_ids}") return False # PRIVATE RESOURCES: Check if resource is in token's team context if resource_visibility in ["private", "user"]: resource_team_id = getattr(resource, "team_id", None) - if resource_team_id and resource_team_id in token_teams: + if resource_team_id and resource_team_id in token_team_ids: logger.debug(f"Access granted: Private resource {resource_id} in token's team {resource_team_id}") return True @@ -611,17 +621,17 @@ def _check_resource_team_ownership(self, request_path: str, token_teams: list) - # TEAM PROMPTS: Check if prompt's team matches token's teams if prompt_visibility == "team": prompt_team_id = getattr(prompt, "team_id", None) - if prompt_team_id and prompt_team_id in token_teams: + if prompt_team_id and prompt_team_id in token_team_ids: logger.debug(f"Access granted: Team prompt {resource_id} belongs to token's team {prompt_team_id}") return True - logger.warning(f"Access denied: Prompt {resource_id} is team-scoped to '{prompt_team_id}', " f"token is scoped to teams {token_teams}") + logger.warning(f"Access denied: Prompt {resource_id} is team-scoped to '{prompt_team_id}', " f"token is scoped to teams {token_team_ids}") return False # PRIVATE PROMPTS: Check if prompt is in token's team context if prompt_visibility in ["private", "user"]: prompt_team_id = getattr(prompt, "team_id", None) - if prompt_team_id and prompt_team_id in token_teams: + if prompt_team_id and prompt_team_id in token_team_ids: logger.debug(f"Access granted: Private prompt {resource_id} in token's team {prompt_team_id}") return True diff --git a/mcpgateway/services/logging_service.py b/mcpgateway/services/logging_service.py index c7532f2f7..b293eaec8 100644 --- a/mcpgateway/services/logging_service.py +++ b/mcpgateway/services/logging_service.py @@ -191,6 +191,9 @@ async def initialize(self) -> None: >>> service = LoggingService() >>> asyncio.run(service.initialize()) """ + # Update service log level from settings BEFORE configuring loggers + self._level = settings.log_level + root_logger = logging.getLogger() self._loggers[""] = root_logger diff --git a/mcpgateway/services/support_bundle_service.py b/mcpgateway/services/support_bundle_service.py index 5a95cfb92..e97d42571 100644 --- a/mcpgateway/services/support_bundle_service.py +++ b/mcpgateway/services/support_bundle_service.py @@ -244,7 +244,7 @@ def _collect_system_info(self) -> Dict[str, Any]: # Try to collect psutil metrics if available try: # Third-Party - import psutil + import psutil # pylint: disable=import-outside-toplevel info["system"] = { "cpu_count": psutil.cpu_count(logical=True), @@ -448,7 +448,7 @@ def generate_bundle(self, config: Optional[SupportBundleConfig] = None) -> Path: zf.writestr(f"logs/{log_name}", log_content) # Add README - readme = """# MCP Gateway Support Bundle + readme = f"""# MCP Gateway Support Bundle This bundle contains diagnostic information for troubleshooting MCP Gateway issues. @@ -478,12 +478,10 @@ def generate_bundle(self, config: Optional[SupportBundleConfig] = None) -> Path: Pay special attention to logs/ for error messages and stack traces. 
--- -Generated: {timestamp} -Hostname: {hostname} -Version: {version} -""".format( - timestamp=self.timestamp.isoformat(), hostname=self.hostname, version=__version__ - ) +Generated: {self.timestamp.isoformat()} +Hostname: {self.hostname} +Version: {__version__} +""" zf.writestr("README.md", readme) diff --git a/plugins/config.yaml b/plugins/config.yaml index b5e157f21..be4e53318 100644 --- a/plugins/config.yaml +++ b/plugins/config.yaml @@ -230,7 +230,7 @@ plugins: author: "Mihai Criveti" hooks: ["tool_pre_invoke", "tool_post_invoke"] tags: ["schema", "validation"] - mode: "enforce_ignore_error" + mode: "disabled" priority: 110 conditions: [] config: @@ -246,7 +246,7 @@ plugins: author: "Mihai Criveti" hooks: ["tool_pre_invoke", "tool_post_invoke"] tags: ["cache", "performance"] - mode: "permissive" + mode: "disabled" priority: 130 conditions: [] config: @@ -262,7 +262,7 @@ plugins: author: "Mihai Criveti" hooks: ["resource_pre_fetch"] tags: ["security", "url", "reputation"] - mode: "enforce" + mode: "disabled" priority: 60 conditions: [] config: @@ -278,7 +278,7 @@ plugins: author: "Mihai Criveti" hooks: ["resource_pre_fetch", "resource_post_fetch"] tags: ["security", "content", "mime"] - mode: "enforce" + mode: "disabled" priority: 65 conditions: [] config: @@ -293,7 +293,7 @@ plugins: author: "Mihai Criveti" hooks: ["tool_post_invoke", "resource_post_fetch"] tags: ["reliability", "retry"] - mode: "permissive" + mode: "disabled" priority: 170 conditions: [] config: @@ -310,7 +310,7 @@ plugins: author: "Mihai Criveti" hooks: ["prompt_post_fetch", "resource_post_fetch"] tags: ["markdown", "format"] - mode: "permissive" + mode: "disabled" priority: 140 conditions: [] config: {} @@ -323,7 +323,7 @@ plugins: author: "Mihai Criveti" hooks: ["tool_post_invoke"] tags: ["json", "repair"] - mode: "permissive" + mode: "disabled" priority: 145 conditions: [] config: {} @@ -336,7 +336,7 @@ plugins: author: "Mihai Criveti" hooks: ["resource_pre_fetch", "resource_post_fetch", "prompt_post_fetch", "tool_post_invoke"] tags: ["security", "threat"] - mode: "enforce" + mode: "disabled" priority: 61 conditions: [] config: @@ -383,7 +383,7 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_post_invoke"] tags: ["security", "code"] - mode: "enforce" + mode: "disabled" priority: 155 conditions: [] config: @@ -402,7 +402,7 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_post_invoke"] tags: ["guard", "length", "outputs", "truncate", "block"] - mode: "permissive" # use "enforce" with strategy: block for strict behavior + mode: "disabled" # use "enforce" with strategy: block for strict behavior priority: 160 # run after other transformers conditions: [] config: @@ -419,7 +419,7 @@ plugins: author: "MCP Context Forge Team" hooks: ["resource_post_fetch", "tool_post_invoke"] tags: ["summarize", "llm", "content"] - mode: "permissive" + mode: "disabled" priority: 170 conditions: [] config: @@ -478,7 +478,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_pre_invoke", "tool_post_invoke"] tags: ["reliability", "sre"] - mode: "enforce_ignore_error" + #mode: "enforce_ignore_error" + mode: "disabled" priority: 70 conditions: [] config: @@ -497,7 +498,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_pre_invoke", "tool_post_invoke"] tags: ["latency", "slo"] - mode: "enforce_ignore_error" + #mode: "enforce_ignore_error" + mode: "disabled" priority: 85 conditions: [] config: @@ -513,7 +515,7 @@ plugins: author: "MCP Context Forge Team" hooks: ["resource_pre_fetch", "resource_post_fetch"] 
tags: ["compliance", "robots", "license"] - mode: "enforce" + mode: "disabled" priority: 63 conditions: [] config: @@ -531,7 +533,7 @@ plugins: author: "MCP Context Forge Team" hooks: ["prompt_pre_fetch", "tool_post_invoke"] tags: ["safety", "moderation"] - mode: "enforce" + mode: "disabled" priority: 96 conditions: [] config: @@ -549,7 +551,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_pre_invoke", "tool_post_invoke"] tags: ["localization", "timezone"] - mode: "permissive" + #mode: "permissive" + mode: "disabled" priority: 175 conditions: [] config: @@ -585,7 +588,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["prompt_pre_fetch", "tool_pre_invoke"] tags: ["security", "sql", "validation"] - mode: "enforce" + # mode: "enforce" + mode: "disabled" priority: 45 conditions: [] config: @@ -605,7 +609,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["prompt_pre_fetch", "tool_post_invoke", "resource_post_fetch"] tags: ["security", "secrets", "dlp"] - mode: "enforce" + # mode: "enforce" + mode: "disabled" priority: 51 conditions: [] config: @@ -631,7 +636,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["resource_pre_fetch"] tags: ["headers", "network", "enhancement"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 58 conditions: [] config: @@ -647,7 +653,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["prompt_post_fetch"] tags: ["compliance", "notice", "prompt"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 90 conditions: [] config: @@ -663,7 +670,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_pre_invoke", "tool_post_invoke"] tags: ["performance", "cache", "similarity"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 128 conditions: [] config: @@ -681,7 +689,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_post_invoke", "resource_post_fetch"] tags: ["format", "enhancement", "postprocess"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 180 conditions: [] config: @@ -702,7 +711,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["tool_post_invoke", "resource_post_fetch"] tags: ["compliance", "license", "format"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 185 conditions: [] config: @@ -720,7 +730,8 @@ plugins: author: "MCP Context Forge Team" hooks: ["resource_post_fetch", "tool_post_invoke"] tags: ["citation", "links", "validation"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 122 conditions: [] config: @@ -739,7 +750,8 @@ plugins: author: "Adrian Popa" hooks: ["tool_pre_invoke"] tags: ["security", "vault", "OAUTH2"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 10 conditions: - prompts: [] @@ -759,7 +771,8 @@ plugins: author: "Manav Gupta" hooks: ["tool_pre_invoke", "tool_post_invoke", "prompt_post_fetch", "resource_post_fetch"] tags: ["notification", "webhook", "monitoring", "observability"] - mode: "permissive" + # mode: "permissive" + mode: "disabled" priority: 900 # Run after other plugins to capture their violations conditions: [] config: @@ -798,7 +811,8 @@ plugins: author: "Manav Gupta" hooks: ["prompt_pre_fetch", "tool_pre_invoke", "tool_post_invoke"] tags: ["safety", "moderation", "content", "ai", "ibm", "watson", "granite"] - mode: "permissive" # Use permissive mode for testing + # mode: "permissive" # Use permissive mode for testing + mode: "disabled" priority: 30 # Run early in the pipeline conditions: [] config: diff --git 
a/run-gunicorn.sh b/run-gunicorn.sh index 621dac045..61addbbb1 100755 --- a/run-gunicorn.sh +++ b/run-gunicorn.sh @@ -341,7 +341,19 @@ cmd=( --timeout "${GUNICORN_TIMEOUT}" --max-requests "${GUNICORN_MAX_REQUESTS}" --max-requests-jitter "${GUNICORN_MAX_REQUESTS_JITTER}" - --access-logfile - +) + +# Configure access logging based on DISABLE_ACCESS_LOG setting +# For performance testing, disable access logs which cause significant I/O overhead +DISABLE_ACCESS_LOG=${DISABLE_ACCESS_LOG:-false} +if [[ "${DISABLE_ACCESS_LOG}" == "true" ]]; then + cmd+=( --access-logfile /dev/null ) + echo "🚫 Access logging disabled for performance" +else + cmd+=( --access-logfile - ) +fi + +cmd+=( --error-logfile - --forwarded-allow-ips="*" --pid "${LOCK_FILE}" # Use lock file as PID file diff --git a/tests/performance/.gitignore b/tests/performance/.gitignore new file mode 100644 index 000000000..064dbb45d --- /dev/null +++ b/tests/performance/.gitignore @@ -0,0 +1,24 @@ +# Ignore test results +results/ +*.txt +*.csv +*.log + +# Ignore generated auth tokens +.auth_token + +# Ignore generated reports (but commit the directory) +reports/*.html +!reports/.gitkeep + +# Ignore generated docker-compose files +docker-compose.perf.yml +docker-compose.backup_*.yml +nginx.conf + +# Ignore baselines (user-specific, don't commit) +baselines/*.json +!baselines/.gitkeep + +# Keep directory structure +!results/.gitkeep diff --git a/tests/performance/MANUAL_TESTING.md b/tests/performance/MANUAL_TESTING.md new file mode 100644 index 000000000..7f6be9483 --- /dev/null +++ b/tests/performance/MANUAL_TESTING.md @@ -0,0 +1,458 @@ +# MCP Gateway API Manual Testing Guide + +Complete CLI testing examples for MCP Gateway API endpoints. + +## Prerequisites + +```bash +# Install required tools +# - curl (usually pre-installed) +# - jq (for JSON parsing) +# - hey (for load testing) + +# Install jq on Ubuntu/Debian +sudo apt-get install jq + +# Install hey +go install github.com/rakyll/hey@latest +# OR download from: https://github.com/rakyll/hey/releases +``` + +## Quick Start: Complete Test Script + +```bash +#!/bin/bash +# Save this as test_gateway.sh and run: bash test_gateway.sh + +echo "=== MCP Gateway API Tests ===" + +# 1. Health Check (no auth required) +echo -e "\n1. Health Check:" +curl -s http://localhost:4444/health | jq . + +# 2. Login and get token +echo -e "\n2. Login:" +export TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') +echo "Token: ${TOKEN:0:50}..." + +# 3. List tools +echo -e "\n3. List Tools (first 3):" +curl -s -X GET "http://localhost:4444/tools?limit=3" \ + -H "Authorization: Bearer $TOKEN" | jq '.[0:3] | .[] | {name, description, team}' + +# 4. List servers +echo -e "\n4. List Servers:" +curl -s -X GET "http://localhost:4444/servers?limit=3" \ + -H "Authorization: Bearer $TOKEN" | jq '.[] | {id, name, url}' + +# 5. List resources +echo -e "\n5. List Resources (first 3):" +curl -s -X GET "http://localhost:4444/resources?limit=3" \ + -H "Authorization: Bearer $TOKEN" | jq '.[0:3] | .[] | {name, uri}' + +echo -e "\n=== Tests Complete ===" +``` + +## Individual API Endpoint Tests + +### 1. Health Check (No Authentication) + +```bash +# Basic health check +curl -s http://localhost:4444/health | jq . + +# Expected output: +# { +# "status": "healthy", +# "timestamp": "2025-10-10T09:27:54.705729Z" +# } +``` + +### 2. 
Authentication - Get JWT Token + +```bash +# Login and get token +export TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + +# Verify token was received +echo "Token: ${TOKEN:0:50}..." + +# Decode JWT to see payload (optional) +echo $TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq . +``` + +### 3. List Tools (GET) + +```bash +# List all tools (limit 5) +curl -s -X GET "http://localhost:4444/tools?limit=5" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Get just tool names and descriptions +curl -s -X GET "http://localhost:4444/tools?limit=10" \ + -H "Authorization: Bearer $TOKEN" | \ + jq '.[] | {name, description, team, visibility}' + +# Count total tools +curl -s -X GET "http://localhost:4444/tools" \ + -H "Authorization: Bearer $TOKEN" | jq 'length' +``` + +### 4. List Servers + +```bash +# List all servers +curl -s -X GET "http://localhost:4444/servers" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Get server summary +curl -s -X GET "http://localhost:4444/servers" \ + -H "Authorization: Bearer $TOKEN" | \ + jq '.[] | {id, name, url, enabled, reachable}' +``` + +### 5. List Resources + +```bash +# List resources +curl -s -X GET "http://localhost:4444/resources?limit=5" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Get resource names and URIs +curl -s -X GET "http://localhost:4444/resources" \ + -H "Authorization: Bearer $TOKEN" | \ + jq '.[] | {name, uri, mimeType}' +``` + +### 6. List Prompts + +```bash +# List prompts +curl -s -X GET "http://localhost:4444/prompts?limit=5" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Get prompt names and descriptions +curl -s -X GET "http://localhost:4444/prompts" \ + -H "Authorization: Bearer $TOKEN" | \ + jq '.[] | {name, description}' +``` + +### 7. Get User Profile + +```bash +# Get current user info +curl -s -X GET "http://localhost:4444/auth/email/me" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Expected output: +# { +# "email": "admin@example.com", +# "full_name": "Platform Administrator", +# "is_admin": true, +# "auth_provider": "local", +# "created_at": "2025-10-10T09:23:25.943945Z" +# } +``` + +## Performance Testing with hey + +### Tools API Performance Test + +```bash +# Get token first +export TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + +# Performance test: 1000 requests, 50 concurrent +hey -n 1000 -c 50 -m GET \ + -H "Authorization: Bearer $TOKEN" \ + "http://localhost:4444/tools?limit=10" + +# Expected results (with optimized logging): +# Summary: +# Total: 0.5-0.8 secs +# Slowest: 0.05 secs +# Fastest: 0.001 secs +# Average: 0.02 secs +# Requests/sec: 1500-2000 +# +# Status code distribution: +# [200] 1000 responses +``` + +### Health Check Performance Test + +```bash +# No authentication required - test raw performance +hey -n 5000 -c 100 -m GET \ + "http://localhost:4444/health" + +# Expected: 3000-5000 RPS (no DB queries) +``` + +### Multiple Endpoint Stress Test + +```bash +# Generate token +export TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + +# Test multiple endpoints in parallel +echo "Testing /tools..." 
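+# Note: the three hey runs below are started in the background with '&' so the
+# endpoints are exercised concurrently; the 'wait' at the end blocks until all
+# three finish before the completion message is printed.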
+hey -n 500 -c 25 -m GET \ + -H "Authorization: Bearer $TOKEN" \ + "http://localhost:4444/tools" & + +echo "Testing /servers..." +hey -n 500 -c 25 -m GET \ + -H "Authorization: Bearer $TOKEN" \ + "http://localhost:4444/servers" & + +echo "Testing /resources..." +hey -n 500 -c 25 -m GET \ + -H "Authorization: Bearer $TOKEN" \ + "http://localhost:4444/resources" & + +# Wait for all tests to complete +wait + +echo "All performance tests complete!" +``` + +## Benchmarking Script + +Create a comprehensive benchmark script: + +```bash +#!/bin/bash +# Save as benchmark.sh + +echo "=== MCP Gateway Performance Benchmark ===" +echo "Starting at $(date)" + +# Get token +export TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + +# Test 1: Health endpoint (no auth) +echo -e "\n1. Health Check (5000 req, 100 concurrent):" +hey -n 5000 -c 100 -m GET \ + "http://localhost:4444/health" | \ + grep -E "Requests/sec:|Total:|Status code" + +# Test 2: Tools endpoint +echo -e "\n2. Tools API (1000 req, 50 concurrent):" +hey -n 1000 -c 50 -m GET \ + -H "Authorization: Bearer $TOKEN" \ + "http://localhost:4444/tools?limit=10" | \ + grep -E "Requests/sec:|Total:|Status code" + +# Test 3: Servers endpoint +echo -e "\n3. Servers API (1000 req, 50 concurrent):" +hey -n 1000 -c 50 -m GET \ + -H "Authorization: Bearer $TOKEN" \ + "http://localhost:4444/servers" | \ + grep -E "Requests/sec:|Total:|Status code" + +echo -e "\n=== Benchmark Complete ===" +echo "Finished at $(date)" +``` + +## Expected Performance Results + +With optimized logging settings (`LOG_LEVEL=ERROR`, `DISABLE_ACCESS_LOG=true`): + +| Endpoint | Requests/sec | P50 Latency | P99 Latency | +|----------|-------------|-------------|-------------| +| /health | 3000-5000 | <5ms | <20ms | +| /tools | 1500-2000 | <25ms | <50ms | +| /servers | 1500-2000 | <25ms | <50ms | +| /resources | 1200-1800 | <30ms | <60ms | + +**Note**: Actual performance depends on: +- Hardware specs +- Database configuration (SQLite vs PostgreSQL) +- Number of tools/servers/resources +- LOG_LEVEL setting (ERROR is fastest) +- DISABLE_ACCESS_LOG setting + +## Troubleshooting + +### Token Expiration + +```bash +# Tokens expire after 7 days by default +# If you get "Invalid authentication credentials", regenerate token: +export TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') +``` + +### Check Token Validity + +```bash +# Decode token to check expiration +echo $TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq '.exp' + +# Compare with current time +echo "Current time: $(date +%s)" +echo "Token expires: $(echo $TOKEN | cut -d. 
-f2 | base64 -d 2>/dev/null | jq -r '.exp')" +``` + +### View Detailed API Response + +```bash +# Get full response with headers +curl -v -X GET "http://localhost:4444/tools?limit=1" \ + -H "Authorization: Bearer $TOKEN" + +# Or use -i for just headers +curl -i -X GET "http://localhost:4444/health" +``` + +## Advanced: Automated Testing + +Create a continuous test script that runs every 5 seconds: + +```bash +#!/bin/bash +# Save as continuous_test.sh + +while true; do + clear + echo "=== MCP Gateway Health Check ===" + echo "Timestamp: $(date)" + + # Get token + TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + + # Test endpoints + echo -e "\nHealth: $(curl -s http://localhost:4444/health | jq -r '.status')" + echo "Tools: $(curl -s -X GET http://localhost:4444/tools -H "Authorization: Bearer $TOKEN" | jq 'length') available" + echo "Servers: $(curl -s -X GET http://localhost:4444/servers -H "Authorization: Bearer $TOKEN" | jq 'length') registered" + + echo -e "\nPress Ctrl+C to stop" + sleep 5 +done +``` + +Run with: `bash continuous_test.sh` + +## Integration with Automated Tests + +These manual tests complement the automated test suites: + +```bash +# Run automated tests +make test # Unit and integration tests +make smoketest # End-to-end Docker tests + +# Run performance tests +cd tests/performance +./run-configurable.sh # Configurable performance suite +./run-advanced.sh # Advanced multi-profile tests +``` + +## Common Testing Scenarios + +### Scenario 1: Verify Fix After Deployment + +```bash +#!/bin/bash +# Quick smoke test after deployment + +TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + +# Test critical endpoints +HEALTH=$(curl -s http://localhost:4444/health | jq -r '.status') +TOOLS=$(curl -s -X GET http://localhost:4444/tools -H "Authorization: Bearer $TOKEN" | jq 'length') + +if [ "$HEALTH" = "healthy" ] && [ "$TOOLS" -ge 0 ] 2>/dev/null; then + echo "āœ… Deployment verified successfully" + exit 0 +else + echo "āŒ Deployment verification failed" + exit 1 +fi +``` + +### Scenario 2: Load Test Before Release + +```bash +#!/bin/bash +# Pre-release load test + +TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + +echo "Running load test..." 
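+# Note: the success check below parses hey's "Status code distribution" output
+# (a line such as "[200]  10000 responses"). A bare grep for "200" can also match
+# latency values, so anchoring on the bracketed status code is safer, e.g.
+# (sketch, GNU grep): SUCCESS_RATE=$(grep -oP '\[200\]\s+\K\d+' /tmp/load_test_results.txt || echo 0)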
+hey -n 10000 -c 100 -m GET \ + -H "Authorization: Bearer $TOKEN" \ + "http://localhost:4444/tools?limit=10" > /tmp/load_test_results.txt + +# Check if 99% of requests succeeded +SUCCESS_RATE=$(grep "200" /tmp/load_test_results.txt | grep -oP '\d+(?= responses)' || echo "0") + +if [ "$SUCCESS_RATE" -ge 9900 ]; then + echo "āœ… Load test passed (${SUCCESS_RATE}/10000 succeeded)" + exit 0 +else + echo "āŒ Load test failed (only ${SUCCESS_RATE}/10000 succeeded)" + exit 1 +fi +``` + +### Scenario 3: API Response Time Monitoring + +```bash +#!/bin/bash +# Monitor API response times + +TOKEN=$(curl -s -X POST http://localhost:4444/auth/login \ + -H "Content-Type: application/json" \ + -d '{"email": "admin@example.com", "password": "changeme"}' \ + | jq -r '.access_token') + +# Measure response time +START=$(date +%s%3N) +curl -s -X GET "http://localhost:4444/tools" \ + -H "Authorization: Bearer $TOKEN" > /dev/null +END=$(date +%s%3N) + +RESPONSE_TIME=$((END - START)) + +echo "Tools API response time: ${RESPONSE_TIME}ms" + +if [ "$RESPONSE_TIME" -lt 100 ]; then + echo "āœ… Response time acceptable" +else + echo "āš ļø Response time slower than expected" +fi +``` + +## See Also + +- [Automated Performance Tests](./README.md) - Comprehensive automated test suite +- [Quick Start Guide](./QUICK_START.md) - Get started with performance testing +- [Main README](../../README.md) - Full project documentation diff --git a/tests/performance/Makefile b/tests/performance/Makefile new file mode 100644 index 000000000..9be1ca8ad --- /dev/null +++ b/tests/performance/Makefile @@ -0,0 +1,271 @@ +# MCP Gateway Performance Testing Makefile +# Simple entrypoint for all performance testing operations + +.PHONY: help install check test quick heavy baseline compare clean list + +# Default target +help: + @echo "MCP Gateway Performance Testing" + @echo "================================" + @echo "" + @echo "Quick Start:" + @echo " make install - Install dependencies (hey)" + @echo " make test - Run standard performance tests" + @echo " make quick - Quick smoke test (100 requests)" + @echo " make heavy - Heavy load test (50K requests)" + @echo "" + @echo "Advanced Testing:" + @echo " make test-optimized - Test with optimized server profile" + @echo " make test-production - Test production infrastructure" + @echo " make test-scaling - Test with 4 instances" + @echo " make compare-postgres - Compare PostgreSQL 15 vs 17" + @echo "" + @echo "Comprehensive Tests:" + @echo " make test-database - Database connection pool tests" + @echo " make test-gateway-core - Gateway core functionality tests" + @echo " make test-all-scenarios - Run all test scenarios" + @echo "" + @echo "Baseline Management:" + @echo " make baseline - Save current as baseline" + @echo " make save-baseline - Save existing results as baseline" + @echo " make compare - Compare with baseline" + @echo " make list-baselines - List saved baselines" + @echo "" + @echo "Utilities:" + @echo " make list-profiles - List all available profiles" + @echo " make check - Check service health" + @echo " make clean - Clean test result files" + @echo " make clean-results - Remove all result directories" + @echo " make clean-all - Deep clean (results + baselines + reports)" + @echo "" + @echo "Documentation:" + @echo " make docs - Open main documentation" + @echo "" + +# Installation +install: + @echo "Installing performance testing dependencies..." + @command -v hey >/dev/null 2>&1 || (echo "Installing hey..." 
&& go install github.com/rakyll/hey@latest) + @command -v python3 >/dev/null 2>&1 || (echo "Python 3 required but not found" && exit 1) + @pip install pyyaml >/dev/null 2>&1 || echo "Installing pyyaml..." && pip install pyyaml + @echo "āœ… Dependencies installed" + +# Health check +check: + @./utils/check-services.sh + +# Basic Tests +test: + @echo "Running standard performance tests (medium profile)..." + @timeout 600 ./run-advanced.sh -p medium + +quick: + @echo "Running quick smoke test..." + @./run-advanced.sh -p smoke --skip-report + +heavy: + @echo "Running heavy load test..." + @timeout 1200 ./run-advanced.sh -p heavy + +# Server Profile Tests +test-minimal: + @./run-advanced.sh -p medium --server-profile minimal + +test-optimized: + @./run-advanced.sh -p medium --server-profile optimized + +test-memory: + @./run-advanced.sh -p medium --server-profile memory_optimized + +test-io: + @./run-advanced.sh -p medium --server-profile io_optimized + +# Infrastructure Tests +test-development: + @./run-advanced.sh -p medium --infrastructure development + +test-staging: + @./run-advanced.sh -p heavy --infrastructure staging + +test-production: + @timeout 1200 ./run-advanced.sh -p heavy --infrastructure production + +# New comprehensive tests +test-database: + @echo "Running database connection pool tests..." + @./scenarios/database-benchmark.sh + +test-gateway-core: + @echo "Running gateway core functionality tests..." + @./scenarios/gateway-core-benchmark.sh + +test-all-scenarios: + @echo "Running all test scenarios..." + @./scenarios/tools-benchmark.sh + @./scenarios/resources-benchmark.sh + @./scenarios/prompts-benchmark.sh + @./scenarios/gateway-core-benchmark.sh + @./scenarios/database-benchmark.sh + +test-ha: + @./run-advanced.sh -p heavy --infrastructure production_ha + +# Scaling Tests +test-scaling: + @echo "Testing with 4 gateway instances..." + @./run-advanced.sh -p heavy --instances 4 + +test-single: + @./run-advanced.sh -p heavy --instances 1 + +# Database Comparison +compare-postgres: + @echo "Comparing PostgreSQL versions..." + @./run-advanced.sh -p medium --postgres-version 15-alpine --save-baseline pg15_comparison.json + @./run-advanced.sh -p medium --postgres-version 17-alpine --compare-with pg15_comparison.json + +test-pg15: + @./run-advanced.sh -p medium --postgres-version 15-alpine + +test-pg16: + @./run-advanced.sh -p medium --postgres-version 16-alpine + +test-pg17: + @./run-advanced.sh -p medium --postgres-version 17-alpine + +# Baseline Management +baseline: + @echo "Saving current results as baseline..." + @./run-advanced.sh -p medium --save-baseline current_baseline_$$(date +%Y%m%d).json + +baseline-production: + @./run-advanced.sh -p heavy --infrastructure production --save-baseline production_baseline.json + +compare: + @if [ ! -f baselines/production_baseline.json ]; then \ + echo "āŒ No production baseline found. 
Run 'make baseline-production' first."; \ + exit 1; \ + fi + @./run-advanced.sh -p heavy --infrastructure production --compare-with production_baseline.json + +compare-with: +ifndef BASELINE + @echo "Usage: make compare-with BASELINE=filename.json" + @exit 1 +endif + @./run-advanced.sh -p medium --compare-with $(BASELINE) + +list-baselines: + @./utils/baseline_manager.py list + +save-baseline: +ifndef BASELINE + @echo "Usage: make save-baseline BASELINE=name RESULTS=results_dir [PROFILE=profile] [SERVER_PROFILE=profile]" + @echo "Example: make save-baseline BASELINE=optimized-4instance RESULTS=/tmp/sample_results2 SERVER_PROFILE=optimized" + @exit 1 +endif +ifndef RESULTS + @echo "Usage: make save-baseline BASELINE=name RESULTS=results_dir [PROFILE=profile] [SERVER_PROFILE=profile]" + @echo "Example: make save-baseline BASELINE=optimized-4instance RESULTS=/tmp/sample_results2 SERVER_PROFILE=optimized" + @exit 1 +endif + @echo "Saving baseline '$(BASELINE)' from $(RESULTS)..." + @mkdir -p baselines + @python3 utils/baseline_manager.py save $(RESULTS) --output baselines/$(BASELINE).json $(if $(PROFILE),--profile $(PROFILE)) $(if $(SERVER_PROFILE),--server-profile $(SERVER_PROFILE)) $(if $(INFRASTRUCTURE),--infrastructure $(INFRASTRUCTURE)) + @echo "āœ… Baseline saved to baselines/$(BASELINE).json" + +# Profile Management +list-profiles: + @echo "" + @echo "=== Load Profiles ===" + @python3 -c "import yaml; c=yaml.safe_load(open('config.yaml')); [print(f' {k:12} - {v.get(\"description\",\"\")}') for k,v in c.get('profiles',{}).items()]" + @echo "" + @echo "=== Server Profiles ===" + @python3 -c "import yaml; c=yaml.safe_load(open('config.yaml')); [print(f' {k:20} - {v.get(\"description\",\"\")}') for k,v in c.get('server_profiles',{}).items()]" + @echo "" + @echo "=== Infrastructure Profiles ===" + @python3 -c "import yaml; c=yaml.safe_load(open('config.yaml')); [print(f' {k:20} - {v.get(\"description\",\"\")}') for k,v in c.get('infrastructure_profiles',{}).items()]" + @echo "" + +list-server-profiles: + @./run-advanced.sh --list-server-profiles + +list-infrastructure: + @./run-advanced.sh --list-infrastructure + +# Utilities +clean: + @echo "Cleaning test results..." + @find results/ -name "*.txt" -o -name "*.csv" -o -name "*.log" 2>/dev/null | xargs rm -f || true + @rm -f docker-compose.perf.yml docker-compose.backup_*.yml nginx.conf 2>/dev/null || true + @echo "āœ… Clean complete" + +clean-results: + @echo "Removing all test result directories..." + @find results/ -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true + @echo "āœ… All results cleaned" + +clean-all: clean-results + @echo "Cleaning baselines and reports..." + @rm -rf baselines/*.json reports/*.html 2>/dev/null || true + @echo "āœ… Deep clean complete" + +# Documentation +docs: + @echo "Opening main documentation..." 
+ @echo "" + @echo "šŸ“š Available Documentation:" + @echo " README.md - Main overview" + @echo " QUICK_REFERENCE.md - Command cheat sheet" + @echo " SERVER_PROFILES_GUIDE.md - Server profile details" + @echo " PERFORMANCE_STRATEGY.md - Complete strategy" + @echo " README_AUTOMATION.md - Automation guide" + @echo " IMPLEMENTATION_STATUS.md - Implementation status" + @echo "" + +# Generate report from existing results +report: +ifndef RESULTS_DIR + @echo "Usage: make report RESULTS_DIR=results/medium_20241009_123456" + @exit 1 +endif + @python3 utils/report_generator.py --results-dir $(RESULTS_DIR) --config config.yaml + +# Development helpers +dev-test: + @./run-advanced.sh -p smoke --skip-monitoring --no-restore + +watch-logs: + @docker-compose logs -f gateway + +# Complete workflow examples +workflow-optimize: + @echo "šŸ” Optimization Workflow" + @echo "1. Baseline with standard config..." + @./run-advanced.sh -p medium --save-baseline standard_baseline.json + @echo "" + @echo "2. Test with optimized config..." + @./run-advanced.sh -p medium --server-profile optimized --compare-with standard_baseline.json + @echo "" + @echo "āœ… Review comparison report to decide if optimization is worth it" + +workflow-upgrade: + @echo "šŸ” PostgreSQL Upgrade Workflow" + @echo "1. Baseline with PG 15..." + @./run-advanced.sh -p medium --postgres-version 15-alpine --save-baseline pg15_pre_upgrade.json + @echo "" + @echo "2. Test with PG 17..." + @./run-advanced.sh -p medium --postgres-version 17-alpine --compare-with pg15_pre_upgrade.json + @echo "" + @echo "āœ… Review comparison report to evaluate upgrade impact" + +workflow-capacity: + @echo "šŸ” Capacity Planning Workflow" + @echo "Testing with different instance counts..." + @./run-advanced.sh -p heavy --instances 1 --save-baseline capacity_1x.json + @./run-advanced.sh -p heavy --instances 2 --save-baseline capacity_2x.json + @./run-advanced.sh -p heavy --instances 4 --save-baseline capacity_4x.json + @echo "" + @echo "āœ… Review baselines to determine optimal instance count" + +.DEFAULT_GOAL := help diff --git a/tests/performance/PERFORMANCE_STRATEGY.md b/tests/performance/PERFORMANCE_STRATEGY.md new file mode 100644 index 000000000..da1c23f7f --- /dev/null +++ b/tests/performance/PERFORMANCE_STRATEGY.md @@ -0,0 +1,2116 @@ +# Performance Testing Strategy + +**Version:** 1.0 +**Last Updated:** 2025-10-09 +**Status:** Active + +## Table of Contents + +1. [Overview](#overview) +2. [Testing Phases](#testing-phases) +3. [Testing Methodology](#testing-methodology) +4. [Monitoring & Observability](#monitoring--observability) +5. [Profiling & Analysis](#profiling--analysis) +6. [Database Performance](#database-performance) +7. [Bottleneck Identification](#bottleneck-identification) +8. [Continuous Performance Testing](#continuous-performance-testing) +9. [Performance Baselines & SLOs](#performance-baselines--slos) +10. [Tooling & Infrastructure](#tooling--infrastructure) +11. [Reporting & Visualization](#reporting--visualization) + +--- + +## Overview + +This document defines a comprehensive, multi-layered performance testing strategy for the MCP Gateway ecosystem. The goal is to identify performance bottlenecks, establish baselines, and ensure the system meets service level objectives (SLOs) under various load conditions. 
+ +### Objectives + +- **Establish baselines** for individual components and the integrated system +- **Identify bottlenecks** at all layers (application, database, network) +- **Monitor resource utilization** during load testing +- **Profile code paths** to find hot spots +- **Optimize database** queries and connection pooling +- **Validate scalability** under increasing load +- **Track performance regression** over time + +### Key Principles + +1. **Test in isolation first** - Validate individual components before integration +2. **Monitor everything** - Collect metrics at all layers during tests +3. **Profile before optimizing** - Use data to drive optimization decisions +4. **Automate testing** - Make performance testing part of CI/CD +5. **Track trends** - Compare results over time to detect regressions + +--- + +## Testing Phases + +### Phase 1: Individual Component Testing + +Test each component in isolation to establish baseline performance. + +#### 1.1 MCP Server Testing (Standalone) + +**Objective:** Measure MCP server performance without gateway overhead. + +**Test Targets:** +- `fast-time-server` (Go-based MCP server) +- Other MCP servers (mcp-server-git, etc.) + +**Metrics to Collect:** +- Tool invocation latency (p50, p95, p99) +- Resource read latency +- Prompt execution latency +- Throughput (requests/second) +- Memory usage +- CPU utilization +- Error rate + +**Test Scenarios:** +```bash +# Direct SSE connection to MCP server +# Test tools/list performance +hey -n 10000 -c 50 -m POST \ + -T "application/json" \ + -D payloads/tools/list_tools.json \ + http://localhost:8888/sse + +# Test individual tool invocation +hey -n 5000 -c 25 -m POST \ + -T "application/json" \ + -D payloads/tools/get_system_time.json \ + http://localhost:8888/sse +``` + +**Success Criteria:** +- Tool listing: <10ms p95 +- Simple tool invocation: <20ms p95 +- Complex tool invocation: <50ms p95 +- Zero errors under normal load + +#### 1.2 Gateway Core Testing (No MCP Servers) + +**Objective:** Measure gateway overhead without MCP server interactions. + +**Test Targets:** +- Health endpoints +- Authentication +- Routing logic +- Admin UI + +**Metrics to Collect:** +- Health check latency +- Authentication overhead +- Routing decision time +- Memory footprint +- Database query count + +**Test Scenarios:** +```bash +# Health endpoint performance +hey -n 100000 -c 100 /health + +# Authentication overhead +hey -n 10000 -c 50 \ + -H "Authorization: Bearer $TOKEN" \ + /health +``` + +**Success Criteria:** +- Health check: <5ms p95 +- Authenticated request: <10ms p95 +- Memory stable under sustained load + +#### 1.3 Database Layer Testing + +**Objective:** Validate database performance in isolation. + +**Test Targets:** +- SQLite (default) +- PostgreSQL (production) + +**Tests:** +- Connection pool saturation +- Query performance +- Index effectiveness +- Write throughput +- Read throughput +- Transaction overhead + +See [Database Performance](#database-performance) section for details. + +--- + +### Phase 2: Integrated Gateway Testing + +Test the complete gateway with registered MCP servers. + +#### 2.1 Gateway + Single MCP Server + +**Objective:** Measure gateway overhead when proxying to one MCP server. + +**Setup:** +1. Start fast-time-server +2. Register as gateway peer +3. Create virtual server +4. 
Run load tests through gateway + +**Metrics to Collect:** +- End-to-end latency (client → gateway → MCP server → client) +- Gateway overhead (total latency - MCP server latency) +- Connection pooling efficiency +- SSE/WebSocket performance +- Request queuing delays + +**Test Scenarios:** +```bash +# Tools through gateway +./scenarios/tools-benchmark.sh -p heavy + +# Resources through gateway +./scenarios/resources-benchmark.sh -p heavy + +# Prompts through gateway +./scenarios/prompts-benchmark.sh -p heavy +``` + +**Success Criteria:** +- Gateway overhead: <15ms p95 +- End-to-end tool invocation: <30ms p95 +- No connection pool exhaustion +- Zero request drops + +#### 2.2 Gateway + Multiple MCP Servers + +**Objective:** Test gateway performance with multiple registered servers. + +**Setup:** +1. Register 5-10 different MCP servers +2. Create multiple virtual servers +3. Run concurrent workloads across servers + +**Metrics to Collect:** +- Per-server latency +- Server selection overhead +- Resource contention +- Database query count +- Cache hit rate + +**Test Scenarios:** +```bash +# Mixed workload across multiple servers +./scenarios/mixed-workload.sh -p heavy + +# Concurrent virtual server access +./scenarios/multi-server-benchmark.sh +``` + +**Success Criteria:** +- No degradation with up to 10 servers +- Fair resource allocation across servers +- Cache hit rate >80% + +#### 2.3 Gateway Federation Testing + +**Objective:** Test performance when federating across multiple gateway instances. + +**Setup:** +1. Start 3 gateway instances +2. Configure federation (Redis) +3. Register servers on different gateways +4. Test cross-gateway tool invocation + +**Metrics to Collect:** +- Federation discovery latency +- Cross-gateway routing overhead +- Redis performance +- mDNS discovery time +- Network latency between gateways + +--- + +### Phase 3: Stress & Capacity Testing + +Push the system to its limits to find breaking points. + +#### 3.1 Load Ramp Testing + +**Objective:** Find the maximum sustainable load. + +**Method:** +- Start with light load (10 concurrent users) +- Gradually increase to heavy load (500+ concurrent users) +- Identify point where latency/errors spike + +**Tools:** +```bash +# Gradual ramp +for concurrency in 10 50 100 200 500 1000; do + hey -n 10000 -c $concurrency -m POST \ + -T "application/json" \ + -D payloads/tools/list_tools.json \ + http://localhost:4444/rpc + sleep 10 +done +``` + +#### 3.2 Sustained Load Testing + +**Objective:** Verify stability under sustained load. + +**Duration:** 1-4 hours + +**Metrics:** +- Memory leak detection +- Connection leak detection +- CPU degradation over time +- Database bloat + +**Tools:** +```bash +# Run for 1 hour +hey -z 1h -c 50 -q 100 -m POST \ + -T "application/json" \ + -D payloads/tools/list_tools.json \ + http://localhost:4444/rpc +``` + +#### 3.3 Spike Testing + +**Objective:** Test system resilience to sudden load spikes. 
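+The method below outlines the phases; as a rough sketch (assuming the same `/rpc`
+payload and `hey` flags used in the ramp and sustained tests above), the spike can
+be approximated by chaining three runs:
+
+```bash
+# Sketch: normal load -> 30 s spike at 10x concurrency -> normal load.
+# Compare p95 latency and error rate across the phases and note how long the
+# gateway takes to return to baseline after the spike subsides.
+hey -z 2m -c 50 -m POST -T "application/json" \
+  -D payloads/tools/list_tools.json http://localhost:4444/rpc
+
+hey -z 30s -c 500 -m POST -T "application/json" \
+  -D payloads/tools/list_tools.json http://localhost:4444/rpc
+
+hey -z 2m -c 50 -m POST -T "application/json" \
+  -D payloads/tools/list_tools.json http://localhost:4444/rpc
+```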
+ +**Method:** +- Run normal load (50 concurrent) +- Inject spike (500 concurrent for 30s) +- Return to normal load +- Measure recovery time + +--- + +## Testing Methodology + +### Load Testing Tools + +**Primary:** `hey` (HTTP load testing) +- Fast, concurrent request generation +- Detailed latency histograms +- Easy to script and automate + +**Alternative:** `locust` (Python-based) +- More complex scenarios +- Web UI for monitoring +- Custom user behaviors + +**Alternative:** `k6` (JavaScript-based) +- Sophisticated scenarios +- Built-in metrics collection +- Cloud integration + +### Test Data + +**Payloads:** +- Store in `payloads/` directory +- Use realistic data sizes +- Include edge cases (large inputs, unicode, etc.) + +**Randomization:** +- Vary request parameters +- Randomize timezones, times, etc. +- Avoid cache bias + +### Test Execution + +**Environment:** +- Consistent hardware (document specs) +- Isolated network (minimize noise) +- Fresh database state +- Cleared caches + +**Process:** +1. Warm up (100 requests, discard results) +2. Run actual test +3. Cool down period +4. Collect metrics +5. Reset state + +--- + +## Monitoring & Observability + +### System Metrics Collection + +#### 4.1 Host Metrics + +**CPU:** +```bash +# Monitor during tests +vmstat 1 + +# Average CPU usage +sar -u 1 60 +``` + +**Memory:** +```bash +# Real-time monitoring +watch -n 1 free -h + +# Detailed memory stats +cat /proc/meminfo +``` + +**Disk I/O:** +```bash +# I/O statistics +iostat -x 1 + +# Disk usage +df -h +watch -n 1 du -sh /path/to/db +``` + +**Network:** +```bash +# Network throughput +iftop + +# Connection states +ss -s +netstat -an | awk '/tcp/ {print $6}' | sort | uniq -c +``` + +#### 4.2 Application Metrics + +**Prometheus Metrics:** +```bash +# Enable in .env +MCPGATEWAY_ENABLE_PROMETHEUS=true + +# Scrape during tests +curl http://localhost:4444/metrics > metrics_before.txt +# Run test +curl http://localhost:4444/metrics > metrics_after.txt +# Diff and analyze +``` + +**Key Metrics:** +- `http_requests_total` - Total requests +- `http_request_duration_seconds` - Latency histogram +- `http_requests_in_flight` - Concurrent requests +- `database_connections_active` - Active DB connections +- `database_connections_idle` - Idle DB connections +- `cache_hits_total` / `cache_misses_total` - Cache efficiency + +#### 4.3 Database Metrics + +**PostgreSQL:** +```sql +-- Connection stats +SELECT * FROM pg_stat_activity; + +-- Query performance +SELECT query, calls, total_time, mean_time +FROM pg_stat_statements +ORDER BY mean_time DESC +LIMIT 20; + +-- Lock contention +SELECT * FROM pg_locks; + +-- Cache hit ratio +SELECT + sum(heap_blks_read) as heap_read, + sum(heap_blks_hit) as heap_hit, + sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio +FROM pg_statio_user_tables; +``` + +**SQLite:** +```bash +# Enable query logging +sqlite3 mcp.db ".log stdout" +sqlite3 mcp.db ".stats on" + +# Analyze queries +sqlite3 mcp.db "EXPLAIN QUERY PLAN SELECT ..." +``` + +#### 4.4 Container Metrics (Docker) + +```bash +# Real-time stats +docker stats + +# Continuous monitoring during test +docker stats --no-stream --format \ + "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}\t{{.BlockIO}}" \ + > docker_stats.txt & +STATS_PID=$! 
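+# Note: with --no-stream, docker stats prints a single snapshot and exits, so the
+# backgrounded command above records one sample rather than a continuous series.
+# For periodic sampling during a long run, a simple loop can be used instead
+# (sketch): while true; do docker stats --no-stream >> docker_stats.txt; sleep 5; done &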
+ +# Run test +./run-all.sh -p heavy + +# Stop monitoring +kill $STATS_PID +``` + +### Automated Monitoring Scripts + +Create `utils/monitor-during-test.sh`: +```bash +#!/usr/bin/env bash +# Collect all metrics during a test run + +OUTPUT_DIR="$1" +INTERVAL="${2:-5}" + +mkdir -p "$OUTPUT_DIR" + +# CPU & Memory +vmstat $INTERVAL > "$OUTPUT_DIR/vmstat.log" & +PIDS+=($!) + +# Network +ss -s > "$OUTPUT_DIR/network_stats.log" & +PIDS+=($!) + +# Docker stats +docker stats --no-stream --format "{{.Container}},{{.CPUPerc}},{{.MemUsage}}" \ + > "$OUTPUT_DIR/docker_stats.csv" & +PIDS+=($!) + +# Wait for test completion signal +trap "kill ${PIDS[@]}; exit 0" SIGTERM SIGINT + +wait +``` + +--- + +## Profiling & Analysis + +### 5.1 Python Application Profiling + +#### cProfile Integration + +**Profile a specific endpoint:** +```python +# Add to main.py for temporary profiling +import cProfile +import pstats +from io import StringIO + +@app.middleware("http") +async def profile_middleware(request: Request, call_next): + if request.url.path == "/rpc" and ENABLE_PROFILING: + profiler = cProfile.Profile() + profiler.enable() + + response = await call_next(request) + + profiler.disable() + s = StringIO() + ps = pstats.Stats(profiler, stream=s).sort_stats('cumulative') + ps.print_stats() + + # Save to file + with open(f"profiles/profile_{time.time()}.txt", "w") as f: + f.write(s.getvalue()) + + return response + return await call_next(request) +``` + +**Run with profiling:** +```bash +# Enable profiling +export ENABLE_PROFILING=true + +# Run test +./scenarios/tools-benchmark.sh -p medium + +# Analyze profiles +python3 -m pstats profiles/profile_*.txt +# Commands: sort cumulative, stats 20 +``` + +#### py-spy for Live Profiling + +**Install:** +```bash +pip install py-spy +``` + +**Profile running process:** +```bash +# Find PID +PID=$(ps aux | grep "uvicorn mcpgateway.main:app" | grep -v grep | awk '{print $2}') + +# Record flame graph during test +py-spy record -o profile.svg --pid $PID --duration 60 & + +# Run load test +./scenarios/tools-benchmark.sh -p heavy + +# View profile.svg in browser +``` + +#### Memory Profiling + +**Using memory_profiler:** +```bash +pip install memory-profiler + +# Add @profile decorator to functions +# Run with: +python -m memory_profiler mcpgateway/services/gateway_service.py +``` + +**Using tracemalloc:** +```python +# Add to main.py +import tracemalloc + +@app.on_event("startup") +async def startup(): + tracemalloc.start() + +@app.get("/admin/memory-snapshot") +async def memory_snapshot(): + snapshot = tracemalloc.take_snapshot() + top_stats = snapshot.statistics('lineno') + + return { + "top_10": [ + { + "file": str(stat.traceback), + "size_mb": stat.size / 1024 / 1024, + "count": stat.count + } + for stat in top_stats[:10] + ] + } +``` + +### 5.2 Database Query Profiling + +#### PostgreSQL Query Analysis + +**Enable pg_stat_statements:** +```sql +-- In postgresql.conf +shared_preload_libraries = 'pg_stat_statements' +pg_stat_statements.track = all + +-- Restart and create extension +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; +``` + +**Analyze slow queries during test:** +```sql +-- Reset stats before test +SELECT pg_stat_statements_reset(); + +-- Run performance test +-- ... 
+ +-- View slowest queries +SELECT + substring(query, 1, 100) AS short_query, + calls, + total_time, + mean_time, + max_time, + stddev_time +FROM pg_stat_statements +WHERE query NOT LIKE '%pg_stat_statements%' +ORDER BY mean_time DESC +LIMIT 20; + +-- Identify queries with high variability +SELECT + substring(query, 1, 100) AS short_query, + calls, + mean_time, + stddev_time, + (stddev_time / mean_time) * 100 AS variability_percent +FROM pg_stat_statements +WHERE calls > 100 +ORDER BY variability_percent DESC +LIMIT 20; +``` + +**EXPLAIN ANALYZE:** +```sql +-- For problematic queries identified above +EXPLAIN (ANALYZE, BUFFERS, VERBOSE) +SELECT ...; +``` + +#### SQLite Query Analysis + +**Enable query logging:** +```python +# In config.py +import logging +logging.basicConfig() +logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) +``` + +**Analyze query plans:** +```bash +sqlite3 mcp.db "EXPLAIN QUERY PLAN SELECT * FROM tools WHERE server_id = 1;" +``` + +### 5.3 Network Profiling + +**Capture traffic during test:** +```bash +# Start capture +tcpdump -i any -w gateway_traffic.pcap port 4444 & +TCPDUMP_PID=$! + +# Run test +./scenarios/tools-benchmark.sh + +# Stop capture +kill $TCPDUMP_PID + +# Analyze with Wireshark or tshark +tshark -r gateway_traffic.pcap -q -z io,stat,1 +``` + +**Measure latency breakdown:** +```bash +# curl with timing +curl -w "\nDNS: %{time_namelookup}s\nConnect: %{time_connect}s\nTLS: %{time_appconnect}s\nStart Transfer: %{time_starttransfer}s\nTotal: %{time_total}s\n" \ + -H "Authorization: Bearer $TOKEN" \ + -X POST -d @payloads/tools/list_tools.json \ + http://localhost:4444/rpc +``` + +--- + +## Database Performance + +### 6.1 Connection Pool Optimization + +#### Current Settings Audit + +**PostgreSQL (SQLAlchemy):** +```python +# In config.py, document current settings: +SQLALCHEMY_DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://...") +SQLALCHEMY_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "20")) +SQLALCHEMY_MAX_OVERFLOW = int(os.getenv("DB_POOL_MAX_OVERFLOW", "40")) +SQLALCHEMY_POOL_TIMEOUT = int(os.getenv("DB_POOL_TIMEOUT", "30")) +SQLALCHEMY_POOL_RECYCLE = int(os.getenv("DB_POOL_RECYCLE", "3600")) +``` + +#### Connection Pool Testing + +**Test 1: Pool Exhaustion** +```bash +# Test with varying pool sizes +for pool_size in 5 10 20 50 100; do + export DB_POOL_SIZE=$pool_size + export DB_POOL_MAX_OVERFLOW=$((pool_size * 2)) + + # Restart gateway + make restart + + # Run high concurrency test + hey -n 10000 -c 200 -m POST \ + -T "application/json" \ + -D payloads/tools/list_tools.json \ + http://localhost:4444/rpc \ + > results/pool_test_${pool_size}.txt +done + +# Analyze results +grep "Requests/sec" results/pool_test_*.txt +``` + +**Test 2: Connection Leak Detection** +```sql +-- Monitor connections during sustained test +-- Run this query every 10 seconds during a 1-hour test + +SELECT + datname, + count(*) as connections, + max(now() - state_change) as longest_idle +FROM pg_stat_activity +WHERE datname = 'mcpgateway' +GROUP BY datname; + +-- Should remain stable; growing count indicates leak +``` + +**Test 3: Pool Recycle Effectiveness** +```bash +# Test with different recycle times +for recycle in 300 1800 3600 7200; do + export DB_POOL_RECYCLE=$recycle + + # Run sustained test + hey -z 30m -c 50 -q 100 -m POST \ + -T "application/json" \ + -D payloads/tools/list_tools.json \ + http://localhost:4444/rpc + + # Monitor connection age in database +done +``` + +### 6.2 Query Performance Optimization + +#### Index Analysis + +**Identify missing 
indexes:** +```sql +-- PostgreSQL: Find sequential scans on large tables +SELECT + schemaname, + tablename, + seq_scan, + seq_tup_read, + idx_scan, + seq_tup_read / seq_scan as avg_seq_read +FROM pg_stat_user_tables +WHERE seq_scan > 0 +ORDER BY seq_tup_read DESC +LIMIT 20; + +-- Tables with high seq_scan need indexes +``` + +**Test index effectiveness:** +```sql +-- Before adding index +EXPLAIN ANALYZE SELECT * FROM tools WHERE server_id = 1; + +-- Add index +CREATE INDEX idx_tools_server_id ON tools(server_id); + +-- After adding index +EXPLAIN ANALYZE SELECT * FROM tools WHERE server_id = 1; + +-- Compare execution time +``` + +#### Query Optimization Tests + +**Common queries to optimize:** + +1. **Tool lookup by server:** +```sql +-- Baseline +EXPLAIN ANALYZE +SELECT * FROM tools WHERE server_id = 1; + +-- Add index if missing +CREATE INDEX IF NOT EXISTS idx_tools_server_id ON tools(server_id); + +-- Test improvement +``` + +2. **Virtual server composition:** +```sql +-- Baseline +EXPLAIN ANALYZE +SELECT t.* FROM tools t +JOIN virtual_server_tools vst ON t.id = vst.tool_id +WHERE vst.virtual_server_id = 1; + +-- Add composite index +CREATE INDEX IF NOT EXISTS idx_virtual_server_tools_lookup +ON virtual_server_tools(virtual_server_id, tool_id); +``` + +3. **Gateway peer lookup:** +```sql +-- Baseline +EXPLAIN ANALYZE +SELECT * FROM gateway_peers WHERE is_active = true; + +-- Add partial index +CREATE INDEX IF NOT EXISTS idx_active_gateway_peers +ON gateway_peers(is_active) WHERE is_active = true; +``` + +### 6.3 Database Load Testing + +**Write-heavy test:** +```python +# Test tool registration performance +import time +import statistics + +times = [] +for i in range(1000): + start = time.time() + # POST /tools with new tool + response = requests.post(...) + times.append(time.time() - start) + +print(f"Mean: {statistics.mean(times):.3f}s") +print(f"p95: {statistics.quantiles(times, n=20)[18]:.3f}s") +print(f"p99: {statistics.quantiles(times, n=100)[98]:.3f}s") +``` + +**Read-heavy test:** +```bash +# GET /tools with pagination +for page_size in 10 50 100 500; do + hey -n 5000 -c 50 \ + "http://localhost:4444/tools?skip=0&limit=$page_size" \ + > results/read_pagination_${page_size}.txt +done +``` + +**Mixed workload:** +```python +# Simulate realistic usage pattern +# 70% reads, 25% updates, 5% writes +``` + +### 6.4 Database Monitoring During Tests + +**Create monitoring script:** +```bash +#!/usr/bin/env bash +# utils/monitor-db.sh + +while true; do + psql -U postgres -d mcpgateway -c " + SELECT + now(), + (SELECT count(*) FROM pg_stat_activity WHERE datname='mcpgateway') as connections, + (SELECT count(*) FROM pg_stat_activity WHERE state='active') as active, + (SELECT count(*) FROM pg_stat_activity WHERE state='idle') as idle, + (SELECT pg_database_size('mcpgateway')/1024/1024) as size_mb + " >> db_stats.log + + sleep 5 +done +``` + +--- + +## Bottleneck Identification + +### 7.1 Systematic Bottleneck Detection + +**Process:** + +1. **Measure end-to-end latency** (client perspective) +2. **Break down by component:** + - Network latency + - Gateway processing + - Database queries + - MCP server calls + - Response serialization +3. **Identify slowest component** +4. **Profile that component** +5. 
**Optimize and re-test** + +**Instrumentation Example:** +```python +# Add timing to each layer +import time +from functools import wraps + +def timed(layer_name): + def decorator(func): + @wraps(func) + async def wrapper(*args, **kwargs): + start = time.time() + result = await func(*args, **kwargs) + duration = time.time() - start + + # Log to metrics + metrics.histogram(f"{layer_name}.duration", duration) + + return result + return wrapper + return decorator + +@timed("gateway.route") +async def route_request(...): + ... + +@timed("database.query") +async def get_tools(...): + ... + +@timed("mcp.invoke") +async def invoke_tool(...): + ... +``` + +### 7.2 Common Bottlenecks + +**Symptom:** High latency, low throughput, CPU below 50% +- **Likely cause:** Database connection pool exhaustion +- **Test:** Increase pool size +- **Monitor:** `pg_stat_activity` connection count + +**Symptom:** High CPU, good throughput, increasing latency +- **Likely cause:** Inefficient code path +- **Test:** Profile with py-spy +- **Monitor:** CPU per core + +**Symptom:** High memory usage, slow responses +- **Likely cause:** Memory leak or large result sets +- **Test:** Memory profiler, check query result sizes +- **Monitor:** Memory growth over time + +**Symptom:** Erratic latency, high variance +- **Likely cause:** Lock contention, cache misses +- **Test:** Check database locks, cache hit rate +- **Monitor:** `pg_locks`, cache metrics + +### 7.3 Bottleneck Test Matrix + +Create a test matrix to systematically identify bottlenecks: + +| Component | Metric | Test | Expected | Actual | Bottleneck? | +|-----------|--------|------|----------|--------|-------------| +| Network | Latency | ping | <1ms | 0.5ms | āŒ | +| Gateway Auth | Overhead | /health with auth | <5ms | 3ms | āŒ | +| Gateway Routing | Time | route decision | <2ms | 8ms | āš ļø | +| DB Connection | Wait time | pool.get() | <10ms | 45ms | āœ… | +| DB Query | Execution | SELECT tools | <5ms | 3ms | āŒ | +| MCP Server | Tool call | direct invoke | <20ms | 15ms | āŒ | +| Serialization | JSON encode | response.json() | <1ms | 0.5ms | āŒ | + +**Action:** Focus optimization on DB connection pooling (45ms wait time). 
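+
+To confirm a suspected pool bottleneck before tuning, measure connection-acquisition wait directly instead of inferring it from end-to-end latency. The snippet below is an illustrative sketch (the DSN, pool sizing, worker count, and request count are placeholder assumptions, not project defaults): it times `engine.connect()` from more workers than the pool allows and reports the acquisition-wait distribution.
+
+```python
+# Hedged sketch: approximate pool acquisition wait under contention.
+import statistics
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+from sqlalchemy import create_engine, text
+
+# Placeholder DSN and pool settings -- adjust to the environment under test.
+engine = create_engine(
+    "postgresql://perf_user:perf_pass@localhost:5432/mcpgateway_perf",
+    pool_size=20,
+    max_overflow=40,
+)
+
+def checkout_once(_):
+    start = time.perf_counter()
+    with engine.connect() as conn:           # blocks until a pooled connection is available
+        wait = time.perf_counter() - start   # includes pool wait (and first-use connect time)
+        conn.execute(text("SELECT 1"))       # trivial query so the connection is briefly held
+    return wait
+
+# 200 workers against a 20+40 pool forces contention comparable to the heavy profile.
+with ThreadPoolExecutor(max_workers=200) as pool:
+    waits = sorted(pool.map(checkout_once, range(2000)))
+
+print(f"mean wait: {statistics.mean(waits) * 1000:.1f} ms")
+print(f"p95 wait:  {waits[int(0.95 * (len(waits) - 1))] * 1000:.1f} ms")
+```
+
+If the p95 wait tracks the 45ms figure in the matrix while query execution stays low, the pool (not the queries) is the component to tune.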
+ +--- + +## Continuous Performance Testing + +### 8.1 CI/CD Integration + +**GitHub Actions workflow:** +```yaml +name: Performance Benchmarks + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + - cron: '0 2 * * 0' # Weekly on Sunday at 2 AM + +jobs: + performance: + runs-on: ubuntu-latest + timeout-minutes: 60 + + steps: + - uses: actions/checkout@v3 + + - name: Install dependencies + run: | + go install github.com/rakyll/hey@latest + pip install -r requirements.txt + + - name: Start services + run: make compose-up + + - name: Wait for healthy services + run: ./tests/performance/utils/check-services.sh + + - name: Run performance tests + run: | + cd tests/performance + ./run-all.sh -p light + + - name: Collect metrics + if: always() + run: | + docker stats --no-stream > perf_docker_stats.txt + docker logs gateway > perf_gateway_logs.txt + + - name: Upload results + uses: actions/upload-artifact@v3 + if: always() + with: + name: performance-results + path: | + tests/performance/results/ + perf_*.txt + + - name: Compare with baseline + run: | + python tests/performance/utils/compare_baselines.py \ + --baseline baselines/main_baseline.json \ + --current tests/performance/results/summary_light_*.json \ + --threshold 10 # Fail if >10% regression +``` + +### 8.2 Performance Regression Detection + +**Store baselines:** +```bash +# After major release or optimization +./run-all.sh -p medium + +# Save as baseline +cp results/summary_medium_*.md baselines/v1.2.0_baseline.md +``` + +**Compare script (`utils/compare_baselines.py`):** +```python +#!/usr/bin/env python3 +import json +import sys + +def compare_results(baseline, current, threshold_percent): + """ + Compare current results against baseline. + Fail if any metric regresses by more than threshold_percent. + """ + regressions = [] + + for test_name, baseline_metrics in baseline.items(): + current_metrics = current.get(test_name, {}) + + for metric, baseline_value in baseline_metrics.items(): + current_value = current_metrics.get(metric) + + if current_value is None: + continue + + # Calculate regression percentage + if baseline_value > 0: + regression_pct = ((current_value - baseline_value) / baseline_value) * 100 + + if regression_pct > threshold_percent: + regressions.append({ + 'test': test_name, + 'metric': metric, + 'baseline': baseline_value, + 'current': current_value, + 'regression': regression_pct + }) + + return regressions + +if __name__ == "__main__": + # Usage: compare_baselines.py --baseline base.json --current curr.json --threshold 10 + # Returns exit code 1 if regressions found + ... 
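+
+    # Minimal sketch of the CLI wiring described in the usage comment above;
+    # the flag names and JSON layout are assumptions matching the workflow call.
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--baseline", required=True)
+    parser.add_argument("--current", required=True)
+    parser.add_argument("--threshold", type=float, default=10.0)
+    args = parser.parse_args()
+
+    with open(args.baseline) as f:
+        baseline = json.load(f)
+    with open(args.current) as f:
+        current = json.load(f)
+
+    found = compare_results(baseline, current, args.threshold)
+    for r in found:
+        print(f"REGRESSION {r['test']}.{r['metric']}: "
+              f"{r['baseline']} -> {r['current']} ({r['regression']:+.1f}%)")
+    sys.exit(1 if found else 0)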
+``` + +### 8.3 Performance Dashboard + +**Option 1: Static HTML Report** + +Generate after each test: +```bash +# utils/generate_report.sh +python3 utils/report_generator.py \ + --results results/ \ + --output reports/perf_report_$(date +%Y%m%d).html +``` + +**Option 2: Grafana + InfluxDB** + +Send metrics to time-series database: +```python +# In test runner +from influxdb_client import InfluxDBClient + +client = InfluxDBClient(url="http://localhost:8086", token="...", org="...") +write_api = client.write_api() + +# After test +point = Point("performance_test") \ + .tag("test_name", test_name) \ + .tag("profile", profile) \ + .field("requests_per_sec", rps) \ + .field("p95_latency_ms", p95) \ + .field("error_rate", error_rate) \ + .time(datetime.utcnow()) + +write_api.write(bucket="mcpgateway", record=point) +``` + +**Option 3: GitHub Pages** + +Publish results to GitHub Pages: +```yaml +- name: Deploy results to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./tests/performance/reports +``` + +--- + +## Performance Baselines & SLOs + +### 9.1 Service Level Objectives (SLOs) + +Define performance targets based on user expectations: + +| Operation | Target p95 | Target p99 | Target RPS | Target Error Rate | +|-----------|-----------|-----------|-----------|-------------------| +| Health Check | <5ms | <10ms | 1000+ | 0% | +| Tool List | <30ms | <50ms | 500+ | <0.1% | +| Tool Invoke (simple) | <50ms | <100ms | 300+ | <0.1% | +| Tool Invoke (complex) | <100ms | <200ms | 200+ | <0.5% | +| Resource Read | <40ms | <80ms | 400+ | <0.1% | +| Prompt Get | <60ms | <120ms | 300+ | <0.1% | +| Virtual Server Create | <200ms | <500ms | 50+ | <1% | + +### 9.2 Baseline Establishment + +**Hardware Specification (Document):** +``` +CPU: [e.g., Intel Xeon E5-2670 v3 @ 2.30GHz, 8 cores] +RAM: [e.g., 16GB DDR4] +Disk: [e.g., NVMe SSD, 500GB] +Network: [e.g., 1Gbps] +OS: [e.g., Ubuntu 22.04] +``` + +**Baseline Test Results:** +```bash +# Run comprehensive baseline +./run-all.sh -p medium | tee baselines/baseline_$(uname -n)_$(date +%Y%m%d).txt + +# Save system info +{ + echo "=== System Info ===" + uname -a + lscpu | grep "Model name" + free -h + df -h +} > baselines/system_info_$(uname -n).txt +``` + +### 9.3 SLO Monitoring + +**Create SLO validation test:** +```python +# tests/performance/validate_slo.py +import json +import sys + +SLO_TARGETS = { + "tools/list": {"p95_ms": 30, "p99_ms": 50, "rps": 500}, + "tools/invoke_simple": {"p95_ms": 50, "p99_ms": 100, "rps": 300}, + # ... 
more +} + +def validate_slo(test_results): + violations = [] + + for test_name, targets in SLO_TARGETS.items(): + actual = test_results.get(test_name, {}) + + for metric, target_value in targets.items(): + actual_value = actual.get(metric) + + if actual_value is None: + continue + + if metric.endswith("_ms") and actual_value > target_value: + violations.append(f"{test_name}.{metric}: {actual_value}ms > {target_value}ms") + elif metric == "rps" and actual_value < target_value: + violations.append(f"{test_name}.{metric}: {actual_value} < {target_value}") + + return violations + +if __name__ == "__main__": + with open(sys.argv[1]) as f: + results = json.load(f) + + violations = validate_slo(results) + + if violations: + print("SLO VIOLATIONS:") + for v in violations: + print(f" - {v}") + sys.exit(1) + else: + print("āœ… All SLOs met") + sys.exit(0) +``` + +--- + +## Tooling & Infrastructure + +### 10.1 Required Tools + +**Load Generation:** +- āœ… `hey` - HTTP load testing (installed) +- `locust` - Advanced scenarios (optional) +- `k6` - Cloud load testing (optional) + +**Monitoring:** +- `htop` / `btop` - Interactive process viewer +- `iotop` - I/O monitoring +- `nethogs` - Network monitoring by process +- `docker stats` - Container resource usage + +**Profiling:** +- `py-spy` - Python profiling (no code changes) +- `cProfile` - Built-in Python profiler +- `memory_profiler` - Memory usage profiling +- `perf` - Linux performance analysis + +**Database:** +- `pg_stat_statements` - PostgreSQL query stats +- `pgBadger` - PostgreSQL log analyzer +- `sqlite3` - SQLite command-line + +**Network:** +- `tcpdump` - Packet capture +- `wireshark` / `tshark` - Packet analysis +- `curl` - HTTP testing with timing + +### 10.2 Test Environment Setup + +**Dedicated performance test environment:** +```bash +# docker-compose.perf.yml +version: '3.8' + +services: + gateway: + build: . 
+ environment: + - DATABASE_URL=postgresql://perf_user:perf_pass@postgres:5432/mcpgateway_perf + - REDIS_URL=redis://redis:6379 + - LOG_LEVEL=WARNING # Reduce logging overhead + - MCPGATEWAY_ENABLE_PROMETHEUS=true + ports: + - "4444:4444" + - "9090:9090" # Prometheus metrics + + postgres: + image: postgres:15 + environment: + POSTGRES_DB: mcpgateway_perf + POSTGRES_USER: perf_user + POSTGRES_PASSWORD: perf_pass + ports: + - "5432:5432" + volumes: + - perf_pgdata:/var/lib/postgresql/data + command: + - "postgres" + - "-c" + - "shared_preload_libraries=pg_stat_statements" + - "-c" + - "pg_stat_statements.track=all" + + redis: + image: redis:7-alpine + ports: + - "6379:6379" + + prometheus: + image: prom/prometheus:latest + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + ports: + - "9091:9090" + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + volumes: + - perf_grafana_data:/var/lib/grafana + +volumes: + perf_pgdata: + perf_grafana_data: +``` + +**Start performance environment:** +```bash +docker-compose -f docker-compose.perf.yml up -d +``` + +### 10.3 Automation Scripts + +Create comprehensive test automation: + +**`tests/performance/run-full-suite.sh`:** +```bash +#!/usr/bin/env bash +# Complete performance testing suite with monitoring + +set -Eeuo pipefail + +TIMESTAMP=$(date +%Y%m%d_%H%M%S) +RESULTS_DIR="results_${TIMESTAMP}" + +mkdir -p "$RESULTS_DIR"/{monitoring,profiles,reports} + +# Step 1: Baseline the MCP server directly +echo "=== Testing MCP Server (Standalone) ===" +./scenarios/test-mcp-server-direct.sh > "$RESULTS_DIR/01_mcp_baseline.txt" + +# Step 2: Test gateway core +echo "=== Testing Gateway Core ===" +./scenarios/test-gateway-core.sh > "$RESULTS_DIR/02_gateway_core.txt" + +# Step 3: Start monitoring +echo "=== Starting Monitoring ===" +./utils/monitor-during-test.sh "$RESULTS_DIR/monitoring" 5 & +MONITOR_PID=$! + +# Step 4: Profile during load +echo "=== Starting Profiler ===" +PID=$(ps aux | grep uvicorn | grep -v grep | awk '{print $2}') +py-spy record -o "$RESULTS_DIR/profiles/flame.svg" --pid $PID --duration 300 & +PROFILER_PID=$! + +# Step 5: Run full test suite +echo "=== Running Full Test Suite ===" +./run-all.sh -p heavy > "$RESULTS_DIR/03_full_suite.txt" + +# Step 6: Stop monitoring +kill $MONITOR_PID $PROFILER_PID + +# Step 7: Collect database stats +echo "=== Collecting Database Stats ===" +psql -U perf_user -d mcpgateway_perf -f utils/db_stats.sql > "$RESULTS_DIR/04_db_stats.txt" + +# Step 8: Generate report +echo "=== Generating Report ===" +python3 utils/generate_report.py \ + --input "$RESULTS_DIR" \ + --output "$RESULTS_DIR/reports/index.html" + +echo "āœ… Complete! Results in: $RESULTS_DIR" +``` + +--- + +## Reporting & Visualization + +### 11.1 Automated Report Generation + +The performance testing suite now includes a **fully automated HTML report generator** that creates comprehensive, visually rich reports with charts and recommendations. 
+ +**Features:** +- āœ… Automatic parsing of `hey` output files +- āœ… SLO compliance evaluation with visual indicators +- āœ… Interactive charts using Chart.js +- āœ… Performance recommendations based on test results +- āœ… System metrics visualization +- āœ… Baseline comparison (when available) +- āœ… Mobile-responsive design + +**Report structure:** +``` +reports/ +ā”œā”€ā”€ performance_report_medium_20251009_143022.html # Complete HTML report +└── performance_report_heavy_20251009_150133.html # Multiple reports +``` + +**Using the Report Generator:** + +```bash +# Manual report generation +python3 tests/performance/utils/report_generator.py \ + --results-dir tests/performance/results_medium_20251009_143022 \ + --output reports/my_report.html \ + --config config.yaml \ + --profile medium + +# Automatic generation (integrated with run-configurable.sh) +./tests/performance/run-configurable.sh -p medium +# Report automatically generated and opened in browser +``` + +**Report Sections:** + +1. **Executive Summary** + - Overall status indicator + - SLO compliance percentage + - Average throughput + - Average latency (p95, p99) + - Regression detection alerts + +2. **SLO Compliance Table** + - Detailed comparison of actual vs. target metrics + - Pass/fail indicators + - Margin calculations + +3. **Test Results by Category** + - Tools, resources, prompts performance + - Interactive bar charts showing p50/p95/p99 + - Baseline comparison indicators + - Error rate tracking + +4. **System Metrics** (when monitoring enabled) + - CPU usage over time + - Memory usage over time + - Peak resource utilization + +5. **Database Performance** (when available) + - Connection pool statistics + - Query performance + - Slow query identification + +6. **Automated Recommendations** + - Priority-based (high/medium/low) + - Specific actions to improve performance + - Code snippets for investigation + +**Example Report Output:** +```html + + + + Performance Test Report - 2025-10-09 14:30:22 + + + + + + + + + + +``` + +The report is fully self-contained (single HTML file) and can be: +- Viewed locally in any browser +- Shared with team members via email +- Archived for historical comparison +- Published to GitHub Pages or internal dashboards + +### 11.2 Visualization with Grafana + +**Dashboard JSON:** +```json +{ + "dashboard": { + "title": "MCP Gateway Performance", + "panels": [ + { + "title": "Request Rate", + "targets": [ + { + "expr": "rate(http_requests_total[5m])" + } + ] + }, + { + "title": "Request Latency (p95)", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))" + } + ] + }, + { + "title": "Database Connections", + "targets": [ + { + "expr": "database_connections_active" + } + ] + } + ] + } +} +``` + +### 11.3 Metrics Export + +**Export to CSV:** +```python +# utils/export_metrics.py +import csv + +def export_to_csv(results, output_file): + with open(output_file, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow(['Timestamp', 'Test', 'RPS', 'p50', 'p95', 'p99', 'Errors']) + + for test in results: + writer.writerow([ + test['timestamp'], + test['name'], + test['rps'], + test['p50'], + test['p95'], + test['p99'], + test['errors'] + ]) +``` + +**Export to JSON:** +```bash +# In test runner +cat > results/metrics_${TIMESTAMP}.json < B[Load Config] + B --> C[For Each Infrastructure Profile] + C --> D[Generate docker-compose] + D --> E[Stop Services] + E --> F[Start Services with New Config] + F --> G[Wait for Health] + G --> H[For Each 
Server Profile] + H --> I[Update Env Variables] + I --> J[Restart Gateway] + J --> K[Run Performance Tests] + K --> L[Collect Metrics] + L --> M{More Server Profiles?} + M -->|Yes| H + M -->|No| N{More Infrastructure?} + N -->|Yes| C + N -->|No| O[Generate Comparison Report] + O --> P[End] +``` + +## Automated Test Runner + +### Configuration-Driven Testing + +The suite now includes a **configurable test runner** that reads all settings from `config.yaml`: + +**Configuration File (`config.yaml`):** +```yaml +# Test profiles with different load levels +profiles: + smoke: # Quick validation + requests: 100 + concurrency: 5 + + medium: # Realistic load + requests: 10000 + concurrency: 50 + + heavy: # Stress testing + requests: 50000 + concurrency: 200 + +# Test scenarios (what to test) +scenarios: + tools_benchmark: + enabled: true + tests: + - name: "list_tools" + payload: "payloads/tools/list_tools.json" + endpoint: "/rpc" + +# SLO definitions +slos: + tools_list: + p95_ms: 30 + min_rps: 500 + max_error_rate: 0.001 + +# Monitoring settings +monitoring: + enabled: true + interval_seconds: 5 + +# Reporting settings +reporting: + enabled: true + include_charts: true +``` + +**Running Tests:** + +```bash +# Run with default configuration (medium profile) +./tests/performance/run-configurable.sh + +# Run specific profile +./tests/performance/run-configurable.sh -p heavy + +# Run with custom config +./tests/performance/run-configurable.sh -c my-config.yaml -p light + +# Run only specific scenario +./tests/performance/run-configurable.sh --scenario tools_benchmark + +# Quick run without extras +./tests/performance/run-configurable.sh -p smoke --skip-monitoring --skip-report + +# List available scenarios +./tests/performance/run-configurable.sh --list-scenarios +``` + +**What the Runner Does:** + +1. āœ… **Service Health Check** - Validates gateway and MCP servers are ready +2. āœ… **Authentication Setup** - Generates JWT tokens automatically +3. āœ… **System Monitoring** - Collects CPU, memory, Docker stats during tests +4. āœ… **Warmup Phase** - Sends warmup requests to prime caches +5. āœ… **Test Execution** - Runs all configured scenarios +6. āœ… **Metrics Collection** - Gathers Prometheus metrics and logs +7. āœ… **Report Generation** - Creates HTML report and opens in browser +8. āœ… **Cleanup** - Stops monitoring, saves all artifacts + +**Output Structure:** +``` +results_medium_20251009_143022/ +ā”œā”€ā”€ tools_benchmark_list_tools_medium_20251009_143022.txt +ā”œā”€ā”€ tools_benchmark_get_system_time_medium_20251009_143022.txt +ā”œā”€ā”€ resources_benchmark_list_resources_medium_20251009_143022.txt +ā”œā”€ā”€ system_metrics.csv +ā”œā”€ā”€ docker_stats.csv +ā”œā”€ā”€ prometheus_metrics.txt +└── gateway_logs.txt + +reports/ +└── performance_report_medium_20251009_143022.html +``` + +## Implementation Checklist + +- [x] **Phase 1: Setup** + - [x] Install all required tools (hey, py-spy, etc.) 
+ - [x] Create configurable test runner with YAML config + - [x] Create HTML report generator with charts + - [ ] Create performance test environment (docker-compose.perf.yml) + - [ ] Document baseline system specs + +- [ ] **Phase 2: Individual Component Tests** + - [ ] Test fast-time-server standalone + - [ ] Test gateway core (no MCP servers) + - [ ] Test database in isolation + +- [ ] **Phase 3: Integration Tests** + - [ ] Test gateway + single MCP server + - [ ] Test gateway + multiple MCP servers + - [ ] Test gateway federation + +- [ ] **Phase 4: Monitoring & Profiling** + - [x] Implement monitoring scripts (in run-configurable.sh) + - [ ] Add profiling middleware + - [ ] Set up database query logging + +- [ ] **Phase 5: Optimization** + - [ ] Optimize connection pooling + - [ ] Add missing database indexes + - [ ] Optimize slow queries + +- [ ] **Phase 6: Automation** + - [x] Create configurable test automation + - [x] Generate automated HTML reports with charts + - [ ] Create CI/CD workflow + - [ ] Set up baseline comparison + - [x] Implement SLO validation in reports + +- [ ] **Phase 7: Continuous Improvement** + - [x] Establish SLOs in config.yaml + - [ ] Create performance dashboard (Grafana) + - [ ] Schedule weekly performance tests + +- [ ] **Phase 8: Server Profile & Infrastructure Testing** (NEW) + - [ ] Implement server profile switching (Gunicorn workers, threads, pool sizes) + - [ ] Implement infrastructure profile switching (Docker Compose generation) + - [ ] Add PostgreSQL version comparison (15 vs 16 vs 17) + - [ ] Add horizontal scaling tests (1, 2, 4, 8 instances) + - [ ] Create configuration matrix testing + - [ ] Build infrastructure comparison report generator + - [ ] Add cost-benefit analysis to reports + - [ ] Implement automated Docker Compose templating + - [ ] Create database tuning profile tests + - [ ] Add dynamic configuration testing (runtime changes) + +--- + +## References & Resources + +### Documentation +- [hey Documentation](https://github.com/rakyll/hey) +- [py-spy Documentation](https://github.com/benfred/py-spy) +- [PostgreSQL Performance Tips](https://wiki.postgresql.org/wiki/Performance_Optimization) +- [SQLAlchemy Performance](https://docs.sqlalchemy.org/en/20/faq/performance.html) + +### Tools +- [Grafana](https://grafana.com/) +- [Prometheus](https://prometheus.io/) +- [Locust](https://locust.io/) +- [k6](https://k6.io/) + +### Best Practices +- [Google SRE Book - Performance](https://sre.google/sre-book/table-of-contents/) +- [Database Performance for Developers](https://use-the-index-luke.com/) + +--- + +**Next Steps:** +1. Review and approve this strategy +2. Prioritize implementation phases +3. Allocate resources for performance testing infrastructure +4. Begin Phase 1 implementation + +**Document Owner:** Performance Engineering Team +**Review Cycle:** Quarterly +**Last Review:** 2025-10-09 diff --git a/tests/performance/README.md b/tests/performance/README.md new file mode 100644 index 000000000..74abcdfdb --- /dev/null +++ b/tests/performance/README.md @@ -0,0 +1,309 @@ +# MCP Gateway Performance Testing + +Comprehensive performance testing suite for MCP Gateway with load testing, server profiling, infrastructure testing, and baseline comparison. 
+ +## Quick Start + +```bash +make install # Install dependencies (hey) +make test # Run standard performance test +make test-gateway-core # Test gateway internals +make test-database # Test database connection pool +``` + +Results go to `results/{profile}_{timestamp}/`, reports to `reports/`. + +## Common Commands + +### Basic Testing +```bash +make test # Standard test (10K requests, 50 concurrent) +make quick # Quick smoke test (100 requests) +make heavy # Heavy load (50K requests, 200 concurrent) +``` + +### New Comprehensive Tests +```bash +make test-gateway-core # 11 gateway core tests (health, admin API, etc.) +make test-database # 4 database connection pool tests +make test-all-scenarios # Run all test scenarios +``` + +### Server Profiles +```bash +make test-optimized # 8 workers, 2 threads - high throughput +make test-memory # 4 workers, 8 threads - many connections +make test-io # 6 workers, 50 DB pool - I/O heavy +``` + +### Infrastructure +```bash +make test-production # 4 instances with nginx load balancer +make test-scaling # Test with 4 instances +make compare-postgres # Compare PostgreSQL 15 vs 17 +``` + +### Baseline Management +```bash +make baseline # Save current as baseline +make compare # Compare with baseline +make list-baselines # List all baselines + +# Save specific results +make save-baseline BASELINE=my-test RESULTS=results/medium_20241010_123456 +``` + +### Cleanup +```bash +make clean # Clean result files +make clean-results # Remove all result directories +make clean-all # Deep clean (results + baselines + reports) +``` + +## Available Profiles + +### Load Profiles +| Profile | Requests | Concurrency | Use Case | +|---------|----------|-------------|----------| +| smoke | 100 | 5 | Quick validation | +| light | 1,000 | 10 | Fast testing | +| medium | 10,000 | 50 | Realistic load | +| heavy | 50,000 | 200 | Stress testing | + +### Server Profiles +| Profile | Workers | Threads | DB Pool | Best For | +|---------|---------|---------|---------|----------| +| minimal | 1 | 2 | 5 | Dev/testing | +| standard | 4 | 4 | 20 | Balanced (default) | +| optimized | 8 | 2 | 30 | CPU-bound, high RPS | +| memory_optimized | 4 | 8 | 40 | Many connections | +| io_optimized | 6 | 4 | 50 | Database-heavy | + +### Infrastructure Profiles +| Profile | Instances | PostgreSQL | nginx | Use Case | +|---------|-----------|------------|-------|----------| +| development | 1 | 17-alpine | No | Local dev | +| staging | 2 | 17-alpine | Yes | Pre-prod | +| production | 4 | 17-alpine | Yes | Production | +| production_ha | 6 | 17-alpine | Yes | High availability | + +## Examples + +### Find Optimal Configuration +```bash +# Test all server profiles +make test-minimal +make test-standard +make test-optimized + +# Compare results, choose best cost/performance +``` + +### Plan Database Upgrade +```bash +# Compare PostgreSQL versions +make compare-postgres + +# Or manually: +./run-advanced.sh -p medium --postgres-version 15-alpine --save-baseline pg15.json +./run-advanced.sh -p medium --postgres-version 17-alpine --compare-with pg15.json +``` + +### Capacity Planning +```bash +# Test different instance counts +./run-advanced.sh -p heavy --instances 1 --save-baseline 1x.json +./run-advanced.sh -p heavy --instances 4 --save-baseline 4x.json +./run-advanced.sh -p heavy --instances 8 --save-baseline 8x.json + +# Compare to find optimal scaling point +``` + +### Regression Testing +```bash +# Before code changes +make baseline-production + +# After changes +make compare + +# Automatically fails if 
regressions detected +``` + +## Directory Structure + +``` +tests/performance/ +ā”œā”€ā”€ Makefile # šŸ‘ˆ Main entrypoint (start here) +ā”œā”€ā”€ README.md # šŸ‘ˆ This file +ā”œā”€ā”€ PERFORMANCE_STRATEGY.md # Complete testing strategy +ā”œā”€ā”€ config.yaml # Configuration +│ +ā”œā”€ā”€ run-advanced.sh # Advanced runner with all features +ā”œā”€ā”€ run-configurable.sh # Config-driven test execution +│ +ā”œā”€ā”€ utils/ +│ ā”œā”€ā”€ generate_docker_compose.py # Generate docker-compose + nginx +│ ā”œā”€ā”€ compare_results.py # Compare baselines +│ ā”œā”€ā”€ baseline_manager.py # Manage baselines +│ ā”œā”€ā”€ report_generator.py # HTML reports +│ ā”œā”€ā”€ check-services.sh # Health checks +│ └── setup-auth.sh # JWT authentication +│ +ā”œā”€ā”€ scenarios/ +│ ā”œā”€ā”€ tools-benchmark.sh # MCP tools tests +│ ā”œā”€ā”€ resources-benchmark.sh # MCP resources tests +│ ā”œā”€ā”€ prompts-benchmark.sh # MCP prompts tests +│ ā”œā”€ā”€ gateway-core-benchmark.sh # 11 gateway core tests (NEW) +│ └── database-benchmark.sh # 4 DB connection tests (NEW) +│ +ā”œā”€ā”€ results/ # Test results (gitignored) +│ └── {profile}_{timestamp}/ +ā”œā”€ā”€ baselines/ # Saved baselines (gitignored) +└── reports/ # HTML reports (gitignored) +``` + +## Advanced Usage + +### Custom Results Location +```bash +# Override default results directory +RESULTS_BASE=/mnt/storage/perf make test +``` + +### Direct Runner +```bash +# Full control with run-advanced.sh +./run-advanced.sh -p medium \ + --server-profile optimized \ + --infrastructure production \ + --postgres-version 17-alpine \ + --instances 4 \ + --save-baseline prod_baseline.json +``` + +### Generate Docker Compose +```bash +# Generate custom docker-compose with nginx load balancer +./utils/generate_docker_compose.py \ + --infrastructure production \ + --server-profile optimized \ + --instances 4 \ + --output docker-compose.prod.yml + +# Creates: +# - docker-compose.prod.yml (4 gateway instances + nginx) +# - nginx.conf (round-robin load balancer) +``` + +## Output + +### Test Results +``` +results/medium_standard_20241010_123456/ +ā”œā”€ā”€ tools_benchmark_list_tools_medium_*.txt # hey output +ā”œā”€ā”€ gateway_admin_list_tools_medium_*.txt # Gateway tests +ā”œā”€ā”€ db_pool_stress_100_medium_*.txt # DB tests +ā”œā”€ā”€ system_metrics.csv # CPU, memory +ā”œā”€ā”€ docker_stats.csv # Container stats +ā”œā”€ā”€ prometheus_metrics.txt # Metrics snapshot +└── gateway_logs.txt # Application logs +``` + +### Baselines +```json +{ + "version": "1.0", + "created": "2025-10-10T00:11:09.675032", + "metadata": { + "profile": "medium", + "server_profile": "optimized" + }, + "results": { + "tools_list_tools": { + "rps": 822.45, + "avg": 12.1, + "p95": 18.9, + "p99": 24.5, + "error_rate": 0.0 + } + } +} +``` + +## Configuration + +Edit `config.yaml` to customize: +- Load profiles (requests, concurrency, timeouts) +- Server profiles (workers, threads, DB pool sizes) +- Infrastructure profiles (instances, PostgreSQL settings) +- SLO thresholds +- Monitoring options + +## Troubleshooting + +### Services Not Starting +```bash +make check # Check health +docker-compose logs gateway # View logs +``` + +### Authentication Failed +```bash +./utils/setup-auth.sh # Regenerate token +source .auth_token # Load token +``` + +### Tests Timeout +```bash +# Tests now have proper timeouts: +# - make test: 600s (10 minutes) +# - make heavy: 1200s (20 minutes) +``` + +### Cleanup +```bash +make clean-results # Remove old test runs +make clean-all # Deep clean everything +``` + +## What's New (v2.1) 
+ +āœ… **Timeout Handling** - Tests won't be killed prematurely +āœ… **Graceful Shutdown** - Saves partial results on interrupt +āœ… **Gateway Core Tests** - 11 new tests for gateway internals +āœ… **Database Tests** - 4 new tests for connection pool behavior +āœ… **Results Organization** - All results in `results/` subdirectory +āœ… **nginx Load Balancer** - Auto-generated for multi-instance tests +āœ… **Better Cleanup** - New make targets for cleanup + +## Quick Reference + +```bash +# List everything +make help # Show all commands +make list-profiles # Show load/server/infra profiles +make list-baselines # Show saved baselines + +# Testing +make test # Standard test +make test-gateway-core # Gateway tests (NEW) +make test-database # DB tests (NEW) + +# Comparison +make baseline # Save baseline +make compare # Compare with baseline + +# Cleanup +make clean # Clean files +make clean-results # Clean directories +``` + +## Documentation + +- **This file** - Quick start and common commands +- **[PERFORMANCE_STRATEGY.md](PERFORMANCE_STRATEGY.md)** - Complete testing strategy, server profile guide, automation guide + +--- + +**Ready?** Run `make test` or `make help` diff --git a/tests/performance/baselines/.gitkeep b/tests/performance/baselines/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/tests/performance/config.yaml b/tests/performance/config.yaml new file mode 100644 index 000000000..048948d09 --- /dev/null +++ b/tests/performance/config.yaml @@ -0,0 +1,386 @@ +environment: + gateway_url: http://localhost:4444 + fast_time_url: http://localhost:8888 + jwt_secret: my-test-key + username: admin@example.com + jwt_expiration_minutes: 10080 + results_base_dir: results +profiles: + smoke: + requests: 100 + concurrency: 5 + duration: 10s + timeout: 30 + description: Quick smoke test for basic functionality + light: + requests: 1000 + concurrency: 10 + duration: 10s + timeout: 30 + description: Light load for quick testing + medium: + requests: 10000 + concurrency: 50 + duration: 30s + timeout: 60 + description: Realistic load simulation + heavy: + requests: 50000 + concurrency: 200 + duration: 60s + timeout: 60 + description: Stress testing and capacity planning + sustained: + requests: 0 + concurrency: 50 + duration: 3600s + timeout: 60 + description: Long-running stability test +scenarios: + tools_benchmark: + enabled: true + description: MCP tool invocation performance + tests: + - name: list_tools + payload: payloads/tools/list_tools.json + endpoint: /rpc + - name: get_system_time + payload: payloads/tools/get_system_time.json + endpoint: /rpc + - name: convert_time + payload: payloads/tools/convert_time.json + endpoint: /rpc + resources_benchmark: + enabled: true + description: MCP resource access performance + tests: + - name: list_resources + payload: payloads/resources/list_resources.json + endpoint: /rpc + - name: read_timezone_info + payload: payloads/resources/read_timezone_info.json + endpoint: /rpc + - name: read_world_times + payload: payloads/resources/read_world_times.json + endpoint: /rpc + prompts_benchmark: + enabled: true + description: MCP prompt execution performance + tests: + - name: list_prompts + payload: payloads/prompts/list_prompts.json + endpoint: /rpc + - name: get_compare_timezones + payload: payloads/prompts/get_compare_timezones.json + endpoint: /rpc + gateway_core: + enabled: true + description: Gateway core functionality (no MCP servers) + tests: + - name: health_check + payload: null + endpoint: /health + method: GET + - name: 
health_check_authenticated + payload: null + endpoint: /health + method: GET + require_auth: true + mcp_server_direct: + enabled: true + description: Direct MCP server testing (bypass gateway) + base_url: http://localhost:8888 + tests: + - name: direct_list_tools + payload: payloads/tools/list_tools.json + endpoint: /sse + - name: direct_get_system_time + payload: payloads/tools/get_system_time.json + endpoint: /sse +slos: + health_check: + p50_ms: 5 + p95_ms: 10 + p99_ms: 15 + min_rps: 1000 + max_error_rate: 0.0 + tools_list: + p50_ms: 15 + p95_ms: 30 + p99_ms: 50 + min_rps: 500 + max_error_rate: 0.001 + tools_invoke_simple: + p50_ms: 25 + p95_ms: 50 + p99_ms: 100 + min_rps: 300 + max_error_rate: 0.001 + tools_invoke_complex: + p50_ms: 40 + p95_ms: 100 + p99_ms: 200 + min_rps: 200 + max_error_rate: 0.005 + resources_list: + p50_ms: 15 + p95_ms: 30 + p99_ms: 50 + min_rps: 500 + max_error_rate: 0.001 + resources_read: + p50_ms: 20 + p95_ms: 40 + p99_ms: 80 + min_rps: 400 + max_error_rate: 0.001 + prompts_list: + p50_ms: 20 + p95_ms: 40 + p99_ms: 80 + min_rps: 400 + max_error_rate: 0.001 + prompts_get: + p50_ms: 30 + p95_ms: 60 + p99_ms: 120 + min_rps: 300 + max_error_rate: 0.001 +monitoring: + enabled: true + interval_seconds: 5 + collect: + - system_metrics + - docker_stats + - database_stats + - application_metrics + system_metrics: + - cpu_percent + - memory_percent + - disk_io + - network_io + database_metrics: + - connection_count + - active_connections + - idle_connections + - query_count + - slow_query_count +profiling: + enabled: false + duration_seconds: 300 + tools: + - py-spy + - memory_profiler + output_formats: + - flamegraph + - speedscope +reporting: + enabled: true + format: html + output_dir: reports + html: + template: templates/report_template.html + include_charts: true + chart_library: chart.js + sections: + - summary + - slo_compliance + - test_results + - system_metrics + - database_performance + - profiling_results + - recommendations + baseline_comparison: + enabled: true + baseline_file: baselines/production_baseline.json + regression_threshold_percent: 10 +ci: + enabled: false + fail_on_slo_violation: true + fail_on_regression: true + upload_artifacts: true + notifications: + slack: + enabled: false + webhook_url: ${SLACK_WEBHOOK_URL} + email: + enabled: false + smtp_server: smtp.example.com + recipients: + - team@example.com +server_profiles: + minimal: + description: Minimal resources for small deployments + gunicorn_workers: 1 + gunicorn_threads: 2 + gunicorn_timeout: 120 + db_pool_size: 5 + db_pool_max_overflow: 10 + db_pool_timeout: 30 + redis_pool_size: 5 + standard: + description: Standard production configuration + gunicorn_workers: 4 + gunicorn_threads: 4 + gunicorn_timeout: 120 + db_pool_size: 20 + db_pool_max_overflow: 40 + db_pool_timeout: 30 + redis_pool_size: 10 + optimized: + description: CPU-optimized for high throughput + gunicorn_workers: 8 + gunicorn_threads: 2 + gunicorn_timeout: 120 + db_pool_size: 30 + db_pool_max_overflow: 60 + db_pool_timeout: 30 + redis_pool_size: 20 + memory_optimized: + description: Memory-optimized for concurrent connections + gunicorn_workers: 4 + gunicorn_threads: 8 + gunicorn_timeout: 120 + db_pool_size: 40 + db_pool_max_overflow: 80 + db_pool_timeout: 30 + redis_pool_size: 25 + io_optimized: + description: I/O optimized for database-heavy workloads + gunicorn_workers: 6 + gunicorn_threads: 4 + gunicorn_timeout: 180 + db_pool_size: 50 + db_pool_max_overflow: 100 + db_pool_timeout: 60 + redis_pool_size: 30 
+infrastructure_profiles: + development: + description: Development environment - minimal resources + gateway_instances: 1 + postgres_version: 17-alpine + postgres_shared_buffers: 128MB + postgres_effective_cache_size: 512MB + postgres_max_connections: 50 + redis_enabled: false + staging: + description: Staging environment - moderate resources + gateway_instances: 2 + postgres_version: 17-alpine + postgres_shared_buffers: 512MB + postgres_effective_cache_size: 2GB + postgres_max_connections: 100 + postgres_work_mem: 8MB + redis_enabled: true + redis_maxmemory: 256mb + production: + description: Production environment - optimized resources + gateway_instances: 4 + postgres_version: 17-alpine + postgres_shared_buffers: 2GB + postgres_effective_cache_size: 6GB + postgres_max_connections: 200 + postgres_work_mem: 16MB + postgres_maintenance_work_mem: 512MB + postgres_random_page_cost: 1.1 + postgres_effective_io_concurrency: 200 + redis_enabled: true + redis_maxmemory: 1gb + redis_maxmemory_policy: allkeys-lru + production_ha: + description: Production HA - high availability configuration + gateway_instances: 6 + postgres_version: 17-alpine + postgres_shared_buffers: 4GB + postgres_effective_cache_size: 12GB + postgres_max_connections: 300 + postgres_work_mem: 32MB + postgres_maintenance_work_mem: 1GB + postgres_random_page_cost: 1.1 + postgres_effective_io_concurrency: 200 + redis_enabled: true + redis_maxmemory: 2gb + redis_maxmemory_policy: allkeys-lru +database_comparison: + enabled: false + versions: + - version: 15-alpine + label: PostgreSQL 15 + - version: 16-alpine + label: PostgreSQL 16 + - version: 17-alpine + label: PostgreSQL 17 + common_config: + shared_buffers: 512MB + effective_cache_size: 2GB + max_connections: 100 +scaling_tests: + enabled: false + configurations: + - instances: 1 + description: Single instance baseline + - instances: 2 + description: Dual instance + - instances: 4 + description: Quad instance + - instances: 8 + description: Eight instance scale-out + load_balancer: + algorithm: round_robin + health_check_interval: 10 +configuration_matrix: + enabled: false + strategy: one_factor_at_a_time + variables: + gunicorn_workers: + values: + - 2 + - 4 + - 6 + - 8 + default: 4 + gunicorn_threads: + values: + - 2 + - 4 + - 8 + default: 4 + db_pool_size: + values: + - 10 + - 20 + - 30 + - 40 + default: 20 + postgres_version: + values: + - 15-alpine + - 16-alpine + - 17-alpine + default: 17-alpine + sample_size: 20 +comparison: + save_baseline: false + baseline_file: baselines/current_baseline.json + compare_enabled: false + compare_baseline: baselines/production_baseline.json + regression_threshold: + throughput_decrease_percent: 10 + latency_increase_percent: 15 + error_rate_increase_percent: 5 +advanced: + warmup: + enabled: true + requests: 100 + cooldown_seconds: 10 + infrastructure_change_delay_seconds: 30 + retry: + enabled: true + max_attempts: 3 + save_raw_results: true + capture_logs: true + log_level: INFO + docker_compose: + file: docker-compose.yml + backup_original: true + restore_after_test: false diff --git a/tests/performance/payloads/prompts/get_compare_timezones.json b/tests/performance/payloads/prompts/get_compare_timezones.json new file mode 100644 index 000000000..0aac05219 --- /dev/null +++ b/tests/performance/payloads/prompts/get_compare_timezones.json @@ -0,0 +1,11 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "prompts/get", + "params": { + "name": "compare_timezones", + "arguments": { + "timezones": "America/New_York,Europe/London,Asia/Tokyo" 
+ } + } +} diff --git a/tests/performance/payloads/prompts/get_customer_greeting.json b/tests/performance/payloads/prompts/get_customer_greeting.json new file mode 100644 index 000000000..38d869391 --- /dev/null +++ b/tests/performance/payloads/prompts/get_customer_greeting.json @@ -0,0 +1,13 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "prompts/get", + "params": { + "name": "customer_greeting", + "arguments": { + "name": "John Smith", + "company": "Acme Corporation", + "topic": "cloud migration" + } + } +} diff --git a/tests/performance/payloads/prompts/list_prompts.json b/tests/performance/payloads/prompts/list_prompts.json new file mode 100644 index 000000000..2417f2b93 --- /dev/null +++ b/tests/performance/payloads/prompts/list_prompts.json @@ -0,0 +1,6 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "prompts/list", + "params": {} +} diff --git a/tests/performance/payloads/resources/list_resources.json b/tests/performance/payloads/resources/list_resources.json new file mode 100644 index 000000000..ced96b057 --- /dev/null +++ b/tests/performance/payloads/resources/list_resources.json @@ -0,0 +1,6 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "resources/list", + "params": {} +} diff --git a/tests/performance/payloads/resources/read_timezone_info.json b/tests/performance/payloads/resources/read_timezone_info.json new file mode 100644 index 000000000..63952fd51 --- /dev/null +++ b/tests/performance/payloads/resources/read_timezone_info.json @@ -0,0 +1,8 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "resources/read", + "params": { + "uri": "sample://welcome-message" + } +} diff --git a/tests/performance/payloads/resources/read_world_times.json b/tests/performance/payloads/resources/read_world_times.json new file mode 100644 index 000000000..ddd465f0c --- /dev/null +++ b/tests/performance/payloads/resources/read_world_times.json @@ -0,0 +1,8 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "resources/read", + "params": { + "uri": "sample://api-documentation" + } +} diff --git a/tests/performance/payloads/tools/convert_time.json b/tests/performance/payloads/tools/convert_time.json new file mode 100644 index 000000000..16a861e79 --- /dev/null +++ b/tests/performance/payloads/tools/convert_time.json @@ -0,0 +1,13 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/call", + "params": { + "name": "fast-time-convert-time", + "arguments": { + "time": "09:00", + "source_timezone": "Europe/London", + "target_timezone": "Asia/Tokyo" + } + } +} diff --git a/tests/performance/payloads/tools/get_system_time.json b/tests/performance/payloads/tools/get_system_time.json new file mode 100644 index 000000000..1193e32d8 --- /dev/null +++ b/tests/performance/payloads/tools/get_system_time.json @@ -0,0 +1,11 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/call", + "params": { + "name": "fast-time-get-system-time", + "arguments": { + "timezone": "America/New_York" + } + } +} diff --git a/tests/performance/payloads/tools/list_tools.json b/tests/performance/payloads/tools/list_tools.json new file mode 100644 index 000000000..f621da561 --- /dev/null +++ b/tests/performance/payloads/tools/list_tools.json @@ -0,0 +1,6 @@ +{ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/list", + "params": {} +} diff --git a/tests/performance/profiles/heavy.env b/tests/performance/profiles/heavy.env new file mode 100644 index 000000000..4fbbdefa4 --- /dev/null +++ b/tests/performance/profiles/heavy.env @@ -0,0 +1,5 @@ +# Heavy load profile - for stress testing +REQUESTS=10000 +CONCURRENCY=200 +DURATION=60s 
+TIMEOUT=60 diff --git a/tests/performance/profiles/light.env b/tests/performance/profiles/light.env new file mode 100644 index 000000000..902890d15 --- /dev/null +++ b/tests/performance/profiles/light.env @@ -0,0 +1,5 @@ +# Light load profile - for quick smoke tests +REQUESTS=1000 +CONCURRENCY=10 +DURATION=10s +TIMEOUT=30 diff --git a/tests/performance/profiles/medium.env b/tests/performance/profiles/medium.env new file mode 100644 index 000000000..54c1427ac --- /dev/null +++ b/tests/performance/profiles/medium.env @@ -0,0 +1,5 @@ +# Medium load profile - for realistic testing +REQUESTS=1000 +CONCURRENCY=50 +DURATION=30s +TIMEOUT=60 diff --git a/tests/performance/run-advanced.sh b/tests/performance/run-advanced.sh new file mode 100755 index 000000000..3f46f0736 --- /dev/null +++ b/tests/performance/run-advanced.sh @@ -0,0 +1,381 @@ +#!/usr/bin/env bash +# ============================================================================== +# Advanced Performance Test Runner with Server Profile Support +# Supports infrastructure switching, database version comparison, and more +# ============================================================================== + +set -Eeuo pipefail + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +MAGENTA='\033[0;35m' +CYAN='\033[0;36m' +NC='\033[0m' + +log() { echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*"; } +info() { echo -e "${BLUE}[INFO]${NC} $*"; } +error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +success() { echo -e "${GREEN}[SUCCESS]${NC} $*"; } +header() { + echo "" + echo -e "${MAGENTA}╔════════════════════════════════════════════════════════════════╗${NC}" + echo -e "${MAGENTA}ā•‘${NC} $1" + echo -e "${MAGENTA}ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•${NC}" + echo "" +} + +# Graceful shutdown handler +cleanup_on_interrupt() { + warn "Received interrupt signal, cleaning up..." + + # Kill any child processes + jobs -p | xargs -r kill 2>/dev/null || true + + # Exit with proper code for SIGINT (130) + exit 130 +} + +# Set up signal handling - MUST be before any long-running operations +trap 'cleanup_on_interrupt' SIGTERM SIGINT + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." &>/dev/null && pwd)" + +# Configuration +CONFIG_FILE="${CONFIG_FILE:-$SCRIPT_DIR/config.yaml}" +PROFILE="${PROFILE:-medium}" +SERVER_PROFILE="${SERVER_PROFILE:-standard}" +INFRASTRUCTURE="${INFRASTRUCTURE:-}" +POSTGRES_VERSION="${POSTGRES_VERSION:-}" +INSTANCES="${INSTANCES:-}" +SAVE_BASELINE="${SAVE_BASELINE:-}" +COMPARE_WITH="${COMPARE_WITH:-}" +SKIP_SETUP="${SKIP_SETUP:-false}" +SKIP_MONITORING="${SKIP_MONITORING:-false}" +SKIP_REPORT="${SKIP_REPORT:-false}" +RESTORE_COMPOSE="${RESTORE_COMPOSE:-true}" + +usage() { + cat < Load profile (smoke, light, medium, heavy) + +Server Configuration: + --server-profile Server profile (minimal, standard, optimized, etc.) 
+ --infrastructure Infrastructure profile (development, staging, production) + --postgres-version PostgreSQL version (e.g., 17-alpine) + --instances Number of gateway instances + +Baseline & Comparison: + --save-baseline Save results as baseline + --compare-with Compare results with baseline + +Test Control: + --skip-setup Skip service checks and auth + --skip-monitoring Skip system monitoring + --skip-report Skip HTML report generation + --no-restore Don't restore original docker-compose + +List Options: + --list-profiles List available profiles + --list-server-profiles List server profiles + --list-infrastructure List infrastructure profiles + +Examples: + # Test with optimized server profile + $0 -p medium --server-profile optimized + + # Test production infrastructure + $0 -p heavy --infrastructure production + + # Compare PostgreSQL versions + $0 -p medium --postgres-version 15-alpine --save-baseline pg15.json + $0 -p medium --postgres-version 17-alpine --compare-with pg15.json + + # Test with 4 gateway instances + $0 -p heavy --instances 4 + +EOF + exit 1 +} + +# Parse arguments +while (( "$#" )); do + case "$1" in + -p|--profile) PROFILE="$2"; shift 2 ;; + --server-profile) SERVER_PROFILE="$2"; shift 2 ;; + --infrastructure) INFRASTRUCTURE="$2"; shift 2 ;; + --postgres-version) POSTGRES_VERSION="$2"; shift 2 ;; + --instances) INSTANCES="$2"; shift 2 ;; + --save-baseline) SAVE_BASELINE="$2"; shift 2 ;; + --compare-with) COMPARE_WITH="$2"; shift 2 ;; + --skip-setup) SKIP_SETUP=true; shift ;; + --skip-monitoring) SKIP_MONITORING=true; shift ;; + --skip-report) SKIP_REPORT=true; shift ;; + --no-restore) RESTORE_COMPOSE=false; shift ;; + --list-profiles) + python3 "$SCRIPT_DIR/utils/generate_docker_compose.py" --config "$CONFIG_FILE" --list-profiles + exit 0 + ;; + --list-server-profiles) + python3 -c "import yaml; c=yaml.safe_load(open('$CONFIG_FILE')); [print(f'{k}: {v.get(\"description\",\"\")}') for k,v in c.get('server_profiles',{}).items()]" + exit 0 + ;; + --list-infrastructure) + python3 -c "import yaml; c=yaml.safe_load(open('$CONFIG_FILE')); [print(f'{k}: {v.get(\"description\",\"\")}') for k,v in c.get('infrastructure_profiles',{}).items()]" + exit 0 + ;; + -h|--help) usage ;; + *) error "Unknown option: $1"; usage ;; + esac +done + +# Banner +header "šŸš€ Advanced Performance Test Runner" +log "Profile: $PROFILE" +log "Server Profile: $SERVER_PROFILE" +[ -n "$INFRASTRUCTURE" ] && log "Infrastructure: $INFRASTRUCTURE" +[ -n "$POSTGRES_VERSION" ] && log "PostgreSQL: $POSTGRES_VERSION" +[ -n "$INSTANCES" ] && log "Instances: $INSTANCES" +echo "" + +# Create results directory +TIMESTAMP=$(date +%Y%m%d_%H%M%S) +RESULTS_BASE="${RESULTS_BASE:-$SCRIPT_DIR/results}" +RESULTS_DIR="$RESULTS_BASE/${PROFILE}_${SERVER_PROFILE}_${TIMESTAMP}" +mkdir -p "$RESULTS_DIR" + +log "Results directory: $RESULTS_DIR" + +# Step 1: Backup original docker-compose if infrastructure switching +COMPOSE_BACKUP="" +if [ -n "$INFRASTRUCTURE" ] || [ -n "$POSTGRES_VERSION" ] || [ -n "$INSTANCES" ]; then + header "šŸ“‹ Step 1: Infrastructure Configuration" + + COMPOSE_FILE="$PROJECT_ROOT/docker-compose.yml" + COMPOSE_BACKUP="$SCRIPT_DIR/docker-compose.backup_${TIMESTAMP}.yml" + + if [ -f "$COMPOSE_FILE" ]; then + cp "$COMPOSE_FILE" "$COMPOSE_BACKUP" + success "Backed up docker-compose.yml to $(basename "$COMPOSE_BACKUP")" + fi + + # Generate new docker-compose + NEW_COMPOSE="$SCRIPT_DIR/docker-compose.perf.yml" + + GEN_ARGS=( + --config "$CONFIG_FILE" + --server-profile "$SERVER_PROFILE" + --output "$NEW_COMPOSE" 
+ ) + + [ -n "$INFRASTRUCTURE" ] && GEN_ARGS+=(--infrastructure "$INFRASTRUCTURE") + [ -n "$POSTGRES_VERSION" ] && GEN_ARGS+=(--postgres-version "$POSTGRES_VERSION") + [ -n "$INSTANCES" ] && GEN_ARGS+=(--instances "$INSTANCES") + + if python3 "$SCRIPT_DIR/utils/generate_docker_compose.py" "${GEN_ARGS[@]}"; then + # Copy to project root + cp "$NEW_COMPOSE" "$COMPOSE_FILE" + success "Applied new docker-compose configuration" + + # Restart services + log "Stopping current services..." + cd "$PROJECT_ROOT" + docker-compose down || true + + log "Starting services with new configuration..." + docker-compose up -d + + # Wait for health checks + log "Waiting for services to be healthy..." + sleep 30 + else + error "Failed to generate docker-compose" + exit 1 + fi +fi + +# Step 2: Apply server profile environment variables +if [ "$SERVER_PROFILE" != "standard" ] || [ -n "$INFRASTRUCTURE" ]; then + header "āš™ļø Step 2: Applying Server Profile" + + # Extract server profile settings from config + WORKERS=$(python3 -c "import yaml; c=yaml.safe_load(open('$CONFIG_FILE')); print(c['server_profiles']['$SERVER_PROFILE'].get('gunicorn_workers', 4))") + THREADS=$(python3 -c "import yaml; c=yaml.safe_load(open('$CONFIG_FILE')); print(c['server_profiles']['$SERVER_PROFILE'].get('gunicorn_threads', 4))") + TIMEOUT=$(python3 -c "import yaml; c=yaml.safe_load(open('$CONFIG_FILE')); print(c['server_profiles']['$SERVER_PROFILE'].get('gunicorn_timeout', 120))") + DB_POOL=$(python3 -c "import yaml; c=yaml.safe_load(open('$CONFIG_FILE')); print(c['server_profiles']['$SERVER_PROFILE'].get('db_pool_size', 20))") + DB_OVERFLOW=$(python3 -c "import yaml; c=yaml.safe_load(open('$CONFIG_FILE')); print(c['server_profiles']['$SERVER_PROFILE'].get('db_pool_max_overflow', 40))") + + info "Workers: $WORKERS, Threads: $THREADS" + info "DB Pool: $DB_POOL (max overflow: $DB_OVERFLOW)" + + # Note: These are already in docker-compose if generated, but we log them + success "Server profile applied via docker-compose" +fi + +# Step 3: Service health checks +if [ "$SKIP_SETUP" = false ]; then + header "šŸ„ Step 3: Service Health Checks" + if bash "$SCRIPT_DIR/utils/check-services.sh"; then + success "All services healthy" + else + error "Services not healthy" + exit 1 + fi +else + warn "Skipping service health checks" +fi + +# Step 4: Authentication +if [ "$SKIP_SETUP" = false ]; then + header "šŸ” Step 4: Authentication Setup" + if bash "$SCRIPT_DIR/utils/setup-auth.sh" > /dev/null 2>&1; then + # shellcheck disable=SC1091 + source "$SCRIPT_DIR/.auth_token" + export MCPGATEWAY_BEARER_TOKEN + success "Authentication configured" + else + error "Failed to setup authentication" + exit 1 + fi +else + warn "Skipping authentication setup" +fi + +# Step 5: Run tests using the original configurable runner +header "🧪 Step 5: Running Performance Tests" + +# Use the original run-configurable.sh for actual test execution +if bash "$SCRIPT_DIR/run-configurable.sh" -p "$PROFILE" --skip-setup; then + success "Tests completed" +else + error "Tests failed" + TEST_FAILED=true +fi + +# Step 6: Save baseline if requested +if [ -n "$SAVE_BASELINE" ]; then + header "šŸ’¾ Step 6: Saving Baseline" + + BASELINE_FILE="$SCRIPT_DIR/baselines/$SAVE_BASELINE" + + # Build metadata + METADATA=$(cat < /dev/null + + # Compare + COMPARISON_FILE="$RESULTS_DIR/comparison_vs_$(basename "$COMPARE_WITH" .json).json" + + if python3 "$SCRIPT_DIR/utils/compare_results.py" \ + "$BASELINE_FILE" \ + "$CURRENT_BASELINE" \ + --output "$COMPARISON_FILE"; then + success "Comparison 
complete" + + # Check for regressions + VERDICT=$(python3 -c "import json; print(json.load(open('$COMPARISON_FILE'))['verdict'])") + case "$VERDICT" in + recommended) + success "āœ… RECOMMENDED - Significant improvements detected" + ;; + acceptable) + info "āœ“ ACCEPTABLE - No major regressions" + ;; + caution) + warn "āš ļø CAUTION - Some regressions detected" + ;; + not_recommended) + error "āŒ NOT RECOMMENDED - Critical regressions detected" + ;; + esac + fi + + # Cleanup + rm -f "$CURRENT_BASELINE" + fi +fi + +# Step 8: Restore original docker-compose +if [ -n "$COMPOSE_BACKUP" ] && [ "$RESTORE_COMPOSE" = true ]; then + header "ā™»ļø Step 8: Restoring Original Configuration" + + COMPOSE_FILE="$PROJECT_ROOT/docker-compose.yml" + + cp "$COMPOSE_BACKUP" "$COMPOSE_FILE" + success "Restored original docker-compose.yml" + + cd "$PROJECT_ROOT" + log "Restarting services with original configuration..." + docker-compose down || true + docker-compose up -d + + log "Waiting for services..." + sleep 20 + + success "Services restored" +fi + +# Final summary +header "šŸŽ‰ Test Run Complete" +log "Profile: $PROFILE" +log "Server Profile: $SERVER_PROFILE" +[ -n "$INFRASTRUCTURE" ] && log "Infrastructure: $INFRASTRUCTURE" +log "Results: $RESULTS_DIR" +log "Duration: $SECONDS seconds" + +if [ -n "$SAVE_BASELINE" ]; then + log "Baseline saved: baselines/$SAVE_BASELINE" +fi + +if [ -n "$COMPARE_WITH" ]; then + log "Comparison: $RESULTS_DIR/comparison_vs_$(basename "$COMPARE_WITH" .json).json" +fi + +success "All done! āœ…" + +exit 0 diff --git a/tests/performance/run-all.sh b/tests/performance/run-all.sh new file mode 100755 index 000000000..43f05c272 --- /dev/null +++ b/tests/performance/run-all.sh @@ -0,0 +1,271 @@ +#!/usr/bin/env bash +# ============================================================================== +# Comprehensive Performance Test Runner +# Runs all performance benchmarks for MCP Gateway with fast-time-server +# ============================================================================== + +set -Eeuo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +MAGENTA='\033[0;35m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*" +} + +info() { + echo -e "${BLUE}[INFO]${NC} $*" +} + +error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +warn() { + echo -e "${YELLOW}[WARN]${NC} $*" +} + +success() { + echo -e "${GREEN}[SUCCESS]${NC} $*" +} + +header() { + echo "" + echo -e "${MAGENTA}╔════════════════════════════════════════════════════════════════╗${NC}" + echo -e "${MAGENTA}ā•‘${NC} $1" + echo -e "${MAGENTA}ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•${NC}" + echo "" +} + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&>/dev/null && pwd)" + +# Configuration +PROFILE="${PROFILE:-medium}" +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +SKIP_SETUP="${SKIP_SETUP:-false}" +RUN_TOOLS="${RUN_TOOLS:-true}" +RUN_RESOURCES="${RUN_RESOURCES:-true}" +RUN_PROMPTS="${RUN_PROMPTS:-true}" +GENERATE_REPORT="${GENERATE_REPORT:-true}" + +# Usage +usage() { + cat < Load profile (light, medium, heavy) [default: medium] + -u, --url Gateway URL [default: http://localhost:4444] + --skip-setup Skip service health checks and auth setup + --tools-only Run only tool benchmarks + --resources-only Run only resource benchmarks + --prompts-only Run only prompt benchmarks + --no-report Skip report generation + -h, --help Display this help and exit + +Environment Variables: + PROFILE Load profile (light, medium, heavy) + GATEWAY_URL Gateway URL + SKIP_SETUP Skip setup steps (true/false) + +Examples: + # Run all tests with medium profile + $0 + + # Run with light profile for quick testing + $0 -p light + + # Run only tool benchmarks with heavy load + $0 -p heavy --tools-only + + # Run all tests against a remote gateway + $0 -u https://gateway.example.com + +Before running: + 1. Start the stack: make compose-up + 2. Wait for services to be healthy + 3. Run this script + +EOF + exit 1 +} + +# Parse command-line arguments +while (( "$#" )); do + case "$1" in + -p|--profile) PROFILE="$2"; shift 2 ;; + -u|--url) GATEWAY_URL="$2"; shift 2 ;; + --skip-setup) SKIP_SETUP=true; shift ;; + --tools-only) RUN_TOOLS=true; RUN_RESOURCES=false; RUN_PROMPTS=false; shift ;; + --resources-only) RUN_TOOLS=false; RUN_RESOURCES=true; RUN_PROMPTS=false; shift ;; + --prompts-only) RUN_TOOLS=false; RUN_RESOURCES=false; RUN_PROMPTS=true; shift ;; + --no-report) GENERATE_REPORT=false; shift ;; + -h|--help) usage ;; + *) error "Unknown option: $1"; usage ;; + esac +done + +# Banner +header "šŸš€ MCP Gateway Performance Testing Suite" +log "Profile: $PROFILE" +log "Gateway: $GATEWAY_URL" +log "Project Root: $PROJECT_ROOT" +echo "" + +# Change to project root +cd "$PROJECT_ROOT" + +# Step 1: Check services (unless skipped) +if [ "$SKIP_SETUP" = false ]; then + header "šŸ“‹ Step 1: Checking Service Health" + if ! bash "$SCRIPT_DIR/utils/check-services.sh"; then + error "Services are not healthy. Please run: make compose-up" + exit 1 + fi +else + warn "Skipping service health checks" +fi + +# Step 2: Setup authentication (unless skipped) +if [ "$SKIP_SETUP" = false ]; then + header "šŸ” Step 2: Setting Up Authentication" + if ! 
bash "$SCRIPT_DIR/utils/setup-auth.sh" > /dev/null; then + error "Failed to setup authentication" + exit 1 + fi + # shellcheck disable=SC1091 + source "$SCRIPT_DIR/.auth_token" + export MCPGATEWAY_BEARER_TOKEN +else + warn "Skipping authentication setup" +fi + +# Export configuration for child scripts +export PROFILE +export GATEWAY_URL + +# Step 3: Run benchmarks +BENCHMARK_START=$(date +%s) +FAILED_TESTS=() + +if [ "$RUN_TOOLS" = true ]; then + header "šŸ”§ Step 3a: Running Tool Invocation Benchmarks" + if bash "$SCRIPT_DIR/scenarios/tools-benchmark.sh"; then + success "Tool benchmarks completed" + else + error "Tool benchmarks failed" + FAILED_TESTS+=("tools") + fi +fi + +if [ "$RUN_RESOURCES" = true ]; then + header "šŸ“ Step 3b: Running Resource Access Benchmarks" + if bash "$SCRIPT_DIR/scenarios/resources-benchmark.sh"; then + success "Resource benchmarks completed" + else + error "Resource benchmarks failed" + FAILED_TESTS+=("resources") + fi +fi + +if [ "$RUN_PROMPTS" = true ]; then + header "šŸ’¬ Step 3c: Running Prompt Execution Benchmarks" + if bash "$SCRIPT_DIR/scenarios/prompts-benchmark.sh"; then + success "Prompt benchmarks completed" + else + error "Prompt benchmarks failed" + FAILED_TESTS+=("prompts") + fi +fi + +BENCHMARK_END=$(date +%s) +TOTAL_TIME=$((BENCHMARK_END - BENCHMARK_START)) + +# Step 4: Generate summary report +if [ "$GENERATE_REPORT" = true ]; then + header "šŸ“Š Step 4: Generating Summary Report" + + RESULTS_DIR="$PROJECT_ROOT/tests/performance/results" + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + SUMMARY_FILE="$RESULTS_DIR/summary_${PROFILE}_${TIMESTAMP}.md" + + cat > "$SUMMARY_FILE" </dev/null | wc -l || echo 0) + RESOURCE_RESULTS=$(find "$RESULTS_DIR" -name "resources_*_${PROFILE}_*.txt" -type f 2>/dev/null | wc -l || echo 0) + PROMPT_RESULTS=$(find "$RESULTS_DIR" -name "prompts_*_${PROFILE}_*.txt" -type f 2>/dev/null | wc -l || echo 0) + + cat >> "$SUMMARY_FILE" <> "$SUMMARY_FILE" + else + for test in "${FAILED_TESTS[@]}"; do + echo "- $test āŒ" >> "$SUMMARY_FILE" + done + fi + + cat >> "$SUMMARY_FILE" <&2 +} + +warn() { + echo -e "${YELLOW}[WARN]${NC} $*" +} + +success() { + echo -e "${GREEN}[SUCCESS]${NC} $*" +} + +header() { + echo "" + echo -e "${MAGENTA}╔════════════════════════════════════════════════════════════════╗${NC}" + echo -e "${MAGENTA}ā•‘${NC} $1" + echo -e "${MAGENTA}ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•${NC}" + echo "" +} + +# Graceful shutdown handler +cleanup_partial_results() { + log "Received shutdown signal, saving partial results..." + + # Stop monitoring if running + if [ -n "${MONITOR_PID:-}" ]; then + kill "$MONITOR_PID" 2>/dev/null || true + wait "$MONITOR_PID" 2>/dev/null || true + fi + + # Kill any background processes + jobs -p | xargs -r kill 2>/dev/null || true + + # Save summary + if [ -d "${RESULTS_DIR:-}" ]; then + echo "Test interrupted at $(date)" > "$RESULTS_DIR/PARTIAL_RESULTS.txt" + log "Partial results saved to: $RESULTS_DIR" + fi + + # Exit with proper code for SIGINT (130) + exit 130 +} + +# Enable immediate signal handling +trap 'cleanup_partial_results' SIGTERM SIGINT + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&>/dev/null && pwd)" + +# Configuration +CONFIG_FILE="${CONFIG_FILE:-$SCRIPT_DIR/config.yaml}" +PROFILE="${PROFILE:-medium}" +SKIP_SETUP="${SKIP_SETUP:-false}" +SKIP_WARMUP="${SKIP_WARMUP:-false}" +SKIP_MONITORING="${SKIP_MONITORING:-false}" +SKIP_REPORT="${SKIP_REPORT:-false}" + +# Parse command-line arguments +usage() { + cat < Configuration file [default: config.yaml] + -p, --profile Load profile (smoke, light, medium, heavy, sustained) + --skip-setup Skip service checks and auth setup + --skip-warmup Skip warmup requests + --skip-monitoring Skip system monitoring during tests + --skip-report Skip HTML report generation + --scenario Run only specified scenario + --list-scenarios List available scenarios + -h, --help Display this help + +Environment Variables: + CONFIG_FILE Path to config file + PROFILE Load profile name + SKIP_SETUP Skip setup (true/false) + SKIP_MONITORING Skip monitoring (true/false) + +Examples: + # Run with default configuration + $0 + + # Run light profile with custom config + $0 -p light -c my-config.yaml + + # Run only tools benchmark + $0 --scenario tools_benchmark + + # Quick run without monitoring + $0 -p smoke --skip-monitoring --skip-report + +EOF + exit 1 +} + +RUN_SCENARIO="" + +while (( "$#" )); do + case "$1" in + -c|--config) CONFIG_FILE="$2"; shift 2 ;; + -p|--profile) PROFILE="$2"; shift 2 ;; + --skip-setup) SKIP_SETUP=true; shift ;; + --skip-warmup) SKIP_WARMUP=true; shift ;; + --skip-monitoring) SKIP_MONITORING=true; shift ;; + --skip-report) SKIP_REPORT=true; shift ;; + --scenario) RUN_SCENARIO="$2"; shift 2 ;; + --list-scenarios) + if [ -f "$CONFIG_FILE" ]; then + echo "Available scenarios:" + python3 -c "import yaml; config = yaml.safe_load(open('$CONFIG_FILE')); [print(f' - {name}') for name in config.get('scenarios', {}).keys()]" + else + error "Config file not found: $CONFIG_FILE" + fi + exit 0 + ;; + -h|--help) usage ;; + *) error "Unknown option: $1"; usage ;; + esac +done + +# Check if config file exists +if [ ! -f "$CONFIG_FILE" ]; then + error "Configuration file not found: $CONFIG_FILE" + exit 1 +fi + +# Check for required tools +command -v python3 >/dev/null 2>&1 || { error "python3 is required but not installed"; exit 1; } +command -v hey >/dev/null 2>&1 || { error "hey is required but not installed. 
Install with: brew install hey"; exit 1; } + +# Install yq for YAML parsing if not available, use Python as fallback +parse_yaml() { + local key=$1 + python3 -c "import yaml, sys; config = yaml.safe_load(open('$CONFIG_FILE')); print(config$key)" 2>/dev/null || echo "" +} + +# Banner +header "šŸš€ Configurable Performance Test Runner" +log "Configuration: $CONFIG_FILE" +log "Profile: $PROFILE" +log "Project Root: $PROJECT_ROOT" +echo "" + +# Create results directory +TIMESTAMP=$(date +%Y%m%d_%H%M%S) +RESULTS_BASE="${RESULTS_BASE:-$SCRIPT_DIR/results}" +RESULTS_DIR="$RESULTS_BASE/${PROFILE}_${TIMESTAMP}" +mkdir -p "$RESULTS_DIR" + +log "Results directory: $RESULTS_DIR" + +# Parse configuration using Python +parse_config() { + python3 < /dev/null 2>&1; then + error "Failed to setup authentication" + exit 1 + fi + # shellcheck disable=SC1091 + source "$SCRIPT_DIR/.auth_token" + export MCPGATEWAY_BEARER_TOKEN + success "Authentication configured" +else + warn "Skipping authentication setup" +fi + +# Step 3: Start monitoring +MONITOR_PID="" +if [ "$SKIP_MONITORING" = false ] && [ "$MONITORING_ENABLED" = "True" ]; then + header "šŸ“Š Step 3: Starting System Monitoring" + + # Start background monitoring + { + while true; do + echo "$(date +%s),$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1),$(free | grep Mem | awk '{print ($3/$2) * 100.0}')" >> "$RESULTS_DIR/system_metrics.csv" + + # Docker stats if available + if command -v docker >/dev/null 2>&1; then + docker stats --no-stream --format "{{.Container}},{{.CPUPerc}},{{.MemPerc}}" >> "$RESULTS_DIR/docker_stats.csv" 2>/dev/null + fi + + sleep "${MONITORING_INTERVAL:-5}" + done + } & + MONITOR_PID=$! + success "Monitoring started (PID: $MONITOR_PID)" +else + info "Monitoring disabled" +fi + +# Step 4: Warmup +if [ "$SKIP_WARMUP" = false ]; then + header "šŸ”„ Step 4: Warmup" + log "Sending 100 warmup requests..." 
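+    # Warmup is a hedge against cold-start effects: it primes HTTP keep-alive
+    # connections, the gateway's DB connection pool, and any lazily initialized
+    # caches so the first measured scenario below is not skewed. The 100-request
+    # count is a convention of this suite, not a requirement of hey.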
+ + hey -n 100 -c 10 -m GET "$GATEWAY_URL/health" >/dev/null 2>&1 || true + + success "Warmup complete" + sleep 5 +else + warn "Skipping warmup" +fi + +# Step 5: Run test scenarios +header "🧪 Step 5: Running Test Scenarios" + +run_test() { + local test_name=$1 + local payload_file=$2 + local endpoint=$3 + local method=${4:-POST} + + local full_endpoint="${GATEWAY_URL}${endpoint}" + + log "Running: $test_name" + + local output_file="$RESULTS_DIR/${test_name}_${PROFILE}_${TIMESTAMP}.txt" + + local hey_cmd=(hey -n "$REQUESTS" -c "$CONCURRENCY" -t "$TIMEOUT" -m "$method") + + if [ "$method" = "POST" ] && [ -n "$payload_file" ] && [ -f "$SCRIPT_DIR/$payload_file" ]; then + hey_cmd+=(-T "application/json" -D "$SCRIPT_DIR/$payload_file") + fi + + if [ -n "${MCPGATEWAY_BEARER_TOKEN:-}" ]; then + hey_cmd+=(-H "Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN") + fi + + hey_cmd+=("$full_endpoint") + + # Run test + if "${hey_cmd[@]}" > "$output_file" 2>&1; then + # Extract key metrics for quick summary + local rps=$(grep "Requests/sec:" "$output_file" | awk '{print $2}') + local p95=$(grep "95%" "$output_file" | awk '{print $4}' | head -1) + info " → RPS: $rps, p95: $p95" + return 0 + else + error " → Test failed" + return 1 + fi +} + +# Parse scenarios from config and run them +FAILED_TESTS=() + +python3 </dev/null || true + success "Monitoring stopped" +fi + +# Step 7: Collect additional metrics +header "šŸ“ˆ Step 7: Collecting Metrics" + +# Save Prometheus metrics if available +if curl -sf "$GATEWAY_URL/metrics" > "$RESULTS_DIR/prometheus_metrics.txt" 2>/dev/null; then + success "Prometheus metrics collected" +fi + +# Save application logs +if command -v docker >/dev/null 2>&1; then + docker logs gateway --tail 1000 > "$RESULTS_DIR/gateway_logs.txt" 2>&1 || true + success "Application logs collected" +fi + +# Step 8: Generate HTML report +if [ "$SKIP_REPORT" = false ] && [ "$REPORTING_ENABLED" = "True" ]; then + header "šŸ“„ Step 8: Generating HTML Report" + + REPORT_FILE="$SCRIPT_DIR/reports/performance_report_${PROFILE}_${TIMESTAMP}.html" + + if python3 "$SCRIPT_DIR/utils/report_generator.py" \ + --results-dir "$RESULTS_DIR" \ + --output "$REPORT_FILE" \ + --config "$CONFIG_FILE" \ + --profile "$PROFILE"; then + + success "Report generated: $REPORT_FILE" + + # Try to open in browser (optional) + if command -v xdg-open >/dev/null 2>&1; then + info "Opening report in browser..." + xdg-open "$REPORT_FILE" 2>/dev/null || true + elif command -v open >/dev/null 2>&1; then + info "Opening report in browser..." + open "$REPORT_FILE" 2>/dev/null || true + fi + else + error "Failed to generate report" + fi +else + info "Report generation disabled" +fi + +# Final summary +header "šŸŽ‰ Test Run Complete" +log "Profile: $PROFILE" +log "Results: $RESULTS_DIR" +log "Duration: $SECONDS seconds" + +if [ ${#FAILED_TESTS[@]} -eq 0 ]; then + success "All tests completed successfully! 
āœ…" + exit 0 +else + error "Some tests failed: ${FAILED_TESTS[*]}" + exit 1 +fi diff --git a/tests/performance/scenarios/database-benchmark.sh b/tests/performance/scenarios/database-benchmark.sh new file mode 100755 index 000000000..3890bcdb2 --- /dev/null +++ b/tests/performance/scenarios/database-benchmark.sh @@ -0,0 +1,217 @@ +#!/usr/bin/env bash +# ============================================================================== +# Database Connection Pool Performance Testing +# Tests database connection pool behavior under load +# ============================================================================== + +set -Eeuo pipefail + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log() { echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*"; } +info() { echo -e "${BLUE}[INFO]${NC} $*"; } +error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." &>/dev/null && pwd)" + +# Configuration +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +PROFILE="${PROFILE:-medium}" +RESULTS_DIR="${RESULTS_DIR:-$PROJECT_ROOT/tests/performance/results}" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +# Load profile +PROFILE_FILE="$PROJECT_ROOT/tests/performance/profiles/$PROFILE.env" +if [ -f "$PROFILE_FILE" ]; then + # shellcheck disable=SC1090 + source "$PROFILE_FILE" +fi + +REQUESTS="${REQUESTS:-1000}" +TIMEOUT="${TIMEOUT:-60}" + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Load auth token +if [ -f "$PROJECT_ROOT/tests/performance/.auth_token" ]; then + # shellcheck disable=SC1091 + source "$PROJECT_ROOT/tests/performance/.auth_token" +fi + +AUTH_HEADER="" +if [ -n "${MCPGATEWAY_BEARER_TOKEN:-}" ]; then + AUTH_HEADER="Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN" + info "Using authentication token" +fi + +# Check hey is installed +if ! command -v hey &>/dev/null; then + error "hey is not installed" + exit 1 +fi + +log "šŸ—„ļø Database Connection Pool Performance Test" +log "Profile: $PROFILE" +log "Gateway: $GATEWAY_URL" +echo "" + +# Test 1: Connection pool stress - increasing concurrency +log "════════════════════════════════════════════════════════" +log "Test 1: Connection Pool Stress (Increasing Concurrency)" +log "════════════════════════════════════════════════════════" + +for concurrency in 10 25 50 100 150 200; do + log "Testing with $concurrency concurrent connections..." 
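+  # Each pass sends $REQUESTS requests at the given concurrency. Levels above
+  # the configured db_pool_size + db_pool_max_overflow are expected to queue or
+  # error; surfacing that threshold is the point of this stress test.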
+
+  output_file="$RESULTS_DIR/db_pool_stress_${concurrency}_${PROFILE}_${TIMESTAMP}.txt"
+
+  hey_cmd=(
+    hey
+    -n "$REQUESTS"
+    -c "$concurrency"
+    -m POST
+    -T "application/json"
+    -D "$PROJECT_ROOT/tests/performance/payloads/tools/list_tools.json"
+    -t "$TIMEOUT"
+  )
+
+  if [ -n "$AUTH_HEADER" ]; then
+    hey_cmd+=(-H "$AUTH_HEADER")
+  fi
+
+  hey_cmd+=("$GATEWAY_URL/rpc")
+
+  "${hey_cmd[@]}" 2>&1 | tee "$output_file"
+
+  # Check error rate (grep -c keeps the count a single integer even when nothing matches)
+  error_count=$(grep -c "Error" "$output_file" 2>/dev/null || true)
+  if [ "${error_count:-0}" -gt 0 ]; then
+    error "āš ļø Detected $error_count errors at concurrency $concurrency"
+    error "Possible connection pool exhaustion"
+  else
+    log "āœ… No errors at concurrency $concurrency"
+  fi
+
+  # Cool down between tests
+  sleep 5
+done
+
+echo ""
+
+# Test 2: Sustained load - long duration
+log "════════════════════════════════════════════════════════"
+log "Test 2: Sustained Load (Connection Pool Stability)"
+log "════════════════════════════════════════════════════════"
+
+log "Running sustained test for 60 seconds at 50 concurrent connections..."
+
+output_file="$RESULTS_DIR/db_sustained_load_${PROFILE}_${TIMESTAMP}.txt"
+
+hey_cmd=(
+  hey
+  -z 60s
+  -c 50
+  -m POST
+  -T "application/json"
+  -D "$PROJECT_ROOT/tests/performance/payloads/tools/list_tools.json"
+  -t "$TIMEOUT"
+)
+
+if [ -n "$AUTH_HEADER" ]; then
+  hey_cmd+=(-H "$AUTH_HEADER")
+fi
+
+hey_cmd+=("$GATEWAY_URL/rpc")
+
+"${hey_cmd[@]}" 2>&1 | tee "$output_file"
+
+echo ""
+
+# Test 3: Burst load - connection acquisition speed
+log "════════════════════════════════════════════════════════"
+log "Test 3: Burst Load (Connection Acquisition Speed)"
+log "════════════════════════════════════════════════════════"
+
+for burst_size in 100 500 1000; do
+  log "Testing burst of $burst_size requests with high concurrency..."
+
+  output_file="$RESULTS_DIR/db_burst_${burst_size}_${PROFILE}_${TIMESTAMP}.txt"
+
+  hey_cmd=(
+    hey
+    -n "$burst_size"
+    -c 100
+    -m POST
+    -T "application/json"
+    -D "$PROJECT_ROOT/tests/performance/payloads/tools/list_tools.json"
+    -t "$TIMEOUT"
+  )
+
+  if [ -n "$AUTH_HEADER" ]; then
+    hey_cmd+=(-H "$AUTH_HEADER")
+  fi
+
+  hey_cmd+=("$GATEWAY_URL/rpc")
+
+  "${hey_cmd[@]}" 2>&1 | tee "$output_file"
+
+  sleep 3
+done
+
+echo ""
+
+# Test 4: Connection pool recovery - test after overload
+log "════════════════════════════════════════════════════════"
+log "Test 4: Connection Pool Recovery"
+log "════════════════════════════════════════════════════════"
+
+log "Step 1: Overload the connection pool..."
+# Build the overload command as an array so the optional auth header is passed
+# as a single, correctly quoted argument
+hey_cmd=(hey -n 2000 -c 300 -m POST -T "application/json"
+    -D "$PROJECT_ROOT/tests/performance/payloads/tools/list_tools.json"
+    -t "$TIMEOUT")
+if [ -n "$AUTH_HEADER" ]; then hey_cmd+=(-H "$AUTH_HEADER"); fi
+"${hey_cmd[@]}" "$GATEWAY_URL/rpc" > /dev/null 2>&1 || true
+
+log "Step 2: Wait for recovery (10 seconds)..."
+sleep 10
+
+log "Step 3: Test normal load after recovery..."
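+# The recovery criterion is a heuristic: a healthy pool should serve this
+# modest load (500 requests, 25 concurrent) with zero hey-reported errors once
+# the overload burst has drained; persistent errors usually point to leaked
+# connections or overflow slots that were never returned to the pool.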
+output_file="$RESULTS_DIR/db_recovery_test_${PROFILE}_${TIMESTAMP}.txt" + +hey_cmd=( + hey + -n 500 + -c 25 + -m POST + -T "application/json" + -D "$PROJECT_ROOT/tests/performance/payloads/tools/list_tools.json" + -t "$TIMEOUT" +) + +if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") +fi + +hey_cmd+=("$GATEWAY_URL/rpc") + +"${hey_cmd[@]}" 2>&1 | tee "$output_file" + +# Check recovery +error_count=$(grep "Error" "$output_file" 2>/dev/null | wc -l || echo 0) +if [ "$error_count" -eq 0 ] 2>/dev/null; then + log "āœ… Connection pool recovered successfully" +else + error "āš ļø Connection pool recovery issues detected" +fi + +echo "" +log "āœ… Database benchmark completed" +log "Results directory: $RESULTS_DIR" diff --git a/tests/performance/scenarios/gateway-core-benchmark.sh b/tests/performance/scenarios/gateway-core-benchmark.sh new file mode 100755 index 000000000..cbe4278eb --- /dev/null +++ b/tests/performance/scenarios/gateway-core-benchmark.sh @@ -0,0 +1,268 @@ +#!/usr/bin/env bash +# ============================================================================== +# Gateway Core Performance Testing +# Tests gateway internals without MCP server dependencies +# ============================================================================== + +set -Eeuo pipefail + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log() { echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*"; } +info() { echo -e "${BLUE}[INFO]${NC} $*"; } +error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } + +# Script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." &>/dev/null && pwd)" + +# Configuration +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +PROFILE="${PROFILE:-medium}" +RESULTS_DIR="${RESULTS_DIR:-$PROJECT_ROOT/tests/performance/results}" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +# Load profile +PROFILE_FILE="$PROJECT_ROOT/tests/performance/profiles/$PROFILE.env" +if [ -f "$PROFILE_FILE" ]; then + # shellcheck disable=SC1090 + source "$PROFILE_FILE" +fi + +REQUESTS="${REQUESTS:-10000}" +CONCURRENCY="${CONCURRENCY:-50}" +TIMEOUT="${TIMEOUT:-60}" + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Load auth token +if [ -f "$PROJECT_ROOT/tests/performance/.auth_token" ]; then + # shellcheck disable=SC1091 + source "$PROJECT_ROOT/tests/performance/.auth_token" +fi + +AUTH_HEADER="" +if [ -n "${MCPGATEWAY_BEARER_TOKEN:-}" ]; then + AUTH_HEADER="Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN" + info "Using authentication token" +fi + +# Check hey is installed +if ! 
command -v hey &>/dev/null; then + error "hey is not installed" + exit 1 +fi + +log "šŸ”§ Gateway Core Performance Test" +log "Profile: $PROFILE" +log "Requests: $REQUESTS" +log "Concurrency: $CONCURRENCY" +log "Gateway: $GATEWAY_URL" +echo "" + +# Test 1: Health endpoint (unauthenticated) +log "════════════════════════════════════════════════════════" +log "Test 1: Health Check Endpoint (Unauthenticated)" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_health_unauth_${PROFILE}_${TIMESTAMP}.txt" + +hey -n "$REQUESTS" -c "$CONCURRENCY" -t "$TIMEOUT" \ + "$GATEWAY_URL/health" 2>&1 | tee "$output_file" + +echo "" + +# Test 2: Health endpoint (authenticated) +log "════════════════════════════════════════════════════════" +log "Test 2: Health Check Endpoint (Authenticated)" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_health_auth_${PROFILE}_${TIMESTAMP}.txt" + +hey_cmd=(hey -n "$REQUESTS" -c "$CONCURRENCY" -t "$TIMEOUT") +if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") +fi +hey_cmd+=("$GATEWAY_URL/health") + +"${hey_cmd[@]}" 2>&1 | tee "$output_file" + +echo "" + +# Test 3: Admin API - List Tools +log "════════════════════════════════════════════════════════" +log "Test 3: Admin API - List Tools (Registry Performance)" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_admin_list_tools_${PROFILE}_${TIMESTAMP}.txt" + +hey_cmd=(hey -n "$REQUESTS" -c "$CONCURRENCY" -t "$TIMEOUT") +if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") +fi +hey_cmd+=("$GATEWAY_URL/tools") + +"${hey_cmd[@]}" 2>&1 | tee "$output_file" + +echo "" + +# Test 4: Admin API - List Servers +log "════════════════════════════════════════════════════════" +log "Test 4: Admin API - List Servers" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_admin_list_servers_${PROFILE}_${TIMESTAMP}.txt" + +hey_cmd=(hey -n "$REQUESTS" -c "$CONCURRENCY" -t "$TIMEOUT") +if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") +fi +hey_cmd+=("$GATEWAY_URL/servers") + +"${hey_cmd[@]}" 2>&1 | tee "$output_file" + +echo "" + +# Test 5: Admin API - List Gateways (Federation) +log "════════════════════════════════════════════════════════" +log "Test 5: Admin API - List Gateways (Federation Discovery)" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_admin_list_gateways_${PROFILE}_${TIMESTAMP}.txt" + +hey_cmd=(hey -n "$REQUESTS" -c "$CONCURRENCY" -t "$TIMEOUT") +if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") +fi +hey_cmd+=("$GATEWAY_URL/gateways") + +"${hey_cmd[@]}" 2>&1 | tee "$output_file" + +echo "" + +# Test 6: Metrics endpoint +log "════════════════════════════════════════════════════════" +log "Test 6: Prometheus Metrics Endpoint" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_metrics_${PROFILE}_${TIMESTAMP}.txt" + +hey -n 1000 -c 10 -t "$TIMEOUT" \ + "$GATEWAY_URL/metrics" 2>&1 | tee "$output_file" + +echo "" + +# Test 7: OpenAPI spec +log "════════════════════════════════════════════════════════" +log "Test 7: OpenAPI Specification Endpoint" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_openapi_${PROFILE}_${TIMESTAMP}.txt" + +hey -n 1000 -c 10 -t "$TIMEOUT" \ + "$GATEWAY_URL/openapi.json" 2>&1 | tee "$output_file" + +echo "" + +# Test 8: 
Static file serving (if admin UI enabled) +log "════════════════════════════════════════════════════════" +log "Test 8: Admin UI Static Files" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_admin_ui_${PROFILE}_${TIMESTAMP}.txt" + +hey -n 5000 -c 25 -t "$TIMEOUT" \ + "$GATEWAY_URL/admin" 2>&1 | tee "$output_file" + +echo "" + +# Test 9: Authentication endpoint +log "════════════════════════════════════════════════════════" +log "Test 9: Token Generation (Login Performance)" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_token_generation_${PROFILE}_${TIMESTAMP}.txt" + +# Create login payload +LOGIN_PAYLOAD=$(cat < /tmp/login_payload.json + +hey -n 1000 -c 10 -t "$TIMEOUT" \ + -m POST \ + -T "application/json" \ + -D /tmp/login_payload.json \ + "$GATEWAY_URL/token" 2>&1 | tee "$output_file" || log "Token endpoint might not exist" + +rm -f /tmp/login_payload.json + +echo "" + +# Test 10: Rate limiting behavior +log "════════════════════════════════════════════════════════" +log "Test 10: Rate Limiting Test" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_rate_limiting_${PROFILE}_${TIMESTAMP}.txt" + +log "Sending rapid burst to test rate limiting..." +hey -n 5000 -c 100 -t 10 \ + "$GATEWAY_URL/health" 2>&1 | tee "$output_file" + +# Check for 429 responses +rate_limit_hits=$(grep "429" "$output_file" 2>/dev/null | wc -l || echo 0) +if [ "$rate_limit_hits" -gt 0 ] 2>/dev/null; then + log "āœ… Rate limiting working - $rate_limit_hits requests throttled" +else + info "ā„¹ļø No rate limiting detected (may not be configured)" +fi + +echo "" + +# Test 11: Error handling - invalid JSON +log "════════════════════════════════════════════════════════" +log "Test 11: Error Handling - Invalid JSON" +log "════════════════════════════════════════════════════════" + +output_file="$RESULTS_DIR/gateway_error_handling_${PROFILE}_${TIMESTAMP}.txt" + +echo "invalid json{" > /tmp/invalid.json + +hey_cmd=( + hey -n 100 -c 5 -t "$TIMEOUT" + -m POST + -T "application/json" + -D /tmp/invalid.json +) + +if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") +fi + +hey_cmd+=("$GATEWAY_URL/rpc") + +"${hey_cmd[@]}" 2>&1 | tee "$output_file" || true + +rm -f /tmp/invalid.json + +# Check for proper 400 responses +status_400=$(grep "400" "$output_file" 2>/dev/null | wc -l || echo 0) +if [ "$status_400" -gt 0 ] 2>/dev/null; then + log "āœ… Proper error handling - $status_400 Ɨ 400 Bad Request" +fi + +echo "" +log "āœ… Gateway core benchmark completed" +log "Results directory: $RESULTS_DIR" diff --git a/tests/performance/scenarios/mixed-workload.sh b/tests/performance/scenarios/mixed-workload.sh new file mode 100755 index 000000000..bca461786 --- /dev/null +++ b/tests/performance/scenarios/mixed-workload.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +# ============================================================================== +# Mixed Workload Performance Benchmark +# Tests realistic mixed workload patterns (tools + resources + prompts) +# ============================================================================== + +set -Eeuo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*" +} + +info() { + echo -e "${BLUE}[INFO]${NC} $*" +} + +error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +# Get script directory 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." &>/dev/null && pwd)" + +# Configuration +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +PROFILE="${PROFILE:-medium}" +RESULTS_DIR="$PROJECT_ROOT/tests/performance/results" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +# Load profile +PROFILE_FILE="$PROJECT_ROOT/tests/performance/profiles/$PROFILE.env" +if [ ! -f "$PROFILE_FILE" ]; then + error "Profile $PROFILE not found at $PROFILE_FILE" + exit 1 +fi + +# shellcheck disable=SC1090 +source "$PROFILE_FILE" + +log "šŸ”§ Mixed Workload Performance Benchmark" +log "Profile: $PROFILE" +log "Requests per test: $REQUESTS" +log "Concurrency: $CONCURRENCY" +log "Gateway: $GATEWAY_URL" + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Load auth token if available +if [ -f "$PROJECT_ROOT/tests/performance/.auth_token" ]; then + # shellcheck disable=SC1091 + source "$PROJECT_ROOT/tests/performance/.auth_token" +fi + +AUTH_HEADER="" +if [ -n "${MCPGATEWAY_BEARER_TOKEN:-}" ]; then + AUTH_HEADER="Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN" + info "Using authentication token" +fi + +# Check if hey is installed +if ! command -v hey &>/dev/null; then + error "hey is not installed. Install it with: brew install hey (macOS) or go install github.com/rakyll/hey@latest" + exit 1 +fi + +# Array to store background process IDs +declare -a PIDS=() + +run_concurrent_test() { + local test_name=$1 + local payload_file=$2 + local endpoint="${3:-$GATEWAY_URL/rpc}" + + log "Starting concurrent test: $test_name" + + local output_file="$RESULTS_DIR/mixed_${test_name}_${PROFILE}_${TIMESTAMP}.txt" + + local hey_cmd=( + hey + -n "$REQUESTS" + -c "$CONCURRENCY" + -m POST + -T "application/json" + -D "$payload_file" + -t "$TIMEOUT" + ) + + if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") + fi + + hey_cmd+=("$endpoint") + + # Run in background and capture PID + "${hey_cmd[@]}" > "$output_file" 2>&1 & + PIDS+=($!) + + info "Started background test $test_name (PID: ${PIDS[-1]})" +} + +log "════════════════════════════════════════════════════════" +log "Mixed Workload Test - Running All Tests Concurrently" +log "════════════════════════════════════════════════════════" + +# Start all tests concurrently to simulate realistic mixed load +run_concurrent_test "list_tools" \ + "$PROJECT_ROOT/tests/performance/payloads/tools/list_tools.json" + +run_concurrent_test "get_system_time" \ + "$PROJECT_ROOT/tests/performance/payloads/tools/get_system_time.json" + +run_concurrent_test "convert_time" \ + "$PROJECT_ROOT/tests/performance/payloads/tools/convert_time.json" + +run_concurrent_test "list_resources" \ + "$PROJECT_ROOT/tests/performance/payloads/resources/list_resources.json" + +run_concurrent_test "read_timezone_info" \ + "$PROJECT_ROOT/tests/performance/payloads/resources/read_timezone_info.json" + +run_concurrent_test "list_prompts" \ + "$PROJECT_ROOT/tests/performance/payloads/prompts/list_prompts.json" + +# Wait for all background jobs to complete +log "Waiting for all concurrent tests to complete..." 
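+# "wait <pid>" reports the exit status of that specific background hey run, so
+# each scenario is scored individually; set -e does not abort here because the
+# wait is used as an if condition.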
+FAILED=0 +for pid in "${PIDS[@]}"; do + if wait "$pid"; then + info "Process $pid completed successfully" + else + error "Process $pid failed" + FAILED=$((FAILED + 1)) + fi +done + +if [ $FAILED -eq 0 ]; then + log "āœ… Mixed workload benchmark completed successfully" + log "Results directory: $RESULTS_DIR" + exit 0 +else + error "āŒ Mixed workload benchmark completed with $FAILED failures" + exit 1 +fi diff --git a/tests/performance/scenarios/prompts-benchmark.sh b/tests/performance/scenarios/prompts-benchmark.sh new file mode 100755 index 000000000..1d5fb5416 --- /dev/null +++ b/tests/performance/scenarios/prompts-benchmark.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +# ============================================================================== +# Prompt Execution Performance Benchmark +# Tests MCP prompt execution performance through the gateway +# ============================================================================== + +set -Eeuo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*" +} + +info() { + echo -e "${BLUE}[INFO]${NC} $*" +} + +error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." &>/dev/null && pwd)" + +# Configuration +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +PROFILE="${PROFILE:-medium}" +RESULTS_DIR="$PROJECT_ROOT/tests/performance/results" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +# Load profile +PROFILE_FILE="$PROJECT_ROOT/tests/performance/profiles/$PROFILE.env" +if [ ! -f "$PROFILE_FILE" ]; then + error "Profile $PROFILE not found at $PROFILE_FILE" + exit 1 +fi + +# shellcheck disable=SC1090 +source "$PROFILE_FILE" + +log "šŸ”§ Prompt Execution Performance Benchmark" +log "Profile: $PROFILE" +log "Requests: $REQUESTS" +log "Concurrency: $CONCURRENCY" +log "Gateway: $GATEWAY_URL" + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Load auth token if available +if [ -f "$PROJECT_ROOT/tests/performance/.auth_token" ]; then + # shellcheck disable=SC1091 + source "$PROJECT_ROOT/tests/performance/.auth_token" +fi + +AUTH_HEADER="" +if [ -n "${MCPGATEWAY_BEARER_TOKEN:-}" ]; then + AUTH_HEADER="Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN" + info "Using authentication token" +fi + +# Check if hey is installed +if ! command -v hey &>/dev/null; then + error "hey is not installed. 
Install it with: brew install hey (macOS) or go install github.com/rakyll/hey@latest" + exit 1 +fi + +run_test() { + local test_name=$1 + local payload_file=$2 + local endpoint="${3:-$GATEWAY_URL/rpc}" + + log "Running test: $test_name" + + local output_file="$RESULTS_DIR/prompts_${test_name}_${PROFILE}_${TIMESTAMP}.txt" + + local hey_cmd=( + hey + -n "$REQUESTS" + -c "$CONCURRENCY" + -m POST + -T "application/json" + -D "$payload_file" + -t "$TIMEOUT" + ) + + if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") + fi + + hey_cmd+=("$endpoint") + + info "Command: ${hey_cmd[*]}" + + # Run and save results + "${hey_cmd[@]}" 2>&1 | tee "$output_file" + + log "Results saved to: $output_file" + echo "" +} + +# Test 1: List prompts (discovery) +log "════════════════════════════════════════════════════════" +log "Test 1: List Prompts (Discovery)" +log "════════════════════════════════════════════════════════" +run_test "list_prompts" \ + "$PROJECT_ROOT/tests/performance/payloads/prompts/list_prompts.json" \ + "$GATEWAY_URL/rpc" + +# Test 2: Get compare timezones prompt (prompt with arguments) +log "════════════════════════════════════════════════════════" +log "Test 2: Get Compare Timezones Prompt" +log "════════════════════════════════════════════════════════" +run_test "get_compare_timezones" \ + "$PROJECT_ROOT/tests/performance/payloads/prompts/get_compare_timezones.json" \ + "$GATEWAY_URL/rpc" + +# Test 3: Get customer greeting prompt (template with required and optional arguments) +log "════════════════════════════════════════════════════════" +log "Test 3: Get Customer Greeting Prompt (Template Arguments)" +log "════════════════════════════════════════════════════════" +run_test "get_customer_greeting" \ + "$PROJECT_ROOT/tests/performance/payloads/prompts/get_customer_greeting.json" \ + "$GATEWAY_URL/rpc" + +log "āœ… Prompt benchmark completed successfully" +log "Results directory: $RESULTS_DIR" diff --git a/tests/performance/scenarios/resources-benchmark.sh b/tests/performance/scenarios/resources-benchmark.sh new file mode 100755 index 000000000..a7b0ad76e --- /dev/null +++ b/tests/performance/scenarios/resources-benchmark.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +# ============================================================================== +# Resource Access Performance Benchmark +# Tests MCP resource access performance through the gateway +# ============================================================================== + +set -Eeuo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*" +} + +info() { + echo -e "${BLUE}[INFO]${NC} $*" +} + +error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." &>/dev/null && pwd)" + +# Configuration +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +PROFILE="${PROFILE:-medium}" +RESULTS_DIR="$PROJECT_ROOT/tests/performance/results" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +# Load profile +PROFILE_FILE="$PROJECT_ROOT/tests/performance/profiles/$PROFILE.env" +if [ ! 
-f "$PROFILE_FILE" ]; then + error "Profile $PROFILE not found at $PROFILE_FILE" + exit 1 +fi + +# shellcheck disable=SC1090 +source "$PROFILE_FILE" + +log "šŸ”§ Resource Access Performance Benchmark" +log "Profile: $PROFILE" +log "Requests: $REQUESTS" +log "Concurrency: $CONCURRENCY" +log "Gateway: $GATEWAY_URL" + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Load auth token if available +if [ -f "$PROJECT_ROOT/tests/performance/.auth_token" ]; then + # shellcheck disable=SC1091 + source "$PROJECT_ROOT/tests/performance/.auth_token" +fi + +AUTH_HEADER="" +if [ -n "${MCPGATEWAY_BEARER_TOKEN:-}" ]; then + AUTH_HEADER="Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN" + info "Using authentication token" +fi + +# Check if hey is installed +if ! command -v hey &>/dev/null; then + error "hey is not installed. Install it with: brew install hey (macOS) or go install github.com/rakyll/hey@latest" + exit 1 +fi + +run_test() { + local test_name=$1 + local payload_file=$2 + local endpoint="${3:-$GATEWAY_URL/rpc}" + + log "Running test: $test_name" + + local output_file="$RESULTS_DIR/resources_${test_name}_${PROFILE}_${TIMESTAMP}.txt" + + local hey_cmd=( + hey + -n "$REQUESTS" + -c "$CONCURRENCY" + -m POST + -T "application/json" + -D "$payload_file" + -t "$TIMEOUT" + ) + + if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") + fi + + hey_cmd+=("$endpoint") + + info "Command: ${hey_cmd[*]}" + + # Run and save results + "${hey_cmd[@]}" 2>&1 | tee "$output_file" + + log "Results saved to: $output_file" + echo "" +} + +# Test 1: List resources (discovery) +log "════════════════════════════════════════════════════════" +log "Test 1: List Resources (Discovery)" +log "════════════════════════════════════════════════════════" +run_test "list_resources" \ + "$PROJECT_ROOT/tests/performance/payloads/resources/list_resources.json" \ + "$GATEWAY_URL/rpc" + +# Test 2: Read welcome message (text resource) +log "════════════════════════════════════════════════════════" +log "Test 2: Read Welcome Message (Text Resource)" +log "════════════════════════════════════════════════════════" +run_test "read_timezone_info" \ + "$PROJECT_ROOT/tests/performance/payloads/resources/read_timezone_info.json" \ + "$GATEWAY_URL/rpc" + +# Test 3: Read API documentation (markdown resource) +log "════════════════════════════════════════════════════════" +log "Test 3: Read API Documentation (Markdown Resource)" +log "════════════════════════════════════════════════════════" +run_test "read_world_times" \ + "$PROJECT_ROOT/tests/performance/payloads/resources/read_world_times.json" \ + "$GATEWAY_URL/rpc" + +log "āœ… Resource benchmark completed successfully" +log "Results directory: $RESULTS_DIR" diff --git a/tests/performance/scenarios/tools-benchmark.sh b/tests/performance/scenarios/tools-benchmark.sh new file mode 100755 index 000000000..349db853c --- /dev/null +++ b/tests/performance/scenarios/tools-benchmark.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +# ============================================================================== +# Tool Invocation Performance Benchmark +# Tests MCP tool invocation performance through the gateway +# ============================================================================== + +set -Eeuo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*" +} + +info() { + echo -e "${BLUE}[INFO]${NC} $*" +} + +error() { + echo -e 
"${RED}[ERROR]${NC} $*" >&2 +} + +# Get script directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." &>/dev/null && pwd)" + +# Configuration +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +PROFILE="${PROFILE:-medium}" +RESULTS_DIR="$PROJECT_ROOT/tests/performance/results" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) + +# Load profile +PROFILE_FILE="$PROJECT_ROOT/tests/performance/profiles/$PROFILE.env" +if [ ! -f "$PROFILE_FILE" ]; then + error "Profile $PROFILE not found at $PROFILE_FILE" + exit 1 +fi + +# shellcheck disable=SC1090 +source "$PROFILE_FILE" + +log "šŸ”§ Tool Invocation Performance Benchmark" +log "Profile: $PROFILE" +log "Requests: $REQUESTS" +log "Concurrency: $CONCURRENCY" +log "Gateway: $GATEWAY_URL" + +# Create results directory +mkdir -p "$RESULTS_DIR" + +# Load auth token if available +if [ -f "$PROJECT_ROOT/tests/performance/.auth_token" ]; then + # shellcheck disable=SC1091 + source "$PROJECT_ROOT/tests/performance/.auth_token" +fi + +AUTH_HEADER="" +if [ -n "${MCPGATEWAY_BEARER_TOKEN:-}" ]; then + AUTH_HEADER="Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN" + info "Using authentication token" +fi + +# Check if hey is installed +if ! command -v hey &>/dev/null; then + error "hey is not installed. Install it with: brew install hey (macOS) or go install github.com/rakyll/hey@latest" + exit 1 +fi + +run_test() { + local test_name=$1 + local payload_file=$2 + local endpoint="${3:-$GATEWAY_URL/rpc}" + + log "Running test: $test_name" + + local output_file="$RESULTS_DIR/tools_${test_name}_${PROFILE}_${TIMESTAMP}.txt" + + local hey_cmd=( + hey + -n "$REQUESTS" + -c "$CONCURRENCY" + -m POST + -T "application/json" + -D "$payload_file" + -t "$TIMEOUT" + ) + + if [ -n "$AUTH_HEADER" ]; then + hey_cmd+=(-H "$AUTH_HEADER") + fi + + hey_cmd+=("$endpoint") + + info "Command: ${hey_cmd[*]}" + + # Run and save results + "${hey_cmd[@]}" 2>&1 | tee "$output_file" + + log "Results saved to: $output_file" + echo "" +} + +# Test 1: List tools (discovery) +log "════════════════════════════════════════════════════════" +log "Test 1: List Tools (Discovery)" +log "════════════════════════════════════════════════════════" +run_test "list_tools" \ + "$PROJECT_ROOT/tests/performance/payloads/tools/list_tools.json" \ + "$GATEWAY_URL/rpc" + +# Test 2: Get system time (simple tool invocation) +log "════════════════════════════════════════════════════════" +log "Test 2: Get System Time (Simple Tool Invocation)" +log "════════════════════════════════════════════════════════" +run_test "get_system_time" \ + "$PROJECT_ROOT/tests/performance/payloads/tools/get_system_time.json" \ + "$GATEWAY_URL/rpc" + +# Test 3: Convert time (complex tool invocation) +log "════════════════════════════════════════════════════════" +log "Test 3: Convert Time (Complex Tool Invocation)" +log "════════════════════════════════════════════════════════" +run_test "convert_time" \ + "$PROJECT_ROOT/tests/performance/payloads/tools/convert_time.json" \ + "$GATEWAY_URL/rpc" + +log "āœ… Tool benchmark completed successfully" +log "Results directory: $RESULTS_DIR" diff --git a/tests/performance/utils/baseline_manager.py b/tests/performance/utils/baseline_manager.py new file mode 100755 index 000000000..9b0fd5db9 --- /dev/null +++ b/tests/performance/utils/baseline_manager.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Baseline Manager + +Utilities for saving and loading performance test baselines. 
+Converts test results to standardized JSON format for comparison. +""" + +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Dict, Any, Optional +from datetime import datetime + + +class BaselineManager: + """Manage performance test baselines""" + + @staticmethod + def parse_hey_results(results_dir: Path) -> Dict[str, Dict]: + """ + Parse all hey output files in results directory + + Args: + results_dir: Directory containing hey output .txt files + + Returns: + Dictionary mapping test names to their metrics + """ + results = {} + + for txt_file in results_dir.glob('*.txt'): + # Skip non-hey output files + if 'system_metrics' in txt_file.name or 'docker_stats' in txt_file.name: + continue + if 'prometheus' in txt_file.name or 'logs' in txt_file.name: + continue + + # Extract test name from filename + # Format: {category}_{test_name}_{profile}_{timestamp}.txt + parts = txt_file.stem.split('_') + if len(parts) >= 2: + test_name = '_'.join(parts[:-2]) # Remove profile and timestamp + else: + test_name = txt_file.stem + + # Parse hey output + metrics = BaselineManager._parse_hey_output(txt_file) + if metrics: + results[test_name] = metrics + + return results + + @staticmethod + def _parse_hey_output(file_path: Path) -> Optional[Dict]: + """Parse hey output file to extract metrics""" + try: + with open(file_path) as f: + content = f.read() + + metrics = {} + + # Extract summary metrics + if match := re.search(r'Requests/sec:\s+([\d.]+)', content): + metrics['rps'] = float(match.group(1)) + + if match := re.search(r'Average:\s+([\d.]+)\s+secs', content): + metrics['avg'] = float(match.group(1)) * 1000 # Convert to ms + + if match := re.search(r'Slowest:\s+([\d.]+)\s+secs', content): + metrics['max'] = float(match.group(1)) * 1000 + + if match := re.search(r'Fastest:\s+([\d.]+)\s+secs', content): + metrics['min'] = float(match.group(1)) * 1000 + + # Extract percentiles + latency_section = re.search(r'Latency distribution:(.*?)(?=\n\n|\Z)', content, re.DOTALL) + if latency_section: + latency_text = latency_section.group(1) + + if match := re.search(r'10%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p10'] = float(match.group(1)) * 1000 + + if match := re.search(r'25%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p25'] = float(match.group(1)) * 1000 + + if match := re.search(r'50%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p50'] = float(match.group(1)) * 1000 + + if match := re.search(r'75%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p75'] = float(match.group(1)) * 1000 + + if match := re.search(r'90%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p90'] = float(match.group(1)) * 1000 + + if match := re.search(r'95%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p95'] = float(match.group(1)) * 1000 + + if match := re.search(r'99%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p99'] = float(match.group(1)) * 1000 + + # Extract status codes + status_codes = {} + status_section = re.search(r'Status code distribution:(.*?)(?=\n\n|\Z)', content, re.DOTALL) + if status_section: + for line in status_section.group(1).strip().split('\n'): + if match := re.search(r'\[(\d+)\]\s+(\d+)\s+responses', line): + status_codes[int(match.group(1))] = int(match.group(2)) + + metrics['status_codes'] = status_codes + + # Calculate error rate + total_responses = sum(status_codes.values()) + error_responses = sum(count for code, count in status_codes.items() if code >= 400) + metrics['error_rate'] = (error_responses / total_responses * 100) 
if total_responses > 0 else 0 + metrics['total_requests'] = total_responses + + return metrics + + except Exception as e: + print(f"Warning: Failed to parse {file_path}: {e}", file=sys.stderr) + return None + + @staticmethod + def save_baseline( + results_dir: Path, + output_file: Path, + metadata: Optional[Dict] = None + ) -> Dict: + """ + Save test results as baseline + + Args: + results_dir: Directory containing test result files + output_file: Path to save baseline JSON + metadata: Optional metadata to include + + Returns: + Baseline data dictionary + """ + # Parse results + results = BaselineManager.parse_hey_results(results_dir) + + # Create baseline structure + baseline = { + 'version': '1.0', + 'created': datetime.now().isoformat(), + 'metadata': metadata or {}, + 'results': results, + 'summary': { + 'total_tests': len(results), + 'avg_rps': sum(r.get('rps', 0) for r in results.values()) / len(results) if results else 0, + 'avg_p95': sum(r.get('p95', 0) for r in results.values()) / len(results) if results else 0, + } + } + + # Save to file + output_file.parent.mkdir(parents=True, exist_ok=True) + with open(output_file, 'w') as f: + json.dump(baseline, f, indent=2) + + print(f"āœ… Baseline saved: {output_file}") + print(f" Tests: {baseline['summary']['total_tests']}") + print(f" Average RPS: {baseline['summary']['avg_rps']:.1f}") + print(f" Average p95: {baseline['summary']['avg_p95']:.1f}ms") + + return baseline + + @staticmethod + def load_baseline(file_path: Path) -> Dict: + """Load baseline from JSON file""" + with open(file_path) as f: + baseline = json.load(f) + + print(f"āœ… Loaded baseline: {file_path}") + print(f" Created: {baseline.get('created', 'Unknown')}") + print(f" Tests: {baseline.get('summary', {}).get('total_tests', 0)}") + + return baseline + + @staticmethod + def list_baselines(baselines_dir: Path): + """List all available baselines""" + print(f"\nAvailable baselines in {baselines_dir}:") + print("-" * 80) + + baselines = sorted(baselines_dir.glob('*.json')) + if not baselines: + print("No baselines found") + return + + for baseline_file in baselines: + try: + with open(baseline_file) as f: + data = json.load(f) + + created = data.get('created', 'Unknown') + metadata = data.get('metadata', {}) + profile = metadata.get('profile', 'Unknown') + tests = data.get('summary', {}).get('total_tests', 0) + + print(f"\n{baseline_file.name}") + print(f" Created: {created}") + print(f" Profile: {profile}") + print(f" Tests: {tests}") + + # Show configuration if available + config = metadata.get('config', {}) + if config: + print(f" Config:") + for key, value in config.items(): + print(f" {key}: {value}") + + except Exception as e: + print(f"\n{baseline_file.name} - Error: {e}") + + +def main(): + parser = argparse.ArgumentParser( + description='Manage performance test baselines' + ) + + subparsers = parser.add_subparsers(dest='command', help='Command to execute') + + # Save baseline + save_parser = subparsers.add_parser('save', help='Save results as baseline') + save_parser.add_argument( + 'results_dir', + type=Path, + help='Directory containing test results' + ) + save_parser.add_argument( + '--output', + type=Path, + required=True, + help='Output baseline file' + ) + save_parser.add_argument( + '--profile', + help='Test profile name' + ) + save_parser.add_argument( + '--server-profile', + help='Server profile name' + ) + save_parser.add_argument( + '--infrastructure', + help='Infrastructure profile name' + ) + save_parser.add_argument( + '--metadata', + type=json.loads, + 
help='Additional metadata as JSON string' + ) + + # Load baseline + load_parser = subparsers.add_parser('load', help='Load and display baseline') + load_parser.add_argument( + 'baseline_file', + type=Path, + help='Baseline JSON file' + ) + + # List baselines + list_parser = subparsers.add_parser('list', help='List available baselines') + list_parser.add_argument( + '--dir', + type=Path, + default=Path('baselines'), + help='Baselines directory' + ) + + args = parser.parse_args() + + try: + if args.command == 'save': + # Build metadata + metadata = args.metadata or {} + if args.profile: + metadata['profile'] = args.profile + if args.server_profile: + metadata['server_profile'] = args.server_profile + if args.infrastructure: + metadata['infrastructure'] = args.infrastructure + metadata['timestamp'] = datetime.now().isoformat() + + BaselineManager.save_baseline( + args.results_dir, + args.output, + metadata + ) + + elif args.command == 'load': + baseline = BaselineManager.load_baseline(args.baseline_file) + + # Print summary + print("\nResults:") + for test_name, metrics in baseline.get('results', {}).items(): + rps = metrics.get('rps', 0) + p95 = metrics.get('p95', 0) + print(f" {test_name:40} {rps:8.1f} rps {p95:6.1f}ms p95") + + elif args.command == 'list': + BaselineManager.list_baselines(args.dir) + + else: + parser.print_help() + return 1 + + return 0 + + except Exception as e: + print(f"āŒ Error: {e}", file=sys.stderr) + import traceback + traceback.print_exc() + return 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tests/performance/utils/check-services.sh b/tests/performance/utils/check-services.sh new file mode 100755 index 000000000..340855fc6 --- /dev/null +++ b/tests/performance/utils/check-services.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# ============================================================================== +# Service health checker for performance tests +# Verifies that gateway and fast-time-server are ready +# ============================================================================== + +set -Eeuo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*" +} + +error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +warn() { + echo -e "${YELLOW}[WARN]${NC} $*" +} + +# Configuration +GATEWAY_URL="${GATEWAY_URL:-http://localhost:4444}" +FAST_TIME_URL="${FAST_TIME_URL:-http://localhost:8888}" +MAX_RETRIES="${MAX_RETRIES:-30}" +RETRY_DELAY="${RETRY_DELAY:-2}" + +check_service() { + local name=$1 + local url=$2 + local max_retries=$3 + local retry_delay=$4 + + log "Checking $name at $url..." + + for i in $(seq 1 "$max_retries"); do + if curl -f -s -o /dev/null -w "%{http_code}" "$url/health" | grep -q "200"; then + log "āœ… $name is healthy" + return 0 + fi + + warn "Waiting for $name... ($i/$max_retries)" + sleep "$retry_delay" + done + + error "$name failed to become healthy after $max_retries attempts" + return 1 +} + +# Check gateway +if ! check_service "Gateway" "$GATEWAY_URL" "$MAX_RETRIES" "$RETRY_DELAY"; then + error "Gateway is not available. Please start it with: make compose-up" + exit 1 +fi + +# Check fast-time-server +if ! check_service "Fast Time Server" "$FAST_TIME_URL" "$MAX_RETRIES" "$RETRY_DELAY"; then + error "Fast Time Server is not available. 
Please start it with: make compose-up" + exit 1 +fi + +log "āœ… All services are healthy and ready for testing" diff --git a/tests/performance/utils/compare_results.py b/tests/performance/utils/compare_results.py new file mode 100755 index 000000000..c49daf97b --- /dev/null +++ b/tests/performance/utils/compare_results.py @@ -0,0 +1,376 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Performance Results Comparison Utility + +Compares performance test results across different configurations. +Supports baseline comparison, regression detection, and cost-benefit analysis. +""" + +import argparse +import json +import sys +from pathlib import Path +from typing import Dict, List, Any, Optional +from datetime import datetime + + +class ResultsComparator: + """Compare performance test results""" + + def __init__(self, baseline_file: Path, current_file: Path): + self.baseline = self._load_results(baseline_file) + self.current = self._load_results(current_file) + + def _load_results(self, file_path: Path) -> Dict: + """Load results from JSON file""" + with open(file_path) as f: + return json.load(f) + + def compare(self) -> Dict[str, Any]: + """ + Compare current results against baseline + + Returns: + Dictionary containing comparison results + """ + comparison = { + 'baseline_info': self.baseline.get('metadata', {}), + 'current_info': self.current.get('metadata', {}), + 'test_comparisons': [], + 'summary': {}, + 'regressions': [], + 'improvements': [], + 'verdict': None + } + + # Compare each test + baseline_tests = self.baseline.get('results', {}) + current_tests = self.current.get('results', {}) + + for test_name in set(list(baseline_tests.keys()) + list(current_tests.keys())): + baseline_metrics = baseline_tests.get(test_name, {}) + current_metrics = current_tests.get(test_name, {}) + + if not baseline_metrics or not current_metrics: + continue + + test_comparison = self._compare_test( + test_name, baseline_metrics, current_metrics + ) + comparison['test_comparisons'].append(test_comparison) + + # Track regressions and improvements + if test_comparison['has_regression']: + comparison['regressions'].append({ + 'test': test_name, + 'metrics': test_comparison['regressed_metrics'] + }) + + if test_comparison['has_improvement']: + comparison['improvements'].append({ + 'test': test_name, + 'metrics': test_comparison['improved_metrics'] + }) + + # Calculate summary statistics + comparison['summary'] = self._calculate_summary(comparison['test_comparisons']) + + # Determine overall verdict + comparison['verdict'] = self._determine_verdict(comparison) + + return comparison + + def _compare_test( + self, + test_name: str, + baseline: Dict, + current: Dict + ) -> Dict[str, Any]: + """Compare metrics for a single test""" + comparison = { + 'test_name': test_name, + 'metrics': {}, + 'has_regression': False, + 'has_improvement': False, + 'regressed_metrics': [], + 'improved_metrics': [] + } + + # Metrics to compare + metric_comparisons = { + 'rps': {'higher_is_better': True, 'threshold_pct': 10}, + 'p50': {'higher_is_better': False, 'threshold_pct': 15}, + 'p95': {'higher_is_better': False, 'threshold_pct': 15}, + 'p99': {'higher_is_better': False, 'threshold_pct': 15}, + 'error_rate': {'higher_is_better': False, 'threshold_pct': 5}, + } + + for metric, config in metric_comparisons.items(): + if metric not in baseline or metric not in current: + continue + + baseline_val = baseline[metric] + current_val = current[metric] + + if baseline_val == 0: + continue + + change_pct = ((current_val - 
baseline_val) / baseline_val) * 100 + + metric_info = { + 'baseline': baseline_val, + 'current': current_val, + 'change': current_val - baseline_val, + 'change_pct': change_pct, + 'threshold_pct': config['threshold_pct'], + 'status': 'unchanged' + } + + # Determine if regression or improvement + if config['higher_is_better']: + if change_pct < -config['threshold_pct']: + metric_info['status'] = 'regression' + comparison['has_regression'] = True + comparison['regressed_metrics'].append(metric) + elif change_pct > config['threshold_pct']: + metric_info['status'] = 'improvement' + comparison['has_improvement'] = True + comparison['improved_metrics'].append(metric) + else: + if change_pct > config['threshold_pct']: + metric_info['status'] = 'regression' + comparison['has_regression'] = True + comparison['regressed_metrics'].append(metric) + elif change_pct < -config['threshold_pct']: + metric_info['status'] = 'improvement' + comparison['has_improvement'] = True + comparison['improved_metrics'].append(metric) + + comparison['metrics'][metric] = metric_info + + return comparison + + def _calculate_summary(self, test_comparisons: List[Dict]) -> Dict: + """Calculate summary statistics across all tests""" + summary = { + 'total_tests': len(test_comparisons), + 'tests_with_regressions': 0, + 'tests_with_improvements': 0, + 'avg_throughput_change_pct': 0, + 'avg_latency_change_pct': 0, + 'total_regressions': 0, + 'total_improvements': 0 + } + + throughput_changes = [] + latency_changes = [] + + for test in test_comparisons: + if test['has_regression']: + summary['tests_with_regressions'] += 1 + summary['total_regressions'] += len(test['regressed_metrics']) + + if test['has_improvement']: + summary['tests_with_improvements'] += 1 + summary['total_improvements'] += len(test['improved_metrics']) + + # Collect throughput changes + if 'rps' in test['metrics']: + throughput_changes.append(test['metrics']['rps']['change_pct']) + + # Collect latency changes (average of p50, p95, p99) + latency_metrics = ['p50', 'p95', 'p99'] + test_latency_changes = [ + test['metrics'][m]['change_pct'] + for m in latency_metrics + if m in test['metrics'] + ] + if test_latency_changes: + latency_changes.append(sum(test_latency_changes) / len(test_latency_changes)) + + # Calculate averages + if throughput_changes: + summary['avg_throughput_change_pct'] = sum(throughput_changes) / len(throughput_changes) + + if latency_changes: + summary['avg_latency_change_pct'] = sum(latency_changes) / len(latency_changes) + + return summary + + def _determine_verdict(self, comparison: Dict) -> str: + """Determine overall verdict (recommended, caution, not_recommended)""" + summary = comparison['summary'] + regressions = len(comparison['regressions']) + + # Critical regressions + if regressions > 0: + if summary['avg_throughput_change_pct'] < -20: + return 'not_recommended' + if summary['avg_latency_change_pct'] > 25: + return 'not_recommended' + if regressions >= 3: + return 'caution' + + # Significant improvements + if summary['avg_throughput_change_pct'] > 15 and summary['avg_latency_change_pct'] < -10: + return 'recommended' + + # Mixed results + if regressions > 0: + return 'caution' + + return 'acceptable' + + def print_comparison(self, comparison: Dict, detailed: bool = True): + """Print comparison results to console""" + print("\n" + "=" * 80) + print("PERFORMANCE COMPARISON REPORT") + print("=" * 80) + + # Header + print(f"\nBaseline: {comparison['baseline_info'].get('timestamp', 'Unknown')}") + print(f" Profile: 
{comparison['baseline_info'].get('profile', 'Unknown')}") + print(f" Config: {comparison['baseline_info'].get('config', {})}") + + print(f"\nCurrent: {comparison['current_info'].get('timestamp', 'Unknown')}") + print(f" Profile: {comparison['current_info'].get('profile', 'Unknown')}") + print(f" Config: {comparison['current_info'].get('config', {})}") + + # Summary + print("\n" + "-" * 80) + print("SUMMARY") + print("-" * 80) + summary = comparison['summary'] + print(f"Total Tests: {summary['total_tests']}") + print(f"Tests with Regressions: {summary['tests_with_regressions']}") + print(f"Tests with Improvements: {summary['tests_with_improvements']}") + print(f"\nAverage Throughput Change: {summary['avg_throughput_change_pct']:+.1f}%") + print(f"Average Latency Change: {summary['avg_latency_change_pct']:+.1f}%") + + # Regressions + if comparison['regressions']: + print("\n" + "-" * 80) + print("āš ļø REGRESSIONS DETECTED") + print("-" * 80) + for regression in comparison['regressions']: + print(f"\n{regression['test']}:") + for metric in regression['metrics']: + print(f" - {metric}") + + # Improvements + if comparison['improvements']: + print("\n" + "-" * 80) + print("āœ… IMPROVEMENTS") + print("-" * 80) + for improvement in comparison['improvements']: + print(f"\n{improvement['test']}:") + for metric in improvement['metrics']: + print(f" - {metric}") + + # Detailed comparison + if detailed: + print("\n" + "-" * 80) + print("DETAILED METRICS") + print("-" * 80) + + for test in comparison['test_comparisons']: + print(f"\n{test['test_name']}:") + print(f" {'Metric':<15} {'Baseline':>12} {'Current':>12} {'Change':>12} {'Status':<15}") + print(f" {'-'*15} {'-'*12} {'-'*12} {'-'*12} {'-'*15}") + + for metric_name, metric_data in test['metrics'].items(): + baseline_str = f"{metric_data['baseline']:.1f}" + current_str = f"{metric_data['current']:.1f}" + change_str = f"{metric_data['change_pct']:+.1f}%" + + status_symbol = { + 'regression': 'āŒ', + 'improvement': 'āœ…', + 'unchanged': 'āž–' + }.get(metric_data['status'], '?') + + status_str = f"{status_symbol} {metric_data['status']}" + + print(f" {metric_name:<15} {baseline_str:>12} {current_str:>12} {change_str:>12} {status_str:<15}") + + # Verdict + print("\n" + "=" * 80) + print("VERDICT") + print("=" * 80) + + verdict_messages = { + 'recommended': 'āœ… RECOMMENDED - Significant performance improvements detected', + 'acceptable': 'āœ“ ACCEPTABLE - No major regressions, acceptable performance', + 'caution': 'āš ļø CAUTION - Some regressions detected, review carefully', + 'not_recommended': 'āŒ NOT RECOMMENDED - Critical regressions detected' + } + + print(f"\n{verdict_messages.get(comparison['verdict'], 'UNKNOWN')}\n") + + def save_comparison(self, comparison: Dict, output_file: Path): + """Save comparison results to JSON file""" + with open(output_file, 'w') as f: + json.dump(comparison, f, indent=2) + print(f"āœ… Comparison saved to: {output_file}") + + +def main(): + parser = argparse.ArgumentParser( + description='Compare performance test results' + ) + parser.add_argument( + 'baseline', + type=Path, + help='Baseline results JSON file' + ) + parser.add_argument( + 'current', + type=Path, + help='Current results JSON file' + ) + parser.add_argument( + '--output', + type=Path, + help='Output file for comparison results (JSON)' + ) + parser.add_argument( + '--brief', + action='store_true', + help='Show brief summary only' + ) + parser.add_argument( + '--fail-on-regression', + action='store_true', + help='Exit with error code if regressions 
detected' + ) + + args = parser.parse_args() + + try: + comparator = ResultsComparator(args.baseline, args.current) + comparison = comparator.compare() + + # Print comparison + comparator.print_comparison(comparison, detailed=not args.brief) + + # Save if requested + if args.output: + comparator.save_comparison(comparison, args.output) + + # Check for regressions + if args.fail_on_regression and comparison['regressions']: + print("\nāŒ Exiting with error due to detected regressions") + return 1 + + return 0 + + except Exception as e: + print(f"āŒ Error: {e}", file=sys.stderr) + import traceback + traceback.print_exc() + return 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tests/performance/utils/generate_docker_compose.py b/tests/performance/utils/generate_docker_compose.py new file mode 100755 index 000000000..0d0454c89 --- /dev/null +++ b/tests/performance/utils/generate_docker_compose.py @@ -0,0 +1,423 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Docker Compose Generator for Infrastructure Profiles + +Generates docker-compose.yml files from infrastructure profile configurations. +Supports PostgreSQL version switching, instance scaling, and resource tuning. +""" + +import argparse +import sys +from pathlib import Path +from typing import Dict, Any +import yaml + + +DOCKER_COMPOSE_TEMPLATE = """version: '3.8' + +services: + postgres: + image: postgres:{postgres_version} + container_name: postgres_perf + environment: + POSTGRES_DB: mcpgateway + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + command: + - "postgres" +{postgres_config_commands} + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + +{redis_service} + +{gateway_services} + +{load_balancer} + +volumes: + postgres_data: +{redis_volume} +""" + +GATEWAY_SERVICE_TEMPLATE = """ gateway{instance_suffix}: + build: + context: ../.. 
+ dockerfile: Dockerfile + container_name: gateway{instance_suffix} + environment: + - DATABASE_URL=postgresql://postgres:postgres@postgres:5432/mcpgateway +{redis_url} + - HOST=0.0.0.0 + - PORT=4444 + - LOG_LEVEL=INFO + - GUNICORN_WORKERS={gunicorn_workers} + - GUNICORN_THREADS={gunicorn_threads} + - GUNICORN_TIMEOUT={gunicorn_timeout} + - DB_POOL_SIZE={db_pool_size} + - DB_POOL_MAX_OVERFLOW={db_pool_max_overflow} + - DB_POOL_TIMEOUT={db_pool_timeout} +{redis_pool} + - JWT_SECRET_KEY=my-test-key + - MCPGATEWAY_ADMIN_API_ENABLED=true + - MCPGATEWAY_UI_ENABLED=true + ports: + - "{port_mapping}:4444" + depends_on: + postgres: + condition: service_healthy +{redis_depends} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:4444/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 +""" + +REDIS_SERVICE = """ redis: + image: redis:7-alpine + container_name: redis_perf + ports: + - "6379:6379" + command: redis-server{redis_config} + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 +""" + +NGINX_LOAD_BALANCER = """ nginx: + image: nginx:alpine + container_name: nginx_lb + ports: + - "4444:80" + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf:ro + depends_on: +{nginx_depends} +""" + + +class DockerComposeGenerator: + """Generate docker-compose.yml from infrastructure and server profiles""" + + def __init__(self, config_file: Path): + self.config_file = config_file + self.config = self._load_config() + + def _load_config(self) -> Dict[str, Any]: + """Load configuration from YAML file""" + with open(self.config_file) as f: + return yaml.safe_load(f) + + def generate( + self, + infrastructure_profile: str, + server_profile: str = "standard", + postgres_version: str = None, + instances: int = None, + output_file: Path = None + ) -> str: + """ + Generate docker-compose.yml content + + Args: + infrastructure_profile: Infrastructure profile name + server_profile: Server profile name + postgres_version: Override PostgreSQL version + instances: Override number of gateway instances + output_file: Path to write output (if None, returns string) + + Returns: + Generated docker-compose.yml content + """ + # Get profiles + infra = self.config.get('infrastructure_profiles', {}).get(infrastructure_profile) + if not infra: + raise ValueError(f"Infrastructure profile '{infrastructure_profile}' not found") + + server = self.config.get('server_profiles', {}).get(server_profile) + if not server: + raise ValueError(f"Server profile '{server_profile}' not found") + + # Override values if provided + pg_version = postgres_version or infra.get('postgres_version', '17-alpine') + num_instances = instances or infra.get('gateway_instances', 1) + redis_enabled = infra.get('redis_enabled', False) + + # Generate PostgreSQL configuration commands + postgres_commands = self._generate_postgres_config(infra) + + # Generate Redis service + redis_service = "" + redis_volume = "" + if redis_enabled: + redis_config = self._generate_redis_config(infra) + redis_service = REDIS_SERVICE.format(redis_config=redis_config) + redis_volume = " redis_data:" + + # Generate gateway services + gateway_services = self._generate_gateway_services( + num_instances, server, redis_enabled + ) + + # Generate load balancer if multiple instances + load_balancer = "" + if num_instances > 1: + load_balancer = self._generate_load_balancer(num_instances) + # Also generate nginx.conf + self._generate_nginx_config(num_instances, output_file) + + # Assemble final docker-compose + compose_content = 
DOCKER_COMPOSE_TEMPLATE.format( + postgres_version=pg_version, + postgres_config_commands=postgres_commands, + redis_service=redis_service, + gateway_services=gateway_services, + load_balancer=load_balancer, + redis_volume=redis_volume + ) + + # Write to file if specified + if output_file: + output_file.parent.mkdir(parents=True, exist_ok=True) + with open(output_file, 'w') as f: + f.write(compose_content) + print(f"āœ… Generated: {output_file}") + + return compose_content + + def _generate_postgres_config(self, infra: Dict) -> str: + """Generate PostgreSQL configuration command arguments""" + commands = [] + + pg_configs = { + 'shared_buffers': 'postgres_shared_buffers', + 'effective_cache_size': 'postgres_effective_cache_size', + 'max_connections': 'postgres_max_connections', + 'work_mem': 'postgres_work_mem', + 'maintenance_work_mem': 'postgres_maintenance_work_mem', + 'random_page_cost': 'postgres_random_page_cost', + 'effective_io_concurrency': 'postgres_effective_io_concurrency', + } + + for pg_param, config_key in pg_configs.items(): + if config_key in infra: + value = infra[config_key] + commands.append(f' - "-c"\n - "{pg_param}={value}"') + + return '\n'.join(commands) if commands else '' + + def _generate_redis_config(self, infra: Dict) -> str: + """Generate Redis configuration arguments""" + config_parts = [] + + if 'redis_maxmemory' in infra: + config_parts.append(f" --maxmemory {infra['redis_maxmemory']}") + + if 'redis_maxmemory_policy' in infra: + config_parts.append(f" --maxmemory-policy {infra['redis_maxmemory_policy']}") + + return ''.join(config_parts) + + def _generate_gateway_services( + self, + num_instances: int, + server_profile: Dict, + redis_enabled: bool + ) -> str: + """Generate gateway service definitions""" + services = [] + + for i in range(num_instances): + instance_suffix = f"_{i+1}" if num_instances > 1 else "" + port_mapping = "4444" if num_instances == 1 else f"{4444 + i}" + + redis_url = "" + redis_pool = "" + redis_depends = "" + + if redis_enabled: + redis_url = " - REDIS_URL=redis://redis:6379" + redis_pool = f" - REDIS_POOL_SIZE={server_profile.get('redis_pool_size', 10)}" + redis_depends = """ redis: + condition: service_healthy""" + + service = GATEWAY_SERVICE_TEMPLATE.format( + instance_suffix=instance_suffix, + redis_url=redis_url, + gunicorn_workers=server_profile.get('gunicorn_workers', 4), + gunicorn_threads=server_profile.get('gunicorn_threads', 4), + gunicorn_timeout=server_profile.get('gunicorn_timeout', 120), + db_pool_size=server_profile.get('db_pool_size', 20), + db_pool_max_overflow=server_profile.get('db_pool_max_overflow', 40), + db_pool_timeout=server_profile.get('db_pool_timeout', 30), + redis_pool=redis_pool, + port_mapping=port_mapping, + redis_depends=redis_depends + ) + + services.append(service) + + return '\n'.join(services) + + def _generate_load_balancer(self, num_instances: int) -> str: + """Generate nginx load balancer service""" + depends = [] + for i in range(num_instances): + suffix = f"_{i+1}" + depends.append(f' - gateway{suffix}') + + return NGINX_LOAD_BALANCER.format( + nginx_depends='\n'.join(depends) + ) + + def _generate_nginx_config(self, num_instances: int, output_file: Path): + """Generate nginx.conf for load balancing""" + if not output_file: + return + + upstreams = [] + for i in range(num_instances): + suffix = f"_{i+1}" + upstreams.append(f' server gateway{suffix}:4444;') + + nginx_conf = f"""events {{ + worker_connections 1024; +}} + +http {{ + upstream gateway_backend {{ +{chr(10).join(upstreams)} + }} 
+ + server {{ + listen 80; + + location / {{ + proxy_pass http://gateway_backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Timeouts + proxy_connect_timeout 60s; + proxy_send_timeout 60s; + proxy_read_timeout 60s; + + # Health checks + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503; + }} + + location /health {{ + access_log off; + proxy_pass http://gateway_backend/health; + }} + }} +}} +""" + + nginx_file = output_file.parent / 'nginx.conf' + with open(nginx_file, 'w') as f: + f.write(nginx_conf) + print(f"āœ… Generated: {nginx_file}") + + +def main(): + parser = argparse.ArgumentParser( + description='Generate docker-compose.yml from infrastructure profiles' + ) + parser.add_argument( + '--config', + type=Path, + default=Path('config.yaml'), + help='Configuration file path' + ) + parser.add_argument( + '--infrastructure', + required=True, + help='Infrastructure profile name' + ) + parser.add_argument( + '--server-profile', + default='standard', + help='Server profile name' + ) + parser.add_argument( + '--postgres-version', + help='PostgreSQL version (e.g., 17-alpine)' + ) + parser.add_argument( + '--instances', + type=int, + help='Number of gateway instances' + ) + parser.add_argument( + '--output', + type=Path, + default=Path('docker-compose.perf.yml'), + help='Output file path' + ) + parser.add_argument( + '--list-profiles', + action='store_true', + help='List available profiles and exit' + ) + + args = parser.parse_args() + + try: + generator = DockerComposeGenerator(args.config) + + if args.list_profiles: + print("\n=== Infrastructure Profiles ===") + for name, profile in generator.config.get('infrastructure_profiles', {}).items(): + desc = profile.get('description', 'No description') + instances = profile.get('gateway_instances', 1) + pg_version = profile.get('postgres_version', 'N/A') + print(f" {name:20} - {desc}") + print(f" {'':20} Instances: {instances}, PostgreSQL: {pg_version}") + + print("\n=== Server Profiles ===") + for name, profile in generator.config.get('server_profiles', {}).items(): + desc = profile.get('description', 'No description') + workers = profile.get('gunicorn_workers', 'N/A') + threads = profile.get('gunicorn_threads', 'N/A') + print(f" {name:20} - {desc}") + print(f" {'':20} Workers: {workers}, Threads: {threads}") + + return 0 + + # Generate docker-compose + generator.generate( + infrastructure_profile=args.infrastructure, + server_profile=args.server_profile, + postgres_version=args.postgres_version, + instances=args.instances, + output_file=args.output + ) + + print(f"\nāœ… Successfully generated docker-compose configuration") + print(f" Infrastructure: {args.infrastructure}") + print(f" Server Profile: {args.server_profile}") + print(f" Output: {args.output}") + + return 0 + + except Exception as e: + print(f"āŒ Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tests/performance/utils/report_generator.py b/tests/performance/utils/report_generator.py new file mode 100755 index 000000000..e32430bc7 --- /dev/null +++ b/tests/performance/utils/report_generator.py @@ -0,0 +1,1195 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +HTML Performance Test Report Generator + +Generates comprehensive HTML reports from performance test results including: +- Summary statistics +- SLO compliance +- Charts and visualizations +- 
System metrics +- Baseline comparisons +- Recommendations +""" + +import argparse +import json +import re +import sys +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Any, Optional +import yaml + + +# HTML Template with embedded CSS and Chart.js +HTML_TEMPLATE = """ + + + + + + Performance Test Report - {{ timestamp }} + + + + +
    [HTML template body - markup not recoverable from this extraction. The template renders: a report header (Generated / Profile / Gateway / Git Commit), an šŸ“Š Executive Summary card grid (Overall Status, SLO Compliance, Average Throughput, Average p95 Latency, plus a regression warning banner), a šŸŽÆ SLO Compliance table (Test, Metric, Target, Actual, Status, Margin), per-category performance tables (Test, Requests/sec, p50, p95, p99, Error Rate, Status) with latency charts, optional šŸ’» System Metrics (CPU/memory charts and a resource utilization table) and šŸ—„ļø Database Performance (connection pool, query times, slow queries) sections, šŸ’” Recommendations, šŸ“ Additional Information (test configuration and result files), a footer, and the Chart.js scripts that draw the charts.]
+ + + + + +""" + + +class SimpleTemplate: + """Simple template engine for rendering HTML reports""" + + def __init__(self, template: str): + self.template = template + + def render(self, context: Dict[str, Any]) -> str: + """Render template with context""" + result = self.template + + # Handle simple variable substitution {{ var }} + for key, value in context.items(): + pattern = r'\{\{\s*' + re.escape(key) + r'\s*\}\}' + result = re.sub(pattern, str(value), result) + + # Handle safe JSON {{ var | safe }} + for key, value in context.items(): + pattern = r'\{\{\s*' + re.escape(key) + r'\s*\|\s*safe\s*\}\}' + if isinstance(value, (dict, list)): + # Use lambda to avoid regex backslash interpretation issues with JSON + result = re.sub(pattern, lambda m: json.dumps(value), result) + + # Handle conditionals {% if var %} + result = self._render_conditionals(result, context) + + # Handle loops {% for item in items %} + result = self._render_loops(result, context) + + return result + + def _render_conditionals(self, template: str, context: Dict) -> str: + """Render if/else blocks""" + # Simple implementation - handle {% if var %} ... {% endif %} + pattern = r'\{%\s*if\s+(\w+)\s*%\}(.*?)\{%\s*endif\s*%\}' + + def replace_conditional(match): + var_name = match.group(1) + content = match.group(2) + return content if context.get(var_name) else '' + + return re.sub(pattern, replace_conditional, template, flags=re.DOTALL) + + def _render_loops(self, template: str, context: Dict) -> str: + """Render for loops""" + # Simple implementation - handle {% for item in items %} ... {% endfor %} + pattern = r'\{%\s*for\s+(\w+)\s+in\s+(\w+)\s*%\}(.*?)\{%\s*endfor\s*%\}' + + def replace_loop(match): + item_name = match.group(1) + list_name = match.group(2) + content = match.group(3) + items = context.get(list_name, []) + + result = [] + for item in items: + item_context = context.copy() + item_context[item_name] = item + + # Simple variable substitution within loop + item_result = content + if isinstance(item, dict): + for key, value in item.items(): + var_pattern = r'\{\{\s*' + re.escape(item_name) + r'\.' 
+ re.escape(key) + r'\s*\}\}' + item_result = re.sub(var_pattern, str(value), item_result) + + result.append(item_result) + + return ''.join(result) + + return re.sub(pattern, replace_loop, template, flags=re.DOTALL) + + +class PerformanceReportGenerator: + """Generate HTML reports from performance test results""" + + def __init__(self, results_dir: Path, config_file: Optional[Path] = None): + self.results_dir = Path(results_dir) + self.config = self._load_config(config_file) + self.slos = self.config.get('slos', {}) + + def _load_config(self, config_file: Optional[Path]) -> Dict: + """Load configuration from YAML file""" + if config_file and config_file.exists(): + with open(config_file) as f: + return yaml.safe_load(f) + return {} + + def parse_hey_output(self, file_path: Path) -> Optional[Dict[str, Any]]: + """Parse hey output file to extract metrics""" + try: + with open(file_path) as f: + content = f.read() + + metrics = {} + + # Extract summary metrics + if match := re.search(r'Requests/sec:\s+([\d.]+)', content): + metrics['rps'] = float(match.group(1)) + + if match := re.search(r'Average:\s+([\d.]+)\s+secs', content): + metrics['avg'] = float(match.group(1)) * 1000 # Convert to ms + + if match := re.search(r'Slowest:\s+([\d.]+)\s+secs', content): + metrics['max'] = float(match.group(1)) * 1000 + + if match := re.search(r'Fastest:\s+([\d.]+)\s+secs', content): + metrics['min'] = float(match.group(1)) * 1000 + + # Extract percentiles from latency distribution + # Look for patterns like "0.050 [9500]" which indicates 95th percentile + latency_section = re.search(r'Latency distribution:(.*?)(?=\n\n|\Z)', content, re.DOTALL) + if latency_section: + latency_text = latency_section.group(1) + + if match := re.search(r'50%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p50'] = float(match.group(1)) * 1000 + + if match := re.search(r'95%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p95'] = float(match.group(1)) * 1000 + + if match := re.search(r'99%\s+in\s+([\d.]+)\s+secs', latency_text): + metrics['p99'] = float(match.group(1)) * 1000 + + # Extract status code distribution + status_codes = {} + status_section = re.search(r'Status code distribution:(.*?)(?=\n\n|\Z)', content, re.DOTALL) + if status_section: + for line in status_section.group(1).strip().split('\n'): + if match := re.search(r'\[(\d+)\]\s+(\d+)\s+responses', line): + status_codes[int(match.group(1))] = int(match.group(2)) + + metrics['status_codes'] = status_codes + + # Calculate error rate + total_responses = sum(status_codes.values()) + error_responses = sum(count for code, count in status_codes.items() if code >= 400) + metrics['error_rate'] = (error_responses / total_responses * 100) if total_responses > 0 else 0 + metrics['total_requests'] = total_responses + + return metrics + + except Exception as e: + print(f"Error parsing {file_path}: {e}", file=sys.stderr) + return None + + def collect_test_results(self) -> Dict[str, List[Dict]]: + """Collect all test results from the results directory""" + results = {} + + # Group results by category (tools, resources, prompts, etc.) 
+ for result_file in self.results_dir.glob('*.txt'): + # Parse filename: {category}_{test_name}_{profile}_{timestamp}.txt + parts = result_file.stem.split('_') + if len(parts) < 2: + continue + + category = parts[0] + test_name = '_'.join(parts[1:-2]) if len(parts) > 3 else parts[1] + + metrics = self.parse_hey_output(result_file) + if not metrics: + continue + + if category not in results: + results[category] = [] + + results[category].append({ + 'name': test_name, + 'file': result_file.name, + **metrics + }) + + return results + + def evaluate_slo(self, test_name: str, metrics: Dict[str, float]) -> List[Dict]: + """Evaluate metrics against SLO thresholds""" + # Map test names to SLO keys + slo_key_map = { + 'list_tools': 'tools_list', + 'get_system_time': 'tools_invoke_simple', + 'convert_time': 'tools_invoke_complex', + 'list_resources': 'resources_list', + 'read_timezone_info': 'resources_read', + 'read_world_times': 'resources_read', + 'list_prompts': 'prompts_list', + 'get_compare_timezones': 'prompts_get', + 'health_check': 'health_check', + } + + slo_key = slo_key_map.get(test_name) + if not slo_key or slo_key not in self.slos: + return [] + + slo = self.slos[slo_key] + results = [] + + # Check p50 + if 'p50_ms' in slo and 'p50' in metrics: + results.append({ + 'test_name': test_name, + 'metric': 'p50', + 'target': f"{slo['p50_ms']}ms", + 'actual': f"{metrics['p50']:.1f}ms", + 'status': 'pass' if metrics['p50'] <= slo['p50_ms'] else 'fail', + 'status_text': 'āœ… Pass' if metrics['p50'] <= slo['p50_ms'] else 'āŒ Fail', + 'margin': f"{((metrics['p50'] - slo['p50_ms']) / slo['p50_ms'] * 100):+.1f}%" + }) + + # Check p95 + if 'p95_ms' in slo and 'p95' in metrics: + results.append({ + 'test_name': test_name, + 'metric': 'p95', + 'target': f"{slo['p95_ms']}ms", + 'actual': f"{metrics['p95']:.1f}ms", + 'status': 'pass' if metrics['p95'] <= slo['p95_ms'] else 'fail', + 'status_text': 'āœ… Pass' if metrics['p95'] <= slo['p95_ms'] else 'āŒ Fail', + 'margin': f"{((metrics['p95'] - slo['p95_ms']) / slo['p95_ms'] * 100):+.1f}%" + }) + + # Check p99 + if 'p99_ms' in slo and 'p99' in metrics: + results.append({ + 'test_name': test_name, + 'metric': 'p99', + 'target': f"{slo['p99_ms']}ms", + 'actual': f"{metrics['p99']:.1f}ms", + 'status': 'pass' if metrics['p99'] <= slo['p99_ms'] else 'fail', + 'status_text': 'āœ… Pass' if metrics['p99'] <= slo['p99_ms'] else 'āŒ Fail', + 'margin': f"{((metrics['p99'] - slo['p99_ms']) / slo['p99_ms'] * 100):+.1f}%" + }) + + # Check throughput + if 'min_rps' in slo and 'rps' in metrics: + results.append({ + 'test_name': test_name, + 'metric': 'throughput', + 'target': f"{slo['min_rps']} req/s", + 'actual': f"{metrics['rps']:.1f} req/s", + 'status': 'pass' if metrics['rps'] >= slo['min_rps'] else 'fail', + 'status_text': 'āœ… Pass' if metrics['rps'] >= slo['min_rps'] else 'āŒ Fail', + 'margin': f"{((metrics['rps'] - slo['min_rps']) / slo['min_rps'] * 100):+.1f}%" + }) + + # Check error rate + if 'max_error_rate' in slo and 'error_rate' in metrics: + max_error_pct = slo['max_error_rate'] * 100 + results.append({ + 'test_name': test_name, + 'metric': 'error_rate', + 'target': f"{max_error_pct}%", + 'actual': f"{metrics['error_rate']:.2f}%", + 'status': 'pass' if metrics['error_rate'] <= max_error_pct else 'fail', + 'status_text': 'āœ… Pass' if metrics['error_rate'] <= max_error_pct else 'āŒ Fail', + 'margin': f"{(metrics['error_rate'] - max_error_pct):+.2f}%" + }) + + return results + + def generate_recommendations(self, test_results: Dict, slo_results: 
List[Dict]) -> List[Dict]: + """Generate performance recommendations based on results""" + recommendations = [] + + # Check for SLO violations + failed_slos = [slo for slo in slo_results if slo['status'] == 'fail'] + if failed_slos: + for slo in failed_slos[:3]: # Top 3 violations + recommendations.append({ + 'priority': 'high', + 'title': f"SLO Violation: {slo['test_name']} {slo['metric']}", + 'description': f"The {slo['metric']} metric ({slo['actual']}) exceeds the target ({slo['target']}) by {slo['margin']}.", + 'action': None + }) + + # Check for high error rates + for category, tests in test_results.items(): + for test in tests: + if test.get('error_rate', 0) > 1: + recommendations.append({ + 'priority': 'high', + 'title': f"High Error Rate: {test['name']}", + 'description': f"Error rate of {test['error_rate']:.2f}% detected. Investigate application logs for failures.", + 'action': f"docker logs gateway | grep -i error" + }) + + # Check for high latency variance + for category, tests in test_results.items(): + for test in tests: + if 'p99' in test and 'p50' in test: + variance = test['p99'] / test['p50'] if test['p50'] > 0 else 0 + if variance > 3: # p99 is 3x p50 + recommendations.append({ + 'priority': 'medium', + 'title': f"High Latency Variance: {test['name']}", + 'description': f"p99 latency ({test['p99']:.1f}ms) is {variance:.1f}x the p50 ({test['p50']:.1f}ms). This indicates inconsistent performance.", + 'action': "# Profile the application to identify slow code paths\npy-spy record -o profile.svg --pid --duration 60" + }) + + # Check for low throughput + for category, tests in test_results.items(): + for test in tests: + if test.get('rps', float('inf')) < 100: + recommendations.append({ + 'priority': 'medium', + 'title': f"Low Throughput: {test['name']}", + 'description': f"Throughput of {test['rps']:.1f} req/s is lower than expected. 
Consider optimizing the request handling.", + 'action': "# Check database connection pool settings\n# Review application logs for bottlenecks" + }) + + return recommendations[:10] # Top 10 recommendations + + def generate_report(self, output_file: Path, profile: str = "medium"): + """Generate HTML report""" + # Collect test results + test_results = self.collect_test_results() + + # Evaluate SLOs + slo_results = [] + for category, tests in test_results.items(): + for test in tests: + slo_results.extend(self.evaluate_slo(test['name'], test)) + + # Calculate summary statistics + total_tests = sum(len(tests) for tests in test_results.values()) + all_tests = [test for tests in test_results.values() for test in tests] + + avg_rps = sum(t.get('rps', 0) for t in all_tests) / len(all_tests) if all_tests else 0 + avg_p95 = sum(t.get('p95', 0) for t in all_tests) / len(all_tests) if all_tests else 0 + avg_p99 = sum(t.get('p99', 0) for t in all_tests) / len(all_tests) if all_tests else 0 + + slos_met = sum(1 for slo in slo_results if slo['status'] == 'pass') + total_slos = len(slo_results) + slo_compliance = (slos_met / total_slos * 100) if total_slos > 0 else 0 + + summary = { + 'overall_status': 'excellent' if slo_compliance >= 95 else 'good' if slo_compliance >= 80 else 'warning' if slo_compliance >= 60 else 'poor', + 'overall_status_text': 'āœ… Excellent' if slo_compliance >= 95 else 'āœ“ Good' if slo_compliance >= 80 else '⚠ Warning' if slo_compliance >= 60 else 'āŒ Poor', + 'tests_passed': total_tests, # Simplified + 'total_tests': total_tests, + 'slo_status': 'excellent' if slo_compliance >= 95 else 'good' if slo_compliance >= 80 else 'warning' if slo_compliance >= 60 else 'poor', + 'slo_compliance_percent': f"{slo_compliance:.1f}", + 'slos_met': slos_met, + 'total_slos': total_slos, + 'perf_status': 'good' if avg_rps > 300 else 'warning' if avg_rps > 100 else 'poor', + 'avg_rps': f"{avg_rps:.0f}", + 'latency_status': 'good' if avg_p95 < 50 else 'warning' if avg_p95 < 100 else 'poor', + 'avg_p95': f"{avg_p95:.1f}", + 'avg_p99': f"{avg_p99:.1f}", + 'has_regressions': False, + 'regression_count': 0 + } + + # Format test results for display + formatted_results = {} + for category, tests in test_results.items(): + formatted_results[category] = [] + for test in tests: + formatted_results[category].append({ + 'name': test['name'], + 'rps': f"{test.get('rps', 0):.1f}", + 'p50': f"{test.get('p50', 0):.1f}", + 'p95': f"{test.get('p95', 0):.1f}", + 'p99': f"{test.get('p99', 0):.1f}", + 'error_rate': f"{test.get('error_rate', 0):.2f}", + 'status': 'pass' if test.get('error_rate', 0) < 1 else 'fail', + 'status_text': 'āœ… Pass' if test.get('error_rate', 0) < 1 else 'āŒ Fail', + 'has_baseline': False, + 'comparison_status': '', + 'comparison_text': '' + }) + + # Generate chart data + chart_data = {} + for category, tests in test_results.items(): + chart_data[category] = { + 'labels': [t['name'] for t in tests], + 'p50': [t.get('p50', 0) for t in tests], + 'p95': [t.get('p95', 0) for t in tests], + 'p99': [t.get('p99', 0) for t in tests], + } + + # Generate recommendations + recommendations = self.generate_recommendations(test_results, slo_results) + + # Prepare context for template + context = { + 'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + 'profile': profile, + 'gateway_url': self.config.get('environment', {}).get('gateway_url', 'http://localhost:4444'), + 'git_commit': '', + 'summary': summary, + 'slo_results': slo_results, + 'test_results': formatted_results, + 'system_metrics': None, 
# TODO: Parse system metrics + 'db_metrics': None, # TODO: Parse DB metrics + 'recommendations': recommendations, + 'chart_data': chart_data, + 'config': { + 'requests': 'Variable', + 'concurrency': 'Variable', + 'timeout': '60' + }, + 'duration': 'Variable', + 'result_files': [ + {'name': f.name, 'path': f.name} + for f in sorted(self.results_dir.glob('*.txt')) + ] + } + + # Render template + template = SimpleTemplate(HTML_TEMPLATE) + html = template.render(context) + + # Write output + output_file.parent.mkdir(parents=True, exist_ok=True) + with open(output_file, 'w') as f: + f.write(html) + + print(f"āœ… Report generated: {output_file}") + return output_file + + +def main(): + parser = argparse.ArgumentParser(description='Generate HTML performance test report') + parser.add_argument('--results-dir', type=Path, default=Path('results'), + help='Directory containing test results') + parser.add_argument('--output', type=Path, default=None, + help='Output HTML file path') + parser.add_argument('--config', type=Path, default=Path('config.yaml'), + help='Configuration file') + parser.add_argument('--profile', type=str, default='medium', + help='Test profile name') + + args = parser.parse_args() + + # Default output path + if not args.output: + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + args.output = Path(f'reports/performance_report_{args.profile}_{timestamp}.html') + + # Generate report + generator = PerformanceReportGenerator(args.results_dir, args.config) + generator.generate_report(args.output, args.profile) + + +if __name__ == '__main__': + main() diff --git a/tests/performance/utils/setup-auth.sh b/tests/performance/utils/setup-auth.sh new file mode 100755 index 000000000..2949ab096 --- /dev/null +++ b/tests/performance/utils/setup-auth.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# ============================================================================== +# Authentication setup for performance tests +# Generates JWT token for authenticated API requests +# ============================================================================== + +set -Eeuo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $*" +} + +error() { + echo -e "${RED}[ERROR]${NC} $*" >&2 +} + +warn() { + echo -e "${YELLOW}[WARN]${NC} $*" +} + +# Configuration +JWT_SECRET="${JWT_SECRET:-my-test-key}" +JWT_ALGO="${JWT_ALGO:-HS256}" +USERNAME="${USERNAME:-admin@example.com}" +EXPIRATION="${EXPIRATION:-10080}" # 7 days in minutes + +log "Generating JWT token for performance tests..." +log " Username: $USERNAME" +log " Expiration: $EXPIRATION minutes" +log " Algorithm: $JWT_ALGO" + +# Find project root - go up from tests/performance to project root +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$( cd "$SCRIPT_DIR/../../.." && pwd )" + +# Check if we found the project root +if [ ! -f "$PROJECT_ROOT/mcpgateway/utils/create_jwt_token.py" ]; then + error "Cannot find project root. 
Looking for mcpgateway/utils/create_jwt_token.py"
+    error "Current script dir: $SCRIPT_DIR"
+    error "Project root: $PROJECT_ROOT"
+    exit 1
+fi
+
+# Change to project root for token generation
+cd "$PROJECT_ROOT" || exit 1
+
+# Activate a virtual environment if one is available (override via VENV_PATH)
+if [ -f "${VENV_PATH:-$PROJECT_ROOT/.venv/bin/activate}" ]; then
+    # shellcheck disable=SC1091
+    source "${VENV_PATH:-$PROJECT_ROOT/.venv/bin/activate}"
+fi
+
+# Generate token
+TOKEN=$(python3 -m mcpgateway.utils.create_jwt_token \
+    --username "$USERNAME" \
+    --exp "$EXPIRATION" \
+    --secret "$JWT_SECRET" \
+    --algo "$JWT_ALGO" 2>/dev/null)
+
+if [ -z "$TOKEN" ]; then
+    error "Failed to generate JWT token"
+    exit 1
+fi
+
+# Export token
+export MCPGATEWAY_BEARER_TOKEN="$TOKEN"
+
+# Save to file for easy sourcing (in tests/performance directory)
+TOKEN_FILE="$PROJECT_ROOT/tests/performance/.auth_token"
+echo "export MCPGATEWAY_BEARER_TOKEN='$TOKEN'" > "$TOKEN_FILE"
+
+log "āœ… Token generated successfully"
+log "Token saved to: $TOKEN_FILE"
+log ""
+log "To use in your shell, run:"
+log "  source tests/performance/.auth_token"
+log ""
+log "Or in scripts:"
+log "  export MCPGATEWAY_BEARER_TOKEN='$TOKEN'"
+
+# Print the token (useful for CI/CD)
+echo "$TOKEN"
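
For context, a minimal sketch of how the utilities added above might be chained together on a workstation. The baseline-manager filename (utils/baseline_manager.py), the profile names (cpu_intensive, standard, medium), and the results/ and baselines/ directories are assumptions for illustration only; the hey-based test runner that produces the result files is not part of this diff.

    # Assumed working directory: tests/performance
    cd tests/performance

    # Generate a compose file for an assumed infrastructure/server profile pair
    python3 utils/generate_docker_compose.py \
        --config config.yaml \
        --infrastructure cpu_intensive \
        --server-profile standard \
        --output docker-compose.perf.yml

    # Wait for the gateway (:4444) and fast-time-server (:8888), then set up auth
    ./utils/check-services.sh
    ./utils/setup-auth.sh && source .auth_token

    # ... run the hey-based load tests so their output lands in results/ ...

    # Persist the run as a baseline, compare against a previous run, build the HTML report
    python3 utils/baseline_manager.py save results/ --output baselines/current.json --profile medium
    python3 utils/compare_results.py baselines/previous.json baselines/current.json --fail-on-regression
    python3 utils/report_generator.py --results-dir results/ --config config.yaml --profile medium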