lightspeed-core · tisnik · Nov 16, 2025 · Nov 16, 2025 · coderabbitai · Nov 16, 2025
diff --git a/docs/openapi.json b/docs/openapi.json
@@ -411,7 +411,7 @@
                     "streaming_query"
                 ],
                 "summary": "Streaming Query Endpoint Handler",
-                "description": "Handle request to the /streaming_query endpoint.\n\nThis endpoint receives a query request, authenticates the user,\nselects the appropriate model and provider, and streams\nincremental response events from the Llama Stack backend to the\nclient. Events include start, token updates, tool calls, turn\ncompletions, errors, and end-of-stream metadata. Optionally\nstores the conversation transcript if enabled in configuration.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n    SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n    Llama Stack server.",
+                "description": "Handle request to the /streaming_query endpoint using Agent API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Agent API specific retrieve_response and response generator functions.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n    SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n    Llama Stack server.",
                 "operationId": "streaming_query_endpoint_handler_v1_streaming_query_post",
                 "requestBody": {
                     "content": {
@@ -1306,6 +1306,92 @@
                 }
             }
         },
+        "/v2/streaming_query": {
+            "post": {
+                "tags": [
+                    "streaming_query_v2"
+                ],
+                "summary": "Streaming Query Endpoint Handler V2",
+                "description": "Handle request to the /streaming_query endpoint using Responses API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and response generator functions.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n    SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n    Llama Stack server.",
+                "operationId": "streaming_query_endpoint_handler_v2_v2_streaming_query_post",
-                "summary": "Streaming Query Endpoint Handler V2",
-                "description": "Handle request to the /streaming_query endpoint using Responses API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and response generator functions.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n    SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n    Llama Stack server.",
-                "operationId": "streaming_query_endpoint_handler_v2_v2_streaming_query_post",
+                "summary": "Streaming Query Endpoint Handler V2",
+                "description": "Handle request to the /streaming_query endpoint using Responses API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and response generator functions.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n    SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n    Llama Stack server.",
+                "operationId": "streaming_query_endpoint_handler_v2_streaming_query_post",
-                "summary": "Streaming Query Endpoint Handler V2",
-                "description": "Handle request to the /streaming_query endpoint using Responses API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and response generator functions.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n    SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n    Llama Stack server.",
-                "operationId": "streaming_query_endpoint_handler_v2_v2_streaming_query_post",
+                "summary": "Streaming Query Endpoint Handler V2",
+                "description": "Handle request to the /streaming_query endpoint using Responses API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and response generator functions.\n\nReturns:\n    StreamingResponse: An HTTP streaming response yielding\n    SSE-formatted events for the query lifecycle.\n\nRaises:\n    HTTPException: Returns HTTP 500 if unable to connect to the\n    Llama Stack server.",
+                "operationId": "streaming_query_endpoint_handler_v2_streaming_query_post",
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/QueryRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "responses": {
+                    "200": {
+                        "description": "Streaming response with Server-Sent Events",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "type": "string",
+                                    "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n"
+                                }
+                            },
+                            "text/plain": {
+                                "schema": {
+                                    "type": "string",
+                                    "example": "Hello world!\n\n---\n\nReference: https://example.com/doc"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Missing or invalid credentials provided by client",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/UnauthorizedResponse"
+                                }
+                            }
+                        }
+                    },
+                    "401": {
+                        "description": "Unauthorized: Invalid or missing Bearer token for k8s auth",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/UnauthorizedResponse"
+                                }
+                            }
+                        }
+                    },
+                    "403": {
+                        "description": "User is not authorized",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ForbiddenResponse"
+                                }
+                            }
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "content": {
+                            "application/json": {
+                                "example": {
+                                    "detail": {
+                                        "response": "Unable to connect to Llama Stack",
+                                        "cause": "Connection error."
+                                    }
+                                }
+                            }
+                        }
+                    },
+                    "422": {
+                        "description": "Validation Error",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/HTTPValidationError"
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        },
         "/readiness": {
             "get": {
                 "tags": [

diff --git a/docs/openapi.md b/docs/openapi.md
@@ -227,14 +227,10 @@ Returns:
 
 > **Streaming Query Endpoint Handler**
 
-Handle request to the /streaming_query endpoint.
+Handle request to the /streaming_query endpoint using Agent API.
 
-This endpoint receives a query request, authenticates the user,
-selects the appropriate model and provider, and streams
-incremental response events from the Llama Stack backend to the
-client. Events include start, token updates, tool calls, turn
-completions, errors, and end-of-stream metadata. Optionally
-stores the conversation transcript if enabled in configuration.
+This is a wrapper around streaming_query_endpoint_handler_base that provides
+the Agent API specific retrieve_response and response generator functions.
 
 Returns:
     StreamingResponse: An HTTP streaming response yielding
@@ -587,6 +583,42 @@ Returns:
 | 429 | The quota has been exceeded | [QuotaExceededResponse](#quotaexceededresponse) |
 | 500 | Internal Server Error |  |
 | 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
+## POST `/v2/streaming_query`
+
+> **Streaming Query Endpoint Handler V2**
+
+Handle request to the /streaming_query endpoint using Responses API.
+
+This is a wrapper around streaming_query_endpoint_handler_base that provides
+the Responses API specific retrieve_response and response generator functions.
+
+Returns:
+    StreamingResponse: An HTTP streaming response yielding
+    SSE-formatted events for the query lifecycle.
+
+Raises:
+    HTTPException: Returns HTTP 500 if unable to connect to the
+    Llama Stack server.
+
+
+
+
+
+### 📦 Request Body 
+
+[QueryRequest](#queryrequest)
+
+### ✅ Responses
+
+| Status Code | Description | Component |
+|-------------|-------------|-----------|
+| 200 | Streaming response with Server-Sent Events (`application/json`) | string |
+| 200 | Streaming response with Server-Sent Events (`text/plain`) | string |
+| 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse) |
+| 401 | Unauthorized: Invalid or missing Bearer token for k8s auth | [UnauthorizedResponse](#unauthorizedresponse) |
+| 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse) |
+| 500 | Internal Server Error |  |
+| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
 ## GET `/readiness`
 
 > **Readiness Probe Get Method**

diff --git a/docs/output.md b/docs/output.md
@@ -227,14 +227,10 @@ Returns:
 
 > **Streaming Query Endpoint Handler**
 
-Handle request to the /streaming_query endpoint.
+Handle request to the /streaming_query endpoint using Agent API.
 
-This endpoint receives a query request, authenticates the user,
-selects the appropriate model and provider, and streams
-incremental response events from the Llama Stack backend to the
-client. Events include start, token updates, tool calls, turn
-completions, errors, and end-of-stream metadata. Optionally
-stores the conversation transcript if enabled in configuration.
+This is a wrapper around streaming_query_endpoint_handler_base that provides
+the Agent API specific retrieve_response and response generator functions.
 
 Returns:
     StreamingResponse: An HTTP streaming response yielding
@@ -587,6 +583,42 @@ Returns:
 | 429 | The quota has been exceeded | [QuotaExceededResponse](#quotaexceededresponse) |
 | 500 | Internal Server Error |  |
 | 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
+## POST `/v2/streaming_query`
+
+> **Streaming Query Endpoint Handler V2**
+
+Handle request to the /streaming_query endpoint using Responses API.
+
+This is a wrapper around streaming_query_endpoint_handler_base that provides
+the Responses API specific retrieve_response and response generator functions.
+
+Returns:
+    StreamingResponse: An HTTP streaming response yielding
+    SSE-formatted events for the query lifecycle.
+
+Raises:
+    HTTPException: Returns HTTP 500 if unable to connect to the
+    Llama Stack server.
+
+
+
+
+
+### 📦 Request Body 
+
+[QueryRequest](#queryrequest)
+
+### ✅ Responses
+
+| Status Code | Description | Component |
+|-------------|-------------|-----------|
+| 200 | Streaming response with Server-Sent Events (`application/json`) | string |
+| 200 | Streaming response with Server-Sent Events (`text/plain`) | string |
+| 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse) |
+| 401 | Unauthorized: Invalid or missing Bearer token for k8s auth | [UnauthorizedResponse](#unauthorizedresponse) |
+| 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse) |
+| 500 | Internal Server Error |  |
+| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
 ## GET `/readiness`
 
 > **Readiness Probe Get Method**