Skip to content

Commit 37f5814

Browse files
Copilot and rootfs committed
Fix conversation chaining: skip model routing when previous_response_id is present
When a Responses API request includes previous_response_id, the router now skips model routing to ensure conversation continuity. This prevents routing subsequent requests to different backend instances that don't have the conversation state. - Added check for previous_response_id in handleResponsesAPIRequest - Skip classification and model routing when conversation is chained - Added test for this behavior (TestHandleResponsesAPIRequest_WithPreviousResponseID) - Updated documentation to explain the limitation and recommended usage Co-authored-by: rootfs <[email protected]>
1 parent 19b5bd1 commit 37f5814

File tree

3 files changed

+77
-1
lines changed

3 files changed

+77
-1
lines changed

src/semantic-router/pkg/extproc/request_handler.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,23 @@ func (r *OpenAIRouter) handleResponsesAPIRequest(v *ext_proc.ProcessingRequest_R
521521
ctx.RequestModel = originalModel
522522
}
523523

524+
// Check if this is a chained conversation (has previous_response_id)
525+
// If so, we cannot change the model as the conversation state is tied to a specific backend instance
526+
hasPreviousResponseID := responsesRequest.PreviousResponseID.Valid() && responsesRequest.PreviousResponseID.Value != ""
527+
if hasPreviousResponseID {
528+
observability.Infof("Responses API - Request has previous_response_id, skipping model routing to maintain conversation continuity")
529+
// Return a pass-through response without model changes
530+
return &ext_proc.ProcessingResponse{
531+
Response: &ext_proc.ProcessingResponse_RequestBody{
532+
RequestBody: &ext_proc.BodyResponse{
533+
Response: &ext_proc.CommonResponse{
534+
Status: ext_proc.CommonResponse_CONTINUE,
535+
},
536+
},
537+
},
538+
}, nil
539+
}
540+
524541
// Get content from input field
525542
userContent, nonUserMessages := extractContentFromResponsesInput(responsesRequest)
526543
observability.Infof("Responses API - Extracted user content length: %d, non-user messages count: %d", len(userContent), len(nonUserMessages))

src/semantic-router/pkg/extproc/responses_api_test.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,3 +429,62 @@ func TestHandleRequestHeaders_ResponsesAPI_ExcludeInputItems(t *testing.T) {
429429
})
430430
}
431431
}
432+
433+
func TestHandleResponsesAPIRequest_WithPreviousResponseID(t *testing.T) {
434+
// Create a test router
435+
cfg := &config.RouterConfig{
436+
VLLMEndpoints: []config.VLLMEndpoint{
437+
{
438+
Name: "primary",
439+
Address: "127.0.0.1",
440+
Port: 8000,
441+
Models: []string{"gpt-4o", "deepseek-v3"},
442+
Weight: 1,
443+
},
444+
},
445+
DefaultModel: "gpt-4o",
446+
}
447+
448+
// Create a minimal cache backend
449+
cacheBackend, _ := cache.NewCacheBackend(cache.CacheConfig{
450+
BackendType: cache.InMemoryCacheType,
451+
Enabled: false,
452+
})
453+
454+
router := &OpenAIRouter{
455+
Config: cfg,
456+
Cache: cacheBackend,
457+
}
458+
459+
// Test with previous_response_id - should NOT change model even with "auto"
460+
requestBody := []byte(`{
461+
"model": "auto",
462+
"input": "Continue from where we left off",
463+
"previous_response_id": "resp_abc123"
464+
}`)
465+
466+
ctx := &RequestContext{
467+
Headers: make(map[string]string),
468+
IsResponsesAPI: true,
469+
OriginalRequestBody: requestBody,
470+
RequestID: "test-request-456",
471+
}
472+
473+
requestBodyMsg := &ext_proc.ProcessingRequest_RequestBody{
474+
RequestBody: &ext_proc.HttpBody{
475+
Body: requestBody,
476+
},
477+
}
478+
479+
response, err := router.handleResponsesAPIRequest(requestBodyMsg, ctx, false)
480+
481+
// Should succeed and return CONTINUE without modifying the request
482+
assert.NoError(t, err)
483+
assert.NotNil(t, response)
484+
assert.NotNil(t, response.GetRequestBody())
485+
assert.Equal(t, ext_proc.CommonResponse_CONTINUE, response.GetRequestBody().Response.Status)
486+
487+
// Should NOT have body mutation (no model change)
488+
assert.Nil(t, response.GetRequestBody().Response.BodyMutation, "Should not modify body when previous_response_id is present")
489+
}
490+

website/docs/api/router.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ The router will still perform classification and routing, but the actual executi
359359

360360
- GET `/v1/responses/{id}` requests pass through without modification (no routing or classification)
361361
- POST `/v1/responses` requests go through the full routing pipeline
362-
- The `previous_response_id` parameter is preserved during routing for conversation continuity
362+
- **Conversation Chaining Limitation**: When using `previous_response_id` to chain conversations, the router will **not** change the model to ensure conversation continuity. This is because response state is stored on specific backend instances. For multi-turn conversations, specify a fixed model instead of using "auto", or ensure all backend instances share response storage.
363363
- All Responses API features (tools, reasoning, streaming, background) work transparently through the router
364364

365365
## Routing Headers

0 commit comments

Comments
 (0)