
Commit ea42cf6

Fix BadRequestError due to invalid max_tokens
This patch ensures that if max_tokens is not defined it is set to None. This avoids failures with some providers, which have no protection against it being set to 0. Issue: #3666
1 parent 351c4b9 commit ea42cf6
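
To illustrate the failure mode the commit message describes, here is a minimal sketch (not code from this patch; the helper name build_request_kwargs and the provider behaviour are illustrative assumptions) of how a pass-through adapter ends up sending max_tokens=0 when the field defaults to 0, and why a None default lets the field be omitted instead:

# Hypothetical pass-through adapter, for illustration only.
def build_request_kwargs(model: str, max_tokens: int | None) -> dict:
    kwargs = {"model": model}
    # Old default (0): this check passes, the provider receives max_tokens=0,
    # and backends without a guard for 0 reject the request (BadRequestError).
    # New default (None): the field is simply left out of the request.
    if max_tokens is not None:
        kwargs["max_tokens"] = max_tokens
    return kwargs

print(build_request_kwargs("some-model", None))  # {'model': 'some-model'}
print(build_request_kwargs("some-model", 512))   # {'model': 'some-model', 'max_tokens': 512}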

8 files changed (+3 −7 lines changed)


docs/static/deprecated-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
@@ -4238,7 +4238,6 @@
           },
           "max_tokens": {
             "type": "integer",
-            "default": 0,
             "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
           },
           "repetition_penalty": {

docs/static/deprecated-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
@@ -3080,7 +3080,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's

docs/static/experimental-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
@@ -2713,7 +2713,6 @@
           },
           "max_tokens": {
             "type": "integer",
-            "default": 0,
             "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
           },
           "repetition_penalty": {

docs/static/experimental-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
@@ -1927,7 +1927,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's

docs/static/stainless-llama-stack-spec.html

Lines changed: 0 additions & 1 deletion
@@ -15472,7 +15472,6 @@
           },
           "max_tokens": {
             "type": "integer",
-            "default": 0,
             "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
          },
           "repetition_penalty": {

docs/static/stainless-llama-stack-spec.yaml

Lines changed: 0 additions & 1 deletion
@@ -11501,7 +11501,6 @@ components:
         description: The sampling strategy.
       max_tokens:
         type: integer
-        default: 0
         description: >-
           The maximum number of tokens that can be generated in the completion.
           The token count of your prompt plus max_tokens cannot exceed the model's

llama_stack/apis/inference/inference.py

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ class SamplingParams(BaseModel):
 
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
-    max_tokens: int | None = 0
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None

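A minimal sketch of the practical effect of the one-line change above, using a simplified stand-in model rather than the real SamplingParams class (assumes Pydantic v2 for model_dump):

from pydantic import BaseModel

# Simplified stand-in mirroring the patched field; the real class lives in
# llama_stack/apis/inference/inference.py.
class SamplingParamsSketch(BaseModel):
    max_tokens: int | None = None  # previously: int | None = 0
    repetition_penalty: float | None = 1.0

params = SamplingParamsSketch()
# With exclude_none=True an unset max_tokens no longer appears in the payload,
# so a provider never sees max_tokens=0.
print(params.model_dump(exclude_none=True))  # {'repetition_penalty': 1.0}
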
tests/integration/eval/test_eval.py

Lines changed: 2 additions & 0 deletions
@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 512,
                 },
             },
         },
@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 512,
                 },
             },
         },

0 commit comments
