1 change: 0 additions & 1 deletion docs/static/deprecated-llama-stack-spec.html
@@ -4238,7 +4238,6 @@
             },
             "max_tokens": {
                 "type": "integer",
-                "default": 0,
                 "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
             },
             "repetition_penalty": {
1 change: 0 additions & 1 deletion docs/static/deprecated-llama-stack-spec.yaml
@@ -3087,7 +3087,6 @@ components:
           description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's
1 change: 0 additions & 1 deletion docs/static/experimental-llama-stack-spec.html
@@ -2713,7 +2713,6 @@
             },
             "max_tokens": {
                 "type": "integer",
-                "default": 0,
                 "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
             },
             "repetition_penalty": {
1 change: 0 additions & 1 deletion docs/static/experimental-llama-stack-spec.yaml
@@ -1927,7 +1927,6 @@ components:
           description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's
1 change: 0 additions & 1 deletion docs/static/stainless-llama-stack-spec.html
@@ -15472,7 +15472,6 @@
             },
             "max_tokens": {
                 "type": "integer",
-                "default": 0,
                 "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
             },
             "repetition_penalty": {
1 change: 0 additions & 1 deletion docs/static/stainless-llama-stack-spec.yaml
@@ -11541,7 +11541,6 @@ components:
           description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's
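
The six hunks above are regenerated artifacts; the `default: 0` entry disappears because the Pydantic field changed below no longer carries one. A minimal sketch of the schema-level difference, using plain Pydantic v2 rather than the project's actual spec generator (which, judging by these hunks, also omits null defaults from the published OpenAPI):

from pydantic import BaseModel


class Before(BaseModel):
    max_tokens: int | None = 0


class After(BaseModel):
    max_tokens: int | None = None


# The old field advertises a concrete default that clients may materialize:
print(Before.model_json_schema()["properties"]["max_tokens"])
# {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'default': 0, 'title': 'Max Tokens'}

# Plain Pydantic still emits 'default': None here; the specs above simply
# drop the key for a null default, so no default is published at all.
print(After.model_json_schema()["properties"]["max_tokens"])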
2 changes: 1 addition & 1 deletion llama_stack/apis/inference/inference.py
@@ -96,7 +96,7 @@ class SamplingParams(BaseModel):
 
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
-    max_tokens: int | None = 0
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None
 
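
This one-line default change is the root fix: `0` is a real value that flows downstream, and an OpenAI-compatible backend can read `max_tokens: 0` as a request to generate zero tokens, whereas `None` means unset and can be dropped before the request leaves the stack. A minimal sketch of that distinction, assuming Pydantic v2; `build_provider_request` is a hypothetical helper, not code from this PR:

from pydantic import BaseModel


class SamplingParams(BaseModel):
    # Field changed in this PR: None now means "let the provider decide".
    max_tokens: int | None = None
    repetition_penalty: float | None = 1.0
    stop: list[str] | None = None


def build_provider_request(params: SamplingParams) -> dict:
    # Hypothetical helper: omit unset fields so a spurious 0 can never
    # reach a provider and be interpreted as "produce zero tokens".
    return params.model_dump(exclude_none=True)


print(build_provider_request(SamplingParams()))
# {'repetition_penalty': 1.0}  (max_tokens omitted; provider default applies)
print(build_provider_request(SamplingParams(max_tokens=512)))
# {'max_tokens': 512, 'repetition_penalty': 1.0}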
2 changes: 2 additions & 0 deletions tests/integration/eval/test_eval.py
@@ -55,6 +55,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 512,
                 },
             },
         },
@@ -88,6 +89,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
                 "model": text_model_id,
                 "sampling_params": {
                     "temperature": 0.0,
+                    "max_tokens": 512,
                 },
             },
         },
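
With no concrete default left to fall back on, the eval tests now pin an explicit token budget so generation stays bounded whatever the provider's own default is. A sketch of the candidate config these two hunks build, with a placeholder model id; only the sampling_params fields are confirmed by the diff:

# Placeholder; the real tests receive this via the text_model_id fixture.
text_model_id = "meta-llama/Llama-3.1-8B-Instruct"

eval_candidate = {
    "type": "model",  # assumed wrapper shape, inferred from the hunk context
    "model": text_model_id,
    "sampling_params": {
        "temperature": 0.0,
        # Added by this PR: an explicit cap now that max_tokens defaults to None.
        "max_tokens": 512,
    },
}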