diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index f8604d0f..7ee36c49 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -11,8 +11,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -29,8 +29,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index bf7f34cf..634ab52c 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -10,11 +10,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -31,11 +31,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -52,11 +52,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 95d44af4..5060149a 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -14,8 +14,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -32,8 +32,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -52,7 +52,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" - name: Install pre-commit run: pip install pre-commit - name: Run pre-commit checks diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d4fe2494..c7c7b8f7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,11 +11,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -32,11 +32,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -53,11 +53,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/pyproject.toml b/pyproject.toml index e0b47007..4eb171f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ where = ["src"] include = ["*"] [tool.setuptools.package-data] -guidellm = ["*"] +"guidellm.data" = ["*.gz"] # ************************************************ @@ -32,6 +32,7 @@ dependencies = [ "loguru", "numpy", "pillow", + "protobuf", "pydantic>=2.0.0", "pydantic-settings>=2.0.0", "pyyaml>=6.0.0", @@ -77,7 +78,7 @@ dev = [ [project.entry-points.console_scripts] -guidellm = "guidellm.main:generate_benchmark_report_cli" +guidellm = "guidellm.__main__:cli" guidellm-config = "guidellm.config:print_config" diff --git a/src/guidellm/__init__.py b/src/guidellm/__init__.py index e5620188..929d046e 100644 --- a/src/guidellm/__init__.py +++ b/src/guidellm/__init__.py @@ -6,14 +6,22 @@ # flake8: noqa import os -import transformers # type: ignore +import logging +import contextlib -os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers 
-transformers.logging.set_verbosity_error() # Silence warnings for transformers +with open(os.devnull, "w") as devnull, contextlib.redirect_stderr( + devnull +), contextlib.redirect_stdout(devnull): + from transformers.utils import logging as hf_logging # type: ignore[import] + + # Set the log level for the transformers library to ERROR + # to ignore None of PyTorch, TensorFlow found + os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers + hf_logging.set_verbosity_error() + logging.getLogger("transformers").setLevel(logging.ERROR) from .config import settings from .logger import configure_logger, logger -from .main import generate_benchmark_report __all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"] diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py new file mode 100644 index 00000000..096614de --- /dev/null +++ b/src/guidellm/__main__.py @@ -0,0 +1,270 @@ +import asyncio +import json +from pathlib import Path +from typing import get_args + +import click + +from guidellm.backend import BackendType +from guidellm.benchmark import ProfileType, benchmark_generative_text +from guidellm.scheduler import StrategyType + +STRATEGY_PROFILE_CHOICES = set( + list(get_args(ProfileType)) + list(get_args(StrategyType)) +) + + +def parse_json(ctx, param, value): # noqa: ARG001 + if value is None: + return None + try: + return json.loads(value) + except json.JSONDecodeError as err: + raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err + + +def parse_number_str(ctx, param, value): # noqa: ARG001 + if value is None: + return None + + values = value.split(",") if "," in value else [value] + + try: + return [int(val) if val.isdigit() else float(val) for val in values] + except ValueError as err: + raise click.BadParameter( + f"{param.name} must be a number or comma-separated list of numbers." + ) from err + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option( + "--target", + required=True, + type=str, + help="The target path for the backend to run benchmarks against. For example, http://localhost:8000", +) +@click.option( + "--backend-type", + type=click.Choice(list(get_args(BackendType))), + help=( + "The type of backend to use to run requests against. Defaults to 'openai_http'." + f" Supported types: {', '.join(get_args(BackendType))}" + ), + default="openai_http", +) +@click.option( + "--backend-args", + callback=parse_json, + default=None, + help=( + "A JSON string containing any arguments to pass to the backend as a " + "dict with **kwargs." + ), +) +@click.option( + "--model", + default=None, + type=str, + help=( + "The ID of the model to benchmark within the backend. " + "If None provided (default), then it will use the first model available." + ), +) +@click.option( + "--processor", + default=None, + type=str, + help=( + "The processor or tokenizer to use to calculate token counts for statistics " + "and synthetic data generation. If None provided (default), will load " + "using the model arg, if needed." + ), +) +@click.option( + "--processor-args", + default=None, + callback=parse_json, + help=( + "A JSON string containing any arguments to pass to the processor constructor " + "as a dict with **kwargs." + ), +) +@click.option( + "--data", + required=True, + type=str, + help=( + "The HuggingFace dataset ID, a path to a HuggingFace dataset, " + "a path to a data file csv, json, jsonl, or txt, " + "or a synthetic data config as a json or key=value string." 
+ ), +) +@click.option( + "--data-args", + callback=parse_json, + help=( + "A JSON string containing any arguments to pass to the dataset creation " + "as a dict with **kwargs." + ), +) +@click.option( + "--data-sampler", + default=None, + type=click.Choice(["random"]), + help=( + "The data sampler type to use. 'random' will add a random shuffle on the data. " + "Defaults to None" + ), +) +@click.option( + "--rate-type", + required=True, + type=click.Choice(STRATEGY_PROFILE_CHOICES), + help=( + "The type of benchmark to run. " + f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. " + ), +) +@click.option( + "--rate", + default=None, + callback=parse_number_str, + help=( + "The rates to run the benchmark at. " + "Can be a single number or a comma-separated list of numbers. " + "For rate-type=sweep, this is the number of benchmarks it runs in the sweep. " + "For rate-type=concurrent, this is the number of concurrent requests. " + "For rate-type=async,constant,poisson, this is the rate requests per second. " + "For rate-type=synchronous,throughput, this must not be set." + ), +) +@click.option( + "--max-seconds", + type=float, + help=( + "The maximum number of seconds each benchmark can run for. " + "If None, will run until max_requests or the data is exhausted." + ), +) +@click.option( + "--max-requests", + type=int, + help=( + "The maximum number of requests each benchmark can run for. " + "If None, will run until max_seconds or the data is exhausted." + ), +) +@click.option( + "--warmup-percent", + type=float, + default=None, + help=( + "The percent of the benchmark (based on max-seconds, max-requets, " + "or lenth of dataset) to run as a warmup and not include in the final results. " + "Defaults to None." + ), +) +@click.option( + "--cooldown-percent", + type=float, + help=( + "The percent of the benchmark (based on max-seconds, max-requets, or lenth " + "of dataset) to run as a cooldown and not include in the final results. " + "Defaults to None." + ), +) +@click.option( + "--disable-progress", + is_flag=True, + help="Set this flag to disable progress updates to the console", +) +@click.option( + "--display-scheduler-stats", + is_flag=True, + help="Set this flag to display stats for the processes running the benchmarks", +) +@click.option( + "--disable-console-outputs", + is_flag=True, + help="Set this flag to disable console output", +) +@click.option( + "--output-path", + type=click.Path(), + default=Path.cwd() / "benchmarks.json", + help=( + "The path to save the output to. If it is a directory, " + "it will save benchmarks.json under it. " + "Otherwise, json, yaml, or csv files are supported for output types " + "which will be read from the extension for the file path." 
+ ), +) +@click.option( + "--output-extras", + callback=parse_json, + help="A JSON string of extra data to save with the output benchmarks", +) +@click.option( + "--random-seed", + default=42, + type=int, + help="The random seed to use for benchmarking to ensure reproducibility.", +) +def benchmark( + target, + backend_type, + backend_args, + model, + processor, + processor_args, + data, + data_args, + data_sampler, + rate_type, + rate, + max_seconds, + max_requests, + warmup_percent, + cooldown_percent, + disable_progress, + display_scheduler_stats, + disable_console_outputs, + output_path, + output_extras, + random_seed, +): + asyncio.run( + benchmark_generative_text( + target=target, + backend_type=backend_type, + backend_args=backend_args, + model=model, + processor=processor, + processor_args=processor_args, + data=data, + data_args=data_args, + data_sampler=data_sampler, + rate_type=rate_type, + rate=rate, + max_seconds=max_seconds, + max_requests=max_requests, + warmup_percent=warmup_percent, + cooldown_percent=cooldown_percent, + show_progress=not disable_progress, + show_progress_scheduler_stats=display_scheduler_stats, + output_console=not disable_console_outputs, + output_path=output_path, + output_extras=output_extras, + random_seed=random_seed, + ) + ) + + +if __name__ == "__main__": + cli() diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index a45a66a7..8dc2ef8f 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -2,7 +2,7 @@ Backend, BackendType, ) -from .openai import OpenAIHTTPBackend +from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend from .response import ( RequestArgs, ResponseSummary, @@ -18,4 +18,6 @@ "Backend", "BackendType", "OpenAIHTTPBackend", + "TEXT_COMPLETIONS_PATH", + "CHAT_COMPLETIONS_PATH", ] diff --git a/src/guidellm/backend/backend.py b/src/guidellm/backend/backend.py index e2b89f1e..ff80769a 100644 --- a/src/guidellm/backend/backend.py +++ b/src/guidellm/backend/backend.py @@ -1,4 +1,3 @@ -import asyncio from abc import ABC, abstractmethod from pathlib import Path from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union @@ -102,27 +101,32 @@ def model(self) -> Optional[str]: """ ... - def validate(self): + @property + @abstractmethod + def info(self) -> Dict[str, Any]: + """ + :return: The information about the backend. + """ + ... + + async def validate(self): """ Handle final setup and validate the backend is ready for use. If not successful, raises the appropriate exception. """ logger.info("{} validating backend {}", self.__class__.__name__, self.type_) - self.check_setup() - models = self.available_models() + await self.check_setup() + models = await self.available_models() if not models: raise ValueError("No models available for the backend") - async def _test_request(): - async for _ in self.text_completions( - prompt="Test connection", output_token_count=1 - ): # type: ignore[attr-defined] - pass - - asyncio.run(_test_request()) + async for _ in self.text_completions( + prompt="Test connection", output_token_count=1 + ): # type: ignore[attr-defined] + pass @abstractmethod - def check_setup(self): + async def check_setup(self): """ Check the setup for the backend. If unsuccessful, raises the appropriate exception. @@ -132,7 +136,17 @@ def check_setup(self): ... 
@abstractmethod - def available_models(self) -> List[str]: + async def prepare_multiprocessing(self): + """ + Prepare the backend for use in a multiprocessing environment. + This is useful for backends that have instance state that can not + be shared across processes and should be cleared out and re-initialized + for each new process. + """ + ... + + @abstractmethod + async def available_models(self) -> List[str]: """ Get the list of available models for the backend. diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 7870a949..48bde08b 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -16,7 +16,11 @@ ) from guidellm.config import settings -__all__ = ["OpenAIHTTPBackend"] +__all__ = ["OpenAIHTTPBackend", "TEXT_COMPLETIONS_PATH", "CHAT_COMPLETIONS_PATH"] + + +TEXT_COMPLETIONS_PATH = "/v1/completions" +CHAT_COMPLETIONS_PATH = "/v1/chat/completions" @Backend.register("openai_http") @@ -61,6 +65,17 @@ def __init__( ): super().__init__(type_="openai_http") self._target = target or settings.openai.base_url + + if not self._target: + raise ValueError("Target URL must be provided for OpenAI HTTP backend.") + + if self._target.endswith("/v1") or self._target.endswith("/v1/"): + # backwards compatability, strip v1 off + self._target = self._target[:-3] + + if self._target.endswith("/"): + self._target = self._target[:-1] + self._model = model api_key = api_key or settings.openai.api_key @@ -77,6 +92,7 @@ def __init__( if max_output_tokens is not None else settings.openai.max_output_tokens ) + self._async_client: Optional[httpx.AsyncClient] = None @property def target(self) -> str: @@ -94,7 +110,23 @@ def model(self) -> Optional[str]: """ return self._model - def check_setup(self): + @property + def info(self) -> Dict[str, Any]: + """ + :return: The information about the backend. + """ + return { + "max_output_tokens": self.max_output_tokens, + "timeout": self.timeout, + "http2": self.http2, + "authorization": bool(self.authorization), + "organization": self.organization, + "project": self.project, + "text_completions_path": TEXT_COMPLETIONS_PATH, + "chat_completions_path": CHAT_COMPLETIONS_PATH, + } + + async def check_setup(self): """ Check if the backend is setup correctly and can be used for requests. Specifically, if a model is not provided, it grabs the first available model. @@ -103,7 +135,7 @@ def check_setup(self): :raises ValueError: If no models or the provided model is not available. """ - models = self.available_models() + models = await self.available_models() if not models: raise ValueError(f"No models available for target: {self.target}") @@ -115,24 +147,32 @@ def check_setup(self): "{models} for target: {self.target}" ) - def available_models(self) -> List[str]: + async def prepare_multiprocessing(self): + """ + Prepare the backend for use in a multiprocessing environment. + Clears out the sync and async clients to ensure they are re-initialized + for each process. 
+ """ + if self._async_client is not None: + await self._async_client.aclose() + self._async_client = None + + async def available_models(self) -> List[str]: """ Get the available models for the target server using the OpenAI models endpoint: /v1/models """ target = f"{self.target}/v1/models" headers = self._headers() + response = await self._get_async_client().get(target, headers=headers) + response.raise_for_status() - with httpx.Client(http2=self.http2, timeout=self.timeout) as client: - response = client.get(target, headers=headers) - response.raise_for_status() + models = [] - models = [] + for item in response.json()["data"]: + models.append(item["id"]) - for item in response.json()["data"]: - models.append(item["id"]) - - return models + return models async def text_completions( # type: ignore[override] self, @@ -160,7 +200,6 @@ async def text_completions( # type: ignore[override] a StreamingTextResponse for each received iteration, and a ResponseSummary for the final response. """ - logger.debug("{} invocation with args: {}", self.__class__.__name__, locals()) headers = self._headers() payload = self._completions_payload( @@ -171,7 +210,7 @@ async def text_completions( # type: ignore[override] try: async for resp in self._iterative_completions_request( - type_="text", + type_="text_completions", request_id=request_id, request_prompt_tokens=prompt_token_count, request_output_tokens=output_token_count, @@ -246,7 +285,7 @@ async def chat_completions( # type: ignore[override] try: async for resp in self._iterative_completions_request( - type_="chat", + type_="chat_completions", request_id=request_id, request_prompt_tokens=prompt_token_count, request_output_tokens=output_token_count, @@ -264,6 +303,21 @@ async def chat_completions( # type: ignore[override] ) raise ex + def _get_async_client(self) -> httpx.AsyncClient: + """ + Get the async HTTP client for making requests. + If the client has not been created yet, it will create one. + + :return: The async HTTP client. 
+ """ + if self._async_client is None: + client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout) + self._async_client = client + else: + client = self._async_client + + return client + def _headers(self) -> Dict[str, str]: headers = { "Content-Type": "application/json", @@ -372,19 +426,17 @@ def _create_chat_messages( async def _iterative_completions_request( self, - type_: Literal["text", "chat"], + type_: Literal["text_completions", "chat_completions"], request_id: Optional[str], request_prompt_tokens: Optional[int], request_output_tokens: Optional[int], headers: Dict, payload: Dict, ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]: - target = f"{self.target}/v1/" - - if type_ == "text": - target += "completions" - elif type_ == "chat": - target += "chat/completions" + if type_ == "text_completions": + target = f"{self.target}{TEXT_COMPLETIONS_PATH}" + elif type_ == "chat_completions": + target = f"{self.target}{CHAT_COMPLETIONS_PATH}" else: raise ValueError(f"Unsupported type: {type_}") @@ -400,58 +452,72 @@ async def _iterative_completions_request( payload, ) - async with httpx.AsyncClient(http2=self.http2, timeout=self.timeout) as client: - response_value = "" - response_prompt_count: Optional[int] = None - response_output_count: Optional[int] = None - iter_count = 0 - start_time = time.time() - iter_time = start_time - - yield StreamingTextResponse( - type_="start", - iter_count=iter_count, - delta="", - time=start_time, - request_id=request_id, - ) + response_value = "" + response_prompt_count: Optional[int] = None + response_output_count: Optional[int] = None + iter_count = 0 + start_time = time.time() + iter_time = start_time + first_iter_time: Optional[float] = None + last_iter_time: Optional[float] = None + + yield StreamingTextResponse( + type_="start", + value="", + start_time=start_time, + first_iter_time=None, + iter_count=iter_count, + delta="", + time=start_time, + request_id=request_id, + ) - async with client.stream( - "POST", target, headers=headers, json=payload - ) as stream: - stream.raise_for_status() - - async for line in stream.aiter_lines(): - iter_time = time.time() - logger.debug( - "{} request: {} recieved iter response line: {}", - self.__class__.__name__, - request_id, - line, + # reset start time after yielding start response to ensure accurate timing + start_time = time.time() + + async with self._get_async_client().stream( + "POST", target, headers=headers, json=payload + ) as stream: + stream.raise_for_status() + + async for line in stream.aiter_lines(): + iter_time = time.time() + logger.debug( + "{} request: {} recieved iter response line: {}", + self.__class__.__name__, + request_id, + line, + ) + + if not line or not line.strip().startswith("data:"): + continue + + if line.strip() == "data: [DONE]": + break + + data = json.loads(line.strip()[len("data: ") :]) + if delta := self._extract_completions_delta_content(type_, data): + if first_iter_time is None: + first_iter_time = iter_time + last_iter_time = iter_time + + iter_count += 1 + response_value += delta + + yield StreamingTextResponse( + type_="iter", + value=response_value, + iter_count=iter_count, + start_time=start_time, + first_iter_time=first_iter_time, + delta=delta, + time=iter_time, + request_id=request_id, ) - if not line or not line.strip().startswith("data:"): - continue - - if line.strip() == "data: [DONE]": - break - - data = json.loads(line.strip()[len("data: ") :]) - if delta := self._extract_completions_delta_content(type_, data): - iter_count 
+= 1 - response_value += delta - - yield StreamingTextResponse( - type_="iter", - iter_count=iter_count, - delta=delta, - time=iter_time, - request_id=request_id, - ) - - if usage := self._extract_completions_usage(data): - response_prompt_count = usage["prompt"] - response_output_count = usage["output"] + if usage := self._extract_completions_usage(data): + response_prompt_count = usage["prompt"] + response_output_count = usage["output"] logger.info( "{} request: {} with headers: {} and payload: {} completed with: {}", @@ -473,6 +539,8 @@ async def _iterative_completions_request( ), start_time=start_time, end_time=iter_time, + first_iter_time=first_iter_time, + last_iter_time=last_iter_time, iterations=iter_count, request_prompt_tokens=request_prompt_tokens, request_output_tokens=request_output_tokens, @@ -483,15 +551,15 @@ async def _iterative_completions_request( @staticmethod def _extract_completions_delta_content( - type_: Literal["text", "chat"], data: Dict + type_: Literal["text_completions", "chat_completions"], data: Dict ) -> Optional[str]: if "choices" not in data or not data["choices"]: return None - if type_ == "text": + if type_ == "text_completions": return data["choices"][0]["text"] - if type_ == "chat": + if type_ == "chat_completions": return data["choices"][0]["delta"]["content"] raise ValueError(f"Unsupported type: {type_}") diff --git a/src/guidellm/backend/response.py b/src/guidellm/backend/response.py index 699f41cc..9dc74578 100644 --- a/src/guidellm/backend/response.py +++ b/src/guidellm/backend/response.py @@ -1,9 +1,9 @@ from typing import Any, Dict, Literal, Optional -from loguru import logger -from pydantic import BaseModel, computed_field +from pydantic import computed_field from guidellm.config import settings +from guidellm.objects.pydantic import StandardBaseModel __all__ = [ "StreamingResponseType", @@ -16,11 +16,13 @@ StreamingResponseType = Literal["start", "iter"] -class StreamingTextResponse(BaseModel): +class StreamingTextResponse(StandardBaseModel): """ A model representing the response content for a streaming text request. :param type_: The type of the response; either 'start' or 'iter'. + :param value: The value of the response up to this iteration. + :param start_time: The time.time() the request started. :param iter_count: The iteration count for the response. For 'start' this is 0 and for the first 'iter' it is 1. :param delta: The text delta added to the response for this stream iteration. @@ -30,13 +32,16 @@ class StreamingTextResponse(BaseModel): """ type_: StreamingResponseType + value: str + start_time: float + first_iter_time: Optional[float] iter_count: int delta: str time: float request_id: Optional[str] = None -class RequestArgs(BaseModel): +class RequestArgs(StandardBaseModel): """ A model representing the arguments for a request to a backend. Biases towards an HTTP request, but can be used for other types of backends. @@ -56,19 +61,28 @@ class RequestArgs(BaseModel): http2: Optional[bool] = None -class ResponseSummary(BaseModel): +class ResponseSummary(StandardBaseModel): """ A model representing a summary of a backend request. Always returned as the final iteration of a streaming request. :param value: The final value returned from the request. :param request_args: The arguments used to make the request. + :param iterations: The number of iterations in the request. :param start_time: The time the request started. :param end_time: The time the request ended. - :param iterations: The number of iterations in the request. 
- :param prompt_tokens: The number of tokens in the prompt, if any usage was returned. - :param output_tokens: The number of tokens in the output, if any usage was returned. + :param first_iter_time: The time the first iteration was received. + :param last_iter_time: The time the last iteration was received. + :param request_prompt_tokens: The number of tokens measured in the prompt + for the request, if any. + :param request_output_tokens: The number of tokens enforced for the output + for the request, if any. + :param response_prompt_tokens: The number of tokens measured in the prompt + for the response, if any. + :param response_output_tokens: The number of tokens measured in the output + for the response, if any. :param request_id: The unique identifier for the request, if any. + :param error: The error message, if any, returned from making the request. """ value: str @@ -76,11 +90,14 @@ class ResponseSummary(BaseModel): iterations: int = 0 start_time: float end_time: float + first_iter_time: Optional[float] + last_iter_time: Optional[float] request_prompt_tokens: Optional[int] = None request_output_tokens: Optional[int] = None response_prompt_tokens: Optional[int] = None response_output_tokens: Optional[int] = None request_id: Optional[str] = None + error: Optional[str] = None @computed_field # type: ignore[misc] @property @@ -91,21 +108,7 @@ def prompt_tokens(self) -> Optional[int]: :return: The number of tokens in the prompt, if any. """ - if settings.preferred_prompt_tokens_source == "backend": - if self.response_prompt_tokens is None: - logger.warning( - "Preferred prompt tokens source is backend, but no prompt token " - f"values were returned with the response for {self}. " - "Defulating to request_prompt_tokens (if available)." - ) - return self.response_prompt_tokens or self.request_prompt_tokens - elif settings.preferred_prompt_tokens_source == "request": - if self.request_prompt_tokens is None: - logger.warning( - "Preferred prompt tokens source is request, but no prompt token " - f"values were returned with the request for {self}. " - "Defulating to response_prompt_tokens (if available)." - ) + if settings.preferred_prompt_tokens_source == "request": return self.request_prompt_tokens or self.response_prompt_tokens return self.response_prompt_tokens or self.request_prompt_tokens @@ -119,21 +122,11 @@ def output_tokens(self) -> Optional[int]: :return: The number of tokens in the output, if any. """ - if settings.preferred_output_tokens_source == "backend": - if self.response_output_tokens is None: - logger.warning( - "Preferred output tokens source is backend, but no output token " - f"values were returned with the response for {self}. " - "Defulating to request_output_tokens (if available)." - ) - return self.response_output_tokens or self.request_output_tokens - elif settings.preferred_output_tokens_source == "request": - if self.request_output_tokens is None: - logger.warning( - "Preferred output tokens source is request, but no output token " - f"values were returned with the request for {self}. " - "Defulating to response_output_tokens (if available)." 
- ) + if self.error is not None: + # error occurred, can't trust request tokens were all generated + return self.response_output_tokens + + if settings.preferred_output_tokens_source == "request": + return self.request_output_tokens or self.response_output_tokens return self.response_output_tokens or self.request_output_tokens diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py new file mode 100644 index 00000000..dc100596 --- /dev/null +++ b/src/guidellm/benchmark/__init__.py @@ -0,0 +1,35 @@ +from .aggregator import AggregatorT, BenchmarkAggregator, GenerativeBenchmarkAggregator +from .benchmark import Benchmark, BenchmarkT, GenerativeBenchmark +from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker +from .entrypoints import benchmark_generative_text +from .profile import ( + AsyncProfile, + ConcurrentProfile, + Profile, + ProfileType, + SweepProfile, + SynchronousProfile, + ThroughputProfile, + create_profile, +) + +__all__ = [ + "AggregatorT", + "BenchmarkT", + "Benchmark", + "BenchmarkAggregator", + "GenerativeBenchmark", + "GenerativeBenchmarkAggregator", + "Benchmarker", + "BenchmarkerResult", + "GenerativeBenchmarker", + "AsyncProfile", + "ConcurrentProfile", + "Profile", + "ProfileType", + "SweepProfile", + "SynchronousProfile", + "ThroughputProfile", + "create_profile", + "benchmark_generative_text", +] diff --git a/src/guidellm/benchmark/aggregator.py b/src/guidellm/benchmark/aggregator.py new file mode 100644 index 00000000..6bd69d28 --- /dev/null +++ b/src/guidellm/benchmark/aggregator.py @@ -0,0 +1,763 @@ +import time +from abc import ABC, abstractmethod +from pathlib import Path +from typing import ( + Any, + Dict, + Generic, + List, + Literal, + Optional, + Tuple, + TypeVar, + Union, +) + +from pydantic import Field + +from guidellm.backend import ResponseSummary +from guidellm.benchmark.benchmark import ( + BenchmarkArgs, + BenchmarkRunStats, + BenchmarkT, + GenerativeBenchmark, + GenerativeTextErrorStats, + GenerativeTextResponseStats, +) +from guidellm.config import settings +from guidellm.objects import ( + RunningStats, + StandardBaseModel, + StatusBreakdown, + TimeRunningStats, +) +from guidellm.request import ( + GenerationRequest, + GenerativeRequestLoaderDescription, + RequestLoaderDescription, +) +from guidellm.scheduler import ( + GenerativeRequestsWorkerDescription, + RequestT, + ResponseT, + SchedulerRequestResult, + WorkerDescription, +) +from guidellm.utils import check_load_processor + +__all__ = [ + "AggregatorT", + "BenchmarkAggregator", + "GenerativeBenchmarkAggregator", +] + + +class SchedulerRunningStats(StandardBaseModel): + """ + The metrics for the scheduler stored as running statistics for easy calculations + of rates, averages, totals, etc. + """ + + created_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests created for this " + "benchmark run. This includes all requests created, regardless of " + "their status." + ), + default_factory=RunningStats, + ) + queued_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests pending in queue " + "for this benchmark run. This includes requests that are waiting to " + "be scheduled." + ), + default_factory=RunningStats, + ) + scheduled_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests scheduled (actively " + "running but waiting for the desired start time) for this benchmark run."
+ ), + default_factory=RunningStats, + ) + processing_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests actively being " + "processed by the worker for this benchmark run." + ), + default_factory=RunningStats, + ) + completed_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests completed for this " + "benchmark run. This includes requests within the warmup and cooldown " + "period, if any, along with the final results." + ), + default_factory=RunningStats, + ) + + +class RequestsRunningStats(StandardBaseModel): + """ + The metrics for requests that have succeeded, been canceled, or errored stored + as running statistics for easy calculations of rates, averages, totals, etc. + """ + + totals: StatusBreakdown[RunningStats, RunningStats, RunningStats, RunningStats] = ( + Field( + description=( + "The running statistics for the total number of requests that " + "completed within the benchmark run." + ), + default_factory=lambda: StatusBreakdown( + successful=RunningStats(), + errored=RunningStats(), + incomplete=RunningStats(), + total=RunningStats(), + ), + ) + ) + queued_time: TimeRunningStats = Field( + description=( + "The running statistics for the time spent in queue for all requests that " + "completed within the benchmark run. This is the time from when the " + "request was created to when it was dequeued by the worker." + ), + default_factory=TimeRunningStats, + ) + scheduled_time_delay: TimeRunningStats = Field( + description=( + "The running statistics for the time spent from when a request was " + "dequeued by the worker to when it was actually scheduled by the worker" + "for all requests that completed within the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ), + default_factory=TimeRunningStats, + ) + scheduled_time_sleep: TimeRunningStats = Field( + description=( + "The running statistics for the time for each request spent sleeping til " + "the desired start time was reached for all requests that completed within " + "the benchmark run. This is the time from when the request was scheduled " + "to when the desired start time was reached. " + ), + default_factory=TimeRunningStats, + ) + worker_start_delay: TimeRunningStats = Field( + description=( + "The running statistics for the time delay between when the request was " + "scheduled and when the worker actually started processing subtracting any " + "sleep time for all requests that completed within the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ), + default_factory=TimeRunningStats, + ) + worker_time: TimeRunningStats = Field( + description=( + "The running statistics for the time spent processing all requests that " + "completed within the benchmark run. This is the time from when the " + "request was started to when it was completed." + ), + default_factory=TimeRunningStats, + ) + worker_start_time_targeted_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay between the targeted start time and " + "the actual start time for requests that completed within the benchmark " + "run. This represents delays from the best case desired start time. " + "For async strategies, this represents delays from the ideal system. 
" + "For sync strategies, since those are doubled in queue, this should be " + "as close to the time for a request to be processed as possible." + ), + default_factory=TimeRunningStats, + ) + request_start_time_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay between the actual request being " + "made and the time the worker started on the request for all requests " + "that completed within the benchmark run. This time should be as close to " + "0 as possible, any additional time is overhead from the system or " + "the worker." + ), + default_factory=TimeRunningStats, + ) + request_start_time_targeted_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay between the targeted start time and " + "the actual start time for all requests that completed within the " + "benchmark run. This represents delays from the best case desired start " + "time. For async strategies, this represents delays from the ideal system. " + "For sync strategies, since those are duplicated in queue, this should be " + "as close to the time for a request to be processed." + ), + default_factory=TimeRunningStats, + ) + request_time_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay in time between the total request " + "time and the worker time. This should be as close to 0 as possible, any " + "additional time is overhead from the system or the worker. " + ), + default_factory=TimeRunningStats, + ) + request_time: TimeRunningStats = Field( + description=( + "The running statistics for the time spent processing all requests that " + "completed within the benchmark run. This is the time from when the " + "request was created to when it was completed." + ), + default_factory=TimeRunningStats, + ) + + +class BenchmarkAggregator( + ABC, StandardBaseModel, Generic[BenchmarkT, RequestT, ResponseT] +): + """ + A pydantic base class representing the base class for aggregating benchmark results. + The purpose is to receive and process results from a Benchmarker as it iterates + through a Scheduler for an individual benchmark run. + As results are added, lightweight statistics are updated and stored for immediate + progress and informational updates to the caller. + Once the benchmark run is complete, the `compile` method is called to finalize + the benchmark and return a Benchmark object with all the results and statistics + fully calculated. + """ + + type_: Literal["benchmark_aggregator"] = "benchmark_aggregator" + run_id: str = Field( + description=( + "The unique identifier for the encompasing benchmark run that this " + "benchmark was a part of." + ) + ) + args: BenchmarkArgs = Field( + description=( + "The arguments used to create the benchmark run that this benchmark was " + "a part of." + ) + ) + worker_description: Union[ + GenerativeRequestsWorkerDescription, WorkerDescription + ] = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." + ), + discriminator="type_", + ) + request_loader_description: Union[ + GenerativeRequestLoaderDescription, RequestLoaderDescription + ] = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + discriminator="type_", + ) + extras: Dict[str, Any] = Field( + description=( + "Any additional information or metadata that was passed for this benchmark." 
+ ) + ) + in_warmup: bool = Field( + description=( + "A flag to indicate if the benchmark is currently in the warmup phase." + ), + default=False, + exclude=True, + ) + in_cooldown: bool = Field( + description=( + "A flag to indicate if the benchmark is currently in the cooldown phase." + ), + default=False, + exclude=True, + ) + scheduler_stats: SchedulerRunningStats = Field( + description=( + "The running statistics for the scheduler for this benchmark run. " + "This includes all requests created, regardless of their status." + ), + default_factory=SchedulerRunningStats, + ) + requests_stats: RequestsRunningStats = Field( + description=( + "The running statistics for the requests for this benchmark run. " + "This includes all requests created, regardless of their status." + ), + default_factory=RequestsRunningStats, + ) + results: StatusBreakdown[ + List[SchedulerRequestResult[RequestT, ResponseT]], + List[SchedulerRequestResult[RequestT, ResponseT]], + List[SchedulerRequestResult[RequestT, ResponseT]], + None, + ] = Field( + description=( + "The completed requests for this benchmark run broken down by status" + "and excluding warmup and cooldown requests." + ), + default_factory=lambda: StatusBreakdown( # type: ignore[arg-type] + successful=[], + errored=[], + incomplete=[], + total=None, + ), + ) + + def add_result( + self, + result: SchedulerRequestResult[RequestT, ResponseT], + ) -> bool: + """ + Add a result to the aggregator. This will update the internal statistics + and add the result to the list of results if it is not within the warmup or + cooldown period. + + :param result: The result to add to the aggregator. + :return: True if the result was added, False if it was added because it + did not fit within the warmup or cooldown period, was not requested, + or is not finished + """ + # Add scheduler statistics + self.scheduler_stats.created_requests += max( + 0, result.run_info.created_requests + ) + self.scheduler_stats.queued_requests += max(0, result.run_info.queued_requests) + self.scheduler_stats.scheduled_requests += max( + 0, result.run_info.scheduled_requests + ) + self.scheduler_stats.processing_requests += max( + 0, result.run_info.processing_requests + ) + self.scheduler_stats.completed_requests += max( + 0, result.run_info.completed_requests + ) + + if result.type_ != "request_complete" or ( + result.request_info.canceled and not result.request_info.requested + ): + # If the result is not completed yet, don't add to the results + # If the result was canceled and not started, ignore it + return False + + # Add request statistics + self.requests_stats.totals.total += 1 + if result.request_info.canceled: + self.requests_stats.totals.incomplete += 1 + elif result.request_info.errored: + self.requests_stats.totals.errored += 1 + elif result.request_info.completed: + self.requests_stats.totals.successful += 1 + else: + raise ValueError( + "Unexpected state: request_info must be either " + "completed, canceled, or errored. 
" + f"Got {result.request_info}" + ) + + self.requests_stats.queued_time.update( + result.request_info.dequeued_time - result.request_info.queued_time + ) + self.requests_stats.scheduled_time_delay.update( + result.request_info.scheduled_time - result.request_info.dequeued_time + ) + sleep_time = max( + 0.0, + result.request_info.targeted_start_time + - result.request_info.scheduled_time, + ) + self.requests_stats.scheduled_time_sleep.update(sleep_time) + time_to_worker_start = ( + result.request_info.worker_start - result.request_info.scheduled_time + ) + self.requests_stats.worker_start_delay.update(time_to_worker_start - sleep_time) + self.requests_stats.worker_time.update( + result.request_info.worker_end - result.request_info.worker_start + ) + self.requests_stats.worker_start_time_targeted_delay.update( + result.request_info.worker_start - result.request_info.targeted_start_time + ) + self.requests_stats.request_start_time_delay.update( + result.request_info.worker_start - result.request_info.targeted_start_time + ) + self.requests_stats.request_start_time_targeted_delay.update( + result.request_info.worker_start - result.request_info.targeted_start_time + ) + self.requests_stats.request_time_delay.update( + (result.request_info.worker_end - result.request_info.worker_start) + - (result.request_info.worker_end - result.request_info.worker_start) + ) + self.requests_stats.request_time.update( + result.request_info.worker_end - result.request_info.worker_start + ) + + # Add result to the list of results provided we are not in warmup or cooldown + total_completed = self.requests_stats.totals.total.total + global_start_time = self.requests_stats.totals.total.start_time + + in_warmup_number = ( + self.args.warmup_number and total_completed <= self.args.warmup_number + ) + in_warmup_duration = ( + self.args.warmup_duration + and result.request_info.worker_start + <= (global_start_time - self.args.warmup_duration) + ) + + if in_warmup_number or in_warmup_duration: + self.in_warmup = True + return True + + self.in_warmup = False + in_cooldown_number = ( + self.args.cooldown_number + and self.args.max_number + and total_completed > self.args.max_number - self.args.cooldown_number + ) + in_cooldown_duration = ( + self.args.cooldown_duration + and self.args.max_duration + and result.request_info.worker_start + > global_start_time + self.args.max_duration - self.args.cooldown_duration + ) + + if in_cooldown_number or in_cooldown_duration: + self.in_cooldown = True + return True + + self.in_cooldown = False + + if result.request_info.canceled: + self.results.incomplete.append(result) + elif result.request_info.errored: + self.results.errored.append(result) + elif result.request_info.completed: + self.results.successful.append(result) + else: + raise ValueError( + "Unexpected state: request_info must be either " + "completed, canceled, or errored. " + f"Got {result.request_info}" + ) + + return True + + @abstractmethod + def compile(self) -> BenchmarkT: + """ + Compile the benchmark results and statistics into a Benchmark object. + This is required to be implemented by subclasses to finalize the benchmark + and return the compiled object. + """ + ... + + +AggregatorT = TypeVar("AggregatorT", bound=BenchmarkAggregator) + + +class GenerativeRequestsRunningStats(RequestsRunningStats): + """ + The metrics for generative requests that have succeeded, been canceled, or errored + stored as running statistics for easy calculations of rates, averages, totals, etc. 
+ """ + + time_to_first_token: TimeRunningStats = Field( + description=( + "The running statistics for the time from the start of the request to the " + "first token being generated for all requests that completed within the " + "benchmark run." + ), + default_factory=TimeRunningStats, + ) + inter_token_latency: TimeRunningStats = Field( + description=( + "The running statistics for the time between each token being generated " + "for all requests that completed within the benchmark run." + ), + default_factory=TimeRunningStats, + ) + prompt_tokens: RunningStats = Field( + description=( + "The running statistics for the token count for the prompt for all " + "requests that completed, if available in the response." + ), + default_factory=RunningStats, + ) + output_tokens: RunningStats = Field( + description=( + "The running statistics for the token count for the output for all " + "requests that completed, if available in the response." + ), + default_factory=RunningStats, + ) + total_tokens: RunningStats = Field( + description=( + "The running statistics for the total token count for all requests that " + "completed, if available in the response." + ), + default_factory=RunningStats, + ) + + +class GenerativeBenchmarkAggregator( + BenchmarkAggregator[GenerativeBenchmark, GenerationRequest, ResponseSummary] +): + type_: Literal["generative_benchmark_aggregator"] = ( + "generative_benchmark_aggregator" # type: ignore[assignment] + ) + processor: Optional[Union[str, Path, Any]] = Field( + description=( + "The tokenizer to use for calculating token counts when none are " + "avaiable that match the preferred source." + ) + ) + processor_args: Optional[Dict[str, Any]] = Field( + description=( + "Additional arguments to pass to the tokenizer if it requires " + "any specific configuration for loading or processing." + ), + ) + worker_description: GenerativeRequestsWorkerDescription = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." + ), + discriminator="type_", + ) + request_loader_description: GenerativeRequestLoaderDescription = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + discriminator="type_", + ) + requests_stats: GenerativeRequestsRunningStats = Field( + description=( + "The running statistics for the requests for this benchmark run. " + "This includes all requests created, regardless of their status." + ), + default_factory=GenerativeRequestsRunningStats, + ) + + def add_result( + self, result: SchedulerRequestResult[GenerationRequest, ResponseSummary] + ) -> bool: + """ + Add a result to the aggregator. This will update the internal statistics + and add the result to the list of results if it is not within the warmup or + cooldown period. + + :param result: The result to add to the aggregator. 
+ """ + if not super().add_result(result): + return False + + if result.request is None: + raise ValueError("Request is None, cannot add result.") + + if result.response is None: + raise ValueError("Response is None, cannot add result.") + + self.requests_stats.request_start_time_delay.update( + result.response.start_time - result.request_info.worker_start + ) + self.requests_stats.request_start_time_targeted_delay.update( + result.response.start_time - result.request_info.targeted_start_time + ) + self.requests_stats.request_time_delay.update( + (result.response.start_time - result.request_info.worker_start) + + result.request_info.worker_end + - result.response.end_time + ) + self.requests_stats.request_time.update( + result.response.end_time - result.response.start_time + ) + if result.response.first_iter_time: + self.requests_stats.time_to_first_token.update( + result.response.first_iter_time - result.response.start_time + ) + if result.response.last_iter_time and result.response.first_iter_time: + self.requests_stats.inter_token_latency.update( + result.response.last_iter_time - result.response.first_iter_time, + count=(result.response.output_tokens or 1) - 1, + ) + self.requests_stats.prompt_tokens += result.response.request_prompt_tokens or 0 + self.requests_stats.output_tokens += result.response.request_output_tokens or 0 + total_tokens = (result.response.request_prompt_tokens or 0) + ( + result.response.request_output_tokens or 0 + ) + self.requests_stats.total_tokens += total_tokens + + return True + + def compile(self) -> GenerativeBenchmark: + """ + Compile the benchmark results and statistics into a GenerativeBenchmark object. + This is required to be implemented by subclasses to finalize the benchmark + and return the compiled object. 
+ """ + successful, incomplete, errored = self._compile_results() + + return GenerativeBenchmark.from_stats( + run_id=self.run_id, + successful=successful, + incomplete=incomplete, + errored=errored, + args=self.args, + run_stats=BenchmarkRunStats( + start_time=self.requests_stats.totals.total.start_time, + end_time=time.time(), + requests_made=StatusBreakdown( + successful=int(self.requests_stats.totals.successful.total), + errored=int(self.requests_stats.totals.errored.total), + incomplete=int(self.requests_stats.totals.incomplete.total), + total=int(self.requests_stats.totals.total.total), + ), + queued_time_avg=self.requests_stats.queued_time.mean, + scheduled_time_delay_avg=self.requests_stats.scheduled_time_delay.mean, + scheduled_time_sleep_avg=self.requests_stats.scheduled_time_sleep.mean, + worker_start_delay_avg=self.requests_stats.worker_start_delay.mean, + worker_time_avg=self.requests_stats.worker_time.mean, + worker_start_time_targeted_delay_avg=self.requests_stats.worker_start_time_targeted_delay.mean, + request_start_time_delay_avg=self.requests_stats.request_start_time_delay.mean, + request_start_time_targeted_delay_avg=self.requests_stats.request_start_time_targeted_delay.mean, + request_time_delay_avg=self.requests_stats.request_time_delay.mean, + request_time_avg=self.requests_stats.request_time.mean, + ), + worker=self.worker_description, + requests_loader=self.request_loader_description, + extras=self.extras, + ) + + def _compile_results( + self, + ) -> Tuple[ + List[GenerativeTextResponseStats], + List[GenerativeTextErrorStats], + List[GenerativeTextErrorStats], + ]: + successful: List[GenerativeTextResponseStats] = [ + GenerativeTextResponseStats( + request_id=result.request.request_id, + request_type=result.request.request_type, + scheduler_info=result.request_info, + prompt=str(result.request.content), + prompt_tokens=self._compile_tokens_count( + value=str(result.request.content), + requests_tokens=result.response.request_prompt_tokens, + response_tokens=result.response.response_prompt_tokens, + preferred_tokens_source=settings.preferred_prompt_tokens_source, + errored=False, + ), + output=result.response.value, + output_tokens=self._compile_tokens_count( + value=result.response.value, + requests_tokens=result.response.request_output_tokens, + response_tokens=result.response.response_output_tokens, + preferred_tokens_source=settings.preferred_output_tokens_source, + errored=False, + ), + start_time=result.response.start_time, + end_time=result.response.end_time, + first_token_time=result.response.first_iter_time or -1.0, + last_token_time=result.response.last_iter_time or -1.0, + ) + for result in self.results.successful + if result.request and result.response + ] + incomplete: List[GenerativeTextErrorStats] = [ + GenerativeTextErrorStats( + error=result.response.error or "", + request_id=result.request.request_id, + request_type=result.request.request_type, + scheduler_info=result.request_info, + prompt=str(result.request.content), + prompt_tokens=self._compile_tokens_count( + value=str(result.request.content), + requests_tokens=result.response.request_prompt_tokens, + response_tokens=result.response.response_prompt_tokens, + preferred_tokens_source=settings.preferred_prompt_tokens_source, + errored=True, + ), + output=result.response.value, + output_tokens=self._compile_tokens_count( + value=result.response.value, + requests_tokens=result.response.request_output_tokens, + response_tokens=result.response.response_output_tokens, + 
preferred_tokens_source=settings.preferred_output_tokens_source, + errored=True, + ), + start_time=result.response.start_time, + end_time=result.response.end_time, + first_token_time=result.response.first_iter_time, + last_token_time=result.response.last_iter_time, + ) + for result in self.results.incomplete + if result.request and result.response + ] + error: List[GenerativeTextErrorStats] = [ + GenerativeTextErrorStats( + error=result.response.error or "", + request_id=result.request.request_id, + request_type=result.request.request_type, + scheduler_info=result.request_info, + prompt=str(result.request.content), + prompt_tokens=self._compile_tokens_count( + value=str(result.request.content), + requests_tokens=result.response.request_prompt_tokens, + response_tokens=result.response.response_prompt_tokens, + preferred_tokens_source=settings.preferred_prompt_tokens_source, + errored=True, + ), + output=result.response.value, + output_tokens=self._compile_tokens_count( + value=result.response.value, + requests_tokens=result.response.request_output_tokens, + response_tokens=result.response.response_output_tokens, + preferred_tokens_source=settings.preferred_output_tokens_source, + errored=True, + ), + start_time=result.response.start_time, + end_time=result.response.end_time, + first_token_time=result.response.first_iter_time, + last_token_time=result.response.last_iter_time, + ) + for result in self.results.errored + if result.request and result.response + ] + + return successful, incomplete, error + + def _compile_tokens_count( + self, + value: str, + requests_tokens: Optional[int], + response_tokens: Optional[int], + preferred_tokens_source: Optional[Literal["request", "response", "local"]], + errored: bool, + ) -> int: + if not errored and preferred_tokens_source == "response" and response_tokens: + return response_tokens or 0 + + if not errored and preferred_tokens_source == "request" and requests_tokens: + return requests_tokens or 0 + + if preferred_tokens_source in {"response", "request"} and ( + self.processor is None or errored or response_tokens or requests_tokens + ): + # we had a preferred tokens source that isn't local and we either + # have the data to return something or we don't have the ability + # to calculate locally + return response_tokens or requests_tokens or 0 + + self.processor = check_load_processor( + self.processor, + processor_args=self.processor_args, + error_msg="Processor/Tokenizer is required for calculating token counts.", + ) + return len(self.processor.tokenize(value)) diff --git a/src/guidellm/benchmark/benchmark.py b/src/guidellm/benchmark/benchmark.py new file mode 100644 index 00000000..f1f9187c --- /dev/null +++ b/src/guidellm/benchmark/benchmark.py @@ -0,0 +1,828 @@ +import random +import uuid +from typing import Any, Dict, List, Literal, Optional, TypeVar, Union + +from pydantic import Field, computed_field + +from guidellm.benchmark.profile import ( + AsyncProfile, + ConcurrentProfile, + Profile, + SweepProfile, + SynchronousProfile, + ThroughputProfile, +) +from guidellm.objects import ( + StandardBaseModel, + StatusBreakdown, + StatusDistributionSummary, +) +from guidellm.request import ( + GenerativeRequestLoaderDescription, + RequestLoaderDescription, +) +from guidellm.scheduler import ( + AsyncConstantStrategy, + AsyncPoissonStrategy, + ConcurrentStrategy, + GenerativeRequestsWorkerDescription, + SchedulerRequestInfo, + SchedulingStrategy, + SynchronousStrategy, + ThroughputStrategy, + WorkerDescription, +) + +__all__ = [ + "BenchmarkT", + 
"StatusBreakdown", + "BenchmarkArgs", + "BenchmarkRunStats", + "Benchmark", + "BenchmarkMetrics", + "GenerativeTextResponseStats", + "GenerativeTextErrorStats", + "GenerativeMetrics", + "GenerativeBenchmark", +] + + +class BenchmarkArgs(StandardBaseModel): + """ + A serializable model representing the arguments used to specify a benchmark run + and how data was collected for it. + """ + + profile: Union[ + AsyncProfile, + SweepProfile, + ConcurrentProfile, + ThroughputProfile, + SynchronousProfile, + Profile, + ] = Field( + description=( + "The profile used for the entire benchmark run that the strategy for " + "this benchmark was pulled from." + ), + discriminator="type_", + ) + strategy_index: int = Field( + description=( + "The index of the strategy in the profile that was used for this benchmark." + ) + ) + strategy: Union[ + ConcurrentStrategy, + SchedulingStrategy, + ThroughputStrategy, + SynchronousStrategy, + AsyncPoissonStrategy, + AsyncConstantStrategy, + SchedulingStrategy, + ] = Field( + description="The scheduling strategy used to run this benchmark. ", + discriminator="type_", + ) + max_number: Optional[int] = Field( + description="The maximum number of requests to run for this benchmark, if any." + ) + max_duration: Optional[float] = Field( + description="The maximum duration in seconds to run this benchmark, if any." + ) + warmup_number: Optional[int] = Field( + description=( + "The number of requests to run for the warmup phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + warmup_duration: Optional[float] = Field( + description=( + "The duration in seconds to run for the warmup phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + cooldown_number: Optional[int] = Field( + description=( + "The number of requests to run for the cooldown phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + cooldown_duration: Optional[float] = Field( + description=( + "The duration in seconds to run for the cooldown phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + + +class BenchmarkRunStats(StandardBaseModel): + """ + A serializable model representing the run process statistics for the + entire benchmark run across all requests including warmup and cooldown. + """ + + start_time: float = Field( + description="The start time of the benchmark run.", + ) + end_time: float = Field( + description="The end time of the benchmark run.", + ) + requests_made: StatusBreakdown[int, int, int, int] = Field( + description=( + "The number of requests made for the benchmark run broken down by " + "status including successful, incomplete, errored, and the sum of all three" + ) + ) + queued_time_avg: float = Field( + description=( + "The average time spent in the queue for each request in the benchmark " + "run until it was dequeued by a worker." + ) + ) + scheduled_time_delay_avg: float = Field( + description=( + "The average time delay between when a request was dequeued and when it " + "was scheduled to be processed by a worker in the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ) + ) + scheduled_time_sleep_avg: float = Field( + description=( + "The average time spent sleeping til the desired start time was reached " + "after being scheduled by the worker in the benchmark run." 
+ ) + ) + worker_start_delay_avg: float = Field( + description=( + "The average time delay between when a request was scheduled and when " + "the worker started processing it in the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ) + ) + worker_time_avg: float = Field( + description=( + "The average time taken by the worker to process each request in the " + "benchmark run. This includes the time to generate the response and " + "any additional processing time." + ) + ) + worker_start_time_targeted_delay_avg: float = Field( + description=( + "The average time delay between when a request was targeted to start " + "and when the worker actually started processing it in the benchmark " + "run. For async strategies, this represents delays from the ideal " + "system. For sync strategies, since those are doubled in queue, " + "this should be as close to the time for a request to be processed " + "as possible. Any additional time is overhead from the system or " + "the worker." + ) + ) + request_start_time_delay_avg: float = Field( + description=( + "The average time delay between the actual request being made " + "and the time the worker started on the request for all requests " + "that completed within the benchmark run. This time should be as close " + "to 0 as possible, any additional time is overhead from the system or " + "the worker." + ) + ) + request_start_time_targeted_delay_avg: float = Field( + description=( + "The average time delay between when the targeted start time and " + "the actual start time for each request in the benchmark run. " + "For async strategies, this represents delays from the ideal " + "system. For sync strategies, this should be as close to the " + "time for a request to be processed as possible. Any additional " + "time is overhead from the system or the worker." + ) + ) + request_time_delay_avg: float = Field( + description=( + "The average time delay between the total request time and the " + "worker time. This should be as close to 0 as possible, any additional " + "time is overhead from the system or the worker. " + ) + ) + request_time_avg: float = Field( + description=( + "The average time spent processing all requests in the benchmark run. " + "This is the time from when the actual request was started to when " + "it was completed." + ) + ) + + +class BenchmarkMetrics(StandardBaseModel): + """ + A serializable model representing the metrics for a benchmark run. + """ + + requests_per_second: StatusDistributionSummary = Field( + description="The distribution of requests per second for the benchmark.", + ) + request_concurrency: StatusDistributionSummary = Field( + description="The distribution of requests concurrency for the benchmark.", + ) + + +class Benchmark(StandardBaseModel): + """ + The base serializable model representing a benchmark run and its results. + Specific benchmarker implementations should extend this model to include + additional information or metadata as needed. + + Note, requests_per_second and request_concurrency are kept at this level + and are expected to be populated by the subclass implementation to ensure + the logic for Profiles can include more complicated logic for determining + what rates and concurrency values to use for subsequent strategies. 
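+    For example, the SweepProfile in guidellm.benchmark.profile uses the rates
+    measured for its initial synchronous and throughput strategies to interpolate
+    the request rates for the remaining strategies in the sweep.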
+ """ + + type_: Literal["benchmark"] = "benchmark" + id_: str = Field( + default_factory=lambda: str(uuid.uuid4()), + description="The unique identifier for the benchmark.", + ) + run_id: str = Field( + description=( + "The unique identifier for the encompasing benchmark run that this " + "benchmark was a part of." + ) + ) + args: BenchmarkArgs = Field( + description=( + "The arguments used to specify how to run the benchmark and collect data." + ) + ) + run_stats: BenchmarkRunStats = Field( + description=( + "The process statistics for the entire benchmark run across all requests." + ) + ) + worker: Union[WorkerDescription] = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." + ), + ) + request_loader: Union[RequestLoaderDescription] = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + ) + extras: Dict[str, Any] = Field( + description=( + "Any additional information or metadata that was passed for this benchmark." + ) + ) + metrics: BenchmarkMetrics = Field( + description=( + "The metrics for the benchmark run represented as a distribution of " + "various per-request statistics." + ), + ) + + +BenchmarkT = TypeVar("BenchmarkT", bound=Benchmark) + + +class GenerativeTextResponseStats(StandardBaseModel): + """ + A serializable model representing the request values, response values, and + statistics for a generative text response. + """ + + type_: Literal["generative_text_response"] = "generative_text_response" + request_id: Optional[str] = Field( + description="The unique identifier for the request.", + ) + request_type: Literal["text_completions", "chat_completions"] = Field( + description="The type of request made to the generative backend." + ) + scheduler_info: SchedulerRequestInfo = Field( + description=( + "The info about the request from the scheduler about how it was run." + ), + ) + prompt: str = Field( + description="The text prompt used for the generative request.", + ) + output: str = Field( + description="The generated text output from the generative request.", + ) + prompt_tokens: int = Field( + description="The number of tokens in the prompt text.", + ) + output_tokens: int = Field( + description="The number of tokens in the generated output text.", + ) + start_time: float = Field( + description="The time the request started.", + ) + end_time: float = Field( + description="The time the request ended.", + ) + first_token_time: float = Field( + description="The time the first token was received.", + ) + last_token_time: float = Field( + description="The time the last token was received.", + ) + + @computed_field # type: ignore[misc] + @property + def request_latency(self) -> float: + """ + :return: The duration of the request in seconds from the start to the end. + """ + return self.end_time - self.start_time + + @computed_field # type: ignore[misc] + @property + def time_to_first_token_ms(self) -> float: + """ + :return: The time in milliseconds from the start of the request to the first + token received. + """ + return 1000 * (self.first_token_time - self.start_time) + + @computed_field # type: ignore[misc] + @property + def time_per_output_token_ms(self) -> float: + """ + :return: The average time in milliseconds per output token generated. + This includes the time to generate the first token and all other tokens. 
+ """ + if self.output_tokens == 0: + return 0.0 + + return ( + 1000 * (self.last_token_time - self.first_token_time) / self.output_tokens + ) + + @computed_field # type: ignore[misc] + @property + def inter_token_latency_ms(self) -> float: + """ + :return: The average time in milliseconds between generating tokens in the + output text. Note, does not include the time to generate the first token. + """ + if self.output_tokens <= 1: + return 0.0 + + return ( + 1000 + * (self.last_token_time - self.first_token_time) + / (self.output_tokens - 1) + ) + + @computed_field # type: ignore[misc] + @property + def tokens_per_second(self) -> float: + """ + :return: The average number of tokens generated per second in the prompt and + output text. + """ + if (latency := self.request_latency) == 0.0: + return 0.0 + + return (self.prompt_tokens + self.output_tokens) / latency + + @computed_field # type: ignore[misc] + @property + def output_tokens_per_second(self) -> float: + """ + :return: The average number of output tokens generated per second. + """ + if (latency := self.request_latency) == 0.0: + return 0.0 + + return self.output_tokens / latency + + +class GenerativeTextErrorStats(GenerativeTextResponseStats): + """ + A serializable model representing the request values, response values, and + statistics for a generative text response that errored. + Extends and overrides the GenerativeTextResponseStats model to include the + error message and optional properties given the error occurred. + """ + + type_: Literal["generative_text_error"] = "generative_text_error" # type: ignore[assignment] + error: str = Field( + description=( + "The error message for the error that occurred while making the request." + ) + ) + output: Optional[str] = Field( # type: ignore[assignment] + default=None, + description=( + "The generated text output from the generative request, if any, " + "before the error occurred." + ), + ) + first_token_time: Optional[float] = Field( # type: ignore[assignment] + default=None, + description=( + "The time the first token was received, if any, before the error occurred." + ), + ) + last_token_time: Optional[float] = Field( # type: ignore[assignment] + default=None, + description=( + "The time the last token was received, if any, before the error occurred." + ), + ) + + @computed_field # type: ignore[misc] + @property + def time_to_first_token_ms(self) -> Optional[float]: # type: ignore[override] + """ + :return: The time in milliseconds from the start of the request to the first + token received. None if the first token was not received. + """ + if self.first_token_time is None: + return None + + return super().time_to_first_token_ms + + @computed_field # type: ignore[misc] + @property + def time_per_output_token_ms(self) -> Optional[float]: # type: ignore[override] + """ + :return: The average time in milliseconds per output token generated. + This includes the time to generate the first token and all other tokens. + None if the output_tokens is None or 0. + """ + if self.output_tokens is None or self.output_tokens == 0: + return None + + return super().time_per_output_token_ms + + @computed_field # type: ignore[misc] + @property + def inter_token_latency_ms(self) -> Optional[float]: # type: ignore[override] + """ + :return: The average time in milliseconds between generating tokens in the + output text. Note, does not include the time to generate the first token. + None if there were no output_tokens or the first token was not received. 
+ """ + if ( + self.output_tokens is None + or self.first_token_time is None + or self.last_token_time is None + ): + return None + + return super().inter_token_latency_ms + + @computed_field # type: ignore[misc] + @property + def output_tokens_per_second(self) -> Optional[float]: # type: ignore[override] + """ + :return: The average number of tokens generated per second in the output text. + Note, does not include the time to generate the first token. None if there + were no output_tokens or the first token was not received. + """ + if self.inter_token_latency_ms is None: + return None + + return super().output_tokens_per_second + + +class GenerativeMetrics(BenchmarkMetrics): + """ + A serializable model representing the metrics for a generative benchmark run. + """ + + request_latency: StatusDistributionSummary = Field( + description="The distribution of latencies for the completed requests.", + ) + prompt_token_count: StatusDistributionSummary = Field( + description=( + "The distribution of token counts in the prompts for completed, " + "errored, and all requests." + ) + ) + output_token_count: StatusDistributionSummary = Field( + description=( + "The distribution of token counts in the outputs for completed, " + "errored, and all requests." + ) + ) + time_to_first_token_ms: StatusDistributionSummary = Field( + description=( + "The distribution of latencies to receiving the first token in " + "milliseconds for completed, errored, and all requests." + ), + ) + time_per_output_token_ms: StatusDistributionSummary = Field( + description=( + "The distribution of latencies per output token in milliseconds for " + "completed, errored, and all requests. " + "This includes the time to generate the first token and all other tokens." + ), + ) + inter_token_latency_ms: StatusDistributionSummary = Field( + description=( + "The distribution of latencies between tokens in milliseconds for " + "completed, errored, and all requests." + ), + ) + output_tokens_per_second: StatusDistributionSummary = Field( + description=( + "The distribution of output tokens per second for completed, " + "errored, and all requests." + ), + ) + tokens_per_second: StatusDistributionSummary = Field( + description=( + "The distribution of tokens per second, including prompt and output tokens " + "for completed, errored, and all requests." + ), + ) + + +class GenerativeBenchmark(Benchmark): + """ + A serializable model representing a benchmark run and its results for generative + requests and responses. Includes the completed and errored requests, the start + and end times for the benchmark, and the statistics for the requests and responses. + """ + + type_: Literal["generative_benchmark"] = "generative_benchmark" # type: ignore[assignment] + start_time: float = Field( + description="The start time of the first request for the benchmark.", + ) + end_time: float = Field( + description="The end time of the last request for the benchmark.", + ) + + @computed_field # type: ignore[misc] + @property + def duration(self) -> float: + """ + :return: The duration of the benchmark in seconds from the start of the + first request to the end of the last request. + """ + return self.end_time - self.start_time + + worker: GenerativeRequestsWorkerDescription = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." 
+ ), + ) + request_loader: GenerativeRequestLoaderDescription = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + ) + metrics: GenerativeMetrics = Field( + description=( + "The metrics for the benchmark run represented as a distribution of " + "various per-request statistics." + ), + ) + # Output is ordered so keep the requests at the end for better readability in files + request_totals: StatusBreakdown[int, int, int, int] = Field( + description=( + "The number of requests made for the benchmark broken down by status " + "including successful, incomplete, errored, and the sum of all three" + ) + ) + request_samples: Optional[StatusBreakdown[int, int, int, None]] = Field( + description=( + "The number of requests that were randomly sampled for " + "the benchmark. None if no sampling was applied." + ), + default=None, + ) + requests: StatusBreakdown[ + List[GenerativeTextResponseStats], + List[GenerativeTextErrorStats], + List[GenerativeTextErrorStats], + None, + ] = Field( + description=( + "The breakdown of requests for the benchmark run including successful, " + "incomplete, and errored requests." + ), + ) + + def create_sampled(self, sample_size: int) -> "GenerativeBenchmark": + """ + Create a new benchmark instance with a random sample of the completed and + errored requests based on the given sample sizes. If the sample sizes are + larger than the total number of requests, the sample sizes are capped at + the total number of requests. + + :param sample_size: The number of requests to sample for each status type. + :return: A new benchmark instance with the sampled requests. + :raises ValueError: If the sample sizes are negative. + """ + if sample_size < 0: + raise ValueError(f"Sample size must be non-negative, given {sample_size}") + + sample_size = min(sample_size, len(self.requests.successful)) + error_sample_size = min(sample_size, len(self.requests.errored)) + incomplete_sample_size = min(sample_size, len(self.requests.incomplete)) + + sampled_instance = self.model_copy() + sampled_instance.requests.successful = random.sample( + self.requests.successful, sample_size + ) + sampled_instance.requests.errored = random.sample( + self.requests.errored, error_sample_size + ) + sampled_instance.requests.incomplete = random.sample( + self.requests.incomplete, incomplete_sample_size + ) + sampled_instance.request_samples = StatusBreakdown( + successful=len(sampled_instance.requests.successful), + incomplete=len(sampled_instance.requests.incomplete), + errored=len(sampled_instance.requests.errored), + ) + + return sampled_instance + + @staticmethod + def from_stats( + run_id: str, + successful: List[GenerativeTextResponseStats], + incomplete: List[GenerativeTextErrorStats], + errored: List[GenerativeTextErrorStats], + args: BenchmarkArgs, + run_stats: BenchmarkRunStats, + worker: GenerativeRequestsWorkerDescription, + requests_loader: GenerativeRequestLoaderDescription, + extras: Optional[Dict[str, Any]], + ) -> "GenerativeBenchmark": + """ + Create a GenerativeBenchmark instance from the given statistics and metadata. + Given the completed and errored requests, the benchmark will fill in the + remaining statistics for the various metrics required for a benchmark. + This is the preferred method for creating a GenerativeBenchmark instance + to ensure all statistics are properly calculated and populated. + + :param run_id: The unique identifier for the benchmark run. 
+ :param completed: The list of completed requests. + :param errored: The list of errored requests. + :param args: The arguments used to specify how to run the benchmark + and collect data. + :param run_stats: The process statistics for the entire benchmark run across + all requests. + :param worker: The description and specifics for the worker used to resolve + requests. + :param requests_loader: The description and specifics for the request loader + used to create requests. + :param extras: Any additional information or metadata that was passed for + this benchmark. + :return: A GenerativeBenchmark instance with the given statistics and metadata + populated and calculated + """ + total = successful + incomplete + errored + total_types: List[Literal["successful", "incomplete", "error"]] = [ + *["successful"] * len(successful), # type: ignore[list-item] + *["incomplete"] * len(incomplete), # type: ignore[list-item] + *["error"] * len(errored), # type: ignore[list-item] + ] + start_time = min(req.start_time for req in total) + end_time = max(req.end_time for req in total) + + total_with_prompt, total_types_with_prompt = ( + zip(*filtered) + if ( + filtered := list( + filter(lambda val: bool(val[0].prompt), zip(total, total_types)) + ) + ) + else ([], []) + ) + total_with_output_first, total_types_with_output_first = ( + zip(*filtered) + if ( + filtered := list( + filter( + lambda val: bool(val[0].output_tokens > 0), + zip(total, total_types), + ) + ) + ) + else ([], []) + ) + total_with_output_multi, total_types_with_output_multi = ( + zip(*filtered) + if ( + filtered := list( + filter( + lambda val: bool(val[0].output_tokens > 1), + zip(total, total_types), + ) + ) + ) + else ([], []) + ) + + return GenerativeBenchmark( + run_id=run_id, + args=args, + run_stats=run_stats, + extras=extras or {}, + start_time=start_time, + end_time=end_time, + worker=worker, + request_loader=requests_loader, + metrics=GenerativeMetrics( + requests_per_second=StatusDistributionSummary.from_request_times( + request_types=total_types, + requests=[(req.start_time, req.end_time) for req in total], + distribution_type="rate", + ), + request_concurrency=StatusDistributionSummary.from_request_times( + request_types=total_types, + requests=[(req.start_time, req.end_time) for req in total], + distribution_type="concurrency", + ), + request_latency=StatusDistributionSummary.from_values( + value_types=total_types, + values=[req.request_latency for req in total], + ), + prompt_token_count=StatusDistributionSummary.from_values( + value_types=list(total_types_with_prompt), + values=[req.prompt_tokens for req in total_with_prompt], + ), + output_token_count=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_first), + values=[req.output_tokens for req in total_with_output_first], + ), + time_to_first_token_ms=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_first), + values=[ + req.time_to_first_token_ms or 0 + for req in total_with_output_first + ], + ), + time_per_output_token_ms=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_first), + values=[ + req.time_per_output_token_ms or 0 + for req in total_with_output_first + ], + weights=[req.output_tokens for req in total_with_output_first], + ), + inter_token_latency_ms=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_multi), + values=[ + req.inter_token_latency_ms or 0 + for req in total_with_output_multi + ], + weights=[req.output_tokens - 1 for 
req in total_with_output_multi], + ), + output_tokens_per_second=StatusDistributionSummary.from_iterable_request_times( + request_types=list(total_types_with_output_first), + requests=[ + (req.start_time, req.end_time) + for req in total_with_output_first + ], + first_iter_times=[ + req.first_token_time or req.start_time + for req in total_with_output_first + ], + iter_counts=[req.output_tokens for req in total_with_output_first], + ), + tokens_per_second=StatusDistributionSummary.from_iterable_request_times( + request_types=list(total_types_with_output_first), + requests=[ + (req.start_time, req.end_time) + for req in total_with_output_first + ], + first_iter_times=[ + req.first_token_time or req.start_time + for req in total_with_output_first + ], + iter_counts=[ + req.prompt_tokens + req.output_tokens + for req in total_with_output_first + ], + first_iter_counts=[ + req.prompt_tokens for req in total_with_output_first + ], + ), + ), + request_totals=StatusBreakdown( + successful=len(successful), + incomplete=len(incomplete), + errored=len(errored), + total=len(total), + ), + requests=StatusBreakdown( + successful=successful, + incomplete=incomplete, + errored=errored, + ), + ) diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py new file mode 100644 index 00000000..985d9c4f --- /dev/null +++ b/src/guidellm/benchmark/benchmarker.py @@ -0,0 +1,336 @@ +import time +import uuid +from abc import ABC, abstractmethod +from pathlib import Path +from typing import ( + Any, + AsyncGenerator, + Dict, + Generic, + Iterable, + Literal, + Optional, + Union, +) + +from pydantic import Field +from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003 + +from guidellm.backend import Backend, ResponseSummary +from guidellm.benchmark.aggregator import ( + AggregatorT, + BenchmarkT, + GenerativeBenchmarkAggregator, +) +from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark +from guidellm.benchmark.profile import Profile +from guidellm.objects import StandardBaseModel +from guidellm.request import ( + GenerationRequest, + GenerativeRequestLoaderDescription, + RequestLoaderDescription, +) +from guidellm.scheduler import ( + GenerativeRequestsWorker, + RequestsWorker, + RequestT, + ResponseT, + Scheduler, + SchedulerRequestResult, + SchedulingStrategy, +) + +__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"] + + +class BenchmarkerResult( + StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT] +): + type_: Literal[ + "run_start", + "run_complete", + "scheduler_start", + "scheduler_update", + "scheduler_complete", + "benchmark_compiled", + ] + start_time: float + end_number: int + profile: Profile + current_index: int + current_strategy: Optional[SchedulingStrategy] = None + current_aggregator: Optional[AggregatorT] = None + current_benchmark: Optional[BenchmarkT] = None + current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None + + +class BenchmarkerStrategyLimits(StandardBaseModel): + requests_loader_size: Optional[int] = Field( + description="Size of the request loader.", + ) + max_number_per_strategy: Optional[int] = Field( + description="Maximum number of requests to process per strategy.", + ge=0, + ) + max_duration_per_strategy: Optional[float] = Field( + description="Maximum duration (in seconds) to process requests per strategy.", + ge=0, + ) + warmup_percent_per_strategy: Optional[float] = Field( + description="Percentage of requests to use for warmup.", + ge=0, + 
le=1, + ) + cooldown_percent_per_strategy: Optional[float] = Field( + description="Percentage of requests to use for cooldown.", + ge=0, + le=1, + ) + + @property + def max_number(self) -> Optional[int]: + if self.max_number_per_strategy is not None: + return self.max_number_per_strategy + + if self.requests_loader_size is not None: + return self.requests_loader_size + + return None + + @property + def max_duration(self) -> Optional[float]: + return self.max_duration_per_strategy + + @property + def warmup_number(self) -> Optional[int]: + if self.warmup_percent_per_strategy is None or self.max_number is None: + return None + + return int(self.warmup_percent_per_strategy * self.max_number) + + @property + def warmup_duration(self) -> Optional[float]: + if self.warmup_percent_per_strategy is None or self.max_duration is None: + return None + + return self.warmup_percent_per_strategy * self.max_duration + + @property + def cooldown_number(self) -> Optional[int]: + if self.cooldown_percent_per_strategy is None or self.max_number is None: + return None + + return int(self.cooldown_percent_per_strategy * self.max_number) + + @property + def cooldown_duration(self) -> Optional[float]: + if self.cooldown_percent_per_strategy is None or self.max_duration is None: + return None + + return self.cooldown_percent_per_strategy * self.max_duration + + +class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC): + def __init__( + self, + worker: RequestsWorker[RequestT, ResponseT], + request_loader: Iterable[RequestT], + requests_loader_description: RequestLoaderDescription, + benchmark_save_extras: Optional[Dict[str, Any]] = None, + ): + self.worker = worker + self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler( + worker=worker, request_loader=request_loader + ) + self.requests_loader_description = requests_loader_description + self.benchmark_save_extras = benchmark_save_extras + + async def run( + self, + profile: Profile, + max_number_per_strategy: Optional[int], + max_duration_per_strategy: Optional[float], + warmup_percent_per_strategy: Optional[float], + cooldown_percent_per_strategy: Optional[float], + ) -> AsyncGenerator[ + BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None + ]: + try: + requests_loader_size = len(self.scheduler.request_loader) # type: ignore[arg-type] + except Exception: # noqa: BLE001 + requests_loader_size = None + + strategy_limits = BenchmarkerStrategyLimits( + requests_loader_size=requests_loader_size, + max_number_per_strategy=max_number_per_strategy, + max_duration_per_strategy=max_duration_per_strategy, + warmup_percent_per_strategy=warmup_percent_per_strategy, + cooldown_percent_per_strategy=cooldown_percent_per_strategy, + ) + start_time = time.time() + end_number = len(profile.strategy_types) + current_index = -1 + run_id = str(uuid.uuid4()) + + yield BenchmarkerResult( + type_="run_start", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=None, + current_aggregator=None, + current_benchmark=None, + current_result=None, + ) + + while scheduling_strategy := profile.next_strategy(): + current_index += 1 + aggregator = self.create_benchmark_aggregator( + run_id=run_id, + profile=profile, + strategy_index=current_index, + strategy=scheduling_strategy, + limits=strategy_limits, + ) + + async for result in self.scheduler.run( + scheduling_strategy=scheduling_strategy, + max_number=max_number_per_strategy, + max_duration=max_duration_per_strategy, + ): + if 
result.type_ == "run_start": + yield BenchmarkerResult( + type_="scheduler_start", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=aggregator, + current_benchmark=None, + current_result=None, + ) + elif result.type_ == "run_complete": + yield BenchmarkerResult( + type_="scheduler_complete", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=aggregator, + current_benchmark=None, + current_result=None, + ) + elif isinstance(result, SchedulerRequestResult): + aggregator.add_result(result) + + yield BenchmarkerResult( + type_="scheduler_update", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=aggregator, + current_benchmark=None, + current_result=result, + ) + else: + raise ValueError(f"Unexpected result type: {type(result)}") + + benchmark: BenchmarkT = aggregator.compile() + profile.completed_strategy( + average_rate=benchmark.metrics.requests_per_second.successful.mean, + average_concurrency=benchmark.metrics.request_concurrency.successful.mean, + ) + + yield BenchmarkerResult( + type_="benchmark_compiled", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=None, + current_benchmark=benchmark, + current_result=None, + ) + + yield BenchmarkerResult( + type_="run_complete", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=None, + current_aggregator=None, + current_benchmark=None, + current_result=None, + ) + + @abstractmethod + def create_benchmark_aggregator( + self, + run_id: str, + profile: Profile, + strategy_index: int, + strategy: SchedulingStrategy, + limits: BenchmarkerStrategyLimits, + ) -> AggregatorT: ... 
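A rough usage sketch of how a concrete Benchmarker, such as the GenerativeBenchmarker defined just below, is expected to be driven; it mirrors the benchmark_generative_text entrypoint added later in this patch. The function name run_sketch and the backend, request_loader, and loader_description arguments are illustrative placeholders for objects constructed elsewhere.

from guidellm.benchmark.benchmarker import GenerativeBenchmarker
from guidellm.benchmark.profile import create_profile


async def run_sketch(backend, request_loader, loader_description):
    # Wrap the backend and loader in a benchmarker; the worker is created internally.
    benchmarker = GenerativeBenchmarker(
        backend=backend,
        request_loader=request_loader,
        request_loader_description=loader_description,
    )
    # A single synchronous strategy; other rate_type values map to the other profiles.
    profile = create_profile(rate_type="synchronous", rate=None)

    benchmarks = []
    async for result in benchmarker.run(
        profile=profile,
        max_number_per_strategy=100,
        max_duration_per_strategy=None,
        warmup_percent_per_strategy=None,
        cooldown_percent_per_strategy=None,
    ):
        # Each compiled benchmark is surfaced as a "benchmark_compiled" result.
        if result.type_ == "benchmark_compiled":
            benchmarks.append(result.current_benchmark)
    return benchmarks

The benchmark_generative_text entrypoint later in this patch follows the same loop, adding progress display and output handling around it.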
+ + +class GenerativeBenchmarker( + Benchmarker[ + GenerativeBenchmarkAggregator, + GenerativeBenchmark, + GenerationRequest, + ResponseSummary, + ], +): + def __init__( + self, + backend: Backend, + request_loader: Iterable[GenerationRequest], + request_loader_description: GenerativeRequestLoaderDescription, + benchmark_save_extras: Optional[Dict[str, Any]] = None, + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None, + processor_args: Optional[Dict[str, Any]] = None, + ): + super().__init__( + worker=GenerativeRequestsWorker(backend), + request_loader=request_loader, + requests_loader_description=request_loader_description, + benchmark_save_extras=benchmark_save_extras, + ) + self.processor = processor + self.processor_args = processor_args + + def create_benchmark_aggregator( + self, + run_id: str, + profile: Profile, + strategy_index: int, + strategy: SchedulingStrategy, + limits: BenchmarkerStrategyLimits, + ) -> GenerativeBenchmarkAggregator: + return GenerativeBenchmarkAggregator( + run_id=run_id, + args=BenchmarkArgs( + profile=profile, + strategy_index=strategy_index, + strategy=strategy, + max_number=limits.max_number, + max_duration=limits.max_duration, + warmup_number=limits.warmup_number, + warmup_duration=limits.warmup_duration, + cooldown_number=limits.cooldown_number, + cooldown_duration=limits.cooldown_duration, + ), + worker_description=self.worker.description, # type: ignore[arg-type] + request_loader_description=self.requests_loader_description, # type: ignore[arg-type] + extras=self.benchmark_save_extras or {}, + processor=self.processor, + processor_args=self.processor_args, + ) diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py new file mode 100644 index 00000000..fc98219e --- /dev/null +++ b/src/guidellm/benchmark/entrypoints.py @@ -0,0 +1,129 @@ +from pathlib import Path +from typing import Any, Dict, Iterable, List, Literal, Optional, Union + +from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict +from transformers import ( # type: ignore[import] + PreTrainedTokenizerBase, +) + +from guidellm.backend import Backend, BackendType +from guidellm.benchmark.benchmark import GenerativeBenchmark +from guidellm.benchmark.benchmarker import GenerativeBenchmarker +from guidellm.benchmark.output import ( + GenerativeBenchmarksConsole, + save_generative_benchmarks, +) +from guidellm.benchmark.profile import ProfileType, create_profile +from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay +from guidellm.request import GenerativeRequestLoader +from guidellm.scheduler import StrategyType + + +async def benchmark_generative_text( + target: str, + backend_type: BackendType, + backend_args: Optional[Dict[str, Any]], + model: Optional[str], + processor: Optional[Optional[Union[str, Path, PreTrainedTokenizerBase]]], + processor_args: Optional[Dict[str, Any]], + data: Union[ + str, + Path, + Iterable[Union[str, Dict[str, Any]]], + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + ], + data_args: Optional[Dict[str, Any]], + data_sampler: Optional[Literal["random"]], + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[int, float, List[Union[int, float]]]], + max_seconds: Optional[float], + max_requests: Optional[int], + warmup_percent: Optional[float], + cooldown_percent: Optional[float], + show_progress: bool, + show_progress_scheduler_stats: bool, + output_console: bool, + output_path: Optional[Union[str, Path]], + output_extras: 
Optional[Dict[str, Any]], + random_seed: int, +) -> List[GenerativeBenchmark]: + console = GenerativeBenchmarksConsole(enabled=show_progress) + console.print_line("Creating backend...") + backend = Backend.create( + backend_type, target=target, model=model, **(backend_args or {}) + ) + await backend.validate() + console.print_line( + f"Backend {backend_type} connected to {target} for model {backend.model}." + ) + + if processor is None: + processor = backend.model + + console.print_line("Creating request loader...") + request_loader = GenerativeRequestLoader( + data=data, + data_args=data_args, + processor=processor, + processor_args=processor_args, + shuffle=data_sampler == "random", + iter_type=( + "finite" # assume a finite dataset is our limit + if max_requests is None and max_seconds is None + else "infinite" # default to infinite so we don't run out of data + ), + random_seed=random_seed, + ) + unique_requests = request_loader.num_unique_items(raise_err=False) + console.print_line( + f"Created loader with {unique_requests} unique requests from {data}.\n\n" + if unique_requests > 0 + else f"Created loader with unknown number unique requests from {data}.\n\n" + ) + + profile = create_profile(rate_type=rate_type, rate=rate) + benchmarker = GenerativeBenchmarker( + backend=backend, + request_loader=request_loader, + request_loader_description=request_loader.description, + benchmark_save_extras=output_extras, + processor=processor, + processor_args=processor_args, + ) + progress = ( + GenerativeTextBenchmarkerProgressDisplay( + display_scheduler_stats=show_progress_scheduler_stats + ) + if show_progress + else None + ) + benchmarks = [] + + async for result in benchmarker.run( + profile=profile, + max_number_per_strategy=max_requests, + max_duration_per_strategy=max_seconds, + warmup_percent_per_strategy=warmup_percent, + cooldown_percent_per_strategy=cooldown_percent, + ): + if progress: + progress.update(result) + + if result.type_ == "benchmark_compiled": + if result.current_benchmark is None: + raise ValueError("Current benchmark is None") + benchmarks.append(result.current_benchmark) + + if output_console: + console.benchmarks = benchmarks + console.print_benchmarks_metadata() + console.print_benchmarks_info() + console.print_benchmarks_stats() + + if output_path: + save_generative_benchmarks(benchmarks=benchmarks, path=output_path) + + return benchmarks diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py new file mode 100644 index 00000000..d0bdc103 --- /dev/null +++ b/src/guidellm/benchmark/output.py @@ -0,0 +1,347 @@ +import json +from collections import OrderedDict +from datetime import datetime +from pathlib import Path +from typing import Any, List, Optional, Union + +import yaml +from rich.console import Console +from rich.padding import Padding +from rich.table import Table +from rich.text import Text + +from guidellm.benchmark.benchmark import GenerativeBenchmark +from guidellm.benchmark.profile import ( + AsyncProfile, + ConcurrentProfile, + SweepProfile, + ThroughputProfile, +) +from guidellm.objects import StandardBaseModel +from guidellm.scheduler import strategy_display_str +from guidellm.utils import Colors + +__all__ = [ + "GenerativeBenchmarksReport", + "save_generative_benchmarks", + "GenerativeBenchmarksConsole", +] + + +class GenerativeBenchmarksReport(StandardBaseModel): + benchmarks: List[GenerativeBenchmark] + + def save_file(self, path: Path): + if path.is_dir(): + path = path / "benchmarks.json" + + path.parent.mkdir(parents=True, 
exist_ok=True)
+        extension = path.suffix.lower()
+
+        if extension == ".json":
+            self.save_json(path)
+        elif extension in [".yaml", ".yml"]:
+            self.save_yaml(path)
+        elif extension in [".csv"]:
+            self.save_csv(path)
+        else:
+            raise ValueError(f"Unsupported file extension: {extension} for {path}.")
+
+    def save_json(self, path: Path):
+        model_dict = self.model_dump()
+        model_json = json.dumps(model_dict)
+
+        with path.open("w") as file:
+            file.write(model_json)
+
+    def save_yaml(self, path: Path):
+        model_dict = self.model_dump()
+        model_yaml = yaml.dump(model_dict)
+
+        with path.open("w") as file:
+            file.write(model_yaml)
+
+    def save_csv(self, path: Path):
+        raise NotImplementedError("CSV format is not implemented yet.")
+
+
+def save_generative_benchmarks(
+    benchmarks: List[GenerativeBenchmark], path: Union[Path, str]
+):
+    path = Path(path) if isinstance(path, str) else path
+    report = GenerativeBenchmarksReport(benchmarks=benchmarks)
+    report.save_file(path)
+
+
+class GenerativeBenchmarksConsole:
+    def __init__(self, enabled: bool = True):
+        self.enabled = enabled
+        self.benchmarks: Optional[List[GenerativeBenchmark]] = None
+        self.console = Console()
+
+    @property
+    def benchmarks_profile_str(self) -> str:
+        profile = self.benchmarks[0].args.profile if self.benchmarks else None
+
+        if profile is None:
+            return "None"
+
+        profile_args = OrderedDict(
+            {
+                "type": profile.type_,
+                "strategies": profile.strategy_types,
+            }
+        )
+
+        if isinstance(profile, ConcurrentProfile):
+            profile_args["streams"] = str(profile.streams)
+        elif isinstance(profile, SweepProfile):
+            profile_args["sweep_size"] = str(profile.sweep_size)
+        elif isinstance(profile, AsyncProfile):
+            profile_args["max_concurrency"] = str(profile.max_concurrency)
+            profile_args["rate"] = str(profile.rate)
+            profile_args["initial_burst"] = str(profile.initial_burst)
+        elif isinstance(profile, ThroughputProfile):
+            profile_args["max_concurrency"] = str(profile.max_concurrency)
+
+        return ", ".join(f"{key}={value}" for key, value in profile_args.items())
+
+    @property
+    def benchmarks_args_str(self) -> str:
+        args = self.benchmarks[0].args if self.benchmarks else None
+
+        if args is None:
+            return "None"
+
+        args_dict = OrderedDict(
+            {
+                "max_number": args.max_number,
+                "max_duration": args.max_duration,
+                "warmup_number": args.warmup_number,
+                "warmup_duration": args.warmup_duration,
+                "cooldown_number": args.cooldown_number,
+                "cooldown_duration": args.cooldown_duration,
+            }
+        )
+
+        return ", ".join(f"{key}={value}" for key, value in args_dict.items())
+
+    @property
+    def benchmarks_worker_desc_str(self) -> str:
+        return str(self.benchmarks[0].worker) if self.benchmarks else "None"
+
+    @property
+    def benchmarks_request_loader_desc_str(self) -> str:
+        return str(self.benchmarks[0].request_loader) if self.benchmarks else "None"
+
+    @property
+    def benchmarks_extras_str(self) -> str:
+        extras = self.benchmarks[0].extras if self.benchmarks else None
+
+        if not extras:
+            return "None"
+
+        return ", ".join(f"{key}={value}" for key, value in extras.items())
+
+    def print_section_header(self, title: str, new_lines: int = 2):
+        if not self.enabled:
+            return
+
+        text = Text()
+
+        for _ in range(new_lines):
+            text.append("\n")
+
+        text.append(f"{title}:", style=f"bold underline {Colors.INFO}")
+        self.console.print(text)
+
+    def print_labeled_line(self, label: str, value: str, indent: int = 4):
+        if not self.enabled:
+            return
+
+        text = Text()
+        text.append(label, style=f"bold {Colors.INFO}")
+        text.append(": ")
+        text.append(value,
style="italic") + self.console.print( + Padding.indent(text, indent), + ) + + def print_line(self, value: str, indent: int = 0): + if not self.enabled: + return + + text = Text(value) + self.console.print( + Padding.indent(text, indent), + ) + + def print_table(self, headers: List[str], rows: List[List[Any]], title: str): + if not self.enabled: + return + + self.print_section_header(title) + table = Table(*headers, header_style=f"bold {Colors.INFO}") + + for row in rows: + table.add_row(*[Text(item, style="italic") for item in row]) + + self.console.print(table) + + def print_benchmarks_metadata(self): + if not self.enabled: + return + + if not self.benchmarks: + raise ValueError( + "No benchmarks to print metadata for. Please set benchmarks first." + ) + + start_time = self.benchmarks[0].run_stats.start_time + end_time = self.benchmarks[0].run_stats.end_time + duration = end_time - start_time + + self.print_section_header("Benchmarks Completed") + self.print_labeled_line("Run id", str(self.benchmarks[0].run_id)) + self.print_labeled_line( + "Duration", + f"{duration:.1f} seconds", + ) + self.print_labeled_line( + "Profile", + self.benchmarks_profile_str, + ) + self.print_labeled_line( + "Args", + self.benchmarks_args_str, + ) + self.print_labeled_line( + "Worker", + self.benchmarks_worker_desc_str, + ) + self.print_labeled_line( + "Request Loader", + self.benchmarks_request_loader_desc_str, + ) + self.print_labeled_line( + "Extras", + self.benchmarks_extras_str, + ) + + def print_benchmarks_info(self): + if not self.enabled: + return + + if not self.benchmarks: + raise ValueError( + "No benchmarks to print info for. Please set benchmarks first." + ) + + headers = [ + "Benchmark", + "Start Time", + "End Time", + "Duration (sec)", + "Requests Made \n(comp / inc / err)", + "Prompt Tok / Req \n(comp / inc / err)", + "Output Tok / Req \n(comp / inc / err)", + "Prompt Tok Total \n(comp / inc / err)", + "Output Tok Total \n(comp / inc / err)", + ] + rows = [] + + for benchmark in self.benchmarks: + rows.append( + [ + strategy_display_str(benchmark.args.strategy), + f"{datetime.fromtimestamp(benchmark.start_time).strftime('%H:%M:%S')}", + f"{datetime.fromtimestamp(benchmark.end_time).strftime('%H:%M:%S')}", + f"{(benchmark.end_time - benchmark.start_time):.1f}", + ( + f"{benchmark.request_totals.successful:>5} / " + f"{benchmark.request_totals.incomplete} / " + f"{benchmark.request_totals.errored}" + ), + ( + f"{benchmark.metrics.prompt_token_count.successful.mean:>5.1f} / " # noqa: E501 + f"{benchmark.metrics.prompt_token_count.incomplete.mean:.1f} / " + f"{benchmark.metrics.prompt_token_count.errored.mean:.1f}" + ), + ( + f"{benchmark.metrics.output_token_count.successful.mean:>5.1f} / " # noqa: E501 + f"{benchmark.metrics.output_token_count.incomplete.mean:.1f} / " + f"{benchmark.metrics.output_token_count.errored.mean:.1f}" + ), + ( + f"{benchmark.metrics.prompt_token_count.successful.total_sum:>6.0f} / " # noqa: E501 + f"{benchmark.metrics.prompt_token_count.incomplete.total_sum:.0f} / " # noqa: E501 + f"{benchmark.metrics.prompt_token_count.errored.total_sum:.0f}" + ), + ( + f"{benchmark.metrics.output_token_count.successful.total_sum:>6.0f} / " # noqa: E501 + f"{benchmark.metrics.output_token_count.incomplete.total_sum:.0f} / " # noqa: E501 + f"{benchmark.metrics.output_token_count.errored.total_sum:.0f}" + ), + ] + ) + + self.print_table(headers=headers, rows=rows, title="Benchmarks Info") + + def print_benchmarks_stats(self): + if not self.enabled: + return + + if not self.benchmarks: + 
raise ValueError( + "No benchmarks to print stats for. Please set benchmarks first." + ) + + headers = [ + "Benchmark", + "Requests / sec", + "Requests Concurrency", + "Output Tok / sec", + "Total Tok / sec", + "Req Latency (ms)\n(mean / median / p99)", + "TTFT (ms)\n(mean / median / p99)", + "ITL (ms)\n(mean / median / p99)", + "TPOT (ms)\n(mean / median / p99)", + ] + rows = [] + + for benchmark in self.benchmarks: + rows.append( + [ + strategy_display_str(benchmark.args.strategy), + f"{benchmark.metrics.requests_per_second.successful.mean:.2f}", + f"{benchmark.metrics.request_concurrency.successful.mean:.2f}", + f"{benchmark.metrics.output_tokens_per_second.total.mean:.1f}", + f"{benchmark.metrics.tokens_per_second.total.mean:.1f}", + ( + f"{benchmark.metrics.request_latency.successful.mean:.2f} / " + f"{benchmark.metrics.request_latency.successful.median:.2f} / " + f"{benchmark.metrics.request_latency.successful.percentiles.p99:.2f}" + ), + ( + f"{benchmark.metrics.time_to_first_token_ms.successful.mean:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_to_first_token_ms.successful.median:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_to_first_token_ms.successful.percentiles.p99:.1f}" + ), + ( + f"{benchmark.metrics.inter_token_latency_ms.successful.mean:.1f} / " # noqa: E501 + f"{benchmark.metrics.inter_token_latency_ms.successful.median:.1f} / " # noqa: E501 + f"{benchmark.metrics.inter_token_latency_ms.successful.percentiles.p99:.1f}" + ), + ( + f"{benchmark.metrics.time_per_output_token_ms.successful.mean:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_per_output_token_ms.successful.median:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_per_output_token_ms.successful.percentiles.p99:.1f}" + ), + ] + ) + + self.print_table( + headers=headers, + rows=rows, + title="Benchmarks Stats", + ) diff --git a/src/guidellm/benchmark/profile.py b/src/guidellm/benchmark/profile.py new file mode 100644 index 00000000..99f01f2e --- /dev/null +++ b/src/guidellm/benchmark/profile.py @@ -0,0 +1,399 @@ +from typing import List, Literal, Optional, Sequence, Union + +import numpy as np +from pydantic import Field, computed_field + +from guidellm.config import settings +from guidellm.objects import StandardBaseModel +from guidellm.scheduler import ( + AsyncConstantStrategy, + AsyncPoissonStrategy, + ConcurrentStrategy, + SchedulingStrategy, + StrategyType, + SynchronousStrategy, + ThroughputStrategy, +) + +__all__ = [ + "ProfileType", + "Profile", + "SynchronousProfile", + "ConcurrentProfile", + "ThroughputProfile", + "AsyncProfile", + "SweepProfile", + "create_profile", +] + +ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"] + + +class Profile(StandardBaseModel): + type_: Literal["profile"] = Field( + description="The type of benchmarking profile to use.", + ) + completed_strategies: int = Field( + default=0, + description="The number of scheduling strategies generated so far.", + ) + measured_rates: List[float] = Field( + default_factory=list, + description=("The average rates measured for the strategies that have run."), + ) + measured_concurrencies: List[float] = Field( + default_factory=list, + description=( + "The average concurrency measured for the strategies that have run." 
+ ), + ) + + def completed_strategy(self, average_rate: float, average_concurrency: float): + self.measured_rates.append(average_rate) + self.measured_concurrencies.append(average_concurrency) + self.completed_strategies += 1 + + @computed_field # type: ignore[misc] + @property + def strategy_types(self) -> List[StrategyType]: + return [] + + def next_strategy(self) -> Optional[SchedulingStrategy]: + return None + + +class SynchronousProfile(Profile): + type_: Literal["synchronous"] = "synchronous" # type: ignore[assignment] + + @property + def strategy_types(self) -> List[StrategyType]: + return [self.type_] + + def next_strategy(self) -> Optional[SchedulingStrategy]: + if self.completed_strategies >= 1: + return None + + return SynchronousStrategy() + + @staticmethod + def from_standard_args( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + **kwargs, + ) -> "SynchronousProfile": + if rate_type != "synchronous": + raise ValueError("Rate type must be 'synchronous' for synchronous profile.") + + if rate is not None: + raise ValueError( + "Rate does not apply to synchronous profile, it must be set to None." + ) + + if kwargs: + raise ValueError( + "No additional arguments are allowed for synchronous profile." + ) + + return SynchronousProfile() + + +class ConcurrentProfile(Profile): + type_: Literal["concurrent"] = "concurrent" # type: ignore[assignment] + streams: Union[int, Sequence[int]] = Field( + description="The number of concurrent streams to use.", + ) + + @property + def strategy_types(self) -> List[StrategyType]: + num_strategies = len(self.streams) if isinstance(self.streams, Sequence) else 1 + + return [self.type_] * num_strategies + + def next_strategy(self) -> Optional[SchedulingStrategy]: + streams = self.streams if isinstance(self.streams, Sequence) else [self.streams] + + if self.completed_strategies >= len(streams): + return None + + return ConcurrentStrategy( + streams=streams[self.completed_strategies], + ) + + @staticmethod + def from_standard_args( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + **kwargs, + ) -> "ConcurrentProfile": + if rate_type != "concurrent": + raise ValueError("Rate type must be 'concurrent' for concurrent profile.") + + if not rate: + raise ValueError("Rate (streams) must be provided for concurrent profile.") + + if not isinstance(rate, Sequence): + rate = [rate] + + if not all(stream.is_integer() and stream > 0 for stream in rate): + raise ValueError( + f"All rate values (streams) must be positive integers, received {rate}" + ) + + if kwargs: + raise ValueError( + "No additional arguments are allowed for concurrent profile." 
+ ) + + return ConcurrentProfile(streams=[int(rat) for rat in rate]) + + +class ThroughputProfile(Profile): + type_: Literal["throughput"] = "throughput" # type: ignore[assignment] + max_concurrency: Optional[int] = Field( + default=None, + description="The maximum number of concurrent requests that can be scheduled.", + ) + + @property + def strategy_types(self) -> List[StrategyType]: + return [self.type_] + + def next_strategy(self) -> Optional[SchedulingStrategy]: + if self.completed_strategies >= 1: + return None + + return ThroughputStrategy( + max_concurrency=self.max_concurrency, + ) + + @staticmethod + def from_standard_args( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + **kwargs, + ) -> "ThroughputProfile": + if rate_type != "throughput": + raise ValueError("Rate type must be 'throughput' for throughput profile.") + + if rate is not None: + raise ValueError( + "Rate does not apply to throughput profile, it must be set to None." + ) + + return ThroughputProfile(**kwargs) + + +class AsyncProfile(ThroughputProfile): + type_: Literal["async"] = "async" # type: ignore[assignment] + strategy_type: Literal["constant", "poisson"] = Field( + description="The type of asynchronous strategy to use.", + ) + rate: Union[float, Sequence[float]] = Field( + description="The rate of requests per second to use.", + ) + initial_burst: bool = Field( + default=True, + description=( + "True to send an initial burst of requests (math.floor(self.rate)) " + "to reach target rate. False to not send an initial burst." + ), + ) + random_seed: int = Field( + default=42, + description=( + "The random seed to use for the asynchronous strategy. " + "This is used to generate random numbers for the Poisson strategy." + ), + ) + + @property + def strategy_types(self) -> List[StrategyType]: + num_strategies = len(self.rate) if isinstance(self.rate, Sequence) else 1 + + return [self.strategy_type] * num_strategies + + def next_strategy(self) -> Optional[SchedulingStrategy]: + rate = self.rate if isinstance(self.rate, Sequence) else [self.rate] + + if self.completed_strategies >= len(rate): + return None + + if self.strategy_type == "constant": + return AsyncConstantStrategy( + rate=rate[self.completed_strategies], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + ) + elif self.strategy_type == "poisson": + return AsyncPoissonStrategy( + rate=rate[self.completed_strategies], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + random_seed=self.random_seed, + ) + else: + raise ValueError(f"Invalid strategy type: {self.strategy_type}") + + @staticmethod + def from_standard_args( # type: ignore[override] + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + random_seed: int, + **kwargs, + ) -> "AsyncProfile": + if rate_type not in ("async", "constant", "poisson"): + raise ValueError( + "Rate type must be in ('async', 'constant', 'poisson') " + f"for async profile. 
Received: {rate_type}" + ) + + if not rate: + raise ValueError("Rate must be provided for async profile.") + + if not isinstance(rate, Sequence): + rate = [rate] + + if not all(r.is_integer() and r > 0 for r in rate): + raise ValueError( + f"All rate values must be positive integers, received {rate}" + ) + + if rate_type == "async": + rate_type = "constant" # default to constant if not specified + + return AsyncProfile( + strategy_type=rate_type, # type: ignore[arg-type] + rate=rate, + random_seed=random_seed, + **kwargs, + ) + + +class SweepProfile(AsyncProfile): + type_: Literal["sweep"] = "sweep" # type: ignore[assignment] + sweep_size: int = Field( + description="The number of strategies to generate for the sweep.", + ) + rate: float = -1 + rate_type: Literal["constant", "poisson"] = "constant" + + @property + def strategy_types(self) -> List[StrategyType]: + return ( + ["synchronous"] + ["throughput"] + [self.rate_type] * (self.sweep_size - 2) # type: ignore[return-value] + ) + + def next_strategy(self) -> Optional[SchedulingStrategy]: + if self.completed_strategies >= self.sweep_size: + return None + + if self.completed_strategies == 0: + return SynchronousStrategy() + + if self.completed_strategies == 1: + return ThroughputStrategy( + max_concurrency=self.max_concurrency, + ) + + min_rate = self.measured_rates[0] + max_rate = self.measured_rates[1] + rates = np.linspace(min_rate, max_rate, self.sweep_size - 1)[1:] + + if self.rate_type == "constant": + return AsyncConstantStrategy( + rate=rates[self.completed_strategies - 2], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + ) + elif self.rate_type == "poisson": + return AsyncPoissonStrategy( + rate=rates[self.completed_strategies - 2], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + ) + else: + raise ValueError(f"Invalid strategy type: {self.rate_type}") + + @staticmethod + def from_standard_args( # type: ignore[override] + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + random_seed: int, + **kwargs, + ) -> "SweepProfile": + if rate_type != "sweep": + raise ValueError("Rate type must be 'sweep' for sweep profile.") + + if "sweep_size" in kwargs: + raise ValueError("Sweep size must not be provided, use rate instead.") + + if not rate: + rate = settings.default_sweep_number + + if not rate: + raise ValueError( + "Rate (sweep_size) must be provided for concurrent profile." 
+ ) + + if ( + not isinstance(rate, (int, float)) + or (isinstance(rate, float) and not rate.is_integer()) + or rate <= 1 + ): + raise ValueError( + f"Rate (sweep_size) must be a positive integer > 1, received {rate}" + ) + + if not kwargs: + kwargs = {} + + if "strategy_type" not in kwargs: + kwargs["strategy_type"] = "constant" + + return SweepProfile(sweep_size=int(rate), random_seed=random_seed, **kwargs) + + +def create_profile( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + random_seed: int = 42, + **kwargs, +) -> "Profile": + if rate_type == "synchronous": + return SynchronousProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + **kwargs, + ) + + if rate_type == "concurrent": + return ConcurrentProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + **kwargs, + ) + + if rate_type == "throughput": + return ThroughputProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + **kwargs, + ) + + if rate_type in ("async", "constant", "poisson"): + return AsyncProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + random_seed=random_seed, + **kwargs, + ) + + if rate_type == "sweep": + return SweepProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + random_seed=random_seed, + **kwargs, + ) + + raise ValueError(f"Invalid profile type: {rate_type}") diff --git a/src/guidellm/benchmark/progress.py b/src/guidellm/benchmark/progress.py new file mode 100644 index 00000000..059c4b06 --- /dev/null +++ b/src/guidellm/benchmark/progress.py @@ -0,0 +1,713 @@ +import math +import time +from dataclasses import dataclass +from datetime import datetime +from typing import Dict, Generic, List, Optional, TypeVar, Union + +from rich.console import Group +from rich.live import Live +from rich.panel import Panel +from rich.progress import ( + BarColumn, + Progress, + ProgressColumn, + SpinnerColumn, + TaskID, + TaskProgressColumn, + TextColumn, + TimeElapsedColumn, + TimeRemainingColumn, +) + +from guidellm.benchmark.aggregator import ( + BenchmarkAggregator, + GenerativeBenchmarkAggregator, +) +from guidellm.benchmark.benchmark import Benchmark, GenerativeBenchmark +from guidellm.benchmark.benchmarker import BenchmarkerResult +from guidellm.scheduler import ( + SchedulingStrategy, + StrategyType, + strategy_display_str, +) +from guidellm.utils import Colors + + +@dataclass +class BenchmarkerTaskProgressState: + display_scheduler_stats: bool + + task_id: TaskID + strategy: Union[StrategyType, SchedulingStrategy] + started: bool = False + compiling: bool = False + ended: bool = False + + start_time: Optional[float] = None + max_number: Optional[float] = None + max_duration: Optional[float] = None + in_warmup: bool = False + in_cooldown: bool = False + + requests_rate: float = 0 + request_latency: float = 0 + requests_processing: float = 0 + requests_successful: float = 0 + requests_incomplete: float = 0 + requests_errored: float = 0 + + worker_overheads_time_ms: float = 0.0 + backend_overheads_time_ms: float = 0.0 + requests_sleep_time_ms: float = 0.0 + requests_targeted_start_time_delay_ms: float = 0.0 + + @property + def description(self) -> str: + return strategy_display_str(self.strategy) + + @property + def total(self) -> Optional[float]: + if self.max_number is None and self.max_duration is None: + return None + + return 1000 + + @property + def completed(self) -> int: + if self.ended: + return 1000 + + if self.max_number is None and self.max_duration is None: + return 0 + + number = 
self.requests_successful + self.requests_errored + number_percent = ( + number / float(self.max_number) * 1000 if self.max_number else -math.inf + ) + duration_percent = ( + (time.time() - self.start_time) / self.max_duration * 1000 + if self.max_duration and self.start_time + else -math.inf + ) + + return min(int(max(number_percent, duration_percent)), 1000) + + @property + def fields(self) -> Dict[str, str]: + fields = { + "start_time": self.formatted_start_time, + "progress_status": self.formatted_progress_status, + "requests_summary": self.formatted_requests_summary, + } + + if self.display_scheduler_stats: + fields["scheduler_stats"] = self.formatted_scheduler_stats + + return fields + + @property + def formatted_start_time(self) -> str: + if self.start_time is None: + return "--:--:--" + + return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S") + + @property + def formatted_progress_status(self) -> str: + if self.ended: + status = "complete" + color = Colors.SUCCESS + elif self.compiling: + status = "compiling" + color = Colors.PROGRESS + elif self.started and self.in_warmup: + status = "warmup" + color = Colors.PROGRESS + elif self.started and self.in_cooldown: + status = "cooldown" + color = Colors.PROGRESS + elif self.started: + status = "running" + color = Colors.PROGRESS + else: + status = "pending" + color = Colors.INFO + + return f"[{color}]{status.ljust(8)}[/{color}]" + + @property + def formatted_requests_summary(self) -> str: + if not self.started: + return " " + + return ( + f"[{Colors.INFO}]Req:[/{Colors.INFO}] " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_rate, + label="req/s", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.request_latency, + label="Lat", + units="s", + total_characters=12, + digits_places=4, + decimal_places=2, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_processing, + label="Conc", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_successful, + label="Comp", + total_characters=12, + digits_places=5, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_incomplete, + label="Inc", + total_characters=12, + digits_places=5, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_errored, + label="Err", + total_characters=12, + digits_places=5, + decimal_places=0, + ) + ) + + @property + def formatted_scheduler_stats(self) -> str: + if not self.started: + return " " + + return ( + f"[{Colors.INFO}]Sys:[/{Colors.INFO}] " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.worker_overheads_time_ms, + label="Work OH", + units="ms", + total_characters=18, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.backend_overheads_time_ms, + label="Back OH", + units="ms", + total_characters=18, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_sleep_time_ms, + label="Req Sleep", + units="ms", + total_characters=18, + digits_places=5, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_targeted_start_time_delay_ms, + label="Start 
Del", + units="ms", + total_characters=18, + digits_places=5, + decimal_places=0, + ) + ) + + @staticmethod + def format_progress_display( + value: float, + label: str, + units: str = "", + total_characters: Optional[int] = None, + digits_places: Optional[int] = None, + decimal_places: Optional[int] = None, + ) -> str: + if decimal_places is None and digits_places is None: + formatted_number = f"{value}:.0f" + elif digits_places is None: + formatted_number = f"{value:.{decimal_places}f}" + elif decimal_places is None: + formatted_number = f"{value:>{digits_places}f}" + else: + formatted_number = f"{value:>{digits_places}.{decimal_places}f}" + + result = f"{formatted_number}{units} [{Colors.INFO}]{label}[/{Colors.INFO}]" + + if total_characters is not None: + total_characters += len(Colors.INFO) * 2 + 5 + + if len(result) < total_characters: + result = result.rjust(total_characters) + + return result + + +class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState): + output_tokens: float = 0 + prompt_tokens: float = 0 + output_tokens_rate: float = 0 + total_tokens_rate: float = 0 + tokens_ttft: float = 0 + tokens_itl: float = 0 + + @property + def fields(self) -> Dict[str, str]: + fields = super().fields + fields["tokens_summary"] = self.formatted_tokens_summary + return fields + + @property + def formatted_tokens_summary(self) -> str: + if not self.started: + return " " + + return ( + f"[{Colors.INFO}]Tok:[/{Colors.INFO}] " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.output_tokens_rate, + label="gen/s", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.total_tokens_rate, + label="tot/s", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.tokens_ttft, + label="TTFT", + units="ms", + total_characters=12, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.tokens_itl, + label="ITL", + units="ms", + total_characters=12, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.prompt_tokens, + label="Prompt", + total_characters=12, + digits_places=4, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.output_tokens, + label="Gen", + total_characters=12, + digits_places=4, + decimal_places=0, + ) + ) + + +BTPS = TypeVar("BTPS", bound=BenchmarkerTaskProgressState) + + +class BenchmarkerProgressDisplay(Generic[BTPS]): + def __init__(self, display_scheduler_stats: bool): + self.display_scheduler_stats = display_scheduler_stats + self.started = False + self.benchmarker_tasks_progress = Progress(*self.create_task_progress_columns()) + self.benchmarker_tasks_panel = Panel( + self.benchmarker_tasks_progress, + title="Benchmarks", + title_align="left", + expand=True, + ) + self.benchmarker_progress = Progress( + TextColumn("Generating...", style=f"italic {Colors.PROGRESS}"), + BarColumn( + bar_width=None, + complete_style=Colors.PROGRESS, + finished_style=Colors.SUCCESS, + ), + TextColumn( + "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})", + style=Colors.PROGRESS, + ), + TextColumn("["), + TimeElapsedColumn(), + TextColumn("<"), + TimeRemainingColumn(), + TextColumn("]"), + ) + self.benchmarker_live = Live( + Group( + self.benchmarker_tasks_panel, + 
self.benchmarker_progress, + ), + redirect_stdout=True, + redirect_stderr=True, + ) + self.active_task: Optional[TaskID] = None + self.benchmarker_tasks: List[BTPS] = [] + self.progress_task: Optional[TaskID] = None + + def update(self, result: BenchmarkerResult): + if result.type_ == "run_start": + if self.started: + raise RuntimeError("Progress display already started.") + + self.handle_start(result) + self.started = True + elif result.type_ == "run_complete": + if not self.started: + raise RuntimeError("Progress display not started.") + + self.handle_end(result) + self.started = False + else: + if not self.started: + raise RuntimeError("Progress display not started.") + + self.handle_update(result) + + def handle_start(self, result: BenchmarkerResult): + self.benchmarker_live.start() + + for index, strategy_type in enumerate(result.profile.strategy_types): + task_id = self.benchmarker_tasks_progress.add_task( + description=strategy_type, + start=False, + total=None, + completed=0, + visible=False, + ) + task_progress_state = self.create_task_progress_state( + task_id=task_id, + index=index, + strategy_type=strategy_type, + result=result, + ) + self.benchmarker_tasks.append(task_progress_state) + self.benchmarker_tasks_progress.update( + task_id, + description=task_progress_state.description, + visible=True, + **task_progress_state.fields, # type: ignore[arg-type] + ) + + self.progress_task = self.benchmarker_progress.add_task( + "", + total=len(self.benchmarker_tasks) * 1000, + completed_benchmarks=0, + total_benchmarks=len(self.benchmarker_tasks), + ) + + def handle_update(self, result: BenchmarkerResult): + current_state: BTPS = self.benchmarker_tasks[result.current_index] + + if result.type_ == "scheduler_start": + self.handle_update_scheduler_start(current_state, result) + self.active_task = current_state.task_id + elif result.type_ == "scheduler_update": + self.handle_update_scheduler_update(current_state, result) + elif result.type_ == "scheduler_complete": + self.handle_update_scheduler_complete(current_state, result) + elif result.type_ == "benchmark_compiled": + self.handle_update_benchmark_compiled(current_state, result) + else: + raise ValueError(f"Unknown result type: {result.type_}") + + if self.progress_task is None: + raise RuntimeError("Progress task not set.") + + self.benchmarker_tasks_progress.update( + current_state.task_id, + description=current_state.description, + completed=current_state.completed, + total=current_state.total, + **current_state.fields, # type: ignore[arg-type] + ) + self.benchmarker_progress.update( + self.progress_task, + completed=(result.current_index * 1000) + current_state.completed, + total=1000 * len(self.benchmarker_tasks), + completed_benchmarks=( + result.current_index + (1 if current_state.ended else 0) + ), + total_benchmarks=len(self.benchmarker_tasks), + ) + + if current_state.ended: + self.benchmarker_tasks_progress.stop_task(current_state.task_id) + self.active_task = None + + def handle_update_scheduler_start( + self, progress_state: BTPS, result: BenchmarkerResult + ): + if self.active_task is not None: + raise RuntimeError("Active task already set.") + + progress_state.strategy = result.current_strategy # type: ignore[assignment] + progress_state.started = True + current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment] + progress_state.start_time = ( + current_aggregator.requests_stats.totals.total.start_time + ) + progress_state.max_number = current_aggregator.args.max_number + 
progress_state.max_duration = current_aggregator.args.max_duration + + def handle_update_scheduler_update( + self, progress_state: BTPS, result: BenchmarkerResult + ): + if self.active_task is None: + raise RuntimeError("Active task not set.") + + if self.active_task != progress_state.task_id: + raise RuntimeError("Active task does not match current task.") + + current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment] + progress_state.in_warmup = current_aggregator.in_warmup + progress_state.in_cooldown = current_aggregator.in_cooldown + progress_state.requests_rate = ( + current_aggregator.requests_stats.totals.successful.rate + ) + progress_state.request_latency = ( + current_aggregator.requests_stats.request_time.mean + ) + progress_state.requests_processing = ( + current_aggregator.scheduler_stats.processing_requests.last + ) + progress_state.requests_successful = ( + current_aggregator.requests_stats.totals.successful.total + ) + progress_state.requests_incomplete = ( + current_aggregator.requests_stats.totals.incomplete.total + ) + progress_state.requests_errored = ( + current_aggregator.requests_stats.totals.errored.total + ) + progress_state.worker_overheads_time_ms = ( + current_aggregator.requests_stats.scheduled_time_delay.mean_ms + + current_aggregator.requests_stats.worker_start_delay.mean_ms + ) + progress_state.backend_overheads_time_ms = ( + current_aggregator.requests_stats.request_time_delay.mean_ms + ) + progress_state.requests_sleep_time_ms = ( + current_aggregator.requests_stats.scheduled_time_sleep.mean_ms + ) + progress_state.requests_targeted_start_time_delay_ms = ( + current_aggregator.requests_stats.request_start_time_targeted_delay.mean_ms + ) + + def handle_update_scheduler_complete( + self, + progress_state: BTPS, + result: BenchmarkerResult, # noqa: ARG002 + ): + if self.active_task is None: + raise RuntimeError("Active task not set.") + + if self.active_task != progress_state.task_id: + raise RuntimeError("Active task does not match current task.") + + progress_state.in_warmup = False + progress_state.in_cooldown = False + progress_state.compiling = True + + def handle_update_benchmark_compiled( + self, progress_state: BTPS, result: BenchmarkerResult + ): + if self.active_task is None: + raise RuntimeError("Active task not set.") + + if self.active_task != progress_state.task_id: + raise RuntimeError("Active task does not match current task.") + + current_benchmark: Benchmark = result.current_benchmark # type: ignore[assignment] + progress_state.compiling = False + progress_state.ended = True + progress_state.requests_rate = ( + current_benchmark.metrics.requests_per_second.successful.mean + ) + progress_state.requests_processing = ( + current_benchmark.metrics.request_concurrency.successful.mean + ) + + def handle_end(self, result: BenchmarkerResult): # noqa: ARG002 + if self.progress_task is None: + raise RuntimeError("Progress task not set.") + + self.benchmarker_progress.update( + self.progress_task, + completed=len(self.benchmarker_tasks) * 1000, + total=len(self.benchmarker_tasks) * 1000, + completed_benchmarks=len(self.benchmarker_tasks), + total_benchmarks=len(self.benchmarker_tasks), + ) + self.benchmarker_progress.stop_task(self.progress_task) + self.benchmarker_live.stop() + self.active_task = None + self.benchmarker_tasks = [] + self.progress_task = None + + def create_task_progress_columns(self) -> List[ProgressColumn]: + columns = [ + TextColumn("[{task.fields[start_time]}]"), + 
SpinnerColumn(style=Colors.PROGRESS), + TaskProgressColumn(style=Colors.PROGRESS), + TextColumn("{task.description}"), + TextColumn("({task.fields[progress_status]})"), + TextColumn(" "), + ] + + if not self.display_scheduler_stats: + columns += [ + TextColumn("{task.fields[requests_summary]}\n"), + ] + else: + columns += [ + TextColumn( + "{task.fields[requests_summary]}\n{task.fields[scheduler_stats]}\n" + ), + ] + + return columns + + def create_task_progress_state( + self, + task_id: TaskID, + index: int, # noqa: ARG002 + strategy_type: StrategyType, + result: BenchmarkerResult, # noqa: ARG002 + ) -> BTPS: + return BenchmarkerTaskProgressState( # type: ignore[return-value] + display_scheduler_stats=self.display_scheduler_stats, + task_id=task_id, + strategy=strategy_type, + ) + + +class GenerativeTextBenchmarkerProgressDisplay( + BenchmarkerProgressDisplay[GenerativeTextBenchmarkerTaskProgressState] +): + def handle_update_scheduler_update( + self, + progress_state: GenerativeTextBenchmarkerTaskProgressState, + result: BenchmarkerResult, + ): + super().handle_update_scheduler_update(progress_state, result) + current_aggregator: GenerativeBenchmarkAggregator = result.current_aggregator # type: ignore[assignment] + progress_state.output_tokens = ( + current_aggregator.requests_stats.output_tokens.mean + ) + progress_state.prompt_tokens = ( + current_aggregator.requests_stats.prompt_tokens.mean + ) + progress_state.output_tokens_rate = ( + current_aggregator.requests_stats.output_tokens.rate + ) + progress_state.total_tokens_rate = ( + current_aggregator.requests_stats.total_tokens.rate + ) + progress_state.tokens_ttft = ( + current_aggregator.requests_stats.time_to_first_token.mean_ms + ) + progress_state.tokens_itl = ( + current_aggregator.requests_stats.inter_token_latency.mean_ms + ) + + def handle_update_benchmark_compiled( + self, + progress_state: GenerativeTextBenchmarkerTaskProgressState, + result: BenchmarkerResult, + ): + super().handle_update_benchmark_compiled(progress_state, result) + + current_benchmark: GenerativeBenchmark = result.current_benchmark # type: ignore[assignment] + progress_state.request_latency = ( + current_benchmark.metrics.request_latency.successful.mean + ) + progress_state.requests_successful = current_benchmark.request_totals.successful + progress_state.requests_errored = current_benchmark.request_totals.errored + progress_state.requests_incomplete = current_benchmark.request_totals.incomplete + progress_state.output_tokens = ( + current_benchmark.metrics.output_token_count.successful.mean + ) + progress_state.prompt_tokens = ( + current_benchmark.metrics.prompt_token_count.successful.mean + ) + progress_state.output_tokens_rate = ( + current_benchmark.metrics.output_tokens_per_second.successful.mean + ) + progress_state.total_tokens_rate = ( + current_benchmark.metrics.tokens_per_second.successful.mean + ) + progress_state.tokens_ttft = ( + current_benchmark.metrics.time_to_first_token_ms.successful.mean + ) + progress_state.tokens_itl = ( + current_benchmark.metrics.inter_token_latency_ms.successful.mean + ) + + def create_task_progress_state( + self, + task_id: TaskID, + index: int, # noqa: ARG002 + strategy_type: StrategyType, + result: BenchmarkerResult, # noqa: ARG002 + ) -> GenerativeTextBenchmarkerTaskProgressState: + return GenerativeTextBenchmarkerTaskProgressState( + display_scheduler_stats=self.display_scheduler_stats, + task_id=task_id, + strategy=strategy_type, + ) + + def create_task_progress_columns(self) -> List[ProgressColumn]: + 
columns = super().create_task_progress_columns() + columns = columns[:-1] # remove the last display info column + + if not self.display_scheduler_stats: + columns += [ + TextColumn( + "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}", + ), + ] + else: + columns += [ + TextColumn( + "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}\n{task.fields[scheduler_stats]}", + ), + ] + + return columns diff --git a/src/guidellm/config.py b/src/guidellm/config.py index 2d4e102a..ece9d63f 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -7,7 +7,6 @@ __all__ = [ "DatasetSettings", - "EmulatedDataSettings", "Environment", "LoggingSettings", "OpenAISettings", @@ -74,24 +73,6 @@ class DatasetSettings(BaseModel): ) -class EmulatedDataSettings(BaseModel): - """ - Emulated data settings for the application to use - """ - - source: str = "https://www.gutenberg.org/files/1342/1342-0.txt" - filter_start: str = "It is a truth universally acknowledged, that a" - filter_end: str = "CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO." - clean_text_args: Dict[str, bool] = Field( - default_factory=lambda: { - "fix_encoding": True, - "clean_whitespace": True, - "remove_empty_lines": True, - "force_new_line_punctuation": True, - } - ) - - class OpenAISettings(BaseModel): """ OpenAI settings for the application to connect to the API @@ -139,19 +120,29 @@ class Settings(BaseSettings): # general settings env: Environment = Environment.PROD + default_async_loop_sleep: float = 10e-5 + logging: LoggingSettings = LoggingSettings() + default_sweep_number: int = 10 + + # HTTP settings request_timeout: int = 60 * 5 # 5 minutes request_http2: bool = True + + # Scheduler settings max_concurrency: int = 512 - num_sweep_profiles: int = 9 - logging: LoggingSettings = LoggingSettings() + max_worker_processes: int = 10 + max_add_requests_per_loop: int = 20 # Data settings dataset: DatasetSettings = DatasetSettings() - emulated_data: EmulatedDataSettings = EmulatedDataSettings() # Request/stats settings - preferred_prompt_tokens_source: Optional[Literal["backend", "local"]] = None - preferred_output_tokens_source: Optional[Literal["backend", "local"]] = None + preferred_prompt_tokens_source: Optional[ + Literal["request", "response", "local"] + ] = None + preferred_output_tokens_source: Optional[ + Literal["request", "response", "local"] + ] = None preferred_backend: Literal["openai"] = "openai" openai: OpenAISettings = OpenAISettings() diff --git a/src/guidellm/core/__init__.py b/src/guidellm/core/__init__.py deleted file mode 100644 index e738aa76..00000000 --- a/src/guidellm/core/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from .distribution import Distribution -from .report import GuidanceReport -from .request import TextGenerationRequest -from .result import ( - RequestConcurrencyMeasurement, - TextGenerationBenchmark, - TextGenerationBenchmarkReport, - TextGenerationError, - TextGenerationResult, -) -from .serializable import Serializable, SerializableFileType - -__all__ = [ - "Distribution", - "GuidanceReport", - "RequestConcurrencyMeasurement", - "Serializable", - "SerializableFileType", - "TextGenerationBenchmark", - "TextGenerationBenchmarkReport", - "TextGenerationError", - "TextGenerationRequest", - "TextGenerationResult", -] diff --git a/src/guidellm/core/distribution.py b/src/guidellm/core/distribution.py deleted file mode 100644 index 749d6818..00000000 --- a/src/guidellm/core/distribution.py +++ /dev/null @@ -1,190 +0,0 @@ -from typing import List, Sequence, Union - -import numpy 
as np -from loguru import logger -from pydantic import Field - -from guidellm.core.serializable import Serializable - -__all__ = ["Distribution"] - - -class Distribution(Serializable): - """ - A class to represent a statistical distribution and perform various - statistical analyses. - """ - - data: Sequence[float] = Field( - default_factory=list, - description="The data points of the distribution.", - ) - - def __str__(self): - return f"Distribution({self.describe()})" - - def __len__(self): - return len(self.data) - - @property - def mean(self) -> float: - """ - Calculate and return the mean of the distribution. - :return: The mean of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate mean.") - return 0.0 - - mean_value = np.mean(self.data).item() - logger.debug(f"Calculated mean: {mean_value}") - return mean_value - - @property - def median(self) -> float: - """ - Calculate and return the median of the distribution. - :return: The median of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate median.") - return 0.0 - - median_value = np.median(self.data).item() - logger.debug(f"Calculated median: {median_value}") - return median_value - - @property - def variance(self) -> float: - """ - Calculate and return the variance of the distribution. - :return: The variance of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate variance.") - return 0.0 - - variance_value = np.var(self.data).item() - logger.debug(f"Calculated variance: {variance_value}") - return variance_value - - @property - def std_deviation(self) -> float: - """ - Calculate and return the standard deviation of the distribution. - :return: The standard deviation of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate standard deviation.") - return 0.0 - - std_deviation_value = np.std(self.data).item() - logger.debug(f"Calculated standard deviation: {std_deviation_value}") - return std_deviation_value - - def percentile(self, percentile: float) -> float: - """ - Calculate and return the specified percentile of the distribution. - :param percentile: The desired percentile to calculate (0-100). - :return: The specified percentile of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate percentile.") - return 0.0 - - percentile_value = np.percentile(self.data, percentile).item() - logger.debug(f"Calculated {percentile}th percentile: {percentile_value}") - return percentile_value - - def percentiles(self, percentiles: Union[List[int], List[float]]) -> List[float]: - """ - Calculate and return the specified percentiles of the distribution. - :param percentiles: A list of desired percentiles to calculate (0-100). - :return: A list of the specified percentiles of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate percentiles.") - return [0.0] * len(percentiles) - - percentiles_values: List[float] = np.percentile(self.data, percentiles).tolist() # type: ignore # noqa: PGH003 - logger.debug(f"Calculated percentiles {percentiles}: {percentiles_values}") - return percentiles_values - - @property - def min(self) -> float: - """ - Return the minimum value of the distribution. - :return: The minimum value of the distribution. 
- """ - if not self.data: - logger.info("No data points available to calculate minimum.") - return 0.0 - - min_value: float = np.min(self.data).item() # type: ignore # noqa: PGH003 - logger.debug(f"Calculated min: {min_value}") - return min_value - - @property - def max(self) -> float: - """ - Return the maximum value of the distribution. - :return: The maximum value of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate maximum.") - return 0.0 - - max_value: float = np.max(self.data).item() # type: ignore # noqa: PGH003 - logger.debug(f"Calculated max: {max_value}") - return max_value - - @property - def range(self) -> float: - """ - Calculate and return the range of the distribution (max - min). - :return: The range of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate range.") - return 0.0 - - range_value = self.max - self.min - logger.debug(f"Calculated range: {range_value}") - return range_value - - def describe(self) -> dict: - """ - Return a dictionary describing various statistics of the distribution. - :return: A dictionary with statistical summaries of the distribution. - """ - description = { - "mean": self.mean, - "median": self.median, - "variance": self.variance, - "std_deviation": self.std_deviation, - "percentile_indices": [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], - "percentile_values": self.percentiles( - [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], - ), - "min": self.min, - "max": self.max, - "range": self.range, - } - logger.debug(f"Generated description: {description}") - return description - - def add_data(self, new_data: Sequence[float]): - """ - Add new data points to the distribution. - :param new_data: A list of new numerical data points to add. - """ - self.data = list(self.data) + list(new_data) - logger.debug(f"Added new data: {new_data}") - - def remove_data(self, remove_data: Sequence[float]): - """ - Remove specified data points from the distribution. - :param remove_data: A list of numerical data points to remove. - """ - self.data = [item for item in self.data if item not in remove_data] - logger.debug(f"Removed data: {remove_data}") diff --git a/src/guidellm/core/report.py b/src/guidellm/core/report.py deleted file mode 100644 index 584fe63c..00000000 --- a/src/guidellm/core/report.py +++ /dev/null @@ -1,311 +0,0 @@ -import time -from datetime import datetime -from typing import List, Optional - -from loguru import logger -from pydantic import Field -from rich.console import Console, Group -from rich.live import Live -from rich.panel import Panel -from rich.table import Table - -from guidellm.core.result import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.core.serializable import Serializable - -__all__ = ["GuidanceReport"] - - -def _create_benchmark_report_details(report: TextGenerationBenchmarkReport) -> str: - """ - Create a detailed string representation of a benchmark report. - - :param report: The benchmark report to generate details for. - :type report: TextGenerationBenchmarkReport - :return: A string containing the backend, data, rate, and limits of - the benchmark report. 
- :rtype: str - """ - backend = ( - f"Backend(type={report.args.get('backend_type', 'N/A')}, " - f"target={report.args.get('target', 'N/A')}, " - f"model={report.args.get('model', 'N/A')})" - ) - data = ( - f"Data(type={report.args.get('data_type', 'N/A')}, " - f"source={report.args.get('data', 'N/A')}, " - f"tokenizer={report.args.get('tokenizer', 'N/A')})" - ) - rate = ( - f"Rate(type={report.args.get('mode', 'N/A')}, " - f"rate={report.args.get('rate', 'N/A')})" - ) - limits = ( - f"Limits(max_number={report.args.get('max_number', 'N/A')} requests, " - f"max_duration={report.args.get('max_duration', 'N/A')} sec)" - ) - - logger.debug( - "Created benchmark report details for backend={}, data={}, rate={}, limits={}", - backend, - data, - rate, - limits, - ) - - return backend + "\n" + data + "\n" + rate + "\n" + limits + "\n" - - -def _benchmark_rate_id(benchmark: TextGenerationBenchmark) -> str: - """ - Generate a string identifier for a benchmark rate. - - :param benchmark: The benchmark for which to generate the rate ID. - :type benchmark: TextGenerationBenchmark - :return: A string representing the benchmark rate ID. - :rtype: str - """ - rate_id = ( - f"{benchmark.mode}@{benchmark.rate:.2f} req/sec" - if benchmark.rate - else f"{benchmark.mode}" - ) - logger.debug("Generated benchmark rate ID: {}", rate_id) - return rate_id - - -def _create_benchmark_report_requests_summary( - report: TextGenerationBenchmarkReport, -) -> Table: - """ - Create a table summarizing the requests of a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing the requests. - :rtype: Table - """ - table = Table( - "Benchmark", - "Requests Completed", - "Request Failed", - "Duration", - "Start Time", - "End Time", - title="[magenta]Requests Data by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - start_time_str = ( - datetime.fromtimestamp(benchmark.start_time).strftime("%H:%M:%S") - if benchmark.start_time - else "N/A" - ) - end_time_str = ( - datetime.fromtimestamp(benchmark.end_time).strftime("%H:%M:%S") - if benchmark.end_time - else "N/A" - ) - - table.add_row( - _benchmark_rate_id(benchmark), - f"{benchmark.request_count}/{benchmark.total_count}", - f"{benchmark.error_count}/{benchmark.total_count}", - f"{benchmark.duration:.2f} sec", - f"{start_time_str}", - f"{end_time_str}", - ) - logger.debug("Created requests summary table for the report.") - return table - - -def _create_benchmark_report_data_tokens_summary( - report: TextGenerationBenchmarkReport, -) -> Table: - """ - Create a table summarizing data tokens of a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing the data tokens. 
- :rtype: Table - """ - table = Table( - "Benchmark", - "Prompt", - "Prompt (1%, 5%, 10%, 50%, 90%, 95%, 99%)", - "Output", - "Output (1%, 5%, 10%, 50%, 90%, 95%, 99%)", - title="[magenta]Tokens Data by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - table.add_row( - _benchmark_rate_id(benchmark), - f"{benchmark.prompt_token:.2f}", - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.prompt_token_percentiles.values() - ), - f"{benchmark.output_token:.2f}", - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.output_token_percentiles.values() - ), - ) - logger.debug("Created data tokens summary table for the report.") - return table - - -def _create_benchmark_report_dist_perf_summary( - report: TextGenerationBenchmarkReport, -) -> Table: - """ - Create a table summarizing distribution performance of a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing the performance statistics. - :rtype: Table - """ - table = Table( - "Benchmark", - "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)", - "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", - "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", - title="[magenta]Performance Stats by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - table.add_row( - _benchmark_rate_id(benchmark), - ", ".join( - f"{percentile:.2f}" - for percentile in benchmark.request_latency_percentiles.values() - ), - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.time_to_first_token_percentiles.values() - ), - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.inter_token_latency_percentiles.values() - ), - ) - logger.debug("Created distribution performance summary table for the report.") - return table - - -def _create_benchmark_report_summary(report: TextGenerationBenchmarkReport) -> Table: - """ - Create a summary table for a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing overall performance. - :rtype: Table - """ - table = Table( - "Benchmark", - "Requests per Second", - "Request Latency", - "Time to First Token", - "Inter Token Latency", - "Output Token Throughput", - title="[magenta]Performance Summary by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - table.add_row( - _benchmark_rate_id(benchmark), - f"{benchmark.completed_request_rate:.2f} req/sec", - f"{benchmark.request_latency:.2f} sec", - f"{benchmark.time_to_first_token:.2f} ms", - f"{benchmark.inter_token_latency:.2f} ms", - f"{benchmark.output_token_throughput:.2f} tokens/sec", - ) - logger.debug("Created overall performance summary table for the report.") - return table - - -class GuidanceReport(Serializable): - """ - A class to manage the guidance reports that include the benchmarking details, - potentially across multiple runs, for saving and loading from disk. - - :param benchmarks: The list of benchmarking reports. - :type benchmarks: List[TextGenerationBenchmarkReport] - """ - - benchmarks: List[TextGenerationBenchmarkReport] = Field( - default_factory=list, description="The list of benchmark reports." 
- ) - - def print( - self, save_path: Optional[str] = None, continual_refresh: bool = False - ) -> None: - """ - Print the guidance report to the console. - - :param save_path: Optional path to save the report to disk. - :type save_path: Optional[str] - :param continual_refresh: Whether to continually refresh the report. - :type continual_refresh: bool - :return: None - """ - logger.info("Printing guidance report to console with save_path={}", save_path) - report_viz = Panel( - Group( - *[ - Panel( - Group( - _create_benchmark_report_details(benchmark), - "", - _create_benchmark_report_requests_summary(benchmark), - "", - _create_benchmark_report_data_tokens_summary(benchmark), - "", - _create_benchmark_report_dist_perf_summary(benchmark), - "", - _create_benchmark_report_summary(benchmark), - ), - title=( - f"[bold magenta]Benchmark Report " - f"{index + 1}[/bold magenta]" - ), - expand=True, - title_align="left", - ) - for index, benchmark in enumerate(self.benchmarks) - ], - ), - title=( - "[bold cyan]GuideLLM Benchmarks Report[/bold cyan] [italic]" - f"({save_path})[/italic]" - ), - expand=True, - title_align="left", - ) - console = Console() - - if continual_refresh: - logger.info("Starting live report with continual refresh.") - with Live(report_viz, refresh_per_second=1, console=console) as live: - while True: - live.update(report_viz) - time.sleep(1) - else: - console.print(report_viz) - - logger.info("Guidance report printing completed.") diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py deleted file mode 100644 index 547ac60a..00000000 --- a/src/guidellm/core/request.py +++ /dev/null @@ -1,49 +0,0 @@ -import uuid -from typing import Any, Dict, Literal, Optional - -from pydantic import Field - -from guidellm.core.serializable import Serializable - - -class TextGenerationRequest(Serializable): - """ - A class to represent a text generation request for generative AI workloads. - """ - - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), - description="The unique identifier for the request.", - ) - type_: Literal["text", "chat"] = Field( - default="text", - description="The type of text generation request (e.g., text, chat).", - ) - prompt: str = Field(description="The input prompt for the text generation.") - prompt_token_count: Optional[int] = Field( - default=None, - description="The number of tokens in the input prompt.", - ) - output_token_count: Optional[int] = Field( - default=None, - description="The number of tokens to generate.", - ) - params: Dict[str, Any] = Field( - default_factory=dict, - description="The parameters for the text generation request.", - ) - - def __str__(self) -> str: - prompt_short = ( - self.prompt[:32] + "..." 
- if self.prompt and len(self.prompt) > 32 # noqa: PLR2004 - else self.prompt - ) - - return ( - f"TextGenerationRequest(id={self.id}, " - f"type_={self.type_}" - f"prompt={prompt_short}, prompt_token_count={self.prompt_token_count}, " - f"output_token_count={self.output_token_count}, " - f"params={self.params})" - ) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py deleted file mode 100644 index 2670c105..00000000 --- a/src/guidellm/core/result.py +++ /dev/null @@ -1,585 +0,0 @@ -from time import time -from typing import Any, Dict, List, Literal, Optional, Union - -from loguru import logger -from pydantic import Field, computed_field - -from guidellm.core.distribution import Distribution -from guidellm.core.request import TextGenerationRequest -from guidellm.core.serializable import Serializable - -__all__ = [ - "RequestConcurrencyMeasurement", - "TextGenerationBenchmark", - "TextGenerationBenchmarkReport", - "TextGenerationError", - "TextGenerationResult", -] - - -DEFAULT_PERCENTILES = [1, 5, 10, 50, 90, 95, 99] - - -class TextGenerationResult(Serializable): - """ - A class to represent the result of a text generation request - for generative AI workloads. - """ - - request: TextGenerationRequest = Field( - description="The text generation request used to generate the result.", - ) - prompt_token_count: Optional[int] = Field( - default=None, - description="The number of tokens in the input prompt.", - ) - output: str = Field( - default_factory=str, - description="The generated output for the text generation.", - ) - output_token_count: Optional[int] = Field( - default=None, - description="The number of tokens in the output.", - ) - start_time: Optional[float] = Field( - default=None, - description="The absolute start time, in seconds, of the text generation.", - ) - end_time: Optional[float] = Field( - default=None, - description="The absolute end time, in seconds, of the text generation.", - ) - first_token_time: Optional[float] = Field( - default=None, - description="The absolute time, in seconds, the first token was received.", - ) - last_token_time: Optional[float] = Field( - default=None, - description="The absolute time, in seconds, the last token was received.", - ) - - @computed_field # type: ignore[misc] - @property - def request_latency(self) -> Optional[float]: - """ - Get the request latency in seconds. - - :return: The request latency in seconds. - """ - if not self.end_time or not self.start_time: - return None - - return self.end_time - self.start_time - - @computed_field # type: ignore[misc] - @property - def time_to_first_token(self) -> Optional[float]: - """ - Get the time taken to decode the first token in milliseconds. - - :return: The time taken to decode the first token in milliseconds. - """ - if not self.first_token_time or not self.start_time: - return None - - return 1000 * (self.first_token_time - self.start_time) - - @computed_field # type: ignore[misc] - @property - def inter_token_latency(self) -> Optional[float]: - """ - Get the average time between tokens in milliseconds. - - :return: The average time between tokens. 
- """ - if ( - not self.last_token_time - or not self.first_token_time - or not self.output_token_count - or self.output_token_count < 2 # noqa: PLR2004 - ): - return None - - return ( - 1000 - * (self.last_token_time - self.first_token_time) - / (self.output_token_count - 1) # ignore first token - ) - - @computed_field # type: ignore[misc] - @property - def output_tokens_per_second(self) -> Optional[float]: - """ - Get the average token throughput in tokens per second for the entire request. - Note, does not account for the time taken to decode the first token. - - :return: The average token throughput. - """ - itl = self.inter_token_latency - - if itl is None: - return None - - return 1000.0 / itl - - -class TextGenerationError(Serializable): - """ - A class to represent an error that occurred during a text generation request - for generative AI workloads. - """ - - request: TextGenerationRequest = Field( - description="The text generation request that resulted in an error.", - ) - message: str = Field( - description="The error message that occurred during text generation.", - ) - - -class RequestConcurrencyMeasurement(Serializable): - """ - A dataclass to represent the concurrency measurement of a request. - """ - - time: float = Field(description="The time of the measurement.") - completed: int = Field(description="The number of completed requests.") - errored: int = Field(description="The number of errored requests.") - processing: int = Field(description="The number of processing requests.") - - -class TextGenerationBenchmark(Serializable): - """ - A class to represent a report of text generation requests - (results and errors) for generative AI workloads. - This is a set of results and errors for a specific mode and rate. - """ - - mode: Literal["asynchronous", "synchronous", "throughput"] = Field( - description="The generation mode, one of 'async', 'sync', or 'throughput'." - ) - rate: Optional[float] = Field( - default=None, - description="The requested rate of requests per second.", - ) - results: List[TextGenerationResult] = Field( - default_factory=list, - description="The results of the text generation requests.", - ) - errors: List[TextGenerationError] = Field( - default_factory=list, - description="The errors of the text generation requests.", - ) - concurrencies: List[RequestConcurrencyMeasurement] = Field( - default_factory=list, - description="The concurrency measurements of the requests.", - ) - - def __iter__(self): - """ - Provide an iterator interface to iterate over the results. - - :return: An iterator over the results. - """ - return iter(self.results) - - @computed_field # type: ignore[misc] - @property - def request_count(self) -> int: - """ - Get the number of requests in the result. - - :return: The number of requests. - """ - return len(self.results) - - @computed_field # type: ignore[misc] - @property - def error_count(self) -> int: - """ - Get the number of errors in the result. - - :return: The number of errors. - """ - return len(self.errors) - - @computed_field # type: ignore[misc] - @property - def total_count(self) -> int: - """ - Get the total number of requests in the result. - - :return: The total number of requests. - """ - return self.request_count + self.error_count - - @computed_field # type: ignore[misc] - @property - def start_time(self) -> Optional[float]: - """ - Get the start time of the first request in the result. - - :return: The start time of the first request. 
- """ - return self.results[0].start_time if self.results else None - - @computed_field # type: ignore[misc] - @property - def end_time(self) -> Optional[float]: - """ - Get the end time of the last request in the result. - - :return: The end time of the last request. - """ - return self.results[-1].end_time if self.results else None - - @computed_field # type: ignore[misc] - @property - def duration(self) -> float: - """ - Get the duration of the result in seconds. - - :return: The duration of the result. - """ - return ( - self.end_time - self.start_time - if self.end_time and self.start_time - else 0.0 - ) - - @computed_field # type: ignore[misc] - @property - def completed_request_rate(self) -> float: - """ - Get the rate of requests per second in the result. - - :return: The rate of requests per second. - """ - return self.request_count / self.duration if self.duration else 0.0 - - @property - def request_latency_distribution(self) -> Distribution: - """ - Get the distribution of request latencies in seconds. - - :return: The distribution of request latencies. - """ - return Distribution( - data=[ - result.request_latency - for result in self.results - if result.request_latency - ] - ) - - @computed_field # type: ignore[misc] - @property - def request_latency(self) -> float: - """ - Get the average request latency in seconds. - - :return: The average request latency in seconds. - :rtype: float - """ - return self.request_latency_distribution.mean - - @computed_field # type: ignore[misc] - @property - def request_latency_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles of request latency in seconds. - - :return: A dictionary mapping percentile to request latency in seconds. - """ - if not self.results: - return {} - - values = self.request_latency_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @property - def ttft_distribution(self) -> Distribution: - """ - Get the distribution of time taken to decode the first token. - - :return: The distribution of time taken to decode the first token. - """ - return Distribution( - data=[ - result.time_to_first_token - for result in self.results - if result.time_to_first_token - ] - ) - - @computed_field # type: ignore[misc] - @property - def time_to_first_token(self) -> float: - """ - Get the time taken to decode the first token in milliseconds. - - :return: The time taken to decode the first token in milliseconds. - """ - return self.ttft_distribution.mean - - @computed_field # type: ignore[misc] - @property - def time_to_first_token_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for time taken to decode the first token - in milliseconds. - - :return: A dictionary mapping percentile to time taken for the first token. - """ - if not self.results: - return {} - - values = self.ttft_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @property - def itl_distribution(self) -> Distribution: - """ - Get the distribution of time between tokens in milliseconds. - - :return: The distribution of time between tokens. 
- """ - return Distribution( - data=[ - result.inter_token_latency - for result in self.results - for _ in range( - result.output_token_count - 1 - if result.output_token_count and result.output_token_count > 1 - else 0 - ) - if (result.inter_token_latency) - ] - ) - - @computed_field # type: ignore[misc] - @property - def inter_token_latency(self) -> float: - """ - Get the average time between tokens in milliseconds. - - :return: The average time between tokens. - """ - return self.itl_distribution.mean - - @computed_field # type: ignore[misc] - @property - def inter_token_latency_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for the time between tokens in milliseconds. - - :return: A dictionary mapping percentile to time between tokens. - """ - if not self.results: - return {} - - values = self.itl_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @computed_field # type: ignore[misc] - @property - def output_token_throughput(self) -> float: - """ - Get the average token throughput in tokens per second. - - :return: The average token throughput. - """ - output_tokens = sum( - result.output_token_count - for result in self.results - if result.output_token_count and result.output_token_count > 0 - ) - - return output_tokens / self.duration if self.duration else 0.0 - - @property - def prompt_token_distribution(self) -> Distribution: - """ - Get the distribution of prompt token counts. - - :return: The distribution of prompt token counts. - """ - return Distribution( - data=[ - result.prompt_token_count - for result in self.results - if result.prompt_token_count - ] - ) - - @computed_field # type: ignore[misc] - @property - def prompt_token(self) -> float: - """ - Get the average number of prompt tokens. - - :return: The average number of prompt tokens. - """ - return self.prompt_token_distribution.mean - - @computed_field # type: ignore[misc] - @property - def prompt_token_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for number of prompt tokens. - - :return: A dictionary mapping percentile to number of prompt tokens. - """ - if not self.results: - return {} - - values = self.prompt_token_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @property - def output_token_distribution(self) -> Distribution: - """ - Get the distribution of output token counts. - - :return: The distribution of output token counts. - """ - return Distribution( - data=[ - result.output_token_count - for result in self.results - if result.output_token_count - ] - ) - - @computed_field # type: ignore[misc] - @property - def output_token(self) -> float: - """ - Get the average number of output tokens. - - :return: The average number of output tokens. - """ - return self.output_token_distribution.mean - - @computed_field # type: ignore[misc] - @property - def output_token_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for number of output tokens. - - :return: List of percentiles of number of output tokens. - """ - if not self.results: - return {} - - values = self.output_token_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - def request_started(self): - """ - Record the start of a generation request. 
- """ - if not self.concurrencies: - self.concurrencies = [ - RequestConcurrencyMeasurement( - time=time(), - completed=0, - errored=0, - processing=1, - ), - ] - else: - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed, - errored=last.errored, - processing=last.processing + 1, - ), - ) - - logger.info("Text generation request started") - - def request_completed( - self, - result: Union[TextGenerationResult, TextGenerationError], - ): - """ - Record the completion of a text generation request. - - :param result: The completed result or error. - :type result: Union[TextGenerationResult, TextGenerationError] - """ - if not self.concurrencies: - raise ValueError("Request completed without starting") - - if isinstance(result, TextGenerationError): - is_error = True - self.errors.append(result) - logger.info( - "Text generation request resulted in error: {}", - result.message, - ) - else: - if not result.start_time or not result.end_time: - raise ValueError("Start time and End time are not defined") - - is_error = False - self.results.append(result) - logger.info("Text generation request completed successfully: {}", result) - - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed + (not is_error), - errored=last.errored + is_error, - processing=last.processing - 1, - ) - ) - - -class TextGenerationBenchmarkReport(Serializable): - """ - A class to represent a report of text generation benchmarks - for generative AI workloads. - This is a collection of benchmarks for different modes and rates. - """ - - benchmarks: List[TextGenerationBenchmark] = Field( - default_factory=list, - description="The benchmarks of text generation requests.", - ) - args: Dict[str, Any] = Field( - default_factory=dict, - description="The arguments used for the benchmarks.", - ) - - def __iter__(self): - return iter(self.benchmarks) - - @property - def benchmarks_sorted(self) -> List[TextGenerationBenchmark]: - """ - Get the list of benchmarks sorted by request rate. - - :return: The sorted list of benchmarks. - :rtype: List[TextGenerationBenchmark] - """ - return sorted(self.benchmarks, key=lambda x: x.completed_request_rate) - - def add_benchmark(self, benchmark: TextGenerationBenchmark): - """ - Add a result to the report. - - :param benchmark: The result to add. - :type benchmark: TextGenerationBenchmark - """ - self.benchmarks.append(benchmark) - logger.debug("Added result: {}", benchmark) diff --git a/src/guidellm/core/serializable.py b/src/guidellm/core/serializable.py deleted file mode 100644 index 23e6845a..00000000 --- a/src/guidellm/core/serializable.py +++ /dev/null @@ -1,169 +0,0 @@ -from pathlib import Path -from typing import Any, Literal, Union, get_args - -import yaml -from loguru import logger -from pydantic import BaseModel, ConfigDict - -__all__ = ["Serializable", "SerializableFileType"] - - -SerializableFileType = Literal["yaml", "json"] - - -class Serializable(BaseModel): - """ - A base class for models that require YAML and JSON serialization and - deserialization. 
- """ - - model_config = ConfigDict( - extra="ignore", - use_enum_values=True, - validate_assignment=True, - from_attributes=True, - ) - - def __init__(self, /, **data: Any) -> None: - super().__init__(**data) - logger.debug( - "Initialized new instance of {} with data: {}", - self.__class__.__name__, - data, - ) - - def to_yaml(self) -> str: - """ - Serialize the model to a YAML string. - - :return: YAML string representation of the model. - """ - logger.debug("Serializing to YAML... {}", self) - - return yaml.dump(self.model_dump()) - - @classmethod - def from_yaml(cls, data: str): - """ - Deserialize a YAML string to a model instance. - - :param data: YAML string to deserialize. - :return: An instance of the model. - """ - logger.debug("Deserializing from YAML... {}", data) - - return cls.model_validate(yaml.safe_load(data)) - - def to_json(self) -> str: - """ - Serialize the model to a JSON string. - - :return: JSON string representation of the model. - """ - logger.debug("Serializing to JSON... {}", self) - - return self.model_dump_json() - - @classmethod - def from_json(cls, data: str): - """ - Deserialize a JSON string to a model instance. - - :param data: JSON string to deserialize. - :return: An instance of the model. - """ - logger.debug("Deserializing from JSON... {}", data) - - return cls.model_validate_json(data) - - def save_file( - self, - path: Union[str, Path], - type_: SerializableFileType = "yaml", - ) -> str: - """ - Save the model to a file in either YAML or JSON format. - - :param path: Path to the exact file or the containing directory. - If it is a directory, the file name will be inferred from the class name. - :param type_: Optional type to save ('yaml' or 'json'). - If not provided and the path has an extension, - it will be inferred to save in that format. - If not provided and the path does not have an extension, - it will save in YAML format. - :return: The path to the saved file. - """ - logger.debug("Saving to file... {} with format: {}", path, type_) - - if isinstance(path, str): - path = Path(path) - - if path.suffix: - # is a file - ext = path.suffix[1:].lower() - if type_ not in get_args(SerializableFileType): - raise ValueError( - f"Unsupported file extension: {type_}. " - f"Expected one of {SerializableFileType} " - f"for {path}" - ) - type_ = ext # type: ignore # noqa: PGH003 - else: - # is a directory - file_name = f"{self.__class__.__name__.lower()}.{type_}" - path = path / file_name - - path.parent.mkdir(parents=True, exist_ok=True) - - with path.open("w") as file: - if type_ == "yaml": - file.write(self.to_yaml()) - elif type_ == "json": - file.write(self.to_json()) - else: - raise ValueError( - f"Unsupported file extension: {type_}" - f"Expected one of {SerializableFileType} " - f"for {path}" - ) - - logger.info("Successfully saved {} to {}", self.__class__.__name__, path) - - return str(path) - - @classmethod - def load_file(cls, path: Union[str, Path]): - """ - Load a model from a file in either YAML or JSON format. - - :param path: Path to the file. - :return: An instance of the model. - """ - logger.debug("Loading from file... 
{}", path) - - if isinstance(path, str): - path = Path(path) - - if not path.exists(): - raise FileNotFoundError(f"File not found: {path}") - - if not path.is_file(): - raise ValueError(f"Path is not a file: {path}") - - extension = path.suffix[1:].lower() - - with path.open() as file: - data = file.read() - - if extension == "yaml": - obj = cls.from_yaml(data) - elif extension == "json": - obj = cls.from_json(data) - else: - raise ValueError( - f"Unsupported file extension: {extension}" - f"Expected one of {SerializableFileType} " - f"for {path}" - ) - - return obj diff --git a/src/guidellm/data/__init__.py b/src/guidellm/data/__init__.py new file mode 100644 index 00000000..8a48204e --- /dev/null +++ b/src/guidellm/data/__init__.py @@ -0,0 +1,4 @@ +""" +Required for python < 3.12 +https://docs.python.org/3/library/importlib.resources.html#importlib.resources.files +""" diff --git a/src/guidellm/data/prideandprejudice.txt.gz b/src/guidellm/data/prideandprejudice.txt.gz new file mode 100644 index 00000000..8c7a1072 Binary files /dev/null and b/src/guidellm/data/prideandprejudice.txt.gz differ diff --git a/src/guidellm/dataset/__init__.py b/src/guidellm/dataset/__init__.py new file mode 100644 index 00000000..20d68e64 --- /dev/null +++ b/src/guidellm/dataset/__init__.py @@ -0,0 +1,22 @@ +from .creator import ColumnInputTypes, DatasetCreator +from .entrypoints import load_dataset +from .file import FileDatasetCreator +from .hf_datasets import HFDatasetsCreator +from .in_memory import InMemoryDatasetCreator +from .synthetic import ( + SyntheticDatasetConfig, + SyntheticDatasetCreator, + SyntheticTextItemsGenerator, +) + +__all__ = [ + "DatasetCreator", + "ColumnInputTypes", + "HFDatasetsCreator", + "load_dataset", + "FileDatasetCreator", + "InMemoryDatasetCreator", + "SyntheticDatasetCreator", + "SyntheticDatasetConfig", + "SyntheticTextItemsGenerator", +] diff --git a/src/guidellm/dataset/creator.py b/src/guidellm/dataset/creator.py new file mode 100644 index 00000000..42103a46 --- /dev/null +++ b/src/guidellm/dataset/creator.py @@ -0,0 +1,213 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +__all__ = ["DatasetCreator", "ColumnInputTypes"] + +ColumnInputTypes = Literal[ + "prompt_column", + "text_column", + "prompt_tokens_count_column", + "output_tokens_count_column", +] + + +class DatasetCreator(ABC): + DEFAULT_SPLITS_TRAIN = [ + "train", + "training", + "train_set", + "training_set", + "train_dataset", + "training_dataset", + "train_data", + "training_data", + "pretrain", + "pretrain_set", + "pretrain_dataset", + "pretrain_data", + "pretraining", + ] + DEFAULT_SPLITS_CALIB = [ + "calibration", + "calib", + "cal", + "calibration_set", + "calib_set", + "cal_set", + "calibration_dataset", + "calib_dataset", + "cal_set", + "calibration_data", + "calib_data", + "cal_data", + ] + DEFAULT_SPLITS_VAL = [ + "validation", + "val", + "valid", + "validation_set", + "val_set", + "validation_dataset", + "val_dataset", + "validation_data", + "val_data", + "dev", + "dev_set", + "dev_dataset", + "dev_data", + ] + DEFAULT_SPLITS_TEST = [ + "test", + "testing", + "test_set", + "testing_set", + "test_dataset", + "testing_dataset", + "test_data", + "testing_data", + "eval", + "eval_set", + "eval_dataset", + "eval_data", + ] + DEFAULT_SPLITS_DATASET: Dict[str, str] = {} 
+ + @classmethod + def create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int = 42, + split_pref_order: Optional[List[str]] = None, + ) -> Tuple[Union[Dataset, IterableDataset], Dict[ColumnInputTypes, str]]: + if not cls.is_supported(data, data_args): + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ") + + split = cls.extract_args_split(data_args) + column_mappings = cls.extract_args_column_mappings(data_args) + dataset = cls.handle_create( + data, data_args, processor, processor_args, random_seed + ) + + if isinstance(dataset, (DatasetDict, IterableDatasetDict)): + dataset = cls.extract_dataset_split(dataset, split, split_pref_order) + + if not isinstance(dataset, (Dataset, IterableDataset)): + raise ValueError( + f"Unsupported data type: {type(dataset)} given for {dataset}." + ) + + return dataset, column_mappings + + @classmethod + def extract_args_split(cls, data_args: Optional[Dict[str, Any]]) -> str: + split = "auto" + + if data_args and "split" in data_args: + split = data_args["split"] + del data_args["split"] + + return split + + @classmethod + def extract_args_column_mappings( + cls, + data_args: Optional[Dict[str, Any]], + ) -> Dict[ColumnInputTypes, str]: + columns: Dict[ColumnInputTypes, str] = {} + + if data_args: + if "prompt_column" in data_args: + columns["prompt_column"] = data_args["prompt_column"] + del data_args["prompt_column"] + + if "prompt_tokens_count_column" in data_args: + columns["prompt_tokens_count_column"] = data_args[ + "prompt_tokens_count_column" + ] + del data_args["prompt_tokens_count_column"] + + if "output_tokens_count_column" in data_args: + columns["output_tokens_count_column"] = data_args[ + "output_tokens_count_column" + ] + del data_args["output_tokens_count_column"] + + return columns + + @classmethod + def extract_dataset_name( + cls, dataset: Union[Dataset, IterableDataset, DatasetDict, IterableDatasetDict] + ) -> Optional[str]: + if isinstance(dataset, (DatasetDict, IterableDatasetDict)): + dataset = dataset[list(dataset.keys())[0]] + + if isinstance(dataset, (Dataset, IterableDataset)): + if not hasattr(dataset, "info") or not hasattr( + dataset.info, "dataset_name" + ): + return None + + return dataset.info.dataset_name + + raise ValueError(f"Unsupported data type: {type(dataset)} given for {dataset}.") + + @classmethod + def extract_dataset_split( + cls, + dataset: Union[DatasetDict, IterableDatasetDict], + specified_split: Union[Literal["auto"], str] = "auto", + split_pref_order: Optional[Union[Literal["auto"], List[str]]] = "auto", + ) -> Union[Dataset, IterableDataset]: + if not isinstance(dataset, (DatasetDict, IterableDatasetDict)): + raise ValueError( + f"Unsupported data type: {type(dataset)} given for {dataset}." + ) + + if specified_split != "auto": + if specified_split not in dataset: + raise ValueError( + f"Split {specified_split} not found in dataset {dataset}." 
+ ) + + return dataset[specified_split] + + dataset_name = cls.extract_dataset_name(dataset) + + if dataset_name and dataset_name in cls.DEFAULT_SPLITS_DATASET: + return dataset[cls.DEFAULT_SPLITS_DATASET[dataset_name]] + + if split_pref_order == "auto": + split_pref_order = [ + *cls.DEFAULT_SPLITS_TEST, + *cls.DEFAULT_SPLITS_VAL, + *cls.DEFAULT_SPLITS_CALIB, + *cls.DEFAULT_SPLITS_TRAIN, + ] + + for test_split in split_pref_order or []: + if test_split in dataset: + return dataset[test_split] + + return dataset[list(dataset.keys())[0]] + + @classmethod + @abstractmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: ... + + @classmethod + @abstractmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int, + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: ... diff --git a/src/guidellm/dataset/entrypoints.py b/src/guidellm/dataset/entrypoints.py new file mode 100644 index 00000000..5abf0112 --- /dev/null +++ b/src/guidellm/dataset/entrypoints.py @@ -0,0 +1,42 @@ +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union + +from datasets import Dataset, IterableDataset +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import ColumnInputTypes +from guidellm.dataset.file import FileDatasetCreator +from guidellm.dataset.hf_datasets import HFDatasetsCreator +from guidellm.dataset.in_memory import InMemoryDatasetCreator +from guidellm.dataset.synthetic import SyntheticDatasetCreator + +__all__ = ["load_dataset"] + + +def load_dataset( + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int = 42, + split_pref_order: Optional[List[str]] = None, +) -> Tuple[Union[Dataset, IterableDataset], Dict[ColumnInputTypes, str]]: + creators = [ + InMemoryDatasetCreator, + SyntheticDatasetCreator, + FileDatasetCreator, + HFDatasetsCreator, + ] + + for creator in creators: + if creator.is_supported(data, data_args): + return creator.create( + data, + data_args, + processor, + processor_args, + random_seed, + split_pref_order, + ) + + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. 
") diff --git a/src/guidellm/dataset/file.py b/src/guidellm/dataset/file.py new file mode 100644 index 00000000..9f9cf696 --- /dev/null +++ b/src/guidellm/dataset/file.py @@ -0,0 +1,90 @@ +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import pandas as pd # type: ignore[import] +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + load_dataset, +) +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import DatasetCreator + +__all__ = ["FileDatasetCreator"] + + +class FileDatasetCreator(DatasetCreator): + SUPPORTED_TYPES = { + ".txt", + ".text", + ".csv", + ".json", + ".jsonl", + ".parquet", + ".arrow", + ".hdf5", + ".tar", + } + + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + if isinstance(data, (str, Path)) and (path := Path(data)).exists(): + # local folder or py file, assume supported + return path.suffix.lower() in cls.SUPPORTED_TYPES + + return False + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003 + processor_args: Optional[Dict[str, Any]], # noqa: ARG003 + random_seed: int, # noqa: ARG003 + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + if not isinstance(data, (str, Path)): + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ") + + path = Path(data) + if not path.exists(): + raise FileNotFoundError(f"File not found: {path}") + + if not path.is_file(): + raise ValueError(f"Unsupported data type: {path} given for {path}. ") + + if path.suffix.lower() not in cls.SUPPORTED_TYPES: + raise ValueError(f"Unsupported file type: {path.suffix} given for {path}. ") + + return cls.load_dataset(path, data_args) + + @classmethod + def load_dataset( + cls, path: Path, data_args: Optional[Dict[str, Any]] + ) -> Union[Dataset, IterableDataset]: + if path.suffix.lower() in {".txt", ".text"}: + with path.open("r") as file: + items = file.readlines() + + dataset = Dataset.from_dict({"text": items}, **(data_args or {})) + elif path.suffix.lower() == ".csv": + dataset = load_dataset("csv", data_files=path, **(data_args or {})) + elif path.suffix.lower() in {".json", ".jsonl"}: + dataset = load_dataset("json", data_files=path, **(data_args or {})) + elif path.suffix.lower() == ".parquet": + dataset = load_dataset("parquet", data_files=path, **(data_args or {})) + elif path.suffix.lower() == ".arrow": + dataset = load_dataset("arrow", data_files=path, **(data_args or {})) + elif path.suffix.lower() == ".hdf5": + dataset = Dataset.from_pandas(pd.read_hdf(path), **(data_args or {})) + elif path.suffix.lower() == ".db": + dataset = Dataset.from_sql(con=path, **(data_args or {})) + elif path.suffix.lower() == ".tar": + dataset = load_dataset("webdataset", data_files=path, **(data_args or {})) + else: + raise ValueError(f"Unsupported file type: {path.suffix} given for {path}. 
") + + return dataset diff --git a/src/guidellm/dataset/hf_datasets.py b/src/guidellm/dataset/hf_datasets.py new file mode 100644 index 00000000..e0102538 --- /dev/null +++ b/src/guidellm/dataset/hf_datasets.py @@ -0,0 +1,62 @@ +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + get_dataset_config_info, + load_dataset, +) +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import DatasetCreator + +__all__ = ["HFDatasetsCreator"] + + +class HFDatasetsCreator(DatasetCreator): + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + if isinstance( + data, (Dataset, DatasetDict, IterableDataset, IterableDatasetDict) + ): + # base type is supported + return True + + if isinstance(data, (str, Path)) and (path := Path(data)).exists(): + # local folder or py file, assume supported + return path.is_dir() or path.suffix == ".py" + + if isinstance(data, (str, Path)): + try: + # try to load dataset + return get_dataset_config_info(data) is not None + except Exception: # noqa: BLE001, S110 + pass + + return False + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003 + processor_args: Optional[Dict[str, Any]], # noqa: ARG003 + random_seed: int, # noqa: ARG003 + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + if isinstance(data, (str, Path)): + data = load_dataset(data, **(data_args or {})) + elif data_args: + raise ValueError( + f"data_args should not be provided when data is a {type(data)}" + ) + + if isinstance( + data, (Dataset, DatasetDict, IterableDataset, IterableDatasetDict) + ): + return data + + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ") diff --git a/src/guidellm/dataset/in_memory.py b/src/guidellm/dataset/in_memory.py new file mode 100644 index 00000000..dc173d2f --- /dev/null +++ b/src/guidellm/dataset/in_memory.py @@ -0,0 +1,131 @@ +from pathlib import Path +from typing import Any, Dict, Iterable, Optional, Union + +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import DatasetCreator + +__all__ = ["InMemoryDatasetCreator"] + + +class InMemoryDatasetCreator(DatasetCreator): + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + return isinstance(data, Iterable) and not isinstance(data, str) + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003 + processor_args: Optional[Dict[str, Any]], # noqa: ARG003 + random_seed: int, # noqa: ARG003 + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + if not isinstance(data, Iterable): + raise TypeError( + f"Unsupported data format. 
Expected Iterable[Any], got {type(data)}" + ) + + if not data: + raise ValueError("Data is empty") + + if isinstance(data, Dict): + # assume data is a dictionary of columns and values: {"c1": ["i1", "i2"]} + data_dict = cls.format_data_dict(data) + elif isinstance(data[0], Dict): # type: ignore[index] + # assume data is a list of dictionaries: [{"c1": "i1"}, {"c1": "i2"}] + data_dict = cls.format_data_iterable_dicts(data) + else: + # assume data is a list of items with no columns: ["i1", "i2"] + data_dict = cls.format_data_iterable_values(data) + + return Dataset.from_dict(data_dict, **(data_args or {})) + + @classmethod + def format_data_dict(cls, data: Dict[Any, Any]) -> Dict[str, Any]: + if not isinstance(data, Dict): + raise TypeError( + f"Unsupported data format. Expected Dict[str, Iterable[Any]], " + f"got {type(data)}" + ) + + if not all( + isinstance(key, str) and isinstance(val, Iterable) + for key, val in data.items() + ): + raise TypeError( + "Unsupported data format. Expected Dict[str, Iterable[Any]], " + f"got {type(data)}" + ) + + samples = len(list(data.values())[0]) + if not all(len(val) == samples for val in data.values()): + raise ValueError( + "Unsupported data format. Not all columns have the same number samples " + f"for {data}" + ) + + return data + + @classmethod + def format_data_iterable_dicts( + cls, data: Iterable[Dict[Any, Any]] + ) -> Dict[str, Any]: + if not isinstance(data, Iterable): + raise TypeError( + f"Unsupported data format. Expected Iterable[Dict[str, Any]], " + f"got {type(data)}" + ) + + if not all(isinstance(item, Dict) for item in data): + raise TypeError( + f"Unsupported data format. Expected Iterable[Dict[str, Any]], " + f"got {type(data)}" + ) + + if not all(isinstance(key, str) for key in data[0]): # type: ignore[index] + raise TypeError( + "Unsupported data format. Expected Dict[str, Any], " + f"but one of the items had a non string column for {data}" + ) + + columns = list(data[0].keys()) # type: ignore[index] + if not all( + len(item) == len(columns) and all(key in item for key in columns) + for item in data + ): + raise ValueError( + "Unsupported data format. Not all items have the same columns " + f"for {data}" + ) + + data_dict: Dict[str, Any] = {key: [] for key in columns} + for item in data: + for key, value in item.items(): + data_dict[key].append(value) + + return data_dict + + @classmethod + def format_data_iterable_values(cls, data: Iterable[Any]) -> Dict[str, Any]: + if not isinstance(data, Iterable): + raise TypeError( + f"Unsupported data format. Expected Iterable[Iterable[Any]], " + f"got {type(data)}" + ) + + first_item = next(iter(data), None) + first_type = type(first_item) + if not all(isinstance(item, first_type) for item in data): + raise TypeError( + f"Unsupported data format. 
Not all types are the same for {data}" + ) + + return {"data": list(data)} diff --git a/src/guidellm/dataset/synthetic.py b/src/guidellm/dataset/synthetic.py new file mode 100644 index 00000000..f2bf69d3 --- /dev/null +++ b/src/guidellm/dataset/synthetic.py @@ -0,0 +1,261 @@ +import json +import random +from pathlib import Path +from typing import Any, Dict, Iterable, Iterator, Literal, Optional, Union + +import yaml +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) +from pydantic import BaseModel, Field +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import ColumnInputTypes, DatasetCreator +from guidellm.utils import EndlessTextCreator, IntegerRangeSampler, check_load_processor + +__all__ = [ + "SyntheticDatasetCreator", + "SyntheticDatasetConfig", + "SyntheticTextItemsGenerator", +] + + +class SyntheticDatasetConfig(BaseModel): + prompt_tokens: int = Field( + description="The average number of text tokens generated for prompts.", + gt=0, + ) + prompt_tokens_stdev: Optional[int] = Field( + description="The standard deviation of the tokens generated for prompts.", + gt=0, + default=None, + ) + prompt_tokens_min: Optional[int] = Field( + description="The minimum number of text tokens generated for prompts.", + gt=0, + default=None, + ) + prompt_tokens_max: Optional[int] = Field( + description="The maximum number of text tokens generated for prompts.", + gt=0, + default=None, + ) + output_tokens: int = Field( + description="The average number of text tokens generated for outputs.", + gt=0, + ) + output_tokens_stdev: Optional[int] = Field( + description="The standard deviation of the tokens generated for outputs.", + gt=0, + default=None, + ) + output_tokens_min: Optional[int] = Field( + description="The minimum number of text tokens generated for outputs.", + gt=0, + default=None, + ) + output_tokens_max: Optional[int] = Field( + description="The maximum number of text tokens generated for outputs.", + gt=0, + default=None, + ) + samples: int = Field( + description="The number of samples to generate for the dataset.", + gt=0, + default=1000, + ) + source: str = Field( + description="The source of the text data to be used for generation.", + default="data:prideandprejudice.txt.gz", + ) + + @staticmethod + def parse_str(data: Union[str, Path]) -> "SyntheticDatasetConfig": + if ( + isinstance(data, Path) + or data.strip().endswith(".config") + or data.strip().endswith(".yaml") + ): + return SyntheticDatasetConfig.parse_config_file(data) + + if data.strip().startswith("{"): + return SyntheticDatasetConfig.parse_json(data) + + if data.count("=") > 1: + return SyntheticDatasetConfig.parse_key_value_pairs(data) + + raise ValueError( + f"Unsupported data format. 
Expected JSON or key-value pairs, got {data}" + ) + + @staticmethod + def parse_json(data: str) -> "SyntheticDatasetConfig": + config_dict = json.loads(data.strip()) + + return SyntheticDatasetConfig(**config_dict) + + @staticmethod + def parse_key_value_pairs(data: str) -> "SyntheticDatasetConfig": + config_dict = {} + items = data.strip().split(",") + for item in items: + key, value = item.split("=") + config_dict[key.strip()] = ( + int(value.strip()) if value.strip().isnumeric() else value.strip() + ) + + return SyntheticDatasetConfig(**config_dict) # type: ignore[arg-type] + + @staticmethod + def parse_config_file(data: Union[str, Path]) -> "SyntheticDatasetConfig": + with Path(data).open("r") as file: + config_dict = yaml.safe_load(file) + + return SyntheticDatasetConfig(**config_dict) + + +class SyntheticTextItemsGenerator( + Iterable[ + Dict[ + Literal["prompt", "prompt_tokens_count", "output_tokens_count"], + Union[str, int], + ] + ] +): + def __init__( + self, + config: SyntheticDatasetConfig, + processor: PreTrainedTokenizerBase, + random_seed: int, + ): + self.config = config + self.processor = processor + self.random_seed = random_seed + self.text_creator = EndlessTextCreator( + data=config.source, + ) + + def __iter__( + self, + ) -> Iterator[ + Dict[ + Literal["prompt", "prompt_tokens_count", "output_tokens_count"], + Union[str, int], + ] + ]: + prompt_tokens_sampler = IntegerRangeSampler( + average=self.config.prompt_tokens, + variance=self.config.prompt_tokens_stdev, + min_value=self.config.prompt_tokens_min, + max_value=self.config.prompt_tokens_max, + random_seed=self.random_seed, + ) + output_tokens_sampler = IntegerRangeSampler( + average=self.config.output_tokens, + variance=self.config.output_tokens_stdev, + min_value=self.config.output_tokens_min, + max_value=self.config.output_tokens_max, + random_seed=self.random_seed + 1, # ensure diff dist from prompts + ) + # ensure diff distribution from output tokens + rand = random.Random(self.random_seed + 2) # noqa: S311 + + for _, prompt_tokens, output_tokens in zip( + range(self.config.samples), + prompt_tokens_sampler, + output_tokens_sampler, + ): + start_index = rand.randint(0, len(self.text_creator.words)) + yield { + "prompt": self._create_prompt(prompt_tokens, start_index), + "prompt_tokens_count": prompt_tokens, + "output_tokens_count": output_tokens, + } + + def _create_prompt(self, prompt_tokens: int, start_index: int) -> str: + if prompt_tokens <= 0: + return "" + + left = start_index + right = start_index + 4 * prompt_tokens + + while left < right: + mid = (left + right) // 2 + test_prompt = self.text_creator.create_text(start_index, mid - start_index) + test_tokens = len(self.processor.tokenize(test_prompt)) + + if test_tokens == prompt_tokens: + return test_prompt + elif test_tokens < prompt_tokens: + left = mid + 1 + else: + right = mid + + return self.text_creator.create_text(start_index, left - start_index) + + +class SyntheticDatasetCreator(DatasetCreator): + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + if ( + isinstance(data, Path) + and data.exists() + and data.suffix in {".config", ".yaml"} + ): + return True + + if isinstance(data, str): + data_str: str = data.strip() + if ( + data_str.startswith("{") + or data_str.count("=") > 1 + or data_str.endswith((".config", ".yaml")) + ): + return True + + return False + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, 
Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int, + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + processor = check_load_processor( + processor, + processor_args, + error_msg=( + "Processor/tokenizer required for synthetic dataset generation." + ), + ) + + config = SyntheticDatasetConfig.parse_str(data) + generator = SyntheticTextItemsGenerator(config, processor, random_seed) + items = list(generator) + + return Dataset.from_list(items, **(data_args or {})) + + @classmethod + def extract_args_column_mappings( + cls, + data_args: Optional[Dict[str, Any]], + ) -> Dict[ColumnInputTypes, str]: + data_args_columns = super().extract_args_column_mappings(data_args) + + if data_args_columns: + raise ValueError( + f"Column mappings are not supported for synthetic datasets. " + f"Got {data_args_columns}" + ) + + return { + "prompt_column": "prompt", + "prompt_tokens_count_column": "prompt_tokens_count", + "output_tokens_count_column": "output_tokens_count", + } diff --git a/src/guidellm/executor/__init__.py b/src/guidellm/executor/__init__.py deleted file mode 100644 index 7665e898..00000000 --- a/src/guidellm/executor/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .executor import Executor, ExecutorResult -from .profile_generator import Profile, ProfileGenerationMode, ProfileGenerator - -__all__ = [ - "Executor", - "ExecutorResult", - "Profile", - "ProfileGenerationMode", - "ProfileGenerator", -] diff --git a/src/guidellm/executor/executor.py b/src/guidellm/executor/executor.py deleted file mode 100644 index bfecf17f..00000000 --- a/src/guidellm/executor/executor.py +++ /dev/null @@ -1,213 +0,0 @@ -from dataclasses import dataclass -from typing import AsyncGenerator, Optional, Sequence, Union - -from loguru import logger - -from guidellm.backend import Backend -from guidellm.core import TextGenerationBenchmarkReport -from guidellm.executor.profile_generator import ( - Profile, - ProfileGenerationMode, - ProfileGenerator, -) -from guidellm.request import RequestGenerator -from guidellm.scheduler import Scheduler, SchedulerResult - -__all__ = ["Executor", "ExecutorResult"] - - -@dataclass -class ExecutorResult: - """ - Data class representing the result of executing tasks in the Executor. - - :param completed: Indicates whether all tasks have completed. - :type completed: bool - :param count_total: Total number of profiles. - :type count_total: int - :param count_completed: Number of completed profiles. - :type count_completed: int - :param report: A report report for text generation. - :type report: TextGenerationBenchmarkReport - :param scheduler_result: Optional scheduler result for the last task. - :type scheduler_result: Optional[SchedulerResult] - """ - - completed: bool - count_total: int - count_completed: int - generation_modes: Sequence[ProfileGenerationMode] - report: TextGenerationBenchmarkReport - scheduler_result: Optional[SchedulerResult] = None - current_index: Optional[int] = None - current_profile: Optional[Profile] = None - - -class Executor: - """ - The Executor class manages the execution of tasks based on a given profile - generation mode and rate. It orchestrates the interaction between the backend, - request generator, and profile generator, and runs benchmarks accordingly. - - :param backend: The backend to run tasks against. - :type backend: Backend - :param request_generator: The generator that creates requests for execution. 
- :type request_generator: RequestGenerator - :param mode: The mode for profile generation (e.g., sweep, synchronous). - :type mode: ProfileGenerationMode - :param rate: The list of rates for load generation, or None. - :type rate: Optional[List[float]] - :param max_number: Maximum number of requests to generate for the scheduler - (a single report run), or None. - :type max_number: Optional[int] - :param max_duration: Maximum duration for generating requests for the scheduler, - (a single report run), or None. - :type max_duration: Optional[float] - """ - - def __init__( - self, - backend: Backend, - request_generator: RequestGenerator, - mode: ProfileGenerationMode = "sweep", - rate: Optional[Union[float, Sequence[float]]] = None, - max_number: Optional[int] = None, - max_duration: Optional[float] = None, - ): - self._backend = backend - self._generator = request_generator - self._max_number = max_number - self._max_duration = max_duration - self._profile_generator = ProfileGenerator(mode=mode, rate=rate) - logger.info("Executor initialized with mode: {}, rate: {}", mode, rate) - - @property - def backend(self) -> Backend: - """ - Returns the backend being used by the Executor. - - :return: Backend - :rtype: Backend - """ - return self._backend - - @property - def request_generator(self) -> RequestGenerator: - """ - Returns the request generator used by the Executor. - - :return: RequestGenerator - :rtype: RequestGenerator - """ - return self._generator - - @property - def profile_generator(self) -> ProfileGenerator: - """ - Returns the profile generator for generating profiles during execution. - - :return: ProfileGenerator - :rtype: ProfileGenerator - """ - return self._profile_generator - - @property - def max_number(self) -> Optional[int]: - """ - Returns the maximum number of requests to generate. - - :return: Maximum number of requests or None. - :rtype: Optional[int] - """ - return self._max_number - - @property - def max_duration(self) -> Optional[float]: - """ - Returns the maximum duration for generating requests. - - :return: Maximum duration in seconds or None. - :rtype: Optional[float] - """ - return self._max_duration - - async def run(self) -> AsyncGenerator[ExecutorResult, None]: - """ - Runs the Executor, generating and scheduling tasks based on the profile - generation mode. Yields results incrementally. 
- - :rtype: AsyncGenerator[ExecutorResult, None] - """ - report = TextGenerationBenchmarkReport() - report.args = { - # backend args - "backend_type": self.backend.type_, - "target": self.backend.target, - "model": self.backend.model, - # data args - "data_type": self.request_generator.type_, - "data": self.request_generator.source, - "tokenizer": self.request_generator.tokenizer.name_or_path, - # rate args - "mode": self.profile_generator.mode, - "rate": self.profile_generator.rates, - # limits args - "max_number": self.max_number, - "max_duration": self.max_duration, - } - profile_index = -1 - logger.info("Starting Executor run") - - yield ExecutorResult( - completed=False, - count_total=len(self.profile_generator), - count_completed=0, - generation_modes=self.profile_generator.profile_generation_modes, - report=report, - ) - - while profile := self.profile_generator.next(report): - logger.debug("Generated profile: {}", profile) - scheduler = Scheduler( - generator=self.request_generator, - backend=self.backend, - mode=profile.load_gen_mode, - rate=profile.load_gen_rate, - max_number=self.max_number or profile.args.get("max_number", None), - max_duration=self.max_duration, - ) - profile_index += 1 - - logger.info( - "Scheduling tasks with mode: {}, rate: {}", - profile.load_gen_mode, - profile.load_gen_rate, - ) - - async for scheduler_result in scheduler.run(): - if scheduler_result.completed: - report.add_benchmark(scheduler_result.benchmark) - logger.debug( - "Benchmark added for scheduler result: {}", - scheduler_result.benchmark, - ) - - yield ExecutorResult( - completed=False, - count_total=len(self.profile_generator), - count_completed=len(report.benchmarks), - generation_modes=self.profile_generator.profile_generation_modes, - report=report, - scheduler_result=scheduler_result, - current_index=profile_index, - current_profile=profile, - ) - - logger.info("Executor run completed") - yield ExecutorResult( - completed=True, - count_total=len(self.profile_generator), - count_completed=len(report.benchmarks), - generation_modes=self.profile_generator.profile_generation_modes, - report=report, - ) diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py deleted file mode 100644 index 1f857f78..00000000 --- a/src/guidellm/executor/profile_generator.py +++ /dev/null @@ -1,347 +0,0 @@ -from typing import Any, Dict, List, Literal, Optional, Sequence, Union, get_args - -import numpy as np -from loguru import logger -from numpy._typing import NDArray -from pydantic import Field - -from guidellm.config import settings -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.core.serializable import Serializable -from guidellm.scheduler import LoadGenerationMode - -__all__ = [ - "Profile", - "ProfileGenerationMode", - "ProfileGenerator", -] - -ProfileGenerationMode = Literal[ - "sweep", "synchronous", "throughput", "constant", "poisson" -] - - -class Profile(Serializable): - """ - A data class representing a profile for load generation. - - :param load_gen_mode: The mode of load generation (e.g., constant, poisson). - :type load_gen_mode: LoadGenerationMode - :param load_gen_rate: The rate of load generation, if applicable. - :type load_gen_rate: Optional[float] - :param args: Additional arguments for the profile. 
- :type args: Optional[Dict[str, Any]] - """ - - load_gen_mode: LoadGenerationMode - load_gen_rate: Optional[float] = None - args: Dict[str, Any] = Field(default_factory=dict) - - -class ProfileGenerator: - """ - Generates profiles based on different load generation modes. - - :param mode: The mode for profile generation (e.g., sweep, synchronous). - :type mode: ProfileGenerationMode - :param rate: The rate(s) for load generation; could be a float or list of floats. - :type rate: Optional[Union[float, Sequence[float]]] - """ - - def __init__( - self, - mode: ProfileGenerationMode, - rate: Optional[Union[float, Sequence[float]]] = None, - ): - if mode not in get_args(ProfileGenerationMode): - err = ValueError( - f"{mode} is not a valid Profile Generation Mode. " - f"Valid options are {get_args(ProfileGenerationMode)}" - ) - logger.error(err) - raise err - - self._mode = mode - - if self._mode in ("sweep", "throughput", "synchronous"): - if rate is not None: - err = ValueError(f"Rates are not applicable for {self._mode} mode") - logger.error(err) - raise err - self._rates = None - else: - if not rate: - err = ValueError(f"Rates are required for {self._mode} mode") - logger.error(err) - raise err - self._rates = rate if isinstance(rate, Sequence) else [rate] - - for rt in self._rates: - if rt <= 0: - err = ValueError( - f"Rate must be > 0 for mode: {self._mode}. Given: {rt}" - ) - logger.error(err) - raise err - - self._generated_count = 0 - - def __len__(self) -> int: - """ - Returns the number of profiles to generate based on the mode and rates. - - :return: The number of profiles. - :rtype: int - """ - if self._mode == "sweep": - return settings.num_sweep_profiles + 2 - - if self._mode in ("throughput", "synchronous"): - return 1 - - if not self._rates: - raise ValueError(f"Rates are required for {self._mode} mode") - - return len(self._rates) - - @property - def mode(self) -> ProfileGenerationMode: - """ - Returns the current mode of profile generation. - - :return: The profile generation mode. - :rtype: ProfileGenerationMode - """ - return self._mode - - @property - def rates(self) -> Optional[Sequence[float]]: - """ - Returns the list of rates for load generation, if any. - - :return: Sequence of rates or None if not applicable. - :rtype: Optional[Sequence[float]] - """ - return self._rates - - @property - def generated_count(self) -> int: - """ - Returns the current count of generated profiles. - - :return: The current count of generated profiles. - :rtype: int - """ - return self._generated_count - - @property - def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]: - """ - Return the list of profile modes to be run in the report. - - :return: Sequence of profile modes to be run in the report. - :rtype: Sequence[ProfileGenerationMode] - """ - if self._mode == "sweep": - return ["synchronous", "throughput"] + ["constant"] * ( # type: ignore # noqa: PGH003 - settings.num_sweep_profiles - ) - - if self._mode in ["throughput", "synchronous"]: - return [self._mode] - - if self._rates is None: - raise ValueError(f"Rates are required for {self._mode} mode") - - if self._mode in ["constant", "poisson"]: - return [self._mode] * len(self._rates) - - raise ValueError(f"Invalid mode: {self._mode}") - - def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: - """ - Generates the next profile based on the current mode and report. - - :param current_report: The current report report. 
- :type current_report: TextGenerationBenchmarkReport - :return: The generated profile or None if no more profiles. - :rtype: Optional[Profile] - """ - logger.debug( - "Generating the next profile with mode: {}, current report: {}", - self.mode, - current_report, - ) - - if self.mode in ["constant", "poisson"]: - if not self.rates: - err = ValueError(f"Rates are required for {self.mode} mode") - logger.error(err) - raise err - - profile = self.create_fixed_rate_profile( - self.generated_count, - self.mode, - self.rates, - ) - elif self.mode == "synchronous": - profile = self.create_synchronous_profile(self.generated_count) - elif self.mode == "throughput": - profile = self.create_throughput_profile(self.generated_count) - elif self.mode == "sweep": - profile = self.create_sweep_profile( - self.generated_count, - sync_benchmark=( - current_report.benchmarks[0] if current_report.benchmarks else None - ), - throughput_benchmark=( - current_report.benchmarks[1] - if len(current_report.benchmarks) > 1 - else None - ), - ) - else: - err = ValueError(f"Invalid mode: {self.mode}") - logger.error(err) - raise err - - self._generated_count += 1 - logger.info( - "Generated profile: {}, total generated count: {}", - profile, - self._generated_count, - ) - return profile - - @staticmethod - def create_fixed_rate_profile( - index: int, mode: ProfileGenerationMode, rates: Sequence[float] - ) -> Optional[Profile]: - """ - Creates a profile with a fixed rate. - - :param index: The index of the rate in the list. - :type index: int - :param mode: The mode for profile generation (e.g., constant, poisson). - :type mode: ProfileGenerationMode - :param rates: The list of rates for load generation. - :type rates: Sequence[float] - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - modes_map: Dict[str, LoadGenerationMode] = { - "constant": "constant", - "poisson": "poisson", - } - - if mode not in modes_map: - err = ValueError(f"Invalid mode: {mode}") - logger.error(err) - raise err - - profile = ( - Profile( - load_gen_mode=modes_map[mode], - load_gen_rate=rates[index], - ) - if index < len(rates) - else None - ) - logger.debug("Created fixed rate profile: {}", profile) - return profile - - @staticmethod - def create_synchronous_profile(index: int) -> Optional[Profile]: - """ - Creates a profile with synchronous mode. - - :param index: The index of the profile to create. - :type index: int - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - profile = ( - Profile( - load_gen_mode="synchronous", - load_gen_rate=None, - ) - if index < 1 - else None - ) - logger.debug("Created synchronous profile: {}", profile) - return profile - - @staticmethod - def create_throughput_profile(index: int) -> Optional[Profile]: - """ - Creates a profile with throughput mode. - - :param index: The index of the profile to create. - :type index: int - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - profile = ( - Profile( - load_gen_mode="throughput", - load_gen_rate=None, - ) - if index < 1 - else None - ) - logger.debug("Created throughput profile: {}", profile) - return profile - - @staticmethod - def create_sweep_profile( - index: int, - sync_benchmark: Optional[TextGenerationBenchmark], - throughput_benchmark: Optional[TextGenerationBenchmark], - ) -> Optional[Profile]: - """ - Creates a profile with sweep mode, generating profiles between - synchronous and throughput benchmarks. 
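Note: the sweep behavior described here (and implemented just below) anchors the sweep at the measured synchronous and throughput request rates, then fills the remaining profiles with constant rates spaced evenly between them. A standalone numpy sketch of that interpolation; the counts and measured rates are illustrative stand-ins for settings.num_sweep_profiles and the benchmark results.

import numpy as np

num_sweep_profiles = 8    # stand-in for settings.num_sweep_profiles
sync_rate = 1.7           # measured synchronous requests/second (illustrative)
throughput_rate = 24.3    # measured throughput requests/second (illustrative)

# Evenly spaced rates between the two anchors, skipping the synchronous endpoint.
sweep_rates = np.linspace(sync_rate, throughput_rate, num_sweep_profiles + 1)[1:]
print([round(float(rate), 2) for rate in sweep_rates])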
- - :param index: The index of the profile to create. - :type index: int - :param sync_benchmark: The synchronous report data. - :type sync_benchmark: Optional[TextGenerationBenchmark] - :param throughput_benchmark: The throughput report data. - :type throughput_benchmark: Optional[TextGenerationBenchmark] - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - if index < 0 or index >= settings.num_sweep_profiles + 2: - return None - - if index == 0: - return ProfileGenerator.create_synchronous_profile(0) - - if not sync_benchmark: - err = ValueError("Synchronous report is required for sweep mode") - logger.error(err) - raise err - - if index == 1: - throughput_profile: Profile = ProfileGenerator.create_throughput_profile(0) # type: ignore # noqa: PGH003 - return throughput_profile - - if not throughput_benchmark: - err = ValueError("Throughput report is required for sweep mode") - logger.error(err) - raise err - - min_rate = sync_benchmark.completed_request_rate - max_rate = throughput_benchmark.completed_request_rate - intermediate_rates: List[NDArray] = list( - np.linspace(min_rate, max_rate, settings.num_sweep_profiles + 1) - )[1:] - - return Profile( - load_gen_mode="constant", - load_gen_rate=( - float(load_gen_rate) - if (load_gen_rate := intermediate_rates[index - 2]) - else 1.0 # the fallback value - ), - ) diff --git a/src/guidellm/main.py b/src/guidellm/main.py deleted file mode 100644 index e7363c6e..00000000 --- a/src/guidellm/main.py +++ /dev/null @@ -1,346 +0,0 @@ -import asyncio -from typing import Any, Literal, Mapping, Optional, Union, get_args - -import click -from loguru import logger -from transformers import AutoTokenizer # type: ignore[import-untyped] - -from guidellm.backend import Backend, BackendType -from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.request import ( - EmulatedRequestGenerator, - FileRequestGenerator, - TransformersDatasetRequestGenerator, -) -from guidellm.request.base import RequestGenerator -from guidellm.utils import BenchmarkReportProgress, cli_params - -__all__ = ["generate_benchmark_report"] - - -@click.command() -@click.option( - "--target", - type=str, - required=True, - help=( - "The target path or url for the backend to evaluate. " - "Ex: 'http://localhost:8000'" - ), -) -@click.option( - "--backend", - type=click.Choice(get_args(BackendType)), - default="openai_http", - help=( - "The backend to use for benchmarking. " - "The default is OpenAI Server enabling compatability with any server that " - "follows the OpenAI spec including vLLM." - ), -) -@click.option( - "--model", - type=str, - default=None, - help=( - "The Model to use for benchmarking. If not provided, it will use " - "the first available model provided the backend supports listing models." - ), -) -@click.option( - "--data", - type=str, - required=True, - help=( - "The data source to use for benchmarking. " - "Depending on the data-type, it should be a " - "path to a data file containing prompts to run (ex: data.txt), " - "a HuggingFace dataset name (ex: 'neuralmagic/LLM_compression_calibration'), " - "or a configuration for emulated data " - "(ex: 'prompt_tokens=128,generated_tokens=128')." - ), -) -@click.option( - "--data-type", - type=click.Choice(["emulated", "file", "transformers"]), - required=True, - help=( - "The type of data to use for benchmarking. 
" - "Use 'emulated' for synthetic data, 'file' for a file, or 'transformers' " - "for a HuggingFace dataset. Specify the data source with the --data flag." - ), -) -@click.option( - "--tokenizer", - type=str, - default=None, - help=( - "The tokenizer to use for calculating the number of prompt tokens. " - "This should match the tokenizer used by the model." - "By default, it will use the --model flag to determine the tokenizer. " - "If not provided and the model is not available, will raise an error. " - "Ex: 'neuralmagic/Meta-Llama-3.1-8B-quantized.w8a8'" - ), -) -@click.option( - "--rate-type", - type=click.Choice(get_args(ProfileGenerationMode)), - default="sweep", - help=( - "The type of request rate to use for benchmarking. " - "Use sweep to run a full range from synchronous to throughput (default), " - "synchronous for sending requests one after the other, " - "throughput to send requests as fast as possible, " - "constant for a fixed request rate, " - "or poisson for a real-world variable request rate." - ), -) -@click.option( - "--rate", - type=float, - default=None, - help=( - "The request rate to use for constant and poisson rate types. " - "To run multiple, provide the flag multiple times. " - ), - multiple=True, -) -@click.option( - "--max-seconds", - type=int, - default=120, - help=( - "The maximum number of seconds for each benchmark run. " - "Either max-seconds, max-requests, or both must be set. " - "The default is 120 seconds. " - "Note, this is the maximum time for each rate supplied, not the total time. " - "This value should be large enough to allow for " - "the server's performance to stabilize." - ), -) -@click.option( - "--max-requests", - type=cli_params.MAX_REQUESTS, - default=None, - help=( - "The maximum number of requests for each benchmark run. " - "Either max-seconds, max-requests, or both must be set. " - "Note, this is the maximum number of requests for each rate supplied, " - "not the total number of requests. " - "This value should be large enough to allow for " - "the server's performance to stabilize." - ), -) -@click.option( - "--output-path", - type=str, - default=None, - help=( - "The output path to save the output report to for loading later. " - "Ex: guidance_report.json. " - "The default is None, meaning no output is saved and results are only " - "printed to the console." - ), -) -@click.option( - "--enable-continuous-refresh", - is_flag=True, - default=False, - help=( - "Enable continual refreshing of the output table in the CLI " - "until the user exits. " - ), -) -def generate_benchmark_report_cli( - target: str, - backend: BackendType, - model: Optional[str], - data: Optional[str], - data_type: Literal["emulated", "file", "transformers"], - tokenizer: Optional[str], - rate_type: ProfileGenerationMode, - rate: Optional[float], - max_seconds: Optional[int], - max_requests: Union[Literal["dataset"], int, None], - output_path: str, - enable_continuous_refresh: bool, -): - """ - Generate a benchmark report for a specified backend and dataset. 
- """ - generate_benchmark_report( - target=target, - backend=backend, - model=model, - data=data, - data_type=data_type, - tokenizer=tokenizer, - rate_type=rate_type, - rate=rate, - max_seconds=max_seconds, - max_requests=max_requests, - output_path=output_path, - cont_refresh_table=enable_continuous_refresh, - ) - - -def generate_benchmark_report( - target: str, - data: Optional[str], - data_type: Literal["emulated", "file", "transformers"], - backend: BackendType = "openai_http", - backend_kwargs: Optional[Mapping[str, Any]] = None, - model: Optional[str] = None, - tokenizer: Optional[str] = None, - rate_type: ProfileGenerationMode = "sweep", - rate: Optional[float] = None, - max_seconds: Optional[int] = 120, - max_requests: Union[Literal["dataset"], int, None] = None, - output_path: Optional[str] = None, - cont_refresh_table: bool = False, -) -> GuidanceReport: - """ - Generate a benchmark report for a specified backend and dataset. - - :param target: The target URL or path for the backend to evaluate. - :param backend: The backend type to use for benchmarking. - :param model: The model to benchmark; - defaults to the first available if not specified. - :param data: The data source for benchmarking, - which may be a path, dataset name, or config. - :param data_type: The type of data to use, - such as 'emulated', 'file', or 'transformers'. - :param tokenizer: The tokenizer to use for token counting, - defaulting to Llama 3.1 if not provided. - :param rate_type: The rate type for requests during benchmarking. - :param rate: The specific request rate for constant and poisson rate types. - :param max_seconds: Maximum duration for each benchmark run in seconds. - :param max_requests: Maximum number of requests per benchmark run. - :param output_path: Path to save the output report file. - :param cont_refresh_table: Continually refresh the table in the CLI - until the user exits. - :param backend_kwargs: Additional keyword arguments for the backend. 
- """ - logger.info( - "Generating benchmark report with target: {}, backend: {}", target, backend - ) - - # Create backend - backend_inst = Backend.create( - type_=backend, - target=target, - model=model, - **(backend_kwargs or {}), - ) - backend_inst.validate() - - request_generator: RequestGenerator - - # Create tokenizer and request generator - tokenizer_inst = tokenizer - if not tokenizer_inst: - try: - tokenizer_inst = AutoTokenizer.from_pretrained(backend_inst.model) - except Exception as err: - raise ValueError( - "Could not load model's tokenizer, " - "--tokenizer must be provided for request generation" - ) from err - - if data_type == "emulated": - request_generator = EmulatedRequestGenerator( - config=data, tokenizer=tokenizer_inst - ) - elif data_type == "file": - request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer_inst) - elif data_type == "transformers": - request_generator = TransformersDatasetRequestGenerator( - dataset=data, tokenizer=tokenizer_inst - ) - else: - raise ValueError(f"Unknown data type: {data_type}") - - if data_type == "emulated" and max_requests == "dataset": - raise ValueError("Cannot use 'dataset' for emulated data") - - # Create executor - executor = Executor( - backend=backend_inst, - request_generator=request_generator, - mode=rate_type, - rate=rate if rate_type in ("constant", "poisson") else None, - max_number=( - len(request_generator) if max_requests == "dataset" else max_requests - ), - max_duration=max_seconds, - ) - - # Run executor - logger.debug( - "Running executor with args: {}", - { - "backend": backend, - "request_generator": request_generator, - "mode": rate_type, - "rate": rate, - "max_number": max_requests, - "max_duration": max_seconds, - }, - ) - report = asyncio.run(_run_executor_for_result(executor)) - - # Save and print report - guidance_report = GuidanceReport() - guidance_report.benchmarks.append(report) - - if output_path: - guidance_report.save_file(output_path) - - guidance_report.print( - save_path=output_path if output_path is not None else "stdout", - continual_refresh=cont_refresh_table, - ) - - return guidance_report - - -async def _run_executor_for_result(executor: Executor) -> TextGenerationBenchmarkReport: - report = None - progress = BenchmarkReportProgress() - started = False - - async for result in executor.run(): - if not started: - progress.start(result.generation_modes) # type: ignore # noqa: PGH003 - started = True - - if result.current_index is not None: - description = f"{result.current_profile.load_gen_mode}" # type: ignore # noqa: PGH003 - if result.current_profile.load_gen_mode in ("constant", "poisson"): # type: ignore # noqa: PGH003 - description += f"@{result.current_profile.load_gen_rate:.2f} req/s" # type: ignore # noqa: PGH003 - - progress.update_benchmark( - index=result.current_index, - description=description, - completed=result.scheduler_result.completed, # type: ignore # noqa: PGH003 - completed_count=result.scheduler_result.count_completed, # type: ignore # noqa: PGH003 - completed_total=result.scheduler_result.count_total, # type: ignore # noqa: PGH003 - start_time=result.scheduler_result.benchmark.start_time, # type: ignore # noqa: PGH003 - req_per_sec=result.scheduler_result.benchmark.completed_request_rate, # type: ignore # noqa: PGH003 - ) - - if result.completed: - report = result.report - break - - progress.finish() - - if not report: - raise ValueError("No report generated by executor") - - return report - - -if __name__ == "__main__": - 
generate_benchmark_report_cli() diff --git a/src/guidellm/objects/__init__.py b/src/guidellm/objects/__init__.py new file mode 100644 index 00000000..168570dd --- /dev/null +++ b/src/guidellm/objects/__init__.py @@ -0,0 +1,18 @@ +from .pydantic import StandardBaseModel, StatusBreakdown +from .statistics import ( + DistributionSummary, + Percentiles, + RunningStats, + StatusDistributionSummary, + TimeRunningStats, +) + +__all__ = [ + "StandardBaseModel", + "StatusBreakdown", + "DistributionSummary", + "Percentiles", + "RunningStats", + "StatusDistributionSummary", + "TimeRunningStats", +] diff --git a/src/guidellm/objects/pydantic.py b/src/guidellm/objects/pydantic.py new file mode 100644 index 00000000..b6e998fa --- /dev/null +++ b/src/guidellm/objects/pydantic.py @@ -0,0 +1,60 @@ +from typing import Any, Generic, TypeVar + +from loguru import logger +from pydantic import BaseModel, ConfigDict, Field + +__all__ = ["StandardBaseModel", "StatusBreakdown"] + + +class StandardBaseModel(BaseModel): + """ + A base class for Pydantic models throughout GuideLLM enabling standard + configuration and logging. + """ + + model_config = ConfigDict( + extra="allow", + use_enum_values=True, + validate_assignment=True, + from_attributes=True, + ) + + def __init__(self, /, **data: Any) -> None: + super().__init__(**data) + logger.debug( + "Initialized new instance of {} with data: {}", + self.__class__.__name__, + data, + ) + + +SuccessfulT = TypeVar("SuccessfulT") +ErroredT = TypeVar("ErroredT") +IncompleteT = TypeVar("IncompleteT") +TotalT = TypeVar("TotalT") + + +class StatusBreakdown(BaseModel, Generic[SuccessfulT, ErroredT, IncompleteT, TotalT]): + """ + A base class for Pydantic models that are separated by statuses including + successful, incomplete, and errored. It additionally enables the inclusion + of total, which is intended as the combination of all statuses. + Total may or may not be used depending on if it duplicates information. + """ + + successful: SuccessfulT = Field( + description="The results with a successful status.", + default=None, # type: ignore[assignment] + ) + errored: ErroredT = Field( + description="The results with an errored status.", + default=None, # type: ignore[assignment] + ) + incomplete: IncompleteT = Field( + description="The results with an incomplete status.", + default=None, # type: ignore[assignment] + ) + total: TotalT = Field( + description="The combination of all statuses.", + default=None, # type: ignore[assignment] + ) diff --git a/src/guidellm/objects/statistics.py b/src/guidellm/objects/statistics.py new file mode 100644 index 00000000..0e43cdbd --- /dev/null +++ b/src/guidellm/objects/statistics.py @@ -0,0 +1,947 @@ +import math +import time as timer +from collections import defaultdict +from typing import Any, Dict, List, Literal, Optional, Tuple + +import numpy as np +from pydantic import Field, computed_field + +from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown + +__all__ = [ + "Percentiles", + "DistributionSummary", + "StatusDistributionSummary", + "RunningStats", + "TimeRunningStats", +] + + +class Percentiles(StandardBaseModel): + """ + A pydantic model representing the standard percentiles of a distribution. 
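Note: the StatusBreakdown generic above takes one type per status slot and, with pydantic v2, can be specialized directly at the call site. A small sketch of how a per-status request count might be represented, assuming the new guidellm.objects package from this patch is importable; the counts are made up.

from guidellm.objects.pydantic import StatusBreakdown

# Specialize the generic with int for every status slot (values are illustrative).
request_counts = StatusBreakdown[int, int, int, int](
    successful=118,
    errored=3,
    incomplete=4,
    total=125,
)
print(request_counts.model_dump())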
+ """ + + p001: float = Field( + description="The 0.1th percentile of the distribution.", + ) + p01: float = Field( + description="The 1st percentile of the distribution.", + ) + p05: float = Field( + description="The 5th percentile of the distribution.", + ) + p10: float = Field( + description="The 10th percentile of the distribution.", + ) + p25: float = Field( + description="The 25th percentile of the distribution.", + ) + p75: float = Field( + description="The 75th percentile of the distribution.", + ) + p90: float = Field( + description="The 90th percentile of the distribution.", + ) + p95: float = Field( + description="The 95th percentile of the distribution.", + ) + p99: float = Field( + description="The 99th percentile of the distribution.", + ) + p999: float = Field( + description="The 99.9th percentile of the distribution.", + ) + + +class DistributionSummary(StandardBaseModel): + """ + A pydantic model representing a statistical summary for a given + distribution of numerical values. + """ + + mean: float = Field( + description="The mean/average of the distribution.", + ) + median: float = Field( + description="The median of the distribution.", + ) + mode: float = Field( + description="The mode of the distribution.", + ) + variance: float = Field( + description="The variance of the distribution.", + ) + std_dev: float = Field( + description="The standard deviation of the distribution.", + ) + min: float = Field( + description="The minimum value of the distribution.", + ) + max: float = Field( + description="The maximum value of the distribution.", + ) + count: int = Field( + description="The number of values in the distribution.", + ) + total_sum: float = Field( + description="The total sum of the values in the distribution.", + ) + percentiles: Percentiles = Field( + description="The percentiles of the distribution.", + ) + cumulative_distribution_function: Optional[List[Tuple[float, float]]] = Field( + description="The cumulative distribution function (CDF) of the distribution.", + default=None, + ) + + @staticmethod + def from_distribution_function( + distribution: List[Tuple[float, float]], + include_cdf: bool = False, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of weighted numerical + values or a probability distribution function (PDF). + 1. If the distribution is a PDF, it is expected to be a list of tuples + where each tuple contains (value, probability). The sum of the + probabilities should be 1. If it is not, it will be normalized. + 2. If the distribution is a values distribution function, it is expected + to be a list of tuples where each tuple contains (value, weight). + The weights are normalized to a probability distribution function. + + :param distribution: A list of tuples representing the distribution. + Each tuple contains (value, weight) or (value, probability). + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. + :return: An instance of DistributionSummary with calculated values. 
+ """ + values, weights = zip(*distribution) if distribution else ([], []) + values = np.array(values) # type: ignore[assignment] + weights = np.array(weights) # type: ignore[assignment] + + # create the PDF + probabilities = weights / np.sum(weights) # type: ignore[operator] + pdf = np.column_stack((values, probabilities)) + pdf = pdf[np.argsort(pdf[:, 0])] + values = pdf[:, 0] # type: ignore[assignment] + probabilities = pdf[:, 1] + + # calculate the CDF + cumulative_probabilities = np.cumsum(probabilities) + cdf = np.column_stack((values, cumulative_probabilities)) + + # calculate statistics + mean = np.sum(values * probabilities).item() # type: ignore[attr-defined] + median = cdf[np.argmax(cdf[:, 1] >= 0.5), 0].item() if len(cdf) > 0 else 0 # noqa: PLR2004 + mode = values[np.argmax(probabilities)].item() if len(values) > 0 else 0 # type: ignore[call-overload] + variance = np.sum((values - mean) ** 2 * probabilities).item() # type: ignore[attr-defined] + std_dev = math.sqrt(variance) + minimum = values[0].item() if len(values) > 0 else 0 + maximum = values[-1].item() if len(values) > 0 else 0 + count = len(values) + total_sum = np.sum(values).item() # type: ignore[attr-defined] + + return DistributionSummary( + mean=mean, + median=median, + mode=mode, + variance=variance, + std_dev=std_dev, + min=minimum, + max=maximum, + count=count, + total_sum=total_sum, + percentiles=( + Percentiles( + p001=cdf[np.argmax(cdf[:, 1] >= 0.001), 0].item(), # noqa: PLR2004 + p01=cdf[np.argmax(cdf[:, 1] >= 0.01), 0].item(), # noqa: PLR2004 + p05=cdf[np.argmax(cdf[:, 1] >= 0.05), 0].item(), # noqa: PLR2004 + p10=cdf[np.argmax(cdf[:, 1] >= 0.1), 0].item(), # noqa: PLR2004 + p25=cdf[np.argmax(cdf[:, 1] >= 0.25), 0].item(), # noqa: PLR2004 + p75=cdf[np.argmax(cdf[:, 1] >= 0.75), 0].item(), # noqa: PLR2004 + p90=cdf[np.argmax(cdf[:, 1] >= 0.9), 0].item(), # noqa: PLR2004 + p95=cdf[np.argmax(cdf[:, 1] >= 0.95), 0].item(), # noqa: PLR2004 + p99=cdf[np.argmax(cdf[:, 1] >= 0.99), 0].item(), # noqa: PLR2004 + p999=cdf[np.argmax(cdf[:, 1] >= 0.999), 0].item(), # noqa: PLR2004 + ) + if len(cdf) > 0 + else Percentiles( + p001=0, + p01=0, + p05=0, + p10=0, + p25=0, + p75=0, + p90=0, + p95=0, + p99=0, + p999=0, + ) + ), + cumulative_distribution_function=cdf.tolist() if include_cdf else None, + ) + + @staticmethod + def from_values( + values: List[float], + weights: Optional[List[float]] = None, + include_cdf: bool = False, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of numerical values. + This is a wrapper around from_distribution_function to handle the optional case + of including weights for the values. If weights are not provided, they are + automatically set to 1.0 for each value, so each value is equally weighted. + + :param values: A list of numerical values representing the distribution. + :param weights: A list of weights for each value in the distribution. + If not provided, all values are equally weighted. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. 
+ """ + if weights is None: + weights = [1.0] * len(values) + + if len(values) != len(weights): + raise ValueError( + "The length of values and weights must be the same.", + ) + + return DistributionSummary.from_distribution_function( + distribution=list(zip(values, weights)), + include_cdf=include_cdf, + ) + + @staticmethod + def from_request_times( + requests: List[Tuple[float, float]], + distribution_type: Literal["concurrency", "rate"], + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of request times. + Specifically, this is used to measure concurrency or rate of requests + given an input list containing the start and end time of each request. + This will first convert the request times into a distribution function + and then calculate the statistics with from_distribution_function. + + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...] + :param distribution_type: The type of distribution to calculate. + Either "concurrency" or "rate". + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. + :param epsilon: The epsilon value for merging close events. + :return: An instance of DistributionSummary with calculated values. + """ + if distribution_type == "concurrency": + # convert to delta changes based on when requests were running + time_deltas: Dict[float, int] = defaultdict(int) + for start, end in requests: + time_deltas[start] += 1 + time_deltas[end] -= 1 + + # convert to the events over time measuring concurrency changes + events = [] + active = 0 + + for time, delta in sorted(time_deltas.items()): + active += delta + events.append((time, active)) + elif distribution_type == "rate": + # convert to events for when requests finished + global_start = min(start for start, _ in requests) if requests else 0 + events = [(global_start, 1)] + [(end, 1) for _, end in requests] + else: + raise ValueError( + f"Invalid distribution_type '{distribution_type}'. " + "Must be 'concurrency' or 'rate'." 
+ ) + + # combine any events that are very close together + flattened_events: List[Tuple[float, float]] = [] + for time, val in sorted(events): + last_time, last_val = ( + flattened_events[-1] if flattened_events else (None, None) + ) + + if ( + last_time is not None + and last_val is not None + and abs(last_time - time) <= epsilon + ): + flattened_events[-1] = (last_time, last_val + val) + else: + flattened_events.append((time, val)) + + # convert to value distribution function + distribution: Dict[float, float] = defaultdict(float) + + for ind in range(len(flattened_events) - 1): + start_time, value = flattened_events[ind] + end_time, _ = flattened_events[ind + 1] + duration = end_time - start_time + + if distribution_type == "concurrency": + # weight the concurrency value by the duration + distribution[value] += duration + elif distribution_type == "rate": + # weight the rate value by the duration + rate = value / duration + distribution[rate] += duration + + distribution_list: List[Tuple[float, float]] = sorted(distribution.items()) + + return DistributionSummary.from_distribution_function( + distribution=distribution_list, + include_cdf=include_cdf, + ) + + @staticmethod + def from_iterable_request_times( + requests: List[Tuple[float, float]], + first_iter_times: List[float], + iter_counts: List[int], + first_iter_counts: Optional[List[int]] = None, + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of request times + for a request with iterable responses between the start and end. + For example, this is used to measure auto regressive requests where + a request is started and at some later point, iterative responses are + received. This will convert the request times and iterable values into + a distribution function and then calculate the statistics with + from_distribution_function. + + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...] + :param first_iter_times: A list of times when the first iteration of + each request was received. Must be the same length as requests. + :param iter_counts: A list of the total number of iterations for each + request that occurred starting at the first iteration and ending + at the request end time. Must be the same length as requests. + :param first_iter_counts: A list of the number of iterations to log + for the first iteration of each request. For example, when calculating + total number of tokens processed, this is set to the prompt tokens number. + If not provided, defaults to 1 for each request. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. + :param epsilon: The epsilon value for merging close events. + :return: An instance of DistributionSummary with calculated values. + """ + + if first_iter_counts is None: + first_iter_counts = [1] * len(requests) + + if ( + len(requests) != len(first_iter_times) + or len(requests) != len(iter_counts) + or len(requests) != len(first_iter_counts) + ): + raise ValueError( + "requests, first_iter_times, iter_counts, and first_iter_counts must" + "be the same length." 
+ f"Given {len(requests)}, {len(first_iter_times)}, {len(iter_counts)}, " + f"{len(first_iter_counts)}", + ) + + # first break up the requests into individual iterable events + events = defaultdict(int) + global_start = min(start for start, _ in requests) if requests else 0 + global_end = max(end for _, end in requests) if requests else 0 + events[global_start] = 0 + events[global_end] = 0 + + for (_, end), first_iter, first_iter_count, total_count in zip( + requests, first_iter_times, first_iter_counts, iter_counts + ): + events[first_iter] += first_iter_count + + if total_count > 1: + iter_latency = (end - first_iter) / (total_count - 1) + for ind in range(1, total_count): + events[first_iter + ind * iter_latency] += 1 + + # combine any events that are very close together + flattened_events: List[Tuple[float, int]] = [] + + for time, count in sorted(events.items()): + last_time, last_count = ( + flattened_events[-1] if flattened_events else (None, None) + ) + + if ( + last_time is not None + and last_count is not None + and abs(last_time - time) <= epsilon + ): + flattened_events[-1] = (last_time, last_count + count) + else: + flattened_events.append((time, count)) + + # convert to value distribution function + distribution: Dict[float, float] = defaultdict(float) + + for ind in range(len(flattened_events) - 1): + start_time, count = flattened_events[ind] + end_time, _ = flattened_events[ind + 1] + duration = end_time - start_time + rate = count / duration + distribution[rate] += duration + + distribution_list = sorted(distribution.items()) + + return DistributionSummary.from_distribution_function( + distribution=distribution_list, + include_cdf=include_cdf, + ) + + +class StatusDistributionSummary( + StatusBreakdown[ + DistributionSummary, + DistributionSummary, + DistributionSummary, + DistributionSummary, + ] +): + """ + A pydantic model representing a statistical summary for a given + distribution of numerical values grouped by status. + Specifically used to represent the total, successful, incomplete, + and errored values for a benchmark or other statistical summary. + """ + + @staticmethod + def from_values( + value_types: List[Literal["successful", "incomplete", "error"]], + values: List[float], + weights: Optional[List[float]] = None, + include_cdf: bool = False, + ) -> "StatusDistributionSummary": + """ + Create a statistical summary by status for a given distribution of numerical + values. This is used to measure the distribution of values for different + statuses (e.g., successful, incomplete, error) and calculate the statistics + for each status. Weights are optional to weight the probability distribution + for each value by. If not provided, all values are equally weighted. + + :param value_types: A list of status types for each value in the distribution. + Must be one of 'successful', 'incomplete', or 'error'. + :param values: A list of numerical values representing the distribution. + Must be the same length as value_types. + :param weights: A list of weights for each value in the distribution. + If not provided, all values are equally weighted (set to 1). + Must be the same length as value_types. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output StatusDistributionSummary. + :return: An instance of StatusDistributionSummary with calculated values. 
+ """ + if any( + type_ not in {"successful", "incomplete", "error"} for type_ in value_types + ): + raise ValueError( + "value_types must be one of 'successful', 'incomplete', or 'error'. " + f"Got {value_types} instead.", + ) + + if weights is None: + weights = [1.0] * len(values) + + if len(value_types) != len(values) or len(value_types) != len(weights): + raise ValueError( + "The length of value_types, values, and weights must be the same.", + ) + + _, successful_values, successful_weights = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + _, incomplete_values, incomplete_weights = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + _, errored_values, errored_weights = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_values( + values, + weights, + include_cdf=include_cdf, + ), + successful=DistributionSummary.from_values( + successful_values, # type: ignore[arg-type] + successful_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + incomplete=DistributionSummary.from_values( + incomplete_values, # type: ignore[arg-type] + incomplete_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + errored=DistributionSummary.from_values( + errored_values, # type: ignore[arg-type] + errored_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + ) + + @staticmethod + def from_request_times( + request_types: List[Literal["successful", "incomplete", "error"]], + requests: List[Tuple[float, float]], + distribution_type: Literal["concurrency", "rate"], + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "StatusDistributionSummary": + """ + Create a statistical summary by status for given distribution of request times. + This is used to measure the distribution of request times for different statuses + (e.g., successful, incomplete, error) for concurrency and rates. + This will call into DistributionSummary.from_request_times to calculate + the statistics for each status. + + :param request_types: List of status types for each request in the distribution. + Must be one of 'successful', 'incomplete', or 'error'. + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...]. + Must be the same length as request_types. + :param distribution_type: The type of distribution to calculate. + Either "concurrency" or "rate". + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output StatusDistributionSummary. + :param epsilon: The epsilon value for merging close events. + :return: An instance of StatusDistributionSummary with calculated values. + """ + if distribution_type not in {"concurrency", "rate"}: + raise ValueError( + f"Invalid distribution_type '{distribution_type}'. " + "Must be 'concurrency' or 'rate'." + ) + + if any( + type_ not in {"successful", "incomplete", "error"} + for type_ in request_types + ): + raise ValueError( + "request_types must be one of 'successful', 'incomplete', or 'error'. 
" + f"Got {request_types} instead.", + ) + + if len(request_types) != len(requests): + raise ValueError( + "The length of request_types and requests must be the same. " + f"Got {len(request_types)} and {len(requests)} instead.", + ) + + _, successful_requests = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + _, incomplete_requests = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + _, errored_requests = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_request_times( + requests, + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + successful=DistributionSummary.from_request_times( + successful_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + incomplete=DistributionSummary.from_request_times( + incomplete_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + errored=DistributionSummary.from_request_times( + errored_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + ) + + @staticmethod + def from_iterable_request_times( + request_types: List[Literal["successful", "incomplete", "error"]], + requests: List[Tuple[float, float]], + first_iter_times: List[float], + iter_counts: Optional[List[int]] = None, + first_iter_counts: Optional[List[int]] = None, + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "StatusDistributionSummary": + """ + Create a statistical summary by status for given distribution of request times + for a request with iterable responses between the start and end. + For example, this is used to measure auto regressive requests where + a request is started and at some later point, iterative responses are + received. This will call into DistributionSummary.from_iterable_request_times + to calculate the statistics for each status. + + :param request_types: List of status types for each request in the distribution. + Must be one of 'successful', 'incomplete', or 'error'. + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...]. + Must be the same length as request_types. + :param first_iter_times: A list of times when the first iteration of + each request was received. Must be the same length as requests. + :param iter_counts: A list of the total number of iterations for each + request that occurred starting at the first iteration and ending + at the request end time. Must be the same length as requests. + If not provided, defaults to 1 for each request. + :param first_iter_counts: A list of the number of iterations to log + for the first iteration of each request. For example, when calculating + total number of tokens processed, this is set to the prompt tokens number. + If not provided, defaults to 1 for each request. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output StatusDistributionSummary. + :param epsilon: The epsilon value for merging close events. 
+ :return: An instance of StatusDistributionSummary with calculated values. + """ + if any( + type_ not in {"successful", "incomplete", "error"} + for type_ in request_types + ): + raise ValueError( + "request_types must be one of 'successful', 'incomplete', or 'error'. " + f"Got {request_types} instead.", + ) + + if iter_counts is None: + iter_counts = [1] * len(requests) + + if first_iter_counts is None: + first_iter_counts = [1] * len(requests) + + if ( + len(request_types) != len(requests) + or len(requests) != len(first_iter_times) + or len(requests) != len(iter_counts) + or len(requests) != len(first_iter_counts) + ): + raise ValueError( + "request_types, requests, first_iter_times, iter_counts, and " + "first_iter_counts must be the same length." + f"Given {len(request_types)}, {len(requests)}, " + f"{len(first_iter_times)}, {len(iter_counts)}, " + f"{len(first_iter_counts)}", + ) + + ( + _, + successful_requests, + successful_first_iter_times, + successful_iter_counts, + successful_first_iter_counts, + ) = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + ( + _, + incomplete_requests, + incomplete_first_iter_times, + incomplete_iter_counts, + incomplete_first_iter_counts, + ) = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + ( + _, + errored_requests, + errored_first_iter_times, + errored_iter_counts, + errored_first_iter_counts, + ) = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_iterable_request_times( + requests, + first_iter_times, + iter_counts, + first_iter_counts, + include_cdf=include_cdf, + epsilon=epsilon, + ), + successful=DistributionSummary.from_iterable_request_times( + successful_requests, # type: ignore[arg-type] + successful_first_iter_times, # type: ignore[arg-type] + successful_iter_counts, # type: ignore[arg-type] + successful_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + incomplete=DistributionSummary.from_iterable_request_times( + incomplete_requests, # type: ignore[arg-type] + incomplete_first_iter_times, # type: ignore[arg-type] + incomplete_iter_counts, # type: ignore[arg-type] + incomplete_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + errored=DistributionSummary.from_iterable_request_times( + errored_requests, # type: ignore[arg-type] + errored_first_iter_times, # type: ignore[arg-type] + errored_iter_counts, # type: ignore[arg-type] + errored_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + ) + + +class RunningStats(StandardBaseModel): + """ + Create a running statistics object to track the mean, rate, and other + statistics of a stream of values. + 1. The start time is set to the time the object is created. + 2. The count is set to 0. + 3. The total is set to 0. + 4. The last value is set to 0. + 5. The mean is calculated as the total / count. 
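# --- Illustrative sketch (editorial note, not part of the patch) ---
# StatusDistributionSummary (completed above) keeps one DistributionSummary per
# request status plus a combined total. Hypothetical per-request latencies:
from guidellm.objects import StatusDistributionSummary

statuses = ["successful", "successful", "error", "incomplete"]
latencies = [0.9, 1.1, 5.0, 2.5]

by_status = StatusDistributionSummary.from_values(statuses, latencies)
print(by_status.total.mean)       # mean over all four values
print(by_status.successful.mean)  # mean over the two successful values only
print(by_status.errored.count)    # 1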
+ """ + + start_time: float = Field( + default_factory=timer.time, + description=( + "The time the running statistics object was created. " + "This is used to calculate the rate of the statistics." + ), + ) + count: int = Field( + default=0, + description="The number of values added to the running statistics.", + ) + total: float = Field( + default=0.0, + description="The total sum of the values added to the running statistics.", + ) + last: float = Field( + default=0.0, + description="The last value added to the running statistics.", + ) + + @computed_field # type: ignore[misc] + @property + def mean(self) -> float: + """ + :return: The mean of the running statistics (total / count). + If count is 0, return 0.0. + """ + if self.count == 0: + return 0.0 + return self.total / self.count + + @computed_field # type: ignore[misc] + @property + def rate(self) -> float: + """ + :return: The rate of the running statistics + (total / (time.time() - start_time)). + If count is 0, return 0.0. + """ + if self.count == 0: + return 0.0 + return self.total / (timer.time() - self.start_time) + + def __add__(self, value: Any) -> float: + """ + Enable the use of the + operator to add a value to the running statistics. + + :param value: The value to add to the running statistics. + :return: The mean of the running statistics. + """ + if not isinstance(value, (int, float)): + raise ValueError( + f"Value must be an int or float, got {type(value)} instead.", + ) + + self.update(value) + + return self.mean + + def __iadd__(self, value: Any) -> "RunningStats": + """ + Enable the use of the += operator to add a value to the running statistics. + + :param value: The value to add to the running statistics. + :return: The running statistics object. + """ + if not isinstance(value, (int, float)): + raise ValueError( + f"Value must be an int or float, got {type(value)} instead.", + ) + + self.update(value) + + return self + + def update(self, value: float, count: int = 1) -> None: + """ + Update the running statistics with a new value. + + :param value: The new value to add to the running statistics. + :param count: The number of times to 'count' for the value. + If not provided, defaults to 1. + """ + self.count += count + self.total += value + self.last = value + + +class TimeRunningStats(RunningStats): + """ + Create a running statistics object to track the mean, rate, and other + statistics of a stream of time values. This is used to track time values + in milliseconds and seconds. + + Adds time specific computed_fields such as measurements in milliseconds and seconds. + """ + + @computed_field # type: ignore[misc] + @property + def total_ms(self) -> float: + """ + :return: The total time multiplied by 1000.0 to convert to milliseconds. + """ + return self.total * 1000.0 + + @computed_field # type: ignore[misc] + @property + def last_ms(self) -> float: + """ + :return: The last time multiplied by 1000.0 to convert to milliseconds. + """ + return self.last * 1000.0 + + @computed_field # type: ignore[misc] + @property + def mean_ms(self) -> float: + """ + :return: The mean time multiplied by 1000.0 to convert to milliseconds. + """ + return self.mean * 1000.0 + + @computed_field # type: ignore[misc] + @property + def rate_ms(self) -> float: + """ + :return: The rate of the running statistics multiplied by 1000.0 + to convert to milliseconds. 
+ """ + return self.rate * 1000.0 diff --git a/src/guidellm/request/__init__.py b/src/guidellm/request/__init__.py index 4feca91c..bdd87389 100644 --- a/src/guidellm/request/__init__.py +++ b/src/guidellm/request/__init__.py @@ -1,13 +1,15 @@ -from .base import GenerationMode, RequestGenerator -from .emulated import EmulatedConfig, EmulatedRequestGenerator -from .file import FileRequestGenerator -from .transformers import TransformersDatasetRequestGenerator +from .loader import ( + GenerativeRequestLoader, + GenerativeRequestLoaderDescription, + RequestLoader, + RequestLoaderDescription, +) +from .request import GenerationRequest __all__ = [ - "EmulatedConfig", - "EmulatedRequestGenerator", - "FileRequestGenerator", - "GenerationMode", - "RequestGenerator", - "TransformersDatasetRequestGenerator", + "RequestLoader", + "RequestLoaderDescription", + "GenerativeRequestLoaderDescription", + "GenerativeRequestLoader", + "GenerationRequest", ] diff --git a/src/guidellm/request/base.py b/src/guidellm/request/base.py deleted file mode 100644 index 9fd303e6..00000000 --- a/src/guidellm/request/base.py +++ /dev/null @@ -1,200 +0,0 @@ -import contextlib -import threading -import time -from abc import ABC, abstractmethod -from queue import Empty, Full, Queue -from typing import Iterator, Literal, Union - -from loguru import logger -from transformers import ( # type: ignore # noqa: PGH003 - AutoTokenizer, - PreTrainedTokenizer, -) - -from guidellm.core.request import TextGenerationRequest - -__all__ = ["GenerationMode", "RequestGenerator"] - - -GenerationMode = Literal["async", "sync"] - - -class RequestGenerator(ABC): - """ - A base class for request generators that generate result requests. - - :param type_: The type of the request generator. - :type type_: str - :param source: The data source for the request generator. - :type source: str - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] - :param mode: The generation mode, either 'async' or 'sync'. - :type mode: GenerationMode - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - type_: str, - source: str, - tokenizer: Union[str, PreTrainedTokenizer], - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - self._type = type_ - self._source = source - self._async_queue_size: int = async_queue_size - self._mode: str = mode - self._queue: Queue = Queue(maxsize=async_queue_size) - self._stop_event: threading.Event = threading.Event() - - if not tokenizer: - err = "Tokenizer must be provided for request generation" - logger.error(err) - raise ValueError(err) - - self._tokenizer = ( - AutoTokenizer.from_pretrained(tokenizer) - if isinstance(tokenizer, str) - else tokenizer - ) - logger.info("Tokenizer initialized for request generation: {}", self._tokenizer) - - if self._mode == "async": - self._thread = threading.Thread(target=self._populate_queue, daemon=True) - self._thread.start() - logger.info( - "RequestGenerator started in async mode with queue size: {}", - self._async_queue_size, - ) - - def __repr__(self) -> str: - """ - Return a string representation of the RequestGenerator. - - :return: String representation of the RequestGenerator. 
- :rtype: str - """ - return ( - f"RequestGenerator(" - f"mode={self._mode}, " - f"async_queue_size={self._async_queue_size}, " - f"tokenizer={self._tokenizer})" - ) - - def __iter__(self) -> Iterator[TextGenerationRequest]: - """ - Provide an iterator interface to generate new requests. - - :return: An iterator over result requests. - :rtype: Iterator[TextGenerationRequest] - """ - if self.mode == "async": - while not self._stop_event.is_set(): - try: - item = self._queue.get_nowait() - self._queue.task_done() - yield item - except Empty: - time.sleep(0.01) - continue - else: - while not self._stop_event.is_set(): - yield self.create_item() - - @abstractmethod - def __len__(self) -> int: - """ - Abstract method to get the length of the collection to be generated. - """ - - @abstractmethod - def create_item(self) -> TextGenerationRequest: - """ - Abstract method to create a new result request item. - - :return: A new result request. - :rtype: TextGenerationRequest - """ - - @property - def type_(self) -> str: - """ - Get the type of the request generator. - - :return: The type of the request generator. - :rtype: str - """ - return self._type - - @property - def source(self) -> str: - """ - Get the data source for the request generator. - - :return: The data source. - :rtype: str - """ - return self._source - - @property - def tokenizer(self) -> PreTrainedTokenizer: - """ - Get the tokenizer instance. - - :return: The tokenizer instance. - :rtype: PreTrainedTokenizer - """ - return self._tokenizer - - @property - def mode(self) -> str: - """ - Get the generation mode. - - :return: The generation mode. - :rtype: str - """ - return self._mode - - @property - def async_queue_size(self) -> int: - """ - Get the size of the request queue. - - :return: The size of the request queue. - :rtype: int - """ - return self._async_queue_size - - def stop(self): - """ - Stop the background task that populates the queue. - """ - logger.info("Stopping RequestGenerator...") - self._stop_event.set() - if self._mode == "async": - self._thread.join() - logger.info("RequestGenerator stopped") - - def _populate_queue(self): - """ - Populate the request queue in the background. - """ - - while not self._stop_event.is_set(): - with contextlib.suppress(Full): - if self._queue.qsize() < self._async_queue_size: - item = self.create_item() - self._queue.put(item, timeout=0.1) - logger.debug( - "Item added to queue. Current queue size: {}", - self._queue.qsize(), - ) - else: - time.sleep(0.1) - - logger.info("RequestGenerator stopped populating queue") diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py deleted file mode 100644 index 7d481cb7..00000000 --- a/src/guidellm/request/emulated.py +++ /dev/null @@ -1,397 +0,0 @@ -import json -import math -from dataclasses import dataclass -from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union - -import numpy as np -from loguru import logger -from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 - -from guidellm.config import settings -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import clean_text, filter_text, load_text, split_text - -__all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"] - - -@dataclass -class EmulatedConfig: - """ - Configuration for emulated text generation requests. - - Args: - prompt_tokens (int): Number of prompt tokens. 
- prompt_tokens_variance (Optional[int]): Variance for prompt tokens. - prompt_tokens_min (Optional[int]): Minimum number of prompt tokens. - prompt_tokens_max (Optional[int]): Maximum number of prompt tokens. - generated_tokens (Optional[int]): Number of generated tokens. - generated_tokens_variance (Optional[int]): Variance for generated tokens. - generated_tokens_min (Optional[int]): Minimum number of generated tokens. - generated_tokens_max (Optional[int]): Maximum number of generated tokens. - """ - - @staticmethod - def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": - """ - Create an EmulatedConfig instance from a configuration source. - - :param config: Configuration source, can be a dictionary, JSON string, - key=value string, or file path. - :type config: Union[str, Path, Dict] - :return: An instance of EmulatedConfig. - :rtype: EmulatedConfig - :raises FileNotFoundError: If the configuration file is not found. - :raises ValueError: If the configuration format is invalid. - """ - if not config: - logger.debug("Creating default configuration") - return EmulatedConfig(prompt_tokens=1024, generated_tokens=256) - - if isinstance(config, dict): - logger.debug("Loading configuration from dict: {}", config) - return EmulatedConfig(**config) - - if isinstance(config, Path) or ( - isinstance(config, str) and (config.endswith(".json") or "{" in config) - ): - logger.debug("Loading configuration from json: {}", config) - - if isinstance(config, str) and "{" in config: - json_text = config.strip() - else: - if isinstance(config, str): - config = Path(config) - - if not config.exists(): - raise FileNotFoundError(f"Configuration file not found: {config}") - - json_text = config.read_text(encoding="utf-8") - - json_dict = json.loads(json_text) - - return EmulatedConfig(**json_dict) - - if isinstance(config, str) and "=" in config: - logger.debug("Loading configuration from csv string: {}", config) - items = config.split(",") - config_dict = {} - for item in items: - key_value = item.strip().split("=") - if len(key_value) != 2: # noqa: PLR2004 - raise ValueError(f"Unexpected format for item: {item}") - key = key_value[0].strip() - value = ( - int(key_value[1].strip()) - if key_value[1].isnumeric() - else key_value[1] - ) - config_dict[key] = value - - return EmulatedConfig(**config_dict) # type: ignore # noqa: PGH003 - - raise ValueError( - f"Invalid configuration given for creation of EmulatedConfig: {config}" - ) - - prompt_tokens: int - prompt_tokens_variance: Optional[int] = None - prompt_tokens_min: Optional[int] = None - prompt_tokens_max: Optional[int] = None - - generated_tokens: Optional[int] = None - generated_tokens_variance: Optional[int] = None - generated_tokens_min: Optional[int] = None - generated_tokens_max: Optional[int] = None - - @property - def prompt_tokens_range(self) -> Tuple[int, int]: - """ - Get the range (min, max) of prompt tokens to generate. - - :return: The range of prompt tokens. - :rtype: Tuple[int, int] - """ - return self._token_range( - self.prompt_tokens, - self.prompt_tokens_variance, - self.prompt_tokens_min, - self.prompt_tokens_max, - ) - - @property - def output_tokens_range(self) -> Tuple[int, int]: - """ - Get the range (min, max) of output tokens to generate. - - :return: The range of generated tokens. 
- :rtype: Tuple[int, int] - """ - if not self.generated_tokens: - return 0, 0 - - return self._token_range( - self.generated_tokens, - self.generated_tokens_variance, - self.generated_tokens_min, - self.generated_tokens_max, - ) - - def sample_prompt_tokens(self, rng: np.random.Generator) -> int: - """ - Sample the number of prompt tokens to generate. - - :param rng: The random number generator to use. - :type rng: np.random.Generator - :return: The number of prompt tokens to create. - :rtype: int - """ - return self._sample_tokens( - self.prompt_tokens, - self.prompt_tokens_variance, - self.prompt_tokens_min, - self.prompt_tokens_max, - rng, - ) - - def sample_output_tokens(self, rng: np.random.Generator) -> Optional[int]: - """ - Sample the number of output tokens to generate. - - :param rng: The random number generator to use. - :type rng: np.random.Generator - :return: The number of output tokens to generate. - :rtype: Optional[int] - """ - if not self.generated_tokens: - return None - - return self._sample_tokens( - self.generated_tokens, - self.generated_tokens_variance, - self.generated_tokens_min, - self.generated_tokens_max, - rng, - ) - - @staticmethod - def _sample_tokens( - base: int, - variance: Optional[int], - min_tokens: Optional[int], - max_tokens: Optional[int], - rng: np.random.Generator, - ) -> int: - min_tokens, max_tokens = EmulatedConfig._token_range( - base, variance, min_tokens, max_tokens - ) - - if min_tokens == max_tokens: - return min_tokens - - if not variance: - return rng.integers(min_tokens, max_tokens + 1) - - rand = rng.normal(base, math.sqrt(variance)) - - return int(min(max(rand, min_tokens), max_tokens)) - - @staticmethod - def _token_range( - base: int, - variance: Optional[int], - min_tokens: Optional[int], - max_tokens: Optional[int], - ) -> Tuple[int, int]: - if not variance: - return ( - min_tokens or base, - max_tokens or base, - ) - - min_tokens = min_tokens if min_tokens and min_tokens > 0 else 1 - max_tokens = ( - max_tokens if max_tokens and max_tokens > base else base + 5 * variance - ) - - return min_tokens, max_tokens - - -class EndlessTokens(List[str]): - """ - A list subclass that allows for endless data generation. - """ - - def __init__( - self, - data: Union[str, Path], - filter_start: Optional[Union[str, int]] = None, - filter_end: Optional[Union[str, int]] = None, - clean_text_args: Optional[Dict[str, bool]] = None, - ): - """ - Initialize EndlessDataWords with data. - - :param data: Source text data. - :type data: str - """ - logger.debug("Loading data from: {}", data) - data = load_text(data) - data = filter_text(data, filter_start, filter_end) - data = ( - clean_text(data) - if not clean_text_args - else clean_text(data, **clean_text_args) - ) - self._tokens, self._token_separators, self._line_indices = split_text(data) - - super().__init__(self._tokens) - - @property - def line_indices(self) -> List[int]: - """ - Get the list of start indices for lines. - - :return: List of start indices. - :rtype: List[int] - """ - return self._line_indices - - def create_text(self, start: int, length: int) -> str: - """ - Create a text snippet from the specified range. - - :param start: Start index. - :type start: int - :param length: Length of the snippet. - :type length: int - :return: Text snippet. 
- :rtype: str - """ - start = start % len(self) - text = "" - buff_token_sep = "" - - for counter in range(length): - index = (start + counter) % len(self) - text += buff_token_sep + self[index] - buff_token_sep = self._token_separators[index] - - return text - - -class EmulatedRequestGenerator(RequestGenerator): - """ - A request generator that generates emulated requests based on a configuration. - - :param config: The configuration string, file path, or dictionary. - :type config: Union[str, Dict, Path] - :param random_seed: The random seed to use for generating requests. - :type random_seed: Optional[int] - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] - :param mode: The generation mode, either 'async' or 'sync'. - :type mode: GenerationMode - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - config: Optional[Union[str, Path, Dict]], - random_seed: Optional[int] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - """ - Initialize EmulatedRequestGenerator with configuration and tokenizer. - - :param config: Configuration source, can be a dictionary, - JSON string, or file path. - :type config: Optional[Union[str, Path, Dict]] - :param random_seed: Optional seed for random number generator. - :type random_seed: Optional[int] - :param tokenizer: Tokenizer instance or configuration for tokenizing prompts. - :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] - :param mode: Mode of request generation, either 'async' or 'sync'. - :type mode: str - :param async_queue_size: Size of the asynchronous queue. - :type async_queue_size: int - """ - self._config = EmulatedConfig.create_config(config) - self._tokens = EndlessTokens( - settings.emulated_data.source, - settings.emulated_data.filter_start, - settings.emulated_data.filter_end, - ) - self._rng = np.random.default_rng(random_seed) - - # NOTE: Must be after all the parameters since the queue population - # function requires attributes above - super().__init__( - type_="emulated", - source=str(config), - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def __len__(self) -> int: - raise NotImplementedError( - "Can't get the length of the emulated dataset. " - "Check the `--data-type` CLI parameter." - ) - - def create_item(self) -> TextGenerationRequest: - """ - Create a new text generation request item from the data. - - :return: A new text generation request. - :rtype: TextGenerationRequest - """ - logger.debug("Creating new text generation request") - target_prompt_token_count = self._config.sample_prompt_tokens(self._rng) - prompt = self.sample_prompt(target_prompt_token_count) - prompt_token_count = len(self.tokenizer.tokenize(prompt)) - output_token_count = self._config.sample_output_tokens(self._rng) - logger.debug("Generated prompt: {}", prompt) - - return TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - ) - - def sample_prompt(self, tokens: int) -> str: - """ - Sample a prompt with the specified number of tokens. - - :param tokens: Number of tokens for the prompt. - :type tokens: int - :return: Sampled prompt text. 
- :rtype: str - """ - start_line_index = self._rng.integers(0, len(self._tokens.line_indices)) - - # binary search to find the proper number of tokens for the prompt - # this is because tokenizers differ in tokenization behavior - left = 0 - right = left + 5 * tokens - - while left < right: - mid = (left + right) // 2 - prompt = self._tokens.create_text(start_line_index, mid) - token_count = len(self.tokenizer.tokenize(prompt)) - - if token_count == tokens: - return prompt - - if token_count < tokens: - left = mid + 1 - else: - right = mid - - return self._tokens.create_text(start_line_index, left) diff --git a/src/guidellm/request/file.py b/src/guidellm/request/file.py deleted file mode 100644 index b187f7b4..00000000 --- a/src/guidellm/request/file.py +++ /dev/null @@ -1,83 +0,0 @@ -from pathlib import Path -from typing import Optional, Union - -from loguru import logger -from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 - -from guidellm.config import settings -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import load_text_lines - -__all__ = ["FileRequestGenerator"] - - -class FileRequestGenerator(RequestGenerator): - """ - A request generator implementation for files. - - :param path: The path to the file containing the data. - :type path: Optional[Union[str, Path]] - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] - :param mode: The generation mode, either 'async' or 'sync'. - :type mode: str - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - path: Optional[Union[str, Path]], - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - if not path: - raise ValueError("File path must be provided for FileRequestGenerator") - - self._path = path - self._data = load_text_lines( - path, - filters=settings.dataset.preferred_data_columns, - ) - self._iterator = iter(self._data) - - # NOTE: Must be after all the parameters since the queue population - # function requires attributes above - super().__init__( - type_="file", - source=str(path), - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def __len__(self) -> int: - """ - Return the number of text lines. - """ - - return len(self._data) - - def create_item(self) -> TextGenerationRequest: - """ - Create a new result request item from the data. - - :return: A new result request. 
- :rtype: TextGenerationRequest - """ - logger.debug("Creating new request item from file data") - - try: - data = next(self._iterator) - except StopIteration: - self._iterator = iter(self._data) - data = next(self._iterator) - - token_count = len(self.tokenizer.tokenize(data)) - request = TextGenerationRequest(prompt=data, prompt_token_count=token_count) - logger.debug("Created new TextGenerationRequest: {}", request) - - return request diff --git a/src/guidellm/request/loader.py b/src/guidellm/request/loader.py new file mode 100644 index 00000000..de11e9c3 --- /dev/null +++ b/src/guidellm/request/loader.py @@ -0,0 +1,284 @@ +from abc import abstractmethod +from pathlib import Path +from typing import ( + Any, + Dict, + Iterable, + Iterator, + List, + Literal, + Optional, + Union, +) + +from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset import ColumnInputTypes, load_dataset +from guidellm.objects import StandardBaseModel +from guidellm.request.request import GenerationRequest + +__all__ = [ + "RequestLoaderDescription", + "RequestLoader", + "GenerativeRequestLoaderDescription", + "GenerativeRequestLoader", +] + + +class RequestLoaderDescription(StandardBaseModel): + type_: Literal["request_loader"] = "request_loader" + + +class RequestLoader(Iterable): + @abstractmethod + def __iter__(self): ... + + @abstractmethod + def __len__(self): ... + + @property + @abstractmethod + def description(self) -> RequestLoaderDescription: ... + + +class GenerativeRequestLoaderDescription(RequestLoaderDescription): + type_: Literal["generative_request_loader"] = "generative_request_loader" # type: ignore[assignment] + data: str + data_args: Optional[Dict[str, Any]] + processor: str + processor_args: Optional[Dict[str, Any]] + + +class GenerativeRequestLoader(RequestLoader): + DEFAULT_PROMPT_COLUMNS = [ + "prompt", + "prompts", + "instruction", + "instructions", + "question", + "questions", + "input", + "inputs", + "context", + "content", + "conversation", + "conversations", + "text", + ] + + def __init__( + self, + data: Union[ + str, + Path, + Iterable[Union[str, Dict[str, Any]]], + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + ], + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + shuffle: bool = True, + iter_type: Literal["finite", "infinite"] = "finite", + random_seed: int = 42, + ): + self.data = data + self.data_args = data_args + dataset, args_column_mappings = load_dataset( + data, + data_args, + processor, + processor_args, + random_seed, + ) + self.dataset = dataset + self.processor = processor + self.processor_args = processor_args + self.shuffle = shuffle + self.iter_type = iter_type + self.random_seed = random_seed + + self.column_mappings = self._create_column_mappings(args_column_mappings) + self.preserve_iter_state = iter_type == "infinite" # ensure no caching requests + self._preserved_iter = None + + def __iter__(self) -> Iterator[GenerationRequest]: + scope_create_count = 0 + + while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None: + scope_create_count += 1 + + for item in dataset_iter: + yield self._create_request(item) + + self._preserved_iter = None + + def __len__(self) -> int: + if self.iter_type == "finite": + return self.num_unique_items() + + raise ValueError(f"Unable to determine length of dataset: 
{self.data}") + + @property + def description(self) -> GenerativeRequestLoaderDescription: + return GenerativeRequestLoaderDescription( + data=str(self.data), + data_args=self.data_args, + processor=str(self.processor), + processor_args=self.processor_args, + ) + + def num_unique_items(self, raise_err: bool = True) -> int: + try: + return len(self.dataset) + except Exception: # noqa: BLE001, S110 + pass + + dataset_size = self.dataset.info.dataset_size + if dataset_size is not None: + return dataset_size + + if raise_err: + raise ValueError("Unable to determine number of items in the dataset") + + return -1 + + def _create_column_mappings( + self, + args_column_mappings: Dict[ColumnInputTypes, str], + ) -> Dict[ColumnInputTypes, str]: + column_mappings: Dict[ColumnInputTypes, str] = {} + + if "text_column" in args_column_mappings: + column_mappings["prompt_column"] = args_column_mappings["text_column"] + else: + column_mappings["prompt_column"] = self._extract_text_column() + + if "prompt_tokens_count_column" in args_column_mappings: + column_mappings["prompt_tokens_count_column"] = args_column_mappings[ + "prompt_tokens_count_column" + ] + elif prompt_tokens_count_column := self._extract_prompt_tokens_count_column(): + column_mappings["prompt_tokens_count_column"] = prompt_tokens_count_column + + if "output_tokens_count_column" in args_column_mappings: + column_mappings["output_tokens_count_column"] = args_column_mappings[ + "output_tokens_count_column" + ] + elif output_tokens_count_column := self._extract_output_tokens_count_column(): + column_mappings["output_tokens_count_column"] = output_tokens_count_column + + return column_mappings + + def _extract_text_column(self) -> str: + column_names = self._dataset_columns( + err_msg=( + "Unable to determine text column from dataset and it is required. " + "To specify the text column, set the 'text_column' key in the " + "'data_args' dictionary." + ) + ) + + if not column_names: + raise ValueError( + "Unable to determine text column from dataset and it is required. " + "To specify the text column, set the 'text_column' key in the " + "'data_args' dictionary." + ) + + if len(column_names) == 1: + return column_names[0] + + for def_column in self.DEFAULT_PROMPT_COLUMNS: + if def_column in column_names: + return def_column + + raise ValueError( + f"Unable to determine text column from dataset columns: {column_names}. " + "To specify the text column, set the 'text_column' key in the " + "'data_args' dictionary." 
+ ) + + def _extract_prompt_tokens_count_column(self) -> Optional[str]: + column_names = self._dataset_columns() + + if column_names and "prompt_tokens_count" in column_names: + return "prompt_tokens_count" + + if column_names and "prompt_tokens" in column_names: + return "prompt_tokens" + + return None + + def _extract_output_tokens_count_column(self) -> Optional[str]: + column_names = self._dataset_columns() + + if column_names and "output_tokens_count" in column_names: + return "output_tokens_count" + + if column_names and "output_tokens" in column_names: + return "output_tokens" + + return None + + def _dataset_columns(self, err_msg: Optional[str] = None) -> Optional[List[str]]: + try: + column_names = self.dataset.column_names + + if not column_names and err_msg: + raise ValueError(f"No column names found in dataset: {self.data}") + except Exception as err: + if err_msg: + raise ValueError(err_msg) from err + + column_names = None + + return column_names + + def _get_dataset_iter( + self, scope_create_count: int + ) -> Optional[Iterator[Dict[str, Any]]]: + if scope_create_count > 0 and self.iter_type != "infinite": + return None + + if self.preserve_iter_state and self._preserved_iter is not None: + return self._preserved_iter + + dataset = ( + self.dataset + if not self.shuffle + else self.dataset.shuffle(seed=self.random_seed) + ) + + dataset_iter = iter(dataset) + + if self.preserve_iter_state: + self._preserved_iter = dataset_iter + + return dataset_iter + + def _create_request(self, item: Dict[str, Any]) -> GenerationRequest: + prompt_tokens = ( + item[self.column_mappings["prompt_tokens_count_column"]] + if "prompt_tokens_count_column" in self.column_mappings + else None + ) + output_tokens = ( + item[self.column_mappings["output_tokens_count_column"]] + if "output_tokens_count_column" in self.column_mappings + else None + ) + + return GenerationRequest( + request_type="text_completions", + content=item[self.column_mappings["prompt_column"]], + stats=( + {"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {} + ), + constraints=( + {"output_tokens": output_tokens} if output_tokens is not None else {} + ), + ) diff --git a/src/guidellm/request/request.py b/src/guidellm/request/request.py new file mode 100644 index 00000000..216ca0e9 --- /dev/null +++ b/src/guidellm/request/request.py @@ -0,0 +1,79 @@ +import uuid +from typing import Any, Dict, Literal, Optional + +from pydantic import Field + +from guidellm.objects.pydantic import StandardBaseModel + +__all__ = ["GenerationRequest"] + + +class GenerationRequest(StandardBaseModel): + """ + A class representing a request for generation. + This class is used to encapsulate the details of a generation request, + including the request ID, type, content, parameters, statistics, and constraints. + It is designed to be used with the BackendRequestsWorker class to handle + the generation process. + + :param request_id: The unique identifier for the request. + :param request_type: The type of request (e.g., text, chat). + :param content: The content for the request to send to the backend. + If request_type is 'text', this should be a string or list of strings + which will be resolved by backend.text_completions. + If request_type is 'chat', this should be a string, + a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]), + or Any raw content which will be resolved by backend.chat_completions. + If raw content, raw_content=True must be passed in the params. 
+ :param params: Additional parameters for the request passed in as kwargs. + For an http backend, these are passed into the body of the request. + :param stats: Statistics for the request, such as the number of prompt tokens. + Used for tracking and reporting purposes. + :param constraints: Constraints for the request, such as the maximum number + of output tokens. Used for controlling the behavior of the backend. + """ + + request_id: Optional[str] = Field( + default_factory=lambda: str(uuid.uuid4()), + description="The unique identifier for the request.", + ) + request_type: Literal["text_completions", "chat_completions"] = Field( + default="text_completions", + description=( + "The type of request (e.g., text, chat). " + "If request_type='text_completions', resolved by backend.text_completions. " + "If request_typ='chat_completions', resolved by backend.chat_completions." + ), + ) + content: Any = Field( + description=( + "The content for the request to send to the backend. " + "If request_type is 'text', this should be a string or list of strings " + "which will be resolved by backend.text_completions. " + "If request_type is 'chat', this should be a string, " + "a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]), " + "or Any raw content which will be resolved by backend.chat_completions. " + "If raw content, raw_content=True must be passed in the params." + ) + ) + params: Dict[str, Any] = Field( + default_factory=dict, + description=( + "Additional parameters for the request that will be passed in as kwargs. " + "For an http backend, these are passed into the body of the request. " + ), + ) + stats: Dict[Literal["prompt_tokens"], int] = Field( + default_factory=dict, + description=( + "Statistics for the request, such as the number of prompt tokens. " + "Used for tracking and reporting purposes." + ), + ) + constraints: Dict[Literal["output_tokens"], int] = Field( + default_factory=dict, + description=( + "Constraints for the request, such as the maximum number of output tokens. " + "Used for controlling the behavior of the backend." + ), + ) diff --git a/src/guidellm/request/transformers.py b/src/guidellm/request/transformers.py deleted file mode 100644 index 3fd24040..00000000 --- a/src/guidellm/request/transformers.py +++ /dev/null @@ -1,103 +0,0 @@ -from pathlib import Path -from typing import Optional, Union - -from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict -from loguru import logger -from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import ( - load_transformers_dataset, - resolve_transformers_dataset_column, -) - -__all__ = ["TransformersDatasetRequestGenerator"] - - -class TransformersDatasetRequestGenerator(RequestGenerator): - """ - A request generator implementation for Hugging Face datasets. - - :param dataset: The name of the Hugging Face dataset to use or the path - to a local dataset. - :type dataset_name: str - :param split: The split of the dataset to use (e.g., 'train', 'test'). - :type split: str - :param column: The column/field to use for generating requests. - :type column: str - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] - :param mode: The generation mode, either 'async' or 'sync'. 
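# --- Editor's illustrative sketch (not part of this patch) ---
# Constructing the new GenerationRequest model defined above. The import path
# mirrors the `from guidellm.request import GenerationRequest` usage elsewhere
# in this diff; field values are hypothetical.
from guidellm.request import GenerationRequest

# A text-completion request with a known prompt length and a cap on output tokens.
request = GenerationRequest(
    content="Explain speculative decoding in two sentences.",
    params={"temperature": 0.0},        # forwarded as kwargs / request-body fields
    stats={"prompt_tokens": 9},         # bookkeeping only, used for reporting
    constraints={"output_tokens": 64},  # asks the backend to stop at 64 output tokens
)

# A chat-completion request; a plain string is resolved by backend.chat_completions.
chat_request = GenerationRequest(
    request_type="chat_completions",
    content="Hello! What can you do?",
)
# --- end sketch ---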
- :type mode: str - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - dataset: Union[ - str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset - ], - split: Optional[str] = None, - column: Optional[str] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - **kwargs, - ): - self._dataset = dataset - self._split = split - self._column = column - self._kwargs = kwargs - - self._hf_dataset: Union[Dataset, IterableDataset] = load_transformers_dataset( - dataset, split=split, **kwargs - ) - self._hf_column = resolve_transformers_dataset_column( - self._hf_dataset, column=column - ) - self._hf_dataset_iterator = iter(self._hf_dataset) - - # NOTE: Must be after all the parameters since the queue population - # function requires attributes above - super().__init__( - type_="transformers_dataset", - source=str(dataset), - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def __len__(self) -> int: - if not isinstance(self._hf_dataset, Dataset): - raise ValueError("Can't get dataset size for IterableDataset object") - else: - return len(self._hf_dataset) - - def create_item(self) -> TextGenerationRequest: - """ - Create a new result request item from the dataset. - - :return: A new result request. - :rtype: TextGenerationRequest - """ - - logger.debug("Creating new request item from dataset") - - try: - data = next(self._hf_dataset_iterator) - except StopIteration: - self._hf_dataset_iterator = iter(self._hf_dataset) - data = next(self._hf_dataset_iterator) - - prompt = data[self._hf_column] - token_count = len(self.tokenizer.tokenize(prompt)) - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=token_count, - ) - logger.debug(f"Created new TextGenerationRequest: {request}") - - return request diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py index 39485648..e26f3bb3 100644 --- a/src/guidellm/scheduler/__init__.py +++ b/src/guidellm/scheduler/__init__.py @@ -1,4 +1,52 @@ -from .load_generator import LoadGenerationMode, LoadGenerator -from .scheduler import Scheduler, SchedulerResult +from .result import ( + SchedulerRequestInfo, + SchedulerRequestResult, + SchedulerResult, + SchedulerRunInfo, +) +from .scheduler import Scheduler +from .strategy import ( + AsyncConstantStrategy, + AsyncPoissonStrategy, + ConcurrentStrategy, + SchedulingStrategy, + StrategyType, + SynchronousStrategy, + ThroughputStrategy, + strategy_display_str, +) +from .types import RequestT, ResponseT +from .worker import ( + GenerativeRequestsWorker, + GenerativeRequestsWorkerDescription, + RequestsWorker, + ResolveStatus, + WorkerDescription, + WorkerProcessRequest, + WorkerProcessResult, +) -__all__ = ["LoadGenerationMode", "LoadGenerator", "Scheduler", "SchedulerResult"] +__all__ = [ + "SchedulerRequestInfo", + "SchedulerRequestResult", + "SchedulerResult", + "SchedulerRunInfo", + "Scheduler", + "AsyncConstantStrategy", + "AsyncPoissonStrategy", + "ConcurrentStrategy", + "SchedulingStrategy", + "StrategyType", + "SynchronousStrategy", + "ThroughputStrategy", + "strategy_display_str", + "RequestT", + "ResponseT", + "WorkerProcessRequest", + "WorkerProcessResult", + "ResolveStatus", + "WorkerDescription", + "RequestsWorker", + "GenerativeRequestsWorkerDescription", + "GenerativeRequestsWorker", +] diff --git a/src/guidellm/scheduler/load_generator.py 
b/src/guidellm/scheduler/load_generator.py deleted file mode 100644 index f629752a..00000000 --- a/src/guidellm/scheduler/load_generator.py +++ /dev/null @@ -1,196 +0,0 @@ -import time -from typing import Generator, Literal, Optional, get_args - -import numpy as np -from loguru import logger - -__all__ = ["LoadGenerationMode", "LoadGenerator"] - -LoadGenerationMode = Literal["synchronous", "constant", "poisson", "throughput"] - - -class LoadGenerator: - """ - Load Generator class that generates timestamps for load generation. - - This class supports multiple load generation modes: "constant", "poisson", - "throughput", and "synchronous". Each mode has its own method for generating - timestamps based on the rate provided during initialization. - - :param mode: The mode of load generation. Valid options are "constant", - "poisson", "throughput", and "synchronous". - :type mode: LoadGenerationMode - :param rate: The rate at which to generate timestamps. This value is - interpreted differently depending on the mode. - :type rate: float - - :raises ValueError: If an invalid mode is provided. - """ - - def __init__(self, mode: LoadGenerationMode, rate: Optional[float] = None): - """ - Initialize the Load Generator with the mode and rate. - - :param mode: The mode of load generation ("constant", "poisson", "throughput", - or "synchronous"). - :type mode: LoadGenerationMode - :param rate: The rate at which to generate timestamps. In the "constant" - mode, this represents the frequency of events. In the "poisson" mode, - it represents the average frequency. - :type rate: Optional[float] - """ - if mode not in get_args(LoadGenerationMode): - error = ValueError( - f"{mode} is not a valid Load Generation Mode. " - f"Valid options are {get_args(LoadGenerationMode)}" - ) - logger.error(error) - raise error - - if mode not in ["synchronous", "throughput"] and (rate is None or rate <= 0): - error = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") - logger.error(error) - raise error - - self._mode = mode - self._rate = rate - logger.debug( - "Initialized LoadGenerator with mode: {mode}, rate: {rate}", - mode=mode, - rate=rate, - ) - - @property - def mode(self) -> LoadGenerationMode: - """ - Get the mode of load generation. - - :return: The mode of load generation. - :rtype: LoadGenerationMode - """ - return self._mode - - @property - def rate(self) -> Optional[float]: - """ - Get the rate of load generation. - - :return: The rate of load generation. - :rtype: Optional[float] - """ - return self._rate - - def times(self) -> Generator[float, None, None]: - """ - Generate timestamps for load generation based on the selected mode. - - :return: A generator that yields timestamps at which each load - should be initiated. - :rtype: Generator[float, None, None] - - :raises ValueError: If the mode is invalid. - """ - logger.debug(f"Generating timestamps using mode: {self._mode}") - - if self._mode == "throughput": - yield from self.throughput_times() - elif self._mode == "constant": - yield from self.constant_times() - elif self._mode == "poisson": - yield from self.poisson_times() - elif self._mode == "synchronous": - yield from self.synchronous_times() - else: - logger.error(f"Invalid mode encountered: {self._mode}") - raise ValueError(f"Invalid mode: {self._mode}") - - def synchronous_times(self) -> Generator[float, None, None]: - """ - Generate invalid timestamps for the "synchronous" mode. - - :return: A generator that yields a constant invalid timestamp (-1.0). 
- :rtype: Generator[float, None, None] - """ - logger.debug("Generating invalid timestamps for synchronous mode") - while True: - yield -1.0 - - def throughput_times(self) -> Generator[float, None, None]: - """ - Generate timestamps at the maximum rate possible, returning the current time. - - :return: A generator that yields the current time in seconds. - :rtype: Generator[float, None, None] - """ - logger.debug("Generating timestamps at throughput rate") - while True: - yield time.time() - - def constant_times(self) -> Generator[float, None, None]: - """ - Generate timestamps at a constant rate based on the specified rate. - - :return: A generator that yields timestamps incremented by 1/rate seconds. - :rtype: Generator[float, None, None] - """ - logger.debug("Generating constant rate timestamps with rate: {}", self._rate) - - if self._rate is None or self._rate == 0: - raise ValueError( - "Rate must be > 0 for constant mode, given: {}", self._rate - ) - - start_time = time.time() - time_increment = 1.0 / self._rate - counter = 0 - - while True: - yield_time = start_time + time_increment * counter - logger.debug(f"Yielding timestamp: {yield_time}") - yield yield_time - counter += 1 - - def poisson_times(self) -> Generator[float, None, None]: - """ - Generate timestamps based on a Poisson process, where the number - of requests to be sent per second is drawn from a Poisson distribution. - The inter arrival time between requests is exponentially distributed. - - :return: A generator that yields timestamps based on a Poisson distribution. - :rtype: Generator[float, None, None] - """ - logger.debug("Generating Poisson rate timestamps with rate: {}", self._rate) - - if self._rate is None or self._rate == 0: - raise ValueError("Rate must be > 0 for poisson mode, given: {}", self._rate) - - time_tracker = time.time() - rng = np.random.default_rng() - time_increment = 1.0 - - while True: - num_requests = rng.poisson(self._rate) - - if num_requests == 0: - yield time_tracker + time_increment - else: - inter_arrival_times = rng.exponential(1.0 / self._rate, num_requests) - logger.debug( - "Calculated new inter-arrival times for poisson process: {}", - inter_arrival_times, - ) - arrival_time_tracker = time_tracker - - for arrival_time in inter_arrival_times: - arrival_time_tracker += arrival_time - - if arrival_time_tracker > time_tracker + time_increment: - logger.debug( - "Arrival time tracker: {} is greater than current time", - arrival_time_tracker, - ) - break - - yield arrival_time_tracker - - time_tracker += time_increment # Move on to the next time period diff --git a/src/guidellm/scheduler/result.py b/src/guidellm/scheduler/result.py new file mode 100644 index 00000000..ab1094ad --- /dev/null +++ b/src/guidellm/scheduler/result.py @@ -0,0 +1,137 @@ +from typing import ( + Generic, + Literal, + Optional, +) + +from guidellm.objects import StandardBaseModel +from guidellm.scheduler.strategy import SchedulingStrategy +from guidellm.scheduler.types import RequestT, ResponseT + +__all__ = [ + "SchedulerResult", + "SchedulerRequestResult", + "SchedulerRunInfo", + "SchedulerRequestInfo", +] + + +class SchedulerRunInfo(StandardBaseModel): + """ + Information about the current run of the scheduler. + This class holds metadata about the scheduling run, + including the start and end times, the number of processes, + and the scheduling strategy used. + It also tracks the number of requests created, queued, pending, + and completed during the run. 
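# --- Editor's illustrative sketch (not part of this patch) ---
# How the run-level counters described above move as one request flows through
# the scheduler. SynchronousStrategy is used only to satisfy the required
# `strategy` field; the scheduler itself mutates these same counters in
# _add_requests and _check_result_ready.
import time

from guidellm.scheduler import SchedulerRunInfo, SynchronousStrategy

info = SchedulerRunInfo(
    start_time=time.time(),
    end_time=time.time() + 60.0,  # run for at most a minute
    end_number=1,                 # a single request
    processes=1,
    strategy=SynchronousStrategy(),
)

info.created_requests += 1      # request pulled from the loader
info.queued_requests += 1       # placed on the multiprocessing queue
info.queued_requests -= 1       # a worker dequeued it ("request_scheduled")
info.scheduled_requests += 1
info.scheduled_requests -= 1    # the worker began resolving it ("request_start")
info.processing_requests += 1
info.processing_requests -= 1   # the response came back ("request_complete")
info.completed_requests += 1
# --- end sketch ---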
+ + :param start_time: The start time of the scheduling run. + :param end_time: The end time of the scheduling run; + if None, then this will be math.inf. + :param end_number: The maximum number of requests to be processed; + if None, then this will be math.inf. + :param processes: The number of processes used in the scheduling run. + :param strategy: The scheduling strategy used in the run. + This should be an instance of SchedulingStrategy. + :param created_requests: The number of requests created during the run. + :param queued_requests: The number of requests queued during the run. + :param scheduled_requests: The number of requests scheduled during the run. + (requests pending being sent to the worker but recieved by a process) + :param processing_requests: The number of requests actively being run. + :param completed_requests: The number of requests completed during the run. + """ + + start_time: float + end_time: float + end_number: float + processes: int + strategy: SchedulingStrategy + + created_requests: int = 0 + queued_requests: int = 0 + scheduled_requests: int = 0 + processing_requests: int = 0 + completed_requests: int = 0 + + +class SchedulerRequestInfo(StandardBaseModel): + """ + Information about a specific request run through the scheduler. + This class holds metadata about the request, including + the targeted start time, queued time, start time, end time, + and the process ID that handled the request. + + :param targeted_start_time: The targeted start time for the request (time.time()). + :param queued_time: The time the request was queued (time.time()). + :param scheduled_time: The time the request was scheduled (time.time()) + (any sleep time before the request was sent to the worker). + :param worker_start: The time the worker started processing request (time.time()). + :param worker_end: The time the worker finished processing request. (time.time()). + :param process_id: The ID of the underlying process that handled the request. + """ + + requested: bool = False + completed: bool = False + errored: bool = False + canceled: bool = False + + targeted_start_time: float = -1 + queued_time: float = -1 + dequeued_time: float = -1 + scheduled_time: float = -1 + worker_start: float = -1 + request_start: float = -1 + request_end: float = -1 + worker_end: float = -1 + process_id: int = -1 + + +class SchedulerResult(StandardBaseModel): + """ + The yielded, iterative result for a scheduler run. + These are triggered on the start and end of the run, + as well as on the start and end of each request. + Depending on the type, it will hold the request and response + along with information and statistics about the request and general run. + + :param type_: The type of the result, which can be one of: + - "run_start": Indicates the start of the run. + - "run_complete": Indicates the completion of the run (teardown happens after). + - "request_start": Indicates the start of a request. + - "request_complete": Indicates the completion of a request. + :param request: The request that was processed. + :param response: The response from the worker for the request. + :param request_info: Information about the request, including + the targeted start time, queued time, start time, end time, + and the process ID that handled the request. + :param run_info: Information about the current run of the scheduler, + including the start and end times, the number of processes, + and the scheduling strategy used. 
+ It also tracks the number of requests created, queued, pending, + and completed during the run. + """ + + pydantic_type: Literal["scheduler_result"] = "scheduler_result" + type_: Literal[ + "run_start", + "run_complete", + "request_scheduled", + "request_start", + "request_complete", + ] + run_info: SchedulerRunInfo + + +class SchedulerRequestResult( + SchedulerResult, + Generic[RequestT, ResponseT], +): + pydantic_type: Literal["scheduler_request_result"] = "scheduler_request_result" # type: ignore[assignment] + type_: Literal[ + "request_scheduled", + "request_start", + "request_complete", + ] + request: RequestT + request_info: SchedulerRequestInfo + response: Optional[ResponseT] = None diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py index 2f8c44fe..0be0ebb7 100644 --- a/src/guidellm/scheduler/scheduler.py +++ b/src/guidellm/scheduler/scheduler.py @@ -1,417 +1,366 @@ import asyncio import math +import multiprocessing +import multiprocessing.queues import time -from dataclasses import dataclass -from typing import AsyncGenerator, Literal, Optional, Union, get_args +from concurrent.futures import ProcessPoolExecutor +from typing import ( + Any, + AsyncGenerator, + Generic, + Iterable, + Iterator, + List, + Optional, + Tuple, + Union, +) from loguru import logger -from guidellm.backend import Backend, ResponseSummary, StreamingTextResponse from guidellm.config import settings -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationError, - TextGenerationRequest, - TextGenerationResult, +from guidellm.scheduler.result import ( + SchedulerRequestResult, + SchedulerResult, + SchedulerRunInfo, +) +from guidellm.scheduler.strategy import SchedulingStrategy +from guidellm.scheduler.types import RequestT, ResponseT +from guidellm.scheduler.worker import ( + RequestsWorker, + WorkerProcessRequest, + WorkerProcessResult, ) -from guidellm.request import RequestGenerator -from guidellm.scheduler.load_generator import LoadGenerationMode, LoadGenerator - -__all__ = ["Scheduler", "SchedulerResult"] - - -@dataclass -class SchedulerResult: - """ - Represents the result of a single task execution within the Scheduler. - - :param completed: Indicates if the task is completed. - :type completed: bool - :param count_total: Total number of tasks to be executed. - :type count_total: int - :param count_completed: Number of tasks that have been completed so far. - :type count_completed: int - :param report: Benchmark data for the task execution. - :type benchmark: TextGenerationBenchmark - :param current_result: The result of the current request, if any. - :type current_result: Optional[Union[TextGenerationResult, Exception]] - """ - completed: bool - count_total: int - count_completed: int - benchmark: TextGenerationBenchmark - current_result: Optional[Union[TextGenerationResult, TextGenerationError]] = None +__all__ = ["Scheduler"] -class Scheduler: +class Scheduler(Generic[RequestT, ResponseT]): """ - Schedules and manages the execution of tasks for text generation requests. - - :param generator: The request generator that produces text generation requests. - :type generator: RequestGenerator - :param backend: The backend that processes the requests. - :type backend: Backend - :param mode: The mode of load generation (e.g., synchronous, asynchronous). - :type mode: LoadGenerationMode - :param rate: The rate at which requests are generated, if applicable. - :type rate: Optional[float] - :param max_number: Maximum number of requests to be processed. 
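# --- Editor's illustrative sketch (not part of this patch) ---
# Deriving simple latency figures from the SchedulerRequestInfo timestamps defined
# above. `result` is assumed to be a SchedulerRequestResult yielded by
# Scheduler.run() with type_ == "request_complete".
def summarize(info) -> dict:
    return {
        # time spent on the multiprocessing queue before a worker picked it up
        "queue_wait_s": info.dequeued_time - info.queued_time,
        # how far the actual send drifted from the targeted start time
        "start_drift_s": info.request_start - info.targeted_start_time,
        # end-to-end latency of the backend call itself
        "request_latency_s": info.request_end - info.request_start,
        "succeeded": info.completed and not info.errored and not info.canceled,
    }

# metrics = summarize(result.request_info)
# --- end sketch ---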
- :type max_number: Optional[int] - :param max_duration: Maximum duration in seconds for which requests - should be processed. - :type max_duration: Optional[float] - - :raises ValueError: If neither max_number nor max_duration is specified or - if they are not positive. + A class that handles the scheduling of requests to a worker. + This class is responsible for managing the lifecycle of the requests, + including their creation, queuing, and processing. + It uses a multiprocessing approach to handle requests concurrently + and efficiently, based on the specified scheduling strategy. + The Scheduler class is designed to work with a RequestsWorker, + which is an abstract base class that defines the interface for a worker + that can resolve requests asynchronously or synchronously. + The Scheduler class also supports different scheduling strategies, + including synchronous, throughput, and concurrent strategies. + + :param worker: The worker that will process the requests. + This should be an instance of RequestsWorker. + :param request_loader: An iterable that generates requests. + This can be a list, generator, or any other iterable. + The requests will be processed by the worker. """ def __init__( self, - generator: RequestGenerator, - backend: Backend, - mode: LoadGenerationMode = "synchronous", - rate: Optional[float] = None, - max_number: Optional[int] = None, - max_duration: Optional[float] = None, + worker: RequestsWorker[RequestT, ResponseT], + request_loader: Iterable[RequestT], ): - logger.info( - "Scheduler initialized with params: generator={}, backend={}, mode={}, " - "rate={}, max_number={}, max_duration={}", - generator, - backend, - mode, - rate, - max_number, - max_duration, - ) - - if mode not in get_args(LoadGenerationMode): - err = ValueError( - f"{mode} is not a valid Load Generation Mode. " - f"Valid options are {get_args(LoadGenerationMode)}" - ) - logger.error(err) - raise err - - if not max_number and not max_duration: - err = ValueError("Either max_number or max_duration must be specified") - logger.error(err) - raise err - - if max_number and max_number <= 0: - err = ValueError(f"max_number must be > 0, given: {max_number}") - logger.error(err) - raise err - - if max_duration and max_duration <= 0: - err = ValueError(f"max_duration must be > 0, given: {max_duration}") - logger.error(err) - raise err - - if mode in ["constant", "poisson"] and not rate: - err = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") - logger.error(err) - raise err - - self._generator = generator - self._backend = backend - self._mode = mode - self._rate = rate - self._max_number = max_number - self._max_duration = max_duration - - self._load_generator = LoadGenerator(mode, rate) - - @property - def generator(self) -> RequestGenerator: - """ - The request generator that produces text generation requests. - - :return: The request generator instance. - :rtype: RequestGenerator - """ - return self._generator - - @property - def backend(self) -> Backend: - """ - The backend that processes the requests. - - :return: The backend instance. - :rtype: Backend - """ - return self._backend + if not isinstance(worker, RequestsWorker): + raise ValueError(f"Invalid worker: {worker}") - @property - def mode(self) -> LoadGenerationMode: - """ - The mode of load generation (e.g., synchronous, asynchronous). - - :return: The load generation mode. 
- :rtype: LoadGenerationMode - """ - return self._mode - - @property - def rate(self) -> Optional[float]: - """ - The rate at which requests are generated, if applicable. + if not isinstance(request_loader, Iterable): + raise ValueError(f"Invalid request_loader: {request_loader}") - :return: The rate of request generation. - :rtype: Optional[float] - """ - return self._rate + self.worker = worker + self.request_loader = request_loader - @property - def max_number(self) -> Optional[int]: + async def run( + self, + scheduling_strategy: SchedulingStrategy, + max_number: Optional[int] = None, + max_duration: Optional[float] = None, + ) -> AsyncGenerator[ + Union[SchedulerResult, SchedulerRequestResult[RequestT, ResponseT]], None + ]: """ - Maximum number of requests to be processed. - - :return: The maximum number of requests. - :rtype: Optional[int] + The main method that runs the scheduler. + This method is a generator that yields SchedulerResult objects + at the start and end of the run, as well as at the start and end + of each request. + It uses multiprocessing to handle requests concurrently + and efficiently, based on the specified scheduling strategy. + The method also handles the lifecycle of the requests, + including their creation, queuing, and processing. + The method is designed to be used as an asynchronous generator, + allowing it to be used with asyncio and other asynchronous frameworks. + + :param scheduling_strategy: The scheduling strategy to use. + Specifies the times at which requests will be sent as well how many + worker processes are used and if requests are scheduled sync or async. + This can be one of the following: + - "synchronous": Requests are sent synchronously. + - "throughput": Requests are sent at the maximum rate possible. + - An instance of SchedulingStrategy. + :param max_number: The maximum number of requests to process. + If None, then no limit is set and either the iterator must be exhaustible + or the max_duration must be set. + :param max_duration: The maximum duration for the scheduling run. + If None, then no limit is set and either the iterator must be exhaustible + or the max_number must be set. + :return: An asynchronous generator that yields SchedulerResult objects. + Each SchedulerResult object contains information about the request, + the response, and the run information. """ - return self._max_number + if scheduling_strategy is None or not isinstance( + scheduling_strategy, SchedulingStrategy + ): + raise ValueError(f"Invalid scheduling strategy: {scheduling_strategy}") - @property - def max_duration(self) -> Optional[float]: - """ - Maximum duration in seconds for which requests should be processed. + if max_number is not None and max_number < 1: + raise ValueError(f"Invalid max_number: {max_number}") - :return: The maximum duration in seconds. - :rtype: Optional[float] - """ - return self._max_duration + if max_duration is not None and max_duration < 0: + raise ValueError(f"Invalid max_duration: {max_duration}") - @property - def load_generator(self) -> LoadGenerator: - """ - The load generator responsible for generating load based on mode and rate. 
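# --- Editor's illustrative sketch (not part of this patch) ---
# Driving the new Scheduler.run() async generator described above. `worker` and
# `request_loader` are assumed to be an already-built RequestsWorker implementation
# (e.g. the GenerativeRequestsWorker later in this diff) and any iterable of requests.
import asyncio

from guidellm.scheduler import AsyncConstantStrategy, Scheduler, SchedulerRequestResult

async def run_benchmark(worker, request_loader):
    scheduler = Scheduler(worker=worker, request_loader=request_loader)
    strategy = AsyncConstantStrategy(rate=5.0)  # ~5 requests per second

    async for result in scheduler.run(
        scheduling_strategy=strategy,
        max_number=100,     # stop after 100 requests ...
        max_duration=60.0,  # ... or 60 seconds, whichever comes first
    ):
        if isinstance(result, SchedulerRequestResult) and result.type_ == "request_complete":
            info = result.request_info
            print(info.process_id, info.request_end - info.request_start)

# asyncio.run(run_benchmark(worker, request_loader))
# --- end sketch ---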
+ with multiprocessing.Manager() as manager, ProcessPoolExecutor( + max_workers=scheduling_strategy.processes_limit + ) as executor: + requests_iter: Optional[Iterator[Any]] = None + futures, requests_queue, responses_queue = await self._start_processes( + manager, executor, scheduling_strategy + ) + run_info, requests_iter, times_iter = self._run_setup( + futures, scheduling_strategy, max_number, max_duration + ) + yield SchedulerResult( + type_="run_start", + run_info=run_info, + ) - :return: The load generator instance. - :rtype: LoadGenerator - """ - return self._load_generator + try: + while True: + # check errors and raise them + for future in futures: + if future.done() and (err := future.exception()) is not None: + raise err + + if ( + requests_iter is None + and run_info.completed_requests >= run_info.created_requests + ): + # we've exhausted all requests we've wanted to run + # and yielded all responses + break + + requests_iter = self._add_requests( + requests_iter, + times_iter, + requests_queue, + run_info, + ) + await asyncio.sleep(0) # enable requests to start + + iter_result = self._check_result_ready( + responses_queue, + run_info, + ) + if iter_result is not None: + yield iter_result + + # yield control to the event loop + await asyncio.sleep(settings.default_async_loop_sleep) + except Exception as err: + raise RuntimeError(f"Scheduler run failed: {err}") from err - @property - def benchmark_mode(self) -> Literal["asynchronous", "synchronous", "throughput"]: - """ - The report mode for the scheduler. + yield SchedulerResult( + type_="run_complete", + run_info=run_info, + ) - :return: The report mode. - :rtype: Literal["asynchronous", "synchronous", "throughput"] - """ - if self._mode == "synchronous": - return "synchronous" + await self._stop_processes(futures, requests_queue) - if self._mode == "throughput": - return "throughput" + async def _start_processes( + self, + manager, + executor: ProcessPoolExecutor, + scheduling_strategy: SchedulingStrategy, + ) -> Tuple[ + List[asyncio.Future], + multiprocessing.Queue, + multiprocessing.Queue, + ]: + await self.worker.prepare_multiprocessing() + requests_queue = manager.Queue( + maxsize=scheduling_strategy.queued_requests_limit + ) + responses_queue = manager.Queue() + per_process_requests_limit = scheduling_strategy.processing_requests_limit // ( + scheduling_strategy.processes_limit + ) - return "asynchronous" + futures = [] + loop = asyncio.get_event_loop() + for process_id in range(scheduling_strategy.processes_limit): + if scheduling_strategy.processing_mode == "sync": + futures.append( + loop.run_in_executor( + executor, + self.worker.process_loop_synchronous, + requests_queue, + responses_queue, + process_id, + ) + ) + elif scheduling_strategy.processing_mode == "async": + futures.append( + loop.run_in_executor( + executor, + self.worker.process_loop_asynchronous, + requests_queue, + responses_queue, + per_process_requests_limit, + process_id, + ) + ) + else: + raise ValueError( + f"Invalid processing mode: {scheduling_strategy.processing_mode} " + f"for strategy: {scheduling_strategy}" + ) - async def run(self) -> AsyncGenerator[SchedulerResult, None]: - """ - Run the scheduler to process requests based on the configured mode, rate, - maximum number, and maximum duration. + await asyncio.sleep(0.1) # give time for processes to start - :yield: The result of each task executed by the scheduler. 
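# --- Editor's illustrative sketch (not part of this patch) ---
# The per-process limit computed in _start_processes above simply divides the
# strategy's total in-flight budget across the worker processes. Numbers here are
# hypothetical; real values come from the strategy and settings.
processing_requests_limit = 512  # e.g. settings.max_concurrency
processes_limit = 8              # e.g. min(cpu_count - 1, settings.max_worker_processes)

per_process_requests_limit = processing_requests_limit // processes_limit
assert per_process_requests_limit == 64  # each async worker keeps at most 64 requests in flight
# --- end sketch ---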
- :rtype: Generator[SchedulerResult, None, None] - """ - logger.info("Starting Scheduler run") + return futures, requests_queue, responses_queue - benchmark = TextGenerationBenchmark(mode=self.benchmark_mode, rate=self.rate) + def _run_setup( + self, + processes: List[asyncio.Future], + scheduling_strategy: SchedulingStrategy, + max_number: Optional[int], + max_duration: Optional[float], + ) -> Tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]: + requests_iter = iter(self.request_loader) start_time = time.time() - end_time = start_time + self.max_duration if self.max_duration else math.inf - max_number = float(self.max_number) if self.max_number else math.inf - runner = self._run_sync if self._mode == "synchronous" else self._run_async - count_total = ( - self.max_number - if self.max_number - else round(self.max_duration) - if self.max_duration - else 0 - ) + times_iter = iter(scheduling_strategy.request_times()) + end_time = time.time() + (max_duration or math.inf) + end_number = max_number or math.inf - # yield initial result for progress tracking - yield SchedulerResult( - completed=False, - count_total=count_total, - count_completed=0, - benchmark=benchmark, - ) - - run_count = 0 - async for res in runner(benchmark, end_time, max_number): - run_count += 1 - count_completed = ( - min(run_count, self.max_number) - if self.max_number - else round(time.time() - start_time) - if self.max_duration - else 0 - ) - - yield SchedulerResult( - completed=False, - count_total=count_total, - count_completed=count_completed, - benchmark=benchmark, - current_result=res, + try: + # update end number if the request loader is finite and less than max + iter_length = len(self.request_loader) # type: ignore[arg-type] + if 0 < iter_length < end_number: + end_number = iter_length + except Exception: # noqa: BLE001, S110 + pass + + if end_number == math.inf and end_time is None: + logger.warning( + "No end number or end time set, " + "scheduler will run indefinitely until the request loader is exhausted." 
) - logger.info("Scheduler run completed") - - yield SchedulerResult( - completed=True, - count_total=count_total, - count_completed=( - benchmark.request_count + benchmark.error_count - if self.max_number - else round(time.time() - start_time) - if self.max_duration - else 0 - ), - benchmark=benchmark, + info = SchedulerRunInfo( + start_time=start_time, + end_time=end_time, + end_number=end_number, + processes=len(processes), + strategy=scheduling_strategy, ) - async def _run_sync( - self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float - ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: - for index, (request, submit_at) in enumerate( - zip(self.generator, self.load_generator.times()) - ): - if index >= max_number or time.time() >= end_time: - break + return info, requests_iter, times_iter - logger.debug( - "Running synchronous request={} at submit_at={}", - request, - submit_at, + def _add_requests( + self, + requests_iter: Optional[Iterator[Any]], + times_iter: Iterator[float], + requests_queue: multiprocessing.Queue, + run_info: SchedulerRunInfo, + ) -> Optional[Iterator[Any]]: + if requests_iter is not None: + try: + added_count = 0 + + while ( + not requests_queue.full() + and added_count < settings.max_add_requests_per_loop + ): + if run_info.created_requests >= run_info.end_number: + raise StopIteration + + if ( + request_time := next(times_iter) + ) >= run_info.end_time or time.time() >= run_info.end_time: + raise StopIteration + + request = next(requests_iter) + work_req: WorkerProcessRequest[RequestT] = WorkerProcessRequest( + request=request, + start_time=request_time, + timeout_time=run_info.end_time, + queued_time=time.time(), + ) + requests_queue.put(work_req) + + run_info.created_requests += 1 + run_info.queued_requests += 1 + added_count += 1 + except StopIteration: + # we've reached the limit number, limit time, or exhausted the requests + # set to None to stop adding more and tell the loop no more requests + requests_iter = None + + return requests_iter + + def _check_result_ready( + self, + responses_queue: multiprocessing.Queue, + run_info: SchedulerRunInfo, + ) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]: + try: + process_response: WorkerProcessResult[RequestT, ResponseT] = ( + responses_queue.get_nowait() ) - benchmark.request_started() - result = await self._scheduled_request(request, submit_at, end_time) - if result is not None: - benchmark.request_completed(result) - logger.debug("Request completed with output: {}", result) - yield result - - async def _run_async( - self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float - ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: - tasks = [] - pending = asyncio.Semaphore(settings.max_concurrency) - - for index, (request, submit_at) in enumerate( - zip(self.generator, self.load_generator.times()) - ): - # wait for number of pending tasks to be >= max_concurrency - await pending.acquire() - - if index >= max_number or time.time() >= end_time or submit_at >= end_time: - break - - logger.debug( - "Running asynchronous request={} at submit_at={}", - request, - submit_at, + except multiprocessing.queues.Empty: # type: ignore[attr-defined] + return None + + if process_response.type_ == "request_scheduled": + run_info.queued_requests -= 1 + run_info.scheduled_requests += 1 + + return SchedulerRequestResult( + type_="request_scheduled", + run_info=run_info, + request=process_response.request, + 
request_info=process_response.info, + response=None, ) - def _completed(_task: asyncio.Task) -> None: - # NOTE: this is only ok because we don't use threads/processes - nonlocal pending - pending.release() - _res = _task.result() + if process_response.type_ == "request_start": + run_info.scheduled_requests -= 1 + run_info.processing_requests += 1 - if _res: - benchmark.request_completed(_res) - logger.debug("Request completed: {}", _res) - - benchmark.request_started() - task = asyncio.create_task( - self._scheduled_request(request, submit_at, end_time) + return SchedulerRequestResult( + type_="request_start", + run_info=run_info, + request=process_response.request, + request_info=process_response.info, + response=None, ) - task.add_done_callback(_completed) - tasks.append(task) - - # release control to the event loop for other tasks - await asyncio.sleep(0) - for compl_task in asyncio.as_completed(tasks): - task_res = await compl_task - if task_res is not None: - yield task_res + if process_response.type_ == "request_complete": + run_info.processing_requests -= 1 + run_info.completed_requests += 1 - async def _scheduled_request( - self, request: TextGenerationRequest, submit_at: float, end_time: float - ) -> Optional[Union[TextGenerationResult, TextGenerationError]]: - try: - if submit_at > end_time: - raise asyncio.TimeoutError( - f"Request submission time {submit_at} " - f"is greater than end time {end_time}" - ) - - if submit_at > time.time(): - await asyncio.sleep(submit_at - time.time()) - - timeout = ( - end_time - time.time() if end_time and end_time < math.inf else None + return SchedulerRequestResult( + type_="request_complete", + run_info=run_info, + request=process_response.request, + request_info=process_response.info, + response=process_response.response, ) + raise ValueError(f"Invalid process response type: {process_response}") - return await asyncio.wait_for( - self._resolve_text_request(request), timeout=timeout - ) - except Exception as exc: # noqa: BLE001 - if not isinstance(exc, asyncio.TimeoutError): - logger.warning("Request {} failed: {}", request, exc) - - return TextGenerationError(request=request, message=str(exc)) - - async def _resolve_text_request( - self, request: TextGenerationRequest - ) -> TextGenerationResult: - final_resp = None - first_token_time = None - last_token_time = None - - if request.type_ == "text": - async for resp in self._backend.text_completions( # type: ignore[attr-defined] - prompt=request.prompt, - id_=request.id, - prompt_token_count=request.prompt_token_count, - output_token_count=request.output_token_count, - ): - if isinstance(resp, StreamingTextResponse) and resp.type_ == "iter": - first_token_time = first_token_time or resp.time - last_token_time = resp.time - - final_resp = resp - elif request.type_ == "chat": - async for resp in self._backend.chat_completions( # type: ignore[attr-defined] - content=request.prompt, - id_=request.id, - prompt_token_count=request.prompt_token_count, - output_token_count=request.output_token_count, - ): - if isinstance(resp, StreamingTextResponse) and resp.type_ == "iter": - first_token_time = first_token_time or resp.time - last_token_time = resp.time - - final_resp = resp - - if not final_resp or not isinstance(final_resp, ResponseSummary): - raise ValueError( - f"Invalid final response for request: {request} " - f"and backend: {self._backend}, recieved: {final_resp}" - ) + async def _stop_processes( + self, + futures: List[asyncio.Future], + requests_queue: multiprocessing.Queue, + ): + for _ in 
futures: + requests_queue.put(None) - return TextGenerationResult( - request=request, - prompt_token_count=final_resp.prompt_tokens, - output=final_resp.value, - output_token_count=resp.output_tokens, - start_time=resp.start_time, - end_time=resp.end_time, - first_token_time=first_token_time, - last_token_time=last_token_time, - ) + await asyncio.gather(*futures) diff --git a/src/guidellm/scheduler/strategy.py b/src/guidellm/scheduler/strategy.py new file mode 100644 index 00000000..7e8d253a --- /dev/null +++ b/src/guidellm/scheduler/strategy.py @@ -0,0 +1,493 @@ +import math +import os +import random +import time +from typing import ( + Generator, + Literal, + Optional, + Union, +) + +from pydantic import Field + +from guidellm.config import settings +from guidellm.objects import StandardBaseModel + +__all__ = [ + "StrategyType", + "SchedulingStrategy", + "SynchronousStrategy", + "ConcurrentStrategy", + "ThroughputStrategy", + "AsyncConstantStrategy", + "AsyncPoissonStrategy", + "strategy_display_str", +] + + +StrategyType = Literal["synchronous", "concurrent", "throughput", "constant", "poisson"] + + +class SchedulingStrategy(StandardBaseModel): + """ + An abstract base class for scheduling strategies. + This class defines the interface for scheduling requests and provides + a common structure for all scheduling strategies. + Subclasses should implement the `request_times` method to provide + specific scheduling behavior. + + :param type_: The type of scheduling strategy to use. + This should be one of the predefined strategy types. + """ + + type_: Literal["strategy"] = Field( + description="The type of scheduling strategy schedule requests with.", + ) + + @property + def processing_mode(self) -> Literal["sync", "async"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + This property should be implemented by subclasses to return + the appropriate processing mode. + + :return: The processing mode for the scheduling strategy, + either 'sync' or 'async'. + """ + return "async" + + @property + def processes_limit(self) -> int: + """ + The limit on the number of worker processes for the scheduling strategy. + It determines how many worker processes are created + for the scheduling strategy and must be implemented by subclasses. + + :return: The number of processes for the scheduling strategy. + """ + cpu_cores = os.cpu_count() or 1 + + return min(max(1, cpu_cores - 1), settings.max_worker_processes) + + @property + def queued_requests_limit(self) -> Optional[int]: + """ + The maximum number of queued requests for the scheduling strategy. + It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: The maximum number of queued requests for the scheduling strategy. + """ + return settings.max_concurrency + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: The maximum number of processing requests for the scheduling strategy. + """ + return settings.max_concurrency + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields timestamps for when requests should be sent. 
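# --- Editor's illustrative sketch (not part of this patch) ---
# A minimal custom strategy built on the SchedulingStrategy base class above: it
# yields one timestamp every `interval` seconds and inherits the default async
# processing mode and concurrency limits. The built-in strategies below register
# their own type_ literal instead of reusing "strategy".
import time
from typing import Generator, Literal

from guidellm.scheduler import SchedulingStrategy

class FixedIntervalStrategy(SchedulingStrategy):
    type_: Literal["strategy"] = "strategy"
    interval: float = 2.0

    def request_times(self) -> Generator[float, None, None]:
        start = time.time()
        counter = 0
        while True:
            yield start + counter * self.interval
            counter += 1
# --- end sketch ---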
+ This method should be implemented by subclasses to provide specific + scheduling behavior. + + :return: A generator that yields timestamps for request scheduling + or -1 for requests that should be sent immediately. + """ + raise NotImplementedError("Subclasses must implement request_times() method.") + + +class SynchronousStrategy(SchedulingStrategy): + """ + A class representing a synchronous scheduling strategy. + This strategy schedules requests synchronously, one at a time, + with the maximum rate possible. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for synchronous scheduling. + + :param type_: The synchronous StrategyType to schedule requests synchronously. + """ + + type_: Literal["synchronous"] = "synchronous" # type: ignore[assignment] + + @property + def processing_mode(self) -> Literal["sync"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + + :return: 'sync' for synchronous scheduling strategy + for the single worker process. + """ + return "sync" + + @property + def processes_limit(self) -> int: + """ + The limit on the number of worker processes for the scheduling strategy. + It determines how many worker processes are created + for the scheduling strategy and must be implemented by subclasses. + + :return: 1 for the synchronous scheduling strategy to limit + the worker processes to one. + """ + return 1 + + @property + def queued_requests_limit(self) -> int: + """ + The maximum number of queued requests for the scheduling strategy. + It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: 1 for the synchronous scheduling strategy to limit + the queued requests to one that is ready to be processed. + """ + return 1 + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: 1 for the synchronous scheduling strategy to limit + the processing requests to one that is ready to be processed. + """ + return 1 + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields time.time() so requests are sent immediately, + while scheduling them synchronously. + + :return: A generator that yields time.time() for immediate request scheduling. + """ + while True: + yield time.time() + + +class ConcurrentStrategy(SchedulingStrategy): + """ + A class representing a concurrent scheduling strategy. + This strategy schedules requests concurrently with the specified + number of streams. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for concurrent scheduling. + + :param type_: The concurrent StrategyType to schedule requests concurrently. + :param streams: The number of concurrent streams to use for scheduling requests. + Each stream runs synchronously with the maximum rate possible. + This must be a positive integer. + """ + + type_: Literal["concurrent"] = "concurrent" # type: ignore[assignment] + streams: int = Field( + description=( + "The number of concurrent streams to use for scheduling requests. 
" + "Each stream runs sychronously with the maximum rate possible. " + "This must be a positive integer." + ), + gt=0, + ) + + @property + def processing_mode(self) -> Literal["sync"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + + :return: 'sync' for synchronous scheduling strategy + for the multiple worker processes equal to streams. + """ + return "sync" + + @property + def processes_limit(self) -> int: + """ + The limit on the number of worker processes for the scheduling strategy. + It determines how many worker processes are created + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.streams} for the concurrent scheduling strategy to limit + the worker processes to the number of streams. + """ + return self.streams + + @property + def queued_requests_limit(self) -> int: + """ + The maximum number of queued requests for the scheduling strategy. + It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.streams} for the concurrent scheduling strategy to limit + the queued requests to the number of streams that are ready to be processed. + """ + return self.streams + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.streams} for the concurrent scheduling strategy to limit + the processing requests to the number of streams that ready to be processed. + """ + return self.streams + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields time.time() so requests are sent + immediately, while scheduling them concurrently with the specified + number of streams. + + :return: A generator that yields time.time() for immediate request scheduling. + """ + while True: + yield time.time() + + +class ThroughputStrategy(SchedulingStrategy): + """ + A class representing a throughput scheduling strategy. + This strategy schedules as many requests asynchronously as possible, + with the maximum rate possible. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for throughput scheduling. + + :param type_: The throughput StrategyType to schedule requests asynchronously. + """ + + type_: Literal["throughput"] = "throughput" # type: ignore[assignment] + max_concurrency: Optional[int] = Field( + default=None, + description=( + "The maximum number of concurrent requests to schedule. " + "If set to None, the concurrency value from settings will be used. " + "This must be a positive integer greater than 0." + ), + gt=0, + ) + + @property + def processing_mode(self) -> Literal["async"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + + :return: 'async' for asynchronous scheduling strategy + for the multiple worker processes handling requests. + """ + return "async" + + @property + def queued_requests_limit(self) -> int: + """ + The maximum number of queued requests for the scheduling strategy. 
+ It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: The processing requests limit to ensure that there are enough + requests even for the worst case scenario where the max concurrent + requests are pulled at once for processing. + """ + return self.processing_requests_limit + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.max_concurrency} for the throughput scheduling strategy to limit + the processing requests to the maximum concurrency. + If max_concurrency is None, then the default processing requests limit + will be used. + """ + return self.max_concurrency or super().processing_requests_limit + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields the start time.time() so requests are sent + immediately, while scheduling as many asynchronously as possible. + + :return: A generator that yields the start time.time() + for immediate request scheduling. + """ + start_time = time.time() + + while True: + yield start_time + + +class AsyncConstantStrategy(ThroughputStrategy): + """ + A class representing an asynchronous constant scheduling strategy. + This strategy schedules requests asynchronously at a constant request rate + in requests per second. + If initial_burst is set, it will send an initial burst of math.floor(rate) + requests to reach the target rate. + This is useful to ensure that the target rate is reached quickly + and then maintained. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for asynchronous constant scheduling. + + :param type_: The constant StrategyType to schedule requests asynchronously. + :param rate: The rate at which to schedule requests asynchronously in + requests per second. This must be a positive float. + :param initial_burst: True to send an initial burst of requests + (math.floor(self.rate)) to reach target rate. + False to not send an initial burst. + """ + + type_: Literal["constant"] = "constant" # type: ignore[assignment] + rate: float = Field( + description=( + "The rate at which to schedule requests asynchronously in " + "requests per second. This must be a positive float." + ), + gt=0, + ) + initial_burst: bool = Field( + default=True, + description=( + "True to send an initial burst of requests (math.floor(self.rate)) " + "to reach target rate. False to not send an initial burst." + ), + ) + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields timestamps for when requests should be sent. + This method schedules requests asynchronously at a constant rate + in requests per second. + If burst_time is set, it will send an initial burst of requests + to reach the target rate. + This is useful to ensure that the target rate is reached quickly + and then maintained. + + :return: A generator that yields timestamps for request scheduling. 
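# --- Editor's illustrative sketch (not part of this patch) ---
# ThroughputStrategy as defined above only caps in-flight requests when
# max_concurrency is set; otherwise it falls back to the settings-driven default
# from the SchedulingStrategy base class.
from guidellm.scheduler import ThroughputStrategy

capped = ThroughputStrategy(max_concurrency=32)
assert capped.processing_requests_limit == 32
assert capped.queued_requests_limit == 32  # queue sized for the worst case pull

uncapped = ThroughputStrategy()  # limits resolve to settings.max_concurrency
# --- end sketch ---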
+ """ + start_time = time.time() + constant_increment = 1.0 / self.rate + + # handle bursts first to get to the desired rate + if self.initial_burst is not None: + # send an initial burst equal to the rate + # to reach the target rate + burst_count = math.floor(self.rate) + for _ in range(burst_count): + yield start_time + + start_time += constant_increment + + counter = 0 + + # continue with constant rate after bursting + while True: + yield start_time + constant_increment * counter + counter += 1 + + +class AsyncPoissonStrategy(ThroughputStrategy): + """ + A class representing an asynchronous Poisson scheduling strategy. + This strategy schedules requests asynchronously at a Poisson request rate + in requests per second. + If initial_burst is set, it will send an initial burst of math.floor(rate) + requests to reach the target rate. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for asynchronous Poisson scheduling. + + :param type_: The Poisson StrategyType to schedule requests asynchronously. + :param rate: The rate at which to schedule requests asynchronously in + requests per second. This must be a positive float. + :param initial_burst: True to send an initial burst of requests + (math.floor(self.rate)) to reach target rate. + False to not send an initial burst. + """ + + type_: Literal["poisson"] = "poisson" # type: ignore[assignment] + rate: float = Field( + description=( + "The rate at which to schedule requests asynchronously in " + "requests per second. This must be a positive float." + ), + gt=0, + ) + initial_burst: bool = Field( + default=True, + description=( + "True to send an initial burst of requests (math.floor(self.rate)) " + "to reach target rate. False to not send an initial burst." + ), + ) + random_seed: int = Field( + default=42, + description=("The random seed to use for the Poisson distribution. "), + ) + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields timestamps for when requests should be sent. + This method schedules requests asynchronously at a Poisson rate + in requests per second. + The inter arrival time between requests is exponentially distributed + based on the rate. + + :return: A generator that yields timestamps for request scheduling. 
+ """ + start_time = time.time() + + if self.initial_burst is not None: + # send an initial burst equal to the rate + # to reach the target rate + burst_count = math.floor(self.rate) + for _ in range(burst_count): + yield start_time + else: + yield start_time + + # set the random seed for reproducibility + rand = random.Random(self.random_seed) # noqa: S311 + + while True: + inter_arrival_time = rand.expovariate(self.rate) + start_time += inter_arrival_time + yield start_time + + +def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> str: + strategy_type = strategy if isinstance(strategy, str) else strategy.type_ + strategy_instance = strategy if isinstance(strategy, SchedulingStrategy) else None + + if strategy_type == "concurrent": + rate = f"@{strategy_instance.streams}" if strategy_instance else "@##" # type: ignore[attr-defined] + elif strategy_type in ("constant", "poisson"): + rate = f"@{strategy_instance.rate:.2f}" if strategy_instance else "@#.##" # type: ignore[attr-defined] + else: + rate = "" + + return f"{strategy_type}{rate}" diff --git a/src/guidellm/scheduler/types.py b/src/guidellm/scheduler/types.py new file mode 100644 index 00000000..42535d71 --- /dev/null +++ b/src/guidellm/scheduler/types.py @@ -0,0 +1,7 @@ +from typing import TypeVar + +__all__ = ["RequestT", "ResponseT"] + + +RequestT = TypeVar("RequestT") +ResponseT = TypeVar("ResponseT") diff --git a/src/guidellm/scheduler/worker.py b/src/guidellm/scheduler/worker.py new file mode 100644 index 00000000..44444c51 --- /dev/null +++ b/src/guidellm/scheduler/worker.py @@ -0,0 +1,512 @@ +import asyncio +import math +import multiprocessing +import multiprocessing.queues +import time +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import ( + Any, + AsyncGenerator, + Dict, + Generic, + Literal, + Optional, + Tuple, + Union, +) + +from loguru import logger +from pydantic import Field + +from guidellm.backend import ( + Backend, + BackendType, + RequestArgs, + ResponseSummary, + StreamingTextResponse, +) +from guidellm.objects import StandardBaseModel +from guidellm.request import GenerationRequest +from guidellm.scheduler.result import SchedulerRequestInfo +from guidellm.scheduler.types import RequestT, ResponseT + +__all__ = [ + "WorkerProcessRequest", + "WorkerProcessResult", + "ResolveStatus", + "WorkerDescription", + "RequestsWorker", + "GenerativeRequestsWorkerDescription", + "GenerativeRequestsWorker", +] + + +@dataclass +class WorkerProcessRequest(Generic[RequestT]): + request: RequestT + start_time: float + timeout_time: float + queued_time: float + + +@dataclass +class WorkerProcessResult(Generic[RequestT, ResponseT]): + type_: Literal["request_scheduled", "request_start", "request_complete"] + request: RequestT + response: Optional[ResponseT] + info: SchedulerRequestInfo + + +@dataclass +class ResolveStatus: + requested: bool + completed: bool + errored: bool + canceled: bool + + request_start: float + request_end: float + + +class WorkerDescription(StandardBaseModel): + type_: Literal["worker"] = "worker" + + +class RequestsWorker(ABC, Generic[RequestT, ResponseT]): + """ + An abstract base class for a worker that processes requests. + This class defines the interface for a worker that can resolve requests + asynchronously or synchronously within the Scheduler class. + Subclasses must implement the `resolve` method, + which takes a request directly given from the load generator, + along with the desired start_time for the request and a timeout_time. 
+ The `resolve` method should return the response from the backend. + """ + + @property + @abstractmethod + def description(self) -> WorkerDescription: + """ + An abstract property that must be implemented by subclasses. + This property should return a Serializable class representing the information + about the worker instance. + """ + ... + + @abstractmethod + async def prepare_multiprocessing(self): + """ + An abstract method that must be implemented by subclasses. + This is useful for workers that have instance state that can not + be shared across processes and should be cleared out and re-initialized + for each new process. + """ + ... + + @abstractmethod + async def resolve( + self, + request: RequestT, + timeout_time: float, + ) -> Tuple[ResolveStatus, ResponseT]: + """ + An abstract method that must be implemented by subclasses. + This method should handle the resolution of a request through asyncio, + including any necessary backend processing and response handling. + + :param request: The request to be resolved generated by the load generator. + :param timeout_time: The timeout time for the request, if there is no timeout + given, then this will be math.inf. + :return: The response from the worker. + """ + ... + + async def get_request( + self, requests_queue: multiprocessing.Queue + ) -> Optional[WorkerProcessRequest[RequestT]]: + return await asyncio.to_thread(requests_queue.get) # type: ignore[attr-defined] + + async def send_result( + self, + results_queue: multiprocessing.Queue, + result: WorkerProcessResult[RequestT, ResponseT], + ): + await asyncio.to_thread(results_queue.put, result) # type: ignore[attr-defined] + + async def resolve_scheduler_request( + self, + request: Any, + queued_time: float, + dequeued_time: float, + start_time: float, + timeout_time: float, + results_queue: multiprocessing.Queue, + process_id: int, + ): + info = SchedulerRequestInfo( + targeted_start_time=start_time, + queued_time=queued_time, + dequeued_time=dequeued_time, + scheduled_time=time.time(), + process_id=process_id, + ) + result: WorkerProcessResult[RequestT, ResponseT] = WorkerProcessResult( + type_="request_scheduled", + request=request, + response=None, + info=info, + ) + asyncio.create_task(self.send_result(results_queue, result)) + + if (wait_time := start_time - time.time()) > 0: + await asyncio.sleep(wait_time) + + info.worker_start = time.time() + result = WorkerProcessResult( + type_="request_start", + request=request, + response=None, + info=info, + ) + asyncio.create_task(self.send_result(results_queue, result)) + + status, response = await self.resolve(request, timeout_time) + info.worker_end = time.time() + info.requested = status.requested + info.completed = status.completed + info.errored = status.errored + info.canceled = status.canceled + info.request_start = status.request_start + info.request_end = status.request_end + result = WorkerProcessResult( + type_="request_complete", + request=request, + response=response, + info=info, + ) + asyncio.create_task(self.send_result(results_queue, result)) + + def process_loop_synchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + process_id: int, + ): + async def _process_runner(): + while ( + process_request := await self.get_request(requests_queue) + ) is not None: + dequeued_time = time.time() + + await self.resolve_scheduler_request( + request=process_request.request, + queued_time=process_request.queued_time, + dequeued_time=dequeued_time, + 
start_time=process_request.start_time, + timeout_time=process_request.timeout_time, + results_queue=results_queue, + process_id=process_id, + ) + + try: + asyncio.run(_process_runner()) + except Exception as exc: # noqa: BLE001 + logger.error( + f"Error in worker process {process_id}: {exc}", + exc_info=True, + stack_info=True, + ) + + def process_loop_asynchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + max_concurrency: Optional[int], + process_id: int, + ): + async def _process_runner(): + pending = asyncio.Semaphore(max_concurrency) if max_concurrency else None + + while ( + process_request := await self.get_request(requests_queue) + ) is not None: + dequeued_time = time.time() + + if pending: + await pending.acquire() + + def _task_done(_: asyncio.Task): + nonlocal pending + if pending: + pending.release() + + task = asyncio.create_task( + self.resolve_scheduler_request( + request=process_request.request, + queued_time=process_request.queued_time, + dequeued_time=dequeued_time, + start_time=process_request.start_time, + timeout_time=process_request.timeout_time, + results_queue=results_queue, + process_id=process_id, + ) + ) + task.add_done_callback(_task_done) + await asyncio.sleep(0) # enable start task immediately + + try: + asyncio.run(_process_runner()) + except Exception as exc: # noqa: BLE001 + logger.error( + f"Error in worker process {process_id}: {exc}", + exc_info=True, + stack_info=True, + ) + + +class GenerativeRequestsWorkerDescription(WorkerDescription): + type_: Literal["generative_requests_worker"] = "generative_requests_worker" # type: ignore[assignment] + backend_type: BackendType + backend_target: str + backend_model: str + backend_info: Dict[str, Any] = Field( + default_factory=dict, + ) + + +class GenerativeRequestsWorker(RequestsWorker[GenerationRequest, ResponseSummary]): + """ + A class that handles the execution of requests using a backend. + This class is responsible for sending requests to the backend, + handling responses, and managing errors. + + :param backend: The backend to use for handling requests. + This should be an instance of Backend such as an OpenAIHTTPBackend. + """ + + def __init__(self, backend: Backend): + self.backend = backend + + @property + def description(self) -> GenerativeRequestsWorkerDescription: + """ + Get the description of the worker. + :return: The description of the worker. + """ + return GenerativeRequestsWorkerDescription( + backend_type=self.backend.type_, + backend_target=self.backend.target, + backend_model=self.backend.model or "None", + backend_info=self.backend.info, + ) + + async def prepare_multiprocessing(self): + """ + Prepare the worker for multiprocessing. + This is useful for workers that have instance state that can not + be shared across processes and should be cleared out and re-initialized + for each new process. 
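+        For this worker, preparation is delegated to the backend's own
+        prepare_multiprocessing hook.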
+ """ + await self.backend.prepare_multiprocessing() + + def process_loop_synchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + process_id: int, + ): + asyncio.run(self.backend.validate()) + super().process_loop_synchronous( + requests_queue=requests_queue, + results_queue=results_queue, + process_id=process_id, + ) + + def process_loop_asynchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + max_concurrency: Optional[int], + process_id: int, + ): + asyncio.run(self.backend.validate()) + super().process_loop_asynchronous( + requests_queue=requests_queue, + results_queue=results_queue, + max_concurrency=max_concurrency, + process_id=process_id, + ) + + async def resolve( + self, + request: GenerationRequest, + timeout_time: float, + ) -> Tuple[ResolveStatus, ResponseSummary]: + """ + Resolve a request by sending it to the backend and handling the response. + This method sends the request to the backend, waits for a response, + and handles any errors that may occur during the process. + + :param request: The request to resolve. + :param timeout_time: The time to wait for a response before timing out. + If timeout_time is math.inf, the request will not timeout. + :return: A ResponseSummary object containing the response from the backend. + If an error occurs, the ResponseSummary will contain the error message. + """ + resolve_start_time = time.time() + response = None + error: Optional[str] = None + status = ResolveStatus( + requested=False, + completed=False, + errored=False, + canceled=False, + request_start=-1, + request_end=-1, + ) + + try: + if timeout_time < time.time(): + raise asyncio.TimeoutError( + "The timeout time has already passed." + ) # exit early + + status.requested = True + request_func, request_kwargs = self._create_request_func_kwargs(request) + + async def _runner(): + # wrap function so we can enforce timeout and + # still return the latest state from the backend + async for resp in request_func(**request_kwargs): # type: ignore[operator] + nonlocal response + response = resp + + await asyncio.wait_for( + _runner(), + timeout=timeout_time - time.time() if timeout_time < math.inf else None, + ) + + if not response: + raise ValueError( + f"No response received for request: {request} " + f"and backend: {self.backend}" + ) + if not isinstance(response, ResponseSummary): + raise ValueError( + f"Received no ResponseSummary for request: {request} " + f"and backend: {self.backend}, received: {response}" + ) + + status.completed = True + except asyncio.TimeoutError: + error = "TimeoutError: The request timed out before completing." 
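+            # a timed-out request is flagged as both errored and canceled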
+ status.errored = True + status.canceled = True + except Exception as exc: # noqa: BLE001 + error = str(exc) + status.errored = True + + return self._handle_response( + status=status, + request=request, + response=response, + error=error, + resolve_start_time=resolve_start_time, + ) + + def _create_request_func_kwargs( + self, + request: GenerationRequest, + ) -> Tuple[ + AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None], + Dict[str, Any], + ]: + request_func: AsyncGenerator[ + Union[StreamingTextResponse, ResponseSummary], None + ] + request_kwargs: Dict[str, Any] + + if request.request_type == "text_completions": + request_func = self.backend.text_completions # type: ignore[assignment] + request_kwargs = { + "prompt": request.content, + "request_id": request.request_id, + "prompt_token_count": request.stats.get("prompt_tokens", None), + "output_token_count": request.constraints.get("output_tokens", None), + **request.params, + } + elif request.request_type == "chat_completions": + request_func = self.backend.chat_completions # type: ignore[assignment] + request_kwargs = { + "content": request.content, + "request_id": request.request_id, + "prompt_token_count": request.stats.get("prompt_tokens", None), + "output_token_count": request.constraints.get("output_tokens", None), + **request.params, + } + else: + raise ValueError( + f"Invalid request type: {request.request_type} for {request}" + ) + + return request_func, request_kwargs + + def _handle_response( + self, + status: ResolveStatus, + request: GenerationRequest, + response: Any, + error: Optional[str], + resolve_start_time: float, + ) -> Tuple[ResolveStatus, ResponseSummary]: + if response is None or not isinstance( + response, (ResponseSummary, StreamingTextResponse) + ): + # nothing received or invalid response, fill in defaults for error + if response: + error = str( + ValueError( + f"Invalid response: {type(response)} for request: {request}; " + ) + ) + (error or "") + + response = ResponseSummary( + value="", + request_args=RequestArgs( + target=self.backend.target, + headers={}, + payload={}, + ), + start_time=resolve_start_time, + end_time=status.request_end, + first_iter_time=None, + last_iter_time=None, + request_id=request.request_id, + error=error or "Unknown error", + ) + elif isinstance(response, StreamingTextResponse): + response = ResponseSummary( + value=response.value, + request_args=RequestArgs( + target=self.backend.target, + headers={}, + payload={}, + ), + start_time=response.start_time, + end_time=time.time(), + first_iter_time=response.first_iter_time, + last_iter_time=response.time if response.iter_count > 0 else None, + request_prompt_tokens=request.stats.get("prompt_tokens", None), + request_output_tokens=request.constraints.get("output_tokens", None), + response_prompt_tokens=None, + response_output_tokens=response.iter_count, + request_id=request.request_id, + error=error or "Unknown error", + ) + + response.error = error + status.request_start = response.start_time + status.request_end = response.end_time + + return status, response diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 2fdd8ca8..3620a3d3 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,40 +1,25 @@ -from .injector import create_report, inject_data -from .progress import BenchmarkReportProgress +from .colors import Colors +from .hf_transformers import ( + check_load_processor, +) +from .random import IntegerRangeSampler from .text import ( + 
EndlessTextCreator, clean_text, filter_text, - is_path, - is_path_like, - is_url, + is_puncutation, load_text, - load_text_lines, - parse_text_objects, - split_lines_by_punctuation, split_text, ) -from .transformers import ( - load_transformers_dataset, - resolve_transformers_dataset, - resolve_transformers_dataset_column, - resolve_transformers_dataset_split, -) __all__ = [ - "BenchmarkReportProgress", - "clean_text", - "create_report", + "IntegerRangeSampler", + "Colors", + "check_load_processor", "filter_text", - "inject_data", - "is_path", - "is_path_like", - "is_url", - "load_text", - "load_text_lines", - "load_transformers_dataset", - "parse_text_objects", - "resolve_transformers_dataset", - "resolve_transformers_dataset_column", - "resolve_transformers_dataset_split", - "split_lines_by_punctuation", + "clean_text", "split_text", + "load_text", + "is_puncutation", + "EndlessTextCreator", ] diff --git a/src/guidellm/utils/cli_params.py b/src/guidellm/utils/cli_params.py deleted file mode 100644 index 4e8800d2..00000000 --- a/src/guidellm/utils/cli_params.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -This module includes custom CLI parameters for the `click` package. -""" - -from typing import Any, Optional - -from click import Context, Parameter, ParamType - -__all__ = ["MAX_REQUESTS"] - - -class MaxRequestsType(ParamType): - """ - Catch the `dataset` string parameter to determine the behavior of the Scheduler. - """ - - name = "max_requests" - - def convert( - self, value: Any, param: Optional[Parameter], ctx: Optional[Context] - ) -> Any: - if isinstance(value, int): - return value - - try: - return int(value) - except ValueError: - if value == "dataset": - return value - else: - self.fail(f"{value} is not a valid integer or 'dataset'", param, ctx) - - -MAX_REQUESTS = MaxRequestsType() diff --git a/src/guidellm/utils/colors.py b/src/guidellm/utils/colors.py new file mode 100644 index 00000000..e4d60d52 --- /dev/null +++ b/src/guidellm/utils/colors.py @@ -0,0 +1,8 @@ +__all__ = ["Colors"] + + +class Colors: + INFO: str = "light_steel_blue" + PROGRESS: str = "dark_slate_gray1" + SUCCESS: str = "chartreuse1" + ERROR: str = "orange_red1" diff --git a/src/guidellm/utils/hf_transformers.py b/src/guidellm/utils/hf_transformers.py new file mode 100644 index 00000000..2c298d2f --- /dev/null +++ b/src/guidellm/utils/hf_transformers.py @@ -0,0 +1,35 @@ +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from transformers import AutoTokenizer, PreTrainedTokenizerBase # type: ignore[import] + +__all__ = [ + "check_load_processor", +] + + +def check_load_processor( + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + error_msg: str, +) -> PreTrainedTokenizerBase: + if processor is None: + raise ValueError(f"Processor/Tokenizer is required for {error_msg}.") + + try: + if isinstance(processor, (str, Path)): + loaded = AutoTokenizer.from_pretrained( + processor, + **(processor_args or {}), + ) + else: + loaded = processor + except Exception as err: + raise ValueError( + f"Failed to load processor/Tokenizer for {error_msg}." 
+ ) from err + + if not isinstance(loaded, PreTrainedTokenizerBase): + raise ValueError(f"Invalid processor/Tokenizer for {error_msg}.") + + return loaded diff --git a/src/guidellm/utils/injector.py b/src/guidellm/utils/injector.py deleted file mode 100644 index fb5216aa..00000000 --- a/src/guidellm/utils/injector.py +++ /dev/null @@ -1,70 +0,0 @@ -from pathlib import Path -from typing import Union - -from pydantic import BaseModel - -from guidellm.config import settings -from guidellm.utils.text import load_text - -__all__ = ["create_report", "inject_data"] - - -def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: - """ - Creates a report from the model and saves it to the output path. - - :param model: the model to serialize and inject - :type model: BaseModel - :param output_path: the path, either a file or a directory, - to save the report to. If a directory, the report will be saved - as "report.html" inside of the directory. - :type output_path: str - :return: the path to the saved report - :rtype: str - """ - if not isinstance(output_path, Path): - output_path = Path(output_path) - - html_content = load_text(settings.report_generation.source) - report_content = inject_data( - model, - html_content, - settings.report_generation.report_html_match, - settings.report_generation.report_html_placeholder, - ) - - if not output_path.suffix: - # assume directory, save as report.html - output_path = output_path / "report.html" - - output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_text(report_content) - - return output_path - - -def inject_data( - model: BaseModel, - html: str, - match: str, - placeholder: str, -) -> str: - """ - Injects the data from the model into the HTML while replacing the placeholder. - - :param model: the model to serialize and inject - :type model: BaseModel - :param html: the html to inject the data into - :type html: str - :param match: the string to match in the html to find the placeholder - :type match: str - :param placeholder: the placeholder to replace with the model data - inside of the placeholder - :type placeholder: str - :return: the html with the model data injected - :rtype: str - """ - model_str = model.json() - inject_str = match.replace(placeholder, model_str) - - return html.replace(match, inject_str) diff --git a/src/guidellm/utils/progress.py b/src/guidellm/utils/progress.py deleted file mode 100644 index a1e1e798..00000000 --- a/src/guidellm/utils/progress.py +++ /dev/null @@ -1,199 +0,0 @@ -from datetime import datetime -from typing import List - -from loguru import logger -from rich.console import Group -from rich.live import Live -from rich.panel import Panel -from rich.progress import ( - BarColumn, - Progress, - SpinnerColumn, - TaskID, - TaskProgressColumn, - TextColumn, - TimeElapsedColumn, - TimeRemainingColumn, -) - -__all__ = ["BenchmarkReportProgress"] - - -class BenchmarkReportProgress: - """ - Manages the progress display for benchmarks and report generation using Rich. - - This class provides a visual representation of the benchmarking process - and report generation using Rich's progress bars and panels. - """ - - def __init__(self): - """ - Initialize the BenchmarkReportProgress with default settings. - - This method sets up the progress displays for both individual benchmarks - and the overall report, as well as initializing internal task management - structures. 
- """ - logger.info("Initializing BenchmarkReportProgress instance") - - self.benchmarks_progress = Progress( - TextColumn("[{task.fields[start_time_str]}]"), - SpinnerColumn(), - TaskProgressColumn(), - TextColumn("{task.description}"), - TextColumn(" "), - TextColumn( - "[bold cyan]({task.fields[req_per_sec]} req/sec avg)[/bold cyan]" - ), - ) - self.benchmarks_panel = Panel( - self.benchmarks_progress, - title="Benchmarks", - title_align="left", - expand=True, - ) - self.report_progress = Progress( - SpinnerColumn(), - TextColumn("Generating report..."), - BarColumn(bar_width=None), - TextColumn( - "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})" - ), - TextColumn("["), - TimeElapsedColumn(), - TextColumn("<"), - TimeRemainingColumn(), - TextColumn("]"), - ) - self.render_group = Group(self.benchmarks_panel, self.report_progress) - self.live = Live(self.render_group, redirect_stdout=True, redirect_stderr=True) - - self.report_task: TaskID = None # type: ignore # noqa: PGH003 - self.benchmark_tasks: List[TaskID] = [] - self.benchmark_tasks_started: List[bool] = [] - self.benchmark_tasks_completed: List[bool] = [] - self.benchmark_tasks_progress: List[float] = [] - - def start(self, task_descriptions: List[str]) -> None: - """ - Starts the live progress display and initializes benchmark tasks. - - :param task_descriptions: List of descriptions for each benchmark task. - :type task_descriptions: List[str] - """ - logger.info( - "Starting BenchmarkReportProgress with task descriptions: {}", - task_descriptions, - ) - self.live.start() - - for task_description in task_descriptions: - logger.debug("Adding task with description: {}", task_description) - task_id = self.benchmarks_progress.add_task( - task_description, - start=False, - total=None, - start_time_str="--:--:--", - req_per_sec="#.##", - ) - self.benchmark_tasks.append(task_id) - self.benchmark_tasks_started.append(False) - self.benchmark_tasks_completed.append(False) - self.benchmark_tasks_progress.append(0) - - self.report_task = self.report_progress.add_task( - "", - total=len(self.benchmark_tasks) * 100, # 100 points per report - completed_benchmarks=0, - total_benchmarks=len(task_descriptions), - ) - logger.info("Initialized {} benchmark tasks", len(task_descriptions)) - - def update_benchmark( - self, - index: int, - description: str, - completed: bool, - completed_count: int, - completed_total: int, - start_time: float, - req_per_sec: float, - ) -> None: - """ - Updates the progress of a specific benchmark task. - - :param index: Index of the benchmark task to update. - :type index: int - :param description: Description of the current benchmark task. - :type description: str - :param completed: Flag indicating if the benchmark is completed. - :type completed: bool - :param completed_count: Number of completed operations for the task. - :type completed_count: int - :param completed_total: Total number of operations for the task. - :type completed_total: int - :param start_time: Start time of the benchmark in timestamp format. - :type start_time: float - :param req_per_sec: Average requests per second. - :type req_per_sec: float - :raises ValueError: If trying to update a completed benchmark. 
- """ - - if self.benchmark_tasks_completed[index]: - err = ValueError(f"Benchmark {index} already completed") - logger.error("Error updating benchmark: {}", err) - raise err - - if not self.benchmark_tasks_started[index]: - self.benchmark_tasks_started[index] = True - self.benchmarks_progress.start_task(self.benchmark_tasks[index]) - logger.info("Starting benchmark task at index {}", index) - - if completed: - self.benchmark_tasks_completed[index] = True - self.benchmark_tasks_progress[index] = 100 - self.benchmarks_progress.stop_task(self.benchmark_tasks[index]) - logger.info("Completed benchmark task at index {}", index) - - self.benchmark_tasks_progress[index] = completed_count / completed_total * 100 - self.benchmarks_progress.update( - self.benchmark_tasks[index], - description=description, - total=completed_total, - completed=completed_count if not completed else completed_total, - req_per_sec=(f"{req_per_sec:.2f}" if req_per_sec else "#.##"), - start_time_str=( - datetime.fromtimestamp(start_time).strftime("%H:%M:%S") - if start_time - else "--:--:--" - ), - ) - logger.debug( - "Updated benchmark task at index {}: {}% complete", - index, - self.benchmark_tasks_progress[index], - ) - self.report_progress.update( - self.report_task, - total=len(self.benchmark_tasks) * 100, - completed=sum(self.benchmark_tasks_progress), - completed_benchmarks=sum(self.benchmark_tasks_completed), - total_benchmarks=len(self.benchmark_tasks), - ) - - def finish(self) -> None: - """ - Marks the overall report task as finished and stops the live display. - """ - logger.info("Finishing BenchmarkReportProgress") - self.report_progress.update( - self.report_task, - total=len(self.benchmark_tasks) * 100, - completed=len(self.benchmark_tasks) * 100, - completed_benchmarks=len(self.benchmark_tasks), - total_benchmarks=len(self.benchmark_tasks), - ) - self.report_progress.stop_task(self.report_task) - self.live.stop() - logger.info("BenchmarkReportProgress finished and live display stopped") diff --git a/src/guidellm/utils/random.py b/src/guidellm/utils/random.py new file mode 100644 index 00000000..fefef4f1 --- /dev/null +++ b/src/guidellm/utils/random.py @@ -0,0 +1,42 @@ +import random +from typing import Iterator, Optional + +__all__ = ["IntegerRangeSampler"] + + +class IntegerRangeSampler: + def __init__( + self, + average: int, + variance: Optional[int], + min_value: Optional[int], + max_value: Optional[int], + random_seed: int, + ): + self.average = average + self.variance = variance + self.min_value = min_value + self.max_value = max_value + self.seed = random_seed + self.rng = random.Random(random_seed) # noqa: S311 + + def __iter__(self) -> Iterator[int]: + calc_min = self.min_value + if calc_min is None: + calc_min = max( + 1, self.average - 5 * self.variance if self.variance else self.average + ) + calc_max = self.max_value + if calc_max is None: + calc_max = ( + self.average + 5 * self.variance if self.variance else self.average + ) + + while True: + if calc_min == calc_max: + yield calc_min + elif not self.variance: + yield self.rng.randint(calc_min, calc_max + 1) + else: + rand = self.rng.gauss(self.average, self.variance) + yield round(max(calc_min, min(calc_max, rand))) diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py index f8c5038c..92a0284a 100644 --- a/src/guidellm/utils/text.py +++ b/src/guidellm/utils/text.py @@ -1,60 +1,26 @@ -import csv -import json +import gzip import re +from importlib.resources import as_file, files # type: ignore[attr-defined] from pathlib import 
Path -from typing import Any, Dict, List, Optional, Tuple, Union -from urllib.parse import urlparse +from typing import List, Optional, Union import ftfy -import requests -import yaml +import httpx from loguru import logger +from guidellm import data as package_data from guidellm.config import settings __all__ = [ - "clean_text", "filter_text", - "is_path", - "is_path_like", - "is_url", - "load_text", - "load_text_lines", - "parse_text_objects", - "split_lines_by_punctuation", + "clean_text", "split_text", + "load_text", + "is_puncutation", + "EndlessTextCreator", ] - -NAME_TITLES = [ - "Mr.", - "Mrs.", - "Ms.", - "Dr.", - "Prof.", - "Jr.", - "Sr.", - "St.", - "Lt.", - "Col.", - "Gen.", - "Rep.", - "Sen.", - "Gov.", - "Pres.", -] -SENTENCE_REGEX = r'[^.!?]*[.!?]["\']?\s*(?=[A-Z])' -MAX_EXTENSION_LENGTH = 8 MAX_PATH_LENGTH = 4096 -EXTENSION_TYPES = { - "csv": "csv", - "jsonl": "jsonl", - "json": "json", - "yaml": "yaml", - "yml": "yaml", - "txt": "txt", - "text": "txt", -} def filter_text( @@ -95,216 +61,17 @@ def filter_text( return text -def clean_text( - text: str, - fix_encoding: bool = True, - clean_whitespace: bool = False, - remove_empty_lines: bool = False, - force_new_line_punctuation: bool = False, -) -> str: - """ - Clean text by fixing encoding, cleaning whitespace, removing empty lines, - and forcing new line punctuation - - :param text: the text to clean - :param fix_encoding: True to fix the encoding of the text, False to leave as is - :param clean_whitespace: True to clean the whitespace in the text - (remove extra spaces, tabs, etc), False to leave as is - :param remove_empty_lines: True to remove empty lines from the text - (lines with only whitespace), False to leave as is - :param force_new_line_punctuation: True to force new lines at punctuation - (line ends in a period, exclamation point, or question mark), - False to leave as is - :return: The cleaned text - """ - - if fix_encoding: - text = ftfy.fix_text(text) - - if clean_whitespace: - text = "\n".join( - [re.sub(r"\s+", " ", line).strip() for line in text.splitlines()] - ) - - if remove_empty_lines: - text = "\n".join([line for line in text.splitlines() if line.strip()]) - - if force_new_line_punctuation: - # first remove any existing new lines - text = " ".join(line for line in text.splitlines() if line.strip()) - lines = split_lines_by_punctuation(text) - text = "\n".join(lines) - - return text - - -def split_lines_by_punctuation(text: str) -> List[str]: - """ - Split text into lines based on punctuation - - :param text: the text to split - :return: the list of lines - """ - - lines = [] - current_line = "" - skip_next = False - - for index, char in enumerate(text): - if skip_next: - skip_next = False - continue - - current_line += char - - if char not in [".", "!", "?"]: - # must match end of sentence punctuation - continue - - # if this is the character for a title, don't split - if any(current_line.endswith(title) for title in NAME_TITLES): - continue - - char_next_1 = text[index + 1] if index + 1 < len(text) else None - char_next_2 = text[index + 2] if index + 2 < len(text) else None - char_next_3 = text[index + 3] if index + 3 < len(text) else None - - next_is_space = char_next_1 and char_next_1.isspace() - next_is_quote_and_space = char_next_1 in ["'", '"'] and char_next_2 == " " - - # next character must be a space or a quote, otherwise skip - if not next_is_space and not next_is_quote_and_space: - continue - - # after this, next character must be an upper case letter - upper_char = char_next_3 if 
next_is_quote_and_space else char_next_2 - next_is_upper = upper_char and ( - upper_char.isupper() or upper_char in ["'", '"'] - ) +def clean_text(text: str) -> str: + return re.sub(r"\s+", " ", ftfy.fix_text(text)).strip() - if not next_is_upper: - continue - # if next char is a quote, add it and skip next - if next_is_quote_and_space: - current_line += text[index + 1] - skip_next = True +def split_text(text: str, split_punctuation: bool = False) -> List[str]: + text = clean_text(text) - lines.append(current_line.strip()) - current_line = "" + if split_punctuation: + return re.findall(r"[\w]+|[.,!?;]", text) - if current_line: - lines.append(current_line.strip()) - - return lines - - -def is_url(url: str) -> bool: - """ - Check if a string is a URL - - :param url: the string to check - :return: True if the string is a URL, False if not - """ - try: - result = urlparse(url) - return all([result.scheme, result.netloc]) - except Exception: # noqa: BLE001 - return False - - -def is_path(path: Any) -> bool: - """ - Check if a string is a path - - :param path: the string to check - :return: True if the string is a path, False if not - """ - if not isinstance(path, (str, Path)): - return False - - if isinstance(path, str): - path = Path(path) - - return path.exists() - - -def is_path_like(path: Any, enforce_file: bool = False) -> bool: - """ - Check if a string has a path like structure where it doesn't need to exist - - :param path: the string to check - :param enforce_file: True if the path should be a file, False if not - :return: True if the string is path like, False if not - """ - # if path isn't a str or Path, it's not a path - if not isinstance(path, (str, Path)): - return False - - if isinstance(path, Path): - path = str(path) - - # if text is too long, it's not a path (4096 for most linux setups) - if len(path) > MAX_PATH_LENGTH: - return False - - # if it starts with a URL scheme, it's not a path - if path.startswith(("http", "ftp")): - return False - - test_path = Path(path) - - # if it's supposed to be a file and there's no extension or - # the extension is too long, it's not a path - return not enforce_file or ( - bool(test_path.suffix) and len(test_path.suffix) <= MAX_EXTENSION_LENGTH - ) - - -def split_text(text: str) -> Tuple[List[str], List[str], List[int]]: - """ - Split text into words / tokens, the white space separators between words, - and the indices for each new line - - :param text: the text to split - :return: the words, the white space separators, and the new line indices - """ - if not text or not text.strip(): - return [], [], [] - - text = text.strip() - tokens = [] # type: List[str] - separators = [] # type: List[str] - new_lines = [0] - buffer = text[0] - is_token = not text[0].isspace() - - for char in text[1:]: - char_whitespace = char.isspace() - - if char == "\n": - new_lines.append(len(tokens) + 1) - - if char_whitespace and is_token: - tokens.append(buffer) - buffer = char - is_token = False - elif char_whitespace: - buffer += char - elif not char_whitespace and not is_token: - separators.append(buffer) - buffer = char - is_token = True - else: - buffer += char - - if buffer and is_token: - tokens.append(buffer) - separators.append(" ") - elif buffer: - separators.append(buffer) - - return tokens, separators, new_lines + return text.split() def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: @@ -324,132 +91,75 @@ def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: return "" # check URLs - if 
isinstance(data, str) and data.startswith("http"): - response = requests.get(data, timeout=settings.request_timeout) - response.raise_for_status() - return response.text - - # check raw text - if isinstance(data, str) and not is_path_like(data, enforce_file=True): + if isinstance(data, str) and data.strip().startswith(("http", "ftp")): + with httpx.Client(timeout=settings.request_timeout) as client: + response = client.get(data.strip()) + response.raise_for_status() + return response.text + + # check package data + if isinstance(data, str) and data.startswith("data:"): + resource_path = files(package_data).joinpath(data[5:]) + with as_file(resource_path) as resource_file, gzip.open( + resource_file, "rt", encoding=encoding + ) as file: + return file.read() + + # check gzipped files + if isinstance(data, str) and data.endswith(".gz"): + with gzip.open(data, "rt", encoding=encoding) as file: + return file.read() + + # check if it's raw text by not being a path + if isinstance(data, str) and ( + len(data) > MAX_PATH_LENGTH or not Path(data).exists() + ): return data # assume local file if not isinstance(data, Path): data = Path(data) - if not data.exists(): + if not data.exists() or not data.is_file(): raise FileNotFoundError(f"File not found: {data}") - if not data.is_file(): - raise IsADirectoryError(f"Path is a directory: {data}") - return data.read_text(encoding=encoding) -def parse_text_objects(data: str, format_: str = "txt") -> List[Dict]: +def is_puncutation(text: str) -> bool: """ - Parse text data into a list of dictionaries based on the format given - (csv, jsonl, json, yaml, txt). - - :param data: the text data to parse - :param format_: the format of the data to parse: - 'csv', 'jsonl', 'json', 'yaml', 'txt' - :return: the list of dictionaries parsed from the data, if text - then each line is a dictionary with a single key 'text' - """ - if not isinstance(data, str): - raise ValueError(f"Unsupported data given of type: {type(data)}") - - if format_ == "csv": - reader = csv.DictReader(data.splitlines()) - columns = reader.fieldnames - return [{col: row[col] for col in columns} for row in reader] # type: ignore # noqa: PGH003 - - if format_ == "jsonl": - return [json.loads(line) for line in data.splitlines() if line] - - if format_ in ("json", "yaml"): - data = json.loads(data) if format_ == "json" else yaml.safe_load(data) - - if not data: - return [] - - if isinstance(data, dict) and len(data) == 1: - logger.debug("Getting first value from JSON/YAML object: {}", data) - data = list(data.values())[0] - elif isinstance(data, dict): - logger.debug("Converting JSON/YAML object to list: {}", data) - data = list(data.values()) - - if not isinstance(data, list) or not isinstance(data[0], dict): - raise ValueError(f"Unsupported data structure given: {data}") - - return data - - if format_ == "txt": - return [{"text": line} for line in data.splitlines() if line] + Check if the text is a punctuation - raise ValueError(f"Unsupported format given: {format_}") - - -def load_text_lines( - data: Union[str, Path, List[Dict]], - format_: Optional[str] = None, - filters: Optional[List[str]] = None, - encoding: Optional[str] = None, -) -> List[str]: + :param text: the text to check + :type text: str + :return: True if the text is a punctuation, False otherwise + :rtype: bool """ - Load text lines from a file or data object with optional filtering and formatting. 
- - - :param data: the data to load the text lines from - :param format_: the format of the data to load, if not provided will be inferred. - Supported formats: 'csv', 'jsonl', 'json', 'yaml', 'txt' - :param filters: the keys to filter the data by when loading in order of preference. - If not provided, will use the first key in the data object. - :param encoding: the encoding to use when reading the file - :return: the list of text lines - """ - logger.debug( - "Loading text lines with format {}, filters {}, encoding {} for data: {}", - format_, - filters, - encoding, - data, - ) - - if not data: - return [] - - if not format_ and isinstance(data, (str, Path)) and "." in str(data): - extension = str(data).split(".")[-1] - format_ = EXTENSION_TYPES.get(extension, "txt") - elif not format_: - format_ = "txt" + return len(text) == 1 and not text.isalnum() and not text.isspace() - # load the data if it's a path or URL - if isinstance(data, (Path, str)): - data = load_text(data, encoding=encoding) - data = clean_text(data) - # parse the data into a list of dictionaries based on the format - if isinstance(data, str): - data = parse_text_objects(data, format_) +class EndlessTextCreator: + def __init__( + self, + data: Union[str, Path], + filter_start: Optional[Union[str, int]] = None, + filter_end: Optional[Union[str, int]] = None, + ): + self.data = data + self.text = load_text(data) + self.filtered_text = filter_text(self.text, filter_start, filter_end) + self.words = split_text(self.filtered_text, split_punctuation=True) - if not isinstance(data, list): - raise ValueError(f"Unsupported data given of type: {type(data)}") + def create_text(self, start: int, length: int) -> str: + text = "" - if not isinstance(data[0], dict): - raise ValueError(f"Unsupported data item type given: {type(data[0])}") + for counter in range(length): + index = (start + counter) % len(self.words) + add_word = self.words[index] - # grab the first available filter key to use if preference order as provided - filter_ = list(data[0].keys())[0] - for filt in filters or []: - if filt not in data[0]: - continue + if counter != 0 and not is_puncutation(add_word): + text += " " - filter_ = filt - break + text += add_word - # extract the lines from the data - return [row[filter_] for row in data] if filter_ else [str(row) for row in data] + return text diff --git a/src/guidellm/utils/transformers.py b/src/guidellm/utils/transformers.py deleted file mode 100644 index 54057299..00000000 --- a/src/guidellm/utils/transformers.py +++ /dev/null @@ -1,151 +0,0 @@ -from pathlib import Path -from typing import List, Optional, Union - -from datasets import ( # type: ignore # noqa: PGH003 - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, - load_dataset, -) -from loguru import logger - -from guidellm.config import settings - -__all__ = [ - "load_transformers_dataset", - "resolve_transformers_dataset", - "resolve_transformers_dataset_column", - "resolve_transformers_dataset_split", -] - - -def load_transformers_dataset( - dataset: Union[ - str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset - ], - split: Optional[str] = None, - preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, - **kwargs, -) -> Union[Dataset, IterableDataset]: - """ - Load a dataset from a file or a script and resolve the preferred split. 
- - :param dataset: the dataset file or script to load - :param split: the dataset split to use - (overrides preferred_splits, must be in dataset) - :param preferred_splits: the preferred dataset splits to use - :param kwargs: additional keyword arguments to pass to the dataset loader - :return: the loaded dataset - """ - dataset = resolve_transformers_dataset(dataset, **kwargs) - - return resolve_transformers_dataset_split(dataset, split, preferred_splits) - - -def resolve_transformers_dataset( - dataset: Union[ - str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset - ], - **kwargs, -) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: - """ - Resolve the dataset from a file (csv, json, script) or a dataset name. - - :param dataset: the dataset file or script to load - :param kwargs: additional keyword arguments to pass to the dataset loader - :return: the loaded dataset - """ - if isinstance( - dataset, (DatasetDict, Dataset, IterableDatasetDict, IterableDataset) - ): - return dataset - - if not isinstance(dataset, (str, Path)): - raise ValueError(f"Invalid dataset type: {type(dataset)}") - - dataset = str(dataset) - - if dataset.endswith((".csv", ".json")): - logger.debug("Loading dataset from local path: {}", dataset) - extension = dataset.split(".")[-1] - - return load_dataset(extension, data_files=dataset, **kwargs) - - if dataset.endswith(".py"): - logger.debug("Loading dataset from local script: {}", dataset) - - return load_dataset(dataset, **kwargs) - - logger.debug("Loading dataset: {}", dataset) - - return load_dataset(dataset, **kwargs) - - -def resolve_transformers_dataset_split( - dataset: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset], - split: Optional[str] = None, - preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, -) -> Union[Dataset, IterableDataset]: - """ - Resolve the preferred split from a dataset dictionary. - - :param dataset: the dataset to resolve the split from - :param split: the dataset split to use - (overrides preferred_splits, must be in dataset) - :param preferred_splits: the preferred dataset splits to use - :return: the resolved dataset split - """ - if not isinstance(dataset, (DatasetDict, IterableDatasetDict)): - logger.debug("Dataset is not a dictionary, using default split") - return dataset - - if split: - if split not in dataset: - raise ValueError(f"Split '{split}' not found in dataset") - - return dataset[split] - - if preferred_splits: - for spl in preferred_splits: - if spl not in dataset: - continue - return dataset[spl] - - return list(dataset.values())[0] - - -def resolve_transformers_dataset_column( - dataset: Union[Dataset, IterableDataset], - column: Optional[str] = None, - preferred_columns: Optional[List[str]] = settings.dataset.preferred_data_columns, -) -> str: - """ - Resolve the preferred column from a dataset. 
- - :param dataset: the dataset to resolve the column from - :param column: the dataset column to use - (overrides preferred_columns, must be in dataset) - :param preferred_columns: the preferred dataset columns to use - :return: the resolved dataset column - """ - column_names = dataset.column_names - - if not column_names: - # grab from the first item - first_item = next(iter(dataset)) - column_names = list(first_item.keys()) - - if column: - if column not in column_names: - raise ValueError(f"Column '{column}' not found in dataset") - - return column - - if preferred_columns: - for col in preferred_columns: - if col not in column_names: - continue - return col - - return list(column_names)[0] diff --git a/tests/dummy/__init__.py b/tests/dummy/__init__.py deleted file mode 100644 index a0cccdbf..00000000 --- a/tests/dummy/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -The tests.dummy package package represents dummy data factories and test services. - -test.dummy.data.openai_model_factory - openai.types.Model test factory -test.dummy.data.openai_completion_factory - openai.types.Completion test factory -""" - -from . import data, services # noqa: F401 diff --git a/tests/dummy/data/pride_and_prejudice.txt b/tests/dummy/data/pride_and_prejudice.txt deleted file mode 100644 index 3b93b50a..00000000 --- a/tests/dummy/data/pride_and_prejudice.txt +++ /dev/null @@ -1,2015 +0,0 @@ -*** START OF THE PROJECT GUTENBERG EBOOK 1342 *** - - PAGE - -Frontispiece iv - -Title-page v - -Dedication vii - -Heading to Preface ix - -Heading to List of Illustrations xxv - -Heading to Chapter I. 1 - -“He came down to see the place” 2 - -Mr. and Mrs. Bennet 5 - -“I hope Mr. Bingley will like it” 6 - -“I’m the tallest” 9 - -“He rode a black horse” 10 - -“When the party entered” 12 - -“She is tolerable” 15 - -Heading to Chapter IV. 18 - -Heading to Chapter V. 22 - -“Without once opening his lips” 24 - -Tailpiece to Chapter V. 26 - -Heading to Chapter VI. 27 - -“The entreaties of several” 31 - -“A note for Miss Bennet” 36 - -“Cheerful prognostics” 40 - -“The apothecary came” 43 - -“Covering a screen” 45 - -“Mrs. Bennet and her two youngest girls” 53 - -Heading to Chapter X. 60 - -“No, no; stay where you are” 67 - -“Piling up the fire” 69 - -Heading to Chapter XII. 75 - -Heading to Chapter XIII. 78 - -Heading to Chapter XIV. 84 - -“Protested that he never read novels” 87 - -Heading to Chapter XV. 89 - -Heading to Chapter XVI. 95 - -“The officers of the ----shire” 97 - -“Delighted to see their dear friend again” 108 - -Heading to Chapter XVIII. 113 - -“Such very superior dancing is not often seen” 118 - -“To assure you in the most animated language” 132 - -Heading to Chapter XX. 139 - -“They entered the breakfast-room” 143 - -Heading to Chapter XXI. 146 - -“Walked back with them” 148 - -Heading to Chapter XXII. 154 - -“So much love and eloquence” 156 - -“Protested he must be entirely mistaken” 161 - -“Whenever she spoke in a low voice” 166 - -Heading to Chapter XXIV. 168 - -Heading to Chapter XXV. 175 - -“Offended two or three young ladies” 177 - -“Will you come and see me?” 181 - -“On the stairs” 189 - -“At the door” 194 - -“In conversation with the ladies” 198 - -“Lady Catherine,” said she, “you have given me a treasure” 200 - -Heading to Chapter XXX. 209 - -“He never failed to inform them” 211 - -“The gentlemen accompanied him” 213 - -Heading to Chapter XXXI. 215 - -Heading to Chapter XXXII. 221 - -“Accompanied by their aunt” 225 - -“On looking up” 228 - -Heading to Chapter XXXIV. 
235 - -“Hearing herself called” 243 - -Heading to Chapter XXXVI. 253 - -“Meeting accidentally in town” 256 - -“His parting obeisance” 261 - -“Dawson” 263 - -“The elevation of his feelings” 267 - -“They had forgotten to leave any message” 270 - -“How nicely we are crammed in!” 272 - -Heading to Chapter XL. 278 - -“I am determined never to speak of it again” 283 - -“When Colonel Miller’s regiment went away” 285 - -“Tenderly flirting” 290 - -The arrival of the Gardiners 294 - -“Conjecturing as to the date” 301 - -Heading to Chapter XLIV. 318 - -“To make herself agreeable to all” 321 - -“Engaged by the river” 327 - -Heading to Chapter XLVI. 334 - -“I have not an instant to lose” 339 - -“The first pleasing earnest of their welcome” 345 - -The Post 359 - -“To whom I have related the affair” 363 - -Heading to Chapter XLIX. 368 - -“But perhaps you would like to read it” 370 - -“The spiteful old ladies” 377 - -“With an affectionate smile” 385 - -“I am sure she did not listen” 393 - -“Mr. Darcy with him” 404 - -“Jane happened to look round” 415 - -“Mrs. Long and her nieces” 420 - -“Lizzy, my dear, I want to speak to you” 422 - -Heading to Chapter LVI. 431 - -“After a short survey” 434 - -“But now it comes out” 442 - -“The efforts of his aunt” 448 - -“Unable to utter a syllable” 457 - -“The obsequious civility” 466 - -Heading to Chapter LXI. 472 - -The End 476 - - - - -[Illustration: ·PRIDE AND PREJUDICE· - - - - -Chapter I.] - - -It is a truth universally acknowledged, that a single man in possession -of a good fortune must be in want of a wife. - -However little known the feelings or views of such a man may be on his -first entering a neighbourhood, this truth is so well fixed in the minds -of the surrounding families, that he is considered as the rightful -property of some one or other of their daughters. - -“My dear Mr. Bennet,” said his lady to him one day, “have you heard that -Netherfield Park is let at last?” - -Mr. Bennet replied that he had not. - -“But it is,” returned she; “for Mrs. Long has just been here, and she -told me all about it.” - -Mr. Bennet made no answer. - -“Do not you want to know who has taken it?” cried his wife, impatiently. - -“_You_ want to tell me, and I have no objection to hearing it.” - -[Illustration: - -“He came down to see the place” - -[_Copyright 1894 by George Allen._]] - -This was invitation enough. - -“Why, my dear, you must know, Mrs. Long says that Netherfield is taken -by a young man of large fortune from the north of England; that he came -down on Monday in a chaise and four to see the place, and was so much -delighted with it that he agreed with Mr. Morris immediately; that he is -to take possession before Michaelmas, and some of his servants are to be -in the house by the end of next week.” - -“What is his name?” - -“Bingley.” - -“Is he married or single?” - -“Oh, single, my dear, to be sure! A single man of large fortune; four or -five thousand a year. What a fine thing for our girls!” - -“How so? how can it affect them?” - -“My dear Mr. Bennet,” replied his wife, “how can you be so tiresome? You -must know that I am thinking of his marrying one of them.” - -“Is that his design in settling here?” - -“Design? Nonsense, how can you talk so! But it is very likely that he -_may_ fall in love with one of them, and therefore you must visit him as -soon as he comes.” - -“I see no occasion for that. You and the girls may go--or you may send -them by themselves, which perhaps will be still better; for as you are -as handsome as any of them, Mr. 
Bingley might like you the best of the -party.” - -“My dear, you flatter me. I certainly _have_ had my share of beauty, but -I do not pretend to be anything extraordinary now. When a woman has five -grown-up daughters, she ought to give over thinking of her own beauty.” - -“In such cases, a woman has not often much beauty to think of.” - -“But, my dear, you must indeed go and see Mr. Bingley when he comes into -the neighbourhood.” - -“It is more than I engage for, I assure you.” - -“But consider your daughters. Only think what an establishment it would -be for one of them. Sir William and Lady Lucas are determined to go, -merely on that account; for in general, you know, they visit no new -comers. Indeed you must go, for it will be impossible for _us_ to visit -him, if you do not.” - -“You are over scrupulous, surely. I dare say Mr. Bingley will be very -glad to see you; and I will send a few lines by you to assure him of my -hearty consent to his marrying whichever he chooses of the girls--though -I must throw in a good word for my little Lizzy.” - -“I desire you will do no such thing. Lizzy is not a bit better than the -others: and I am sure she is not half so handsome as Jane, nor half so -good-humoured as Lydia. But you are always giving _her_ the preference.” - -“They have none of them much to recommend them,” replied he: “they are -all silly and ignorant like other girls; but Lizzy has something more of -quickness than her sisters.” - -“Mr. Bennet, how can you abuse your own children in such a way? You take -delight in vexing me. You have no compassion on my poor nerves.” - -“You mistake me, my dear. I have a high respect for your nerves. They -are my old friends. I have heard you mention them with consideration -these twenty years at least.” - -“Ah, you do not know what I suffer.” - -“But I hope you will get over it, and live to see many young men of four -thousand a year come into the neighbourhood.” - -“It will be no use to us, if twenty such should come, since you will not -visit them.” - -“Depend upon it, my dear, that when there are twenty, I will visit them -all.” - -Mr. Bennet was so odd a mixture of quick parts, sarcastic humour, -reserve, and caprice, that the experience of three-and-twenty years had -been insufficient to make his wife understand his character. _Her_ mind -was less difficult to develope. She was a woman of mean understanding, -little information, and uncertain temper. When she was discontented, she -fancied herself nervous. The business of her life was to get her -daughters married: its solace was visiting and news. - -[Illustration: M^{r.} & M^{rs.} Bennet - -[_Copyright 1894 by George Allen._]] - - - - -[Illustration: - -“I hope Mr. Bingley will like it” - -[_Copyright 1894 by George Allen._]] - - - - -CHAPTER II. - - -[Illustration] - -Mr. Bennet was among the earliest of those who waited on Mr. Bingley. He -had always intended to visit him, though to the last always assuring his -wife that he should not go; and till the evening after the visit was -paid she had no knowledge of it. It was then disclosed in the following -manner. Observing his second daughter employed in trimming a hat, he -suddenly addressed her with,-- - -“I hope Mr. Bingley will like it, Lizzy.” - -“We are not in a way to know _what_ Mr. Bingley likes,” said her mother, -resentfully, “since we are not to visit.” - -“But you forget, mamma,” said Elizabeth, “that we shall meet him at the -assemblies, and that Mrs. Long has promised to introduce him.” - -“I do not believe Mrs. 
[... remainder of diff omitted: several hundred removed ("-") lines reproducing the plain text of "Pride and Prejudice" (Chapters II–X excerpt, including Gutenberg illustration captions), evidently a bundled sample-data text file rather than source code ...]
The perpetual -commendations of the lady either on his hand-writing, or on the evenness -of his lines, or on the length of his letter, with the perfect unconcern -with which her praises were received, formed a curious dialogue, and was -exactly in unison with her opinion of each. - -“How delighted Miss Darcy will be to receive such a letter!” - -He made no answer. - -“You write uncommonly fast.” - -“You are mistaken. I write rather slowly.” - -“How many letters you must have occasion to write in the course of a -year! Letters of business, too! How odious I should think them!” - -“It is fortunate, then, that they fall to my lot instead of to yours.” - -“Pray tell your sister that I long to see her.” - -“I have already told her so once, by your desire.” - -“I am afraid you do not like your pen. Let me mend it for you. I mend -pens remarkably well.” - -“Thank you--but I always mend my own.” - -“How can you contrive to write so even?” - -He was silent. - -“Tell your sister I am delighted to hear of her improvement on the harp, -and pray let her know that I am quite in raptures with her beautiful -little design for a table, and I think it infinitely superior to Miss -Grantley’s.” - -“Will you give me leave to defer your raptures till I write again? At -present I have not room to do them justice.” - -“Oh, it is of no consequence. I shall see her in January. But do you -always write such charming long letters to her, Mr. Darcy?” - -“They are generally long; but whether always charming, it is not for me -to determine.” - -“It is a rule with me, that a person who can write a long letter with -ease cannot write ill.” - -“That will not do for a compliment to Darcy, Caroline,” cried her -brother, “because he does _not_ write with ease. He studies too much -for words of four syllables. Do not you, Darcy?” - -“My style of writing is very different from yours.” - -“Oh,” cried Miss Bingley, “Charles writes in the most careless way -imaginable. He leaves out half his words, and blots the rest.” - -“My ideas flow so rapidly that I have not time to express them; by which -means my letters sometimes convey no ideas at all to my correspondents.” - -“Your humility, Mr. Bingley,” said Elizabeth, “must disarm reproof.” - -“Nothing is more deceitful,” said Darcy, “than the appearance of -humility. It is often only carelessness of opinion, and sometimes an -indirect boast.” - -“And which of the two do you call _my_ little recent piece of modesty?” - -“The indirect boast; for you are really proud of your defects in -writing, because you consider them as proceeding from a rapidity of -thought and carelessness of execution, which, if not estimable, you -think at least highly interesting. The power of doing anything with -quickness is always much prized by the possessor, and often without any -attention to the imperfection of the performance. When you told Mrs. -Bennet this morning, that if you ever resolved on quitting Netherfield -you should be gone in five minutes, you meant it to be a sort of -panegyric, of compliment to yourself; and yet what is there so very -laudable in a precipitance which must leave very necessary business -undone, and can be of no real advantage to yourself or anyone else?” - - - CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO. - TOOKS COURT, CHANCERY LANE, LONDON. 
- - -*** END OF THE PROJECT GUTENBERG EBOOK 1342 *** diff --git a/tests/dummy/data/transformers.py b/tests/dummy/data/transformers.py deleted file mode 100644 index 7d8911bb..00000000 --- a/tests/dummy/data/transformers.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Iterable - -from datasets import ( # type: ignore - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, -) - - -def create_sample_dataset( - column: str = "text", pattern: str = "sample text {}" -) -> Dataset: - return Dataset.from_dict({column: [pattern.format(ind) for ind in range(1, 4)]}) - - -def create_sample_iterable_dataset( - column: str = "text", pattern: str = "sample text {}" -) -> IterableDataset: - def _generator(): - for ind in range(1, 4): - yield {column: pattern.format(ind)} - - return IterableDataset.from_generator(_generator) - - -def create_sample_dataset_dict( - splits: Iterable[str] = ("train", "test"), - column: str = "text", - pattern: str = "sample text {}", -): - return DatasetDict( - { - split: create_sample_dataset(column=column, pattern=pattern) - for split in splits - } - ) - - -def create_sample_iterable_dataset_dict( - splits: Iterable[str] = ("train", "test"), - column: str = "text", - pattern: str = "sample text {}", -): - return IterableDatasetDict( - { - split: create_sample_iterable_dataset(column=column, pattern=pattern) - for split in splits - } - ) diff --git a/tests/dummy/services/__init__.py b/tests/dummy/services/__init__.py deleted file mode 100644 index 8c63c5c4..00000000 --- a/tests/dummy/services/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .requests import TestRequestGenerator - -__all__ = [ - "TestRequestGenerator", -] diff --git a/tests/dummy/services/requests.py b/tests/dummy/services/requests.py deleted file mode 100644 index e7e29402..00000000 --- a/tests/dummy/services/requests.py +++ /dev/null @@ -1,31 +0,0 @@ -from typing import Optional - -from guidellm.core import TextGenerationRequest -from guidellm.request import GenerationMode, RequestGenerator - - -class TestRequestGenerator(RequestGenerator): - """ - This class represents the Testing Request Generator. - The purpose - to be used for testing. 
- """ - - def __init__( - self, - tokenizer: Optional[str] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - super().__init__( - type_="test", - source="test", - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def create_item(self) -> TextGenerationRequest: - return TextGenerationRequest(prompt="Test prompt") - - def __len__(self) -> int: - raise NotImplementedError diff --git a/tests/e2e/test_guidellm.py b/tests/e2e/test_guidellm.py deleted file mode 100644 index 75ab2212..00000000 --- a/tests/e2e/test_guidellm.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -from guidellm.config import settings - - -@pytest.mark.smoke() -def test_import(): - assert settings diff --git a/tests/e2e/test_placeholder.py b/tests/e2e/test_placeholder.py new file mode 100644 index 00000000..d028e3f9 --- /dev/null +++ b/tests/e2e/test_placeholder.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.mark.smoke() +def test_placeholder(): + assert True diff --git a/tests/integration/test_guidellm.py b/tests/integration/test_guidellm.py deleted file mode 100644 index 75ab2212..00000000 --- a/tests/integration/test_guidellm.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -from guidellm.config import settings - - -@pytest.mark.smoke() -def test_import(): - assert settings diff --git a/tests/integration/test_placeholder.py b/tests/integration/test_placeholder.py new file mode 100644 index 00000000..d028e3f9 --- /dev/null +++ b/tests/integration/test_placeholder.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.mark.smoke() +def test_placeholder(): + assert True diff --git a/tests/unit/backend/test_backend.py b/tests/unit/backend/test_backend.py index 29a008e1..1c16d397 100644 --- a/tests/unit/backend/test_backend.py +++ b/tests/unit/backend/test_backend.py @@ -124,10 +124,13 @@ async def test_backend_chat_completions(mock_backend): @pytest.mark.smoke() -def test_backend_models(mock_backend): - assert mock_backend.available_models() == ["mock-model"] +@pytest.mark.asyncio() +async def test_backend_models(mock_backend): + models = await mock_backend.available_models() + assert models == ["mock-model"] @pytest.mark.smoke() -def test_backend_validate(mock_backend): - mock_backend.validate() +@pytest.mark.asyncio() +async def test_backend_validate(mock_backend): + await mock_backend.validate() diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index db03c259..0749e9db 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -42,24 +42,26 @@ def test_openai_http_backend_intialization(): @pytest.mark.smoke() -def test_openai_http_backend_available_models(httpx_openai_mock): +@pytest.mark.asyncio() +async def test_openai_http_backend_available_models(httpx_openai_mock): backend = OpenAIHTTPBackend(target="http://target.mock") - models = backend.available_models() + models = await backend.available_models() assert models == ["mock-model"] @pytest.mark.smoke() -def test_openai_http_backend_validate(httpx_openai_mock): +@pytest.mark.asyncio() +async def test_openai_http_backend_validate(httpx_openai_mock): backend = OpenAIHTTPBackend(target="http://target.mock", model="mock-model") - backend.validate() + await backend.validate() backend = OpenAIHTTPBackend(target="http://target.mock") - backend.validate() + await backend.validate() assert backend.model == "mock-model" backend = OpenAIHTTPBackend(target="http://target.mock", model="invalid-model") with pytest.raises(ValueError): - 
backend.validate() + await backend.validate() @pytest.mark.smoke() diff --git a/tests/unit/backend/test_response.py b/tests/unit/backend/test_response.py index 8de78925..c4773083 100644 --- a/tests/unit/backend/test_response.py +++ b/tests/unit/backend/test_response.py @@ -20,6 +20,9 @@ def test_streaming_response_types(): def test_streaming_text_response_default_initilization(): response = StreamingTextResponse( type_="start", + value="", + start_time=0.0, + first_iter_time=None, iter_count=0, delta="", time=0.0, @@ -31,13 +34,19 @@ def test_streaming_text_response_default_initilization(): def test_streaming_text_response_initialization(): response = StreamingTextResponse( type_="start", - iter_count=0, + value="Hello, world!", + start_time=0.0, + first_iter_time=0.0, + iter_count=1, delta="Hello, world!", time=1.0, request_id="123", ) assert response.type_ == "start" - assert response.iter_count == 0 + assert response.value == "Hello, world!" + assert response.start_time == 0.0 + assert response.first_iter_time == 0.0 + assert response.iter_count == 1 assert response.delta == "Hello, world!" assert response.time == 1.0 assert response.request_id == "123" @@ -47,6 +56,9 @@ def test_streaming_text_response_initialization(): def test_streaming_text_response_marshalling(): response = StreamingTextResponse( type_="start", + value="Hello, world!", + start_time=0.0, + first_iter_time=0.0, iter_count=0, delta="Hello, world!", time=1.0, @@ -117,7 +129,18 @@ def test_response_summary_default_initialization(): ), start_time=0.0, end_time=0.0, + first_iter_time=None, + last_iter_time=None, ) + assert summary.value == "Hello, world!" + assert summary.request_args.target == "http://example.com" + assert summary.request_args.headers == {} + assert summary.request_args.payload == {} + assert summary.start_time == 0.0 + assert summary.end_time == 0.0 + assert summary.first_iter_time is None + assert summary.last_iter_time is None + assert summary.iterations == 0 assert summary.request_prompt_tokens is None assert summary.request_output_tokens is None assert summary.response_prompt_tokens is None @@ -137,6 +160,8 @@ def test_response_summary_initialization(): start_time=1.0, end_time=2.0, iterations=3, + first_iter_time=1.0, + last_iter_time=2.0, request_prompt_tokens=5, request_output_tokens=10, response_prompt_tokens=5, @@ -150,6 +175,8 @@ def test_response_summary_initialization(): assert summary.start_time == 1.0 assert summary.end_time == 2.0 assert summary.iterations == 3 + assert summary.first_iter_time == 1.0 + assert summary.last_iter_time == 2.0 assert summary.request_prompt_tokens == 5 assert summary.request_output_tokens == 10 assert summary.response_prompt_tokens == 5 diff --git a/tests/unit/cli/__init__.py b/tests/unit/cli/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/cli/test_custom_type_params.py b/tests/unit/cli/test_custom_type_params.py deleted file mode 100644 index 1e66311d..00000000 --- a/tests/unit/cli/test_custom_type_params.py +++ /dev/null @@ -1,38 +0,0 @@ -import pytest -from click import BadParameter - -from guidellm.utils import cli_params - - -@pytest.fixture() -def max_requests_param_type(): - return cli_params.MaxRequestsType() - - -def test_valid_integer_input(max_requests_param_type): - assert max_requests_param_type.convert(10, None, None) == 10 - assert max_requests_param_type.convert("42", None, None) == 42 - - -def test_valid_dataset_input(max_requests_param_type): - assert max_requests_param_type.convert("dataset", None, None) 
== "dataset" - - -def test_invalid_string_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("invalid", None, None) - - -def test_invalid_float_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("10.5", None, None) - - -def test_invalid_non_numeric_string_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("abc", None, None) - - -def test_invalid_mixed_string_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("123abc", None, None) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 2a31df5d..41c0fbf5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,11 +1,9 @@ import json -from pathlib import Path from typing import Any, AsyncIterable, Dict, List, Literal, Optional from unittest.mock import MagicMock, patch import httpx import pytest -import requests_mock import respx from guidellm.backend import ResponseSummary, StreamingTextResponse @@ -27,21 +25,6 @@ def _fake_tokenize(text: str) -> List[int]: yield mock_tokenizer -@pytest.fixture() -def mock_requests_pride_and_prejudice(): - text_path = ( - Path(__file__).parent.parent / "dummy" / "data" / "pride_and_prejudice.txt" - ) - text_content = text_path.read_text() - - with requests_mock.Mocker() as mock: - mock.get( - "https://www.gutenberg.org/files/1342/1342-0.txt", - text=text_content, - ) - yield mock - - @pytest.fixture() def mock_backend(request): params = request.param if hasattr(request, "param") else {} diff --git a/tests/unit/core/__init__.py b/tests/unit/core/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/core/test_distribution.py b/tests/unit/core/test_distribution.py deleted file mode 100644 index 95b7e923..00000000 --- a/tests/unit/core/test_distribution.py +++ /dev/null @@ -1,107 +0,0 @@ -import pytest - -from guidellm.core import Distribution - - -@pytest.mark.smoke() -def test_distribution_initialization(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert dist.data == data - - -@pytest.mark.smoke() -def test_distribution_statistics(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert dist.mean == 3.0 - assert dist.median == 3.0 - assert dist.variance == 2.0 - assert dist.std_deviation == pytest.approx(1.414213, rel=1e-5) - assert dist.min == 1 - assert dist.max == 5 - assert dist.range == 4 - assert dist.percentile(50) == 3.0 - assert dist.percentiles([25, 50, 75]) == pytest.approx([2.0, 3.0, 4.0]) - - -@pytest.mark.smoke() -def test_distribution_no_data(): - dist = Distribution(data=[]) - assert dist.mean == 0.0 - assert dist.median == 0.0 - assert dist.variance == 0.0 - assert dist.std_deviation == 0.0 - assert dist.min == 0.0 - assert dist.max == 0.0 - assert dist.range == 0.0 - assert dist.percentile(50) == 0.0 - assert dist.percentiles([25, 50, 75]) == [0.0, 0.0, 0.0] - - -@pytest.mark.sanity() -def test_distribution_add_data(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - new_data = [6, 7, 8] - dist.add_data(new_data) - - assert dist.data == data + new_data - - -@pytest.mark.sanity() -def test_distribution_remove_data(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - remove_data = [2, 4] - dist.remove_data(remove_data) - assert dist.data == [1, 3, 5] - - -@pytest.mark.regression() -def test_distribution_str(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert 
"Distribution({" in str(dist) - assert "'mean': 3.0" in str(dist) - assert "'median': 3.0" in str(dist) - assert "'variance': 2.0" in str(dist) - assert "'percentile_indices': [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]" in str( - dist - ) - assert ( - "'percentile_values': [1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96]" - in str(dist) - ) - assert "'min': 1" in str(dist) - assert "'max': 5" in str(dist) - assert "'range': 4" in str(dist) - - -@pytest.mark.regression() -def test_distribution_repr(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert repr(dist) == f"Distribution(data={dist.data})" - - -@pytest.mark.regression() -def test_distribution_json(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - json_str = dist.to_json() - assert f'"data":[{dist.data[0]}' in json_str - - dist_restored = Distribution.from_json(json_str) - assert dist_restored.data == data - - -@pytest.mark.regression() -def test_distribution_yaml(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - yaml_str = dist.to_yaml() - assert f"data:\n- {dist.data[0]}" in yaml_str - - dist_restored = Distribution.from_yaml(yaml_str) - assert dist_restored.data == data diff --git a/tests/unit/core/test_report.py b/tests/unit/core/test_report.py deleted file mode 100644 index c9e4ef3a..00000000 --- a/tests/unit/core/test_report.py +++ /dev/null @@ -1,106 +0,0 @@ -import tempfile -from pathlib import Path - -import pytest - -from guidellm.core import ( - GuidanceReport, - TextGenerationBenchmark, - TextGenerationBenchmarkReport, - TextGenerationRequest, - TextGenerationResult, -) - - -@pytest.fixture() -def sample_benchmark_report() -> TextGenerationBenchmarkReport: - sample_request = TextGenerationRequest(prompt="sample prompt") - sample_result = TextGenerationResult( - request=sample_request, - prompt_token_count=2, - output="sample output", - output_token_count=2, - start_time=None, - end_time=None, - first_token_time=None, - last_token_time=None, - ) - sample_benchmark = TextGenerationBenchmark( - mode="asynchronous", - rate=1.0, - results=[sample_result], - errors=[], - concurrencies=[], - ) - return TextGenerationBenchmarkReport( - benchmarks=[sample_benchmark], args={"arg1": "value1"} - ) - - -def compare_guidance_reports(report1: GuidanceReport, report2: GuidanceReport) -> bool: - return report1.benchmarks == report2.benchmarks - - -@pytest.mark.smoke() -def test_guidance_report_initialization(): - report = GuidanceReport() - assert report.benchmarks == [] - - -@pytest.mark.smoke() -def test_guidance_report_initialization_with_params(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - assert report.benchmarks == [sample_benchmark_report] - - -@pytest.mark.sanity() -def test_guidance_report_print(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - report.print() # This will output to the console - - -@pytest.mark.sanity() -def test_guidance_report_json(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - json_str = report.to_json() - loaded_report = GuidanceReport.from_json(json_str) - assert compare_guidance_reports(report, loaded_report) - - -@pytest.mark.sanity() -def test_guidance_report_yaml(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - yaml_str = report.to_yaml() - loaded_report = GuidanceReport.from_yaml(yaml_str) - assert compare_guidance_reports(report, loaded_report) - - -@pytest.mark.sanity() -def 
test_guidance_report_save_load_file(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "report.yaml" - report.save_file(file_path) - loaded_report = GuidanceReport.load_file(file_path) - assert compare_guidance_reports(report, loaded_report) - - -@pytest.mark.regression() -def test_empty_guidance_report(): - report = GuidanceReport() - assert len(report.benchmarks) == 0 - report.print() # Ensure it doesn't raise error with no benchmarks - - -@pytest.mark.regression() -def test_compare_guidance_reports(sample_benchmark_report): - report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) - report2 = GuidanceReport(benchmarks=[sample_benchmark_report]) - assert compare_guidance_reports(report1, report2) - - -@pytest.mark.regression() -def test_compare_guidance_reports_inequality(sample_benchmark_report): - report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) - report2 = GuidanceReport(benchmarks=[]) - assert not compare_guidance_reports(report1, report2) diff --git a/tests/unit/core/test_request.py b/tests/unit/core/test_request.py deleted file mode 100644 index 8550eb28..00000000 --- a/tests/unit/core/test_request.py +++ /dev/null @@ -1,79 +0,0 @@ -import pytest - -from guidellm.core import TextGenerationRequest - - -@pytest.mark.smoke() -def test_text_generation_request_initialization(): - prompt = "Generate a story" - request = TextGenerationRequest(prompt=prompt) - assert request.prompt == prompt - assert request.prompt_token_count is None - assert request.output_token_count is None - assert request.params == {} - - -@pytest.mark.sanity() -def test_text_generation_request_initialization_with_params(): - prompt = "Generate a story" - prompt_token_count = 50 - output_token_count = 100 - params = {"temperature": 0.7} - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - params=params, - ) - assert request.prompt == prompt - assert request.prompt_token_count == prompt_token_count - assert request.output_token_count == output_token_count - assert request.params == params - - -@pytest.mark.regression() -def test_request_json(): - prompt = "Generate text" - prompt_token_count = 10 - output_token_count = 50 - params = {"temperature": 0.7} - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - params=params, - ) - json_str = request.to_json() - assert '"prompt":"Generate text"' in json_str - assert '"id":' in json_str - - request_restored = TextGenerationRequest.from_json(json_str) - assert request.id == request_restored.id - assert request_restored.prompt == prompt - assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.output_token_count == output_token_count - assert request_restored.params == params - - -@pytest.mark.regression() -def test_request_yaml(): - prompt = "Generate text" - prompt_token_count = 15 - output_token_count = 55 - params = {"temperature": 0.8} - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - params=params, - ) - yaml_str = request.to_yaml() - assert "prompt: Generate text" in yaml_str - assert "id:" in yaml_str - - request_restored = TextGenerationRequest.from_yaml(yaml_str) - assert request.id == request_restored.id - assert request_restored.prompt == prompt 
- assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.output_token_count == output_token_count - assert request_restored.params == params diff --git a/tests/unit/core/test_result.py b/tests/unit/core/test_result.py deleted file mode 100644 index ddd62d7f..00000000 --- a/tests/unit/core/test_result.py +++ /dev/null @@ -1,279 +0,0 @@ -import time - -import pytest - -from guidellm.core import ( - RequestConcurrencyMeasurement, - TextGenerationBenchmark, - TextGenerationBenchmarkReport, - TextGenerationError, - TextGenerationRequest, - TextGenerationResult, -) - - -def create_sample_request(): - return TextGenerationRequest(prompt="Hello, world!") - - -def create_sample_result(): - start_time = time.time() - - return TextGenerationResult( - request=create_sample_request(), - prompt_token_count=4, - output="Generated text", - output_token_count=3, - start_time=start_time, - end_time=start_time + 1.5, - first_token_time=start_time + 0.5, - last_token_time=start_time + 1.4, - ) - - -@pytest.mark.smoke() -def test_text_generation_result_default_initialization(): - result = TextGenerationResult(request=create_sample_request()) - assert result.request.prompt == "Hello, world!" - assert result.prompt_token_count is None - assert result.output == "" - assert result.output_token_count is None - assert result.start_time is None - assert result.end_time is None - assert result.first_token_time is None - assert result.last_token_time is None - - -@pytest.mark.smoke() -def test_text_generation_result_initialization(): - result = create_sample_result() - assert result.request.prompt == "Hello, world!" - assert result.prompt_token_count == 4 - assert result.output == "Generated text" - assert result.output_token_count == 3 - assert result.start_time >= 0.0 - assert result.end_time == result.start_time + 1.5 - assert result.first_token_time == result.start_time + 0.5 - assert result.last_token_time == result.start_time + 1.4 - - # computed fields - assert result.request_latency == 1.5 - assert result.time_to_first_token == 0.5 * 1000 - assert result.inter_token_latency == pytest.approx((1.4 - 0.5) * 1000 / 2) - assert result.output_tokens_per_second == pytest.approx(2 / (1.4 - 0.5)) - - -@pytest.mark.smoke() -def test_text_generation_result_marshalling(): - result = create_sample_result() - serialized = result.model_dump() - deserialized = TextGenerationResult.model_validate(serialized) - - for key, value in vars(result).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_text_generation_error_initialization(): - error = TextGenerationError( - request=create_sample_request(), message="Error message" - ) - assert error.request.prompt == "Hello, world!" 
- assert error.message == "Error message" - - -@pytest.mark.smoke() -def test_text_generation_error_marshalling(): - error = TextGenerationError( - request=create_sample_request(), message="Error message" - ) - serialized = error.model_dump() - deserialized = TextGenerationError.model_validate(serialized) - - for key, value in vars(error).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_request_concurrency_measurement_initialization(): - start_time = time.time() - measurement = RequestConcurrencyMeasurement( - time=start_time, - completed=8, - errored=2, - processing=3, - ) - assert measurement.time == start_time - assert measurement.completed == 8 - assert measurement.errored == 2 - assert measurement.processing == 3 - - -@pytest.mark.smoke() -def test_request_concurrency_measurement_marshalling(): - start_time = time.time() - measurement = RequestConcurrencyMeasurement( - time=start_time, - completed=8, - errored=2, - processing=3, - ) - serialized = measurement.model_dump() - deserialized = RequestConcurrencyMeasurement.model_validate(serialized) - - for key, value in vars(measurement).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_text_generation_benchmark_default_initialization(): - benchmark = TextGenerationBenchmark(mode="asynchronous") - assert benchmark.mode == "asynchronous" - assert benchmark.rate is None - assert benchmark.results == [] - assert benchmark.errors == [] - assert benchmark.concurrencies == [] - - # computed - assert benchmark.request_count == 0 - assert benchmark.error_count == 0 - assert benchmark.total_count == 0 - assert benchmark.start_time is None - assert benchmark.end_time is None - assert benchmark.duration == 0.0 - assert benchmark.completed_request_rate == 0.0 - assert benchmark.request_latency_distribution is not None - assert benchmark.request_latency == 0.0 - assert benchmark.request_latency_percentiles == {} - assert benchmark.ttft_distribution is not None - assert benchmark.time_to_first_token == 0.0 - assert benchmark.time_to_first_token_percentiles == {} - assert benchmark.itl_distribution is not None - assert benchmark.inter_token_latency == 0.0 - assert benchmark.inter_token_latency_percentiles == {} - assert benchmark.output_token_throughput == 0.0 - assert benchmark.prompt_token_distribution is not None - assert benchmark.prompt_token == 0.0 - assert benchmark.prompt_token_percentiles == {} - assert benchmark.output_token_distribution is not None - assert benchmark.output_token == 0.0 - assert benchmark.output_token_percentiles == {} - - -@pytest.mark.smoke() -def test_text_generation_benchmark_initialization(): - benchmark = TextGenerationBenchmark(mode="asynchronous", rate=10) - assert benchmark.mode == "asynchronous" - assert benchmark.rate == 10 - - for _ in range(5): - benchmark.request_started() - benchmark.request_completed(create_sample_result()) - time.sleep(1.5) - - for _ in range(2): - benchmark.request_started() - benchmark.request_completed( - TextGenerationError( - request=create_sample_request(), message="Error message" - ) - ) - - def _test_percentiles(percentiles, value=None): - assert len(percentiles) == 7 - assert list(percentiles.keys()) == ["1", "5", "10", "50", "90", "95", "99"] - - if value is None: - assert all(per >= 0.0 for per in percentiles.values()) - else: - assert all(per == pytest.approx(value) for per in percentiles.values()) - - assert len(benchmark.results) == 5 - assert len(benchmark.errors) == 2 - assert 
len(benchmark.concurrencies) == 14 - assert benchmark.request_count == 5 - assert benchmark.error_count == 2 - assert benchmark.total_count == 7 - assert benchmark.start_time == pytest.approx(time.time() - 1.5 * 5, abs=0.01) - assert benchmark.end_time == pytest.approx(time.time(), abs=0.01) - assert benchmark.duration == benchmark.end_time - benchmark.start_time # type: ignore - assert benchmark.completed_request_rate == pytest.approx(5 / benchmark.duration) - assert benchmark.request_latency_distribution is not None - assert benchmark.request_latency == pytest.approx(1.5) - _test_percentiles(benchmark.request_latency_percentiles, 1.5) - assert benchmark.ttft_distribution is not None - assert benchmark.time_to_first_token == pytest.approx(500) - _test_percentiles(benchmark.time_to_first_token_percentiles, 500) - assert benchmark.itl_distribution is not None - assert benchmark.inter_token_latency == pytest.approx(450) - _test_percentiles(benchmark.inter_token_latency_percentiles, 450) - assert benchmark.output_token_throughput == pytest.approx(3.0 / 1.5, abs=0.01) - assert benchmark.prompt_token_distribution is not None - assert benchmark.prompt_token == pytest.approx(4.0) - _test_percentiles(benchmark.prompt_token_percentiles, 4.0) - assert benchmark.output_token_distribution is not None - assert benchmark.output_token == pytest.approx(3.0) - _test_percentiles(benchmark.output_token_percentiles, 3.0) - - -@pytest.mark.smoke() -def test_text_generation_benchmark_marshalling(): - benchmark = TextGenerationBenchmark(mode="asynchronous", rate=10) - for _ in range(5): - benchmark.request_started() - benchmark.request_completed(create_sample_result()) - - for _ in range(2): - benchmark.request_started() - benchmark.request_completed( - TextGenerationError( - request=create_sample_request(), message="Error message" - ) - ) - - serialized = benchmark.model_dump() - deserialized = TextGenerationBenchmark.model_validate(serialized) - - for key, value in vars(benchmark).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_text_generation_benchmark_report_initialization(): - report = TextGenerationBenchmarkReport( - benchmarks=[ - TextGenerationBenchmark(mode="asynchronous", rate=10), - TextGenerationBenchmark(mode="asynchronous", rate=20), - ], - args={ - "backend_type": "http", - "target": "http://example.com", - "model": "test-model", - }, - ) - assert len(report.benchmarks) == 2 - assert report.args == { - "backend_type": "http", - "target": "http://example.com", - "model": "test-model", - } - - -@pytest.mark.smoke() -def test_text_generation_benchmark_report_marshalling(): - report = TextGenerationBenchmarkReport( - benchmarks=[ - TextGenerationBenchmark(mode="asynchronous", rate=10), - TextGenerationBenchmark(mode="asynchronous", rate=20), - ], - args={ - "backend_type": "http", - "target": "http://example.com", - "model": "test-model", - }, - ) - serialized = report.model_dump() - deserialized = TextGenerationBenchmarkReport.model_validate(serialized) - - for key, value in vars(report).items(): - assert getattr(deserialized, key) == value diff --git a/tests/unit/core/test_serializable.py b/tests/unit/core/test_serializable.py deleted file mode 100644 index ce0cec8a..00000000 --- a/tests/unit/core/test_serializable.py +++ /dev/null @@ -1,151 +0,0 @@ -import tempfile -from pathlib import Path - -import pytest - -from guidellm.core.serializable import Serializable - - -class ExampleModel(Serializable): - name: str - age: int - - -@pytest.mark.smoke() -def 
test_serializable_json(): - # to json - example = ExampleModel(name="John Doe", age=30) - json_str = example.to_json() - assert '"name":"John Doe"' in json_str - assert '"age":30' in json_str - - # from json - example = ExampleModel.from_json(json_str) - assert example.name == "John Doe" - assert example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_yaml(): - # to yaml - example = ExampleModel(name="John Doe", age=30) - yaml_str = example.to_yaml() - assert "name: John Doe" in yaml_str - assert "age: 30" in yaml_str - - # from yaml - example = ExampleModel.from_yaml(yaml_str) - assert example.name == "John Doe" - assert example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_file_json(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.json" - saved_path = example.save_file(file_path, "json") - assert Path(saved_path).exists() - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_file_yaml(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.yaml" - saved_path = example.save_file(file_path, "yaml") - assert Path(saved_path).exists() - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_file_without_extension(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir) - assert Path(saved_path).exists() - assert saved_path.endswith(".yaml") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.sanity() -def test_serializable_file_with_directory_json(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, "json") - assert Path(saved_path).exists() - assert saved_path.endswith(".json") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.sanity() -def test_serializable_file_with_directory_yaml(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, "yaml") - assert Path(saved_path).exists() - assert saved_path.endswith(".yaml") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.sanity() -def test_serializable_file_infer_extension(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - inferred_path = example.save_file(temp_dir, "json") - assert Path(inferred_path).exists() - assert inferred_path.endswith(".json") - loaded_example = ExampleModel.load_file(inferred_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.regression() -def test_serializable_file_invalid_extension(): - # to file - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) / "example.txt" - with pytest.raises(ValueError, match="Unsupported file 
extension.*"): - example.save_file(invalid_file_path) - - # to directory - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) - with pytest.raises(ValueError, match="Unsupported file extension.*"): - example.save_file(invalid_file_path, type_="txt") # type: ignore - - # from file - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) / "example.txt" - with invalid_file_path.open("w") as file: - file.write("invalid content") - with pytest.raises(ValueError, match="Unsupported file extension.*"): - ExampleModel.load_file(invalid_file_path) - - -@pytest.mark.regression() -def test_serializable_load_missing_path(): - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) / "example.yaml" - with pytest.raises(FileNotFoundError): - ExampleModel.load_file(invalid_file_path) - - -@pytest.mark.regression() -def test_serializable_load_non_file_path(): - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) - with pytest.raises(ValueError, match="Path is not a file.*"): - ExampleModel.load_file(invalid_file_path) diff --git a/tests/unit/executor/__init__.py b/tests/unit/executor/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py deleted file mode 100644 index 58c0a9d4..00000000 --- a/tests/unit/executor/test_executor.py +++ /dev/null @@ -1,542 +0,0 @@ -from typing import List, Optional, Union -from unittest.mock import create_autospec, patch - -import pytest - -from guidellm.backend import Backend -from guidellm.config import settings -from guidellm.core import ( - TextGenerationBenchmarkReport, -) -from guidellm.executor import ( - Executor, - ExecutorResult, - Profile, - ProfileGenerationMode, - ProfileGenerator, -) -from guidellm.request import RequestGenerator -from guidellm.scheduler import Scheduler, SchedulerResult - - -@pytest.fixture() -def mock_scheduler(): - with patch("guidellm.executor.executor.Scheduler") as mock_scheduler: - - def scheduler_constructor(*args, **kwargs): - mock_instance = create_autospec(Scheduler, instance=True) - mock_instance.args = args - mock_instance.kwargs = kwargs - num_requests = kwargs.get("max_number", 10) - - async def run(): - benchmark = create_autospec( - TextGenerationBenchmarkReport, instance=True - ) - benchmark.completed_request_rate = kwargs.get("rate", None) - yield SchedulerResult( - completed=False, - count_total=10, - count_completed=0, - benchmark=benchmark, - current_result=None, - ) - - for index in range(num_requests): - yield SchedulerResult( - completed=False, - count_total=10, - count_completed=index + 1, - benchmark=benchmark, - current_result=create_autospec( - TextGenerationBenchmarkReport, instance=True - ), - ) - - yield SchedulerResult( - completed=True, - count_total=num_requests, - count_completed=num_requests, - benchmark=benchmark, - current_result=None, - ) - - mock_instance.run.side_effect = run - - return mock_instance - - mock_scheduler.side_effect = scheduler_constructor - yield mock_scheduler - - -@pytest.mark.smoke() -def test_executor_result_instantiation(): - report = create_autospec(TextGenerationBenchmarkReport, instance=True) - scheduler_result = create_autospec(SchedulerResult, instance=True) - executor_result = ExecutorResult( - completed=True, - count_total=10, - count_completed=5, - generation_modes=["synchronous", "throughput", "constant"], - report=report, - 
scheduler_result=scheduler_result, - ) - - assert executor_result.completed is True - assert executor_result.count_total == 10 - assert executor_result.count_completed == 5 - assert executor_result.report == report - assert executor_result.scheduler_result == scheduler_result - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("sweep", None), - ("synchronous", None), - ("throughput", None), - ("constant", 10), - ("constant", [10, 20, 30]), - ("poisson", 10), - ("poisson", [10, 20, 30]), - ], -) -def test_executor_instantiation(mode, rate): - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode=mode, - rate=rate, - max_number=100, - max_duration=60.0, - ) - - assert executor.backend == backend - assert executor.request_generator == request_generator - assert executor.profile_generator is not None - assert isinstance(executor.profile_generator, ProfileGenerator) - assert executor.profile_generator.mode == mode - assert ( - executor.profile_generator.rates == rate - if not rate or isinstance(rate, list) - else [rate] - ) - assert executor.max_number == 100 - assert executor.max_duration == 60.0 - - -def _check_executor_result_base( - result: ExecutorResult, - expected_completed: bool, - expected_count_total: int, - expected_count_completed: int, - expected_generation_modes: List[ProfileGenerationMode], -): - assert result.completed == expected_completed - assert result.count_total == expected_count_total - assert result.count_completed == expected_count_completed - assert result.generation_modes == expected_generation_modes - - -def _check_executor_result_report( - result: ExecutorResult, - mode: ProfileGenerationMode, - rate: Optional[Union[float, List[float]]], - max_number: Optional[int], - max_duration: Optional[float], - benchmarks_count: int, -): - assert result.report is not None - assert isinstance(result.report, TextGenerationBenchmarkReport) - - # check args - for expected in ( - "backend_type", - "target", - "model", - "data_type", - "data", - "tokenizer", - "mode", - "rate", - "max_number", - "max_duration", - ): - assert expected in result.report.args - - assert result.report.args["mode"] == mode - assert ( - result.report.args["rate"] == rate - if rate is None or not isinstance(rate, (float, int)) - else [rate] - ) - assert result.report.args["max_number"] == max_number - assert result.report.args["max_duration"] == max_duration - - # check benchmarks - assert len(result.report.benchmarks) == benchmarks_count - for benchmark in result.report.benchmarks: - assert isinstance(benchmark, TextGenerationBenchmarkReport) - - -def _check_executor_result_scheduler( - result: ExecutorResult, - expected_scheduler_result: bool, - expected_generation_modes: List[ProfileGenerationMode], - expected_index: Optional[int], - expected_profile_mode: Optional[ProfileGenerationMode], - expected_profile_rate: Optional[float], -): - if not expected_scheduler_result: - assert result.scheduler_result is None - assert result.current_index is None - assert result.current_profile is None - - return - - assert result.scheduler_result is not None - assert isinstance(result.scheduler_result, SchedulerResult) - assert result.current_index == expected_index - assert result.current_profile is not None - assert isinstance(result.current_profile, Profile) - assert result.current_profile.load_gen_mode == expected_profile_mode - assert 
result.current_profile.load_gen_rate == expected_profile_rate - assert ( - result.current_profile.load_gen_mode - == expected_generation_modes[expected_index] # type: ignore - ) - - -@pytest.mark.smoke() -@pytest.mark.asyncio() -async def test_executor_run_sweep(mock_scheduler): - num_requests = 15 - - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode="sweep", - rate=None, - max_number=num_requests, - ) - - num_profiles = 2 + settings.num_sweep_profiles - generation_modes = ["synchronous", "throughput"] + [ - "constant" - ] * settings.num_sweep_profiles - generation_rates = [None, None] + list(range(2, settings.num_sweep_profiles + 2)) - output_rates = [1, settings.num_sweep_profiles + 1] + list( - range(2, settings.num_sweep_profiles + 2) - ) - - iterator = executor.run() - - # Check start result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=0, - expected_generation_modes=generation_modes, # type: ignore - ) - _check_executor_result_report( - result=result, - mode="sweep", - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=0, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, # type: ignore - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - for scheduler_index in range(num_profiles): - for request_index in range(num_requests + 2): - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - expected_generation_modes=generation_modes, # type: ignore - ) - _check_executor_result_report( - result=result, - mode="sweep", - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=True, - expected_generation_modes=generation_modes, # type: ignore - expected_index=scheduler_index, - expected_profile_mode=generation_modes[scheduler_index], # type: ignore - expected_profile_rate=generation_rates[scheduler_index], - ) - # set the rate for the benchmark for sweep profile generation - result.report.benchmarks[-1].completed_request_rate = output_rates[ # type: ignore - scheduler_index - ] - result.report.benchmarks[-1].request_count = num_requests # type: ignore - - # Check end result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=True, - expected_count_total=num_profiles, - expected_count_completed=num_profiles, - expected_generation_modes=generation_modes, # type: ignore - ) - _check_executor_result_report( - result=result, - mode="sweep", - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=num_profiles, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, # type: ignore - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - -@pytest.mark.smoke() -@pytest.mark.asyncio() 
-@pytest.mark.parametrize( - "mode", - [ - "synchronous", - "throughput", - ], -) -async def test_executor_run_non_rate_modes(mock_scheduler, mode): - num_requests = 15 - - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode=mode, - rate=None, - max_number=num_requests, - ) - - iterator = executor.run() - - # Check start result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=1, - expected_count_completed=0, - expected_generation_modes=[mode], - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=0, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=[mode], - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - for request_index in range(num_requests + 2): - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=1, - expected_count_completed=0 if request_index < num_requests + 1 else 1, - expected_generation_modes=[mode], - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=0 if request_index < num_requests + 1 else 1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=True, - expected_generation_modes=[mode], - expected_index=0, - expected_profile_mode=mode, - expected_profile_rate=None, - ) - - # Check end result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=True, - expected_count_total=1, - expected_count_completed=1, - expected_generation_modes=[mode], - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=[mode], - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - -@pytest.mark.smoke() -@pytest.mark.asyncio() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("constant", 10), - ("constant", [10, 20, 30]), - ("poisson", 10), - ("poisson", [10, 20, 30]), - ], -) -async def test_executor_run_rate_modes(mock_scheduler, mode, rate): - num_requests = 15 - - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode=mode, - rate=rate, - max_number=num_requests, - ) - - num_profiles = len(rate) if isinstance(rate, list) else 1 - generation_modes = [mode] * num_profiles - generation_rates = rate if isinstance(rate, list) else [rate] - - iterator = executor.run() - - # Check start result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=0, - expected_generation_modes=generation_modes, - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=rate, - max_number=num_requests, - max_duration=None, - benchmarks_count=0, - ) - _check_executor_result_scheduler( 
- result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - for scheduler_index in range(num_profiles): - for request_index in range(num_requests + 2): - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - expected_generation_modes=generation_modes, - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=rate, - max_number=num_requests, - max_duration=None, - benchmarks_count=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=True, - expected_generation_modes=generation_modes, - expected_index=scheduler_index, - expected_profile_mode=generation_modes[scheduler_index], - expected_profile_rate=generation_rates[scheduler_index], - ) - - # Check end result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=True, - expected_count_total=num_profiles, - expected_count_completed=num_profiles, - expected_generation_modes=generation_modes, - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=rate, - max_number=num_requests, - max_duration=None, - benchmarks_count=num_profiles, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) diff --git a/tests/unit/executor/test_profile_generator.py b/tests/unit/executor/test_profile_generator.py deleted file mode 100644 index 9c91d574..00000000 --- a/tests/unit/executor/test_profile_generator.py +++ /dev/null @@ -1,204 +0,0 @@ -from typing import get_args -from unittest.mock import create_autospec - -import pytest - -from guidellm import settings -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationBenchmarkReport, -) -from guidellm.executor import Profile, ProfileGenerationMode, ProfileGenerator - - -@pytest.mark.smoke() -def test_profile_generator_mode(): - assert set(get_args(ProfileGenerationMode)) == { - "sweep", - "synchronous", - "throughput", - "constant", - "poisson", - } - - -@pytest.mark.smoke() -def test_profile_instantiation(): - profile = Profile(load_gen_mode="constant", load_gen_rate=10) - assert profile.load_gen_mode == "constant" - assert profile.load_gen_rate == 10 - assert profile.args == {} - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("sweep", None), - ("synchronous", None), - ("throughput", None), - ("constant", 10), - ("constant", [10, 20, 30]), - ("poisson", 10), - ("poisson", [10, 20, 30]), - ], -) -def test_profile_generator_instantiation(mode, rate): - generator = ProfileGenerator(mode=mode, rate=rate) - assert generator.mode == mode - - if rate is None: - assert generator.rates is None - elif isinstance(rate, list): - assert generator.rates == rate - else: - assert generator.rates == [rate] - - if mode == "sweep": - assert len(generator) == settings.num_sweep_profiles + 2 - assert ( - generator.profile_generation_modes - == ["synchronous", "throughput"] - + ["constant"] * settings.num_sweep_profiles - ) - elif mode in ("throughput", "synchronous"): - assert len(generator) == 
1 - assert generator.profile_generation_modes == [mode] - else: - assert len(generator) == len(rate) if isinstance(rate, list) else 1 - assert generator.profile_generation_modes == [mode] * ( - len(rate) if isinstance(rate, list) else 1 - ) - - assert generator.generated_count == 0 - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - # invalid modes - ("invalid_mode", None), - # rates supplied for non-applicable modes - ("sweep", 10), - ("sweep", [10, 20, 30]), - ("synchronous", 10), - ("synchronous", [10, 20, 30]), - ("throughput", 10), - ("throughput", [10, 20, 30]), - # invalid rates supplied for applicable modes - ("constant", None), - ("constant", -1), - ("constant", 0), - ("poisson", None), - ("poisson", -1), - ("poisson", 0), - ], -) -def test_profile_generator_invalid_instantiation(mode, rate): - with pytest.raises(ValueError): - ProfileGenerator(mode=mode, rate=rate) - - -@pytest.mark.sanity() -def test_profile_generator_next_sweep(): - generator = ProfileGenerator(mode="sweep") - current_report = TextGenerationBenchmarkReport() - - for index in range(settings.num_sweep_profiles + 2): - profile: Profile = generator.next(current_report) # type: ignore - - if index == 0: - assert profile.load_gen_mode == "synchronous" - assert profile.load_gen_rate is None - mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) - mock_benchmark.completed_request_rate = 1 - current_report.add_benchmark(mock_benchmark) - elif index == 1: - assert profile.load_gen_mode == "throughput" - assert profile.load_gen_rate is None - mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) - mock_benchmark.completed_request_rate = 10 - current_report.add_benchmark(mock_benchmark) - else: - assert profile.load_gen_mode == "constant" - assert profile.load_gen_rate == index - - assert generator.generated_count == index + 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -def test_profile_generator_next_synchronous(): - generator = ProfileGenerator(mode="synchronous") - current_report = TextGenerationBenchmarkReport() - - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "synchronous" - assert profile.load_gen_rate is None - assert generator.generated_count == 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -def test_profile_generator_next_throughput(): - generator = ProfileGenerator(mode="throughput") - current_report = TextGenerationBenchmarkReport() - - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "throughput" - assert profile.load_gen_rate is None - assert generator.generated_count == 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - "rate", - [ - 10, - [10, 20, 30], - ], -) -def test_profile_generator_next_constant(rate): - generator = ProfileGenerator(mode="constant", rate=rate) - test_rates = rate if isinstance(rate, list) else [rate] - current_report = TextGenerationBenchmarkReport() - - for index, test_rate in enumerate(test_rates): - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "constant" - assert profile.load_gen_rate == test_rate - assert generator.generated_count == index + 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - "rate", - 
[ - 10, - [10, 20, 30], - ], -) -def test_profile_generator_next_poisson(rate): - generator = ProfileGenerator(mode="poisson", rate=rate) - test_rates = rate if isinstance(rate, list) else [rate] - current_report = TextGenerationBenchmarkReport() - - for index, test_rate in enumerate(test_rates): - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "poisson" - assert profile.load_gen_rate == test_rate - assert generator.generated_count == index + 1 - - for _ in range(3): - assert generator.next(current_report) is None diff --git a/tests/unit/mock_backend.py b/tests/unit/mock_backend.py index 9eb4d6ee..0e59e93e 100644 --- a/tests/unit/mock_backend.py +++ b/tests/unit/mock_backend.py @@ -36,10 +36,17 @@ def target(self) -> str: def model(self) -> Optional[str]: return self._model - def check_setup(self): + @property + def info(self) -> Dict[str, Any]: + return {} + + async def prepare_multiprocessing(self): + pass + + async def check_setup(self): pass - def available_models(self) -> List[str]: + async def available_models(self) -> List[str]: return [self.model] # type: ignore async def text_completions( # type: ignore @@ -97,24 +104,38 @@ async def _text_prompt_response_generator( yield StreamingTextResponse( type_="start", + value="", + start_time=start_time, + first_iter_time=None, iter_count=0, delta="", time=start_time, request_id=request_id, ) + first_iter_time = None + last_iter_time = None + for index, token in enumerate(tokens): if self._iter_delay: await asyncio.sleep(self._iter_delay) + if first_iter_time is None: + first_iter_time = time.time() + yield StreamingTextResponse( type_="iter", + value="".join(tokens[: index + 1]), + start_time=start_time, + first_iter_time=first_iter_time, iter_count=index + 1, delta=token, time=time.time(), request_id=request_id, ) + last_iter_time = time.time() + yield ResponseSummary( value="".join(tokens), request_args=RequestArgs( @@ -125,6 +146,8 @@ async def _text_prompt_response_generator( iterations=len(tokens), start_time=start_time, end_time=time.time(), + first_iter_time=first_iter_time, + last_iter_time=last_iter_time, request_prompt_tokens=prompt_token_count, request_output_tokens=output_token_count, response_prompt_tokens=len(prompt.split()) + prompt.count(" "), diff --git a/tests/dummy/data/__init__.py b/tests/unit/objects/__init__.py similarity index 100% rename from tests/dummy/data/__init__.py rename to tests/unit/objects/__init__.py diff --git a/tests/unit/objects/test_pydantic.py b/tests/unit/objects/test_pydantic.py new file mode 100644 index 00000000..a27fac5a --- /dev/null +++ b/tests/unit/objects/test_pydantic.py @@ -0,0 +1,43 @@ +import pytest +from pydantic import computed_field + +from guidellm.objects.pydantic import StandardBaseModel + + +class ExampleModel(StandardBaseModel): + name: str + age: int + + @computed_field # type: ignore[misc] + @property + def computed(self) -> str: + return self.name + " " + str(self.age) + + +@pytest.mark.smoke() +def test_standard_base_model_initialization(): + example = ExampleModel(name="John Doe", age=30) + assert example.name == "John Doe" + assert example.age == 30 + assert example.computed == "John Doe 30" + + +@pytest.mark.smoke() +def test_standard_base_model_invalid_initialization(): + with pytest.raises(ValueError): + ExampleModel(name="John Doe", age="thirty") # type: ignore[arg-type] + + +@pytest.mark.smoke() +def test_standard_base_model_marshalling(): + example = ExampleModel(name="John Doe", age=30) + serialized = 
example.model_dump() + assert serialized["name"] == "John Doe" + assert serialized["age"] == 30 + assert serialized["computed"] == "John Doe 30" + + serialized["computed"] = "Jane Doe 40" + deserialized = ExampleModel.model_validate(serialized) + assert deserialized.name == "John Doe" + assert deserialized.age == 30 + assert deserialized.computed == "John Doe 30" diff --git a/tests/unit/objects/test_statistics.py b/tests/unit/objects/test_statistics.py new file mode 100644 index 00000000..692db4b6 --- /dev/null +++ b/tests/unit/objects/test_statistics.py @@ -0,0 +1,693 @@ +import math +import time +from typing import List, Literal + +import numpy as np +import pytest + +from guidellm.objects import ( + DistributionSummary, + Percentiles, + RunningStats, + StatusDistributionSummary, + TimeRunningStats, +) + + +def create_default_percentiles() -> Percentiles: + return Percentiles( + p001=0.1, + p01=1.0, + p05=5.0, + p10=10.0, + p25=25.0, + p75=75.0, + p90=90.0, + p95=95.0, + p99=99.0, + p999=99.9, + ) + + +def create_default_distribution_summary() -> DistributionSummary: + return DistributionSummary( + mean=50.0, + median=50.0, + mode=50.0, + variance=835, + std_dev=math.sqrt(835), + min=0.0, + max=100.0, + count=1001, + total_sum=50050.0, + percentiles=create_default_percentiles(), + ) + + +@pytest.mark.smoke() +def test_percentiles_initialization(): + percentiles = create_default_percentiles() + assert percentiles.p001 == 0.1 + assert percentiles.p01 == 1.0 + assert percentiles.p05 == 5.0 + assert percentiles.p10 == 10.0 + assert percentiles.p25 == 25.0 + assert percentiles.p75 == 75.0 + assert percentiles.p90 == 90.0 + assert percentiles.p95 == 95.0 + assert percentiles.p99 == 99.0 + assert percentiles.p999 == 99.9 + + +@pytest.mark.smoke() +def test_percentiles_invalid_initialization(): + test_kwargs = { + "p001": 0.1, + "p01": 1.0, + "p05": 5.0, + "p10": 10.0, + "p25": 25.0, + "p75": 75.0, + "p90": 90.0, + "p95": 95.0, + "p99": 99.0, + "p999": 99.9, + } + test_missing_keys = list(test_kwargs.keys()) + + for missing_key in test_missing_keys: + kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} + with pytest.raises(ValueError): + Percentiles(**kwargs) + + +@pytest.mark.smoke() +def test_percentiles_marshalling(): + percentiles = create_default_percentiles() + serialized = percentiles.model_dump() + deserialized = Percentiles.model_validate(serialized) + + for key, value in vars(percentiles).items(): + assert getattr(deserialized, key) == value + + +@pytest.mark.smoke() +def test_distribution_summary_initilaization(): + distribution_summary = create_default_distribution_summary() + assert distribution_summary.mean == 50.0 + assert distribution_summary.median == 50.0 + assert distribution_summary.mode == 50.0 + assert distribution_summary.variance == 835 + assert distribution_summary.std_dev == math.sqrt(835) + assert distribution_summary.min == 0.0 + assert distribution_summary.max == 100.0 + assert distribution_summary.count == 1001 + assert distribution_summary.total_sum == 50050.0 + assert distribution_summary.percentiles.p001 == 0.1 + assert distribution_summary.percentiles.p01 == 1.0 + assert distribution_summary.percentiles.p05 == 5.0 + assert distribution_summary.percentiles.p10 == 10.0 + assert distribution_summary.percentiles.p25 == 25.0 + assert distribution_summary.percentiles.p75 == 75.0 + assert distribution_summary.percentiles.p90 == 90.0 + assert distribution_summary.percentiles.p95 == 95.0 + assert distribution_summary.percentiles.p99 == 99.0 + 
assert distribution_summary.percentiles.p999 == 99.9 + + +@pytest.mark.smoke() +def test_distribution_summary_invalid_initialization(): + test_kwargs = { + "mean": 50.0, + "median": 50.0, + "mode": 50.0, + "variance": 835, + "std_dev": math.sqrt(835), + "min": 0.0, + "max": 100.0, + "count": 1001, + "total_sum": 50050.0, + "percentiles": create_default_percentiles(), + } + test_missing_keys = list(test_kwargs.keys()) + for missing_key in test_missing_keys: + kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} + with pytest.raises(ValueError): + DistributionSummary(**kwargs) # type: ignore[arg-type] + + +@pytest.mark.smoke() +def test_distribution_summary_marshalling(): + distribution_summary = create_default_distribution_summary() + serialized = distribution_summary.model_dump() + deserialized = DistributionSummary.model_validate(serialized) + + for key, value in vars(distribution_summary).items(): + assert getattr(deserialized, key) == value + + +@pytest.mark.smoke() +def test_distribution_summary_from_distribution_function(): + values = [val / 10.0 for val in range(1001)] + distribution = [(val, 1.0) for val in values] + distribution_summary = DistributionSummary.from_distribution_function(distribution) + assert distribution_summary.mean == pytest.approx(np.mean(values)) + assert distribution_summary.median == pytest.approx(np.median(values)) + assert distribution_summary.mode == 0.0 + assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) + assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary.min == min(values) + assert distribution_summary.max == max(values) + assert distribution_summary.count == len(values) + assert distribution_summary.total_sum == sum(values) + assert distribution_summary.percentiles.p001 == pytest.approx( + np.percentile(values, 0.1) + ) + assert distribution_summary.percentiles.p01 == pytest.approx( + np.percentile(values, 1.0) + ) + assert distribution_summary.percentiles.p05 == pytest.approx( + np.percentile(values, 5.0) + ) + assert distribution_summary.percentiles.p10 == pytest.approx( + np.percentile(values, 10.0) + ) + assert distribution_summary.percentiles.p25 == pytest.approx( + np.percentile(values, 25.0) + ) + assert distribution_summary.percentiles.p75 == pytest.approx( + np.percentile(values, 75.0) + ) + assert distribution_summary.percentiles.p90 == pytest.approx( + np.percentile(values, 90.0) + ) + assert distribution_summary.percentiles.p95 == pytest.approx( + np.percentile(values, 95.0) + ) + assert distribution_summary.percentiles.p99 == pytest.approx( + np.percentile(values, 99.0) + ) + assert distribution_summary.percentiles.p999 == pytest.approx( + np.percentile(values, 99.9) + ) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_distribution_function( + distribution, include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == len(values) + + +def test_distribution_summary_from_values(): + values = [val / 10 for val in range(1001)] + distribution_summary = DistributionSummary.from_values(values) + assert distribution_summary.mean == pytest.approx(np.mean(values)) + assert distribution_summary.median == pytest.approx(np.median(values)) + assert distribution_summary.mode == 0.0 + assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) 
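As a usage aside: the `from_values` factory exercised in the surrounding tests turns a raw list of samples into a full `DistributionSummary`. The sketch below is illustrative only; the sample data is hypothetical, and the only API assumed is the `from_values(values, weights=..., include_cdf=...)` signature and the fields asserted in these tests.

```python
from guidellm.objects import DistributionSummary

# Hypothetical per-request latencies (seconds) from a benchmark run.
latencies = [0.82, 1.10, 0.94, 1.41, 1.03, 1.27]

# Build a summary from raw values, mirroring DistributionSummary.from_values(...)
# as called in these tests; include_cdf=True also materializes the CDF.
summary = DistributionSummary.from_values(latencies, include_cdf=True)
print(summary.mean, summary.percentiles.p95)

# Weights bias the summary toward repeated observations, matching the
# weights=[2] * len(values) variant exercised below.
weighted = DistributionSummary.from_values(latencies, weights=[2] * len(latencies))
```

A side note on the fixture constants above: `create_default_distribution_summary` describes the 1001-point grid 0.0, 0.1, ..., 100.0, for which the population variance is (N² − 1)/12 · step² = (1001² − 1)/12 · 0.01 = 835, matching the hard-coded `variance=835` and `std_dev=math.sqrt(835)`.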
+ assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary.min == min(values) + assert distribution_summary.max == max(values) + assert distribution_summary.count == len(values) + assert distribution_summary.total_sum == sum(values) + assert distribution_summary.percentiles.p001 == pytest.approx( + np.percentile(values, 0.1) + ) + assert distribution_summary.percentiles.p01 == pytest.approx( + np.percentile(values, 1.0) + ) + assert distribution_summary.percentiles.p05 == pytest.approx( + np.percentile(values, 5.0) + ) + assert distribution_summary.percentiles.p10 == pytest.approx( + np.percentile(values, 10.0) + ) + assert distribution_summary.percentiles.p25 == pytest.approx( + np.percentile(values, 25.0) + ) + assert distribution_summary.percentiles.p75 == pytest.approx( + np.percentile(values, 75.0) + ) + assert distribution_summary.percentiles.p90 == pytest.approx( + np.percentile(values, 90.0) + ) + assert distribution_summary.percentiles.p95 == pytest.approx( + np.percentile(values, 95.0) + ) + assert distribution_summary.percentiles.p99 == pytest.approx( + np.percentile(values, 99.0) + ) + assert distribution_summary.percentiles.p999 == pytest.approx( + np.percentile(values, 99.9) + ) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_weights = DistributionSummary.from_values( + values, weights=[2] * len(values) + ) + assert distribution_summary_weights.mean == pytest.approx(np.mean(values)) + assert distribution_summary_weights.median == pytest.approx(np.median(values)) + assert distribution_summary_weights.mode == 0.0 + assert distribution_summary_weights.variance == pytest.approx( + np.var(values, ddof=0) + ) + assert distribution_summary_weights.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary_weights.min == min(values) + assert distribution_summary_weights.max == max(values) + assert distribution_summary_weights.count == len(values) + assert distribution_summary_weights.total_sum == sum(values) + assert distribution_summary_weights.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_values(values, include_cdf=True) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == len(values) + + +def test_distribution_summary_from_request_times_concurrency(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + distribution_summary = DistributionSummary.from_request_times( + requests, distribution_type="concurrency" + ) + assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) + assert distribution_summary.median == pytest.approx(10.0) + assert distribution_summary.mode == 10.0 + assert distribution_summary.variance == pytest.approx(0, abs=0.1) + assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) + assert distribution_summary.min == pytest.approx(1) + assert distribution_summary.max == pytest.approx(10.0) + assert distribution_summary.count == 10 + assert distribution_summary.total_sum == pytest.approx(55.0) + assert distribution_summary.percentiles.p001 == pytest.approx(10, abs=5) + assert distribution_summary.percentiles.p01 == pytest.approx(10) + assert distribution_summary.percentiles.p05 == pytest.approx(10) + assert distribution_summary.percentiles.p10 == pytest.approx(10) + assert 
distribution_summary.percentiles.p25 == pytest.approx(10) + assert distribution_summary.percentiles.p75 == pytest.approx(10) + assert distribution_summary.percentiles.p90 == pytest.approx(10) + assert distribution_summary.percentiles.p95 == pytest.approx(10) + assert distribution_summary.percentiles.p99 == pytest.approx(10) + assert distribution_summary.percentiles.p999 == pytest.approx(10) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_request_times( + requests, distribution_type="concurrency", include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 10 + + +def test_distribution_summary_from_request_times_rate(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + distribution_summary = DistributionSummary.from_request_times( + requests, distribution_type="rate" + ) + assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) + assert distribution_summary.median == pytest.approx(10.0) + assert distribution_summary.mode == pytest.approx(10.0) + assert distribution_summary.variance == pytest.approx(0, abs=0.1) + assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) + assert distribution_summary.min == pytest.approx(1.0) + assert distribution_summary.max == pytest.approx(10.0) + assert distribution_summary.count == 12 + assert distribution_summary.total_sum == pytest.approx(111.0) + assert distribution_summary.percentiles.p001 == pytest.approx(10.0, abs=0.5) + assert distribution_summary.percentiles.p01 == pytest.approx(10.0) + assert distribution_summary.percentiles.p05 == pytest.approx(10.0) + assert distribution_summary.percentiles.p10 == pytest.approx(10.0) + assert distribution_summary.percentiles.p25 == pytest.approx(10.0) + assert distribution_summary.percentiles.p75 == pytest.approx(10.0) + assert distribution_summary.percentiles.p90 == pytest.approx(10.0) + assert distribution_summary.percentiles.p95 == pytest.approx(10.0) + assert distribution_summary.percentiles.p99 == pytest.approx(10.0) + assert distribution_summary.percentiles.p999 == pytest.approx(10.0) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_request_times( + requests, distribution_type="rate", include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 12 + + +def test_distribution_summary_from_iterable_request_times(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + # create 9 iterations for each request with first iter at start + 0.1 + # and spaced at 0.1 seconds apart + first_iter_times = [val / 10 + 0.1 for val in range(10001)] + iter_counts = [9 for _ in range(10001)] + first_iter_counts = [1 for _ in range(10001)] + + distribution_summary = DistributionSummary.from_iterable_request_times( + requests, first_iter_times, iter_counts, first_iter_counts + ) + assert distribution_summary.mean == pytest.approx(90.0, abs=0.1) + assert distribution_summary.median == pytest.approx(80.0) + assert distribution_summary.mode == pytest.approx(80.0) + assert distribution_summary.variance == pytest.approx(704.463, abs=0.001) + assert 
distribution_summary.std_dev == pytest.approx(26.541, abs=0.001) + assert distribution_summary.min == pytest.approx(0.0) + assert distribution_summary.max == pytest.approx(160.0) + assert distribution_summary.count == 44 + assert distribution_summary.total_sum == pytest.approx(3538.85, abs=0.01) + assert distribution_summary.percentiles.p001 == pytest.approx(80.0) + assert distribution_summary.percentiles.p01 == pytest.approx(80.0) + assert distribution_summary.percentiles.p05 == pytest.approx(80.0) + assert distribution_summary.percentiles.p10 == pytest.approx(80.0) + assert distribution_summary.percentiles.p25 == pytest.approx(80.0) + assert distribution_summary.percentiles.p75 == pytest.approx(80.0) + assert distribution_summary.percentiles.p90 == pytest.approx(160.0) + assert distribution_summary.percentiles.p95 == pytest.approx(160.0) + assert distribution_summary.percentiles.p99 == pytest.approx(160.0) + assert distribution_summary.percentiles.p999 == pytest.approx(160.0) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_iterable_request_times( + requests, first_iter_times, iter_counts, first_iter_counts, include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 44 + + +def test_status_distribution_summary_initialization(): + status_distribution_summary = StatusDistributionSummary( + total=create_default_distribution_summary(), + successful=create_default_distribution_summary(), + incomplete=create_default_distribution_summary(), + errored=create_default_distribution_summary(), + ) + assert status_distribution_summary.total.mean == 50.0 + assert status_distribution_summary.successful.mean == 50.0 + assert status_distribution_summary.incomplete.mean == 50.0 + assert status_distribution_summary.errored.mean == 50.0 + + +def test_status_distribution_summary_marshalling(): + status_distribution_summary = StatusDistributionSummary( + total=create_default_distribution_summary(), + successful=create_default_distribution_summary(), + incomplete=create_default_distribution_summary(), + errored=create_default_distribution_summary(), + ) + serialized = status_distribution_summary.model_dump() + deserialized = StatusDistributionSummary.model_validate(serialized) + + for key, value in vars(status_distribution_summary).items(): + for child_key, child_value in vars(value).items(): + assert getattr(getattr(deserialized, key), child_key) == child_value + + +def test_status_distribution_summary_from_values(): + value_types: List[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + values = [float(val % 3) for val in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_values( + value_types, values + ) + assert status_distribution_summary.total.count == len(values) + assert status_distribution_summary.total.mean == pytest.approx(np.mean(values)) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + np.mean( + [val for ind, val in enumerate(values) if value_types[ind] == "successful"] + ) + ) + assert status_distribution_summary.successful.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "successful"] + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert 
status_distribution_summary.incomplete.mean == pytest.approx( + np.mean( + [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] + ) + ) + assert status_distribution_summary.incomplete.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx( + np.mean([val for ind, val in enumerate(values) if value_types[ind] == "error"]) + ) + assert status_distribution_summary.errored.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "error"] + ) + assert status_distribution_summary.errored.cumulative_distribution_function is None + + status_distribution_summary_cdf = StatusDistributionSummary.from_values( + value_types, values, include_cdf=True + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) + + +def test_status_distribution_summary_from_request_times(): + request_types: List[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + requests = [((val % 3) / 10, (val % 3) / 10 + 1) for val in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_request_times( + request_types, requests, distribution_type="concurrency" + ) + assert status_distribution_summary.total.mean == pytest.approx(2500.0, abs=0.01) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + 1000.0, abs=0.01 + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert status_distribution_summary.incomplete.mean == pytest.approx( + 1000.0, abs=0.01 + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx(1000.0, abs=0.01) + assert status_distribution_summary.errored.cumulative_distribution_function is None + + status_distribution_summary_cdf = StatusDistributionSummary.from_request_times( + request_types, requests, distribution_type="concurrency", include_cdf=True + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) + + +def test_status_distribution_summary_from_iterable_request_times(): + request_types: List[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + requests = [(val % 3 / 10, val % 3 / 10 + 1) for val in range(3000)] + first_iter_times = [val % 3 / 10 + 0.1 for val in range(3000)] + iter_counts = [9 for _ in range(3000)] + first_iter_counts = [1 for _ in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_iterable_request_times( + request_types, 
+ requests, + first_iter_times, + iter_counts, + first_iter_counts, + ) + assert status_distribution_summary.total.mean == pytest.approx(21666.66, abs=0.01) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + 8000.0, abs=0.01 + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert status_distribution_summary.incomplete.mean == pytest.approx( + 8000.0, abs=0.01 + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx(8000.0, abs=0.01) + assert status_distribution_summary.errored.cumulative_distribution_function is None + + status_distribution_summary_cdf = ( + StatusDistributionSummary.from_iterable_request_times( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + include_cdf=True, + ) + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) + + +def test_running_stats_initialization(): + running_stats = RunningStats() + assert running_stats.start_time == pytest.approx(time.time(), abs=0.01) + assert running_stats.count == 0 + assert running_stats.total == 0 + assert running_stats.last == 0 + assert running_stats.mean == 0 + assert running_stats.rate == 0 + + +def test_running_stats_marshalling(): + running_stats = RunningStats() + serialized = running_stats.model_dump() + deserialized = RunningStats.model_validate(serialized) + + for key, value in vars(running_stats).items(): + assert getattr(deserialized, key) == value + + +def test_running_stats_update(): + running_stats = RunningStats() + running_stats.update(1) + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + assert running_stats.mean == 1 + time.sleep(1.0) + assert running_stats.rate == pytest.approx( + 1.0 / (time.time() - running_stats.start_time), abs=0.1 + ) + + running_stats.update(2) + assert running_stats.count == 2 + assert running_stats.total == 3 + assert running_stats.last == 2 + assert running_stats.mean == 1.5 + time.sleep(1) + assert running_stats.rate == pytest.approx( + 3 / (time.time() - running_stats.start_time), abs=0.1 + ) + + +def test_running_stats_add(): + running_stats = RunningStats() + mean = running_stats + 1 + assert mean == 1 + assert mean == running_stats.mean + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + + +def test_running_stats_iadd(): + running_stats = RunningStats() + running_stats += 1 + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + assert running_stats.mean == 1 + + +def test_time_running_stats_initialization(): + time_running_stats = TimeRunningStats() + assert time_running_stats.start_time == pytest.approx(time.time(), abs=0.01) + assert time_running_stats.count == 0 + assert time_running_stats.total == 0 + assert time_running_stats.last == 0 + assert time_running_stats.mean == 0 + assert time_running_stats.rate == 0 + assert time_running_stats.total_ms == 0 + assert 
time_running_stats.last_ms == 0 + assert time_running_stats.mean_ms == 0 + assert time_running_stats.rate_ms == 0 + + +def test_time_running_stats_marshalling(): + time_running_stats = TimeRunningStats() + serialized = time_running_stats.model_dump() + deserialized = TimeRunningStats.model_validate(serialized) + + for key, value in vars(time_running_stats).items(): + assert getattr(deserialized, key) == value + + +def test_time_running_stats_update(): + time_running_stats = TimeRunningStats() + time_running_stats.update(1) + assert time_running_stats.count == 1 + assert time_running_stats.total == 1 + assert time_running_stats.last == 1 + assert time_running_stats.mean == 1 + assert time_running_stats.total_ms == 1000 + assert time_running_stats.last_ms == 1000 + assert time_running_stats.mean_ms == 1000 + time.sleep(1.0) + assert time_running_stats.rate == pytest.approx( + 1.0 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + assert time_running_stats.rate_ms == pytest.approx( + 1000 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + + time_running_stats.update(2) + assert time_running_stats.count == 2 + assert time_running_stats.total == 3 + assert time_running_stats.last == 2 + assert time_running_stats.mean == 1.5 + assert time_running_stats.total_ms == 3000 + assert time_running_stats.last_ms == 2000 + assert time_running_stats.mean_ms == 1500 + time.sleep(1) + assert time_running_stats.rate == pytest.approx( + 3 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + assert time_running_stats.rate_ms == pytest.approx( + 3000 / (time.time() - time_running_stats.start_time), abs=0.1 + ) diff --git a/tests/unit/request/__init__.py b/tests/unit/request/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/request/test_base.py b/tests/unit/request/test_base.py deleted file mode 100644 index 73cf1b14..00000000 --- a/tests/unit/request/test_base.py +++ /dev/null @@ -1,160 +0,0 @@ -import re -import time -from typing import List -from unittest.mock import MagicMock, Mock, patch - -import pytest - -from guidellm.core import TextGenerationRequest -from tests.dummy.services import TestRequestGenerator - - -@pytest.mark.smoke() -def test_request_generator_sync_constructor(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - assert generator.mode == "sync" - assert generator.async_queue_size == 50 # Default value - - -@pytest.mark.smoke() -def test_request_generator_async_constructor(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="async", tokenizer="mock-tokenizer", async_queue_size=10 - ) - assert generator.mode == "async" - assert generator.async_queue_size == 10 - generator.stop() - - -@pytest.mark.smoke() -def test_request_generator_sync_iter(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - assert len(items) == 5 - assert items[0].prompt == "Test prompt" - - -@pytest.mark.smoke() -def test_request_generator_async_iter(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="async", tokenizer="mock-tokenizer") - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - generator.stop() - assert len(items) == 5 - assert items[0].prompt == "Test prompt" - - -@pytest.mark.smoke() -def test_request_generator_iter_calls_create_item(mock_auto_tokenizer): - generator = 
TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt"), - ) - - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - assert len(items) == 5 - generator.create_item.assert_called() - - -@pytest.mark.smoke() -def test_request_generator_async_iter_calls_create_item(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt"), - ) - - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - generator.stop() - assert len(items) == 5 - generator.create_item.assert_called() - - -@pytest.mark.sanity() -def test_request_generator_repr(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="sync", tokenizer="mock-tokenizer", async_queue_size=100 - ) - repr_str = repr(generator) - assert repr_str.startswith("RequestGenerator(") - assert "mode=sync" in repr_str - assert "async_queue_size=100" in repr_str - assert "tokenizer= List[int]: - tokens = re.findall(r"\w+|[^\w\s]", text) - return [0] * len(tokens) - - mock_tokenizer = MagicMock() - mock_tokenizer.tokenize = MagicMock(side_effect=_fake_tokenize) - - generator = TestRequestGenerator(tokenizer=mock_tokenizer) - assert generator.tokenizer == mock_tokenizer - - with patch( - "guidellm.request.base.AutoTokenizer", - ) as MockAutoTokenizer: # noqa: N806 - MockAutoTokenizer.from_pretrained.return_value = mock_tokenizer - generator = TestRequestGenerator(tokenizer="mock-tokenizer") - assert generator.tokenizer == mock_tokenizer - MockAutoTokenizer.from_pretrained.assert_called_with("mock-tokenizer") - - -@pytest.mark.regression() -def test_request_generator_populate_queue(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="async", tokenizer="mock-tokenizer", async_queue_size=2 - ) - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt") - ) - - time.sleep(0.2) # Allow some time for the queue to populate - generator.stop() - assert generator._queue.qsize() > 0 - - -@pytest.mark.regression() -def test_request_generator_async_stop_during_population(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="async", tokenizer="mock-tokenizer", async_queue_size=2 - ) - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt") - ) - - time.sleep(0.1) # Allow some time for the queue to start populating - generator.stop() - - # Ensure the stop event is set and thread is no longer alive - assert generator._stop_event.is_set() - assert not generator._thread.is_alive() diff --git a/tests/unit/request/test_emulated.py b/tests/unit/request/test_emulated.py deleted file mode 100644 index f6af1301..00000000 --- a/tests/unit/request/test_emulated.py +++ /dev/null @@ -1,373 +0,0 @@ -import json -import tempfile -from pathlib import Path -from typing import Tuple, Union - -import numpy as np -import pytest -from transformers import PreTrainedTokenizer # type: ignore - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.emulated import ( - EmulatedConfig, - EmulatedRequestGenerator, - EndlessTokens, -) - - -@pytest.mark.smoke() -def test_emulated_config_construction(): - config = EmulatedConfig( - prompt_tokens=10, - prompt_tokens_variance=2, - prompt_tokens_min=5, - prompt_tokens_max=15, - 
generated_tokens=20, - generated_tokens_variance=4, - generated_tokens_min=10, - generated_tokens_max=30, - ) - assert config.prompt_tokens == 10 - assert config.prompt_tokens_variance == 2 - assert config.prompt_tokens_min == 5 - assert config.prompt_tokens_max == 15 - assert config.generated_tokens == 20 - assert config.generated_tokens_variance == 4 - assert config.generated_tokens_min == 10 - assert config.generated_tokens_max == 30 - - -@pytest.mark.smoke() -def test_emulated_config_create_dict(): - config_dict = { - "prompt_tokens": 10, - "prompt_tokens_variance": 2, - "prompt_tokens_min": 5, - "prompt_tokens_max": 15, - "generated_tokens": 20, - "generated_tokens_variance": 4, - "generated_tokens_min": 10, - "generated_tokens_max": 30, - } - config = EmulatedConfig.create_config(config_dict) - assert config.prompt_tokens == 10 - assert config.prompt_tokens_variance == 2 - assert config.prompt_tokens_min == 5 - assert config.prompt_tokens_max == 15 - assert config.generated_tokens == 20 - assert config.generated_tokens_variance == 4 - assert config.generated_tokens_min == 10 - assert config.generated_tokens_max == 30 - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("base", "variance", "min_tokens", "max_tokens", "expected_range"), - [ - (10, 2, None, None, (1, 10 + 5 * 2)), - (10, 2, 5, 15, (5, 15)), - (10, None, 5, 15, (5, 15)), - (10, 2, 1, None, (1, 10 + 5 * 2)), - ], -) -def test_emulated_config_token_range( - base: int, - variance: int, - min_tokens: int, - max_tokens: int, - expected_range: Tuple[int, int], -): - assert ( - EmulatedConfig._token_range(base, variance, min_tokens, max_tokens) - == expected_range - ) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("base", "variance", "min_tokens", "max_tokens", "expected_range"), - [ - (10, None, None, None, (10, 10)), - (10, 5, None, None, (1, 10 + 5 * 2)), - (10, 5, 5, 15, (5, 15)), - (10, None, 5, 15, (5, 15)), - (10, 5, 2, None, (2, 10 + 5 * 2)), - (10, 5, None, 20, (1, 20)), - ], -) -def test_emulated_config_sample_tokens( - base: int, - variance: int, - min_tokens: int, - max_tokens: int, - expected_range: Tuple[int, int], -): - rng = np.random.default_rng() - - for _ in range(100): - token_count = EmulatedConfig._sample_tokens( - base, variance, min_tokens, max_tokens, rng - ) - assert token_count >= expected_range[0] - assert token_count <= expected_range[1] - - -@pytest.mark.sanity() -def test_emulated_config_create(): - test_dict = { - "prompt_tokens": 10, - "prompt_tokens_variance": 2, - "prompt_tokens_min": 5, - "prompt_tokens_max": 15, - "generated_tokens": 20, - "generated_tokens_variance": 4, - "generated_tokens_min": 10, - "generated_tokens_max": 30, - } - compare_config = EmulatedConfig(**test_dict) - - # test dict - test_config = EmulatedConfig.create_config(test_dict) - assert ( - test_config == compare_config - ), f"Dictionary creation failed: {test_config} != {compare_config}" - - # test json str - test_config = EmulatedConfig.create_config(json.dumps(test_dict)) - assert ( - test_config == compare_config - ), f"JSON string creation failed: {test_config} != {compare_config}" - - # test json file str path - with tempfile.TemporaryDirectory() as temp_dir: - test_path = Path(temp_dir) / "test.json" - test_path.write_text(json.dumps(test_dict)) - test_config = EmulatedConfig.create_config(str(test_path)) - assert ( - test_config == compare_config - ), f"JSON file path creation failed: {test_config} != {compare_config}" - - # test json file Path object - with tempfile.TemporaryDirectory() as 
temp_dir: - test_path = Path(temp_dir) / "test.json" - test_path.write_text(json.dumps(test_dict)) - test_config = EmulatedConfig.create_config(test_path) - assert ( - test_config == compare_config - ), f"JSON file Path object creation failed: {test_config} != {compare_config}" - - # test key value string - test_str = ( - f"prompt_tokens={test_dict['prompt_tokens']}, " - f"prompt_tokens_variance={test_dict['prompt_tokens_variance']}, " - f"prompt_tokens_min={test_dict['prompt_tokens_min']}, " - f"prompt_tokens_max={test_dict['prompt_tokens_max']}, " - f"generated_tokens={test_dict['generated_tokens']}, " - f"generated_tokens_variance={test_dict['generated_tokens_variance']}, " - f"generated_tokens_min={test_dict['generated_tokens_min']}, " - f"generated_tokens_max={test_dict['generated_tokens_max']}" - ) - test_config = EmulatedConfig.create_config(test_str) - assert ( - test_config == compare_config - ), f"Key value string creation failed: {test_config} != {compare_config}" - - -# EndlessTokens - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "expected_words", "expected_indices"), - [ - ( - "word1 word2 word3\nword4 word5", - ["word1", "word2", "word3", "word4", "word5"], - [0, 3], - ), - ( - "word1 word2\n word3 word4\n word5", - ["word1", "word2", "word3", "word4", "word5"], - [0, 2, 4], - ), - ], -) -def test_endless_data_words_construction(data, expected_words, expected_indices): - tokens = EndlessTokens(data) - assert tokens == expected_words - assert tokens.line_indices == expected_indices - - -@pytest.mark.smoke() -def test_endless_data_words_create_from_basic_file(): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "test.txt" - file_path.write_text("word1 word2 word3\nword4 word5") - - tokens = EndlessTokens(file_path) - assert tokens == ["word1", "word2", "word3", "word4", "word5"] - assert tokens.line_indices == [0, 3] - - tokens = EndlessTokens(str(file_path)) - assert tokens == ["word1", "word2", "word3", "word4", "word5"] - assert tokens.line_indices == [0, 3] - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "start", "length", "expected_text"), - [ - ("word1 word2 word3 word4", 0, 2, "word1 word2"), - ("word1 word2\nword3 word4", 1, 2, "word2\nword3"), - ( - "word1 word2\nword3 word4", - 1, - 6, - "word2\nword3 word4 word1 word2\nword3", - ), - ], -) -def test_endless_data_words_create_text(data, start, length, expected_text): - words = EndlessTokens(data) - text = words.create_text(start, length) - assert text == expected_text - - -# EmulatedRequestGenerator - - -@pytest.mark.smoke() -def test_emulated_request_generator_construction(mocker, mock_auto_tokenizer): - mocker.patch( - "guidellm.request.emulated.EmulatedConfig.create_config", - return_value=EmulatedConfig(prompt_tokens=10), - ) - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - generator = EmulatedRequestGenerator( - config="mock_config", tokenizer="mock-tokenizer", mode="sync" - ) - assert isinstance(generator._config, EmulatedConfig) - assert isinstance(generator._tokens, EndlessTokens) - - -@pytest.mark.smoke() -def test_emulated_request_generator_create_item(mocker): - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - mock_tokenizer = mocker.Mock(PreTrainedTokenizer) - mock_tokenizer.tokenize.return_value = ["word1", "word2"] - generator = EmulatedRequestGenerator( - config={ - "prompt_tokens": 10, - }, - tokenizer=mock_tokenizer, 
- mode="sync", - ) - item = generator.create_item() - assert isinstance(item, TextGenerationRequest) - - -@pytest.mark.smoke() -def test_emulated_request_generator_sample_prompt(mocker, mock_auto_tokenizer): - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - generator = EmulatedRequestGenerator( - config={"prompt_tokens": 3}, tokenizer="mock-tokenizer", mode="sync" - ) - prompt = generator.sample_prompt(3) - assert prompt == "word1 word2 word1" - - request = generator.create_item() - assert request.prompt_token_count == 3 - - -@pytest.mark.smoke() -def test_emulated_request_generator_random_seed(mocker, mock_auto_tokenizer): - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - - rand_gen = EmulatedRequestGenerator( - config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, - tokenizer="mock-tokenizer", - random_seed=42, - mode="sync", - ) - rand_gen_comp_pos = EmulatedRequestGenerator( - config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, - tokenizer="mock-tokenizer", - random_seed=42, - mode="sync", - ) - rand_gen_comp_neg = EmulatedRequestGenerator( - config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, - tokenizer="mock-tokenizer", - random_seed=43, - mode="sync", - ) - - assert rand_gen.create_item().prompt == rand_gen_comp_pos.create_item().prompt - assert rand_gen.create_item().prompt != rand_gen_comp_neg.create_item().prompt - - -@pytest.mark.regression() -@pytest.mark.parametrize( - ("config_type", "config"), - [ - ("dict", {"prompt_tokens": 10, "generated_tokens": 20}), - ("dict", {"prompt_tokens": 10, "prompt_tokens_variance": 2}), - ( - "dict", - { - "prompt_tokens": 10, - "prompt_tokens_min": 5, - "prompt_tokens_max": 15, - "generated_tokens": 20, - }, - ), - ("json_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), - ("key_value_str", "prompt_tokens=10, generated_tokens=20"), - ("file_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), - ("file_path", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), - ], -) -def test_emulated_request_generator_lifecycle( - mock_requests_pride_and_prejudice, - mock_auto_tokenizer, - config_type: str, - config: Union[str, dict, Path], -): - if config_type in ["dict", "json_str", "key_value_str"]: - generator = EmulatedRequestGenerator(config, tokenizer="mock-tokenizer") - elif config_type in ["file_str", "file_path"]: - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "test.json" - file_path.write_text(config) # type: ignore - generator = EmulatedRequestGenerator( - str(file_path) if config_type == "file_str" else file_path, - tokenizer="mock-tokenizer", - ) - - for _ in range(5): - request = generator.create_item() - prompt_range = generator._config.prompt_tokens_range - outputs_range = generator._config.output_tokens_range - - assert request.prompt_token_count >= prompt_range[0] # type: ignore - assert request.prompt_token_count <= prompt_range[1] # type: ignore - - prompt_tokens = len(generator.tokenizer.tokenize(request.prompt)) - assert request.prompt_token_count == prompt_tokens - - if generator._config.generated_tokens: - assert len(outputs_range) == 2 - assert request.output_token_count >= outputs_range[0] # type: ignore - assert request.output_token_count <= outputs_range[1] # type: ignore diff --git a/tests/unit/request/test_file.py b/tests/unit/request/test_file.py deleted file mode 100644 index 69e538a1..00000000 --- 
a/tests/unit/request/test_file.py +++ /dev/null @@ -1,161 +0,0 @@ -import tempfile -from pathlib import Path - -import pytest - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.file import FileRequestGenerator - - -@pytest.mark.smoke() -def test_file_request_generator_constructor(mock_auto_tokenizer): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.txt" - file_path.write_text("This is a test.\nThis is another test.") - generator = FileRequestGenerator(file_path, tokenizer="mock-tokenizer") - assert generator._path == file_path - assert generator._data == ["This is a test.", "This is another test."] - assert generator._iterator is not None - - -@pytest.mark.smoke() -def test_file_request_generator_create_item(mock_auto_tokenizer): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.txt" - file_path.write_text("This is a test.\nThis is another test.") - generator = FileRequestGenerator( - file_path, tokenizer="mock-tokenizer", mode="sync" - ) - request = generator.create_item() - assert isinstance(request, TextGenerationRequest) - assert request.prompt == "This is a test." - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("file_extension", "file_content"), - [ - ("txt", "Test content 1.\nTest content 2.\nTest content 3.\n"), - ( - "csv", - "text,label,extra\n" - "Test content 1.,1,extra 1\n" - "Test content 2.,2,extra 2\n" - "Test content 3.,3,extra 3\n", - ), - ( - "jsonl", - '{"text": "Test content 1."}\n' - '{"text": "Test content 2."}\n' - '{"text": "Test content 3."}\n', - ), - ( - "csv", - "prompt,text,extra\n" - "Test content 1., text 1, extra 1\n" - "Test content 2., text 2, extra 2\n" - "Test content 3., text 3, extra 3\n", - ), - ( - "json", - '[{"text": "Test content 1."}, ' - '{"text": "Test content 2."}, ' - '{"text": "Test content 3."}]\n', - ), - ( - "json", - '{"object_1": {"text": "Test content 1."}, ' - '"object_2": {"text": "Test content 2."}, ' - '"object_3": {"text": "Test content 3."}}\n', - ), - ( - "yaml", - "items:\n" - " - text: Test content 1.\n" - " - text: Test content 2.\n" - " - text: Test content 3.\n", - ), - ( - "yaml", - "object_1:\n text: Test content 1.\n" - "object_2:\n text: Test content 2.\n" - "object_3:\n text: Test content 3.\n", - ), - ], -) -def test_file_request_generator_file_types_lifecycle( - mock_auto_tokenizer, file_extension, file_content -): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / f"example.{file_extension}" - file_path.write_text(file_content) - generator = FileRequestGenerator(file_path, tokenizer="mock-tokenizer") - - for index, request in enumerate(generator): - assert isinstance(request, TextGenerationRequest) - assert request.prompt == f"Test content {index + 1}." 
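For orientation, the deleted `test_file.py` covered the legacy `FileRequestGenerator`, whose unit tests this PR removes. A rough sketch of the usage pattern those tests exercised is shown below; the file contents are illustrative, and only the constructor arguments and methods visible in the deleted tests are assumed.

```python
import tempfile
from pathlib import Path

# Legacy API; shown only to document what the deleted tests covered.
from guidellm.request.file import FileRequestGenerator

with tempfile.TemporaryDirectory() as temp_dir:
    data_file = Path(temp_dir) / "prompts.txt"
    data_file.write_text("This is a test.\nThis is another test.")

    # Construct with a file path and a tokenizer name, then pull requests one at a time.
    generator = FileRequestGenerator(data_file, tokenizer="mock-tokenizer", mode="sync")
    request = generator.create_item()
    print(request.prompt)  # "This is a test."
```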
- assert request.prompt_token_count == 3 - - if index == 2: - break - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("file_extension", "file_content"), - [ - ("txt", "Test content 1.\nTest content 2.\nTest content 3.\n"), - ( - "csv", - "text,label,extra\n" - "Test content 1.,1,extra 1\n" - "Test content 2.,2,extra 2\n" - "Test content 3.,3,extra 3\n", - ), - ( - "jsonl", - '{"text": "Test content 1."}\n' - '{"text": "Test content 2."}\n' - '{"text": "Test content 3."}\n', - ), - ( - "csv", - "prompt,text,extra\n" - "Test content 1., text 1, extra 1\n" - "Test content 2., text 2, extra 2\n" - "Test content 3., text 3, extra 3\n", - ), - ( - "json", - '[{"text": "Test content 1."}, ' - '{"text": "Test content 2."}, ' - '{"text": "Test content 3."}]\n', - ), - ( - "json", - '{"object_1": {"text": "Test content 1."}, ' - '"object_2": {"text": "Test content 2."}, ' - '"object_3": {"text": "Test content 3."}}\n', - ), - ( - "yaml", - "items:\n" - " - text: Test content 1.\n" - " - text: Test content 2.\n" - " - text: Test content 3.\n", - ), - ( - "yaml", - "object_1:\n text: Test content 1.\n" - "object_2:\n text: Test content 2.\n" - "object_3:\n text: Test content 3.\n", - ), - ], -) -def test_file_request_generator_len(mock_auto_tokenizer, file_extension, file_content): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / f"example.{file_extension}" - file_path.write_text(file_content) - generator = FileRequestGenerator(file_path, tokenizer="mock-tokenizer") - - assert len(generator) == 3 diff --git a/tests/unit/request/test_transformers.py b/tests/unit/request/test_transformers.py deleted file mode 100644 index d3b45325..00000000 --- a/tests/unit/request/test_transformers.py +++ /dev/null @@ -1,132 +0,0 @@ -from unittest.mock import patch - -import pytest - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.transformers import TransformersDatasetRequestGenerator -from tests.dummy.data.transformers import ( - create_sample_dataset, - create_sample_dataset_dict, - create_sample_iterable_dataset, - create_sample_iterable_dataset_dict, -) - - -@pytest.mark.smoke() -def test_transformers_dataset_request_generator_constructor( - mock_auto_tokenizer, -): - dataset = create_sample_dataset() - with patch( - "guidellm.request.transformers.load_transformers_dataset", - return_value=dataset, - ), patch( - "guidellm.request.transformers.resolve_transformers_dataset_column", - return_value="text", - ): - generator = TransformersDatasetRequestGenerator( - dataset="dummy_dataset", - split="train", - column="text", - tokenizer="mock-tokenizer", - ) - assert generator._dataset == "dummy_dataset" - assert generator._split == "train" - assert generator._column == "text" - assert generator._hf_dataset == dataset - assert generator._hf_column == "text" - assert generator._hf_dataset_iterator is not None - - -@pytest.mark.smoke() -def test_transformers_dataset_request_generator_create_item( - mock_auto_tokenizer, -): - generator = TransformersDatasetRequestGenerator( - dataset=create_sample_dataset_dict(), - split="train", - column="text", - tokenizer="mock-tokenizer", - mode="sync", - ) - request = generator.create_item() - assert isinstance(request, TextGenerationRequest) - assert request.prompt == "sample text 1" - assert request.prompt_token_count == 3 - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset_dict(splits=["train"]), - ), - ( - 
"mock/directory/file.json", - create_sample_dataset(column="prompt"), - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - ), - (create_sample_dataset_dict(splits=["val", "train"], column="custom"), None), - (create_sample_dataset(), None), - (create_sample_iterable_dataset_dict(splits=["validation"]), None), - (create_sample_iterable_dataset(), None), - ], -) -def test_transformers_dataset_request_generator_lifecycle( - mock_auto_tokenizer, dataset_arg, dataset -): - with patch( - "guidellm.utils.transformers.load_dataset", - return_value=dataset, - ): - generator = TransformersDatasetRequestGenerator( - dataset=dataset_arg, tokenizer="mock-tokenizer", mode="sync" - ) - - for index, request in enumerate(generator): - assert isinstance(request, TextGenerationRequest) - assert request.prompt == f"sample text {index + 1}" - assert request.prompt_token_count == 3 - - if index == 2: - break - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset_dict(splits=["train"]), - ), - ( - "mock/directory/file.json", - create_sample_dataset(column="prompt"), - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - ), - (create_sample_dataset_dict(splits=["val", "train"], column="custom"), None), - (create_sample_dataset(), None), - ], -) -def test_transformers_dataset_request_generator_len( - mock_auto_tokenizer, dataset_arg, dataset -): - with patch( - "guidellm.utils.transformers.load_dataset", - return_value=dataset, - ): - generator = TransformersDatasetRequestGenerator( - dataset=dataset_arg, tokenizer="mock-tokenizer", mode="sync" - ) - - # Check if __len__ returns the correct length - assert len(generator) == 3 diff --git a/tests/unit/scheduler/__init__.py b/tests/unit/scheduler/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/scheduler/test_load_generator.py b/tests/unit/scheduler/test_load_generator.py deleted file mode 100644 index 6b84ee01..00000000 --- a/tests/unit/scheduler/test_load_generator.py +++ /dev/null @@ -1,153 +0,0 @@ -import time -from typing import get_args - -import pytest -from scipy.stats import kstest # type: ignore - -from guidellm.scheduler import LoadGenerationMode, LoadGenerator - - -@pytest.mark.smoke() -def test_load_generator_mode(): - assert set(get_args(LoadGenerationMode)) == { - "synchronous", - "constant", - "poisson", - "throughput", - } - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("constant", 10), - ("poisson", 5), - ("throughput", None), - ("synchronous", None), - ], -) -def test_load_generator_instantiation(mode, rate): - generator = LoadGenerator(mode=mode, rate=rate) - assert generator.mode == mode - assert generator.rate == rate - - -@pytest.mark.regression() -@pytest.mark.parametrize( - ("mode", "rate", "expected_error"), - [ - ("invalid_mode", None, ValueError), - ("constant", 0, ValueError), - ("poisson", -1, ValueError), - ], -) -def test_load_generator_invalid_instantiation(mode, rate, expected_error): - with pytest.raises(expected_error): - LoadGenerator(mode=mode, rate=rate) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("synchronous", None), - ("throughput", None), - ("constant", 1), - ("poisson", 5), - ], -) -def test_load_generator_times(mode, rate): - # first check that the proper method is called - generator = LoadGenerator(mode=mode, rate=rate) - func_name = 
f"{mode}_times" - assert hasattr(generator, func_name) - assert callable(getattr(generator, func_name)) - - call_count = 0 - - def _increment_call_count(): - nonlocal call_count - call_count += 1 - yield -1.0 - - setattr(generator, func_name, _increment_call_count) - for time_ in generator.times(): - assert time_ == -1.0 - break - assert call_count == 1 - - # now check that the method generates reasonable timestamps - generator = LoadGenerator(mode=mode, rate=rate) - start_time = time.time() - for index, time_ in enumerate(generator.times()): - if index > 10: - break - - if mode == "synchronous": - assert time_ == -1.0 - else: - assert time_ >= start_time - - -@pytest.mark.smoke() -def test_load_generator_invalid_times(): - generator = LoadGenerator(mode="synchronous") - - for index, time_ in enumerate(generator.synchronous_times()): - if index > 10: - break - - assert time_ == -1.0 - - -@pytest.mark.smoke() -def test_load_generator_throughput_times(): - generator = LoadGenerator(mode="throughput") - - for index, time_ in enumerate(generator.throughput_times()): - if index > 10: - break - - assert time_ <= time.time() - - -@pytest.mark.smoke() -@pytest.mark.parametrize("rate", [1, 10, 42]) -def test_load_generator_constant_times(rate): - generator = LoadGenerator(mode="constant", rate=rate) - start_time = time.time() - - for index, time_ in enumerate(generator.constant_times()): - if index > 10: - break - - assert time_ == pytest.approx(start_time + index / rate, rel=1e-5) - - -@pytest.mark.smoke() -@pytest.mark.flaky(reruns=5) -def test_load_generator_poisson_times(): - rate = 5 - generator = LoadGenerator(mode="poisson", rate=rate) - start_time = time.time() - - times = [] - prev_time = start_time - - for index, current_time in enumerate(generator.poisson_times()): - if index > 100: - break - - times.append(current_time - prev_time) - prev_time = current_time - - mean_inter_arrival_time = 1 / rate - - # Perform Kolmogorov-Smirnov test to compare the sample distribution - # to the expected exponential distribution - ks_statistic, p_value = kstest(times, "expon", args=(0, mean_inter_arrival_time)) - assert p_value > 0.025, ( - f"Poisson-generated inter-arrival times do not " - f"match the expected exponential distribution (p-value: {p_value})" - ) diff --git a/tests/unit/scheduler/test_scheduler.py b/tests/unit/scheduler/test_scheduler.py deleted file mode 100644 index d765280f..00000000 --- a/tests/unit/scheduler/test_scheduler.py +++ /dev/null @@ -1,199 +0,0 @@ -import random -from unittest.mock import create_autospec - -import pytest - -from guidellm.backend import Backend -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationRequest, - TextGenerationResult, -) -from guidellm.request import RequestGenerator -from guidellm.scheduler import ( - LoadGenerator, - Scheduler, - SchedulerResult, -) - - -@pytest.mark.smoke() -def test_scheduler_result_default_intialization(): - benchmark = create_autospec(TextGenerationBenchmark, instance=True) - scheduler_result = SchedulerResult( - completed=False, - count_total=0, - count_completed=0, - benchmark=benchmark, - ) - - assert scheduler_result.completed is False - assert scheduler_result.count_total == 0 - assert scheduler_result.count_completed == 0 - assert scheduler_result.benchmark == benchmark - assert scheduler_result.current_result is None - - -@pytest.mark.smoke() -def test_scheduler_result_initialization(): - benchmark = create_autospec(TextGenerationBenchmark, instance=True) - result = TextGenerationResult( - 
request=TextGenerationRequest(prompt="prompt"), output="Test output" - ) - scheduler_result = SchedulerResult( - completed=False, - count_total=10, - count_completed=5, - benchmark=benchmark, - current_result=result, - ) - - assert scheduler_result.completed is False - assert scheduler_result.count_total == 10 - assert scheduler_result.count_completed == 5 - assert scheduler_result.benchmark == benchmark - assert scheduler_result.current_result == result - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate", "max_number", "max_duration"), - [ - ("synchronous", None, 10, None), - ("throughput", 5.0, None, 60.0), - ("poisson", 10.0, 100, None), - ("constant", 1.0, None, 120.0), - ], -) -def test_scheduler_initialization(mode, rate, max_number, max_duration): - generator = create_autospec(RequestGenerator, instance=True) - backend = create_autospec(Backend, instance=True) - scheduler = Scheduler( - generator, - backend, - mode=mode, - rate=rate, - max_number=max_number, - max_duration=max_duration, - ) - - assert scheduler.generator == generator - assert scheduler.backend == backend - assert scheduler.mode == mode - assert scheduler.rate == rate - assert scheduler.max_number == max_number - assert scheduler.max_duration == max_duration - assert isinstance(scheduler.load_generator, LoadGenerator) - assert scheduler.benchmark_mode in {"synchronous", "asynchronous", "throughput"} - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - ("mode", "rate", "max_number", "max_duration"), - [ - # invalid modes - ("invalid_mode", None, 10, None), - # invalid max settings - ("synchronous", None, None, None), - ("synchronous", None, -1, 10), - ("synchronous", None, 10, -1), - # invalid rate settings - ("constant", -1, None, 10), - ("constant", None, None, 10), - ("poisson", -1, None, 10), - ("poisson", None, None, 10), - ], -) -def test_scheduler_invalid_initialization( - mode, - rate, - max_number, - max_duration, -): - generator = create_autospec(RequestGenerator, instance=True) - backend = create_autospec(Backend, instance=True) - - with pytest.raises(ValueError): - Scheduler( - generator, - backend, - mode=mode, - rate=rate, - max_number=max_number, - max_duration=max_duration, - ) - - -@pytest.mark.sanity() -@pytest.mark.asyncio() -@pytest.mark.parametrize( - "mode", - [ - "synchronous", - "throughput", - "poisson", - "constant", - ], -) -async def test_scheduler_run_number(mode, mock_backend): - rate = 10.0 - max_number = 20 - generator = create_autospec(RequestGenerator, instance=True) - - # Mock the request generator and backend submit behavior - generator.__iter__.return_value = iter( - [TextGenerationRequest(prompt="Test", type_=random.choice(["text", "chat"]))] - * (max_number * 2) - ) - - scheduler = Scheduler( - generator, - mock_backend, - mode=mode, - rate=rate, - max_number=max_number, - ) - - run_count = 0 - count_completed = 0 - received_init = False - received_final = False - async for result in scheduler.run(): - run_count += 1 - - assert run_count <= max_number + 2 - assert result.count_total == max_number - assert result.benchmark is not None - assert isinstance(result.benchmark, TextGenerationBenchmark) - - if result.current_result is not None: - count_completed += 1 - - if run_count == 1: - assert not received_init - assert not received_final - assert count_completed == 0 - assert result.count_completed == 0 - assert not result.completed - assert result.current_result is None - received_init = True - elif run_count - 2 == max_number: - assert received_init - assert 
not received_final - assert count_completed == max_number - assert result.count_completed == max_number - assert result.completed - assert result.current_result is None - received_final = True - else: - assert received_init - assert not received_final - assert count_completed == run_count - 1 - assert result.count_completed == run_count - 1 - assert not result.completed - assert result.current_result is not None - assert isinstance(result.current_result, TextGenerationResult) - - assert received_init - assert received_final - assert count_completed == max_number diff --git a/tests/unit/test_type.py b/tests/unit/test_type.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/utils/__init__.py b/tests/unit/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/utils/test_injector.py b/tests/unit/utils/test_injector.py deleted file mode 100644 index 9a58575e..00000000 --- a/tests/unit/utils/test_injector.py +++ /dev/null @@ -1,70 +0,0 @@ -from pathlib import Path - -import pytest -from pydantic import BaseModel - -from guidellm.config import settings -from guidellm.utils.injector import create_report, inject_data - - -class ExampleModel(BaseModel): - name: str - version: str - - -@pytest.mark.smoke() -def test_inject_data(): - model = ExampleModel(name="Example App", version="1.0.0") - html = "window.report_data = {};" - expected_html = 'window.report_data = {"name":"Example App","version":"1.0.0"};' - - result = inject_data( - model, - html, - settings.report_generation.report_html_match, - settings.report_generation.report_html_placeholder, - ) - assert result == expected_html - - -@pytest.mark.smoke() -def test_create_report_to_file(tmpdir): - model = ExampleModel(name="Example App", version="1.0.0") - html_content = "window.report_data = {};" - expected_html_content = ( - 'window.report_data = {"name":"Example App","version":"1.0.0"};' - ) - - mock_html_path = tmpdir.join("template.html") - mock_html_path.write(html_content) - settings.report_generation.source = str(mock_html_path) - - output_path = tmpdir.join("output.html") - result_path = create_report(model, str(output_path)) - result_content = result_path.read_text() - - assert result_path == output_path - assert result_content == expected_html_content - - -@pytest.mark.smoke() -def test_create_report_to_directory(tmpdir): - model = ExampleModel(name="Example App", version="1.0.0") - html_content = "window.report_data = {};" - expected_html_content = ( - 'window.report_data = {"name":"Example App","version":"1.0.0"};' - ) - - mock_html_path = tmpdir.join("template.html") - mock_html_path.write(html_content) - settings.report_generation.source = str(mock_html_path) - - output_dir = tmpdir.mkdir("output_dir") - output_path = Path(output_dir) / "report.html" - result_path = create_report(model, str(output_dir)) - - with Path(result_path).open("r") as file: - result_content = file.read() - - assert result_path == output_path - assert result_content == expected_html_content diff --git a/tests/unit/utils/test_progress.py b/tests/unit/utils/test_progress.py deleted file mode 100644 index 637b2be2..00000000 --- a/tests/unit/utils/test_progress.py +++ /dev/null @@ -1,116 +0,0 @@ -import pytest - -from guidellm.utils import BenchmarkReportProgress - - -@pytest.fixture() -def benchmark_progress(): - return BenchmarkReportProgress() - - -@pytest.mark.smoke() -def test_initialization(benchmark_progress): - assert benchmark_progress.report_task is None - assert 
benchmark_progress.benchmark_tasks == [] - assert benchmark_progress.benchmark_tasks_started == [] - assert benchmark_progress.benchmark_tasks_completed == [] - assert benchmark_progress.benchmark_tasks_progress == [] - - -@pytest.mark.smoke() -def test_start_method(benchmark_progress): - descriptions = ["Benchmark 1", "Benchmark 2"] - benchmark_progress.start(descriptions) - - assert len(benchmark_progress.benchmark_tasks) == 2 - assert benchmark_progress.report_task is not None - - benchmark_progress.finish() - - -@pytest.mark.sanity() -def test_update_benchmark(benchmark_progress): - descriptions = ["Benchmark 1"] - benchmark_progress.start(descriptions) - - benchmark_progress.update_benchmark( - index=0, - description="Updating Benchmark 1", - completed=False, - completed_count=50, - completed_total=100, - start_time=0, - req_per_sec=10.5, - ) - assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 - - benchmark_progress.finish() - - -@pytest.mark.sanity() -def test_finish_method(benchmark_progress): - descriptions = ["Benchmark 1", "Benchmark 2"] - benchmark_progress.start(descriptions) - benchmark_progress.finish() - - assert benchmark_progress.report_progress.finished - - -@pytest.mark.regression() -def test_error_on_update_completed_benchmark(benchmark_progress): - descriptions = ["Benchmark 1"] - benchmark_progress.start(descriptions) - benchmark_progress.update_benchmark( - index=0, - description="Benchmark 1", - completed=True, - completed_count=100, - completed_total=100, - start_time=0, - req_per_sec=10.5, - ) - - with pytest.raises(ValueError, match="already completed"): - benchmark_progress.update_benchmark( - index=0, - description="Benchmark 1", - completed=False, - completed_count=50, - completed_total=100, - start_time=0, - req_per_sec=10.5, - ) - - benchmark_progress.finish() - - -@pytest.mark.regression() -def test_multiple_updates(benchmark_progress): - descriptions = ["Benchmark 1", "Benchmark 2"] - benchmark_progress.start(descriptions) - - # First update - benchmark_progress.update_benchmark( - index=0, - description="Updating Benchmark 1", - completed=False, - completed_count=50, - completed_total=100, - start_time=0, - req_per_sec=5.0, - ) - assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 - - # Second update, same task - benchmark_progress.update_benchmark( - index=0, - description="Updating Benchmark 1", - completed=True, - completed_count=100, - completed_total=100, - start_time=0, - req_per_sec=5.0, - ) - assert benchmark_progress.benchmark_tasks_progress[0] == 100.0 - - benchmark_progress.finish() diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py deleted file mode 100644 index 1d89ee31..00000000 --- a/tests/unit/utils/test_text.py +++ /dev/null @@ -1,394 +0,0 @@ -from pathlib import Path -from unittest.mock import patch - -import pytest -import requests - -from guidellm.utils.text import ( - clean_text, - filter_text, - is_path, - is_path_like, - is_url, - load_text, - load_text_lines, - parse_text_objects, - split_lines_by_punctuation, - split_text, -) - - -@pytest.fixture() -def sample_text(): - return "This is a sample text.\nThis is another line!" 
- - -@pytest.fixture() -def sample_dict_data(): - return [{"text": "line 1"}, {"text": "line 2"}, {"text": "line 3"}] - - -@pytest.fixture() -def sample_csv_data(): - return "text\nline 1\nline 2\nline 3" - - -@pytest.fixture() -def sample_jsonl_data(): - return '{"text": "line 1"}\n{"text": "line 2"}\n{"text": "line 3"}' - - -@pytest.fixture() -def sample_yaml_data(): - return """ - text: - - line 1 - - line 2 - - line 3 - """ - - -@pytest.fixture() -def mock_response(): - response = requests.Response() - response.status_code = 200 - response._content = b"Mock content" - return response - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("text", "start", "end", "expected"), - [ - ("hello world", "hello", "world", "hello "), - ("hello world", "world", None, "world"), - ("hello world", None, "hello", ""), - ("hello world", None, None, "hello world"), - ], -) -def test_filter_text(text, start, end, expected): - assert filter_text(text, start, end) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ( - "text", - "fix_encoding", - "clean_whitespace", - "remove_empty_lines", - "force_new_line_punctuation", - "expected", - ), - [ - ( - "This is\ta test.\n New line.", - True, - True, - False, - False, - "This is a test.\nNew line.", - ), - ( - "This is\ta test.\n New line.", - True, - True, - True, - False, - "This is a test.\nNew line.", - ), - ( - "This is a test. New line.", - True, - False, - False, - True, - "This is a test.\nNew line.", - ), - ], -) -def test_clean_text( - text, - fix_encoding, - clean_whitespace, - remove_empty_lines, - force_new_line_punctuation, - expected, -): - assert ( - clean_text( - text, - fix_encoding, - clean_whitespace, - remove_empty_lines, - force_new_line_punctuation, - ) - == expected - ) - - -@pytest.mark.smoke() -def test_split_lines_by_punctuation(sample_text): - expected = ["This is a sample text.", "This is another line!"] - assert split_lines_by_punctuation(sample_text) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("url", "expected"), - [ - ("https://example.com", True), - ("ftp://example.com", True), - ("not a url", False), - ], -) -def test_is_url(url, expected): - assert is_url(url) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("path", "expected"), - [ - (str(Path(__file__)), True), - ("/non/existent/path", False), - ], -) -def test_is_path(path, expected): - assert is_path(path) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("path", "enforce_file", "expected"), - [ - (str(Path(__file__)), True, True), - ("/non/existent/path", False, True), - ("https://example.com", False, False), - ], -) -def test_is_path_like(path, enforce_file, expected): - assert is_path_like(path, enforce_file) == expected - - -@pytest.mark.smoke() -def test_split_text(sample_text): - words, separators, new_lines = split_text(sample_text) - assert words == [ - "This", - "is", - "a", - "sample", - "text.", - "This", - "is", - "another", - "line!", - ] - assert separators == [" ", " ", " ", " ", "\n", " ", " ", " ", " "] - assert new_lines == [0, 5] - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "format_", "expected"), - [ - ("text\nline 1\nline 2", "csv", [{"text": "line 1"}, {"text": "line 2"}]), - ( - '{"text": "line 1"}\n{"text": "line 2"}', - "jsonl", - [{"text": "line 1"}, {"text": "line 2"}], - ), - ], -) -def test_parse_text_objects(data, format_, expected): - assert parse_text_objects(data, format_) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - 
("data", "expected"), - [ - ("https://example.com", "Mock content"), - (str(Path(__file__)), Path(__file__).read_text()), - ], -) -def test_load_text(data, expected, mock_response): - with patch("requests.get", return_value=mock_response): - assert load_text(data) == expected - - -@pytest.mark.regression() -def test_load_text_file_not_found(): - with pytest.raises(FileNotFoundError): - load_text("/non/existent/file.txt") - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "format_", "filters", "expected"), - [ - ("text\nline 1\nline 2", "csv", None, ["line 1", "line 2"]), - ('{"text": "line 1"}\n{"text": "line 2"}', "jsonl", None, ["line 1", "line 2"]), - ("text\nline 1\nline 2", "txt", None, ["text", "line 1", "line 2"]), - ], -) -def test_load_text_lines(data, format_, filters, expected): - assert load_text_lines(data, format_=format_, filters=filters) == expected - - -@pytest.mark.regression() -def test_load_text_lines_invalid_data(): - with pytest.raises(ValueError): - load_text_lines(123) # type: ignore - - -@pytest.mark.regression() -def test_parse_text_objects_invalid_format(): - with pytest.raises(ValueError): - parse_text_objects("text", format_="unsupported") - - -@pytest.mark.regression() -def test_parse_text_objects_invalid_data(): - with pytest.raises(ValueError): - parse_text_objects(123) # type: ignore - - -@pytest.mark.regression() -@pytest.mark.parametrize( - ("data", "format_", "filters", "expected"), - [ - ( - "text\nline 1\nline 2\n", - "csv", - ["text"], - ["line 1", "line 2"], - ), - ], -) -def test_load_text_lines_with_filters(data, format_, filters, expected): - assert load_text_lines(data, format_=format_, filters=filters) == expected - - -@pytest.mark.regression() -def test_is_path_with_symlink(tmp_path): - # Create a symlink to a temporary file - target_file = tmp_path / "target_file.txt" - target_file.write_text("Sample content") - symlink_path = tmp_path / "symlink" - symlink_path.symlink_to(target_file) - - assert is_path(str(symlink_path)) is True - - -@pytest.mark.regression() -def test_is_path_like_with_symlink(tmp_path): - # Create a symlink to a temporary file - target_file = tmp_path / "target_file.txt" - target_file.write_text("Sample content") - symlink_path = tmp_path / "symlink.file" - symlink_path.symlink_to(target_file) - - assert is_path_like(str(symlink_path), enforce_file=True) is True - - -@pytest.mark.regression() -def test_load_text_lines_empty(): - # Test loading text lines from an empty string - assert load_text_lines("") == [] - - -@pytest.mark.regression() -def test_split_text_with_empty_string(): - words, separators, new_lines = split_text("") - assert words == [] - assert separators == [] - assert new_lines == [] - - -@pytest.mark.regression() -def test_split_lines_by_punctuation_with_no_punctuation(): - text = "This is a test without punctuation" - assert split_lines_by_punctuation(text) == [text] - - -@pytest.mark.regression() -def test_is_path_invalid_type(): - assert not is_path(None) - assert not is_path(123) - assert not is_path(["not", "a", "path"]) - - -@pytest.mark.regression() -def test_is_path_like_invalid_type(): - assert not is_path_like(None, enforce_file=False) - assert not is_path_like(123, enforce_file=True) - assert not is_path_like(["not", "a", "path"], enforce_file=False) - - -@pytest.mark.regression() -def test_load_text_invalid_url(): - with pytest.raises(requests.ConnectionError): - load_text("http://invalid.url") - - -@pytest.mark.regression() -def test_parse_text_objects_empty_csv(): - assert 
parse_text_objects("text\n", "csv") == [] - - -@pytest.mark.regression() -def test_parse_text_objects_empty_jsonl(): - assert parse_text_objects("", "jsonl") == [] - - -@pytest.mark.regression() -def test_parse_text_objects_invalid_jsonl(): - with pytest.raises(ValueError): - parse_text_objects("{invalid_json}", "jsonl") - - -@pytest.mark.regression() -def test_parse_text_objects_empty_yaml(): - assert parse_text_objects("", "yaml") == [] - - -@pytest.mark.regression() -def test_clean_text_with_unicode(): - text = "This is a test with unicode: \u2013 \u2014" - cleaned_text = clean_text(text, fix_encoding=True, clean_whitespace=True) - assert cleaned_text == "This is a test with unicode: – —" - - -@pytest.mark.regression() -def test_split_lines_by_punctuation_with_multiple_punctuations(): - text = "First sentence. Second sentence? Third sentence!" - expected = ["First sentence.", "Second sentence?", "Third sentence!"] - assert split_lines_by_punctuation(text) == expected - - -@pytest.mark.regression() -def test_is_url_empty_string(): - assert not is_url("") - - -@pytest.mark.regression() -def test_load_text_invalid_data(): - with pytest.raises(TypeError): - load_text(123) # type: ignore - - -@pytest.mark.regression() -def test_load_text_lines_empty_format(): - data = "text\nline 1\nline 2" - assert load_text_lines(data, format_="") == ["text", "line 1", "line 2"] - - -@pytest.mark.regression() -def test_split_text_with_mixed_separators(): - text = "This\tis a test\nwith mixed separators." - words, separators, new_lines = split_text(text) - assert words == ["This", "is", "a", "test", "with", "mixed", "separators."] - assert separators == ["\t", " ", " ", "\n", " ", " ", " "] - assert new_lines == [0, 4] diff --git a/tests/unit/utils/test_transformers.py b/tests/unit/utils/test_transformers.py deleted file mode 100644 index 5153da3f..00000000 --- a/tests/unit/utils/test_transformers.py +++ /dev/null @@ -1,236 +0,0 @@ -from unittest.mock import patch - -import pytest -from datasets import ( # type: ignore - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, -) - -from guidellm.utils.transformers import ( - load_transformers_dataset, - resolve_transformers_dataset, - resolve_transformers_dataset_column, - resolve_transformers_dataset_split, -) -from tests.dummy.data.transformers import ( - create_sample_dataset, - create_sample_dataset_dict, - create_sample_iterable_dataset, - create_sample_iterable_dataset_dict, -) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset_dict(splits=["train"]), - "train", - None, - Dataset, - ), - ( - "mock/directory/file.json", - create_sample_dataset_dict(splits=["test"]), - None, - ("train", "test"), - Dataset, - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - None, - None, - Dataset, - ), - ( - create_sample_dataset_dict(splits=["val", "train"], column="custom"), - None, - "val", - None, - Dataset, - ), - ( - create_sample_dataset(), - None, - None, - None, - Dataset, - ), - ( - create_sample_iterable_dataset_dict(splits=["validation"]), - None, - None, - None, - IterableDataset, - ), - ( - create_sample_iterable_dataset(), - None, - "validation", - None, - IterableDataset, - ), - ], -) -def test_load_transformers_dataset( - dataset_arg, dataset, split, preferred_splits, expected_type -): - with patch( - "guidellm.utils.transformers.load_dataset", - 
return_value=dataset, - ): - loaded_dataset = load_transformers_dataset( - dataset_arg, split=split, preferred_splits=preferred_splits - ) - assert isinstance(loaded_dataset, expected_type) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset(), - "train", - None, - Dataset, - ), - ( - "mock/directory/file.json", - create_sample_dataset_dict(splits=["test"]), - None, - ("train", "test"), - DatasetDict, - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - None, - None, - DatasetDict, - ), - ( - "mock/directory/file.unk", - create_sample_dataset_dict(splits=["test"], column="output"), - None, - None, - DatasetDict, - ), - ( - create_sample_dataset_dict(splits=["val", "train"], column="custom"), - None, - "val", - None, - DatasetDict, - ), - ( - create_sample_dataset(), - None, - None, - None, - Dataset, - ), - ( - create_sample_iterable_dataset_dict(splits=["validation"]), - None, - None, - None, - IterableDatasetDict, - ), - ( - create_sample_iterable_dataset(), - None, - "validation", - None, - IterableDataset, - ), - ], -) -def test_resolve_transformers_dataset( - dataset_arg, dataset, split, preferred_splits, expected_type -): - with patch( - "guidellm.utils.transformers.load_dataset", - return_value=dataset, - ): - loaded_dataset = resolve_transformers_dataset( - dataset_arg, split=split, preferred_splits=preferred_splits - ) - assert isinstance(loaded_dataset, expected_type) - - -@pytest.mark.sanity() -def test_resolve_transformers_dataset_invalid(): - with pytest.raises(ValueError): - resolve_transformers_dataset(123) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset", "split", "preferred_splits", "expected_type"), - [ - ( - create_sample_dataset(), - None, - None, - Dataset, - ), - ( - create_sample_iterable_dataset_dict(splits=["validation"]), - None, - None, - IterableDataset, - ), - ( - create_sample_iterable_dataset(), - "validation", - None, - IterableDataset, - ), - ], -) -def test_resolve_transformers_dataset_split( - dataset, split, preferred_splits, expected_type -): - loaded_dataset = resolve_transformers_dataset_split( - dataset, split=split, preferred_splits=preferred_splits - ) - assert isinstance(loaded_dataset, expected_type) - - -def test_resolve_transformers_dataset_split_missing(): - dataset = create_sample_dataset_dict() - with pytest.raises(ValueError): - resolve_transformers_dataset_split(dataset, split="missing") - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset", "column", "preferred_columns", "expected_column"), - [ - (create_sample_dataset(), None, None, "text"), - (create_sample_dataset(), "text", None, "text"), - (create_sample_dataset(), None, ["text"], "text"), - (create_sample_dataset(), None, ["data"], "text"), - (create_sample_iterable_dataset(), None, None, "text"), - ], -) -def test_resolve_transformers_dataset_column( - dataset, column, preferred_columns, expected_column -): - resolved_column = resolve_transformers_dataset_column( - dataset, column=column, preferred_columns=preferred_columns - ) - assert resolved_column == expected_column - - -def test_resolve_transformers_dataset_column_missing(): - dataset = create_sample_dataset() - with pytest.raises(ValueError): - resolve_transformers_dataset_column(dataset, column="missing")
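
Note: the removed tests/unit/scheduler/test_load_generator.py validated Poisson scheduling by running a Kolmogorov-Smirnov test on the generated inter-arrival gaps against an exponential distribution with mean 1/rate. If equivalent coverage is rebuilt against the new scheduler package, a minimal sketch of that statistical check could look like the following. This is only an illustration, assuming NumPy and SciPy are available and using a plain exponential sampler as a stand-in for the removed LoadGenerator.poisson_times(); it is not the project's API.

import numpy as np
from scipy.stats import kstest

rate = 5.0  # requests per second, matching the rate used in the removed test
rng = np.random.default_rng(seed=0)

# Poisson arrivals have exponentially distributed inter-arrival gaps
# with mean 1 / rate; sample 100 gaps as the removed test effectively did.
gaps = rng.exponential(scale=1.0 / rate, size=100)

# Compare the sample against expon(loc=0, scale=1/rate); a large p-value
# means the exponential hypothesis cannot be rejected.
ks_statistic, p_value = kstest(gaps, "expon", args=(0, 1.0 / rate))
assert p_value > 0.025, (
    f"inter-arrival times do not match the expected exponential "
    f"distribution (p-value: {p_value})"
)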