diff --git a/.github/workflows/development.yml b/.github/workflows/development.yml index f8604d0f..7ee36c49 100644 --- a/.github/workflows/development.yml +++ b/.github/workflows/development.yml @@ -11,8 +11,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -29,8 +29,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index bf7f34cf..634ab52c 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -10,11 +10,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -31,11 +31,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -52,11 +52,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 95d44af4..5060149a 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -14,8 +14,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -32,8 +32,8 @@ jobs: strategy: matrix: python: - - "3.12" - - "3.8" + - "3.13" + - "3.9" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -52,7 +52,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" - name: Install pre-commit run: pip install pre-commit - name: Run pre-commit checks diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d4fe2494..c7c7b8f7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,11 +11,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -32,11 +32,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python @@ -53,11 +53,11 @@ jobs: strategy: matrix: python: + - "3.13" - "3.12" - "3.11" - "3.10" - "3.9" - - "3.8" steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/pyproject.toml b/pyproject.toml index e0b47007..4eb171f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ where = ["src"] include = ["*"] [tool.setuptools.package-data] -guidellm = ["*"] +"guidellm.data" = ["*.gz"] # ************************************************ @@ -32,6 +32,7 @@ dependencies = [ "loguru", "numpy", "pillow", + "protobuf", "pydantic>=2.0.0", "pydantic-settings>=2.0.0", "pyyaml>=6.0.0", @@ -77,7 +78,7 @@ dev = [ [project.entry-points.console_scripts] -guidellm = "guidellm.main:generate_benchmark_report_cli" +guidellm = "guidellm.__main__:cli" guidellm-config = "guidellm.config:print_config" diff --git a/src/guidellm/__init__.py b/src/guidellm/__init__.py index e5620188..929d046e 100644 --- a/src/guidellm/__init__.py +++ b/src/guidellm/__init__.py @@ -6,14 +6,22 @@ # flake8: noqa import os -import transformers # type: ignore +import logging +import contextlib -os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers 
-transformers.logging.set_verbosity_error() # Silence warnings for transformers +with open(os.devnull, "w") as devnull, contextlib.redirect_stderr( + devnull +), contextlib.redirect_stdout(devnull): + from transformers.utils import logging as hf_logging # type: ignore[import] + + # Set the log level for the transformers library to ERROR + # to ignore None of PyTorch, TensorFlow found + os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers + hf_logging.set_verbosity_error() + logging.getLogger("transformers").setLevel(logging.ERROR) from .config import settings from .logger import configure_logger, logger -from .main import generate_benchmark_report __all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"] diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py new file mode 100644 index 00000000..096614de --- /dev/null +++ b/src/guidellm/__main__.py @@ -0,0 +1,270 @@ +import asyncio +import json +from pathlib import Path +from typing import get_args + +import click + +from guidellm.backend import BackendType +from guidellm.benchmark import ProfileType, benchmark_generative_text +from guidellm.scheduler import StrategyType + +STRATEGY_PROFILE_CHOICES = set( + list(get_args(ProfileType)) + list(get_args(StrategyType)) +) + + +def parse_json(ctx, param, value): # noqa: ARG001 + if value is None: + return None + try: + return json.loads(value) + except json.JSONDecodeError as err: + raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err + + +def parse_number_str(ctx, param, value): # noqa: ARG001 + if value is None: + return None + + values = value.split(",") if "," in value else [value] + + try: + return [int(val) if val.isdigit() else float(val) for val in values] + except ValueError as err: + raise click.BadParameter( + f"{param.name} must be a number or comma-separated list of numbers." + ) from err + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option( + "--target", + required=True, + type=str, + help="The target path for the backend to run benchmarks against. For example, http://localhost:8000", +) +@click.option( + "--backend-type", + type=click.Choice(list(get_args(BackendType))), + help=( + "The type of backend to use to run requests against. Defaults to 'openai_http'." + f" Supported types: {', '.join(get_args(BackendType))}" + ), + default="openai_http", +) +@click.option( + "--backend-args", + callback=parse_json, + default=None, + help=( + "A JSON string containing any arguments to pass to the backend as a " + "dict with **kwargs." + ), +) +@click.option( + "--model", + default=None, + type=str, + help=( + "The ID of the model to benchmark within the backend. " + "If None provided (default), then it will use the first model available." + ), +) +@click.option( + "--processor", + default=None, + type=str, + help=( + "The processor or tokenizer to use to calculate token counts for statistics " + "and synthetic data generation. If None provided (default), will load " + "using the model arg, if needed." + ), +) +@click.option( + "--processor-args", + default=None, + callback=parse_json, + help=( + "A JSON string containing any arguments to pass to the processor constructor " + "as a dict with **kwargs." + ), +) +@click.option( + "--data", + required=True, + type=str, + help=( + "The HuggingFace dataset ID, a path to a HuggingFace dataset, " + "a path to a data file csv, json, jsonl, or txt, " + "or a synthetic data config as a json or key=value string." 
+ ), +) +@click.option( + "--data-args", + callback=parse_json, + help=( + "A JSON string containing any arguments to pass to the dataset creation " + "as a dict with **kwargs." + ), +) +@click.option( + "--data-sampler", + default=None, + type=click.Choice(["random"]), + help=( + "The data sampler type to use. 'random' will add a random shuffle on the data. " + "Defaults to None" + ), +) +@click.option( + "--rate-type", + required=True, + type=click.Choice(STRATEGY_PROFILE_CHOICES), + help=( + "The type of benchmark to run. " + f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. " + ), +) +@click.option( + "--rate", + default=None, + callback=parse_number_str, + help=( + "The rates to run the benchmark at. " + "Can be a single number or a comma-separated list of numbers. " + "For rate-type=sweep, this is the number of benchmarks it runs in the sweep. " + "For rate-type=concurrent, this is the number of concurrent requests. " + "For rate-type=async,constant,poisson, this is the rate requests per second. " + "For rate-type=synchronous,throughput, this must not be set." + ), +) +@click.option( + "--max-seconds", + type=float, + help=( + "The maximum number of seconds each benchmark can run for. " + "If None, will run until max_requests or the data is exhausted." + ), +) +@click.option( + "--max-requests", + type=int, + help=( + "The maximum number of requests each benchmark can run for. " + "If None, will run until max_seconds or the data is exhausted." + ), +) +@click.option( + "--warmup-percent", + type=float, + default=None, + help=( + "The percent of the benchmark (based on max-seconds, max-requets, " + "or lenth of dataset) to run as a warmup and not include in the final results. " + "Defaults to None." + ), +) +@click.option( + "--cooldown-percent", + type=float, + help=( + "The percent of the benchmark (based on max-seconds, max-requets, or lenth " + "of dataset) to run as a cooldown and not include in the final results. " + "Defaults to None." + ), +) +@click.option( + "--disable-progress", + is_flag=True, + help="Set this flag to disable progress updates to the console", +) +@click.option( + "--display-scheduler-stats", + is_flag=True, + help="Set this flag to display stats for the processes running the benchmarks", +) +@click.option( + "--disable-console-outputs", + is_flag=True, + help="Set this flag to disable console output", +) +@click.option( + "--output-path", + type=click.Path(), + default=Path.cwd() / "benchmarks.json", + help=( + "The path to save the output to. If it is a directory, " + "it will save benchmarks.json under it. " + "Otherwise, json, yaml, or csv files are supported for output types " + "which will be read from the extension for the file path." 
+ ), +) +@click.option( + "--output-extras", + callback=parse_json, + help="A JSON string of extra data to save with the output benchmarks", +) +@click.option( + "--random-seed", + default=42, + type=int, + help="The random seed to use for benchmarking to ensure reproducibility.", +) +def benchmark( + target, + backend_type, + backend_args, + model, + processor, + processor_args, + data, + data_args, + data_sampler, + rate_type, + rate, + max_seconds, + max_requests, + warmup_percent, + cooldown_percent, + disable_progress, + display_scheduler_stats, + disable_console_outputs, + output_path, + output_extras, + random_seed, +): + asyncio.run( + benchmark_generative_text( + target=target, + backend_type=backend_type, + backend_args=backend_args, + model=model, + processor=processor, + processor_args=processor_args, + data=data, + data_args=data_args, + data_sampler=data_sampler, + rate_type=rate_type, + rate=rate, + max_seconds=max_seconds, + max_requests=max_requests, + warmup_percent=warmup_percent, + cooldown_percent=cooldown_percent, + show_progress=not disable_progress, + show_progress_scheduler_stats=display_scheduler_stats, + output_console=not disable_console_outputs, + output_path=output_path, + output_extras=output_extras, + random_seed=random_seed, + ) + ) + + +if __name__ == "__main__": + cli() diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py index a45a66a7..8dc2ef8f 100644 --- a/src/guidellm/backend/__init__.py +++ b/src/guidellm/backend/__init__.py @@ -2,7 +2,7 @@ Backend, BackendType, ) -from .openai import OpenAIHTTPBackend +from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend from .response import ( RequestArgs, ResponseSummary, @@ -18,4 +18,6 @@ "Backend", "BackendType", "OpenAIHTTPBackend", + "TEXT_COMPLETIONS_PATH", + "CHAT_COMPLETIONS_PATH", ] diff --git a/src/guidellm/backend/backend.py b/src/guidellm/backend/backend.py index e2b89f1e..ff80769a 100644 --- a/src/guidellm/backend/backend.py +++ b/src/guidellm/backend/backend.py @@ -1,4 +1,3 @@ -import asyncio from abc import ABC, abstractmethod from pathlib import Path from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union @@ -102,27 +101,32 @@ def model(self) -> Optional[str]: """ ... - def validate(self): + @property + @abstractmethod + def info(self) -> Dict[str, Any]: + """ + :return: The information about the backend. + """ + ... + + async def validate(self): """ Handle final setup and validate the backend is ready for use. If not successful, raises the appropriate exception. """ logger.info("{} validating backend {}", self.__class__.__name__, self.type_) - self.check_setup() - models = self.available_models() + await self.check_setup() + models = await self.available_models() if not models: raise ValueError("No models available for the backend") - async def _test_request(): - async for _ in self.text_completions( - prompt="Test connection", output_token_count=1 - ): # type: ignore[attr-defined] - pass - - asyncio.run(_test_request()) + async for _ in self.text_completions( + prompt="Test connection", output_token_count=1 + ): # type: ignore[attr-defined] + pass @abstractmethod - def check_setup(self): + async def check_setup(self): """ Check the setup for the backend. If unsuccessful, raises the appropriate exception. @@ -132,7 +136,17 @@ def check_setup(self): ... 
@abstractmethod - def available_models(self) -> List[str]: + async def prepare_multiprocessing(self): + """ + Prepare the backend for use in a multiprocessing environment. + This is useful for backends that have instance state that can not + be shared across processes and should be cleared out and re-initialized + for each new process. + """ + ... + + @abstractmethod + async def available_models(self) -> List[str]: """ Get the list of available models for the backend. diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 7870a949..48bde08b 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -16,7 +16,11 @@ ) from guidellm.config import settings -__all__ = ["OpenAIHTTPBackend"] +__all__ = ["OpenAIHTTPBackend", "TEXT_COMPLETIONS_PATH", "CHAT_COMPLETIONS_PATH"] + + +TEXT_COMPLETIONS_PATH = "/v1/completions" +CHAT_COMPLETIONS_PATH = "/v1/chat/completions" @Backend.register("openai_http") @@ -61,6 +65,17 @@ def __init__( ): super().__init__(type_="openai_http") self._target = target or settings.openai.base_url + + if not self._target: + raise ValueError("Target URL must be provided for OpenAI HTTP backend.") + + if self._target.endswith("/v1") or self._target.endswith("/v1/"): + # backwards compatability, strip v1 off + self._target = self._target[:-3] + + if self._target.endswith("/"): + self._target = self._target[:-1] + self._model = model api_key = api_key or settings.openai.api_key @@ -77,6 +92,7 @@ def __init__( if max_output_tokens is not None else settings.openai.max_output_tokens ) + self._async_client: Optional[httpx.AsyncClient] = None @property def target(self) -> str: @@ -94,7 +110,23 @@ def model(self) -> Optional[str]: """ return self._model - def check_setup(self): + @property + def info(self) -> Dict[str, Any]: + """ + :return: The information about the backend. + """ + return { + "max_output_tokens": self.max_output_tokens, + "timeout": self.timeout, + "http2": self.http2, + "authorization": bool(self.authorization), + "organization": self.organization, + "project": self.project, + "text_completions_path": TEXT_COMPLETIONS_PATH, + "chat_completions_path": CHAT_COMPLETIONS_PATH, + } + + async def check_setup(self): """ Check if the backend is setup correctly and can be used for requests. Specifically, if a model is not provided, it grabs the first available model. @@ -103,7 +135,7 @@ def check_setup(self): :raises ValueError: If no models or the provided model is not available. """ - models = self.available_models() + models = await self.available_models() if not models: raise ValueError(f"No models available for target: {self.target}") @@ -115,24 +147,32 @@ def check_setup(self): "{models} for target: {self.target}" ) - def available_models(self) -> List[str]: + async def prepare_multiprocessing(self): + """ + Prepare the backend for use in a multiprocessing environment. + Clears out the sync and async clients to ensure they are re-initialized + for each process. 
+ """ + if self._async_client is not None: + await self._async_client.aclose() + self._async_client = None + + async def available_models(self) -> List[str]: """ Get the available models for the target server using the OpenAI models endpoint: /v1/models """ target = f"{self.target}/v1/models" headers = self._headers() + response = await self._get_async_client().get(target, headers=headers) + response.raise_for_status() - with httpx.Client(http2=self.http2, timeout=self.timeout) as client: - response = client.get(target, headers=headers) - response.raise_for_status() + models = [] - models = [] + for item in response.json()["data"]: + models.append(item["id"]) - for item in response.json()["data"]: - models.append(item["id"]) - - return models + return models async def text_completions( # type: ignore[override] self, @@ -160,7 +200,6 @@ async def text_completions( # type: ignore[override] a StreamingTextResponse for each received iteration, and a ResponseSummary for the final response. """ - logger.debug("{} invocation with args: {}", self.__class__.__name__, locals()) headers = self._headers() payload = self._completions_payload( @@ -171,7 +210,7 @@ async def text_completions( # type: ignore[override] try: async for resp in self._iterative_completions_request( - type_="text", + type_="text_completions", request_id=request_id, request_prompt_tokens=prompt_token_count, request_output_tokens=output_token_count, @@ -246,7 +285,7 @@ async def chat_completions( # type: ignore[override] try: async for resp in self._iterative_completions_request( - type_="chat", + type_="chat_completions", request_id=request_id, request_prompt_tokens=prompt_token_count, request_output_tokens=output_token_count, @@ -264,6 +303,21 @@ async def chat_completions( # type: ignore[override] ) raise ex + def _get_async_client(self) -> httpx.AsyncClient: + """ + Get the async HTTP client for making requests. + If the client has not been created yet, it will create one. + + :return: The async HTTP client. 
+ """ + if self._async_client is None: + client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout) + self._async_client = client + else: + client = self._async_client + + return client + def _headers(self) -> Dict[str, str]: headers = { "Content-Type": "application/json", @@ -372,19 +426,17 @@ def _create_chat_messages( async def _iterative_completions_request( self, - type_: Literal["text", "chat"], + type_: Literal["text_completions", "chat_completions"], request_id: Optional[str], request_prompt_tokens: Optional[int], request_output_tokens: Optional[int], headers: Dict, payload: Dict, ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]: - target = f"{self.target}/v1/" - - if type_ == "text": - target += "completions" - elif type_ == "chat": - target += "chat/completions" + if type_ == "text_completions": + target = f"{self.target}{TEXT_COMPLETIONS_PATH}" + elif type_ == "chat_completions": + target = f"{self.target}{CHAT_COMPLETIONS_PATH}" else: raise ValueError(f"Unsupported type: {type_}") @@ -400,58 +452,72 @@ async def _iterative_completions_request( payload, ) - async with httpx.AsyncClient(http2=self.http2, timeout=self.timeout) as client: - response_value = "" - response_prompt_count: Optional[int] = None - response_output_count: Optional[int] = None - iter_count = 0 - start_time = time.time() - iter_time = start_time - - yield StreamingTextResponse( - type_="start", - iter_count=iter_count, - delta="", - time=start_time, - request_id=request_id, - ) + response_value = "" + response_prompt_count: Optional[int] = None + response_output_count: Optional[int] = None + iter_count = 0 + start_time = time.time() + iter_time = start_time + first_iter_time: Optional[float] = None + last_iter_time: Optional[float] = None + + yield StreamingTextResponse( + type_="start", + value="", + start_time=start_time, + first_iter_time=None, + iter_count=iter_count, + delta="", + time=start_time, + request_id=request_id, + ) - async with client.stream( - "POST", target, headers=headers, json=payload - ) as stream: - stream.raise_for_status() - - async for line in stream.aiter_lines(): - iter_time = time.time() - logger.debug( - "{} request: {} recieved iter response line: {}", - self.__class__.__name__, - request_id, - line, + # reset start time after yielding start response to ensure accurate timing + start_time = time.time() + + async with self._get_async_client().stream( + "POST", target, headers=headers, json=payload + ) as stream: + stream.raise_for_status() + + async for line in stream.aiter_lines(): + iter_time = time.time() + logger.debug( + "{} request: {} recieved iter response line: {}", + self.__class__.__name__, + request_id, + line, + ) + + if not line or not line.strip().startswith("data:"): + continue + + if line.strip() == "data: [DONE]": + break + + data = json.loads(line.strip()[len("data: ") :]) + if delta := self._extract_completions_delta_content(type_, data): + if first_iter_time is None: + first_iter_time = iter_time + last_iter_time = iter_time + + iter_count += 1 + response_value += delta + + yield StreamingTextResponse( + type_="iter", + value=response_value, + iter_count=iter_count, + start_time=start_time, + first_iter_time=first_iter_time, + delta=delta, + time=iter_time, + request_id=request_id, ) - if not line or not line.strip().startswith("data:"): - continue - - if line.strip() == "data: [DONE]": - break - - data = json.loads(line.strip()[len("data: ") :]) - if delta := self._extract_completions_delta_content(type_, data): - iter_count 
+= 1 - response_value += delta - - yield StreamingTextResponse( - type_="iter", - iter_count=iter_count, - delta=delta, - time=iter_time, - request_id=request_id, - ) - - if usage := self._extract_completions_usage(data): - response_prompt_count = usage["prompt"] - response_output_count = usage["output"] + if usage := self._extract_completions_usage(data): + response_prompt_count = usage["prompt"] + response_output_count = usage["output"] logger.info( "{} request: {} with headers: {} and payload: {} completed with: {}", @@ -473,6 +539,8 @@ async def _iterative_completions_request( ), start_time=start_time, end_time=iter_time, + first_iter_time=first_iter_time, + last_iter_time=last_iter_time, iterations=iter_count, request_prompt_tokens=request_prompt_tokens, request_output_tokens=request_output_tokens, @@ -483,15 +551,15 @@ async def _iterative_completions_request( @staticmethod def _extract_completions_delta_content( - type_: Literal["text", "chat"], data: Dict + type_: Literal["text_completions", "chat_completions"], data: Dict ) -> Optional[str]: if "choices" not in data or not data["choices"]: return None - if type_ == "text": + if type_ == "text_completions": return data["choices"][0]["text"] - if type_ == "chat": + if type_ == "chat_completions": return data["choices"][0]["delta"]["content"] raise ValueError(f"Unsupported type: {type_}") diff --git a/src/guidellm/backend/response.py b/src/guidellm/backend/response.py index 699f41cc..9dc74578 100644 --- a/src/guidellm/backend/response.py +++ b/src/guidellm/backend/response.py @@ -1,9 +1,9 @@ from typing import Any, Dict, Literal, Optional -from loguru import logger -from pydantic import BaseModel, computed_field +from pydantic import computed_field from guidellm.config import settings +from guidellm.objects.pydantic import StandardBaseModel __all__ = [ "StreamingResponseType", @@ -16,11 +16,13 @@ StreamingResponseType = Literal["start", "iter"] -class StreamingTextResponse(BaseModel): +class StreamingTextResponse(StandardBaseModel): """ A model representing the response content for a streaming text request. :param type_: The type of the response; either 'start' or 'iter'. + :param value: The value of the response up to this iteration. + :param start_time: The time.time() the request started. :param iter_count: The iteration count for the response. For 'start' this is 0 and for the first 'iter' it is 1. :param delta: The text delta added to the response for this stream iteration. @@ -30,13 +32,16 @@ class StreamingTextResponse(BaseModel): """ type_: StreamingResponseType + value: str + start_time: float + first_iter_time: Optional[float] iter_count: int delta: str time: float request_id: Optional[str] = None -class RequestArgs(BaseModel): +class RequestArgs(StandardBaseModel): """ A model representing the arguments for a request to a backend. Biases towards an HTTP request, but can be used for other types of backends. @@ -56,19 +61,28 @@ class RequestArgs(BaseModel): http2: Optional[bool] = None -class ResponseSummary(BaseModel): +class ResponseSummary(StandardBaseModel): """ A model representing a summary of a backend request. Always returned as the final iteration of a streaming request. :param value: The final value returned from the request. :param request_args: The arguments used to make the request. + :param iterations: The number of iterations in the request. :param start_time: The time the request started. :param end_time: The time the request ended. - :param iterations: The number of iterations in the request. 
- :param prompt_tokens: The number of tokens in the prompt, if any usage was returned. - :param output_tokens: The number of tokens in the output, if any usage was returned. + :param first_iter_time: The time the first iteration was received. + :param last_iter_time: The time the last iteration was received. + :param request_prompt_tokens: The number of tokens measured in the prompt + for the request, if any. + :param request_output_tokens: The number of tokens enforced for the output + for the request, if any. + :param response_prompt_tokens: The number of tokens measured in the prompt + for the response, if any. + :param response_output_tokens: The number of tokens measured in the output + for the response, if any. :param request_id: The unique identifier for the request, if any. + :param error: The error message, if any, returned from making the request. """ value: str @@ -76,11 +90,14 @@ class ResponseSummary(BaseModel): iterations: int = 0 start_time: float end_time: float + first_iter_time: Optional[float] + last_iter_time: Optional[float] request_prompt_tokens: Optional[int] = None request_output_tokens: Optional[int] = None response_prompt_tokens: Optional[int] = None response_output_tokens: Optional[int] = None request_id: Optional[str] = None + error: Optional[str] = None @computed_field # type: ignore[misc] @property @@ -91,21 +108,7 @@ def prompt_tokens(self) -> Optional[int]: :return: The number of tokens in the prompt, if any. """ - if settings.preferred_prompt_tokens_source == "backend": - if self.response_prompt_tokens is None: - logger.warning( - "Preferred prompt tokens source is backend, but no prompt token " - f"values were returned with the response for {self}. " - "Defulating to request_prompt_tokens (if available)." - ) - return self.response_prompt_tokens or self.request_prompt_tokens - elif settings.preferred_prompt_tokens_source == "request": - if self.request_prompt_tokens is None: - logger.warning( - "Preferred prompt tokens source is request, but no prompt token " - f"values were returned with the request for {self}. " - "Defulating to response_prompt_tokens (if available)." - ) + if settings.preferred_prompt_tokens_source == "request": return self.request_prompt_tokens or self.response_prompt_tokens return self.response_prompt_tokens or self.request_prompt_tokens @@ -119,21 +122,11 @@ def output_tokens(self) -> Optional[int]: :return: The number of tokens in the output, if any. """ - if settings.preferred_output_tokens_source == "backend": - if self.response_output_tokens is None: - logger.warning( - "Preferred output tokens source is backend, but no output token " - f"values were returned with the response for {self}. " - "Defulating to request_output_tokens (if available)." - ) - return self.response_output_tokens or self.request_output_tokens - elif settings.preferred_output_tokens_source == "request": - if self.request_output_tokens is None: - logger.warning( - "Preferred output tokens source is request, but no output token " - f"values were returned with the request for {self}. " - "Defulating to response_output_tokens (if available)." 
- ) + if self.error is not None: + # error occurred, can't trust request tokens were all generated + return self.response_output_tokens + + if settings.preferred_output_tokens_source == "request": + return self.request_output_tokens or self.response_output_tokens return self.response_output_tokens or self.request_output_tokens diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py new file mode 100644 index 00000000..dc100596 --- /dev/null +++ b/src/guidellm/benchmark/__init__.py @@ -0,0 +1,35 @@ +from .aggregator import AggregatorT, BenchmarkAggregator, GenerativeBenchmarkAggregator +from .benchmark import Benchmark, BenchmarkT, GenerativeBenchmark +from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker +from .entrypoints import benchmark_generative_text +from .profile import ( + AsyncProfile, + ConcurrentProfile, + Profile, + ProfileType, + SweepProfile, + SynchronousProfile, + ThroughputProfile, + create_profile, +) + +__all__ = [ + "AggregatorT", + "BenchmarkT", + "Benchmark", + "BenchmarkAggregator", + "GenerativeBenchmark", + "GenerativeBenchmarkAggregator", + "Benchmarker", + "BenchmarkerResult", + "GenerativeBenchmarker", + "AsyncProfile", + "ConcurrentProfile", + "Profile", + "ProfileType", + "SweepProfile", + "SynchronousProfile", + "ThroughputProfile", + "create_profile", + "benchmark_generative_text", +] diff --git a/src/guidellm/benchmark/aggregator.py b/src/guidellm/benchmark/aggregator.py new file mode 100644 index 00000000..6bd69d28 --- /dev/null +++ b/src/guidellm/benchmark/aggregator.py @@ -0,0 +1,763 @@ +import time +from abc import ABC, abstractmethod +from pathlib import Path +from typing import ( + Any, + Dict, + Generic, + List, + Literal, + Optional, + Tuple, + TypeVar, + Union, +) + +from pydantic import Field + +from guidellm.backend import ResponseSummary +from guidellm.benchmark.benchmark import ( + BenchmarkArgs, + BenchmarkRunStats, + BenchmarkT, + GenerativeBenchmark, + GenerativeTextErrorStats, + GenerativeTextResponseStats, +) +from guidellm.config import settings +from guidellm.objects import ( + RunningStats, + StandardBaseModel, + StatusBreakdown, + TimeRunningStats, +) +from guidellm.request import ( + GenerationRequest, + GenerativeRequestLoaderDescription, + RequestLoaderDescription, +) +from guidellm.scheduler import ( + GenerativeRequestsWorkerDescription, + RequestT, + ResponseT, + SchedulerRequestResult, + WorkerDescription, +) +from guidellm.utils import check_load_processor + +__all__ = [ + "AggregatorT", + "BenchmarkAggregator", + "GenerativeBenchmarkAggregator", +] + + +class SchedulerRunningStats(StandardBaseModel): + """ + The metrics for the scheduler stored as running statistics for easy calculations + of rates, averages, totals, etc. + """ + + created_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests created for this " + "benchmark run. This includes all requests created, regardless of " + "their status." + ), + default_factory=RunningStats, + ) + queued_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests pending in queue " + "for this benchmark run. This includes requests that are waiting to " + "be scheduled." + ), + default_factory=RunningStats, + ) + scheduled_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests scheduled (actively " + "running but waiting for the desired start time) for this benchmark run."
+ ), + default_factory=RunningStats, + ) + processing_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests actively being " + "processed by the worker for this benchmark run." + ), + default_factory=RunningStats, + ) + completed_requests: RunningStats = Field( + description=( + "The running statistics for the number of requests completed for this " + "benchmark run. This includes requests within the warmup and cooldown " + "period, if any, along with the final results." + ), + default_factory=RunningStats, + ) + + +class RequestsRunningStats(StandardBaseModel): + """ + The metrics for requests that have succeeded, been canceled, or errored stored + as running statistics for easy calculations of rates, averages, totals, etc. + """ + + totals: StatusBreakdown[RunningStats, RunningStats, RunningStats, RunningStats] = ( + Field( + description=( + "The running statistics for the total number of requests that " + "completed within the benchmark run." + ), + default_factory=lambda: StatusBreakdown( + successful=RunningStats(), + errored=RunningStats(), + incomplete=RunningStats(), + total=RunningStats(), + ), + ) + ) + queued_time: TimeRunningStats = Field( + description=( + "The running statistics for the time spent in queue for all requests that " + "completed within the benchmark run. This is the time from when the " + "request was created to when it was dequeued by the worker." + ), + default_factory=TimeRunningStats, + ) + scheduled_time_delay: TimeRunningStats = Field( + description=( + "The running statistics for the time spent from when a request was " + "dequeued by the worker to when it was actually scheduled by the worker" + "for all requests that completed within the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ), + default_factory=TimeRunningStats, + ) + scheduled_time_sleep: TimeRunningStats = Field( + description=( + "The running statistics for the time for each request spent sleeping til " + "the desired start time was reached for all requests that completed within " + "the benchmark run. This is the time from when the request was scheduled " + "to when the desired start time was reached. " + ), + default_factory=TimeRunningStats, + ) + worker_start_delay: TimeRunningStats = Field( + description=( + "The running statistics for the time delay between when the request was " + "scheduled and when the worker actually started processing subtracting any " + "sleep time for all requests that completed within the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ), + default_factory=TimeRunningStats, + ) + worker_time: TimeRunningStats = Field( + description=( + "The running statistics for the time spent processing all requests that " + "completed within the benchmark run. This is the time from when the " + "request was started to when it was completed." + ), + default_factory=TimeRunningStats, + ) + worker_start_time_targeted_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay between the targeted start time and " + "the actual start time for requests that completed within the benchmark " + "run. This represents delays from the best case desired start time. " + "For async strategies, this represents delays from the ideal system. 
" + "For sync strategies, since those are doubled in queue, this should be " + "as close to the time for a request to be processed as possible." + ), + default_factory=TimeRunningStats, + ) + request_start_time_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay between the actual request being " + "made and the time the worker started on the request for all requests " + "that completed within the benchmark run. This time should be as close to " + "0 as possible, any additional time is overhead from the system or " + "the worker." + ), + default_factory=TimeRunningStats, + ) + request_start_time_targeted_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay between the targeted start time and " + "the actual start time for all requests that completed within the " + "benchmark run. This represents delays from the best case desired start " + "time. For async strategies, this represents delays from the ideal system. " + "For sync strategies, since those are duplicated in queue, this should be " + "as close to the time for a request to be processed." + ), + default_factory=TimeRunningStats, + ) + request_time_delay: TimeRunningStats = Field( + description=( + "The running statistics for the delay in time between the total request " + "time and the worker time. This should be as close to 0 as possible, any " + "additional time is overhead from the system or the worker. " + ), + default_factory=TimeRunningStats, + ) + request_time: TimeRunningStats = Field( + description=( + "The running statistics for the time spent processing all requests that " + "completed within the benchmark run. This is the time from when the " + "request was created to when it was completed." + ), + default_factory=TimeRunningStats, + ) + + +class BenchmarkAggregator( + ABC, StandardBaseModel, Generic[BenchmarkT, RequestT, ResponseT] +): + """ + A pydantic base class representing the base class for aggregating benchmark results. + The purpose is to receive and process results from a Benchmarker as it iterates + through a Scheduler for an individual benchmark run. + As results are added, lightweight statistics are updated and stored for immediate + progress and informational updates to the caller. + Once the benchmark run is complete, the `compile` method is called to finalize + the benchmark and return a Benchmark object with all the results and statistics + fully calculated. + """ + + type_: Literal["benchmark_aggregator"] = "benchmark_aggregator" + run_id: str = Field( + description=( + "The unique identifier for the encompasing benchmark run that this " + "benchmark was a part of." + ) + ) + args: BenchmarkArgs = Field( + description=( + "The arguments used to create the benchmark run that this benchmark was " + "a part of." + ) + ) + worker_description: Union[ + GenerativeRequestsWorkerDescription, WorkerDescription + ] = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." + ), + discriminator="type_", + ) + request_loader_description: Union[ + GenerativeRequestLoaderDescription, RequestLoaderDescription + ] = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + discriminator="type_", + ) + extras: Dict[str, Any] = Field( + description=( + "Any additional information or metadata that was passed for this benchmark." 
+ ) + ) + in_warmup: bool = Field( + description=( + "A flag to indicate if the benchmark is currently in the warmup phase." + ), + default=False, + exclude=True, + ) + in_cooldown: bool = Field( + description=( + "A flag to indicate if the benchmark is currently in the cooldown phase." + ), + default=False, + exclude=True, + ) + scheduler_stats: SchedulerRunningStats = Field( + description=( + "The running statistics for the scheduler for this benchmark run. " + "This includes all requests created, regardless of their status." + ), + default_factory=SchedulerRunningStats, + ) + requests_stats: RequestsRunningStats = Field( + description=( + "The running statistics for the requests for this benchmark run. " + "This includes all requests created, regardless of their status." + ), + default_factory=RequestsRunningStats, + ) + results: StatusBreakdown[ + List[SchedulerRequestResult[RequestT, ResponseT]], + List[SchedulerRequestResult[RequestT, ResponseT]], + List[SchedulerRequestResult[RequestT, ResponseT]], + None, + ] = Field( + description=( + "The completed requests for this benchmark run broken down by status" + "and excluding warmup and cooldown requests." + ), + default_factory=lambda: StatusBreakdown( # type: ignore[arg-type] + successful=[], + errored=[], + incomplete=[], + total=None, + ), + ) + + def add_result( + self, + result: SchedulerRequestResult[RequestT, ResponseT], + ) -> bool: + """ + Add a result to the aggregator. This will update the internal statistics + and add the result to the list of results if it is not within the warmup or + cooldown period. + + :param result: The result to add to the aggregator. + :return: True if the result was added, False if it was added because it + did not fit within the warmup or cooldown period, was not requested, + or is not finished + """ + # Add scheduler statistics + self.scheduler_stats.created_requests += max( + 0, result.run_info.created_requests + ) + self.scheduler_stats.queued_requests += max(0, result.run_info.queued_requests) + self.scheduler_stats.scheduled_requests += max( + 0, result.run_info.scheduled_requests + ) + self.scheduler_stats.processing_requests += max( + 0, result.run_info.processing_requests + ) + self.scheduler_stats.completed_requests += max( + 0, result.run_info.completed_requests + ) + + if result.type_ != "request_complete" or ( + result.request_info.canceled and not result.request_info.requested + ): + # If the result is not completed yet, don't add to the results + # If the result was canceled and not started, ignore it + return False + + # Add request statistics + self.requests_stats.totals.total += 1 + if result.request_info.canceled: + self.requests_stats.totals.incomplete += 1 + elif result.request_info.errored: + self.requests_stats.totals.errored += 1 + elif result.request_info.completed: + self.requests_stats.totals.successful += 1 + else: + raise ValueError( + "Unexpected state: request_info must be either " + "completed, canceled, or errored. 
" + f"Got {result.request_info}" + ) + + self.requests_stats.queued_time.update( + result.request_info.dequeued_time - result.request_info.queued_time + ) + self.requests_stats.scheduled_time_delay.update( + result.request_info.scheduled_time - result.request_info.dequeued_time + ) + sleep_time = max( + 0.0, + result.request_info.targeted_start_time + - result.request_info.scheduled_time, + ) + self.requests_stats.scheduled_time_sleep.update(sleep_time) + time_to_worker_start = ( + result.request_info.worker_start - result.request_info.scheduled_time + ) + self.requests_stats.worker_start_delay.update(time_to_worker_start - sleep_time) + self.requests_stats.worker_time.update( + result.request_info.worker_end - result.request_info.worker_start + ) + self.requests_stats.worker_start_time_targeted_delay.update( + result.request_info.worker_start - result.request_info.targeted_start_time + ) + self.requests_stats.request_start_time_delay.update( + result.request_info.worker_start - result.request_info.targeted_start_time + ) + self.requests_stats.request_start_time_targeted_delay.update( + result.request_info.worker_start - result.request_info.targeted_start_time + ) + self.requests_stats.request_time_delay.update( + (result.request_info.worker_end - result.request_info.worker_start) + - (result.request_info.worker_end - result.request_info.worker_start) + ) + self.requests_stats.request_time.update( + result.request_info.worker_end - result.request_info.worker_start + ) + + # Add result to the list of results provided we are not in warmup or cooldown + total_completed = self.requests_stats.totals.total.total + global_start_time = self.requests_stats.totals.total.start_time + + in_warmup_number = ( + self.args.warmup_number and total_completed <= self.args.warmup_number + ) + in_warmup_duration = ( + self.args.warmup_duration + and result.request_info.worker_start + <= (global_start_time - self.args.warmup_duration) + ) + + if in_warmup_number or in_warmup_duration: + self.in_warmup = True + return True + + self.in_warmup = False + in_cooldown_number = ( + self.args.cooldown_number + and self.args.max_number + and total_completed > self.args.max_number - self.args.cooldown_number + ) + in_cooldown_duration = ( + self.args.cooldown_duration + and self.args.max_duration + and result.request_info.worker_start + > global_start_time + self.args.max_duration - self.args.cooldown_duration + ) + + if in_cooldown_number or in_cooldown_duration: + self.in_cooldown = True + return True + + self.in_cooldown = False + + if result.request_info.canceled: + self.results.incomplete.append(result) + elif result.request_info.errored: + self.results.errored.append(result) + elif result.request_info.completed: + self.results.successful.append(result) + else: + raise ValueError( + "Unexpected state: request_info must be either " + "completed, canceled, or errored. " + f"Got {result.request_info}" + ) + + return True + + @abstractmethod + def compile(self) -> BenchmarkT: + """ + Compile the benchmark results and statistics into a Benchmark object. + This is required to be implemented by subclasses to finalize the benchmark + and return the compiled object. + """ + ... + + +AggregatorT = TypeVar("AggregatorT", bound=BenchmarkAggregator) + + +class GenerativeRequestsRunningStats(RequestsRunningStats): + """ + The metrics for generative requests that have succeeded, been canceled, or errored + stored as running statistics for easy calculations of rates, averages, totals, etc. 
+ """ + + time_to_first_token: TimeRunningStats = Field( + description=( + "The running statistics for the time from the start of the request to the " + "first token being generated for all requests that completed within the " + "benchmark run." + ), + default_factory=TimeRunningStats, + ) + inter_token_latency: TimeRunningStats = Field( + description=( + "The running statistics for the time between each token being generated " + "for all requests that completed within the benchmark run." + ), + default_factory=TimeRunningStats, + ) + prompt_tokens: RunningStats = Field( + description=( + "The running statistics for the token count for the prompt for all " + "requests that completed, if available in the response." + ), + default_factory=RunningStats, + ) + output_tokens: RunningStats = Field( + description=( + "The running statistics for the token count for the output for all " + "requests that completed, if available in the response." + ), + default_factory=RunningStats, + ) + total_tokens: RunningStats = Field( + description=( + "The running statistics for the total token count for all requests that " + "completed, if available in the response." + ), + default_factory=RunningStats, + ) + + +class GenerativeBenchmarkAggregator( + BenchmarkAggregator[GenerativeBenchmark, GenerationRequest, ResponseSummary] +): + type_: Literal["generative_benchmark_aggregator"] = ( + "generative_benchmark_aggregator" # type: ignore[assignment] + ) + processor: Optional[Union[str, Path, Any]] = Field( + description=( + "The tokenizer to use for calculating token counts when none are " + "avaiable that match the preferred source." + ) + ) + processor_args: Optional[Dict[str, Any]] = Field( + description=( + "Additional arguments to pass to the tokenizer if it requires " + "any specific configuration for loading or processing." + ), + ) + worker_description: GenerativeRequestsWorkerDescription = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." + ), + discriminator="type_", + ) + request_loader_description: GenerativeRequestLoaderDescription = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + discriminator="type_", + ) + requests_stats: GenerativeRequestsRunningStats = Field( + description=( + "The running statistics for the requests for this benchmark run. " + "This includes all requests created, regardless of their status." + ), + default_factory=GenerativeRequestsRunningStats, + ) + + def add_result( + self, result: SchedulerRequestResult[GenerationRequest, ResponseSummary] + ) -> bool: + """ + Add a result to the aggregator. This will update the internal statistics + and add the result to the list of results if it is not within the warmup or + cooldown period. + + :param result: The result to add to the aggregator. 
+ """ + if not super().add_result(result): + return False + + if result.request is None: + raise ValueError("Request is None, cannot add result.") + + if result.response is None: + raise ValueError("Response is None, cannot add result.") + + self.requests_stats.request_start_time_delay.update( + result.response.start_time - result.request_info.worker_start + ) + self.requests_stats.request_start_time_targeted_delay.update( + result.response.start_time - result.request_info.targeted_start_time + ) + self.requests_stats.request_time_delay.update( + (result.response.start_time - result.request_info.worker_start) + + result.request_info.worker_end + - result.response.end_time + ) + self.requests_stats.request_time.update( + result.response.end_time - result.response.start_time + ) + if result.response.first_iter_time: + self.requests_stats.time_to_first_token.update( + result.response.first_iter_time - result.response.start_time + ) + if result.response.last_iter_time and result.response.first_iter_time: + self.requests_stats.inter_token_latency.update( + result.response.last_iter_time - result.response.first_iter_time, + count=(result.response.output_tokens or 1) - 1, + ) + self.requests_stats.prompt_tokens += result.response.request_prompt_tokens or 0 + self.requests_stats.output_tokens += result.response.request_output_tokens or 0 + total_tokens = (result.response.request_prompt_tokens or 0) + ( + result.response.request_output_tokens or 0 + ) + self.requests_stats.total_tokens += total_tokens + + return True + + def compile(self) -> GenerativeBenchmark: + """ + Compile the benchmark results and statistics into a GenerativeBenchmark object. + This is required to be implemented by subclasses to finalize the benchmark + and return the compiled object. 
+ """ + successful, incomplete, errored = self._compile_results() + + return GenerativeBenchmark.from_stats( + run_id=self.run_id, + successful=successful, + incomplete=incomplete, + errored=errored, + args=self.args, + run_stats=BenchmarkRunStats( + start_time=self.requests_stats.totals.total.start_time, + end_time=time.time(), + requests_made=StatusBreakdown( + successful=int(self.requests_stats.totals.successful.total), + errored=int(self.requests_stats.totals.errored.total), + incomplete=int(self.requests_stats.totals.incomplete.total), + total=int(self.requests_stats.totals.total.total), + ), + queued_time_avg=self.requests_stats.queued_time.mean, + scheduled_time_delay_avg=self.requests_stats.scheduled_time_delay.mean, + scheduled_time_sleep_avg=self.requests_stats.scheduled_time_sleep.mean, + worker_start_delay_avg=self.requests_stats.worker_start_delay.mean, + worker_time_avg=self.requests_stats.worker_time.mean, + worker_start_time_targeted_delay_avg=self.requests_stats.worker_start_time_targeted_delay.mean, + request_start_time_delay_avg=self.requests_stats.request_start_time_delay.mean, + request_start_time_targeted_delay_avg=self.requests_stats.request_start_time_targeted_delay.mean, + request_time_delay_avg=self.requests_stats.request_time_delay.mean, + request_time_avg=self.requests_stats.request_time.mean, + ), + worker=self.worker_description, + requests_loader=self.request_loader_description, + extras=self.extras, + ) + + def _compile_results( + self, + ) -> Tuple[ + List[GenerativeTextResponseStats], + List[GenerativeTextErrorStats], + List[GenerativeTextErrorStats], + ]: + successful: List[GenerativeTextResponseStats] = [ + GenerativeTextResponseStats( + request_id=result.request.request_id, + request_type=result.request.request_type, + scheduler_info=result.request_info, + prompt=str(result.request.content), + prompt_tokens=self._compile_tokens_count( + value=str(result.request.content), + requests_tokens=result.response.request_prompt_tokens, + response_tokens=result.response.response_prompt_tokens, + preferred_tokens_source=settings.preferred_prompt_tokens_source, + errored=False, + ), + output=result.response.value, + output_tokens=self._compile_tokens_count( + value=result.response.value, + requests_tokens=result.response.request_output_tokens, + response_tokens=result.response.response_output_tokens, + preferred_tokens_source=settings.preferred_output_tokens_source, + errored=False, + ), + start_time=result.response.start_time, + end_time=result.response.end_time, + first_token_time=result.response.first_iter_time or -1.0, + last_token_time=result.response.last_iter_time or -1.0, + ) + for result in self.results.successful + if result.request and result.response + ] + incomplete: List[GenerativeTextErrorStats] = [ + GenerativeTextErrorStats( + error=result.response.error or "", + request_id=result.request.request_id, + request_type=result.request.request_type, + scheduler_info=result.request_info, + prompt=str(result.request.content), + prompt_tokens=self._compile_tokens_count( + value=str(result.request.content), + requests_tokens=result.response.request_prompt_tokens, + response_tokens=result.response.response_prompt_tokens, + preferred_tokens_source=settings.preferred_prompt_tokens_source, + errored=True, + ), + output=result.response.value, + output_tokens=self._compile_tokens_count( + value=result.response.value, + requests_tokens=result.response.request_output_tokens, + response_tokens=result.response.response_output_tokens, + 
preferred_tokens_source=settings.preferred_output_tokens_source, + errored=True, + ), + start_time=result.response.start_time, + end_time=result.response.end_time, + first_token_time=result.response.first_iter_time, + last_token_time=result.response.last_iter_time, + ) + for result in self.results.incomplete + if result.request and result.response + ] + error: List[GenerativeTextErrorStats] = [ + GenerativeTextErrorStats( + error=result.response.error or "", + request_id=result.request.request_id, + request_type=result.request.request_type, + scheduler_info=result.request_info, + prompt=str(result.request.content), + prompt_tokens=self._compile_tokens_count( + value=str(result.request.content), + requests_tokens=result.response.request_prompt_tokens, + response_tokens=result.response.response_prompt_tokens, + preferred_tokens_source=settings.preferred_prompt_tokens_source, + errored=True, + ), + output=result.response.value, + output_tokens=self._compile_tokens_count( + value=result.response.value, + requests_tokens=result.response.request_output_tokens, + response_tokens=result.response.response_output_tokens, + preferred_tokens_source=settings.preferred_output_tokens_source, + errored=True, + ), + start_time=result.response.start_time, + end_time=result.response.end_time, + first_token_time=result.response.first_iter_time, + last_token_time=result.response.last_iter_time, + ) + for result in self.results.errored + if result.request and result.response + ] + + return successful, incomplete, error + + def _compile_tokens_count( + self, + value: str, + requests_tokens: Optional[int], + response_tokens: Optional[int], + preferred_tokens_source: Optional[Literal["request", "response", "local"]], + errored: bool, + ) -> int: + if not errored and preferred_tokens_source == "response" and response_tokens: + return response_tokens or 0 + + if not errored and preferred_tokens_source == "request" and requests_tokens: + return requests_tokens or 0 + + if preferred_tokens_source in {"response", "request"} and ( + self.processor is None or errored or response_tokens or requests_tokens + ): + # we had a preferred tokens source that isn't local and we either + # have the data to return something or we don't have the ability + # to calculate locally + return response_tokens or requests_tokens or 0 + + self.processor = check_load_processor( + self.processor, + processor_args=self.processor_args, + error_msg="Processor/Tokenizer is required for calculating token counts.", + ) + return len(self.processor.tokenize(value)) diff --git a/src/guidellm/benchmark/benchmark.py b/src/guidellm/benchmark/benchmark.py new file mode 100644 index 00000000..f1f9187c --- /dev/null +++ b/src/guidellm/benchmark/benchmark.py @@ -0,0 +1,828 @@ +import random +import uuid +from typing import Any, Dict, List, Literal, Optional, TypeVar, Union + +from pydantic import Field, computed_field + +from guidellm.benchmark.profile import ( + AsyncProfile, + ConcurrentProfile, + Profile, + SweepProfile, + SynchronousProfile, + ThroughputProfile, +) +from guidellm.objects import ( + StandardBaseModel, + StatusBreakdown, + StatusDistributionSummary, +) +from guidellm.request import ( + GenerativeRequestLoaderDescription, + RequestLoaderDescription, +) +from guidellm.scheduler import ( + AsyncConstantStrategy, + AsyncPoissonStrategy, + ConcurrentStrategy, + GenerativeRequestsWorkerDescription, + SchedulerRequestInfo, + SchedulingStrategy, + SynchronousStrategy, + ThroughputStrategy, + WorkerDescription, +) + +__all__ = [ + "BenchmarkT", + 
"StatusBreakdown", + "BenchmarkArgs", + "BenchmarkRunStats", + "Benchmark", + "BenchmarkMetrics", + "GenerativeTextResponseStats", + "GenerativeTextErrorStats", + "GenerativeMetrics", + "GenerativeBenchmark", +] + + +class BenchmarkArgs(StandardBaseModel): + """ + A serializable model representing the arguments used to specify a benchmark run + and how data was collected for it. + """ + + profile: Union[ + AsyncProfile, + SweepProfile, + ConcurrentProfile, + ThroughputProfile, + SynchronousProfile, + Profile, + ] = Field( + description=( + "The profile used for the entire benchmark run that the strategy for " + "this benchmark was pulled from." + ), + discriminator="type_", + ) + strategy_index: int = Field( + description=( + "The index of the strategy in the profile that was used for this benchmark." + ) + ) + strategy: Union[ + ConcurrentStrategy, + SchedulingStrategy, + ThroughputStrategy, + SynchronousStrategy, + AsyncPoissonStrategy, + AsyncConstantStrategy, + SchedulingStrategy, + ] = Field( + description="The scheduling strategy used to run this benchmark. ", + discriminator="type_", + ) + max_number: Optional[int] = Field( + description="The maximum number of requests to run for this benchmark, if any." + ) + max_duration: Optional[float] = Field( + description="The maximum duration in seconds to run this benchmark, if any." + ) + warmup_number: Optional[int] = Field( + description=( + "The number of requests to run for the warmup phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + warmup_duration: Optional[float] = Field( + description=( + "The duration in seconds to run for the warmup phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + cooldown_number: Optional[int] = Field( + description=( + "The number of requests to run for the cooldown phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + cooldown_duration: Optional[float] = Field( + description=( + "The duration in seconds to run for the cooldown phase of this benchmark, " + "if any. These are requests that were not included in the final results." + ) + ) + + +class BenchmarkRunStats(StandardBaseModel): + """ + A serializable model representing the run process statistics for the + entire benchmark run across all requests including warmup and cooldown. + """ + + start_time: float = Field( + description="The start time of the benchmark run.", + ) + end_time: float = Field( + description="The end time of the benchmark run.", + ) + requests_made: StatusBreakdown[int, int, int, int] = Field( + description=( + "The number of requests made for the benchmark run broken down by " + "status including successful, incomplete, errored, and the sum of all three" + ) + ) + queued_time_avg: float = Field( + description=( + "The average time spent in the queue for each request in the benchmark " + "run until it was dequeued by a worker." + ) + ) + scheduled_time_delay_avg: float = Field( + description=( + "The average time delay between when a request was dequeued and when it " + "was scheduled to be processed by a worker in the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ) + ) + scheduled_time_sleep_avg: float = Field( + description=( + "The average time spent sleeping til the desired start time was reached " + "after being scheduled by the worker in the benchmark run." 
+ ) + ) + worker_start_delay_avg: float = Field( + description=( + "The average time delay between when a request was scheduled and when " + "the worker started processing it in the benchmark run. " + "This should be as close to 0 as possible, any additional time is " + "overheads from the system or the worker." + ) + ) + worker_time_avg: float = Field( + description=( + "The average time taken by the worker to process each request in the " + "benchmark run. This includes the time to generate the response and " + "any additional processing time." + ) + ) + worker_start_time_targeted_delay_avg: float = Field( + description=( + "The average time delay between when a request was targeted to start " + "and when the worker actually started processing it in the benchmark " + "run. For async strategies, this represents delays from the ideal " + "system. For sync strategies, since those are doubled in queue, " + "this should be as close to the time for a request to be processed " + "as possible. Any additional time is overhead from the system or " + "the worker." + ) + ) + request_start_time_delay_avg: float = Field( + description=( + "The average time delay between the actual request being made " + "and the time the worker started on the request for all requests " + "that completed within the benchmark run. This time should be as close " + "to 0 as possible, any additional time is overhead from the system or " + "the worker." + ) + ) + request_start_time_targeted_delay_avg: float = Field( + description=( + "The average time delay between when the targeted start time and " + "the actual start time for each request in the benchmark run. " + "For async strategies, this represents delays from the ideal " + "system. For sync strategies, this should be as close to the " + "time for a request to be processed as possible. Any additional " + "time is overhead from the system or the worker." + ) + ) + request_time_delay_avg: float = Field( + description=( + "The average time delay between the total request time and the " + "worker time. This should be as close to 0 as possible, any additional " + "time is overhead from the system or the worker. " + ) + ) + request_time_avg: float = Field( + description=( + "The average time spent processing all requests in the benchmark run. " + "This is the time from when the actual request was started to when " + "it was completed." + ) + ) + + +class BenchmarkMetrics(StandardBaseModel): + """ + A serializable model representing the metrics for a benchmark run. + """ + + requests_per_second: StatusDistributionSummary = Field( + description="The distribution of requests per second for the benchmark.", + ) + request_concurrency: StatusDistributionSummary = Field( + description="The distribution of requests concurrency for the benchmark.", + ) + + +class Benchmark(StandardBaseModel): + """ + The base serializable model representing a benchmark run and its results. + Specific benchmarker implementations should extend this model to include + additional information or metadata as needed. + + Note, requests_per_second and request_concurrency are kept at this level + and are expected to be populated by the subclass implementation to ensure + the logic for Profiles can include more complicated logic for determining + what rates and concurrency values to use for subsequent strategies. 
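+    For example, the SweepProfile in guidellm.benchmark.profile uses the rates
+    measured for its initial synchronous and throughput strategies to interpolate
+    the request rates for the remaining strategies in the sweep.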
+ """ + + type_: Literal["benchmark"] = "benchmark" + id_: str = Field( + default_factory=lambda: str(uuid.uuid4()), + description="The unique identifier for the benchmark.", + ) + run_id: str = Field( + description=( + "The unique identifier for the encompasing benchmark run that this " + "benchmark was a part of." + ) + ) + args: BenchmarkArgs = Field( + description=( + "The arguments used to specify how to run the benchmark and collect data." + ) + ) + run_stats: BenchmarkRunStats = Field( + description=( + "The process statistics for the entire benchmark run across all requests." + ) + ) + worker: Union[WorkerDescription] = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." + ), + ) + request_loader: Union[RequestLoaderDescription] = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + ) + extras: Dict[str, Any] = Field( + description=( + "Any additional information or metadata that was passed for this benchmark." + ) + ) + metrics: BenchmarkMetrics = Field( + description=( + "The metrics for the benchmark run represented as a distribution of " + "various per-request statistics." + ), + ) + + +BenchmarkT = TypeVar("BenchmarkT", bound=Benchmark) + + +class GenerativeTextResponseStats(StandardBaseModel): + """ + A serializable model representing the request values, response values, and + statistics for a generative text response. + """ + + type_: Literal["generative_text_response"] = "generative_text_response" + request_id: Optional[str] = Field( + description="The unique identifier for the request.", + ) + request_type: Literal["text_completions", "chat_completions"] = Field( + description="The type of request made to the generative backend." + ) + scheduler_info: SchedulerRequestInfo = Field( + description=( + "The info about the request from the scheduler about how it was run." + ), + ) + prompt: str = Field( + description="The text prompt used for the generative request.", + ) + output: str = Field( + description="The generated text output from the generative request.", + ) + prompt_tokens: int = Field( + description="The number of tokens in the prompt text.", + ) + output_tokens: int = Field( + description="The number of tokens in the generated output text.", + ) + start_time: float = Field( + description="The time the request started.", + ) + end_time: float = Field( + description="The time the request ended.", + ) + first_token_time: float = Field( + description="The time the first token was received.", + ) + last_token_time: float = Field( + description="The time the last token was received.", + ) + + @computed_field # type: ignore[misc] + @property + def request_latency(self) -> float: + """ + :return: The duration of the request in seconds from the start to the end. + """ + return self.end_time - self.start_time + + @computed_field # type: ignore[misc] + @property + def time_to_first_token_ms(self) -> float: + """ + :return: The time in milliseconds from the start of the request to the first + token received. + """ + return 1000 * (self.first_token_time - self.start_time) + + @computed_field # type: ignore[misc] + @property + def time_per_output_token_ms(self) -> float: + """ + :return: The average time in milliseconds per output token generated. + This includes the time to generate the first token and all other tokens. 
+ """ + if self.output_tokens == 0: + return 0.0 + + return ( + 1000 * (self.last_token_time - self.first_token_time) / self.output_tokens + ) + + @computed_field # type: ignore[misc] + @property + def inter_token_latency_ms(self) -> float: + """ + :return: The average time in milliseconds between generating tokens in the + output text. Note, does not include the time to generate the first token. + """ + if self.output_tokens <= 1: + return 0.0 + + return ( + 1000 + * (self.last_token_time - self.first_token_time) + / (self.output_tokens - 1) + ) + + @computed_field # type: ignore[misc] + @property + def tokens_per_second(self) -> float: + """ + :return: The average number of tokens generated per second in the prompt and + output text. + """ + if (latency := self.request_latency) == 0.0: + return 0.0 + + return (self.prompt_tokens + self.output_tokens) / latency + + @computed_field # type: ignore[misc] + @property + def output_tokens_per_second(self) -> float: + """ + :return: The average number of output tokens generated per second. + """ + if (latency := self.request_latency) == 0.0: + return 0.0 + + return self.output_tokens / latency + + +class GenerativeTextErrorStats(GenerativeTextResponseStats): + """ + A serializable model representing the request values, response values, and + statistics for a generative text response that errored. + Extends and overrides the GenerativeTextResponseStats model to include the + error message and optional properties given the error occurred. + """ + + type_: Literal["generative_text_error"] = "generative_text_error" # type: ignore[assignment] + error: str = Field( + description=( + "The error message for the error that occurred while making the request." + ) + ) + output: Optional[str] = Field( # type: ignore[assignment] + default=None, + description=( + "The generated text output from the generative request, if any, " + "before the error occurred." + ), + ) + first_token_time: Optional[float] = Field( # type: ignore[assignment] + default=None, + description=( + "The time the first token was received, if any, before the error occurred." + ), + ) + last_token_time: Optional[float] = Field( # type: ignore[assignment] + default=None, + description=( + "The time the last token was received, if any, before the error occurred." + ), + ) + + @computed_field # type: ignore[misc] + @property + def time_to_first_token_ms(self) -> Optional[float]: # type: ignore[override] + """ + :return: The time in milliseconds from the start of the request to the first + token received. None if the first token was not received. + """ + if self.first_token_time is None: + return None + + return super().time_to_first_token_ms + + @computed_field # type: ignore[misc] + @property + def time_per_output_token_ms(self) -> Optional[float]: # type: ignore[override] + """ + :return: The average time in milliseconds per output token generated. + This includes the time to generate the first token and all other tokens. + None if the output_tokens is None or 0. + """ + if self.output_tokens is None or self.output_tokens == 0: + return None + + return super().time_per_output_token_ms + + @computed_field # type: ignore[misc] + @property + def inter_token_latency_ms(self) -> Optional[float]: # type: ignore[override] + """ + :return: The average time in milliseconds between generating tokens in the + output text. Note, does not include the time to generate the first token. + None if there were no output_tokens or the first token was not received. 
+ """ + if ( + self.output_tokens is None + or self.first_token_time is None + or self.last_token_time is None + ): + return None + + return super().inter_token_latency_ms + + @computed_field # type: ignore[misc] + @property + def output_tokens_per_second(self) -> Optional[float]: # type: ignore[override] + """ + :return: The average number of tokens generated per second in the output text. + Note, does not include the time to generate the first token. None if there + were no output_tokens or the first token was not received. + """ + if self.inter_token_latency_ms is None: + return None + + return super().output_tokens_per_second + + +class GenerativeMetrics(BenchmarkMetrics): + """ + A serializable model representing the metrics for a generative benchmark run. + """ + + request_latency: StatusDistributionSummary = Field( + description="The distribution of latencies for the completed requests.", + ) + prompt_token_count: StatusDistributionSummary = Field( + description=( + "The distribution of token counts in the prompts for completed, " + "errored, and all requests." + ) + ) + output_token_count: StatusDistributionSummary = Field( + description=( + "The distribution of token counts in the outputs for completed, " + "errored, and all requests." + ) + ) + time_to_first_token_ms: StatusDistributionSummary = Field( + description=( + "The distribution of latencies to receiving the first token in " + "milliseconds for completed, errored, and all requests." + ), + ) + time_per_output_token_ms: StatusDistributionSummary = Field( + description=( + "The distribution of latencies per output token in milliseconds for " + "completed, errored, and all requests. " + "This includes the time to generate the first token and all other tokens." + ), + ) + inter_token_latency_ms: StatusDistributionSummary = Field( + description=( + "The distribution of latencies between tokens in milliseconds for " + "completed, errored, and all requests." + ), + ) + output_tokens_per_second: StatusDistributionSummary = Field( + description=( + "The distribution of output tokens per second for completed, " + "errored, and all requests." + ), + ) + tokens_per_second: StatusDistributionSummary = Field( + description=( + "The distribution of tokens per second, including prompt and output tokens " + "for completed, errored, and all requests." + ), + ) + + +class GenerativeBenchmark(Benchmark): + """ + A serializable model representing a benchmark run and its results for generative + requests and responses. Includes the completed and errored requests, the start + and end times for the benchmark, and the statistics for the requests and responses. + """ + + type_: Literal["generative_benchmark"] = "generative_benchmark" # type: ignore[assignment] + start_time: float = Field( + description="The start time of the first request for the benchmark.", + ) + end_time: float = Field( + description="The end time of the last request for the benchmark.", + ) + + @computed_field # type: ignore[misc] + @property + def duration(self) -> float: + """ + :return: The duration of the benchmark in seconds from the start of the + first request to the end of the last request. + """ + return self.end_time - self.start_time + + worker: GenerativeRequestsWorkerDescription = Field( + description=( + "The description and specifics for the worker used to resolve requests " + "for this benchmark." 
+ ), + ) + request_loader: GenerativeRequestLoaderDescription = Field( + description=( + "The description and specifics for the request loader used to create " + "requests for this benchmark." + ), + ) + metrics: GenerativeMetrics = Field( + description=( + "The metrics for the benchmark run represented as a distribution of " + "various per-request statistics." + ), + ) + # Output is ordered so keep the requests at the end for better readability in files + request_totals: StatusBreakdown[int, int, int, int] = Field( + description=( + "The number of requests made for the benchmark broken down by status " + "including successful, incomplete, errored, and the sum of all three" + ) + ) + request_samples: Optional[StatusBreakdown[int, int, int, None]] = Field( + description=( + "The number of requests that were randomly sampled for " + "the benchmark. None if no sampling was applied." + ), + default=None, + ) + requests: StatusBreakdown[ + List[GenerativeTextResponseStats], + List[GenerativeTextErrorStats], + List[GenerativeTextErrorStats], + None, + ] = Field( + description=( + "The breakdown of requests for the benchmark run including successful, " + "incomplete, and errored requests." + ), + ) + + def create_sampled(self, sample_size: int) -> "GenerativeBenchmark": + """ + Create a new benchmark instance with a random sample of the completed and + errored requests based on the given sample sizes. If the sample sizes are + larger than the total number of requests, the sample sizes are capped at + the total number of requests. + + :param sample_size: The number of requests to sample for each status type. + :return: A new benchmark instance with the sampled requests. + :raises ValueError: If the sample sizes are negative. + """ + if sample_size < 0: + raise ValueError(f"Sample size must be non-negative, given {sample_size}") + + sample_size = min(sample_size, len(self.requests.successful)) + error_sample_size = min(sample_size, len(self.requests.errored)) + incomplete_sample_size = min(sample_size, len(self.requests.incomplete)) + + sampled_instance = self.model_copy() + sampled_instance.requests.successful = random.sample( + self.requests.successful, sample_size + ) + sampled_instance.requests.errored = random.sample( + self.requests.errored, error_sample_size + ) + sampled_instance.requests.incomplete = random.sample( + self.requests.incomplete, incomplete_sample_size + ) + sampled_instance.request_samples = StatusBreakdown( + successful=len(sampled_instance.requests.successful), + incomplete=len(sampled_instance.requests.incomplete), + errored=len(sampled_instance.requests.errored), + ) + + return sampled_instance + + @staticmethod + def from_stats( + run_id: str, + successful: List[GenerativeTextResponseStats], + incomplete: List[GenerativeTextErrorStats], + errored: List[GenerativeTextErrorStats], + args: BenchmarkArgs, + run_stats: BenchmarkRunStats, + worker: GenerativeRequestsWorkerDescription, + requests_loader: GenerativeRequestLoaderDescription, + extras: Optional[Dict[str, Any]], + ) -> "GenerativeBenchmark": + """ + Create a GenerativeBenchmark instance from the given statistics and metadata. + Given the completed and errored requests, the benchmark will fill in the + remaining statistics for the various metrics required for a benchmark. + This is the preferred method for creating a GenerativeBenchmark instance + to ensure all statistics are properly calculated and populated. + + :param run_id: The unique identifier for the benchmark run. 
+ :param completed: The list of completed requests. + :param errored: The list of errored requests. + :param args: The arguments used to specify how to run the benchmark + and collect data. + :param run_stats: The process statistics for the entire benchmark run across + all requests. + :param worker: The description and specifics for the worker used to resolve + requests. + :param requests_loader: The description and specifics for the request loader + used to create requests. + :param extras: Any additional information or metadata that was passed for + this benchmark. + :return: A GenerativeBenchmark instance with the given statistics and metadata + populated and calculated + """ + total = successful + incomplete + errored + total_types: List[Literal["successful", "incomplete", "error"]] = [ + *["successful"] * len(successful), # type: ignore[list-item] + *["incomplete"] * len(incomplete), # type: ignore[list-item] + *["error"] * len(errored), # type: ignore[list-item] + ] + start_time = min(req.start_time for req in total) + end_time = max(req.end_time for req in total) + + total_with_prompt, total_types_with_prompt = ( + zip(*filtered) + if ( + filtered := list( + filter(lambda val: bool(val[0].prompt), zip(total, total_types)) + ) + ) + else ([], []) + ) + total_with_output_first, total_types_with_output_first = ( + zip(*filtered) + if ( + filtered := list( + filter( + lambda val: bool(val[0].output_tokens > 0), + zip(total, total_types), + ) + ) + ) + else ([], []) + ) + total_with_output_multi, total_types_with_output_multi = ( + zip(*filtered) + if ( + filtered := list( + filter( + lambda val: bool(val[0].output_tokens > 1), + zip(total, total_types), + ) + ) + ) + else ([], []) + ) + + return GenerativeBenchmark( + run_id=run_id, + args=args, + run_stats=run_stats, + extras=extras or {}, + start_time=start_time, + end_time=end_time, + worker=worker, + request_loader=requests_loader, + metrics=GenerativeMetrics( + requests_per_second=StatusDistributionSummary.from_request_times( + request_types=total_types, + requests=[(req.start_time, req.end_time) for req in total], + distribution_type="rate", + ), + request_concurrency=StatusDistributionSummary.from_request_times( + request_types=total_types, + requests=[(req.start_time, req.end_time) for req in total], + distribution_type="concurrency", + ), + request_latency=StatusDistributionSummary.from_values( + value_types=total_types, + values=[req.request_latency for req in total], + ), + prompt_token_count=StatusDistributionSummary.from_values( + value_types=list(total_types_with_prompt), + values=[req.prompt_tokens for req in total_with_prompt], + ), + output_token_count=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_first), + values=[req.output_tokens for req in total_with_output_first], + ), + time_to_first_token_ms=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_first), + values=[ + req.time_to_first_token_ms or 0 + for req in total_with_output_first + ], + ), + time_per_output_token_ms=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_first), + values=[ + req.time_per_output_token_ms or 0 + for req in total_with_output_first + ], + weights=[req.output_tokens for req in total_with_output_first], + ), + inter_token_latency_ms=StatusDistributionSummary.from_values( + value_types=list(total_types_with_output_multi), + values=[ + req.inter_token_latency_ms or 0 + for req in total_with_output_multi + ], + weights=[req.output_tokens - 1 for 
req in total_with_output_multi], + ), + output_tokens_per_second=StatusDistributionSummary.from_iterable_request_times( + request_types=list(total_types_with_output_first), + requests=[ + (req.start_time, req.end_time) + for req in total_with_output_first + ], + first_iter_times=[ + req.first_token_time or req.start_time + for req in total_with_output_first + ], + iter_counts=[req.output_tokens for req in total_with_output_first], + ), + tokens_per_second=StatusDistributionSummary.from_iterable_request_times( + request_types=list(total_types_with_output_first), + requests=[ + (req.start_time, req.end_time) + for req in total_with_output_first + ], + first_iter_times=[ + req.first_token_time or req.start_time + for req in total_with_output_first + ], + iter_counts=[ + req.prompt_tokens + req.output_tokens + for req in total_with_output_first + ], + first_iter_counts=[ + req.prompt_tokens for req in total_with_output_first + ], + ), + ), + request_totals=StatusBreakdown( + successful=len(successful), + incomplete=len(incomplete), + errored=len(errored), + total=len(total), + ), + requests=StatusBreakdown( + successful=successful, + incomplete=incomplete, + errored=errored, + ), + ) diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py new file mode 100644 index 00000000..985d9c4f --- /dev/null +++ b/src/guidellm/benchmark/benchmarker.py @@ -0,0 +1,336 @@ +import time +import uuid +from abc import ABC, abstractmethod +from pathlib import Path +from typing import ( + Any, + AsyncGenerator, + Dict, + Generic, + Iterable, + Literal, + Optional, + Union, +) + +from pydantic import Field +from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003 + +from guidellm.backend import Backend, ResponseSummary +from guidellm.benchmark.aggregator import ( + AggregatorT, + BenchmarkT, + GenerativeBenchmarkAggregator, +) +from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark +from guidellm.benchmark.profile import Profile +from guidellm.objects import StandardBaseModel +from guidellm.request import ( + GenerationRequest, + GenerativeRequestLoaderDescription, + RequestLoaderDescription, +) +from guidellm.scheduler import ( + GenerativeRequestsWorker, + RequestsWorker, + RequestT, + ResponseT, + Scheduler, + SchedulerRequestResult, + SchedulingStrategy, +) + +__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"] + + +class BenchmarkerResult( + StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT] +): + type_: Literal[ + "run_start", + "run_complete", + "scheduler_start", + "scheduler_update", + "scheduler_complete", + "benchmark_compiled", + ] + start_time: float + end_number: int + profile: Profile + current_index: int + current_strategy: Optional[SchedulingStrategy] = None + current_aggregator: Optional[AggregatorT] = None + current_benchmark: Optional[BenchmarkT] = None + current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None + + +class BenchmarkerStrategyLimits(StandardBaseModel): + requests_loader_size: Optional[int] = Field( + description="Size of the request loader.", + ) + max_number_per_strategy: Optional[int] = Field( + description="Maximum number of requests to process per strategy.", + ge=0, + ) + max_duration_per_strategy: Optional[float] = Field( + description="Maximum duration (in seconds) to process requests per strategy.", + ge=0, + ) + warmup_percent_per_strategy: Optional[float] = Field( + description="Percentage of requests to use for warmup.", + ge=0, + 
le=1, + ) + cooldown_percent_per_strategy: Optional[float] = Field( + description="Percentage of requests to use for cooldown.", + ge=0, + le=1, + ) + + @property + def max_number(self) -> Optional[int]: + if self.max_number_per_strategy is not None: + return self.max_number_per_strategy + + if self.requests_loader_size is not None: + return self.requests_loader_size + + return None + + @property + def max_duration(self) -> Optional[float]: + return self.max_duration_per_strategy + + @property + def warmup_number(self) -> Optional[int]: + if self.warmup_percent_per_strategy is None or self.max_number is None: + return None + + return int(self.warmup_percent_per_strategy * self.max_number) + + @property + def warmup_duration(self) -> Optional[float]: + if self.warmup_percent_per_strategy is None or self.max_duration is None: + return None + + return self.warmup_percent_per_strategy * self.max_duration + + @property + def cooldown_number(self) -> Optional[int]: + if self.cooldown_percent_per_strategy is None or self.max_number is None: + return None + + return int(self.cooldown_percent_per_strategy * self.max_number) + + @property + def cooldown_duration(self) -> Optional[float]: + if self.cooldown_percent_per_strategy is None or self.max_duration is None: + return None + + return self.cooldown_percent_per_strategy * self.max_duration + + +class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC): + def __init__( + self, + worker: RequestsWorker[RequestT, ResponseT], + request_loader: Iterable[RequestT], + requests_loader_description: RequestLoaderDescription, + benchmark_save_extras: Optional[Dict[str, Any]] = None, + ): + self.worker = worker + self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler( + worker=worker, request_loader=request_loader + ) + self.requests_loader_description = requests_loader_description + self.benchmark_save_extras = benchmark_save_extras + + async def run( + self, + profile: Profile, + max_number_per_strategy: Optional[int], + max_duration_per_strategy: Optional[float], + warmup_percent_per_strategy: Optional[float], + cooldown_percent_per_strategy: Optional[float], + ) -> AsyncGenerator[ + BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None + ]: + try: + requests_loader_size = len(self.scheduler.request_loader) # type: ignore[arg-type] + except Exception: # noqa: BLE001 + requests_loader_size = None + + strategy_limits = BenchmarkerStrategyLimits( + requests_loader_size=requests_loader_size, + max_number_per_strategy=max_number_per_strategy, + max_duration_per_strategy=max_duration_per_strategy, + warmup_percent_per_strategy=warmup_percent_per_strategy, + cooldown_percent_per_strategy=cooldown_percent_per_strategy, + ) + start_time = time.time() + end_number = len(profile.strategy_types) + current_index = -1 + run_id = str(uuid.uuid4()) + + yield BenchmarkerResult( + type_="run_start", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=None, + current_aggregator=None, + current_benchmark=None, + current_result=None, + ) + + while scheduling_strategy := profile.next_strategy(): + current_index += 1 + aggregator = self.create_benchmark_aggregator( + run_id=run_id, + profile=profile, + strategy_index=current_index, + strategy=scheduling_strategy, + limits=strategy_limits, + ) + + async for result in self.scheduler.run( + scheduling_strategy=scheduling_strategy, + max_number=max_number_per_strategy, + max_duration=max_duration_per_strategy, + ): + if 
result.type_ == "run_start": + yield BenchmarkerResult( + type_="scheduler_start", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=aggregator, + current_benchmark=None, + current_result=None, + ) + elif result.type_ == "run_complete": + yield BenchmarkerResult( + type_="scheduler_complete", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=aggregator, + current_benchmark=None, + current_result=None, + ) + elif isinstance(result, SchedulerRequestResult): + aggregator.add_result(result) + + yield BenchmarkerResult( + type_="scheduler_update", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=aggregator, + current_benchmark=None, + current_result=result, + ) + else: + raise ValueError(f"Unexpected result type: {type(result)}") + + benchmark: BenchmarkT = aggregator.compile() + profile.completed_strategy( + average_rate=benchmark.metrics.requests_per_second.successful.mean, + average_concurrency=benchmark.metrics.request_concurrency.successful.mean, + ) + + yield BenchmarkerResult( + type_="benchmark_compiled", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=scheduling_strategy, + current_aggregator=None, + current_benchmark=benchmark, + current_result=None, + ) + + yield BenchmarkerResult( + type_="run_complete", + start_time=start_time, + end_number=end_number, + profile=profile, + current_index=current_index, + current_strategy=None, + current_aggregator=None, + current_benchmark=None, + current_result=None, + ) + + @abstractmethod + def create_benchmark_aggregator( + self, + run_id: str, + profile: Profile, + strategy_index: int, + strategy: SchedulingStrategy, + limits: BenchmarkerStrategyLimits, + ) -> AggregatorT: ... 
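A rough usage sketch of how a concrete Benchmarker, such as the GenerativeBenchmarker defined just below, is expected to be driven; it mirrors the benchmark_generative_text entrypoint added later in this patch. The function name run_sketch and the backend, request_loader, and loader_description arguments are illustrative placeholders for objects constructed elsewhere.

from guidellm.benchmark.benchmarker import GenerativeBenchmarker
from guidellm.benchmark.profile import create_profile


async def run_sketch(backend, request_loader, loader_description):
    # Wrap the backend and loader in a benchmarker; the worker is created internally.
    benchmarker = GenerativeBenchmarker(
        backend=backend,
        request_loader=request_loader,
        request_loader_description=loader_description,
    )
    # A single synchronous strategy; other rate_type values map to the other profiles.
    profile = create_profile(rate_type="synchronous", rate=None)

    benchmarks = []
    async for result in benchmarker.run(
        profile=profile,
        max_number_per_strategy=100,
        max_duration_per_strategy=None,
        warmup_percent_per_strategy=None,
        cooldown_percent_per_strategy=None,
    ):
        # Each compiled benchmark is surfaced as a "benchmark_compiled" result.
        if result.type_ == "benchmark_compiled":
            benchmarks.append(result.current_benchmark)
    return benchmarks

The benchmark_generative_text entrypoint later in this patch follows the same loop, adding progress display and output handling around it.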
+ + +class GenerativeBenchmarker( + Benchmarker[ + GenerativeBenchmarkAggregator, + GenerativeBenchmark, + GenerationRequest, + ResponseSummary, + ], +): + def __init__( + self, + backend: Backend, + request_loader: Iterable[GenerationRequest], + request_loader_description: GenerativeRequestLoaderDescription, + benchmark_save_extras: Optional[Dict[str, Any]] = None, + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None, + processor_args: Optional[Dict[str, Any]] = None, + ): + super().__init__( + worker=GenerativeRequestsWorker(backend), + request_loader=request_loader, + requests_loader_description=request_loader_description, + benchmark_save_extras=benchmark_save_extras, + ) + self.processor = processor + self.processor_args = processor_args + + def create_benchmark_aggregator( + self, + run_id: str, + profile: Profile, + strategy_index: int, + strategy: SchedulingStrategy, + limits: BenchmarkerStrategyLimits, + ) -> GenerativeBenchmarkAggregator: + return GenerativeBenchmarkAggregator( + run_id=run_id, + args=BenchmarkArgs( + profile=profile, + strategy_index=strategy_index, + strategy=strategy, + max_number=limits.max_number, + max_duration=limits.max_duration, + warmup_number=limits.warmup_number, + warmup_duration=limits.warmup_duration, + cooldown_number=limits.cooldown_number, + cooldown_duration=limits.cooldown_duration, + ), + worker_description=self.worker.description, # type: ignore[arg-type] + request_loader_description=self.requests_loader_description, # type: ignore[arg-type] + extras=self.benchmark_save_extras or {}, + processor=self.processor, + processor_args=self.processor_args, + ) diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py new file mode 100644 index 00000000..fc98219e --- /dev/null +++ b/src/guidellm/benchmark/entrypoints.py @@ -0,0 +1,129 @@ +from pathlib import Path +from typing import Any, Dict, Iterable, List, Literal, Optional, Union + +from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict +from transformers import ( # type: ignore[import] + PreTrainedTokenizerBase, +) + +from guidellm.backend import Backend, BackendType +from guidellm.benchmark.benchmark import GenerativeBenchmark +from guidellm.benchmark.benchmarker import GenerativeBenchmarker +from guidellm.benchmark.output import ( + GenerativeBenchmarksConsole, + save_generative_benchmarks, +) +from guidellm.benchmark.profile import ProfileType, create_profile +from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay +from guidellm.request import GenerativeRequestLoader +from guidellm.scheduler import StrategyType + + +async def benchmark_generative_text( + target: str, + backend_type: BackendType, + backend_args: Optional[Dict[str, Any]], + model: Optional[str], + processor: Optional[Optional[Union[str, Path, PreTrainedTokenizerBase]]], + processor_args: Optional[Dict[str, Any]], + data: Union[ + str, + Path, + Iterable[Union[str, Dict[str, Any]]], + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + ], + data_args: Optional[Dict[str, Any]], + data_sampler: Optional[Literal["random"]], + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[int, float, List[Union[int, float]]]], + max_seconds: Optional[float], + max_requests: Optional[int], + warmup_percent: Optional[float], + cooldown_percent: Optional[float], + show_progress: bool, + show_progress_scheduler_stats: bool, + output_console: bool, + output_path: Optional[Union[str, Path]], + output_extras: 
Optional[Dict[str, Any]], + random_seed: int, +) -> List[GenerativeBenchmark]: + console = GenerativeBenchmarksConsole(enabled=show_progress) + console.print_line("Creating backend...") + backend = Backend.create( + backend_type, target=target, model=model, **(backend_args or {}) + ) + await backend.validate() + console.print_line( + f"Backend {backend_type} connected to {target} for model {backend.model}." + ) + + if processor is None: + processor = backend.model + + console.print_line("Creating request loader...") + request_loader = GenerativeRequestLoader( + data=data, + data_args=data_args, + processor=processor, + processor_args=processor_args, + shuffle=data_sampler == "random", + iter_type=( + "finite" # assume a finite dataset is our limit + if max_requests is None and max_seconds is None + else "infinite" # default to infinite so we don't run out of data + ), + random_seed=random_seed, + ) + unique_requests = request_loader.num_unique_items(raise_err=False) + console.print_line( + f"Created loader with {unique_requests} unique requests from {data}.\n\n" + if unique_requests > 0 + else f"Created loader with unknown number unique requests from {data}.\n\n" + ) + + profile = create_profile(rate_type=rate_type, rate=rate) + benchmarker = GenerativeBenchmarker( + backend=backend, + request_loader=request_loader, + request_loader_description=request_loader.description, + benchmark_save_extras=output_extras, + processor=processor, + processor_args=processor_args, + ) + progress = ( + GenerativeTextBenchmarkerProgressDisplay( + display_scheduler_stats=show_progress_scheduler_stats + ) + if show_progress + else None + ) + benchmarks = [] + + async for result in benchmarker.run( + profile=profile, + max_number_per_strategy=max_requests, + max_duration_per_strategy=max_seconds, + warmup_percent_per_strategy=warmup_percent, + cooldown_percent_per_strategy=cooldown_percent, + ): + if progress: + progress.update(result) + + if result.type_ == "benchmark_compiled": + if result.current_benchmark is None: + raise ValueError("Current benchmark is None") + benchmarks.append(result.current_benchmark) + + if output_console: + console.benchmarks = benchmarks + console.print_benchmarks_metadata() + console.print_benchmarks_info() + console.print_benchmarks_stats() + + if output_path: + save_generative_benchmarks(benchmarks=benchmarks, path=output_path) + + return benchmarks diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py new file mode 100644 index 00000000..d0bdc103 --- /dev/null +++ b/src/guidellm/benchmark/output.py @@ -0,0 +1,347 @@ +import json +from collections import OrderedDict +from datetime import datetime +from pathlib import Path +from typing import Any, List, Optional, Union + +import yaml +from rich.console import Console +from rich.padding import Padding +from rich.table import Table +from rich.text import Text + +from guidellm.benchmark.benchmark import GenerativeBenchmark +from guidellm.benchmark.profile import ( + AsyncProfile, + ConcurrentProfile, + SweepProfile, + ThroughputProfile, +) +from guidellm.objects import StandardBaseModel +from guidellm.scheduler import strategy_display_str +from guidellm.utils import Colors + +__all__ = [ + "GenerativeBenchmarksReport", + "save_generative_benchmarks", + "GenerativeBenchmarksConsole", +] + + +class GenerativeBenchmarksReport(StandardBaseModel): + benchmarks: List[GenerativeBenchmark] + + def save_file(self, path: Path): + if path.is_dir(): + path = path / "benchmarks.json" + + path.parent.mkdir(parents=True, 
exist_ok=True)
+        extension = path.suffix.lower()
+
+        if extension == ".json":
+            self.save_json(path)
+        elif extension in [".yaml", ".yml"]:
+            self.save_yaml(path)
+        elif extension in [".csv"]:
+            self.save_csv(path)
+        else:
+            raise ValueError(f"Unsupported file extension: {extension} for {path}.")
+
+    def save_json(self, path: Path):
+        model_dict = self.model_dump()
+        model_json = json.dumps(model_dict)
+
+        with path.open("w") as file:
+            file.write(model_json)
+
+    def save_yaml(self, path: Path):
+        model_dict = self.model_dump()
+        model_yaml = yaml.dump(model_dict)
+
+        with path.open("w") as file:
+            file.write(model_yaml)
+
+    def save_csv(self, path: Path):
+        raise NotImplementedError("CSV format is not implemented yet.")
+
+
+def save_generative_benchmarks(
+    benchmarks: List[GenerativeBenchmark], path: Union[Path, str]
+):
+    path = Path(path) if isinstance(path, str) else path
+    report = GenerativeBenchmarksReport(benchmarks=benchmarks)
+    report.save_file(path)
+
+
+class GenerativeBenchmarksConsole:
+    def __init__(self, enabled: bool = True):
+        self.enabled = enabled
+        self.benchmarks: Optional[List[GenerativeBenchmark]] = None
+        self.console = Console()
+
+    @property
+    def benchmarks_profile_str(self) -> str:
+        profile = self.benchmarks[0].args.profile if self.benchmarks else None
+
+        if profile is None:
+            return "None"
+
+        profile_args = OrderedDict(
+            {
+                "type": profile.type_,
+                "strategies": profile.strategy_types,
+            }
+        )
+
+        if isinstance(profile, ConcurrentProfile):
+            profile_args["streams"] = str(profile.streams)
+        elif isinstance(profile, SweepProfile):
+            profile_args["sweep_size"] = str(profile.sweep_size)
+        elif isinstance(profile, AsyncProfile):
+            profile_args["max_concurrency"] = str(profile.max_concurrency)
+            profile_args["rate"] = str(profile.rate)
+            profile_args["initial_burst"] = str(profile.initial_burst)
+        elif isinstance(profile, ThroughputProfile):
+            profile_args["max_concurrency"] = str(profile.max_concurrency)
+
+        return ", ".join(f"{key}={value}" for key, value in profile_args.items())
+
+    @property
+    def benchmarks_args_str(self) -> str:
+        args = self.benchmarks[0].args if self.benchmarks else None
+
+        if args is None:
+            return "None"
+
+        args_dict = OrderedDict(
+            {
+                "max_number": args.max_number,
+                "max_duration": args.max_duration,
+                "warmup_number": args.warmup_number,
+                "warmup_duration": args.warmup_duration,
+                "cooldown_number": args.cooldown_number,
+                "cooldown_duration": args.cooldown_duration,
+            }
+        )
+
+        return ", ".join(f"{key}={value}" for key, value in args_dict.items())
+
+    @property
+    def benchmarks_worker_desc_str(self) -> str:
+        return str(self.benchmarks[0].worker) if self.benchmarks else "None"
+
+    @property
+    def benchmarks_request_loader_desc_str(self) -> str:
+        return str(self.benchmarks[0].request_loader) if self.benchmarks else "None"
+
+    @property
+    def benchmarks_extras_str(self) -> str:
+        extras = self.benchmarks[0].extras if self.benchmarks else None
+
+        if not extras:
+            return "None"
+
+        return ", ".join(f"{key}={value}" for key, value in extras.items())
+
+    def print_section_header(self, title: str, new_lines: int = 2):
+        if not self.enabled:
+            return
+
+        text = Text()
+
+        for _ in range(new_lines):
+            text.append("\n")
+
+        text.append(f"{title}:", style=f"bold underline {Colors.INFO}")
+        self.console.print(text)
+
+    def print_labeled_line(self, label: str, value: str, indent: int = 4):
+        if not self.enabled:
+            return
+
+        text = Text()
+        text.append(label, style=f"bold {Colors.INFO}")
+        text.append(": ")
+        text.append(value,
style="italic") + self.console.print( + Padding.indent(text, indent), + ) + + def print_line(self, value: str, indent: int = 0): + if not self.enabled: + return + + text = Text(value) + self.console.print( + Padding.indent(text, indent), + ) + + def print_table(self, headers: List[str], rows: List[List[Any]], title: str): + if not self.enabled: + return + + self.print_section_header(title) + table = Table(*headers, header_style=f"bold {Colors.INFO}") + + for row in rows: + table.add_row(*[Text(item, style="italic") for item in row]) + + self.console.print(table) + + def print_benchmarks_metadata(self): + if not self.enabled: + return + + if not self.benchmarks: + raise ValueError( + "No benchmarks to print metadata for. Please set benchmarks first." + ) + + start_time = self.benchmarks[0].run_stats.start_time + end_time = self.benchmarks[0].run_stats.end_time + duration = end_time - start_time + + self.print_section_header("Benchmarks Completed") + self.print_labeled_line("Run id", str(self.benchmarks[0].run_id)) + self.print_labeled_line( + "Duration", + f"{duration:.1f} seconds", + ) + self.print_labeled_line( + "Profile", + self.benchmarks_profile_str, + ) + self.print_labeled_line( + "Args", + self.benchmarks_args_str, + ) + self.print_labeled_line( + "Worker", + self.benchmarks_worker_desc_str, + ) + self.print_labeled_line( + "Request Loader", + self.benchmarks_request_loader_desc_str, + ) + self.print_labeled_line( + "Extras", + self.benchmarks_extras_str, + ) + + def print_benchmarks_info(self): + if not self.enabled: + return + + if not self.benchmarks: + raise ValueError( + "No benchmarks to print info for. Please set benchmarks first." + ) + + headers = [ + "Benchmark", + "Start Time", + "End Time", + "Duration (sec)", + "Requests Made \n(comp / inc / err)", + "Prompt Tok / Req \n(comp / inc / err)", + "Output Tok / Req \n(comp / inc / err)", + "Prompt Tok Total \n(comp / inc / err)", + "Output Tok Total \n(comp / inc / err)", + ] + rows = [] + + for benchmark in self.benchmarks: + rows.append( + [ + strategy_display_str(benchmark.args.strategy), + f"{datetime.fromtimestamp(benchmark.start_time).strftime('%H:%M:%S')}", + f"{datetime.fromtimestamp(benchmark.end_time).strftime('%H:%M:%S')}", + f"{(benchmark.end_time - benchmark.start_time):.1f}", + ( + f"{benchmark.request_totals.successful:>5} / " + f"{benchmark.request_totals.incomplete} / " + f"{benchmark.request_totals.errored}" + ), + ( + f"{benchmark.metrics.prompt_token_count.successful.mean:>5.1f} / " # noqa: E501 + f"{benchmark.metrics.prompt_token_count.incomplete.mean:.1f} / " + f"{benchmark.metrics.prompt_token_count.errored.mean:.1f}" + ), + ( + f"{benchmark.metrics.output_token_count.successful.mean:>5.1f} / " # noqa: E501 + f"{benchmark.metrics.output_token_count.incomplete.mean:.1f} / " + f"{benchmark.metrics.output_token_count.errored.mean:.1f}" + ), + ( + f"{benchmark.metrics.prompt_token_count.successful.total_sum:>6.0f} / " # noqa: E501 + f"{benchmark.metrics.prompt_token_count.incomplete.total_sum:.0f} / " # noqa: E501 + f"{benchmark.metrics.prompt_token_count.errored.total_sum:.0f}" + ), + ( + f"{benchmark.metrics.output_token_count.successful.total_sum:>6.0f} / " # noqa: E501 + f"{benchmark.metrics.output_token_count.incomplete.total_sum:.0f} / " # noqa: E501 + f"{benchmark.metrics.output_token_count.errored.total_sum:.0f}" + ), + ] + ) + + self.print_table(headers=headers, rows=rows, title="Benchmarks Info") + + def print_benchmarks_stats(self): + if not self.enabled: + return + + if not self.benchmarks: + 
raise ValueError( + "No benchmarks to print stats for. Please set benchmarks first." + ) + + headers = [ + "Benchmark", + "Requests / sec", + "Requests Concurrency", + "Output Tok / sec", + "Total Tok / sec", + "Req Latency (ms)\n(mean / median / p99)", + "TTFT (ms)\n(mean / median / p99)", + "ITL (ms)\n(mean / median / p99)", + "TPOT (ms)\n(mean / median / p99)", + ] + rows = [] + + for benchmark in self.benchmarks: + rows.append( + [ + strategy_display_str(benchmark.args.strategy), + f"{benchmark.metrics.requests_per_second.successful.mean:.2f}", + f"{benchmark.metrics.request_concurrency.successful.mean:.2f}", + f"{benchmark.metrics.output_tokens_per_second.total.mean:.1f}", + f"{benchmark.metrics.tokens_per_second.total.mean:.1f}", + ( + f"{benchmark.metrics.request_latency.successful.mean:.2f} / " + f"{benchmark.metrics.request_latency.successful.median:.2f} / " + f"{benchmark.metrics.request_latency.successful.percentiles.p99:.2f}" + ), + ( + f"{benchmark.metrics.time_to_first_token_ms.successful.mean:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_to_first_token_ms.successful.median:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_to_first_token_ms.successful.percentiles.p99:.1f}" + ), + ( + f"{benchmark.metrics.inter_token_latency_ms.successful.mean:.1f} / " # noqa: E501 + f"{benchmark.metrics.inter_token_latency_ms.successful.median:.1f} / " # noqa: E501 + f"{benchmark.metrics.inter_token_latency_ms.successful.percentiles.p99:.1f}" + ), + ( + f"{benchmark.metrics.time_per_output_token_ms.successful.mean:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_per_output_token_ms.successful.median:.1f} / " # noqa: E501 + f"{benchmark.metrics.time_per_output_token_ms.successful.percentiles.p99:.1f}" + ), + ] + ) + + self.print_table( + headers=headers, + rows=rows, + title="Benchmarks Stats", + ) diff --git a/src/guidellm/benchmark/profile.py b/src/guidellm/benchmark/profile.py new file mode 100644 index 00000000..99f01f2e --- /dev/null +++ b/src/guidellm/benchmark/profile.py @@ -0,0 +1,399 @@ +from typing import List, Literal, Optional, Sequence, Union + +import numpy as np +from pydantic import Field, computed_field + +from guidellm.config import settings +from guidellm.objects import StandardBaseModel +from guidellm.scheduler import ( + AsyncConstantStrategy, + AsyncPoissonStrategy, + ConcurrentStrategy, + SchedulingStrategy, + StrategyType, + SynchronousStrategy, + ThroughputStrategy, +) + +__all__ = [ + "ProfileType", + "Profile", + "SynchronousProfile", + "ConcurrentProfile", + "ThroughputProfile", + "AsyncProfile", + "SweepProfile", + "create_profile", +] + +ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"] + + +class Profile(StandardBaseModel): + type_: Literal["profile"] = Field( + description="The type of benchmarking profile to use.", + ) + completed_strategies: int = Field( + default=0, + description="The number of scheduling strategies generated so far.", + ) + measured_rates: List[float] = Field( + default_factory=list, + description=("The average rates measured for the strategies that have run."), + ) + measured_concurrencies: List[float] = Field( + default_factory=list, + description=( + "The average concurrency measured for the strategies that have run." 
+ ), + ) + + def completed_strategy(self, average_rate: float, average_concurrency: float): + self.measured_rates.append(average_rate) + self.measured_concurrencies.append(average_concurrency) + self.completed_strategies += 1 + + @computed_field # type: ignore[misc] + @property + def strategy_types(self) -> List[StrategyType]: + return [] + + def next_strategy(self) -> Optional[SchedulingStrategy]: + return None + + +class SynchronousProfile(Profile): + type_: Literal["synchronous"] = "synchronous" # type: ignore[assignment] + + @property + def strategy_types(self) -> List[StrategyType]: + return [self.type_] + + def next_strategy(self) -> Optional[SchedulingStrategy]: + if self.completed_strategies >= 1: + return None + + return SynchronousStrategy() + + @staticmethod + def from_standard_args( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + **kwargs, + ) -> "SynchronousProfile": + if rate_type != "synchronous": + raise ValueError("Rate type must be 'synchronous' for synchronous profile.") + + if rate is not None: + raise ValueError( + "Rate does not apply to synchronous profile, it must be set to None." + ) + + if kwargs: + raise ValueError( + "No additional arguments are allowed for synchronous profile." + ) + + return SynchronousProfile() + + +class ConcurrentProfile(Profile): + type_: Literal["concurrent"] = "concurrent" # type: ignore[assignment] + streams: Union[int, Sequence[int]] = Field( + description="The number of concurrent streams to use.", + ) + + @property + def strategy_types(self) -> List[StrategyType]: + num_strategies = len(self.streams) if isinstance(self.streams, Sequence) else 1 + + return [self.type_] * num_strategies + + def next_strategy(self) -> Optional[SchedulingStrategy]: + streams = self.streams if isinstance(self.streams, Sequence) else [self.streams] + + if self.completed_strategies >= len(streams): + return None + + return ConcurrentStrategy( + streams=streams[self.completed_strategies], + ) + + @staticmethod + def from_standard_args( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + **kwargs, + ) -> "ConcurrentProfile": + if rate_type != "concurrent": + raise ValueError("Rate type must be 'concurrent' for concurrent profile.") + + if not rate: + raise ValueError("Rate (streams) must be provided for concurrent profile.") + + if not isinstance(rate, Sequence): + rate = [rate] + + if not all(stream.is_integer() and stream > 0 for stream in rate): + raise ValueError( + f"All rate values (streams) must be positive integers, received {rate}" + ) + + if kwargs: + raise ValueError( + "No additional arguments are allowed for concurrent profile." 
+ ) + + return ConcurrentProfile(streams=[int(rat) for rat in rate]) + + +class ThroughputProfile(Profile): + type_: Literal["throughput"] = "throughput" # type: ignore[assignment] + max_concurrency: Optional[int] = Field( + default=None, + description="The maximum number of concurrent requests that can be scheduled.", + ) + + @property + def strategy_types(self) -> List[StrategyType]: + return [self.type_] + + def next_strategy(self) -> Optional[SchedulingStrategy]: + if self.completed_strategies >= 1: + return None + + return ThroughputStrategy( + max_concurrency=self.max_concurrency, + ) + + @staticmethod + def from_standard_args( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + **kwargs, + ) -> "ThroughputProfile": + if rate_type != "throughput": + raise ValueError("Rate type must be 'throughput' for throughput profile.") + + if rate is not None: + raise ValueError( + "Rate does not apply to throughput profile, it must be set to None." + ) + + return ThroughputProfile(**kwargs) + + +class AsyncProfile(ThroughputProfile): + type_: Literal["async"] = "async" # type: ignore[assignment] + strategy_type: Literal["constant", "poisson"] = Field( + description="The type of asynchronous strategy to use.", + ) + rate: Union[float, Sequence[float]] = Field( + description="The rate of requests per second to use.", + ) + initial_burst: bool = Field( + default=True, + description=( + "True to send an initial burst of requests (math.floor(self.rate)) " + "to reach target rate. False to not send an initial burst." + ), + ) + random_seed: int = Field( + default=42, + description=( + "The random seed to use for the asynchronous strategy. " + "This is used to generate random numbers for the Poisson strategy." + ), + ) + + @property + def strategy_types(self) -> List[StrategyType]: + num_strategies = len(self.rate) if isinstance(self.rate, Sequence) else 1 + + return [self.strategy_type] * num_strategies + + def next_strategy(self) -> Optional[SchedulingStrategy]: + rate = self.rate if isinstance(self.rate, Sequence) else [self.rate] + + if self.completed_strategies >= len(rate): + return None + + if self.strategy_type == "constant": + return AsyncConstantStrategy( + rate=rate[self.completed_strategies], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + ) + elif self.strategy_type == "poisson": + return AsyncPoissonStrategy( + rate=rate[self.completed_strategies], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + random_seed=self.random_seed, + ) + else: + raise ValueError(f"Invalid strategy type: {self.strategy_type}") + + @staticmethod + def from_standard_args( # type: ignore[override] + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + random_seed: int, + **kwargs, + ) -> "AsyncProfile": + if rate_type not in ("async", "constant", "poisson"): + raise ValueError( + "Rate type must be in ('async', 'constant', 'poisson') " + f"for async profile. 
Received: {rate_type}" + ) + + if not rate: + raise ValueError("Rate must be provided for async profile.") + + if not isinstance(rate, Sequence): + rate = [rate] + + if not all(r.is_integer() and r > 0 for r in rate): + raise ValueError( + f"All rate values must be positive integers, received {rate}" + ) + + if rate_type == "async": + rate_type = "constant" # default to constant if not specified + + return AsyncProfile( + strategy_type=rate_type, # type: ignore[arg-type] + rate=rate, + random_seed=random_seed, + **kwargs, + ) + + +class SweepProfile(AsyncProfile): + type_: Literal["sweep"] = "sweep" # type: ignore[assignment] + sweep_size: int = Field( + description="The number of strategies to generate for the sweep.", + ) + rate: float = -1 + rate_type: Literal["constant", "poisson"] = "constant" + + @property + def strategy_types(self) -> List[StrategyType]: + return ( + ["synchronous"] + ["throughput"] + [self.rate_type] * (self.sweep_size - 2) # type: ignore[return-value] + ) + + def next_strategy(self) -> Optional[SchedulingStrategy]: + if self.completed_strategies >= self.sweep_size: + return None + + if self.completed_strategies == 0: + return SynchronousStrategy() + + if self.completed_strategies == 1: + return ThroughputStrategy( + max_concurrency=self.max_concurrency, + ) + + min_rate = self.measured_rates[0] + max_rate = self.measured_rates[1] + rates = np.linspace(min_rate, max_rate, self.sweep_size - 1)[1:] + + if self.rate_type == "constant": + return AsyncConstantStrategy( + rate=rates[self.completed_strategies - 2], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + ) + elif self.rate_type == "poisson": + return AsyncPoissonStrategy( + rate=rates[self.completed_strategies - 2], + initial_burst=self.initial_burst, + max_concurrency=self.max_concurrency, + ) + else: + raise ValueError(f"Invalid strategy type: {self.rate_type}") + + @staticmethod + def from_standard_args( # type: ignore[override] + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + random_seed: int, + **kwargs, + ) -> "SweepProfile": + if rate_type != "sweep": + raise ValueError("Rate type must be 'sweep' for sweep profile.") + + if "sweep_size" in kwargs: + raise ValueError("Sweep size must not be provided, use rate instead.") + + if not rate: + rate = settings.default_sweep_number + + if not rate: + raise ValueError( + "Rate (sweep_size) must be provided for concurrent profile." 
+ ) + + if ( + not isinstance(rate, (int, float)) + or (isinstance(rate, float) and not rate.is_integer()) + or rate <= 1 + ): + raise ValueError( + f"Rate (sweep_size) must be a positive integer > 1, received {rate}" + ) + + if not kwargs: + kwargs = {} + + if "strategy_type" not in kwargs: + kwargs["strategy_type"] = "constant" + + return SweepProfile(sweep_size=int(rate), random_seed=random_seed, **kwargs) + + +def create_profile( + rate_type: Union[StrategyType, ProfileType], + rate: Optional[Union[float, Sequence[float]]], + random_seed: int = 42, + **kwargs, +) -> "Profile": + if rate_type == "synchronous": + return SynchronousProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + **kwargs, + ) + + if rate_type == "concurrent": + return ConcurrentProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + **kwargs, + ) + + if rate_type == "throughput": + return ThroughputProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + **kwargs, + ) + + if rate_type in ("async", "constant", "poisson"): + return AsyncProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + random_seed=random_seed, + **kwargs, + ) + + if rate_type == "sweep": + return SweepProfile.from_standard_args( + rate_type=rate_type, + rate=rate, + random_seed=random_seed, + **kwargs, + ) + + raise ValueError(f"Invalid profile type: {rate_type}") diff --git a/src/guidellm/benchmark/progress.py b/src/guidellm/benchmark/progress.py new file mode 100644 index 00000000..059c4b06 --- /dev/null +++ b/src/guidellm/benchmark/progress.py @@ -0,0 +1,713 @@ +import math +import time +from dataclasses import dataclass +from datetime import datetime +from typing import Dict, Generic, List, Optional, TypeVar, Union + +from rich.console import Group +from rich.live import Live +from rich.panel import Panel +from rich.progress import ( + BarColumn, + Progress, + ProgressColumn, + SpinnerColumn, + TaskID, + TaskProgressColumn, + TextColumn, + TimeElapsedColumn, + TimeRemainingColumn, +) + +from guidellm.benchmark.aggregator import ( + BenchmarkAggregator, + GenerativeBenchmarkAggregator, +) +from guidellm.benchmark.benchmark import Benchmark, GenerativeBenchmark +from guidellm.benchmark.benchmarker import BenchmarkerResult +from guidellm.scheduler import ( + SchedulingStrategy, + StrategyType, + strategy_display_str, +) +from guidellm.utils import Colors + + +@dataclass +class BenchmarkerTaskProgressState: + display_scheduler_stats: bool + + task_id: TaskID + strategy: Union[StrategyType, SchedulingStrategy] + started: bool = False + compiling: bool = False + ended: bool = False + + start_time: Optional[float] = None + max_number: Optional[float] = None + max_duration: Optional[float] = None + in_warmup: bool = False + in_cooldown: bool = False + + requests_rate: float = 0 + request_latency: float = 0 + requests_processing: float = 0 + requests_successful: float = 0 + requests_incomplete: float = 0 + requests_errored: float = 0 + + worker_overheads_time_ms: float = 0.0 + backend_overheads_time_ms: float = 0.0 + requests_sleep_time_ms: float = 0.0 + requests_targeted_start_time_delay_ms: float = 0.0 + + @property + def description(self) -> str: + return strategy_display_str(self.strategy) + + @property + def total(self) -> Optional[float]: + if self.max_number is None and self.max_duration is None: + return None + + return 1000 + + @property + def completed(self) -> int: + if self.ended: + return 1000 + + if self.max_number is None and self.max_duration is None: + return 0 + + number = 
self.requests_successful + self.requests_errored + number_percent = ( + number / float(self.max_number) * 1000 if self.max_number else -math.inf + ) + duration_percent = ( + (time.time() - self.start_time) / self.max_duration * 1000 + if self.max_duration and self.start_time + else -math.inf + ) + + return min(int(max(number_percent, duration_percent)), 1000) + + @property + def fields(self) -> Dict[str, str]: + fields = { + "start_time": self.formatted_start_time, + "progress_status": self.formatted_progress_status, + "requests_summary": self.formatted_requests_summary, + } + + if self.display_scheduler_stats: + fields["scheduler_stats"] = self.formatted_scheduler_stats + + return fields + + @property + def formatted_start_time(self) -> str: + if self.start_time is None: + return "--:--:--" + + return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S") + + @property + def formatted_progress_status(self) -> str: + if self.ended: + status = "complete" + color = Colors.SUCCESS + elif self.compiling: + status = "compiling" + color = Colors.PROGRESS + elif self.started and self.in_warmup: + status = "warmup" + color = Colors.PROGRESS + elif self.started and self.in_cooldown: + status = "cooldown" + color = Colors.PROGRESS + elif self.started: + status = "running" + color = Colors.PROGRESS + else: + status = "pending" + color = Colors.INFO + + return f"[{color}]{status.ljust(8)}[/{color}]" + + @property + def formatted_requests_summary(self) -> str: + if not self.started: + return " " + + return ( + f"[{Colors.INFO}]Req:[/{Colors.INFO}] " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_rate, + label="req/s", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.request_latency, + label="Lat", + units="s", + total_characters=12, + digits_places=4, + decimal_places=2, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_processing, + label="Conc", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_successful, + label="Comp", + total_characters=12, + digits_places=5, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_incomplete, + label="Inc", + total_characters=12, + digits_places=5, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_errored, + label="Err", + total_characters=12, + digits_places=5, + decimal_places=0, + ) + ) + + @property + def formatted_scheduler_stats(self) -> str: + if not self.started: + return " " + + return ( + f"[{Colors.INFO}]Sys:[/{Colors.INFO}] " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.worker_overheads_time_ms, + label="Work OH", + units="ms", + total_characters=18, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.backend_overheads_time_ms, + label="Back OH", + units="ms", + total_characters=18, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_sleep_time_ms, + label="Req Sleep", + units="ms", + total_characters=18, + digits_places=5, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.requests_targeted_start_time_delay_ms, + label="Start 
Del", + units="ms", + total_characters=18, + digits_places=5, + decimal_places=0, + ) + ) + + @staticmethod + def format_progress_display( + value: float, + label: str, + units: str = "", + total_characters: Optional[int] = None, + digits_places: Optional[int] = None, + decimal_places: Optional[int] = None, + ) -> str: + if decimal_places is None and digits_places is None: + formatted_number = f"{value}:.0f" + elif digits_places is None: + formatted_number = f"{value:.{decimal_places}f}" + elif decimal_places is None: + formatted_number = f"{value:>{digits_places}f}" + else: + formatted_number = f"{value:>{digits_places}.{decimal_places}f}" + + result = f"{formatted_number}{units} [{Colors.INFO}]{label}[/{Colors.INFO}]" + + if total_characters is not None: + total_characters += len(Colors.INFO) * 2 + 5 + + if len(result) < total_characters: + result = result.rjust(total_characters) + + return result + + +class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState): + output_tokens: float = 0 + prompt_tokens: float = 0 + output_tokens_rate: float = 0 + total_tokens_rate: float = 0 + tokens_ttft: float = 0 + tokens_itl: float = 0 + + @property + def fields(self) -> Dict[str, str]: + fields = super().fields + fields["tokens_summary"] = self.formatted_tokens_summary + return fields + + @property + def formatted_tokens_summary(self) -> str: + if not self.started: + return " " + + return ( + f"[{Colors.INFO}]Tok:[/{Colors.INFO}] " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.output_tokens_rate, + label="gen/s", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.total_tokens_rate, + label="tot/s", + total_characters=12, + digits_places=4, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.tokens_ttft, + label="TTFT", + units="ms", + total_characters=12, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.tokens_itl, + label="ITL", + units="ms", + total_characters=12, + digits_places=3, + decimal_places=1, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.prompt_tokens, + label="Prompt", + total_characters=12, + digits_places=4, + decimal_places=0, + ) + + ", " + + BenchmarkerTaskProgressState.format_progress_display( + value=self.output_tokens, + label="Gen", + total_characters=12, + digits_places=4, + decimal_places=0, + ) + ) + + +BTPS = TypeVar("BTPS", bound=BenchmarkerTaskProgressState) + + +class BenchmarkerProgressDisplay(Generic[BTPS]): + def __init__(self, display_scheduler_stats: bool): + self.display_scheduler_stats = display_scheduler_stats + self.started = False + self.benchmarker_tasks_progress = Progress(*self.create_task_progress_columns()) + self.benchmarker_tasks_panel = Panel( + self.benchmarker_tasks_progress, + title="Benchmarks", + title_align="left", + expand=True, + ) + self.benchmarker_progress = Progress( + TextColumn("Generating...", style=f"italic {Colors.PROGRESS}"), + BarColumn( + bar_width=None, + complete_style=Colors.PROGRESS, + finished_style=Colors.SUCCESS, + ), + TextColumn( + "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})", + style=Colors.PROGRESS, + ), + TextColumn("["), + TimeElapsedColumn(), + TextColumn("<"), + TimeRemainingColumn(), + TextColumn("]"), + ) + self.benchmarker_live = Live( + Group( + self.benchmarker_tasks_panel, + 
self.benchmarker_progress, + ), + redirect_stdout=True, + redirect_stderr=True, + ) + self.active_task: Optional[TaskID] = None + self.benchmarker_tasks: List[BTPS] = [] + self.progress_task: Optional[TaskID] = None + + def update(self, result: BenchmarkerResult): + if result.type_ == "run_start": + if self.started: + raise RuntimeError("Progress display already started.") + + self.handle_start(result) + self.started = True + elif result.type_ == "run_complete": + if not self.started: + raise RuntimeError("Progress display not started.") + + self.handle_end(result) + self.started = False + else: + if not self.started: + raise RuntimeError("Progress display not started.") + + self.handle_update(result) + + def handle_start(self, result: BenchmarkerResult): + self.benchmarker_live.start() + + for index, strategy_type in enumerate(result.profile.strategy_types): + task_id = self.benchmarker_tasks_progress.add_task( + description=strategy_type, + start=False, + total=None, + completed=0, + visible=False, + ) + task_progress_state = self.create_task_progress_state( + task_id=task_id, + index=index, + strategy_type=strategy_type, + result=result, + ) + self.benchmarker_tasks.append(task_progress_state) + self.benchmarker_tasks_progress.update( + task_id, + description=task_progress_state.description, + visible=True, + **task_progress_state.fields, # type: ignore[arg-type] + ) + + self.progress_task = self.benchmarker_progress.add_task( + "", + total=len(self.benchmarker_tasks) * 1000, + completed_benchmarks=0, + total_benchmarks=len(self.benchmarker_tasks), + ) + + def handle_update(self, result: BenchmarkerResult): + current_state: BTPS = self.benchmarker_tasks[result.current_index] + + if result.type_ == "scheduler_start": + self.handle_update_scheduler_start(current_state, result) + self.active_task = current_state.task_id + elif result.type_ == "scheduler_update": + self.handle_update_scheduler_update(current_state, result) + elif result.type_ == "scheduler_complete": + self.handle_update_scheduler_complete(current_state, result) + elif result.type_ == "benchmark_compiled": + self.handle_update_benchmark_compiled(current_state, result) + else: + raise ValueError(f"Unknown result type: {result.type_}") + + if self.progress_task is None: + raise RuntimeError("Progress task not set.") + + self.benchmarker_tasks_progress.update( + current_state.task_id, + description=current_state.description, + completed=current_state.completed, + total=current_state.total, + **current_state.fields, # type: ignore[arg-type] + ) + self.benchmarker_progress.update( + self.progress_task, + completed=(result.current_index * 1000) + current_state.completed, + total=1000 * len(self.benchmarker_tasks), + completed_benchmarks=( + result.current_index + (1 if current_state.ended else 0) + ), + total_benchmarks=len(self.benchmarker_tasks), + ) + + if current_state.ended: + self.benchmarker_tasks_progress.stop_task(current_state.task_id) + self.active_task = None + + def handle_update_scheduler_start( + self, progress_state: BTPS, result: BenchmarkerResult + ): + if self.active_task is not None: + raise RuntimeError("Active task already set.") + + progress_state.strategy = result.current_strategy # type: ignore[assignment] + progress_state.started = True + current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment] + progress_state.start_time = ( + current_aggregator.requests_stats.totals.total.start_time + ) + progress_state.max_number = current_aggregator.args.max_number + 
progress_state.max_duration = current_aggregator.args.max_duration + + def handle_update_scheduler_update( + self, progress_state: BTPS, result: BenchmarkerResult + ): + if self.active_task is None: + raise RuntimeError("Active task not set.") + + if self.active_task != progress_state.task_id: + raise RuntimeError("Active task does not match current task.") + + current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment] + progress_state.in_warmup = current_aggregator.in_warmup + progress_state.in_cooldown = current_aggregator.in_cooldown + progress_state.requests_rate = ( + current_aggregator.requests_stats.totals.successful.rate + ) + progress_state.request_latency = ( + current_aggregator.requests_stats.request_time.mean + ) + progress_state.requests_processing = ( + current_aggregator.scheduler_stats.processing_requests.last + ) + progress_state.requests_successful = ( + current_aggregator.requests_stats.totals.successful.total + ) + progress_state.requests_incomplete = ( + current_aggregator.requests_stats.totals.incomplete.total + ) + progress_state.requests_errored = ( + current_aggregator.requests_stats.totals.errored.total + ) + progress_state.worker_overheads_time_ms = ( + current_aggregator.requests_stats.scheduled_time_delay.mean_ms + + current_aggregator.requests_stats.worker_start_delay.mean_ms + ) + progress_state.backend_overheads_time_ms = ( + current_aggregator.requests_stats.request_time_delay.mean_ms + ) + progress_state.requests_sleep_time_ms = ( + current_aggregator.requests_stats.scheduled_time_sleep.mean_ms + ) + progress_state.requests_targeted_start_time_delay_ms = ( + current_aggregator.requests_stats.request_start_time_targeted_delay.mean_ms + ) + + def handle_update_scheduler_complete( + self, + progress_state: BTPS, + result: BenchmarkerResult, # noqa: ARG002 + ): + if self.active_task is None: + raise RuntimeError("Active task not set.") + + if self.active_task != progress_state.task_id: + raise RuntimeError("Active task does not match current task.") + + progress_state.in_warmup = False + progress_state.in_cooldown = False + progress_state.compiling = True + + def handle_update_benchmark_compiled( + self, progress_state: BTPS, result: BenchmarkerResult + ): + if self.active_task is None: + raise RuntimeError("Active task not set.") + + if self.active_task != progress_state.task_id: + raise RuntimeError("Active task does not match current task.") + + current_benchmark: Benchmark = result.current_benchmark # type: ignore[assignment] + progress_state.compiling = False + progress_state.ended = True + progress_state.requests_rate = ( + current_benchmark.metrics.requests_per_second.successful.mean + ) + progress_state.requests_processing = ( + current_benchmark.metrics.request_concurrency.successful.mean + ) + + def handle_end(self, result: BenchmarkerResult): # noqa: ARG002 + if self.progress_task is None: + raise RuntimeError("Progress task not set.") + + self.benchmarker_progress.update( + self.progress_task, + completed=len(self.benchmarker_tasks) * 1000, + total=len(self.benchmarker_tasks) * 1000, + completed_benchmarks=len(self.benchmarker_tasks), + total_benchmarks=len(self.benchmarker_tasks), + ) + self.benchmarker_progress.stop_task(self.progress_task) + self.benchmarker_live.stop() + self.active_task = None + self.benchmarker_tasks = [] + self.progress_task = None + + def create_task_progress_columns(self) -> List[ProgressColumn]: + columns = [ + TextColumn("[{task.fields[start_time]}]"), + 
SpinnerColumn(style=Colors.PROGRESS), + TaskProgressColumn(style=Colors.PROGRESS), + TextColumn("{task.description}"), + TextColumn("({task.fields[progress_status]})"), + TextColumn(" "), + ] + + if not self.display_scheduler_stats: + columns += [ + TextColumn("{task.fields[requests_summary]}\n"), + ] + else: + columns += [ + TextColumn( + "{task.fields[requests_summary]}\n{task.fields[scheduler_stats]}\n" + ), + ] + + return columns + + def create_task_progress_state( + self, + task_id: TaskID, + index: int, # noqa: ARG002 + strategy_type: StrategyType, + result: BenchmarkerResult, # noqa: ARG002 + ) -> BTPS: + return BenchmarkerTaskProgressState( # type: ignore[return-value] + display_scheduler_stats=self.display_scheduler_stats, + task_id=task_id, + strategy=strategy_type, + ) + + +class GenerativeTextBenchmarkerProgressDisplay( + BenchmarkerProgressDisplay[GenerativeTextBenchmarkerTaskProgressState] +): + def handle_update_scheduler_update( + self, + progress_state: GenerativeTextBenchmarkerTaskProgressState, + result: BenchmarkerResult, + ): + super().handle_update_scheduler_update(progress_state, result) + current_aggregator: GenerativeBenchmarkAggregator = result.current_aggregator # type: ignore[assignment] + progress_state.output_tokens = ( + current_aggregator.requests_stats.output_tokens.mean + ) + progress_state.prompt_tokens = ( + current_aggregator.requests_stats.prompt_tokens.mean + ) + progress_state.output_tokens_rate = ( + current_aggregator.requests_stats.output_tokens.rate + ) + progress_state.total_tokens_rate = ( + current_aggregator.requests_stats.total_tokens.rate + ) + progress_state.tokens_ttft = ( + current_aggregator.requests_stats.time_to_first_token.mean_ms + ) + progress_state.tokens_itl = ( + current_aggregator.requests_stats.inter_token_latency.mean_ms + ) + + def handle_update_benchmark_compiled( + self, + progress_state: GenerativeTextBenchmarkerTaskProgressState, + result: BenchmarkerResult, + ): + super().handle_update_benchmark_compiled(progress_state, result) + + current_benchmark: GenerativeBenchmark = result.current_benchmark # type: ignore[assignment] + progress_state.request_latency = ( + current_benchmark.metrics.request_latency.successful.mean + ) + progress_state.requests_successful = current_benchmark.request_totals.successful + progress_state.requests_errored = current_benchmark.request_totals.errored + progress_state.requests_incomplete = current_benchmark.request_totals.incomplete + progress_state.output_tokens = ( + current_benchmark.metrics.output_token_count.successful.mean + ) + progress_state.prompt_tokens = ( + current_benchmark.metrics.prompt_token_count.successful.mean + ) + progress_state.output_tokens_rate = ( + current_benchmark.metrics.output_tokens_per_second.successful.mean + ) + progress_state.total_tokens_rate = ( + current_benchmark.metrics.tokens_per_second.successful.mean + ) + progress_state.tokens_ttft = ( + current_benchmark.metrics.time_to_first_token_ms.successful.mean + ) + progress_state.tokens_itl = ( + current_benchmark.metrics.inter_token_latency_ms.successful.mean + ) + + def create_task_progress_state( + self, + task_id: TaskID, + index: int, # noqa: ARG002 + strategy_type: StrategyType, + result: BenchmarkerResult, # noqa: ARG002 + ) -> GenerativeTextBenchmarkerTaskProgressState: + return GenerativeTextBenchmarkerTaskProgressState( + display_scheduler_stats=self.display_scheduler_stats, + task_id=task_id, + strategy=strategy_type, + ) + + def create_task_progress_columns(self) -> List[ProgressColumn]: + 
columns = super().create_task_progress_columns() + columns = columns[:-1] # remove the last display info column + + if not self.display_scheduler_stats: + columns += [ + TextColumn( + "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}", + ), + ] + else: + columns += [ + TextColumn( + "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}\n{task.fields[scheduler_stats]}", + ), + ] + + return columns diff --git a/src/guidellm/config.py b/src/guidellm/config.py index 2d4e102a..ece9d63f 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -7,7 +7,6 @@ __all__ = [ "DatasetSettings", - "EmulatedDataSettings", "Environment", "LoggingSettings", "OpenAISettings", @@ -74,24 +73,6 @@ class DatasetSettings(BaseModel): ) -class EmulatedDataSettings(BaseModel): - """ - Emulated data settings for the application to use - """ - - source: str = "https://www.gutenberg.org/files/1342/1342-0.txt" - filter_start: str = "It is a truth universally acknowledged, that a" - filter_end: str = "CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO." - clean_text_args: Dict[str, bool] = Field( - default_factory=lambda: { - "fix_encoding": True, - "clean_whitespace": True, - "remove_empty_lines": True, - "force_new_line_punctuation": True, - } - ) - - class OpenAISettings(BaseModel): """ OpenAI settings for the application to connect to the API @@ -139,19 +120,29 @@ class Settings(BaseSettings): # general settings env: Environment = Environment.PROD + default_async_loop_sleep: float = 10e-5 + logging: LoggingSettings = LoggingSettings() + default_sweep_number: int = 10 + + # HTTP settings request_timeout: int = 60 * 5 # 5 minutes request_http2: bool = True + + # Scheduler settings max_concurrency: int = 512 - num_sweep_profiles: int = 9 - logging: LoggingSettings = LoggingSettings() + max_worker_processes: int = 10 + max_add_requests_per_loop: int = 20 # Data settings dataset: DatasetSettings = DatasetSettings() - emulated_data: EmulatedDataSettings = EmulatedDataSettings() # Request/stats settings - preferred_prompt_tokens_source: Optional[Literal["backend", "local"]] = None - preferred_output_tokens_source: Optional[Literal["backend", "local"]] = None + preferred_prompt_tokens_source: Optional[ + Literal["request", "response", "local"] + ] = None + preferred_output_tokens_source: Optional[ + Literal["request", "response", "local"] + ] = None preferred_backend: Literal["openai"] = "openai" openai: OpenAISettings = OpenAISettings() diff --git a/src/guidellm/core/__init__.py b/src/guidellm/core/__init__.py deleted file mode 100644 index e738aa76..00000000 --- a/src/guidellm/core/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from .distribution import Distribution -from .report import GuidanceReport -from .request import TextGenerationRequest -from .result import ( - RequestConcurrencyMeasurement, - TextGenerationBenchmark, - TextGenerationBenchmarkReport, - TextGenerationError, - TextGenerationResult, -) -from .serializable import Serializable, SerializableFileType - -__all__ = [ - "Distribution", - "GuidanceReport", - "RequestConcurrencyMeasurement", - "Serializable", - "SerializableFileType", - "TextGenerationBenchmark", - "TextGenerationBenchmarkReport", - "TextGenerationError", - "TextGenerationRequest", - "TextGenerationResult", -] diff --git a/src/guidellm/core/distribution.py b/src/guidellm/core/distribution.py deleted file mode 100644 index 749d6818..00000000 --- a/src/guidellm/core/distribution.py +++ /dev/null @@ -1,190 +0,0 @@ -from typing import List, Sequence, Union - -import numpy 
as np -from loguru import logger -from pydantic import Field - -from guidellm.core.serializable import Serializable - -__all__ = ["Distribution"] - - -class Distribution(Serializable): - """ - A class to represent a statistical distribution and perform various - statistical analyses. - """ - - data: Sequence[float] = Field( - default_factory=list, - description="The data points of the distribution.", - ) - - def __str__(self): - return f"Distribution({self.describe()})" - - def __len__(self): - return len(self.data) - - @property - def mean(self) -> float: - """ - Calculate and return the mean of the distribution. - :return: The mean of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate mean.") - return 0.0 - - mean_value = np.mean(self.data).item() - logger.debug(f"Calculated mean: {mean_value}") - return mean_value - - @property - def median(self) -> float: - """ - Calculate and return the median of the distribution. - :return: The median of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate median.") - return 0.0 - - median_value = np.median(self.data).item() - logger.debug(f"Calculated median: {median_value}") - return median_value - - @property - def variance(self) -> float: - """ - Calculate and return the variance of the distribution. - :return: The variance of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate variance.") - return 0.0 - - variance_value = np.var(self.data).item() - logger.debug(f"Calculated variance: {variance_value}") - return variance_value - - @property - def std_deviation(self) -> float: - """ - Calculate and return the standard deviation of the distribution. - :return: The standard deviation of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate standard deviation.") - return 0.0 - - std_deviation_value = np.std(self.data).item() - logger.debug(f"Calculated standard deviation: {std_deviation_value}") - return std_deviation_value - - def percentile(self, percentile: float) -> float: - """ - Calculate and return the specified percentile of the distribution. - :param percentile: The desired percentile to calculate (0-100). - :return: The specified percentile of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate percentile.") - return 0.0 - - percentile_value = np.percentile(self.data, percentile).item() - logger.debug(f"Calculated {percentile}th percentile: {percentile_value}") - return percentile_value - - def percentiles(self, percentiles: Union[List[int], List[float]]) -> List[float]: - """ - Calculate and return the specified percentiles of the distribution. - :param percentiles: A list of desired percentiles to calculate (0-100). - :return: A list of the specified percentiles of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate percentiles.") - return [0.0] * len(percentiles) - - percentiles_values: List[float] = np.percentile(self.data, percentiles).tolist() # type: ignore # noqa: PGH003 - logger.debug(f"Calculated percentiles {percentiles}: {percentiles_values}") - return percentiles_values - - @property - def min(self) -> float: - """ - Return the minimum value of the distribution. - :return: The minimum value of the distribution. 
- """ - if not self.data: - logger.info("No data points available to calculate minimum.") - return 0.0 - - min_value: float = np.min(self.data).item() # type: ignore # noqa: PGH003 - logger.debug(f"Calculated min: {min_value}") - return min_value - - @property - def max(self) -> float: - """ - Return the maximum value of the distribution. - :return: The maximum value of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate maximum.") - return 0.0 - - max_value: float = np.max(self.data).item() # type: ignore # noqa: PGH003 - logger.debug(f"Calculated max: {max_value}") - return max_value - - @property - def range(self) -> float: - """ - Calculate and return the range of the distribution (max - min). - :return: The range of the distribution. - """ - if not self.data: - logger.info("No data points available to calculate range.") - return 0.0 - - range_value = self.max - self.min - logger.debug(f"Calculated range: {range_value}") - return range_value - - def describe(self) -> dict: - """ - Return a dictionary describing various statistics of the distribution. - :return: A dictionary with statistical summaries of the distribution. - """ - description = { - "mean": self.mean, - "median": self.median, - "variance": self.variance, - "std_deviation": self.std_deviation, - "percentile_indices": [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], - "percentile_values": self.percentiles( - [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], - ), - "min": self.min, - "max": self.max, - "range": self.range, - } - logger.debug(f"Generated description: {description}") - return description - - def add_data(self, new_data: Sequence[float]): - """ - Add new data points to the distribution. - :param new_data: A list of new numerical data points to add. - """ - self.data = list(self.data) + list(new_data) - logger.debug(f"Added new data: {new_data}") - - def remove_data(self, remove_data: Sequence[float]): - """ - Remove specified data points from the distribution. - :param remove_data: A list of numerical data points to remove. - """ - self.data = [item for item in self.data if item not in remove_data] - logger.debug(f"Removed data: {remove_data}") diff --git a/src/guidellm/core/report.py b/src/guidellm/core/report.py deleted file mode 100644 index 584fe63c..00000000 --- a/src/guidellm/core/report.py +++ /dev/null @@ -1,311 +0,0 @@ -import time -from datetime import datetime -from typing import List, Optional - -from loguru import logger -from pydantic import Field -from rich.console import Console, Group -from rich.live import Live -from rich.panel import Panel -from rich.table import Table - -from guidellm.core.result import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.core.serializable import Serializable - -__all__ = ["GuidanceReport"] - - -def _create_benchmark_report_details(report: TextGenerationBenchmarkReport) -> str: - """ - Create a detailed string representation of a benchmark report. - - :param report: The benchmark report to generate details for. - :type report: TextGenerationBenchmarkReport - :return: A string containing the backend, data, rate, and limits of - the benchmark report. 
- :rtype: str - """ - backend = ( - f"Backend(type={report.args.get('backend_type', 'N/A')}, " - f"target={report.args.get('target', 'N/A')}, " - f"model={report.args.get('model', 'N/A')})" - ) - data = ( - f"Data(type={report.args.get('data_type', 'N/A')}, " - f"source={report.args.get('data', 'N/A')}, " - f"tokenizer={report.args.get('tokenizer', 'N/A')})" - ) - rate = ( - f"Rate(type={report.args.get('mode', 'N/A')}, " - f"rate={report.args.get('rate', 'N/A')})" - ) - limits = ( - f"Limits(max_number={report.args.get('max_number', 'N/A')} requests, " - f"max_duration={report.args.get('max_duration', 'N/A')} sec)" - ) - - logger.debug( - "Created benchmark report details for backend={}, data={}, rate={}, limits={}", - backend, - data, - rate, - limits, - ) - - return backend + "\n" + data + "\n" + rate + "\n" + limits + "\n" - - -def _benchmark_rate_id(benchmark: TextGenerationBenchmark) -> str: - """ - Generate a string identifier for a benchmark rate. - - :param benchmark: The benchmark for which to generate the rate ID. - :type benchmark: TextGenerationBenchmark - :return: A string representing the benchmark rate ID. - :rtype: str - """ - rate_id = ( - f"{benchmark.mode}@{benchmark.rate:.2f} req/sec" - if benchmark.rate - else f"{benchmark.mode}" - ) - logger.debug("Generated benchmark rate ID: {}", rate_id) - return rate_id - - -def _create_benchmark_report_requests_summary( - report: TextGenerationBenchmarkReport, -) -> Table: - """ - Create a table summarizing the requests of a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing the requests. - :rtype: Table - """ - table = Table( - "Benchmark", - "Requests Completed", - "Request Failed", - "Duration", - "Start Time", - "End Time", - title="[magenta]Requests Data by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - start_time_str = ( - datetime.fromtimestamp(benchmark.start_time).strftime("%H:%M:%S") - if benchmark.start_time - else "N/A" - ) - end_time_str = ( - datetime.fromtimestamp(benchmark.end_time).strftime("%H:%M:%S") - if benchmark.end_time - else "N/A" - ) - - table.add_row( - _benchmark_rate_id(benchmark), - f"{benchmark.request_count}/{benchmark.total_count}", - f"{benchmark.error_count}/{benchmark.total_count}", - f"{benchmark.duration:.2f} sec", - f"{start_time_str}", - f"{end_time_str}", - ) - logger.debug("Created requests summary table for the report.") - return table - - -def _create_benchmark_report_data_tokens_summary( - report: TextGenerationBenchmarkReport, -) -> Table: - """ - Create a table summarizing data tokens of a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing the data tokens. 
- :rtype: Table - """ - table = Table( - "Benchmark", - "Prompt", - "Prompt (1%, 5%, 10%, 50%, 90%, 95%, 99%)", - "Output", - "Output (1%, 5%, 10%, 50%, 90%, 95%, 99%)", - title="[magenta]Tokens Data by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - table.add_row( - _benchmark_rate_id(benchmark), - f"{benchmark.prompt_token:.2f}", - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.prompt_token_percentiles.values() - ), - f"{benchmark.output_token:.2f}", - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.output_token_percentiles.values() - ), - ) - logger.debug("Created data tokens summary table for the report.") - return table - - -def _create_benchmark_report_dist_perf_summary( - report: TextGenerationBenchmarkReport, -) -> Table: - """ - Create a table summarizing distribution performance of a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing the performance statistics. - :rtype: Table - """ - table = Table( - "Benchmark", - "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)", - "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", - "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)", - title="[magenta]Performance Stats by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - table.add_row( - _benchmark_rate_id(benchmark), - ", ".join( - f"{percentile:.2f}" - for percentile in benchmark.request_latency_percentiles.values() - ), - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.time_to_first_token_percentiles.values() - ), - ", ".join( - f"{percentile:.1f}" - for percentile in benchmark.inter_token_latency_percentiles.values() - ), - ) - logger.debug("Created distribution performance summary table for the report.") - return table - - -def _create_benchmark_report_summary(report: TextGenerationBenchmarkReport) -> Table: - """ - Create a summary table for a benchmark report. - - :param report: The benchmark report to summarize. - :type report: TextGenerationBenchmarkReport - :return: A rich Table object summarizing overall performance. - :rtype: Table - """ - table = Table( - "Benchmark", - "Requests per Second", - "Request Latency", - "Time to First Token", - "Inter Token Latency", - "Output Token Throughput", - title="[magenta]Performance Summary by Benchmark[/magenta]", - title_style="bold", - title_justify="left", - show_header=True, - ) - - for benchmark in report.benchmarks_sorted: - table.add_row( - _benchmark_rate_id(benchmark), - f"{benchmark.completed_request_rate:.2f} req/sec", - f"{benchmark.request_latency:.2f} sec", - f"{benchmark.time_to_first_token:.2f} ms", - f"{benchmark.inter_token_latency:.2f} ms", - f"{benchmark.output_token_throughput:.2f} tokens/sec", - ) - logger.debug("Created overall performance summary table for the report.") - return table - - -class GuidanceReport(Serializable): - """ - A class to manage the guidance reports that include the benchmarking details, - potentially across multiple runs, for saving and loading from disk. - - :param benchmarks: The list of benchmarking reports. - :type benchmarks: List[TextGenerationBenchmarkReport] - """ - - benchmarks: List[TextGenerationBenchmarkReport] = Field( - default_factory=list, description="The list of benchmark reports." 
- ) - - def print( - self, save_path: Optional[str] = None, continual_refresh: bool = False - ) -> None: - """ - Print the guidance report to the console. - - :param save_path: Optional path to save the report to disk. - :type save_path: Optional[str] - :param continual_refresh: Whether to continually refresh the report. - :type continual_refresh: bool - :return: None - """ - logger.info("Printing guidance report to console with save_path={}", save_path) - report_viz = Panel( - Group( - *[ - Panel( - Group( - _create_benchmark_report_details(benchmark), - "", - _create_benchmark_report_requests_summary(benchmark), - "", - _create_benchmark_report_data_tokens_summary(benchmark), - "", - _create_benchmark_report_dist_perf_summary(benchmark), - "", - _create_benchmark_report_summary(benchmark), - ), - title=( - f"[bold magenta]Benchmark Report " - f"{index + 1}[/bold magenta]" - ), - expand=True, - title_align="left", - ) - for index, benchmark in enumerate(self.benchmarks) - ], - ), - title=( - "[bold cyan]GuideLLM Benchmarks Report[/bold cyan] [italic]" - f"({save_path})[/italic]" - ), - expand=True, - title_align="left", - ) - console = Console() - - if continual_refresh: - logger.info("Starting live report with continual refresh.") - with Live(report_viz, refresh_per_second=1, console=console) as live: - while True: - live.update(report_viz) - time.sleep(1) - else: - console.print(report_viz) - - logger.info("Guidance report printing completed.") diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py deleted file mode 100644 index 547ac60a..00000000 --- a/src/guidellm/core/request.py +++ /dev/null @@ -1,49 +0,0 @@ -import uuid -from typing import Any, Dict, Literal, Optional - -from pydantic import Field - -from guidellm.core.serializable import Serializable - - -class TextGenerationRequest(Serializable): - """ - A class to represent a text generation request for generative AI workloads. - """ - - id: str = Field( - default_factory=lambda: str(uuid.uuid4()), - description="The unique identifier for the request.", - ) - type_: Literal["text", "chat"] = Field( - default="text", - description="The type of text generation request (e.g., text, chat).", - ) - prompt: str = Field(description="The input prompt for the text generation.") - prompt_token_count: Optional[int] = Field( - default=None, - description="The number of tokens in the input prompt.", - ) - output_token_count: Optional[int] = Field( - default=None, - description="The number of tokens to generate.", - ) - params: Dict[str, Any] = Field( - default_factory=dict, - description="The parameters for the text generation request.", - ) - - def __str__(self) -> str: - prompt_short = ( - self.prompt[:32] + "..." 
- if self.prompt and len(self.prompt) > 32 # noqa: PLR2004 - else self.prompt - ) - - return ( - f"TextGenerationRequest(id={self.id}, " - f"type_={self.type_}" - f"prompt={prompt_short}, prompt_token_count={self.prompt_token_count}, " - f"output_token_count={self.output_token_count}, " - f"params={self.params})" - ) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py deleted file mode 100644 index 2670c105..00000000 --- a/src/guidellm/core/result.py +++ /dev/null @@ -1,585 +0,0 @@ -from time import time -from typing import Any, Dict, List, Literal, Optional, Union - -from loguru import logger -from pydantic import Field, computed_field - -from guidellm.core.distribution import Distribution -from guidellm.core.request import TextGenerationRequest -from guidellm.core.serializable import Serializable - -__all__ = [ - "RequestConcurrencyMeasurement", - "TextGenerationBenchmark", - "TextGenerationBenchmarkReport", - "TextGenerationError", - "TextGenerationResult", -] - - -DEFAULT_PERCENTILES = [1, 5, 10, 50, 90, 95, 99] - - -class TextGenerationResult(Serializable): - """ - A class to represent the result of a text generation request - for generative AI workloads. - """ - - request: TextGenerationRequest = Field( - description="The text generation request used to generate the result.", - ) - prompt_token_count: Optional[int] = Field( - default=None, - description="The number of tokens in the input prompt.", - ) - output: str = Field( - default_factory=str, - description="The generated output for the text generation.", - ) - output_token_count: Optional[int] = Field( - default=None, - description="The number of tokens in the output.", - ) - start_time: Optional[float] = Field( - default=None, - description="The absolute start time, in seconds, of the text generation.", - ) - end_time: Optional[float] = Field( - default=None, - description="The absolute end time, in seconds, of the text generation.", - ) - first_token_time: Optional[float] = Field( - default=None, - description="The absolute time, in seconds, the first token was received.", - ) - last_token_time: Optional[float] = Field( - default=None, - description="The absolute time, in seconds, the last token was received.", - ) - - @computed_field # type: ignore[misc] - @property - def request_latency(self) -> Optional[float]: - """ - Get the request latency in seconds. - - :return: The request latency in seconds. - """ - if not self.end_time or not self.start_time: - return None - - return self.end_time - self.start_time - - @computed_field # type: ignore[misc] - @property - def time_to_first_token(self) -> Optional[float]: - """ - Get the time taken to decode the first token in milliseconds. - - :return: The time taken to decode the first token in milliseconds. - """ - if not self.first_token_time or not self.start_time: - return None - - return 1000 * (self.first_token_time - self.start_time) - - @computed_field # type: ignore[misc] - @property - def inter_token_latency(self) -> Optional[float]: - """ - Get the average time between tokens in milliseconds. - - :return: The average time between tokens. 
- """ - if ( - not self.last_token_time - or not self.first_token_time - or not self.output_token_count - or self.output_token_count < 2 # noqa: PLR2004 - ): - return None - - return ( - 1000 - * (self.last_token_time - self.first_token_time) - / (self.output_token_count - 1) # ignore first token - ) - - @computed_field # type: ignore[misc] - @property - def output_tokens_per_second(self) -> Optional[float]: - """ - Get the average token throughput in tokens per second for the entire request. - Note, does not account for the time taken to decode the first token. - - :return: The average token throughput. - """ - itl = self.inter_token_latency - - if itl is None: - return None - - return 1000.0 / itl - - -class TextGenerationError(Serializable): - """ - A class to represent an error that occurred during a text generation request - for generative AI workloads. - """ - - request: TextGenerationRequest = Field( - description="The text generation request that resulted in an error.", - ) - message: str = Field( - description="The error message that occurred during text generation.", - ) - - -class RequestConcurrencyMeasurement(Serializable): - """ - A dataclass to represent the concurrency measurement of a request. - """ - - time: float = Field(description="The time of the measurement.") - completed: int = Field(description="The number of completed requests.") - errored: int = Field(description="The number of errored requests.") - processing: int = Field(description="The number of processing requests.") - - -class TextGenerationBenchmark(Serializable): - """ - A class to represent a report of text generation requests - (results and errors) for generative AI workloads. - This is a set of results and errors for a specific mode and rate. - """ - - mode: Literal["asynchronous", "synchronous", "throughput"] = Field( - description="The generation mode, one of 'async', 'sync', or 'throughput'." - ) - rate: Optional[float] = Field( - default=None, - description="The requested rate of requests per second.", - ) - results: List[TextGenerationResult] = Field( - default_factory=list, - description="The results of the text generation requests.", - ) - errors: List[TextGenerationError] = Field( - default_factory=list, - description="The errors of the text generation requests.", - ) - concurrencies: List[RequestConcurrencyMeasurement] = Field( - default_factory=list, - description="The concurrency measurements of the requests.", - ) - - def __iter__(self): - """ - Provide an iterator interface to iterate over the results. - - :return: An iterator over the results. - """ - return iter(self.results) - - @computed_field # type: ignore[misc] - @property - def request_count(self) -> int: - """ - Get the number of requests in the result. - - :return: The number of requests. - """ - return len(self.results) - - @computed_field # type: ignore[misc] - @property - def error_count(self) -> int: - """ - Get the number of errors in the result. - - :return: The number of errors. - """ - return len(self.errors) - - @computed_field # type: ignore[misc] - @property - def total_count(self) -> int: - """ - Get the total number of requests in the result. - - :return: The total number of requests. - """ - return self.request_count + self.error_count - - @computed_field # type: ignore[misc] - @property - def start_time(self) -> Optional[float]: - """ - Get the start time of the first request in the result. - - :return: The start time of the first request. 
- """ - return self.results[0].start_time if self.results else None - - @computed_field # type: ignore[misc] - @property - def end_time(self) -> Optional[float]: - """ - Get the end time of the last request in the result. - - :return: The end time of the last request. - """ - return self.results[-1].end_time if self.results else None - - @computed_field # type: ignore[misc] - @property - def duration(self) -> float: - """ - Get the duration of the result in seconds. - - :return: The duration of the result. - """ - return ( - self.end_time - self.start_time - if self.end_time and self.start_time - else 0.0 - ) - - @computed_field # type: ignore[misc] - @property - def completed_request_rate(self) -> float: - """ - Get the rate of requests per second in the result. - - :return: The rate of requests per second. - """ - return self.request_count / self.duration if self.duration else 0.0 - - @property - def request_latency_distribution(self) -> Distribution: - """ - Get the distribution of request latencies in seconds. - - :return: The distribution of request latencies. - """ - return Distribution( - data=[ - result.request_latency - for result in self.results - if result.request_latency - ] - ) - - @computed_field # type: ignore[misc] - @property - def request_latency(self) -> float: - """ - Get the average request latency in seconds. - - :return: The average request latency in seconds. - :rtype: float - """ - return self.request_latency_distribution.mean - - @computed_field # type: ignore[misc] - @property - def request_latency_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles of request latency in seconds. - - :return: A dictionary mapping percentile to request latency in seconds. - """ - if not self.results: - return {} - - values = self.request_latency_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @property - def ttft_distribution(self) -> Distribution: - """ - Get the distribution of time taken to decode the first token. - - :return: The distribution of time taken to decode the first token. - """ - return Distribution( - data=[ - result.time_to_first_token - for result in self.results - if result.time_to_first_token - ] - ) - - @computed_field # type: ignore[misc] - @property - def time_to_first_token(self) -> float: - """ - Get the time taken to decode the first token in milliseconds. - - :return: The time taken to decode the first token in milliseconds. - """ - return self.ttft_distribution.mean - - @computed_field # type: ignore[misc] - @property - def time_to_first_token_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for time taken to decode the first token - in milliseconds. - - :return: A dictionary mapping percentile to time taken for the first token. - """ - if not self.results: - return {} - - values = self.ttft_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @property - def itl_distribution(self) -> Distribution: - """ - Get the distribution of time between tokens in milliseconds. - - :return: The distribution of time between tokens. 
- """ - return Distribution( - data=[ - result.inter_token_latency - for result in self.results - for _ in range( - result.output_token_count - 1 - if result.output_token_count and result.output_token_count > 1 - else 0 - ) - if (result.inter_token_latency) - ] - ) - - @computed_field # type: ignore[misc] - @property - def inter_token_latency(self) -> float: - """ - Get the average time between tokens in milliseconds. - - :return: The average time between tokens. - """ - return self.itl_distribution.mean - - @computed_field # type: ignore[misc] - @property - def inter_token_latency_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for the time between tokens in milliseconds. - - :return: A dictionary mapping percentile to time between tokens. - """ - if not self.results: - return {} - - values = self.itl_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @computed_field # type: ignore[misc] - @property - def output_token_throughput(self) -> float: - """ - Get the average token throughput in tokens per second. - - :return: The average token throughput. - """ - output_tokens = sum( - result.output_token_count - for result in self.results - if result.output_token_count and result.output_token_count > 0 - ) - - return output_tokens / self.duration if self.duration else 0.0 - - @property - def prompt_token_distribution(self) -> Distribution: - """ - Get the distribution of prompt token counts. - - :return: The distribution of prompt token counts. - """ - return Distribution( - data=[ - result.prompt_token_count - for result in self.results - if result.prompt_token_count - ] - ) - - @computed_field # type: ignore[misc] - @property - def prompt_token(self) -> float: - """ - Get the average number of prompt tokens. - - :return: The average number of prompt tokens. - """ - return self.prompt_token_distribution.mean - - @computed_field # type: ignore[misc] - @property - def prompt_token_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for number of prompt tokens. - - :return: A dictionary mapping percentile to number of prompt tokens. - """ - if not self.results: - return {} - - values = self.prompt_token_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - @property - def output_token_distribution(self) -> Distribution: - """ - Get the distribution of output token counts. - - :return: The distribution of output token counts. - """ - return Distribution( - data=[ - result.output_token_count - for result in self.results - if result.output_token_count - ] - ) - - @computed_field # type: ignore[misc] - @property - def output_token(self) -> float: - """ - Get the average number of output tokens. - - :return: The average number of output tokens. - """ - return self.output_token_distribution.mean - - @computed_field # type: ignore[misc] - @property - def output_token_percentiles(self) -> Dict[str, float]: - """ - Get standard percentiles for number of output tokens. - - :return: List of percentiles of number of output tokens. - """ - if not self.results: - return {} - - values = self.output_token_distribution.percentiles(DEFAULT_PERCENTILES) - - return dict(zip(map(str, DEFAULT_PERCENTILES), values)) - - def request_started(self): - """ - Record the start of a generation request. 
- """ - if not self.concurrencies: - self.concurrencies = [ - RequestConcurrencyMeasurement( - time=time(), - completed=0, - errored=0, - processing=1, - ), - ] - else: - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed, - errored=last.errored, - processing=last.processing + 1, - ), - ) - - logger.info("Text generation request started") - - def request_completed( - self, - result: Union[TextGenerationResult, TextGenerationError], - ): - """ - Record the completion of a text generation request. - - :param result: The completed result or error. - :type result: Union[TextGenerationResult, TextGenerationError] - """ - if not self.concurrencies: - raise ValueError("Request completed without starting") - - if isinstance(result, TextGenerationError): - is_error = True - self.errors.append(result) - logger.info( - "Text generation request resulted in error: {}", - result.message, - ) - else: - if not result.start_time or not result.end_time: - raise ValueError("Start time and End time are not defined") - - is_error = False - self.results.append(result) - logger.info("Text generation request completed successfully: {}", result) - - last = self.concurrencies[-1] - self.concurrencies.append( - RequestConcurrencyMeasurement( - time=time(), - completed=last.completed + (not is_error), - errored=last.errored + is_error, - processing=last.processing - 1, - ) - ) - - -class TextGenerationBenchmarkReport(Serializable): - """ - A class to represent a report of text generation benchmarks - for generative AI workloads. - This is a collection of benchmarks for different modes and rates. - """ - - benchmarks: List[TextGenerationBenchmark] = Field( - default_factory=list, - description="The benchmarks of text generation requests.", - ) - args: Dict[str, Any] = Field( - default_factory=dict, - description="The arguments used for the benchmarks.", - ) - - def __iter__(self): - return iter(self.benchmarks) - - @property - def benchmarks_sorted(self) -> List[TextGenerationBenchmark]: - """ - Get the list of benchmarks sorted by request rate. - - :return: The sorted list of benchmarks. - :rtype: List[TextGenerationBenchmark] - """ - return sorted(self.benchmarks, key=lambda x: x.completed_request_rate) - - def add_benchmark(self, benchmark: TextGenerationBenchmark): - """ - Add a result to the report. - - :param benchmark: The result to add. - :type benchmark: TextGenerationBenchmark - """ - self.benchmarks.append(benchmark) - logger.debug("Added result: {}", benchmark) diff --git a/src/guidellm/core/serializable.py b/src/guidellm/core/serializable.py deleted file mode 100644 index 23e6845a..00000000 --- a/src/guidellm/core/serializable.py +++ /dev/null @@ -1,169 +0,0 @@ -from pathlib import Path -from typing import Any, Literal, Union, get_args - -import yaml -from loguru import logger -from pydantic import BaseModel, ConfigDict - -__all__ = ["Serializable", "SerializableFileType"] - - -SerializableFileType = Literal["yaml", "json"] - - -class Serializable(BaseModel): - """ - A base class for models that require YAML and JSON serialization and - deserialization. 
- """ - - model_config = ConfigDict( - extra="ignore", - use_enum_values=True, - validate_assignment=True, - from_attributes=True, - ) - - def __init__(self, /, **data: Any) -> None: - super().__init__(**data) - logger.debug( - "Initialized new instance of {} with data: {}", - self.__class__.__name__, - data, - ) - - def to_yaml(self) -> str: - """ - Serialize the model to a YAML string. - - :return: YAML string representation of the model. - """ - logger.debug("Serializing to YAML... {}", self) - - return yaml.dump(self.model_dump()) - - @classmethod - def from_yaml(cls, data: str): - """ - Deserialize a YAML string to a model instance. - - :param data: YAML string to deserialize. - :return: An instance of the model. - """ - logger.debug("Deserializing from YAML... {}", data) - - return cls.model_validate(yaml.safe_load(data)) - - def to_json(self) -> str: - """ - Serialize the model to a JSON string. - - :return: JSON string representation of the model. - """ - logger.debug("Serializing to JSON... {}", self) - - return self.model_dump_json() - - @classmethod - def from_json(cls, data: str): - """ - Deserialize a JSON string to a model instance. - - :param data: JSON string to deserialize. - :return: An instance of the model. - """ - logger.debug("Deserializing from JSON... {}", data) - - return cls.model_validate_json(data) - - def save_file( - self, - path: Union[str, Path], - type_: SerializableFileType = "yaml", - ) -> str: - """ - Save the model to a file in either YAML or JSON format. - - :param path: Path to the exact file or the containing directory. - If it is a directory, the file name will be inferred from the class name. - :param type_: Optional type to save ('yaml' or 'json'). - If not provided and the path has an extension, - it will be inferred to save in that format. - If not provided and the path does not have an extension, - it will save in YAML format. - :return: The path to the saved file. - """ - logger.debug("Saving to file... {} with format: {}", path, type_) - - if isinstance(path, str): - path = Path(path) - - if path.suffix: - # is a file - ext = path.suffix[1:].lower() - if type_ not in get_args(SerializableFileType): - raise ValueError( - f"Unsupported file extension: {type_}. " - f"Expected one of {SerializableFileType} " - f"for {path}" - ) - type_ = ext # type: ignore # noqa: PGH003 - else: - # is a directory - file_name = f"{self.__class__.__name__.lower()}.{type_}" - path = path / file_name - - path.parent.mkdir(parents=True, exist_ok=True) - - with path.open("w") as file: - if type_ == "yaml": - file.write(self.to_yaml()) - elif type_ == "json": - file.write(self.to_json()) - else: - raise ValueError( - f"Unsupported file extension: {type_}" - f"Expected one of {SerializableFileType} " - f"for {path}" - ) - - logger.info("Successfully saved {} to {}", self.__class__.__name__, path) - - return str(path) - - @classmethod - def load_file(cls, path: Union[str, Path]): - """ - Load a model from a file in either YAML or JSON format. - - :param path: Path to the file. - :return: An instance of the model. - """ - logger.debug("Loading from file... 
{}", path) - - if isinstance(path, str): - path = Path(path) - - if not path.exists(): - raise FileNotFoundError(f"File not found: {path}") - - if not path.is_file(): - raise ValueError(f"Path is not a file: {path}") - - extension = path.suffix[1:].lower() - - with path.open() as file: - data = file.read() - - if extension == "yaml": - obj = cls.from_yaml(data) - elif extension == "json": - obj = cls.from_json(data) - else: - raise ValueError( - f"Unsupported file extension: {extension}" - f"Expected one of {SerializableFileType} " - f"for {path}" - ) - - return obj diff --git a/src/guidellm/data/__init__.py b/src/guidellm/data/__init__.py new file mode 100644 index 00000000..8a48204e --- /dev/null +++ b/src/guidellm/data/__init__.py @@ -0,0 +1,4 @@ +""" +Required for python < 3.12 +https://docs.python.org/3/library/importlib.resources.html#importlib.resources.files +""" diff --git a/src/guidellm/data/prideandprejudice.txt.gz b/src/guidellm/data/prideandprejudice.txt.gz new file mode 100644 index 00000000..8c7a1072 Binary files /dev/null and b/src/guidellm/data/prideandprejudice.txt.gz differ diff --git a/src/guidellm/dataset/__init__.py b/src/guidellm/dataset/__init__.py new file mode 100644 index 00000000..20d68e64 --- /dev/null +++ b/src/guidellm/dataset/__init__.py @@ -0,0 +1,22 @@ +from .creator import ColumnInputTypes, DatasetCreator +from .entrypoints import load_dataset +from .file import FileDatasetCreator +from .hf_datasets import HFDatasetsCreator +from .in_memory import InMemoryDatasetCreator +from .synthetic import ( + SyntheticDatasetConfig, + SyntheticDatasetCreator, + SyntheticTextItemsGenerator, +) + +__all__ = [ + "DatasetCreator", + "ColumnInputTypes", + "HFDatasetsCreator", + "load_dataset", + "FileDatasetCreator", + "InMemoryDatasetCreator", + "SyntheticDatasetCreator", + "SyntheticDatasetConfig", + "SyntheticTextItemsGenerator", +] diff --git a/src/guidellm/dataset/creator.py b/src/guidellm/dataset/creator.py new file mode 100644 index 00000000..42103a46 --- /dev/null +++ b/src/guidellm/dataset/creator.py @@ -0,0 +1,213 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +__all__ = ["DatasetCreator", "ColumnInputTypes"] + +ColumnInputTypes = Literal[ + "prompt_column", + "text_column", + "prompt_tokens_count_column", + "output_tokens_count_column", +] + + +class DatasetCreator(ABC): + DEFAULT_SPLITS_TRAIN = [ + "train", + "training", + "train_set", + "training_set", + "train_dataset", + "training_dataset", + "train_data", + "training_data", + "pretrain", + "pretrain_set", + "pretrain_dataset", + "pretrain_data", + "pretraining", + ] + DEFAULT_SPLITS_CALIB = [ + "calibration", + "calib", + "cal", + "calibration_set", + "calib_set", + "cal_set", + "calibration_dataset", + "calib_dataset", + "cal_set", + "calibration_data", + "calib_data", + "cal_data", + ] + DEFAULT_SPLITS_VAL = [ + "validation", + "val", + "valid", + "validation_set", + "val_set", + "validation_dataset", + "val_dataset", + "validation_data", + "val_data", + "dev", + "dev_set", + "dev_dataset", + "dev_data", + ] + DEFAULT_SPLITS_TEST = [ + "test", + "testing", + "test_set", + "testing_set", + "test_dataset", + "testing_dataset", + "test_data", + "testing_data", + "eval", + "eval_set", + "eval_dataset", + "eval_data", + ] + DEFAULT_SPLITS_DATASET: Dict[str, str] = {} 
+ + @classmethod + def create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int = 42, + split_pref_order: Optional[List[str]] = None, + ) -> Tuple[Union[Dataset, IterableDataset], Dict[ColumnInputTypes, str]]: + if not cls.is_supported(data, data_args): + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ") + + split = cls.extract_args_split(data_args) + column_mappings = cls.extract_args_column_mappings(data_args) + dataset = cls.handle_create( + data, data_args, processor, processor_args, random_seed + ) + + if isinstance(dataset, (DatasetDict, IterableDatasetDict)): + dataset = cls.extract_dataset_split(dataset, split, split_pref_order) + + if not isinstance(dataset, (Dataset, IterableDataset)): + raise ValueError( + f"Unsupported data type: {type(dataset)} given for {dataset}." + ) + + return dataset, column_mappings + + @classmethod + def extract_args_split(cls, data_args: Optional[Dict[str, Any]]) -> str: + split = "auto" + + if data_args and "split" in data_args: + split = data_args["split"] + del data_args["split"] + + return split + + @classmethod + def extract_args_column_mappings( + cls, + data_args: Optional[Dict[str, Any]], + ) -> Dict[ColumnInputTypes, str]: + columns: Dict[ColumnInputTypes, str] = {} + + if data_args: + if "prompt_column" in data_args: + columns["prompt_column"] = data_args["prompt_column"] + del data_args["prompt_column"] + + if "prompt_tokens_count_column" in data_args: + columns["prompt_tokens_count_column"] = data_args[ + "prompt_tokens_count_column" + ] + del data_args["prompt_tokens_count_column"] + + if "output_tokens_count_column" in data_args: + columns["output_tokens_count_column"] = data_args[ + "output_tokens_count_column" + ] + del data_args["output_tokens_count_column"] + + return columns + + @classmethod + def extract_dataset_name( + cls, dataset: Union[Dataset, IterableDataset, DatasetDict, IterableDatasetDict] + ) -> Optional[str]: + if isinstance(dataset, (DatasetDict, IterableDatasetDict)): + dataset = dataset[list(dataset.keys())[0]] + + if isinstance(dataset, (Dataset, IterableDataset)): + if not hasattr(dataset, "info") or not hasattr( + dataset.info, "dataset_name" + ): + return None + + return dataset.info.dataset_name + + raise ValueError(f"Unsupported data type: {type(dataset)} given for {dataset}.") + + @classmethod + def extract_dataset_split( + cls, + dataset: Union[DatasetDict, IterableDatasetDict], + specified_split: Union[Literal["auto"], str] = "auto", + split_pref_order: Optional[Union[Literal["auto"], List[str]]] = "auto", + ) -> Union[Dataset, IterableDataset]: + if not isinstance(dataset, (DatasetDict, IterableDatasetDict)): + raise ValueError( + f"Unsupported data type: {type(dataset)} given for {dataset}." + ) + + if specified_split != "auto": + if specified_split not in dataset: + raise ValueError( + f"Split {specified_split} not found in dataset {dataset}." 
+ ) + + return dataset[specified_split] + + dataset_name = cls.extract_dataset_name(dataset) + + if dataset_name and dataset_name in cls.DEFAULT_SPLITS_DATASET: + return dataset[cls.DEFAULT_SPLITS_DATASET[dataset_name]] + + if split_pref_order == "auto": + split_pref_order = [ + *cls.DEFAULT_SPLITS_TEST, + *cls.DEFAULT_SPLITS_VAL, + *cls.DEFAULT_SPLITS_CALIB, + *cls.DEFAULT_SPLITS_TRAIN, + ] + + for test_split in split_pref_order or []: + if test_split in dataset: + return dataset[test_split] + + return dataset[list(dataset.keys())[0]] + + @classmethod + @abstractmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: ... + + @classmethod + @abstractmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int, + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: ... diff --git a/src/guidellm/dataset/entrypoints.py b/src/guidellm/dataset/entrypoints.py new file mode 100644 index 00000000..5abf0112 --- /dev/null +++ b/src/guidellm/dataset/entrypoints.py @@ -0,0 +1,42 @@ +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union + +from datasets import Dataset, IterableDataset +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import ColumnInputTypes +from guidellm.dataset.file import FileDatasetCreator +from guidellm.dataset.hf_datasets import HFDatasetsCreator +from guidellm.dataset.in_memory import InMemoryDatasetCreator +from guidellm.dataset.synthetic import SyntheticDatasetCreator + +__all__ = ["load_dataset"] + + +def load_dataset( + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int = 42, + split_pref_order: Optional[List[str]] = None, +) -> Tuple[Union[Dataset, IterableDataset], Dict[ColumnInputTypes, str]]: + creators = [ + InMemoryDatasetCreator, + SyntheticDatasetCreator, + FileDatasetCreator, + HFDatasetsCreator, + ] + + for creator in creators: + if creator.is_supported(data, data_args): + return creator.create( + data, + data_args, + processor, + processor_args, + random_seed, + split_pref_order, + ) + + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. 
") diff --git a/src/guidellm/dataset/file.py b/src/guidellm/dataset/file.py new file mode 100644 index 00000000..9f9cf696 --- /dev/null +++ b/src/guidellm/dataset/file.py @@ -0,0 +1,90 @@ +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import pandas as pd # type: ignore[import] +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + load_dataset, +) +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import DatasetCreator + +__all__ = ["FileDatasetCreator"] + + +class FileDatasetCreator(DatasetCreator): + SUPPORTED_TYPES = { + ".txt", + ".text", + ".csv", + ".json", + ".jsonl", + ".parquet", + ".arrow", + ".hdf5", + ".tar", + } + + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + if isinstance(data, (str, Path)) and (path := Path(data)).exists(): + # local folder or py file, assume supported + return path.suffix.lower() in cls.SUPPORTED_TYPES + + return False + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003 + processor_args: Optional[Dict[str, Any]], # noqa: ARG003 + random_seed: int, # noqa: ARG003 + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + if not isinstance(data, (str, Path)): + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ") + + path = Path(data) + if not path.exists(): + raise FileNotFoundError(f"File not found: {path}") + + if not path.is_file(): + raise ValueError(f"Unsupported data type: {path} given for {path}. ") + + if path.suffix.lower() not in cls.SUPPORTED_TYPES: + raise ValueError(f"Unsupported file type: {path.suffix} given for {path}. ") + + return cls.load_dataset(path, data_args) + + @classmethod + def load_dataset( + cls, path: Path, data_args: Optional[Dict[str, Any]] + ) -> Union[Dataset, IterableDataset]: + if path.suffix.lower() in {".txt", ".text"}: + with path.open("r") as file: + items = file.readlines() + + dataset = Dataset.from_dict({"text": items}, **(data_args or {})) + elif path.suffix.lower() == ".csv": + dataset = load_dataset("csv", data_files=path, **(data_args or {})) + elif path.suffix.lower() in {".json", ".jsonl"}: + dataset = load_dataset("json", data_files=path, **(data_args or {})) + elif path.suffix.lower() == ".parquet": + dataset = load_dataset("parquet", data_files=path, **(data_args or {})) + elif path.suffix.lower() == ".arrow": + dataset = load_dataset("arrow", data_files=path, **(data_args or {})) + elif path.suffix.lower() == ".hdf5": + dataset = Dataset.from_pandas(pd.read_hdf(path), **(data_args or {})) + elif path.suffix.lower() == ".db": + dataset = Dataset.from_sql(con=path, **(data_args or {})) + elif path.suffix.lower() == ".tar": + dataset = load_dataset("webdataset", data_files=path, **(data_args or {})) + else: + raise ValueError(f"Unsupported file type: {path.suffix} given for {path}. 
") + + return dataset diff --git a/src/guidellm/dataset/hf_datasets.py b/src/guidellm/dataset/hf_datasets.py new file mode 100644 index 00000000..e0102538 --- /dev/null +++ b/src/guidellm/dataset/hf_datasets.py @@ -0,0 +1,62 @@ +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + get_dataset_config_info, + load_dataset, +) +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import DatasetCreator + +__all__ = ["HFDatasetsCreator"] + + +class HFDatasetsCreator(DatasetCreator): + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + if isinstance( + data, (Dataset, DatasetDict, IterableDataset, IterableDatasetDict) + ): + # base type is supported + return True + + if isinstance(data, (str, Path)) and (path := Path(data)).exists(): + # local folder or py file, assume supported + return path.is_dir() or path.suffix == ".py" + + if isinstance(data, (str, Path)): + try: + # try to load dataset + return get_dataset_config_info(data) is not None + except Exception: # noqa: BLE001, S110 + pass + + return False + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003 + processor_args: Optional[Dict[str, Any]], # noqa: ARG003 + random_seed: int, # noqa: ARG003 + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + if isinstance(data, (str, Path)): + data = load_dataset(data, **(data_args or {})) + elif data_args: + raise ValueError( + f"data_args should not be provided when data is a {type(data)}" + ) + + if isinstance( + data, (Dataset, DatasetDict, IterableDataset, IterableDatasetDict) + ): + return data + + raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ") diff --git a/src/guidellm/dataset/in_memory.py b/src/guidellm/dataset/in_memory.py new file mode 100644 index 00000000..dc173d2f --- /dev/null +++ b/src/guidellm/dataset/in_memory.py @@ -0,0 +1,131 @@ +from pathlib import Path +from typing import Any, Dict, Iterable, Optional, Union + +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import DatasetCreator + +__all__ = ["InMemoryDatasetCreator"] + + +class InMemoryDatasetCreator(DatasetCreator): + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + return isinstance(data, Iterable) and not isinstance(data, str) + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003 + processor_args: Optional[Dict[str, Any]], # noqa: ARG003 + random_seed: int, # noqa: ARG003 + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + if not isinstance(data, Iterable): + raise TypeError( + f"Unsupported data format. 
Expected Iterable[Any], got {type(data)}" + ) + + if not data: + raise ValueError("Data is empty") + + if isinstance(data, Dict): + # assume data is a dictionary of columns and values: {"c1": ["i1", "i2"]} + data_dict = cls.format_data_dict(data) + elif isinstance(data[0], Dict): # type: ignore[index] + # assume data is a list of dictionaries: [{"c1": "i1"}, {"c1": "i2"}] + data_dict = cls.format_data_iterable_dicts(data) + else: + # assume data is a list of items with no columns: ["i1", "i2"] + data_dict = cls.format_data_iterable_values(data) + + return Dataset.from_dict(data_dict, **(data_args or {})) + + @classmethod + def format_data_dict(cls, data: Dict[Any, Any]) -> Dict[str, Any]: + if not isinstance(data, Dict): + raise TypeError( + f"Unsupported data format. Expected Dict[str, Iterable[Any]], " + f"got {type(data)}" + ) + + if not all( + isinstance(key, str) and isinstance(val, Iterable) + for key, val in data.items() + ): + raise TypeError( + "Unsupported data format. Expected Dict[str, Iterable[Any]], " + f"got {type(data)}" + ) + + samples = len(list(data.values())[0]) + if not all(len(val) == samples for val in data.values()): + raise ValueError( + "Unsupported data format. Not all columns have the same number samples " + f"for {data}" + ) + + return data + + @classmethod + def format_data_iterable_dicts( + cls, data: Iterable[Dict[Any, Any]] + ) -> Dict[str, Any]: + if not isinstance(data, Iterable): + raise TypeError( + f"Unsupported data format. Expected Iterable[Dict[str, Any]], " + f"got {type(data)}" + ) + + if not all(isinstance(item, Dict) for item in data): + raise TypeError( + f"Unsupported data format. Expected Iterable[Dict[str, Any]], " + f"got {type(data)}" + ) + + if not all(isinstance(key, str) for key in data[0]): # type: ignore[index] + raise TypeError( + "Unsupported data format. Expected Dict[str, Any], " + f"but one of the items had a non string column for {data}" + ) + + columns = list(data[0].keys()) # type: ignore[index] + if not all( + len(item) == len(columns) and all(key in item for key in columns) + for item in data + ): + raise ValueError( + "Unsupported data format. Not all items have the same columns " + f"for {data}" + ) + + data_dict: Dict[str, Any] = {key: [] for key in columns} + for item in data: + for key, value in item.items(): + data_dict[key].append(value) + + return data_dict + + @classmethod + def format_data_iterable_values(cls, data: Iterable[Any]) -> Dict[str, Any]: + if not isinstance(data, Iterable): + raise TypeError( + f"Unsupported data format. Expected Iterable[Iterable[Any]], " + f"got {type(data)}" + ) + + first_item = next(iter(data), None) + first_type = type(first_item) + if not all(isinstance(item, first_type) for item in data): + raise TypeError( + f"Unsupported data format. 
Not all types are the same for {data}" + ) + + return {"data": list(data)} diff --git a/src/guidellm/dataset/synthetic.py b/src/guidellm/dataset/synthetic.py new file mode 100644 index 00000000..f2bf69d3 --- /dev/null +++ b/src/guidellm/dataset/synthetic.py @@ -0,0 +1,261 @@ +import json +import random +from pathlib import Path +from typing import Any, Dict, Iterable, Iterator, Literal, Optional, Union + +import yaml +from datasets import ( + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, +) +from pydantic import BaseModel, Field +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset.creator import ColumnInputTypes, DatasetCreator +from guidellm.utils import EndlessTextCreator, IntegerRangeSampler, check_load_processor + +__all__ = [ + "SyntheticDatasetCreator", + "SyntheticDatasetConfig", + "SyntheticTextItemsGenerator", +] + + +class SyntheticDatasetConfig(BaseModel): + prompt_tokens: int = Field( + description="The average number of text tokens generated for prompts.", + gt=0, + ) + prompt_tokens_stdev: Optional[int] = Field( + description="The standard deviation of the tokens generated for prompts.", + gt=0, + default=None, + ) + prompt_tokens_min: Optional[int] = Field( + description="The minimum number of text tokens generated for prompts.", + gt=0, + default=None, + ) + prompt_tokens_max: Optional[int] = Field( + description="The maximum number of text tokens generated for prompts.", + gt=0, + default=None, + ) + output_tokens: int = Field( + description="The average number of text tokens generated for outputs.", + gt=0, + ) + output_tokens_stdev: Optional[int] = Field( + description="The standard deviation of the tokens generated for outputs.", + gt=0, + default=None, + ) + output_tokens_min: Optional[int] = Field( + description="The minimum number of text tokens generated for outputs.", + gt=0, + default=None, + ) + output_tokens_max: Optional[int] = Field( + description="The maximum number of text tokens generated for outputs.", + gt=0, + default=None, + ) + samples: int = Field( + description="The number of samples to generate for the dataset.", + gt=0, + default=1000, + ) + source: str = Field( + description="The source of the text data to be used for generation.", + default="data:prideandprejudice.txt.gz", + ) + + @staticmethod + def parse_str(data: Union[str, Path]) -> "SyntheticDatasetConfig": + if ( + isinstance(data, Path) + or data.strip().endswith(".config") + or data.strip().endswith(".yaml") + ): + return SyntheticDatasetConfig.parse_config_file(data) + + if data.strip().startswith("{"): + return SyntheticDatasetConfig.parse_json(data) + + if data.count("=") > 1: + return SyntheticDatasetConfig.parse_key_value_pairs(data) + + raise ValueError( + f"Unsupported data format. 
Expected JSON or key-value pairs, got {data}" + ) + + @staticmethod + def parse_json(data: str) -> "SyntheticDatasetConfig": + config_dict = json.loads(data.strip()) + + return SyntheticDatasetConfig(**config_dict) + + @staticmethod + def parse_key_value_pairs(data: str) -> "SyntheticDatasetConfig": + config_dict = {} + items = data.strip().split(",") + for item in items: + key, value = item.split("=") + config_dict[key.strip()] = ( + int(value.strip()) if value.strip().isnumeric() else value.strip() + ) + + return SyntheticDatasetConfig(**config_dict) # type: ignore[arg-type] + + @staticmethod + def parse_config_file(data: Union[str, Path]) -> "SyntheticDatasetConfig": + with Path(data).open("r") as file: + config_dict = yaml.safe_load(file) + + return SyntheticDatasetConfig(**config_dict) + + +class SyntheticTextItemsGenerator( + Iterable[ + Dict[ + Literal["prompt", "prompt_tokens_count", "output_tokens_count"], + Union[str, int], + ] + ] +): + def __init__( + self, + config: SyntheticDatasetConfig, + processor: PreTrainedTokenizerBase, + random_seed: int, + ): + self.config = config + self.processor = processor + self.random_seed = random_seed + self.text_creator = EndlessTextCreator( + data=config.source, + ) + + def __iter__( + self, + ) -> Iterator[ + Dict[ + Literal["prompt", "prompt_tokens_count", "output_tokens_count"], + Union[str, int], + ] + ]: + prompt_tokens_sampler = IntegerRangeSampler( + average=self.config.prompt_tokens, + variance=self.config.prompt_tokens_stdev, + min_value=self.config.prompt_tokens_min, + max_value=self.config.prompt_tokens_max, + random_seed=self.random_seed, + ) + output_tokens_sampler = IntegerRangeSampler( + average=self.config.output_tokens, + variance=self.config.output_tokens_stdev, + min_value=self.config.output_tokens_min, + max_value=self.config.output_tokens_max, + random_seed=self.random_seed + 1, # ensure diff dist from prompts + ) + # ensure diff distribution from output tokens + rand = random.Random(self.random_seed + 2) # noqa: S311 + + for _, prompt_tokens, output_tokens in zip( + range(self.config.samples), + prompt_tokens_sampler, + output_tokens_sampler, + ): + start_index = rand.randint(0, len(self.text_creator.words)) + yield { + "prompt": self._create_prompt(prompt_tokens, start_index), + "prompt_tokens_count": prompt_tokens, + "output_tokens_count": output_tokens, + } + + def _create_prompt(self, prompt_tokens: int, start_index: int) -> str: + if prompt_tokens <= 0: + return "" + + left = start_index + right = start_index + 4 * prompt_tokens + + while left < right: + mid = (left + right) // 2 + test_prompt = self.text_creator.create_text(start_index, mid - start_index) + test_tokens = len(self.processor.tokenize(test_prompt)) + + if test_tokens == prompt_tokens: + return test_prompt + elif test_tokens < prompt_tokens: + left = mid + 1 + else: + right = mid + + return self.text_creator.create_text(start_index, left - start_index) + + +class SyntheticDatasetCreator(DatasetCreator): + @classmethod + def is_supported(cls, data: Any, data_args: Optional[Dict[str, Any]]) -> bool: # noqa: ARG003 + if ( + isinstance(data, Path) + and data.exists() + and data.suffix in {".config", ".yaml"} + ): + return True + + if isinstance(data, str): + data_str: str = data.strip() + if ( + data_str.startswith("{") + or data_str.count("=") > 1 + or data_str.endswith((".config", ".yaml")) + ): + return True + + return False + + @classmethod + def handle_create( + cls, + data: Any, + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, 
Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + random_seed: int, + ) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: + processor = check_load_processor( + processor, + processor_args, + error_msg=( + "Processor/tokenizer required for synthetic dataset generation." + ), + ) + + config = SyntheticDatasetConfig.parse_str(data) + generator = SyntheticTextItemsGenerator(config, processor, random_seed) + items = list(generator) + + return Dataset.from_list(items, **(data_args or {})) + + @classmethod + def extract_args_column_mappings( + cls, + data_args: Optional[Dict[str, Any]], + ) -> Dict[ColumnInputTypes, str]: + data_args_columns = super().extract_args_column_mappings(data_args) + + if data_args_columns: + raise ValueError( + f"Column mappings are not supported for synthetic datasets. " + f"Got {data_args_columns}" + ) + + return { + "prompt_column": "prompt", + "prompt_tokens_count_column": "prompt_tokens_count", + "output_tokens_count_column": "output_tokens_count", + } diff --git a/src/guidellm/executor/__init__.py b/src/guidellm/executor/__init__.py deleted file mode 100644 index 7665e898..00000000 --- a/src/guidellm/executor/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .executor import Executor, ExecutorResult -from .profile_generator import Profile, ProfileGenerationMode, ProfileGenerator - -__all__ = [ - "Executor", - "ExecutorResult", - "Profile", - "ProfileGenerationMode", - "ProfileGenerator", -] diff --git a/src/guidellm/executor/executor.py b/src/guidellm/executor/executor.py deleted file mode 100644 index bfecf17f..00000000 --- a/src/guidellm/executor/executor.py +++ /dev/null @@ -1,213 +0,0 @@ -from dataclasses import dataclass -from typing import AsyncGenerator, Optional, Sequence, Union - -from loguru import logger - -from guidellm.backend import Backend -from guidellm.core import TextGenerationBenchmarkReport -from guidellm.executor.profile_generator import ( - Profile, - ProfileGenerationMode, - ProfileGenerator, -) -from guidellm.request import RequestGenerator -from guidellm.scheduler import Scheduler, SchedulerResult - -__all__ = ["Executor", "ExecutorResult"] - - -@dataclass -class ExecutorResult: - """ - Data class representing the result of executing tasks in the Executor. - - :param completed: Indicates whether all tasks have completed. - :type completed: bool - :param count_total: Total number of profiles. - :type count_total: int - :param count_completed: Number of completed profiles. - :type count_completed: int - :param report: A report report for text generation. - :type report: TextGenerationBenchmarkReport - :param scheduler_result: Optional scheduler result for the last task. - :type scheduler_result: Optional[SchedulerResult] - """ - - completed: bool - count_total: int - count_completed: int - generation_modes: Sequence[ProfileGenerationMode] - report: TextGenerationBenchmarkReport - scheduler_result: Optional[SchedulerResult] = None - current_index: Optional[int] = None - current_profile: Optional[Profile] = None - - -class Executor: - """ - The Executor class manages the execution of tasks based on a given profile - generation mode and rate. It orchestrates the interaction between the backend, - request generator, and profile generator, and runs benchmarks accordingly. - - :param backend: The backend to run tasks against. - :type backend: Backend - :param request_generator: The generator that creates requests for execution. 
- :type request_generator: RequestGenerator - :param mode: The mode for profile generation (e.g., sweep, synchronous). - :type mode: ProfileGenerationMode - :param rate: The list of rates for load generation, or None. - :type rate: Optional[List[float]] - :param max_number: Maximum number of requests to generate for the scheduler - (a single report run), or None. - :type max_number: Optional[int] - :param max_duration: Maximum duration for generating requests for the scheduler, - (a single report run), or None. - :type max_duration: Optional[float] - """ - - def __init__( - self, - backend: Backend, - request_generator: RequestGenerator, - mode: ProfileGenerationMode = "sweep", - rate: Optional[Union[float, Sequence[float]]] = None, - max_number: Optional[int] = None, - max_duration: Optional[float] = None, - ): - self._backend = backend - self._generator = request_generator - self._max_number = max_number - self._max_duration = max_duration - self._profile_generator = ProfileGenerator(mode=mode, rate=rate) - logger.info("Executor initialized with mode: {}, rate: {}", mode, rate) - - @property - def backend(self) -> Backend: - """ - Returns the backend being used by the Executor. - - :return: Backend - :rtype: Backend - """ - return self._backend - - @property - def request_generator(self) -> RequestGenerator: - """ - Returns the request generator used by the Executor. - - :return: RequestGenerator - :rtype: RequestGenerator - """ - return self._generator - - @property - def profile_generator(self) -> ProfileGenerator: - """ - Returns the profile generator for generating profiles during execution. - - :return: ProfileGenerator - :rtype: ProfileGenerator - """ - return self._profile_generator - - @property - def max_number(self) -> Optional[int]: - """ - Returns the maximum number of requests to generate. - - :return: Maximum number of requests or None. - :rtype: Optional[int] - """ - return self._max_number - - @property - def max_duration(self) -> Optional[float]: - """ - Returns the maximum duration for generating requests. - - :return: Maximum duration in seconds or None. - :rtype: Optional[float] - """ - return self._max_duration - - async def run(self) -> AsyncGenerator[ExecutorResult, None]: - """ - Runs the Executor, generating and scheduling tasks based on the profile - generation mode. Yields results incrementally. 
- - :rtype: AsyncGenerator[ExecutorResult, None] - """ - report = TextGenerationBenchmarkReport() - report.args = { - # backend args - "backend_type": self.backend.type_, - "target": self.backend.target, - "model": self.backend.model, - # data args - "data_type": self.request_generator.type_, - "data": self.request_generator.source, - "tokenizer": self.request_generator.tokenizer.name_or_path, - # rate args - "mode": self.profile_generator.mode, - "rate": self.profile_generator.rates, - # limits args - "max_number": self.max_number, - "max_duration": self.max_duration, - } - profile_index = -1 - logger.info("Starting Executor run") - - yield ExecutorResult( - completed=False, - count_total=len(self.profile_generator), - count_completed=0, - generation_modes=self.profile_generator.profile_generation_modes, - report=report, - ) - - while profile := self.profile_generator.next(report): - logger.debug("Generated profile: {}", profile) - scheduler = Scheduler( - generator=self.request_generator, - backend=self.backend, - mode=profile.load_gen_mode, - rate=profile.load_gen_rate, - max_number=self.max_number or profile.args.get("max_number", None), - max_duration=self.max_duration, - ) - profile_index += 1 - - logger.info( - "Scheduling tasks with mode: {}, rate: {}", - profile.load_gen_mode, - profile.load_gen_rate, - ) - - async for scheduler_result in scheduler.run(): - if scheduler_result.completed: - report.add_benchmark(scheduler_result.benchmark) - logger.debug( - "Benchmark added for scheduler result: {}", - scheduler_result.benchmark, - ) - - yield ExecutorResult( - completed=False, - count_total=len(self.profile_generator), - count_completed=len(report.benchmarks), - generation_modes=self.profile_generator.profile_generation_modes, - report=report, - scheduler_result=scheduler_result, - current_index=profile_index, - current_profile=profile, - ) - - logger.info("Executor run completed") - yield ExecutorResult( - completed=True, - count_total=len(self.profile_generator), - count_completed=len(report.benchmarks), - generation_modes=self.profile_generator.profile_generation_modes, - report=report, - ) diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py deleted file mode 100644 index 1f857f78..00000000 --- a/src/guidellm/executor/profile_generator.py +++ /dev/null @@ -1,347 +0,0 @@ -from typing import Any, Dict, List, Literal, Optional, Sequence, Union, get_args - -import numpy as np -from loguru import logger -from numpy._typing import NDArray -from pydantic import Field - -from guidellm.config import settings -from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport -from guidellm.core.serializable import Serializable -from guidellm.scheduler import LoadGenerationMode - -__all__ = [ - "Profile", - "ProfileGenerationMode", - "ProfileGenerator", -] - -ProfileGenerationMode = Literal[ - "sweep", "synchronous", "throughput", "constant", "poisson" -] - - -class Profile(Serializable): - """ - A data class representing a profile for load generation. - - :param load_gen_mode: The mode of load generation (e.g., constant, poisson). - :type load_gen_mode: LoadGenerationMode - :param load_gen_rate: The rate of load generation, if applicable. - :type load_gen_rate: Optional[float] - :param args: Additional arguments for the profile. 
- :type args: Optional[Dict[str, Any]] - """ - - load_gen_mode: LoadGenerationMode - load_gen_rate: Optional[float] = None - args: Dict[str, Any] = Field(default_factory=dict) - - -class ProfileGenerator: - """ - Generates profiles based on different load generation modes. - - :param mode: The mode for profile generation (e.g., sweep, synchronous). - :type mode: ProfileGenerationMode - :param rate: The rate(s) for load generation; could be a float or list of floats. - :type rate: Optional[Union[float, Sequence[float]]] - """ - - def __init__( - self, - mode: ProfileGenerationMode, - rate: Optional[Union[float, Sequence[float]]] = None, - ): - if mode not in get_args(ProfileGenerationMode): - err = ValueError( - f"{mode} is not a valid Profile Generation Mode. " - f"Valid options are {get_args(ProfileGenerationMode)}" - ) - logger.error(err) - raise err - - self._mode = mode - - if self._mode in ("sweep", "throughput", "synchronous"): - if rate is not None: - err = ValueError(f"Rates are not applicable for {self._mode} mode") - logger.error(err) - raise err - self._rates = None - else: - if not rate: - err = ValueError(f"Rates are required for {self._mode} mode") - logger.error(err) - raise err - self._rates = rate if isinstance(rate, Sequence) else [rate] - - for rt in self._rates: - if rt <= 0: - err = ValueError( - f"Rate must be > 0 for mode: {self._mode}. Given: {rt}" - ) - logger.error(err) - raise err - - self._generated_count = 0 - - def __len__(self) -> int: - """ - Returns the number of profiles to generate based on the mode and rates. - - :return: The number of profiles. - :rtype: int - """ - if self._mode == "sweep": - return settings.num_sweep_profiles + 2 - - if self._mode in ("throughput", "synchronous"): - return 1 - - if not self._rates: - raise ValueError(f"Rates are required for {self._mode} mode") - - return len(self._rates) - - @property - def mode(self) -> ProfileGenerationMode: - """ - Returns the current mode of profile generation. - - :return: The profile generation mode. - :rtype: ProfileGenerationMode - """ - return self._mode - - @property - def rates(self) -> Optional[Sequence[float]]: - """ - Returns the list of rates for load generation, if any. - - :return: Sequence of rates or None if not applicable. - :rtype: Optional[Sequence[float]] - """ - return self._rates - - @property - def generated_count(self) -> int: - """ - Returns the current count of generated profiles. - - :return: The current count of generated profiles. - :rtype: int - """ - return self._generated_count - - @property - def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]: - """ - Return the list of profile modes to be run in the report. - - :return: Sequence of profile modes to be run in the report. - :rtype: Sequence[ProfileGenerationMode] - """ - if self._mode == "sweep": - return ["synchronous", "throughput"] + ["constant"] * ( # type: ignore # noqa: PGH003 - settings.num_sweep_profiles - ) - - if self._mode in ["throughput", "synchronous"]: - return [self._mode] - - if self._rates is None: - raise ValueError(f"Rates are required for {self._mode} mode") - - if self._mode in ["constant", "poisson"]: - return [self._mode] * len(self._rates) - - raise ValueError(f"Invalid mode: {self._mode}") - - def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]: - """ - Generates the next profile based on the current mode and report. - - :param current_report: The current report report. 
- :type current_report: TextGenerationBenchmarkReport - :return: The generated profile or None if no more profiles. - :rtype: Optional[Profile] - """ - logger.debug( - "Generating the next profile with mode: {}, current report: {}", - self.mode, - current_report, - ) - - if self.mode in ["constant", "poisson"]: - if not self.rates: - err = ValueError(f"Rates are required for {self.mode} mode") - logger.error(err) - raise err - - profile = self.create_fixed_rate_profile( - self.generated_count, - self.mode, - self.rates, - ) - elif self.mode == "synchronous": - profile = self.create_synchronous_profile(self.generated_count) - elif self.mode == "throughput": - profile = self.create_throughput_profile(self.generated_count) - elif self.mode == "sweep": - profile = self.create_sweep_profile( - self.generated_count, - sync_benchmark=( - current_report.benchmarks[0] if current_report.benchmarks else None - ), - throughput_benchmark=( - current_report.benchmarks[1] - if len(current_report.benchmarks) > 1 - else None - ), - ) - else: - err = ValueError(f"Invalid mode: {self.mode}") - logger.error(err) - raise err - - self._generated_count += 1 - logger.info( - "Generated profile: {}, total generated count: {}", - profile, - self._generated_count, - ) - return profile - - @staticmethod - def create_fixed_rate_profile( - index: int, mode: ProfileGenerationMode, rates: Sequence[float] - ) -> Optional[Profile]: - """ - Creates a profile with a fixed rate. - - :param index: The index of the rate in the list. - :type index: int - :param mode: The mode for profile generation (e.g., constant, poisson). - :type mode: ProfileGenerationMode - :param rates: The list of rates for load generation. - :type rates: Sequence[float] - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - modes_map: Dict[str, LoadGenerationMode] = { - "constant": "constant", - "poisson": "poisson", - } - - if mode not in modes_map: - err = ValueError(f"Invalid mode: {mode}") - logger.error(err) - raise err - - profile = ( - Profile( - load_gen_mode=modes_map[mode], - load_gen_rate=rates[index], - ) - if index < len(rates) - else None - ) - logger.debug("Created fixed rate profile: {}", profile) - return profile - - @staticmethod - def create_synchronous_profile(index: int) -> Optional[Profile]: - """ - Creates a profile with synchronous mode. - - :param index: The index of the profile to create. - :type index: int - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - profile = ( - Profile( - load_gen_mode="synchronous", - load_gen_rate=None, - ) - if index < 1 - else None - ) - logger.debug("Created synchronous profile: {}", profile) - return profile - - @staticmethod - def create_throughput_profile(index: int) -> Optional[Profile]: - """ - Creates a profile with throughput mode. - - :param index: The index of the profile to create. - :type index: int - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - profile = ( - Profile( - load_gen_mode="throughput", - load_gen_rate=None, - ) - if index < 1 - else None - ) - logger.debug("Created throughput profile: {}", profile) - return profile - - @staticmethod - def create_sweep_profile( - index: int, - sync_benchmark: Optional[TextGenerationBenchmark], - throughput_benchmark: Optional[TextGenerationBenchmark], - ) -> Optional[Profile]: - """ - Creates a profile with sweep mode, generating profiles between - synchronous and throughput benchmarks. 
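Note: the sweep behavior described here (and implemented just below) anchors the sweep at the measured synchronous and throughput request rates, then fills the remaining profiles with constant rates spaced evenly between them. A standalone numpy sketch of that interpolation; the counts and measured rates are illustrative stand-ins for settings.num_sweep_profiles and the benchmark results.

import numpy as np

num_sweep_profiles = 8    # stand-in for settings.num_sweep_profiles
sync_rate = 1.7           # measured synchronous requests/second (illustrative)
throughput_rate = 24.3    # measured throughput requests/second (illustrative)

# Evenly spaced rates between the two anchors, skipping the synchronous endpoint.
sweep_rates = np.linspace(sync_rate, throughput_rate, num_sweep_profiles + 1)[1:]
print([round(float(rate), 2) for rate in sweep_rates])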
- - :param index: The index of the profile to create. - :type index: int - :param sync_benchmark: The synchronous report data. - :type sync_benchmark: Optional[TextGenerationBenchmark] - :param throughput_benchmark: The throughput report data. - :type throughput_benchmark: Optional[TextGenerationBenchmark] - :return: The generated profile or None if index is out of range. - :rtype: Optional[Profile] - """ - if index < 0 or index >= settings.num_sweep_profiles + 2: - return None - - if index == 0: - return ProfileGenerator.create_synchronous_profile(0) - - if not sync_benchmark: - err = ValueError("Synchronous report is required for sweep mode") - logger.error(err) - raise err - - if index == 1: - throughput_profile: Profile = ProfileGenerator.create_throughput_profile(0) # type: ignore # noqa: PGH003 - return throughput_profile - - if not throughput_benchmark: - err = ValueError("Throughput report is required for sweep mode") - logger.error(err) - raise err - - min_rate = sync_benchmark.completed_request_rate - max_rate = throughput_benchmark.completed_request_rate - intermediate_rates: List[NDArray] = list( - np.linspace(min_rate, max_rate, settings.num_sweep_profiles + 1) - )[1:] - - return Profile( - load_gen_mode="constant", - load_gen_rate=( - float(load_gen_rate) - if (load_gen_rate := intermediate_rates[index - 2]) - else 1.0 # the fallback value - ), - ) diff --git a/src/guidellm/main.py b/src/guidellm/main.py deleted file mode 100644 index e7363c6e..00000000 --- a/src/guidellm/main.py +++ /dev/null @@ -1,346 +0,0 @@ -import asyncio -from typing import Any, Literal, Mapping, Optional, Union, get_args - -import click -from loguru import logger -from transformers import AutoTokenizer # type: ignore[import-untyped] - -from guidellm.backend import Backend, BackendType -from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport -from guidellm.executor import Executor, ProfileGenerationMode -from guidellm.request import ( - EmulatedRequestGenerator, - FileRequestGenerator, - TransformersDatasetRequestGenerator, -) -from guidellm.request.base import RequestGenerator -from guidellm.utils import BenchmarkReportProgress, cli_params - -__all__ = ["generate_benchmark_report"] - - -@click.command() -@click.option( - "--target", - type=str, - required=True, - help=( - "The target path or url for the backend to evaluate. " - "Ex: 'http://localhost:8000'" - ), -) -@click.option( - "--backend", - type=click.Choice(get_args(BackendType)), - default="openai_http", - help=( - "The backend to use for benchmarking. " - "The default is OpenAI Server enabling compatability with any server that " - "follows the OpenAI spec including vLLM." - ), -) -@click.option( - "--model", - type=str, - default=None, - help=( - "The Model to use for benchmarking. If not provided, it will use " - "the first available model provided the backend supports listing models." - ), -) -@click.option( - "--data", - type=str, - required=True, - help=( - "The data source to use for benchmarking. " - "Depending on the data-type, it should be a " - "path to a data file containing prompts to run (ex: data.txt), " - "a HuggingFace dataset name (ex: 'neuralmagic/LLM_compression_calibration'), " - "or a configuration for emulated data " - "(ex: 'prompt_tokens=128,generated_tokens=128')." - ), -) -@click.option( - "--data-type", - type=click.Choice(["emulated", "file", "transformers"]), - required=True, - help=( - "The type of data to use for benchmarking. 
" - "Use 'emulated' for synthetic data, 'file' for a file, or 'transformers' " - "for a HuggingFace dataset. Specify the data source with the --data flag." - ), -) -@click.option( - "--tokenizer", - type=str, - default=None, - help=( - "The tokenizer to use for calculating the number of prompt tokens. " - "This should match the tokenizer used by the model." - "By default, it will use the --model flag to determine the tokenizer. " - "If not provided and the model is not available, will raise an error. " - "Ex: 'neuralmagic/Meta-Llama-3.1-8B-quantized.w8a8'" - ), -) -@click.option( - "--rate-type", - type=click.Choice(get_args(ProfileGenerationMode)), - default="sweep", - help=( - "The type of request rate to use for benchmarking. " - "Use sweep to run a full range from synchronous to throughput (default), " - "synchronous for sending requests one after the other, " - "throughput to send requests as fast as possible, " - "constant for a fixed request rate, " - "or poisson for a real-world variable request rate." - ), -) -@click.option( - "--rate", - type=float, - default=None, - help=( - "The request rate to use for constant and poisson rate types. " - "To run multiple, provide the flag multiple times. " - ), - multiple=True, -) -@click.option( - "--max-seconds", - type=int, - default=120, - help=( - "The maximum number of seconds for each benchmark run. " - "Either max-seconds, max-requests, or both must be set. " - "The default is 120 seconds. " - "Note, this is the maximum time for each rate supplied, not the total time. " - "This value should be large enough to allow for " - "the server's performance to stabilize." - ), -) -@click.option( - "--max-requests", - type=cli_params.MAX_REQUESTS, - default=None, - help=( - "The maximum number of requests for each benchmark run. " - "Either max-seconds, max-requests, or both must be set. " - "Note, this is the maximum number of requests for each rate supplied, " - "not the total number of requests. " - "This value should be large enough to allow for " - "the server's performance to stabilize." - ), -) -@click.option( - "--output-path", - type=str, - default=None, - help=( - "The output path to save the output report to for loading later. " - "Ex: guidance_report.json. " - "The default is None, meaning no output is saved and results are only " - "printed to the console." - ), -) -@click.option( - "--enable-continuous-refresh", - is_flag=True, - default=False, - help=( - "Enable continual refreshing of the output table in the CLI " - "until the user exits. " - ), -) -def generate_benchmark_report_cli( - target: str, - backend: BackendType, - model: Optional[str], - data: Optional[str], - data_type: Literal["emulated", "file", "transformers"], - tokenizer: Optional[str], - rate_type: ProfileGenerationMode, - rate: Optional[float], - max_seconds: Optional[int], - max_requests: Union[Literal["dataset"], int, None], - output_path: str, - enable_continuous_refresh: bool, -): - """ - Generate a benchmark report for a specified backend and dataset. 
- """ - generate_benchmark_report( - target=target, - backend=backend, - model=model, - data=data, - data_type=data_type, - tokenizer=tokenizer, - rate_type=rate_type, - rate=rate, - max_seconds=max_seconds, - max_requests=max_requests, - output_path=output_path, - cont_refresh_table=enable_continuous_refresh, - ) - - -def generate_benchmark_report( - target: str, - data: Optional[str], - data_type: Literal["emulated", "file", "transformers"], - backend: BackendType = "openai_http", - backend_kwargs: Optional[Mapping[str, Any]] = None, - model: Optional[str] = None, - tokenizer: Optional[str] = None, - rate_type: ProfileGenerationMode = "sweep", - rate: Optional[float] = None, - max_seconds: Optional[int] = 120, - max_requests: Union[Literal["dataset"], int, None] = None, - output_path: Optional[str] = None, - cont_refresh_table: bool = False, -) -> GuidanceReport: - """ - Generate a benchmark report for a specified backend and dataset. - - :param target: The target URL or path for the backend to evaluate. - :param backend: The backend type to use for benchmarking. - :param model: The model to benchmark; - defaults to the first available if not specified. - :param data: The data source for benchmarking, - which may be a path, dataset name, or config. - :param data_type: The type of data to use, - such as 'emulated', 'file', or 'transformers'. - :param tokenizer: The tokenizer to use for token counting, - defaulting to Llama 3.1 if not provided. - :param rate_type: The rate type for requests during benchmarking. - :param rate: The specific request rate for constant and poisson rate types. - :param max_seconds: Maximum duration for each benchmark run in seconds. - :param max_requests: Maximum number of requests per benchmark run. - :param output_path: Path to save the output report file. - :param cont_refresh_table: Continually refresh the table in the CLI - until the user exits. - :param backend_kwargs: Additional keyword arguments for the backend. 
- """ - logger.info( - "Generating benchmark report with target: {}, backend: {}", target, backend - ) - - # Create backend - backend_inst = Backend.create( - type_=backend, - target=target, - model=model, - **(backend_kwargs or {}), - ) - backend_inst.validate() - - request_generator: RequestGenerator - - # Create tokenizer and request generator - tokenizer_inst = tokenizer - if not tokenizer_inst: - try: - tokenizer_inst = AutoTokenizer.from_pretrained(backend_inst.model) - except Exception as err: - raise ValueError( - "Could not load model's tokenizer, " - "--tokenizer must be provided for request generation" - ) from err - - if data_type == "emulated": - request_generator = EmulatedRequestGenerator( - config=data, tokenizer=tokenizer_inst - ) - elif data_type == "file": - request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer_inst) - elif data_type == "transformers": - request_generator = TransformersDatasetRequestGenerator( - dataset=data, tokenizer=tokenizer_inst - ) - else: - raise ValueError(f"Unknown data type: {data_type}") - - if data_type == "emulated" and max_requests == "dataset": - raise ValueError("Cannot use 'dataset' for emulated data") - - # Create executor - executor = Executor( - backend=backend_inst, - request_generator=request_generator, - mode=rate_type, - rate=rate if rate_type in ("constant", "poisson") else None, - max_number=( - len(request_generator) if max_requests == "dataset" else max_requests - ), - max_duration=max_seconds, - ) - - # Run executor - logger.debug( - "Running executor with args: {}", - { - "backend": backend, - "request_generator": request_generator, - "mode": rate_type, - "rate": rate, - "max_number": max_requests, - "max_duration": max_seconds, - }, - ) - report = asyncio.run(_run_executor_for_result(executor)) - - # Save and print report - guidance_report = GuidanceReport() - guidance_report.benchmarks.append(report) - - if output_path: - guidance_report.save_file(output_path) - - guidance_report.print( - save_path=output_path if output_path is not None else "stdout", - continual_refresh=cont_refresh_table, - ) - - return guidance_report - - -async def _run_executor_for_result(executor: Executor) -> TextGenerationBenchmarkReport: - report = None - progress = BenchmarkReportProgress() - started = False - - async for result in executor.run(): - if not started: - progress.start(result.generation_modes) # type: ignore # noqa: PGH003 - started = True - - if result.current_index is not None: - description = f"{result.current_profile.load_gen_mode}" # type: ignore # noqa: PGH003 - if result.current_profile.load_gen_mode in ("constant", "poisson"): # type: ignore # noqa: PGH003 - description += f"@{result.current_profile.load_gen_rate:.2f} req/s" # type: ignore # noqa: PGH003 - - progress.update_benchmark( - index=result.current_index, - description=description, - completed=result.scheduler_result.completed, # type: ignore # noqa: PGH003 - completed_count=result.scheduler_result.count_completed, # type: ignore # noqa: PGH003 - completed_total=result.scheduler_result.count_total, # type: ignore # noqa: PGH003 - start_time=result.scheduler_result.benchmark.start_time, # type: ignore # noqa: PGH003 - req_per_sec=result.scheduler_result.benchmark.completed_request_rate, # type: ignore # noqa: PGH003 - ) - - if result.completed: - report = result.report - break - - progress.finish() - - if not report: - raise ValueError("No report generated by executor") - - return report - - -if __name__ == "__main__": - 
generate_benchmark_report_cli() diff --git a/src/guidellm/objects/__init__.py b/src/guidellm/objects/__init__.py new file mode 100644 index 00000000..168570dd --- /dev/null +++ b/src/guidellm/objects/__init__.py @@ -0,0 +1,18 @@ +from .pydantic import StandardBaseModel, StatusBreakdown +from .statistics import ( + DistributionSummary, + Percentiles, + RunningStats, + StatusDistributionSummary, + TimeRunningStats, +) + +__all__ = [ + "StandardBaseModel", + "StatusBreakdown", + "DistributionSummary", + "Percentiles", + "RunningStats", + "StatusDistributionSummary", + "TimeRunningStats", +] diff --git a/src/guidellm/objects/pydantic.py b/src/guidellm/objects/pydantic.py new file mode 100644 index 00000000..b6e998fa --- /dev/null +++ b/src/guidellm/objects/pydantic.py @@ -0,0 +1,60 @@ +from typing import Any, Generic, TypeVar + +from loguru import logger +from pydantic import BaseModel, ConfigDict, Field + +__all__ = ["StandardBaseModel", "StatusBreakdown"] + + +class StandardBaseModel(BaseModel): + """ + A base class for Pydantic models throughout GuideLLM enabling standard + configuration and logging. + """ + + model_config = ConfigDict( + extra="allow", + use_enum_values=True, + validate_assignment=True, + from_attributes=True, + ) + + def __init__(self, /, **data: Any) -> None: + super().__init__(**data) + logger.debug( + "Initialized new instance of {} with data: {}", + self.__class__.__name__, + data, + ) + + +SuccessfulT = TypeVar("SuccessfulT") +ErroredT = TypeVar("ErroredT") +IncompleteT = TypeVar("IncompleteT") +TotalT = TypeVar("TotalT") + + +class StatusBreakdown(BaseModel, Generic[SuccessfulT, ErroredT, IncompleteT, TotalT]): + """ + A base class for Pydantic models that are separated by statuses including + successful, incomplete, and errored. It additionally enables the inclusion + of total, which is intended as the combination of all statuses. + Total may or may not be used depending on if it duplicates information. + """ + + successful: SuccessfulT = Field( + description="The results with a successful status.", + default=None, # type: ignore[assignment] + ) + errored: ErroredT = Field( + description="The results with an errored status.", + default=None, # type: ignore[assignment] + ) + incomplete: IncompleteT = Field( + description="The results with an incomplete status.", + default=None, # type: ignore[assignment] + ) + total: TotalT = Field( + description="The combination of all statuses.", + default=None, # type: ignore[assignment] + ) diff --git a/src/guidellm/objects/statistics.py b/src/guidellm/objects/statistics.py new file mode 100644 index 00000000..0e43cdbd --- /dev/null +++ b/src/guidellm/objects/statistics.py @@ -0,0 +1,947 @@ +import math +import time as timer +from collections import defaultdict +from typing import Any, Dict, List, Literal, Optional, Tuple + +import numpy as np +from pydantic import Field, computed_field + +from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown + +__all__ = [ + "Percentiles", + "DistributionSummary", + "StatusDistributionSummary", + "RunningStats", + "TimeRunningStats", +] + + +class Percentiles(StandardBaseModel): + """ + A pydantic model representing the standard percentiles of a distribution. 
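Note: the StatusBreakdown generic above takes one type per status slot and, with pydantic v2, can be specialized directly at the call site. A small sketch of how a per-status request count might be represented, assuming the new guidellm.objects package from this patch is importable; the counts are made up.

from guidellm.objects.pydantic import StatusBreakdown

# Specialize the generic with int for every status slot (values are illustrative).
request_counts = StatusBreakdown[int, int, int, int](
    successful=118,
    errored=3,
    incomplete=4,
    total=125,
)
print(request_counts.model_dump())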
+ """ + + p001: float = Field( + description="The 0.1th percentile of the distribution.", + ) + p01: float = Field( + description="The 1st percentile of the distribution.", + ) + p05: float = Field( + description="The 5th percentile of the distribution.", + ) + p10: float = Field( + description="The 10th percentile of the distribution.", + ) + p25: float = Field( + description="The 25th percentile of the distribution.", + ) + p75: float = Field( + description="The 75th percentile of the distribution.", + ) + p90: float = Field( + description="The 90th percentile of the distribution.", + ) + p95: float = Field( + description="The 95th percentile of the distribution.", + ) + p99: float = Field( + description="The 99th percentile of the distribution.", + ) + p999: float = Field( + description="The 99.9th percentile of the distribution.", + ) + + +class DistributionSummary(StandardBaseModel): + """ + A pydantic model representing a statistical summary for a given + distribution of numerical values. + """ + + mean: float = Field( + description="The mean/average of the distribution.", + ) + median: float = Field( + description="The median of the distribution.", + ) + mode: float = Field( + description="The mode of the distribution.", + ) + variance: float = Field( + description="The variance of the distribution.", + ) + std_dev: float = Field( + description="The standard deviation of the distribution.", + ) + min: float = Field( + description="The minimum value of the distribution.", + ) + max: float = Field( + description="The maximum value of the distribution.", + ) + count: int = Field( + description="The number of values in the distribution.", + ) + total_sum: float = Field( + description="The total sum of the values in the distribution.", + ) + percentiles: Percentiles = Field( + description="The percentiles of the distribution.", + ) + cumulative_distribution_function: Optional[List[Tuple[float, float]]] = Field( + description="The cumulative distribution function (CDF) of the distribution.", + default=None, + ) + + @staticmethod + def from_distribution_function( + distribution: List[Tuple[float, float]], + include_cdf: bool = False, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of weighted numerical + values or a probability distribution function (PDF). + 1. If the distribution is a PDF, it is expected to be a list of tuples + where each tuple contains (value, probability). The sum of the + probabilities should be 1. If it is not, it will be normalized. + 2. If the distribution is a values distribution function, it is expected + to be a list of tuples where each tuple contains (value, weight). + The weights are normalized to a probability distribution function. + + :param distribution: A list of tuples representing the distribution. + Each tuple contains (value, weight) or (value, probability). + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. + :return: An instance of DistributionSummary with calculated values. 
+ """ + values, weights = zip(*distribution) if distribution else ([], []) + values = np.array(values) # type: ignore[assignment] + weights = np.array(weights) # type: ignore[assignment] + + # create the PDF + probabilities = weights / np.sum(weights) # type: ignore[operator] + pdf = np.column_stack((values, probabilities)) + pdf = pdf[np.argsort(pdf[:, 0])] + values = pdf[:, 0] # type: ignore[assignment] + probabilities = pdf[:, 1] + + # calculate the CDF + cumulative_probabilities = np.cumsum(probabilities) + cdf = np.column_stack((values, cumulative_probabilities)) + + # calculate statistics + mean = np.sum(values * probabilities).item() # type: ignore[attr-defined] + median = cdf[np.argmax(cdf[:, 1] >= 0.5), 0].item() if len(cdf) > 0 else 0 # noqa: PLR2004 + mode = values[np.argmax(probabilities)].item() if len(values) > 0 else 0 # type: ignore[call-overload] + variance = np.sum((values - mean) ** 2 * probabilities).item() # type: ignore[attr-defined] + std_dev = math.sqrt(variance) + minimum = values[0].item() if len(values) > 0 else 0 + maximum = values[-1].item() if len(values) > 0 else 0 + count = len(values) + total_sum = np.sum(values).item() # type: ignore[attr-defined] + + return DistributionSummary( + mean=mean, + median=median, + mode=mode, + variance=variance, + std_dev=std_dev, + min=minimum, + max=maximum, + count=count, + total_sum=total_sum, + percentiles=( + Percentiles( + p001=cdf[np.argmax(cdf[:, 1] >= 0.001), 0].item(), # noqa: PLR2004 + p01=cdf[np.argmax(cdf[:, 1] >= 0.01), 0].item(), # noqa: PLR2004 + p05=cdf[np.argmax(cdf[:, 1] >= 0.05), 0].item(), # noqa: PLR2004 + p10=cdf[np.argmax(cdf[:, 1] >= 0.1), 0].item(), # noqa: PLR2004 + p25=cdf[np.argmax(cdf[:, 1] >= 0.25), 0].item(), # noqa: PLR2004 + p75=cdf[np.argmax(cdf[:, 1] >= 0.75), 0].item(), # noqa: PLR2004 + p90=cdf[np.argmax(cdf[:, 1] >= 0.9), 0].item(), # noqa: PLR2004 + p95=cdf[np.argmax(cdf[:, 1] >= 0.95), 0].item(), # noqa: PLR2004 + p99=cdf[np.argmax(cdf[:, 1] >= 0.99), 0].item(), # noqa: PLR2004 + p999=cdf[np.argmax(cdf[:, 1] >= 0.999), 0].item(), # noqa: PLR2004 + ) + if len(cdf) > 0 + else Percentiles( + p001=0, + p01=0, + p05=0, + p10=0, + p25=0, + p75=0, + p90=0, + p95=0, + p99=0, + p999=0, + ) + ), + cumulative_distribution_function=cdf.tolist() if include_cdf else None, + ) + + @staticmethod + def from_values( + values: List[float], + weights: Optional[List[float]] = None, + include_cdf: bool = False, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of numerical values. + This is a wrapper around from_distribution_function to handle the optional case + of including weights for the values. If weights are not provided, they are + automatically set to 1.0 for each value, so each value is equally weighted. + + :param values: A list of numerical values representing the distribution. + :param weights: A list of weights for each value in the distribution. + If not provided, all values are equally weighted. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. 
+ """ + if weights is None: + weights = [1.0] * len(values) + + if len(values) != len(weights): + raise ValueError( + "The length of values and weights must be the same.", + ) + + return DistributionSummary.from_distribution_function( + distribution=list(zip(values, weights)), + include_cdf=include_cdf, + ) + + @staticmethod + def from_request_times( + requests: List[Tuple[float, float]], + distribution_type: Literal["concurrency", "rate"], + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of request times. + Specifically, this is used to measure concurrency or rate of requests + given an input list containing the start and end time of each request. + This will first convert the request times into a distribution function + and then calculate the statistics with from_distribution_function. + + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...] + :param distribution_type: The type of distribution to calculate. + Either "concurrency" or "rate". + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. + :param epsilon: The epsilon value for merging close events. + :return: An instance of DistributionSummary with calculated values. + """ + if distribution_type == "concurrency": + # convert to delta changes based on when requests were running + time_deltas: Dict[float, int] = defaultdict(int) + for start, end in requests: + time_deltas[start] += 1 + time_deltas[end] -= 1 + + # convert to the events over time measuring concurrency changes + events = [] + active = 0 + + for time, delta in sorted(time_deltas.items()): + active += delta + events.append((time, active)) + elif distribution_type == "rate": + # convert to events for when requests finished + global_start = min(start for start, _ in requests) if requests else 0 + events = [(global_start, 1)] + [(end, 1) for _, end in requests] + else: + raise ValueError( + f"Invalid distribution_type '{distribution_type}'. " + "Must be 'concurrency' or 'rate'." 
+ ) + + # combine any events that are very close together + flattened_events: List[Tuple[float, float]] = [] + for time, val in sorted(events): + last_time, last_val = ( + flattened_events[-1] if flattened_events else (None, None) + ) + + if ( + last_time is not None + and last_val is not None + and abs(last_time - time) <= epsilon + ): + flattened_events[-1] = (last_time, last_val + val) + else: + flattened_events.append((time, val)) + + # convert to value distribution function + distribution: Dict[float, float] = defaultdict(float) + + for ind in range(len(flattened_events) - 1): + start_time, value = flattened_events[ind] + end_time, _ = flattened_events[ind + 1] + duration = end_time - start_time + + if distribution_type == "concurrency": + # weight the concurrency value by the duration + distribution[value] += duration + elif distribution_type == "rate": + # weight the rate value by the duration + rate = value / duration + distribution[rate] += duration + + distribution_list: List[Tuple[float, float]] = sorted(distribution.items()) + + return DistributionSummary.from_distribution_function( + distribution=distribution_list, + include_cdf=include_cdf, + ) + + @staticmethod + def from_iterable_request_times( + requests: List[Tuple[float, float]], + first_iter_times: List[float], + iter_counts: List[int], + first_iter_counts: Optional[List[int]] = None, + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "DistributionSummary": + """ + Create a statistical summary for a given distribution of request times + for a request with iterable responses between the start and end. + For example, this is used to measure auto regressive requests where + a request is started and at some later point, iterative responses are + received. This will convert the request times and iterable values into + a distribution function and then calculate the statistics with + from_distribution_function. + + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...] + :param first_iter_times: A list of times when the first iteration of + each request was received. Must be the same length as requests. + :param iter_counts: A list of the total number of iterations for each + request that occurred starting at the first iteration and ending + at the request end time. Must be the same length as requests. + :param first_iter_counts: A list of the number of iterations to log + for the first iteration of each request. For example, when calculating + total number of tokens processed, this is set to the prompt tokens number. + If not provided, defaults to 1 for each request. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output DistributionSummary. + :param epsilon: The epsilon value for merging close events. + :return: An instance of DistributionSummary with calculated values. + """ + + if first_iter_counts is None: + first_iter_counts = [1] * len(requests) + + if ( + len(requests) != len(first_iter_times) + or len(requests) != len(iter_counts) + or len(requests) != len(first_iter_counts) + ): + raise ValueError( + "requests, first_iter_times, iter_counts, and first_iter_counts must" + "be the same length." 
+ f"Given {len(requests)}, {len(first_iter_times)}, {len(iter_counts)}, " + f"{len(first_iter_counts)}", + ) + + # first break up the requests into individual iterable events + events = defaultdict(int) + global_start = min(start for start, _ in requests) if requests else 0 + global_end = max(end for _, end in requests) if requests else 0 + events[global_start] = 0 + events[global_end] = 0 + + for (_, end), first_iter, first_iter_count, total_count in zip( + requests, first_iter_times, first_iter_counts, iter_counts + ): + events[first_iter] += first_iter_count + + if total_count > 1: + iter_latency = (end - first_iter) / (total_count - 1) + for ind in range(1, total_count): + events[first_iter + ind * iter_latency] += 1 + + # combine any events that are very close together + flattened_events: List[Tuple[float, int]] = [] + + for time, count in sorted(events.items()): + last_time, last_count = ( + flattened_events[-1] if flattened_events else (None, None) + ) + + if ( + last_time is not None + and last_count is not None + and abs(last_time - time) <= epsilon + ): + flattened_events[-1] = (last_time, last_count + count) + else: + flattened_events.append((time, count)) + + # convert to value distribution function + distribution: Dict[float, float] = defaultdict(float) + + for ind in range(len(flattened_events) - 1): + start_time, count = flattened_events[ind] + end_time, _ = flattened_events[ind + 1] + duration = end_time - start_time + rate = count / duration + distribution[rate] += duration + + distribution_list = sorted(distribution.items()) + + return DistributionSummary.from_distribution_function( + distribution=distribution_list, + include_cdf=include_cdf, + ) + + +class StatusDistributionSummary( + StatusBreakdown[ + DistributionSummary, + DistributionSummary, + DistributionSummary, + DistributionSummary, + ] +): + """ + A pydantic model representing a statistical summary for a given + distribution of numerical values grouped by status. + Specifically used to represent the total, successful, incomplete, + and errored values for a benchmark or other statistical summary. + """ + + @staticmethod + def from_values( + value_types: List[Literal["successful", "incomplete", "error"]], + values: List[float], + weights: Optional[List[float]] = None, + include_cdf: bool = False, + ) -> "StatusDistributionSummary": + """ + Create a statistical summary by status for a given distribution of numerical + values. This is used to measure the distribution of values for different + statuses (e.g., successful, incomplete, error) and calculate the statistics + for each status. Weights are optional to weight the probability distribution + for each value by. If not provided, all values are equally weighted. + + :param value_types: A list of status types for each value in the distribution. + Must be one of 'successful', 'incomplete', or 'error'. + :param values: A list of numerical values representing the distribution. + Must be the same length as value_types. + :param weights: A list of weights for each value in the distribution. + If not provided, all values are equally weighted (set to 1). + Must be the same length as value_types. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output StatusDistributionSummary. + :return: An instance of StatusDistributionSummary with calculated values. 
+ """ + if any( + type_ not in {"successful", "incomplete", "error"} for type_ in value_types + ): + raise ValueError( + "value_types must be one of 'successful', 'incomplete', or 'error'. " + f"Got {value_types} instead.", + ) + + if weights is None: + weights = [1.0] * len(values) + + if len(value_types) != len(values) or len(value_types) != len(weights): + raise ValueError( + "The length of value_types, values, and weights must be the same.", + ) + + _, successful_values, successful_weights = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + _, incomplete_values, incomplete_weights = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + _, errored_values, errored_weights = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip(value_types, values, weights), + ) + ) + ) + else ([], [], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_values( + values, + weights, + include_cdf=include_cdf, + ), + successful=DistributionSummary.from_values( + successful_values, # type: ignore[arg-type] + successful_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + incomplete=DistributionSummary.from_values( + incomplete_values, # type: ignore[arg-type] + incomplete_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + errored=DistributionSummary.from_values( + errored_values, # type: ignore[arg-type] + errored_weights, # type: ignore[arg-type] + include_cdf=include_cdf, + ), + ) + + @staticmethod + def from_request_times( + request_types: List[Literal["successful", "incomplete", "error"]], + requests: List[Tuple[float, float]], + distribution_type: Literal["concurrency", "rate"], + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "StatusDistributionSummary": + """ + Create a statistical summary by status for given distribution of request times. + This is used to measure the distribution of request times for different statuses + (e.g., successful, incomplete, error) for concurrency and rates. + This will call into DistributionSummary.from_request_times to calculate + the statistics for each status. + + :param request_types: List of status types for each request in the distribution. + Must be one of 'successful', 'incomplete', or 'error'. + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...]. + Must be the same length as request_types. + :param distribution_type: The type of distribution to calculate. + Either "concurrency" or "rate". + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output StatusDistributionSummary. + :param epsilon: The epsilon value for merging close events. + :return: An instance of StatusDistributionSummary with calculated values. + """ + if distribution_type not in {"concurrency", "rate"}: + raise ValueError( + f"Invalid distribution_type '{distribution_type}'. " + "Must be 'concurrency' or 'rate'." + ) + + if any( + type_ not in {"successful", "incomplete", "error"} + for type_ in request_types + ): + raise ValueError( + "request_types must be one of 'successful', 'incomplete', or 'error'. 
" + f"Got {request_types} instead.", + ) + + if len(request_types) != len(requests): + raise ValueError( + "The length of request_types and requests must be the same. " + f"Got {len(request_types)} and {len(requests)} instead.", + ) + + _, successful_requests = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + _, incomplete_requests = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + _, errored_requests = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip(request_types, requests), + ) + ) + ) + else ([], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_request_times( + requests, + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + successful=DistributionSummary.from_request_times( + successful_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + incomplete=DistributionSummary.from_request_times( + incomplete_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + errored=DistributionSummary.from_request_times( + errored_requests, # type: ignore[arg-type] + distribution_type=distribution_type, + include_cdf=include_cdf, + epsilon=epsilon, + ), + ) + + @staticmethod + def from_iterable_request_times( + request_types: List[Literal["successful", "incomplete", "error"]], + requests: List[Tuple[float, float]], + first_iter_times: List[float], + iter_counts: Optional[List[int]] = None, + first_iter_counts: Optional[List[int]] = None, + include_cdf: bool = False, + epsilon: float = 1e-6, + ) -> "StatusDistributionSummary": + """ + Create a statistical summary by status for given distribution of request times + for a request with iterable responses between the start and end. + For example, this is used to measure auto regressive requests where + a request is started and at some later point, iterative responses are + received. This will call into DistributionSummary.from_iterable_request_times + to calculate the statistics for each status. + + :param request_types: List of status types for each request in the distribution. + Must be one of 'successful', 'incomplete', or 'error'. + :param requests: A list of tuples representing the start and end times of + each request. Example: [(start_1, end_1), (start_2, end_2), ...]. + Must be the same length as request_types. + :param first_iter_times: A list of times when the first iteration of + each request was received. Must be the same length as requests. + :param iter_counts: A list of the total number of iterations for each + request that occurred starting at the first iteration and ending + at the request end time. Must be the same length as requests. + If not provided, defaults to 1 for each request. + :param first_iter_counts: A list of the number of iterations to log + for the first iteration of each request. For example, when calculating + total number of tokens processed, this is set to the prompt tokens number. + If not provided, defaults to 1 for each request. + :param include_cdf: Whether to include the calculated cumulative distribution + function (CDF) in the output StatusDistributionSummary. + :param epsilon: The epsilon value for merging close events. 
+ :return: An instance of StatusDistributionSummary with calculated values. + """ + if any( + type_ not in {"successful", "incomplete", "error"} + for type_ in request_types + ): + raise ValueError( + "request_types must be one of 'successful', 'incomplete', or 'error'. " + f"Got {request_types} instead.", + ) + + if iter_counts is None: + iter_counts = [1] * len(requests) + + if first_iter_counts is None: + first_iter_counts = [1] * len(requests) + + if ( + len(request_types) != len(requests) + or len(requests) != len(first_iter_times) + or len(requests) != len(iter_counts) + or len(requests) != len(first_iter_counts) + ): + raise ValueError( + "request_types, requests, first_iter_times, iter_counts, and " + "first_iter_counts must be the same length." + f"Given {len(request_types)}, {len(requests)}, " + f"{len(first_iter_times)}, {len(iter_counts)}, " + f"{len(first_iter_counts)}", + ) + + ( + _, + successful_requests, + successful_first_iter_times, + successful_iter_counts, + successful_first_iter_counts, + ) = ( + zip(*successful) + if ( + successful := list( + filter( + lambda val: val[0] == "successful", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + ( + _, + incomplete_requests, + incomplete_first_iter_times, + incomplete_iter_counts, + incomplete_first_iter_counts, + ) = ( + zip(*incomplete) + if ( + incomplete := list( + filter( + lambda val: val[0] == "incomplete", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + ( + _, + errored_requests, + errored_first_iter_times, + errored_iter_counts, + errored_first_iter_counts, + ) = ( + zip(*errored) + if ( + errored := list( + filter( + lambda val: val[0] == "error", + zip( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + ), + ) + ) + ) + else ([], [], [], [], []) + ) + + return StatusDistributionSummary( + total=DistributionSummary.from_iterable_request_times( + requests, + first_iter_times, + iter_counts, + first_iter_counts, + include_cdf=include_cdf, + epsilon=epsilon, + ), + successful=DistributionSummary.from_iterable_request_times( + successful_requests, # type: ignore[arg-type] + successful_first_iter_times, # type: ignore[arg-type] + successful_iter_counts, # type: ignore[arg-type] + successful_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + incomplete=DistributionSummary.from_iterable_request_times( + incomplete_requests, # type: ignore[arg-type] + incomplete_first_iter_times, # type: ignore[arg-type] + incomplete_iter_counts, # type: ignore[arg-type] + incomplete_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + errored=DistributionSummary.from_iterable_request_times( + errored_requests, # type: ignore[arg-type] + errored_first_iter_times, # type: ignore[arg-type] + errored_iter_counts, # type: ignore[arg-type] + errored_first_iter_counts, # type: ignore[arg-type] + include_cdf=include_cdf, + epsilon=epsilon, + ), + ) + + +class RunningStats(StandardBaseModel): + """ + Create a running statistics object to track the mean, rate, and other + statistics of a stream of values. + 1. The start time is set to the time the object is created. + 2. The count is set to 0. + 3. The total is set to 0. + 4. The last value is set to 0. + 5. The mean is calculated as the total / count. 
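# --- Illustrative sketch (editorial note, not part of the patch) ---
# StatusDistributionSummary (completed above) keeps one DistributionSummary per
# request status plus a combined total. Hypothetical per-request latencies:
from guidellm.objects import StatusDistributionSummary

statuses = ["successful", "successful", "error", "incomplete"]
latencies = [0.9, 1.1, 5.0, 2.5]

by_status = StatusDistributionSummary.from_values(statuses, latencies)
print(by_status.total.mean)       # mean over all four values
print(by_status.successful.mean)  # mean over the two successful values only
print(by_status.errored.count)    # 1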
+ """ + + start_time: float = Field( + default_factory=timer.time, + description=( + "The time the running statistics object was created. " + "This is used to calculate the rate of the statistics." + ), + ) + count: int = Field( + default=0, + description="The number of values added to the running statistics.", + ) + total: float = Field( + default=0.0, + description="The total sum of the values added to the running statistics.", + ) + last: float = Field( + default=0.0, + description="The last value added to the running statistics.", + ) + + @computed_field # type: ignore[misc] + @property + def mean(self) -> float: + """ + :return: The mean of the running statistics (total / count). + If count is 0, return 0.0. + """ + if self.count == 0: + return 0.0 + return self.total / self.count + + @computed_field # type: ignore[misc] + @property + def rate(self) -> float: + """ + :return: The rate of the running statistics + (total / (time.time() - start_time)). + If count is 0, return 0.0. + """ + if self.count == 0: + return 0.0 + return self.total / (timer.time() - self.start_time) + + def __add__(self, value: Any) -> float: + """ + Enable the use of the + operator to add a value to the running statistics. + + :param value: The value to add to the running statistics. + :return: The mean of the running statistics. + """ + if not isinstance(value, (int, float)): + raise ValueError( + f"Value must be an int or float, got {type(value)} instead.", + ) + + self.update(value) + + return self.mean + + def __iadd__(self, value: Any) -> "RunningStats": + """ + Enable the use of the += operator to add a value to the running statistics. + + :param value: The value to add to the running statistics. + :return: The running statistics object. + """ + if not isinstance(value, (int, float)): + raise ValueError( + f"Value must be an int or float, got {type(value)} instead.", + ) + + self.update(value) + + return self + + def update(self, value: float, count: int = 1) -> None: + """ + Update the running statistics with a new value. + + :param value: The new value to add to the running statistics. + :param count: The number of times to 'count' for the value. + If not provided, defaults to 1. + """ + self.count += count + self.total += value + self.last = value + + +class TimeRunningStats(RunningStats): + """ + Create a running statistics object to track the mean, rate, and other + statistics of a stream of time values. This is used to track time values + in milliseconds and seconds. + + Adds time specific computed_fields such as measurements in milliseconds and seconds. + """ + + @computed_field # type: ignore[misc] + @property + def total_ms(self) -> float: + """ + :return: The total time multiplied by 1000.0 to convert to milliseconds. + """ + return self.total * 1000.0 + + @computed_field # type: ignore[misc] + @property + def last_ms(self) -> float: + """ + :return: The last time multiplied by 1000.0 to convert to milliseconds. + """ + return self.last * 1000.0 + + @computed_field # type: ignore[misc] + @property + def mean_ms(self) -> float: + """ + :return: The mean time multiplied by 1000.0 to convert to milliseconds. + """ + return self.mean * 1000.0 + + @computed_field # type: ignore[misc] + @property + def rate_ms(self) -> float: + """ + :return: The rate of the running statistics multiplied by 1000.0 + to convert to milliseconds. 
+ """ + return self.rate * 1000.0 diff --git a/src/guidellm/request/__init__.py b/src/guidellm/request/__init__.py index 4feca91c..bdd87389 100644 --- a/src/guidellm/request/__init__.py +++ b/src/guidellm/request/__init__.py @@ -1,13 +1,15 @@ -from .base import GenerationMode, RequestGenerator -from .emulated import EmulatedConfig, EmulatedRequestGenerator -from .file import FileRequestGenerator -from .transformers import TransformersDatasetRequestGenerator +from .loader import ( + GenerativeRequestLoader, + GenerativeRequestLoaderDescription, + RequestLoader, + RequestLoaderDescription, +) +from .request import GenerationRequest __all__ = [ - "EmulatedConfig", - "EmulatedRequestGenerator", - "FileRequestGenerator", - "GenerationMode", - "RequestGenerator", - "TransformersDatasetRequestGenerator", + "RequestLoader", + "RequestLoaderDescription", + "GenerativeRequestLoaderDescription", + "GenerativeRequestLoader", + "GenerationRequest", ] diff --git a/src/guidellm/request/base.py b/src/guidellm/request/base.py deleted file mode 100644 index 9fd303e6..00000000 --- a/src/guidellm/request/base.py +++ /dev/null @@ -1,200 +0,0 @@ -import contextlib -import threading -import time -from abc import ABC, abstractmethod -from queue import Empty, Full, Queue -from typing import Iterator, Literal, Union - -from loguru import logger -from transformers import ( # type: ignore # noqa: PGH003 - AutoTokenizer, - PreTrainedTokenizer, -) - -from guidellm.core.request import TextGenerationRequest - -__all__ = ["GenerationMode", "RequestGenerator"] - - -GenerationMode = Literal["async", "sync"] - - -class RequestGenerator(ABC): - """ - A base class for request generators that generate result requests. - - :param type_: The type of the request generator. - :type type_: str - :param source: The data source for the request generator. - :type source: str - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] - :param mode: The generation mode, either 'async' or 'sync'. - :type mode: GenerationMode - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - type_: str, - source: str, - tokenizer: Union[str, PreTrainedTokenizer], - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - self._type = type_ - self._source = source - self._async_queue_size: int = async_queue_size - self._mode: str = mode - self._queue: Queue = Queue(maxsize=async_queue_size) - self._stop_event: threading.Event = threading.Event() - - if not tokenizer: - err = "Tokenizer must be provided for request generation" - logger.error(err) - raise ValueError(err) - - self._tokenizer = ( - AutoTokenizer.from_pretrained(tokenizer) - if isinstance(tokenizer, str) - else tokenizer - ) - logger.info("Tokenizer initialized for request generation: {}", self._tokenizer) - - if self._mode == "async": - self._thread = threading.Thread(target=self._populate_queue, daemon=True) - self._thread.start() - logger.info( - "RequestGenerator started in async mode with queue size: {}", - self._async_queue_size, - ) - - def __repr__(self) -> str: - """ - Return a string representation of the RequestGenerator. - - :return: String representation of the RequestGenerator. 
- :rtype: str - """ - return ( - f"RequestGenerator(" - f"mode={self._mode}, " - f"async_queue_size={self._async_queue_size}, " - f"tokenizer={self._tokenizer})" - ) - - def __iter__(self) -> Iterator[TextGenerationRequest]: - """ - Provide an iterator interface to generate new requests. - - :return: An iterator over result requests. - :rtype: Iterator[TextGenerationRequest] - """ - if self.mode == "async": - while not self._stop_event.is_set(): - try: - item = self._queue.get_nowait() - self._queue.task_done() - yield item - except Empty: - time.sleep(0.01) - continue - else: - while not self._stop_event.is_set(): - yield self.create_item() - - @abstractmethod - def __len__(self) -> int: - """ - Abstract method to get the length of the collection to be generated. - """ - - @abstractmethod - def create_item(self) -> TextGenerationRequest: - """ - Abstract method to create a new result request item. - - :return: A new result request. - :rtype: TextGenerationRequest - """ - - @property - def type_(self) -> str: - """ - Get the type of the request generator. - - :return: The type of the request generator. - :rtype: str - """ - return self._type - - @property - def source(self) -> str: - """ - Get the data source for the request generator. - - :return: The data source. - :rtype: str - """ - return self._source - - @property - def tokenizer(self) -> PreTrainedTokenizer: - """ - Get the tokenizer instance. - - :return: The tokenizer instance. - :rtype: PreTrainedTokenizer - """ - return self._tokenizer - - @property - def mode(self) -> str: - """ - Get the generation mode. - - :return: The generation mode. - :rtype: str - """ - return self._mode - - @property - def async_queue_size(self) -> int: - """ - Get the size of the request queue. - - :return: The size of the request queue. - :rtype: int - """ - return self._async_queue_size - - def stop(self): - """ - Stop the background task that populates the queue. - """ - logger.info("Stopping RequestGenerator...") - self._stop_event.set() - if self._mode == "async": - self._thread.join() - logger.info("RequestGenerator stopped") - - def _populate_queue(self): - """ - Populate the request queue in the background. - """ - - while not self._stop_event.is_set(): - with contextlib.suppress(Full): - if self._queue.qsize() < self._async_queue_size: - item = self.create_item() - self._queue.put(item, timeout=0.1) - logger.debug( - "Item added to queue. Current queue size: {}", - self._queue.qsize(), - ) - else: - time.sleep(0.1) - - logger.info("RequestGenerator stopped populating queue") diff --git a/src/guidellm/request/emulated.py b/src/guidellm/request/emulated.py deleted file mode 100644 index 7d481cb7..00000000 --- a/src/guidellm/request/emulated.py +++ /dev/null @@ -1,397 +0,0 @@ -import json -import math -from dataclasses import dataclass -from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union - -import numpy as np -from loguru import logger -from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 - -from guidellm.config import settings -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import clean_text, filter_text, load_text, split_text - -__all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"] - - -@dataclass -class EmulatedConfig: - """ - Configuration for emulated text generation requests. - - Args: - prompt_tokens (int): Number of prompt tokens. 
- prompt_tokens_variance (Optional[int]): Variance for prompt tokens. - prompt_tokens_min (Optional[int]): Minimum number of prompt tokens. - prompt_tokens_max (Optional[int]): Maximum number of prompt tokens. - generated_tokens (Optional[int]): Number of generated tokens. - generated_tokens_variance (Optional[int]): Variance for generated tokens. - generated_tokens_min (Optional[int]): Minimum number of generated tokens. - generated_tokens_max (Optional[int]): Maximum number of generated tokens. - """ - - @staticmethod - def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig": - """ - Create an EmulatedConfig instance from a configuration source. - - :param config: Configuration source, can be a dictionary, JSON string, - key=value string, or file path. - :type config: Union[str, Path, Dict] - :return: An instance of EmulatedConfig. - :rtype: EmulatedConfig - :raises FileNotFoundError: If the configuration file is not found. - :raises ValueError: If the configuration format is invalid. - """ - if not config: - logger.debug("Creating default configuration") - return EmulatedConfig(prompt_tokens=1024, generated_tokens=256) - - if isinstance(config, dict): - logger.debug("Loading configuration from dict: {}", config) - return EmulatedConfig(**config) - - if isinstance(config, Path) or ( - isinstance(config, str) and (config.endswith(".json") or "{" in config) - ): - logger.debug("Loading configuration from json: {}", config) - - if isinstance(config, str) and "{" in config: - json_text = config.strip() - else: - if isinstance(config, str): - config = Path(config) - - if not config.exists(): - raise FileNotFoundError(f"Configuration file not found: {config}") - - json_text = config.read_text(encoding="utf-8") - - json_dict = json.loads(json_text) - - return EmulatedConfig(**json_dict) - - if isinstance(config, str) and "=" in config: - logger.debug("Loading configuration from csv string: {}", config) - items = config.split(",") - config_dict = {} - for item in items: - key_value = item.strip().split("=") - if len(key_value) != 2: # noqa: PLR2004 - raise ValueError(f"Unexpected format for item: {item}") - key = key_value[0].strip() - value = ( - int(key_value[1].strip()) - if key_value[1].isnumeric() - else key_value[1] - ) - config_dict[key] = value - - return EmulatedConfig(**config_dict) # type: ignore # noqa: PGH003 - - raise ValueError( - f"Invalid configuration given for creation of EmulatedConfig: {config}" - ) - - prompt_tokens: int - prompt_tokens_variance: Optional[int] = None - prompt_tokens_min: Optional[int] = None - prompt_tokens_max: Optional[int] = None - - generated_tokens: Optional[int] = None - generated_tokens_variance: Optional[int] = None - generated_tokens_min: Optional[int] = None - generated_tokens_max: Optional[int] = None - - @property - def prompt_tokens_range(self) -> Tuple[int, int]: - """ - Get the range (min, max) of prompt tokens to generate. - - :return: The range of prompt tokens. - :rtype: Tuple[int, int] - """ - return self._token_range( - self.prompt_tokens, - self.prompt_tokens_variance, - self.prompt_tokens_min, - self.prompt_tokens_max, - ) - - @property - def output_tokens_range(self) -> Tuple[int, int]: - """ - Get the range (min, max) of output tokens to generate. - - :return: The range of generated tokens. 
- :rtype: Tuple[int, int] - """ - if not self.generated_tokens: - return 0, 0 - - return self._token_range( - self.generated_tokens, - self.generated_tokens_variance, - self.generated_tokens_min, - self.generated_tokens_max, - ) - - def sample_prompt_tokens(self, rng: np.random.Generator) -> int: - """ - Sample the number of prompt tokens to generate. - - :param rng: The random number generator to use. - :type rng: np.random.Generator - :return: The number of prompt tokens to create. - :rtype: int - """ - return self._sample_tokens( - self.prompt_tokens, - self.prompt_tokens_variance, - self.prompt_tokens_min, - self.prompt_tokens_max, - rng, - ) - - def sample_output_tokens(self, rng: np.random.Generator) -> Optional[int]: - """ - Sample the number of output tokens to generate. - - :param rng: The random number generator to use. - :type rng: np.random.Generator - :return: The number of output tokens to generate. - :rtype: Optional[int] - """ - if not self.generated_tokens: - return None - - return self._sample_tokens( - self.generated_tokens, - self.generated_tokens_variance, - self.generated_tokens_min, - self.generated_tokens_max, - rng, - ) - - @staticmethod - def _sample_tokens( - base: int, - variance: Optional[int], - min_tokens: Optional[int], - max_tokens: Optional[int], - rng: np.random.Generator, - ) -> int: - min_tokens, max_tokens = EmulatedConfig._token_range( - base, variance, min_tokens, max_tokens - ) - - if min_tokens == max_tokens: - return min_tokens - - if not variance: - return rng.integers(min_tokens, max_tokens + 1) - - rand = rng.normal(base, math.sqrt(variance)) - - return int(min(max(rand, min_tokens), max_tokens)) - - @staticmethod - def _token_range( - base: int, - variance: Optional[int], - min_tokens: Optional[int], - max_tokens: Optional[int], - ) -> Tuple[int, int]: - if not variance: - return ( - min_tokens or base, - max_tokens or base, - ) - - min_tokens = min_tokens if min_tokens and min_tokens > 0 else 1 - max_tokens = ( - max_tokens if max_tokens and max_tokens > base else base + 5 * variance - ) - - return min_tokens, max_tokens - - -class EndlessTokens(List[str]): - """ - A list subclass that allows for endless data generation. - """ - - def __init__( - self, - data: Union[str, Path], - filter_start: Optional[Union[str, int]] = None, - filter_end: Optional[Union[str, int]] = None, - clean_text_args: Optional[Dict[str, bool]] = None, - ): - """ - Initialize EndlessDataWords with data. - - :param data: Source text data. - :type data: str - """ - logger.debug("Loading data from: {}", data) - data = load_text(data) - data = filter_text(data, filter_start, filter_end) - data = ( - clean_text(data) - if not clean_text_args - else clean_text(data, **clean_text_args) - ) - self._tokens, self._token_separators, self._line_indices = split_text(data) - - super().__init__(self._tokens) - - @property - def line_indices(self) -> List[int]: - """ - Get the list of start indices for lines. - - :return: List of start indices. - :rtype: List[int] - """ - return self._line_indices - - def create_text(self, start: int, length: int) -> str: - """ - Create a text snippet from the specified range. - - :param start: Start index. - :type start: int - :param length: Length of the snippet. - :type length: int - :return: Text snippet. 
- :rtype: str - """ - start = start % len(self) - text = "" - buff_token_sep = "" - - for counter in range(length): - index = (start + counter) % len(self) - text += buff_token_sep + self[index] - buff_token_sep = self._token_separators[index] - - return text - - -class EmulatedRequestGenerator(RequestGenerator): - """ - A request generator that generates emulated requests based on a configuration. - - :param config: The configuration string, file path, or dictionary. - :type config: Union[str, Dict, Path] - :param random_seed: The random seed to use for generating requests. - :type random_seed: Optional[int] - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] - :param mode: The generation mode, either 'async' or 'sync'. - :type mode: GenerationMode - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - config: Optional[Union[str, Path, Dict]], - random_seed: Optional[int] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - """ - Initialize EmulatedRequestGenerator with configuration and tokenizer. - - :param config: Configuration source, can be a dictionary, - JSON string, or file path. - :type config: Optional[Union[str, Path, Dict]] - :param random_seed: Optional seed for random number generator. - :type random_seed: Optional[int] - :param tokenizer: Tokenizer instance or configuration for tokenizing prompts. - :type tokenizer: Optional[Union[str, PreTrainedTokenizer]] - :param mode: Mode of request generation, either 'async' or 'sync'. - :type mode: str - :param async_queue_size: Size of the asynchronous queue. - :type async_queue_size: int - """ - self._config = EmulatedConfig.create_config(config) - self._tokens = EndlessTokens( - settings.emulated_data.source, - settings.emulated_data.filter_start, - settings.emulated_data.filter_end, - ) - self._rng = np.random.default_rng(random_seed) - - # NOTE: Must be after all the parameters since the queue population - # function requires attributes above - super().__init__( - type_="emulated", - source=str(config), - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def __len__(self) -> int: - raise NotImplementedError( - "Can't get the length of the emulated dataset. " - "Check the `--data-type` CLI parameter." - ) - - def create_item(self) -> TextGenerationRequest: - """ - Create a new text generation request item from the data. - - :return: A new text generation request. - :rtype: TextGenerationRequest - """ - logger.debug("Creating new text generation request") - target_prompt_token_count = self._config.sample_prompt_tokens(self._rng) - prompt = self.sample_prompt(target_prompt_token_count) - prompt_token_count = len(self.tokenizer.tokenize(prompt)) - output_token_count = self._config.sample_output_tokens(self._rng) - logger.debug("Generated prompt: {}", prompt) - - return TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - ) - - def sample_prompt(self, tokens: int) -> str: - """ - Sample a prompt with the specified number of tokens. - - :param tokens: Number of tokens for the prompt. - :type tokens: int - :return: Sampled prompt text. 
- :rtype: str - """ - start_line_index = self._rng.integers(0, len(self._tokens.line_indices)) - - # binary search to find the proper number of tokens for the prompt - # this is because tokenizers differ in tokenization behavior - left = 0 - right = left + 5 * tokens - - while left < right: - mid = (left + right) // 2 - prompt = self._tokens.create_text(start_line_index, mid) - token_count = len(self.tokenizer.tokenize(prompt)) - - if token_count == tokens: - return prompt - - if token_count < tokens: - left = mid + 1 - else: - right = mid - - return self._tokens.create_text(start_line_index, left) diff --git a/src/guidellm/request/file.py b/src/guidellm/request/file.py deleted file mode 100644 index b187f7b4..00000000 --- a/src/guidellm/request/file.py +++ /dev/null @@ -1,83 +0,0 @@ -from pathlib import Path -from typing import Optional, Union - -from loguru import logger -from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 - -from guidellm.config import settings -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import load_text_lines - -__all__ = ["FileRequestGenerator"] - - -class FileRequestGenerator(RequestGenerator): - """ - A request generator implementation for files. - - :param path: The path to the file containing the data. - :type path: Optional[Union[str, Path]] - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] - :param mode: The generation mode, either 'async' or 'sync'. - :type mode: str - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - path: Optional[Union[str, Path]], - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - if not path: - raise ValueError("File path must be provided for FileRequestGenerator") - - self._path = path - self._data = load_text_lines( - path, - filters=settings.dataset.preferred_data_columns, - ) - self._iterator = iter(self._data) - - # NOTE: Must be after all the parameters since the queue population - # function requires attributes above - super().__init__( - type_="file", - source=str(path), - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def __len__(self) -> int: - """ - Return the number of text lines. - """ - - return len(self._data) - - def create_item(self) -> TextGenerationRequest: - """ - Create a new result request item from the data. - - :return: A new result request. 
- :rtype: TextGenerationRequest - """ - logger.debug("Creating new request item from file data") - - try: - data = next(self._iterator) - except StopIteration: - self._iterator = iter(self._data) - data = next(self._iterator) - - token_count = len(self.tokenizer.tokenize(data)) - request = TextGenerationRequest(prompt=data, prompt_token_count=token_count) - logger.debug("Created new TextGenerationRequest: {}", request) - - return request diff --git a/src/guidellm/request/loader.py b/src/guidellm/request/loader.py new file mode 100644 index 00000000..de11e9c3 --- /dev/null +++ b/src/guidellm/request/loader.py @@ -0,0 +1,284 @@ +from abc import abstractmethod +from pathlib import Path +from typing import ( + Any, + Dict, + Iterable, + Iterator, + List, + Literal, + Optional, + Union, +) + +from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict +from transformers import PreTrainedTokenizerBase # type: ignore[import] + +from guidellm.dataset import ColumnInputTypes, load_dataset +from guidellm.objects import StandardBaseModel +from guidellm.request.request import GenerationRequest + +__all__ = [ + "RequestLoaderDescription", + "RequestLoader", + "GenerativeRequestLoaderDescription", + "GenerativeRequestLoader", +] + + +class RequestLoaderDescription(StandardBaseModel): + type_: Literal["request_loader"] = "request_loader" + + +class RequestLoader(Iterable): + @abstractmethod + def __iter__(self): ... + + @abstractmethod + def __len__(self): ... + + @property + @abstractmethod + def description(self) -> RequestLoaderDescription: ... + + +class GenerativeRequestLoaderDescription(RequestLoaderDescription): + type_: Literal["generative_request_loader"] = "generative_request_loader" # type: ignore[assignment] + data: str + data_args: Optional[Dict[str, Any]] + processor: str + processor_args: Optional[Dict[str, Any]] + + +class GenerativeRequestLoader(RequestLoader): + DEFAULT_PROMPT_COLUMNS = [ + "prompt", + "prompts", + "instruction", + "instructions", + "question", + "questions", + "input", + "inputs", + "context", + "content", + "conversation", + "conversations", + "text", + ] + + def __init__( + self, + data: Union[ + str, + Path, + Iterable[Union[str, Dict[str, Any]]], + Dataset, + DatasetDict, + IterableDataset, + IterableDatasetDict, + ], + data_args: Optional[Dict[str, Any]], + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + shuffle: bool = True, + iter_type: Literal["finite", "infinite"] = "finite", + random_seed: int = 42, + ): + self.data = data + self.data_args = data_args + dataset, args_column_mappings = load_dataset( + data, + data_args, + processor, + processor_args, + random_seed, + ) + self.dataset = dataset + self.processor = processor + self.processor_args = processor_args + self.shuffle = shuffle + self.iter_type = iter_type + self.random_seed = random_seed + + self.column_mappings = self._create_column_mappings(args_column_mappings) + self.preserve_iter_state = iter_type == "infinite" # ensure no caching requests + self._preserved_iter = None + + def __iter__(self) -> Iterator[GenerationRequest]: + scope_create_count = 0 + + while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None: + scope_create_count += 1 + + for item in dataset_iter: + yield self._create_request(item) + + self._preserved_iter = None + + def __len__(self) -> int: + if self.iter_type == "finite": + return self.num_unique_items() + + raise ValueError(f"Unable to determine length of dataset: 
{self.data}") + + @property + def description(self) -> GenerativeRequestLoaderDescription: + return GenerativeRequestLoaderDescription( + data=str(self.data), + data_args=self.data_args, + processor=str(self.processor), + processor_args=self.processor_args, + ) + + def num_unique_items(self, raise_err: bool = True) -> int: + try: + return len(self.dataset) + except Exception: # noqa: BLE001, S110 + pass + + dataset_size = self.dataset.info.dataset_size + if dataset_size is not None: + return dataset_size + + if raise_err: + raise ValueError("Unable to determine number of items in the dataset") + + return -1 + + def _create_column_mappings( + self, + args_column_mappings: Dict[ColumnInputTypes, str], + ) -> Dict[ColumnInputTypes, str]: + column_mappings: Dict[ColumnInputTypes, str] = {} + + if "text_column" in args_column_mappings: + column_mappings["prompt_column"] = args_column_mappings["text_column"] + else: + column_mappings["prompt_column"] = self._extract_text_column() + + if "prompt_tokens_count_column" in args_column_mappings: + column_mappings["prompt_tokens_count_column"] = args_column_mappings[ + "prompt_tokens_count_column" + ] + elif prompt_tokens_count_column := self._extract_prompt_tokens_count_column(): + column_mappings["prompt_tokens_count_column"] = prompt_tokens_count_column + + if "output_tokens_count_column" in args_column_mappings: + column_mappings["output_tokens_count_column"] = args_column_mappings[ + "output_tokens_count_column" + ] + elif output_tokens_count_column := self._extract_output_tokens_count_column(): + column_mappings["output_tokens_count_column"] = output_tokens_count_column + + return column_mappings + + def _extract_text_column(self) -> str: + column_names = self._dataset_columns( + err_msg=( + "Unable to determine text column from dataset and it is required. " + "To specify the text column, set the 'text_column' key in the " + "'data_args' dictionary." + ) + ) + + if not column_names: + raise ValueError( + "Unable to determine text column from dataset and it is required. " + "To specify the text column, set the 'text_column' key in the " + "'data_args' dictionary." + ) + + if len(column_names) == 1: + return column_names[0] + + for def_column in self.DEFAULT_PROMPT_COLUMNS: + if def_column in column_names: + return def_column + + raise ValueError( + f"Unable to determine text column from dataset columns: {column_names}. " + "To specify the text column, set the 'text_column' key in the " + "'data_args' dictionary." 
+ ) + + def _extract_prompt_tokens_count_column(self) -> Optional[str]: + column_names = self._dataset_columns() + + if column_names and "prompt_tokens_count" in column_names: + return "prompt_tokens_count" + + if column_names and "prompt_tokens" in column_names: + return "prompt_tokens" + + return None + + def _extract_output_tokens_count_column(self) -> Optional[str]: + column_names = self._dataset_columns() + + if column_names and "output_tokens_count" in column_names: + return "output_tokens_count" + + if column_names and "output_tokens" in column_names: + return "output_tokens" + + return None + + def _dataset_columns(self, err_msg: Optional[str] = None) -> Optional[List[str]]: + try: + column_names = self.dataset.column_names + + if not column_names and err_msg: + raise ValueError(f"No column names found in dataset: {self.data}") + except Exception as err: + if err_msg: + raise ValueError(err_msg) from err + + column_names = None + + return column_names + + def _get_dataset_iter( + self, scope_create_count: int + ) -> Optional[Iterator[Dict[str, Any]]]: + if scope_create_count > 0 and self.iter_type != "infinite": + return None + + if self.preserve_iter_state and self._preserved_iter is not None: + return self._preserved_iter + + dataset = ( + self.dataset + if not self.shuffle + else self.dataset.shuffle(seed=self.random_seed) + ) + + dataset_iter = iter(dataset) + + if self.preserve_iter_state: + self._preserved_iter = dataset_iter + + return dataset_iter + + def _create_request(self, item: Dict[str, Any]) -> GenerationRequest: + prompt_tokens = ( + item[self.column_mappings["prompt_tokens_count_column"]] + if "prompt_tokens_count_column" in self.column_mappings + else None + ) + output_tokens = ( + item[self.column_mappings["output_tokens_count_column"]] + if "output_tokens_count_column" in self.column_mappings + else None + ) + + return GenerationRequest( + request_type="text_completions", + content=item[self.column_mappings["prompt_column"]], + stats=( + {"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {} + ), + constraints=( + {"output_tokens": output_tokens} if output_tokens is not None else {} + ), + ) diff --git a/src/guidellm/request/request.py b/src/guidellm/request/request.py new file mode 100644 index 00000000..216ca0e9 --- /dev/null +++ b/src/guidellm/request/request.py @@ -0,0 +1,79 @@ +import uuid +from typing import Any, Dict, Literal, Optional + +from pydantic import Field + +from guidellm.objects.pydantic import StandardBaseModel + +__all__ = ["GenerationRequest"] + + +class GenerationRequest(StandardBaseModel): + """ + A class representing a request for generation. + This class is used to encapsulate the details of a generation request, + including the request ID, type, content, parameters, statistics, and constraints. + It is designed to be used with the BackendRequestsWorker class to handle + the generation process. + + :param request_id: The unique identifier for the request. + :param request_type: The type of request (e.g., text, chat). + :param content: The content for the request to send to the backend. + If request_type is 'text', this should be a string or list of strings + which will be resolved by backend.text_completions. + If request_type is 'chat', this should be a string, + a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]), + or Any raw content which will be resolved by backend.chat_completions. + If raw content, raw_content=True must be passed in the params. 
+ :param params: Additional parameters for the request passed in as kwargs. + For an http backend, these are passed into the body of the request. + :param stats: Statistics for the request, such as the number of prompt tokens. + Used for tracking and reporting purposes. + :param constraints: Constraints for the request, such as the maximum number + of output tokens. Used for controlling the behavior of the backend. + """ + + request_id: Optional[str] = Field( + default_factory=lambda: str(uuid.uuid4()), + description="The unique identifier for the request.", + ) + request_type: Literal["text_completions", "chat_completions"] = Field( + default="text_completions", + description=( + "The type of request (e.g., text, chat). " + "If request_type='text_completions', resolved by backend.text_completions. " + "If request_typ='chat_completions', resolved by backend.chat_completions." + ), + ) + content: Any = Field( + description=( + "The content for the request to send to the backend. " + "If request_type is 'text', this should be a string or list of strings " + "which will be resolved by backend.text_completions. " + "If request_type is 'chat', this should be a string, " + "a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]), " + "or Any raw content which will be resolved by backend.chat_completions. " + "If raw content, raw_content=True must be passed in the params." + ) + ) + params: Dict[str, Any] = Field( + default_factory=dict, + description=( + "Additional parameters for the request that will be passed in as kwargs. " + "For an http backend, these are passed into the body of the request. " + ), + ) + stats: Dict[Literal["prompt_tokens"], int] = Field( + default_factory=dict, + description=( + "Statistics for the request, such as the number of prompt tokens. " + "Used for tracking and reporting purposes." + ), + ) + constraints: Dict[Literal["output_tokens"], int] = Field( + default_factory=dict, + description=( + "Constraints for the request, such as the maximum number of output tokens. " + "Used for controlling the behavior of the backend." + ), + ) diff --git a/src/guidellm/request/transformers.py b/src/guidellm/request/transformers.py deleted file mode 100644 index 3fd24040..00000000 --- a/src/guidellm/request/transformers.py +++ /dev/null @@ -1,103 +0,0 @@ -from pathlib import Path -from typing import Optional, Union - -from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict -from loguru import logger -from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003 - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.base import GenerationMode, RequestGenerator -from guidellm.utils import ( - load_transformers_dataset, - resolve_transformers_dataset_column, -) - -__all__ = ["TransformersDatasetRequestGenerator"] - - -class TransformersDatasetRequestGenerator(RequestGenerator): - """ - A request generator implementation for Hugging Face datasets. - - :param dataset: The name of the Hugging Face dataset to use or the path - to a local dataset. - :type dataset_name: str - :param split: The split of the dataset to use (e.g., 'train', 'test'). - :type split: str - :param column: The column/field to use for generating requests. - :type column: str - :param tokenizer: The tokenizer instance or the name/config to use - for tokenizing prompts. - :type tokenizer: Union[str, PreTrainedTokenizer] - :param mode: The generation mode, either 'async' or 'sync'. 
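# --- Editor's illustrative sketch (not part of this patch) ---
# Constructing the new GenerationRequest model defined above. The import path
# mirrors the `from guidellm.request import GenerationRequest` usage elsewhere
# in this diff; field values are hypothetical.
from guidellm.request import GenerationRequest

# A text-completion request with a known prompt length and a cap on output tokens.
request = GenerationRequest(
    content="Explain speculative decoding in two sentences.",
    params={"temperature": 0.0},        # forwarded as kwargs / request-body fields
    stats={"prompt_tokens": 9},         # bookkeeping only, used for reporting
    constraints={"output_tokens": 64},  # asks the backend to stop at 64 output tokens
)

# A chat-completion request; a plain string is resolved by backend.chat_completions.
chat_request = GenerationRequest(
    request_type="chat_completions",
    content="Hello! What can you do?",
)
# --- end sketch ---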
- :type mode: str - :param async_queue_size: The size of the request queue. - :type async_queue_size: int - """ - - def __init__( - self, - dataset: Union[ - str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset - ], - split: Optional[str] = None, - column: Optional[str] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - **kwargs, - ): - self._dataset = dataset - self._split = split - self._column = column - self._kwargs = kwargs - - self._hf_dataset: Union[Dataset, IterableDataset] = load_transformers_dataset( - dataset, split=split, **kwargs - ) - self._hf_column = resolve_transformers_dataset_column( - self._hf_dataset, column=column - ) - self._hf_dataset_iterator = iter(self._hf_dataset) - - # NOTE: Must be after all the parameters since the queue population - # function requires attributes above - super().__init__( - type_="transformers_dataset", - source=str(dataset), - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def __len__(self) -> int: - if not isinstance(self._hf_dataset, Dataset): - raise ValueError("Can't get dataset size for IterableDataset object") - else: - return len(self._hf_dataset) - - def create_item(self) -> TextGenerationRequest: - """ - Create a new result request item from the dataset. - - :return: A new result request. - :rtype: TextGenerationRequest - """ - - logger.debug("Creating new request item from dataset") - - try: - data = next(self._hf_dataset_iterator) - except StopIteration: - self._hf_dataset_iterator = iter(self._hf_dataset) - data = next(self._hf_dataset_iterator) - - prompt = data[self._hf_column] - token_count = len(self.tokenizer.tokenize(prompt)) - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=token_count, - ) - logger.debug(f"Created new TextGenerationRequest: {request}") - - return request diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py index 39485648..e26f3bb3 100644 --- a/src/guidellm/scheduler/__init__.py +++ b/src/guidellm/scheduler/__init__.py @@ -1,4 +1,52 @@ -from .load_generator import LoadGenerationMode, LoadGenerator -from .scheduler import Scheduler, SchedulerResult +from .result import ( + SchedulerRequestInfo, + SchedulerRequestResult, + SchedulerResult, + SchedulerRunInfo, +) +from .scheduler import Scheduler +from .strategy import ( + AsyncConstantStrategy, + AsyncPoissonStrategy, + ConcurrentStrategy, + SchedulingStrategy, + StrategyType, + SynchronousStrategy, + ThroughputStrategy, + strategy_display_str, +) +from .types import RequestT, ResponseT +from .worker import ( + GenerativeRequestsWorker, + GenerativeRequestsWorkerDescription, + RequestsWorker, + ResolveStatus, + WorkerDescription, + WorkerProcessRequest, + WorkerProcessResult, +) -__all__ = ["LoadGenerationMode", "LoadGenerator", "Scheduler", "SchedulerResult"] +__all__ = [ + "SchedulerRequestInfo", + "SchedulerRequestResult", + "SchedulerResult", + "SchedulerRunInfo", + "Scheduler", + "AsyncConstantStrategy", + "AsyncPoissonStrategy", + "ConcurrentStrategy", + "SchedulingStrategy", + "StrategyType", + "SynchronousStrategy", + "ThroughputStrategy", + "strategy_display_str", + "RequestT", + "ResponseT", + "WorkerProcessRequest", + "WorkerProcessResult", + "ResolveStatus", + "WorkerDescription", + "RequestsWorker", + "GenerativeRequestsWorkerDescription", + "GenerativeRequestsWorker", +] diff --git a/src/guidellm/scheduler/load_generator.py 
b/src/guidellm/scheduler/load_generator.py deleted file mode 100644 index f629752a..00000000 --- a/src/guidellm/scheduler/load_generator.py +++ /dev/null @@ -1,196 +0,0 @@ -import time -from typing import Generator, Literal, Optional, get_args - -import numpy as np -from loguru import logger - -__all__ = ["LoadGenerationMode", "LoadGenerator"] - -LoadGenerationMode = Literal["synchronous", "constant", "poisson", "throughput"] - - -class LoadGenerator: - """ - Load Generator class that generates timestamps for load generation. - - This class supports multiple load generation modes: "constant", "poisson", - "throughput", and "synchronous". Each mode has its own method for generating - timestamps based on the rate provided during initialization. - - :param mode: The mode of load generation. Valid options are "constant", - "poisson", "throughput", and "synchronous". - :type mode: LoadGenerationMode - :param rate: The rate at which to generate timestamps. This value is - interpreted differently depending on the mode. - :type rate: float - - :raises ValueError: If an invalid mode is provided. - """ - - def __init__(self, mode: LoadGenerationMode, rate: Optional[float] = None): - """ - Initialize the Load Generator with the mode and rate. - - :param mode: The mode of load generation ("constant", "poisson", "throughput", - or "synchronous"). - :type mode: LoadGenerationMode - :param rate: The rate at which to generate timestamps. In the "constant" - mode, this represents the frequency of events. In the "poisson" mode, - it represents the average frequency. - :type rate: Optional[float] - """ - if mode not in get_args(LoadGenerationMode): - error = ValueError( - f"{mode} is not a valid Load Generation Mode. " - f"Valid options are {get_args(LoadGenerationMode)}" - ) - logger.error(error) - raise error - - if mode not in ["synchronous", "throughput"] and (rate is None or rate <= 0): - error = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") - logger.error(error) - raise error - - self._mode = mode - self._rate = rate - logger.debug( - "Initialized LoadGenerator with mode: {mode}, rate: {rate}", - mode=mode, - rate=rate, - ) - - @property - def mode(self) -> LoadGenerationMode: - """ - Get the mode of load generation. - - :return: The mode of load generation. - :rtype: LoadGenerationMode - """ - return self._mode - - @property - def rate(self) -> Optional[float]: - """ - Get the rate of load generation. - - :return: The rate of load generation. - :rtype: Optional[float] - """ - return self._rate - - def times(self) -> Generator[float, None, None]: - """ - Generate timestamps for load generation based on the selected mode. - - :return: A generator that yields timestamps at which each load - should be initiated. - :rtype: Generator[float, None, None] - - :raises ValueError: If the mode is invalid. - """ - logger.debug(f"Generating timestamps using mode: {self._mode}") - - if self._mode == "throughput": - yield from self.throughput_times() - elif self._mode == "constant": - yield from self.constant_times() - elif self._mode == "poisson": - yield from self.poisson_times() - elif self._mode == "synchronous": - yield from self.synchronous_times() - else: - logger.error(f"Invalid mode encountered: {self._mode}") - raise ValueError(f"Invalid mode: {self._mode}") - - def synchronous_times(self) -> Generator[float, None, None]: - """ - Generate invalid timestamps for the "synchronous" mode. - - :return: A generator that yields a constant invalid timestamp (-1.0). 
- :rtype: Generator[float, None, None] - """ - logger.debug("Generating invalid timestamps for synchronous mode") - while True: - yield -1.0 - - def throughput_times(self) -> Generator[float, None, None]: - """ - Generate timestamps at the maximum rate possible, returning the current time. - - :return: A generator that yields the current time in seconds. - :rtype: Generator[float, None, None] - """ - logger.debug("Generating timestamps at throughput rate") - while True: - yield time.time() - - def constant_times(self) -> Generator[float, None, None]: - """ - Generate timestamps at a constant rate based on the specified rate. - - :return: A generator that yields timestamps incremented by 1/rate seconds. - :rtype: Generator[float, None, None] - """ - logger.debug("Generating constant rate timestamps with rate: {}", self._rate) - - if self._rate is None or self._rate == 0: - raise ValueError( - "Rate must be > 0 for constant mode, given: {}", self._rate - ) - - start_time = time.time() - time_increment = 1.0 / self._rate - counter = 0 - - while True: - yield_time = start_time + time_increment * counter - logger.debug(f"Yielding timestamp: {yield_time}") - yield yield_time - counter += 1 - - def poisson_times(self) -> Generator[float, None, None]: - """ - Generate timestamps based on a Poisson process, where the number - of requests to be sent per second is drawn from a Poisson distribution. - The inter arrival time between requests is exponentially distributed. - - :return: A generator that yields timestamps based on a Poisson distribution. - :rtype: Generator[float, None, None] - """ - logger.debug("Generating Poisson rate timestamps with rate: {}", self._rate) - - if self._rate is None or self._rate == 0: - raise ValueError("Rate must be > 0 for poisson mode, given: {}", self._rate) - - time_tracker = time.time() - rng = np.random.default_rng() - time_increment = 1.0 - - while True: - num_requests = rng.poisson(self._rate) - - if num_requests == 0: - yield time_tracker + time_increment - else: - inter_arrival_times = rng.exponential(1.0 / self._rate, num_requests) - logger.debug( - "Calculated new inter-arrival times for poisson process: {}", - inter_arrival_times, - ) - arrival_time_tracker = time_tracker - - for arrival_time in inter_arrival_times: - arrival_time_tracker += arrival_time - - if arrival_time_tracker > time_tracker + time_increment: - logger.debug( - "Arrival time tracker: {} is greater than current time", - arrival_time_tracker, - ) - break - - yield arrival_time_tracker - - time_tracker += time_increment # Move on to the next time period diff --git a/src/guidellm/scheduler/result.py b/src/guidellm/scheduler/result.py new file mode 100644 index 00000000..ab1094ad --- /dev/null +++ b/src/guidellm/scheduler/result.py @@ -0,0 +1,137 @@ +from typing import ( + Generic, + Literal, + Optional, +) + +from guidellm.objects import StandardBaseModel +from guidellm.scheduler.strategy import SchedulingStrategy +from guidellm.scheduler.types import RequestT, ResponseT + +__all__ = [ + "SchedulerResult", + "SchedulerRequestResult", + "SchedulerRunInfo", + "SchedulerRequestInfo", +] + + +class SchedulerRunInfo(StandardBaseModel): + """ + Information about the current run of the scheduler. + This class holds metadata about the scheduling run, + including the start and end times, the number of processes, + and the scheduling strategy used. + It also tracks the number of requests created, queued, pending, + and completed during the run. 
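# --- Editor's illustrative sketch (not part of this patch) ---
# How the run-level counters described above move as one request flows through
# the scheduler. SynchronousStrategy is used only to satisfy the required
# `strategy` field; the scheduler itself mutates these same counters in
# _add_requests and _check_result_ready.
import time

from guidellm.scheduler import SchedulerRunInfo, SynchronousStrategy

info = SchedulerRunInfo(
    start_time=time.time(),
    end_time=time.time() + 60.0,  # run for at most a minute
    end_number=1,                 # a single request
    processes=1,
    strategy=SynchronousStrategy(),
)

info.created_requests += 1      # request pulled from the loader
info.queued_requests += 1       # placed on the multiprocessing queue
info.queued_requests -= 1       # a worker dequeued it ("request_scheduled")
info.scheduled_requests += 1
info.scheduled_requests -= 1    # the worker began resolving it ("request_start")
info.processing_requests += 1
info.processing_requests -= 1   # the response came back ("request_complete")
info.completed_requests += 1
# --- end sketch ---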
+ + :param start_time: The start time of the scheduling run. + :param end_time: The end time of the scheduling run; + if None, then this will be math.inf. + :param end_number: The maximum number of requests to be processed; + if None, then this will be math.inf. + :param processes: The number of processes used in the scheduling run. + :param strategy: The scheduling strategy used in the run. + This should be an instance of SchedulingStrategy. + :param created_requests: The number of requests created during the run. + :param queued_requests: The number of requests queued during the run. + :param scheduled_requests: The number of requests scheduled during the run. + (requests pending being sent to the worker but recieved by a process) + :param processing_requests: The number of requests actively being run. + :param completed_requests: The number of requests completed during the run. + """ + + start_time: float + end_time: float + end_number: float + processes: int + strategy: SchedulingStrategy + + created_requests: int = 0 + queued_requests: int = 0 + scheduled_requests: int = 0 + processing_requests: int = 0 + completed_requests: int = 0 + + +class SchedulerRequestInfo(StandardBaseModel): + """ + Information about a specific request run through the scheduler. + This class holds metadata about the request, including + the targeted start time, queued time, start time, end time, + and the process ID that handled the request. + + :param targeted_start_time: The targeted start time for the request (time.time()). + :param queued_time: The time the request was queued (time.time()). + :param scheduled_time: The time the request was scheduled (time.time()) + (any sleep time before the request was sent to the worker). + :param worker_start: The time the worker started processing request (time.time()). + :param worker_end: The time the worker finished processing request. (time.time()). + :param process_id: The ID of the underlying process that handled the request. + """ + + requested: bool = False + completed: bool = False + errored: bool = False + canceled: bool = False + + targeted_start_time: float = -1 + queued_time: float = -1 + dequeued_time: float = -1 + scheduled_time: float = -1 + worker_start: float = -1 + request_start: float = -1 + request_end: float = -1 + worker_end: float = -1 + process_id: int = -1 + + +class SchedulerResult(StandardBaseModel): + """ + The yielded, iterative result for a scheduler run. + These are triggered on the start and end of the run, + as well as on the start and end of each request. + Depending on the type, it will hold the request and response + along with information and statistics about the request and general run. + + :param type_: The type of the result, which can be one of: + - "run_start": Indicates the start of the run. + - "run_complete": Indicates the completion of the run (teardown happens after). + - "request_start": Indicates the start of a request. + - "request_complete": Indicates the completion of a request. + :param request: The request that was processed. + :param response: The response from the worker for the request. + :param request_info: Information about the request, including + the targeted start time, queued time, start time, end time, + and the process ID that handled the request. + :param run_info: Information about the current run of the scheduler, + including the start and end times, the number of processes, + and the scheduling strategy used. 
+ It also tracks the number of requests created, queued, pending, + and completed during the run. + """ + + pydantic_type: Literal["scheduler_result"] = "scheduler_result" + type_: Literal[ + "run_start", + "run_complete", + "request_scheduled", + "request_start", + "request_complete", + ] + run_info: SchedulerRunInfo + + +class SchedulerRequestResult( + SchedulerResult, + Generic[RequestT, ResponseT], +): + pydantic_type: Literal["scheduler_request_result"] = "scheduler_request_result" # type: ignore[assignment] + type_: Literal[ + "request_scheduled", + "request_start", + "request_complete", + ] + request: RequestT + request_info: SchedulerRequestInfo + response: Optional[ResponseT] = None diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py index 2f8c44fe..0be0ebb7 100644 --- a/src/guidellm/scheduler/scheduler.py +++ b/src/guidellm/scheduler/scheduler.py @@ -1,417 +1,366 @@ import asyncio import math +import multiprocessing +import multiprocessing.queues import time -from dataclasses import dataclass -from typing import AsyncGenerator, Literal, Optional, Union, get_args +from concurrent.futures import ProcessPoolExecutor +from typing import ( + Any, + AsyncGenerator, + Generic, + Iterable, + Iterator, + List, + Optional, + Tuple, + Union, +) from loguru import logger -from guidellm.backend import Backend, ResponseSummary, StreamingTextResponse from guidellm.config import settings -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationError, - TextGenerationRequest, - TextGenerationResult, +from guidellm.scheduler.result import ( + SchedulerRequestResult, + SchedulerResult, + SchedulerRunInfo, +) +from guidellm.scheduler.strategy import SchedulingStrategy +from guidellm.scheduler.types import RequestT, ResponseT +from guidellm.scheduler.worker import ( + RequestsWorker, + WorkerProcessRequest, + WorkerProcessResult, ) -from guidellm.request import RequestGenerator -from guidellm.scheduler.load_generator import LoadGenerationMode, LoadGenerator - -__all__ = ["Scheduler", "SchedulerResult"] - - -@dataclass -class SchedulerResult: - """ - Represents the result of a single task execution within the Scheduler. - - :param completed: Indicates if the task is completed. - :type completed: bool - :param count_total: Total number of tasks to be executed. - :type count_total: int - :param count_completed: Number of tasks that have been completed so far. - :type count_completed: int - :param report: Benchmark data for the task execution. - :type benchmark: TextGenerationBenchmark - :param current_result: The result of the current request, if any. - :type current_result: Optional[Union[TextGenerationResult, Exception]] - """ - completed: bool - count_total: int - count_completed: int - benchmark: TextGenerationBenchmark - current_result: Optional[Union[TextGenerationResult, TextGenerationError]] = None +__all__ = ["Scheduler"] -class Scheduler: +class Scheduler(Generic[RequestT, ResponseT]): """ - Schedules and manages the execution of tasks for text generation requests. - - :param generator: The request generator that produces text generation requests. - :type generator: RequestGenerator - :param backend: The backend that processes the requests. - :type backend: Backend - :param mode: The mode of load generation (e.g., synchronous, asynchronous). - :type mode: LoadGenerationMode - :param rate: The rate at which requests are generated, if applicable. - :type rate: Optional[float] - :param max_number: Maximum number of requests to be processed. 
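# --- Editor's illustrative sketch (not part of this patch) ---
# Deriving simple latency figures from the SchedulerRequestInfo timestamps defined
# above. `result` is assumed to be a SchedulerRequestResult yielded by
# Scheduler.run() with type_ == "request_complete".
def summarize(info) -> dict:
    return {
        # time spent on the multiprocessing queue before a worker picked it up
        "queue_wait_s": info.dequeued_time - info.queued_time,
        # how far the actual send drifted from the targeted start time
        "start_drift_s": info.request_start - info.targeted_start_time,
        # end-to-end latency of the backend call itself
        "request_latency_s": info.request_end - info.request_start,
        "succeeded": info.completed and not info.errored and not info.canceled,
    }

# metrics = summarize(result.request_info)
# --- end sketch ---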
- :type max_number: Optional[int] - :param max_duration: Maximum duration in seconds for which requests - should be processed. - :type max_duration: Optional[float] - - :raises ValueError: If neither max_number nor max_duration is specified or - if they are not positive. + A class that handles the scheduling of requests to a worker. + This class is responsible for managing the lifecycle of the requests, + including their creation, queuing, and processing. + It uses a multiprocessing approach to handle requests concurrently + and efficiently, based on the specified scheduling strategy. + The Scheduler class is designed to work with a RequestsWorker, + which is an abstract base class that defines the interface for a worker + that can resolve requests asynchronously or synchronously. + The Scheduler class also supports different scheduling strategies, + including synchronous, throughput, and concurrent strategies. + + :param worker: The worker that will process the requests. + This should be an instance of RequestsWorker. + :param request_loader: An iterable that generates requests. + This can be a list, generator, or any other iterable. + The requests will be processed by the worker. """ def __init__( self, - generator: RequestGenerator, - backend: Backend, - mode: LoadGenerationMode = "synchronous", - rate: Optional[float] = None, - max_number: Optional[int] = None, - max_duration: Optional[float] = None, + worker: RequestsWorker[RequestT, ResponseT], + request_loader: Iterable[RequestT], ): - logger.info( - "Scheduler initialized with params: generator={}, backend={}, mode={}, " - "rate={}, max_number={}, max_duration={}", - generator, - backend, - mode, - rate, - max_number, - max_duration, - ) - - if mode not in get_args(LoadGenerationMode): - err = ValueError( - f"{mode} is not a valid Load Generation Mode. " - f"Valid options are {get_args(LoadGenerationMode)}" - ) - logger.error(err) - raise err - - if not max_number and not max_duration: - err = ValueError("Either max_number or max_duration must be specified") - logger.error(err) - raise err - - if max_number and max_number <= 0: - err = ValueError(f"max_number must be > 0, given: {max_number}") - logger.error(err) - raise err - - if max_duration and max_duration <= 0: - err = ValueError(f"max_duration must be > 0, given: {max_duration}") - logger.error(err) - raise err - - if mode in ["constant", "poisson"] and not rate: - err = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}") - logger.error(err) - raise err - - self._generator = generator - self._backend = backend - self._mode = mode - self._rate = rate - self._max_number = max_number - self._max_duration = max_duration - - self._load_generator = LoadGenerator(mode, rate) - - @property - def generator(self) -> RequestGenerator: - """ - The request generator that produces text generation requests. - - :return: The request generator instance. - :rtype: RequestGenerator - """ - return self._generator - - @property - def backend(self) -> Backend: - """ - The backend that processes the requests. - - :return: The backend instance. - :rtype: Backend - """ - return self._backend + if not isinstance(worker, RequestsWorker): + raise ValueError(f"Invalid worker: {worker}") - @property - def mode(self) -> LoadGenerationMode: - """ - The mode of load generation (e.g., synchronous, asynchronous). - - :return: The load generation mode. 
- :rtype: LoadGenerationMode - """ - return self._mode - - @property - def rate(self) -> Optional[float]: - """ - The rate at which requests are generated, if applicable. + if not isinstance(request_loader, Iterable): + raise ValueError(f"Invalid request_loader: {request_loader}") - :return: The rate of request generation. - :rtype: Optional[float] - """ - return self._rate + self.worker = worker + self.request_loader = request_loader - @property - def max_number(self) -> Optional[int]: + async def run( + self, + scheduling_strategy: SchedulingStrategy, + max_number: Optional[int] = None, + max_duration: Optional[float] = None, + ) -> AsyncGenerator[ + Union[SchedulerResult, SchedulerRequestResult[RequestT, ResponseT]], None + ]: """ - Maximum number of requests to be processed. - - :return: The maximum number of requests. - :rtype: Optional[int] + The main method that runs the scheduler. + This method is a generator that yields SchedulerResult objects + at the start and end of the run, as well as at the start and end + of each request. + It uses multiprocessing to handle requests concurrently + and efficiently, based on the specified scheduling strategy. + The method also handles the lifecycle of the requests, + including their creation, queuing, and processing. + The method is designed to be used as an asynchronous generator, + allowing it to be used with asyncio and other asynchronous frameworks. + + :param scheduling_strategy: The scheduling strategy to use. + Specifies the times at which requests will be sent as well how many + worker processes are used and if requests are scheduled sync or async. + This can be one of the following: + - "synchronous": Requests are sent synchronously. + - "throughput": Requests are sent at the maximum rate possible. + - An instance of SchedulingStrategy. + :param max_number: The maximum number of requests to process. + If None, then no limit is set and either the iterator must be exhaustible + or the max_duration must be set. + :param max_duration: The maximum duration for the scheduling run. + If None, then no limit is set and either the iterator must be exhaustible + or the max_number must be set. + :return: An asynchronous generator that yields SchedulerResult objects. + Each SchedulerResult object contains information about the request, + the response, and the run information. """ - return self._max_number + if scheduling_strategy is None or not isinstance( + scheduling_strategy, SchedulingStrategy + ): + raise ValueError(f"Invalid scheduling strategy: {scheduling_strategy}") - @property - def max_duration(self) -> Optional[float]: - """ - Maximum duration in seconds for which requests should be processed. + if max_number is not None and max_number < 1: + raise ValueError(f"Invalid max_number: {max_number}") - :return: The maximum duration in seconds. - :rtype: Optional[float] - """ - return self._max_duration + if max_duration is not None and max_duration < 0: + raise ValueError(f"Invalid max_duration: {max_duration}") - @property - def load_generator(self) -> LoadGenerator: - """ - The load generator responsible for generating load based on mode and rate. 
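# --- Editor's illustrative sketch (not part of this patch) ---
# Driving the new Scheduler.run() async generator described above. `worker` and
# `request_loader` are assumed to be an already-built RequestsWorker implementation
# (e.g. the GenerativeRequestsWorker later in this diff) and any iterable of requests.
import asyncio

from guidellm.scheduler import AsyncConstantStrategy, Scheduler, SchedulerRequestResult

async def run_benchmark(worker, request_loader):
    scheduler = Scheduler(worker=worker, request_loader=request_loader)
    strategy = AsyncConstantStrategy(rate=5.0)  # ~5 requests per second

    async for result in scheduler.run(
        scheduling_strategy=strategy,
        max_number=100,     # stop after 100 requests ...
        max_duration=60.0,  # ... or 60 seconds, whichever comes first
    ):
        if isinstance(result, SchedulerRequestResult) and result.type_ == "request_complete":
            info = result.request_info
            print(info.process_id, info.request_end - info.request_start)

# asyncio.run(run_benchmark(worker, request_loader))
# --- end sketch ---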
+ with multiprocessing.Manager() as manager, ProcessPoolExecutor( + max_workers=scheduling_strategy.processes_limit + ) as executor: + requests_iter: Optional[Iterator[Any]] = None + futures, requests_queue, responses_queue = await self._start_processes( + manager, executor, scheduling_strategy + ) + run_info, requests_iter, times_iter = self._run_setup( + futures, scheduling_strategy, max_number, max_duration + ) + yield SchedulerResult( + type_="run_start", + run_info=run_info, + ) - :return: The load generator instance. - :rtype: LoadGenerator - """ - return self._load_generator + try: + while True: + # check errors and raise them + for future in futures: + if future.done() and (err := future.exception()) is not None: + raise err + + if ( + requests_iter is None + and run_info.completed_requests >= run_info.created_requests + ): + # we've exhausted all requests we've wanted to run + # and yielded all responses + break + + requests_iter = self._add_requests( + requests_iter, + times_iter, + requests_queue, + run_info, + ) + await asyncio.sleep(0) # enable requests to start + + iter_result = self._check_result_ready( + responses_queue, + run_info, + ) + if iter_result is not None: + yield iter_result + + # yield control to the event loop + await asyncio.sleep(settings.default_async_loop_sleep) + except Exception as err: + raise RuntimeError(f"Scheduler run failed: {err}") from err - @property - def benchmark_mode(self) -> Literal["asynchronous", "synchronous", "throughput"]: - """ - The report mode for the scheduler. + yield SchedulerResult( + type_="run_complete", + run_info=run_info, + ) - :return: The report mode. - :rtype: Literal["asynchronous", "synchronous", "throughput"] - """ - if self._mode == "synchronous": - return "synchronous" + await self._stop_processes(futures, requests_queue) - if self._mode == "throughput": - return "throughput" + async def _start_processes( + self, + manager, + executor: ProcessPoolExecutor, + scheduling_strategy: SchedulingStrategy, + ) -> Tuple[ + List[asyncio.Future], + multiprocessing.Queue, + multiprocessing.Queue, + ]: + await self.worker.prepare_multiprocessing() + requests_queue = manager.Queue( + maxsize=scheduling_strategy.queued_requests_limit + ) + responses_queue = manager.Queue() + per_process_requests_limit = scheduling_strategy.processing_requests_limit // ( + scheduling_strategy.processes_limit + ) - return "asynchronous" + futures = [] + loop = asyncio.get_event_loop() + for process_id in range(scheduling_strategy.processes_limit): + if scheduling_strategy.processing_mode == "sync": + futures.append( + loop.run_in_executor( + executor, + self.worker.process_loop_synchronous, + requests_queue, + responses_queue, + process_id, + ) + ) + elif scheduling_strategy.processing_mode == "async": + futures.append( + loop.run_in_executor( + executor, + self.worker.process_loop_asynchronous, + requests_queue, + responses_queue, + per_process_requests_limit, + process_id, + ) + ) + else: + raise ValueError( + f"Invalid processing mode: {scheduling_strategy.processing_mode} " + f"for strategy: {scheduling_strategy}" + ) - async def run(self) -> AsyncGenerator[SchedulerResult, None]: - """ - Run the scheduler to process requests based on the configured mode, rate, - maximum number, and maximum duration. + await asyncio.sleep(0.1) # give time for processes to start - :yield: The result of each task executed by the scheduler. 
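# --- Editor's illustrative sketch (not part of this patch) ---
# The per-process limit computed in _start_processes above simply divides the
# strategy's total in-flight budget across the worker processes. Numbers here are
# hypothetical; real values come from the strategy and settings.
processing_requests_limit = 512  # e.g. settings.max_concurrency
processes_limit = 8              # e.g. min(cpu_count - 1, settings.max_worker_processes)

per_process_requests_limit = processing_requests_limit // processes_limit
assert per_process_requests_limit == 64  # each async worker keeps at most 64 requests in flight
# --- end sketch ---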
- :rtype: Generator[SchedulerResult, None, None] - """ - logger.info("Starting Scheduler run") + return futures, requests_queue, responses_queue - benchmark = TextGenerationBenchmark(mode=self.benchmark_mode, rate=self.rate) + def _run_setup( + self, + processes: List[asyncio.Future], + scheduling_strategy: SchedulingStrategy, + max_number: Optional[int], + max_duration: Optional[float], + ) -> Tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]: + requests_iter = iter(self.request_loader) start_time = time.time() - end_time = start_time + self.max_duration if self.max_duration else math.inf - max_number = float(self.max_number) if self.max_number else math.inf - runner = self._run_sync if self._mode == "synchronous" else self._run_async - count_total = ( - self.max_number - if self.max_number - else round(self.max_duration) - if self.max_duration - else 0 - ) + times_iter = iter(scheduling_strategy.request_times()) + end_time = time.time() + (max_duration or math.inf) + end_number = max_number or math.inf - # yield initial result for progress tracking - yield SchedulerResult( - completed=False, - count_total=count_total, - count_completed=0, - benchmark=benchmark, - ) - - run_count = 0 - async for res in runner(benchmark, end_time, max_number): - run_count += 1 - count_completed = ( - min(run_count, self.max_number) - if self.max_number - else round(time.time() - start_time) - if self.max_duration - else 0 - ) - - yield SchedulerResult( - completed=False, - count_total=count_total, - count_completed=count_completed, - benchmark=benchmark, - current_result=res, + try: + # update end number if the request loader is finite and less than max + iter_length = len(self.request_loader) # type: ignore[arg-type] + if 0 < iter_length < end_number: + end_number = iter_length + except Exception: # noqa: BLE001, S110 + pass + + if end_number == math.inf and end_time is None: + logger.warning( + "No end number or end time set, " + "scheduler will run indefinitely until the request loader is exhausted." 
) - logger.info("Scheduler run completed") - - yield SchedulerResult( - completed=True, - count_total=count_total, - count_completed=( - benchmark.request_count + benchmark.error_count - if self.max_number - else round(time.time() - start_time) - if self.max_duration - else 0 - ), - benchmark=benchmark, + info = SchedulerRunInfo( + start_time=start_time, + end_time=end_time, + end_number=end_number, + processes=len(processes), + strategy=scheduling_strategy, ) - async def _run_sync( - self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float - ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: - for index, (request, submit_at) in enumerate( - zip(self.generator, self.load_generator.times()) - ): - if index >= max_number or time.time() >= end_time: - break + return info, requests_iter, times_iter - logger.debug( - "Running synchronous request={} at submit_at={}", - request, - submit_at, + def _add_requests( + self, + requests_iter: Optional[Iterator[Any]], + times_iter: Iterator[float], + requests_queue: multiprocessing.Queue, + run_info: SchedulerRunInfo, + ) -> Optional[Iterator[Any]]: + if requests_iter is not None: + try: + added_count = 0 + + while ( + not requests_queue.full() + and added_count < settings.max_add_requests_per_loop + ): + if run_info.created_requests >= run_info.end_number: + raise StopIteration + + if ( + request_time := next(times_iter) + ) >= run_info.end_time or time.time() >= run_info.end_time: + raise StopIteration + + request = next(requests_iter) + work_req: WorkerProcessRequest[RequestT] = WorkerProcessRequest( + request=request, + start_time=request_time, + timeout_time=run_info.end_time, + queued_time=time.time(), + ) + requests_queue.put(work_req) + + run_info.created_requests += 1 + run_info.queued_requests += 1 + added_count += 1 + except StopIteration: + # we've reached the limit number, limit time, or exhausted the requests + # set to None to stop adding more and tell the loop no more requests + requests_iter = None + + return requests_iter + + def _check_result_ready( + self, + responses_queue: multiprocessing.Queue, + run_info: SchedulerRunInfo, + ) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]: + try: + process_response: WorkerProcessResult[RequestT, ResponseT] = ( + responses_queue.get_nowait() ) - benchmark.request_started() - result = await self._scheduled_request(request, submit_at, end_time) - if result is not None: - benchmark.request_completed(result) - logger.debug("Request completed with output: {}", result) - yield result - - async def _run_async( - self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float - ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]: - tasks = [] - pending = asyncio.Semaphore(settings.max_concurrency) - - for index, (request, submit_at) in enumerate( - zip(self.generator, self.load_generator.times()) - ): - # wait for number of pending tasks to be >= max_concurrency - await pending.acquire() - - if index >= max_number or time.time() >= end_time or submit_at >= end_time: - break - - logger.debug( - "Running asynchronous request={} at submit_at={}", - request, - submit_at, + except multiprocessing.queues.Empty: # type: ignore[attr-defined] + return None + + if process_response.type_ == "request_scheduled": + run_info.queued_requests -= 1 + run_info.scheduled_requests += 1 + + return SchedulerRequestResult( + type_="request_scheduled", + run_info=run_info, + request=process_response.request, + 
request_info=process_response.info, + response=None, ) - def _completed(_task: asyncio.Task) -> None: - # NOTE: this is only ok because we don't use threads/processes - nonlocal pending - pending.release() - _res = _task.result() + if process_response.type_ == "request_start": + run_info.scheduled_requests -= 1 + run_info.processing_requests += 1 - if _res: - benchmark.request_completed(_res) - logger.debug("Request completed: {}", _res) - - benchmark.request_started() - task = asyncio.create_task( - self._scheduled_request(request, submit_at, end_time) + return SchedulerRequestResult( + type_="request_start", + run_info=run_info, + request=process_response.request, + request_info=process_response.info, + response=None, ) - task.add_done_callback(_completed) - tasks.append(task) - - # release control to the event loop for other tasks - await asyncio.sleep(0) - for compl_task in asyncio.as_completed(tasks): - task_res = await compl_task - if task_res is not None: - yield task_res + if process_response.type_ == "request_complete": + run_info.processing_requests -= 1 + run_info.completed_requests += 1 - async def _scheduled_request( - self, request: TextGenerationRequest, submit_at: float, end_time: float - ) -> Optional[Union[TextGenerationResult, TextGenerationError]]: - try: - if submit_at > end_time: - raise asyncio.TimeoutError( - f"Request submission time {submit_at} " - f"is greater than end time {end_time}" - ) - - if submit_at > time.time(): - await asyncio.sleep(submit_at - time.time()) - - timeout = ( - end_time - time.time() if end_time and end_time < math.inf else None + return SchedulerRequestResult( + type_="request_complete", + run_info=run_info, + request=process_response.request, + request_info=process_response.info, + response=process_response.response, ) + raise ValueError(f"Invalid process response type: {process_response}") - return await asyncio.wait_for( - self._resolve_text_request(request), timeout=timeout - ) - except Exception as exc: # noqa: BLE001 - if not isinstance(exc, asyncio.TimeoutError): - logger.warning("Request {} failed: {}", request, exc) - - return TextGenerationError(request=request, message=str(exc)) - - async def _resolve_text_request( - self, request: TextGenerationRequest - ) -> TextGenerationResult: - final_resp = None - first_token_time = None - last_token_time = None - - if request.type_ == "text": - async for resp in self._backend.text_completions( # type: ignore[attr-defined] - prompt=request.prompt, - id_=request.id, - prompt_token_count=request.prompt_token_count, - output_token_count=request.output_token_count, - ): - if isinstance(resp, StreamingTextResponse) and resp.type_ == "iter": - first_token_time = first_token_time or resp.time - last_token_time = resp.time - - final_resp = resp - elif request.type_ == "chat": - async for resp in self._backend.chat_completions( # type: ignore[attr-defined] - content=request.prompt, - id_=request.id, - prompt_token_count=request.prompt_token_count, - output_token_count=request.output_token_count, - ): - if isinstance(resp, StreamingTextResponse) and resp.type_ == "iter": - first_token_time = first_token_time or resp.time - last_token_time = resp.time - - final_resp = resp - - if not final_resp or not isinstance(final_resp, ResponseSummary): - raise ValueError( - f"Invalid final response for request: {request} " - f"and backend: {self._backend}, recieved: {final_resp}" - ) + async def _stop_processes( + self, + futures: List[asyncio.Future], + requests_queue: multiprocessing.Queue, + ): + for _ in 
futures: + requests_queue.put(None) - return TextGenerationResult( - request=request, - prompt_token_count=final_resp.prompt_tokens, - output=final_resp.value, - output_token_count=resp.output_tokens, - start_time=resp.start_time, - end_time=resp.end_time, - first_token_time=first_token_time, - last_token_time=last_token_time, - ) + await asyncio.gather(*futures) diff --git a/src/guidellm/scheduler/strategy.py b/src/guidellm/scheduler/strategy.py new file mode 100644 index 00000000..7e8d253a --- /dev/null +++ b/src/guidellm/scheduler/strategy.py @@ -0,0 +1,493 @@ +import math +import os +import random +import time +from typing import ( + Generator, + Literal, + Optional, + Union, +) + +from pydantic import Field + +from guidellm.config import settings +from guidellm.objects import StandardBaseModel + +__all__ = [ + "StrategyType", + "SchedulingStrategy", + "SynchronousStrategy", + "ConcurrentStrategy", + "ThroughputStrategy", + "AsyncConstantStrategy", + "AsyncPoissonStrategy", + "strategy_display_str", +] + + +StrategyType = Literal["synchronous", "concurrent", "throughput", "constant", "poisson"] + + +class SchedulingStrategy(StandardBaseModel): + """ + An abstract base class for scheduling strategies. + This class defines the interface for scheduling requests and provides + a common structure for all scheduling strategies. + Subclasses should implement the `request_times` method to provide + specific scheduling behavior. + + :param type_: The type of scheduling strategy to use. + This should be one of the predefined strategy types. + """ + + type_: Literal["strategy"] = Field( + description="The type of scheduling strategy schedule requests with.", + ) + + @property + def processing_mode(self) -> Literal["sync", "async"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + This property should be implemented by subclasses to return + the appropriate processing mode. + + :return: The processing mode for the scheduling strategy, + either 'sync' or 'async'. + """ + return "async" + + @property + def processes_limit(self) -> int: + """ + The limit on the number of worker processes for the scheduling strategy. + It determines how many worker processes are created + for the scheduling strategy and must be implemented by subclasses. + + :return: The number of processes for the scheduling strategy. + """ + cpu_cores = os.cpu_count() or 1 + + return min(max(1, cpu_cores - 1), settings.max_worker_processes) + + @property + def queued_requests_limit(self) -> Optional[int]: + """ + The maximum number of queued requests for the scheduling strategy. + It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: The maximum number of queued requests for the scheduling strategy. + """ + return settings.max_concurrency + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: The maximum number of processing requests for the scheduling strategy. + """ + return settings.max_concurrency + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields timestamps for when requests should be sent. 
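# --- Editor's illustrative sketch (not part of this patch) ---
# A minimal custom strategy built on the SchedulingStrategy base class above: it
# yields one timestamp every `interval` seconds and inherits the default async
# processing mode and concurrency limits. The built-in strategies below register
# their own type_ literal instead of reusing "strategy".
import time
from typing import Generator, Literal

from guidellm.scheduler import SchedulingStrategy

class FixedIntervalStrategy(SchedulingStrategy):
    type_: Literal["strategy"] = "strategy"
    interval: float = 2.0

    def request_times(self) -> Generator[float, None, None]:
        start = time.time()
        counter = 0
        while True:
            yield start + counter * self.interval
            counter += 1
# --- end sketch ---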
+ This method should be implemented by subclasses to provide specific + scheduling behavior. + + :return: A generator that yields timestamps for request scheduling + or -1 for requests that should be sent immediately. + """ + raise NotImplementedError("Subclasses must implement request_times() method.") + + +class SynchronousStrategy(SchedulingStrategy): + """ + A class representing a synchronous scheduling strategy. + This strategy schedules requests synchronously, one at a time, + with the maximum rate possible. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for synchronous scheduling. + + :param type_: The synchronous StrategyType to schedule requests synchronously. + """ + + type_: Literal["synchronous"] = "synchronous" # type: ignore[assignment] + + @property + def processing_mode(self) -> Literal["sync"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + + :return: 'sync' for synchronous scheduling strategy + for the single worker process. + """ + return "sync" + + @property + def processes_limit(self) -> int: + """ + The limit on the number of worker processes for the scheduling strategy. + It determines how many worker processes are created + for the scheduling strategy and must be implemented by subclasses. + + :return: 1 for the synchronous scheduling strategy to limit + the worker processes to one. + """ + return 1 + + @property + def queued_requests_limit(self) -> int: + """ + The maximum number of queued requests for the scheduling strategy. + It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: 1 for the synchronous scheduling strategy to limit + the queued requests to one that is ready to be processed. + """ + return 1 + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: 1 for the synchronous scheduling strategy to limit + the processing requests to one that is ready to be processed. + """ + return 1 + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields time.time() so requests are sent immediately, + while scheduling them synchronously. + + :return: A generator that yields time.time() for immediate request scheduling. + """ + while True: + yield time.time() + + +class ConcurrentStrategy(SchedulingStrategy): + """ + A class representing a concurrent scheduling strategy. + This strategy schedules requests concurrently with the specified + number of streams. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for concurrent scheduling. + + :param type_: The concurrent StrategyType to schedule requests concurrently. + :param streams: The number of concurrent streams to use for scheduling requests. + Each stream runs synchronously with the maximum rate possible. + This must be a positive integer. + """ + + type_: Literal["concurrent"] = "concurrent" # type: ignore[assignment] + streams: int = Field( + description=( + "The number of concurrent streams to use for scheduling requests. 
" + "Each stream runs sychronously with the maximum rate possible. " + "This must be a positive integer." + ), + gt=0, + ) + + @property + def processing_mode(self) -> Literal["sync"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + + :return: 'sync' for synchronous scheduling strategy + for the multiple worker processes equal to streams. + """ + return "sync" + + @property + def processes_limit(self) -> int: + """ + The limit on the number of worker processes for the scheduling strategy. + It determines how many worker processes are created + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.streams} for the concurrent scheduling strategy to limit + the worker processes to the number of streams. + """ + return self.streams + + @property + def queued_requests_limit(self) -> int: + """ + The maximum number of queued requests for the scheduling strategy. + It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.streams} for the concurrent scheduling strategy to limit + the queued requests to the number of streams that are ready to be processed. + """ + return self.streams + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.streams} for the concurrent scheduling strategy to limit + the processing requests to the number of streams that ready to be processed. + """ + return self.streams + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields time.time() so requests are sent + immediately, while scheduling them concurrently with the specified + number of streams. + + :return: A generator that yields time.time() for immediate request scheduling. + """ + while True: + yield time.time() + + +class ThroughputStrategy(SchedulingStrategy): + """ + A class representing a throughput scheduling strategy. + This strategy schedules as many requests asynchronously as possible, + with the maximum rate possible. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for throughput scheduling. + + :param type_: The throughput StrategyType to schedule requests asynchronously. + """ + + type_: Literal["throughput"] = "throughput" # type: ignore[assignment] + max_concurrency: Optional[int] = Field( + default=None, + description=( + "The maximum number of concurrent requests to schedule. " + "If set to None, the concurrency value from settings will be used. " + "This must be a positive integer greater than 0." + ), + gt=0, + ) + + @property + def processing_mode(self) -> Literal["async"]: + """ + The processing mode for the scheduling strategy, either 'sync' or 'async'. + This property determines how the worker processes are setup: + either to run synchronously with one request at a time or asynchronously. + + :return: 'async' for asynchronous scheduling strategy + for the multiple worker processes handling requests. + """ + return "async" + + @property + def queued_requests_limit(self) -> int: + """ + The maximum number of queued requests for the scheduling strategy. 
+ It determines how many requests can be queued at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: The processing requests limit to ensure that there are enough + requests even for the worst case scenario where the max concurrent + requests are pulled at once for processing. + """ + return self.processing_requests_limit + + @property + def processing_requests_limit(self) -> int: + """ + The maximum number of processing requests for the scheduling strategy. + It determines how many requests can be processed at one time + for the scheduling strategy and must be implemented by subclasses. + + :return: {self.max_concurrency} for the throughput scheduling strategy to limit + the processing requests to the maximum concurrency. + If max_concurrency is None, then the default processing requests limit + will be used. + """ + return self.max_concurrency or super().processing_requests_limit + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields the start time.time() so requests are sent + immediately, while scheduling as many asynchronously as possible. + + :return: A generator that yields the start time.time() + for immediate request scheduling. + """ + start_time = time.time() + + while True: + yield start_time + + +class AsyncConstantStrategy(ThroughputStrategy): + """ + A class representing an asynchronous constant scheduling strategy. + This strategy schedules requests asynchronously at a constant request rate + in requests per second. + If initial_burst is set, it will send an initial burst of math.floor(rate) + requests to reach the target rate. + This is useful to ensure that the target rate is reached quickly + and then maintained. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for asynchronous constant scheduling. + + :param type_: The constant StrategyType to schedule requests asynchronously. + :param rate: The rate at which to schedule requests asynchronously in + requests per second. This must be a positive float. + :param initial_burst: True to send an initial burst of requests + (math.floor(self.rate)) to reach target rate. + False to not send an initial burst. + """ + + type_: Literal["constant"] = "constant" # type: ignore[assignment] + rate: float = Field( + description=( + "The rate at which to schedule requests asynchronously in " + "requests per second. This must be a positive float." + ), + gt=0, + ) + initial_burst: bool = Field( + default=True, + description=( + "True to send an initial burst of requests (math.floor(self.rate)) " + "to reach target rate. False to not send an initial burst." + ), + ) + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields timestamps for when requests should be sent. + This method schedules requests asynchronously at a constant rate + in requests per second. + If burst_time is set, it will send an initial burst of requests + to reach the target rate. + This is useful to ensure that the target rate is reached quickly + and then maintained. + + :return: A generator that yields timestamps for request scheduling. 
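# --- Editor's illustrative sketch (not part of this patch) ---
# ThroughputStrategy as defined above only caps in-flight requests when
# max_concurrency is set; otherwise it falls back to the settings-driven default
# from the SchedulingStrategy base class.
from guidellm.scheduler import ThroughputStrategy

capped = ThroughputStrategy(max_concurrency=32)
assert capped.processing_requests_limit == 32
assert capped.queued_requests_limit == 32  # queue sized for the worst case pull

uncapped = ThroughputStrategy()  # limits resolve to settings.max_concurrency
# --- end sketch ---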
+ """ + start_time = time.time() + constant_increment = 1.0 / self.rate + + # handle bursts first to get to the desired rate + if self.initial_burst is not None: + # send an initial burst equal to the rate + # to reach the target rate + burst_count = math.floor(self.rate) + for _ in range(burst_count): + yield start_time + + start_time += constant_increment + + counter = 0 + + # continue with constant rate after bursting + while True: + yield start_time + constant_increment * counter + counter += 1 + + +class AsyncPoissonStrategy(ThroughputStrategy): + """ + A class representing an asynchronous Poisson scheduling strategy. + This strategy schedules requests asynchronously at a Poisson request rate + in requests per second. + If initial_burst is set, it will send an initial burst of math.floor(rate) + requests to reach the target rate. + It inherits from the `SchedulingStrategy` base class and + implements the `request_times` method to provide the specific + behavior for asynchronous Poisson scheduling. + + :param type_: The Poisson StrategyType to schedule requests asynchronously. + :param rate: The rate at which to schedule requests asynchronously in + requests per second. This must be a positive float. + :param initial_burst: True to send an initial burst of requests + (math.floor(self.rate)) to reach target rate. + False to not send an initial burst. + """ + + type_: Literal["poisson"] = "poisson" # type: ignore[assignment] + rate: float = Field( + description=( + "The rate at which to schedule requests asynchronously in " + "requests per second. This must be a positive float." + ), + gt=0, + ) + initial_burst: bool = Field( + default=True, + description=( + "True to send an initial burst of requests (math.floor(self.rate)) " + "to reach target rate. False to not send an initial burst." + ), + ) + random_seed: int = Field( + default=42, + description=("The random seed to use for the Poisson distribution. "), + ) + + def request_times(self) -> Generator[float, None, None]: + """ + A generator that yields timestamps for when requests should be sent. + This method schedules requests asynchronously at a Poisson rate + in requests per second. + The inter arrival time between requests is exponentially distributed + based on the rate. + + :return: A generator that yields timestamps for request scheduling. 
+ """ + start_time = time.time() + + if self.initial_burst is not None: + # send an initial burst equal to the rate + # to reach the target rate + burst_count = math.floor(self.rate) + for _ in range(burst_count): + yield start_time + else: + yield start_time + + # set the random seed for reproducibility + rand = random.Random(self.random_seed) # noqa: S311 + + while True: + inter_arrival_time = rand.expovariate(self.rate) + start_time += inter_arrival_time + yield start_time + + +def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> str: + strategy_type = strategy if isinstance(strategy, str) else strategy.type_ + strategy_instance = strategy if isinstance(strategy, SchedulingStrategy) else None + + if strategy_type == "concurrent": + rate = f"@{strategy_instance.streams}" if strategy_instance else "@##" # type: ignore[attr-defined] + elif strategy_type in ("constant", "poisson"): + rate = f"@{strategy_instance.rate:.2f}" if strategy_instance else "@#.##" # type: ignore[attr-defined] + else: + rate = "" + + return f"{strategy_type}{rate}" diff --git a/src/guidellm/scheduler/types.py b/src/guidellm/scheduler/types.py new file mode 100644 index 00000000..42535d71 --- /dev/null +++ b/src/guidellm/scheduler/types.py @@ -0,0 +1,7 @@ +from typing import TypeVar + +__all__ = ["RequestT", "ResponseT"] + + +RequestT = TypeVar("RequestT") +ResponseT = TypeVar("ResponseT") diff --git a/src/guidellm/scheduler/worker.py b/src/guidellm/scheduler/worker.py new file mode 100644 index 00000000..44444c51 --- /dev/null +++ b/src/guidellm/scheduler/worker.py @@ -0,0 +1,512 @@ +import asyncio +import math +import multiprocessing +import multiprocessing.queues +import time +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import ( + Any, + AsyncGenerator, + Dict, + Generic, + Literal, + Optional, + Tuple, + Union, +) + +from loguru import logger +from pydantic import Field + +from guidellm.backend import ( + Backend, + BackendType, + RequestArgs, + ResponseSummary, + StreamingTextResponse, +) +from guidellm.objects import StandardBaseModel +from guidellm.request import GenerationRequest +from guidellm.scheduler.result import SchedulerRequestInfo +from guidellm.scheduler.types import RequestT, ResponseT + +__all__ = [ + "WorkerProcessRequest", + "WorkerProcessResult", + "ResolveStatus", + "WorkerDescription", + "RequestsWorker", + "GenerativeRequestsWorkerDescription", + "GenerativeRequestsWorker", +] + + +@dataclass +class WorkerProcessRequest(Generic[RequestT]): + request: RequestT + start_time: float + timeout_time: float + queued_time: float + + +@dataclass +class WorkerProcessResult(Generic[RequestT, ResponseT]): + type_: Literal["request_scheduled", "request_start", "request_complete"] + request: RequestT + response: Optional[ResponseT] + info: SchedulerRequestInfo + + +@dataclass +class ResolveStatus: + requested: bool + completed: bool + errored: bool + canceled: bool + + request_start: float + request_end: float + + +class WorkerDescription(StandardBaseModel): + type_: Literal["worker"] = "worker" + + +class RequestsWorker(ABC, Generic[RequestT, ResponseT]): + """ + An abstract base class for a worker that processes requests. + This class defines the interface for a worker that can resolve requests + asynchronously or synchronously within the Scheduler class. + Subclasses must implement the `resolve` method, + which takes a request directly given from the load generator, + along with the desired start_time for the request and a timeout_time. 
+ The `resolve` method should return the response from the backend. + """ + + @property + @abstractmethod + def description(self) -> WorkerDescription: + """ + An abstract property that must be implemented by subclasses. + This property should return a Serializable class representing the information + about the worker instance. + """ + ... + + @abstractmethod + async def prepare_multiprocessing(self): + """ + An abstract method that must be implemented by subclasses. + This is useful for workers that have instance state that can not + be shared across processes and should be cleared out and re-initialized + for each new process. + """ + ... + + @abstractmethod + async def resolve( + self, + request: RequestT, + timeout_time: float, + ) -> Tuple[ResolveStatus, ResponseT]: + """ + An abstract method that must be implemented by subclasses. + This method should handle the resolution of a request through asyncio, + including any necessary backend processing and response handling. + + :param request: The request to be resolved generated by the load generator. + :param timeout_time: The timeout time for the request, if there is no timeout + given, then this will be math.inf. + :return: The response from the worker. + """ + ... + + async def get_request( + self, requests_queue: multiprocessing.Queue + ) -> Optional[WorkerProcessRequest[RequestT]]: + return await asyncio.to_thread(requests_queue.get) # type: ignore[attr-defined] + + async def send_result( + self, + results_queue: multiprocessing.Queue, + result: WorkerProcessResult[RequestT, ResponseT], + ): + await asyncio.to_thread(results_queue.put, result) # type: ignore[attr-defined] + + async def resolve_scheduler_request( + self, + request: Any, + queued_time: float, + dequeued_time: float, + start_time: float, + timeout_time: float, + results_queue: multiprocessing.Queue, + process_id: int, + ): + info = SchedulerRequestInfo( + targeted_start_time=start_time, + queued_time=queued_time, + dequeued_time=dequeued_time, + scheduled_time=time.time(), + process_id=process_id, + ) + result: WorkerProcessResult[RequestT, ResponseT] = WorkerProcessResult( + type_="request_scheduled", + request=request, + response=None, + info=info, + ) + asyncio.create_task(self.send_result(results_queue, result)) + + if (wait_time := start_time - time.time()) > 0: + await asyncio.sleep(wait_time) + + info.worker_start = time.time() + result = WorkerProcessResult( + type_="request_start", + request=request, + response=None, + info=info, + ) + asyncio.create_task(self.send_result(results_queue, result)) + + status, response = await self.resolve(request, timeout_time) + info.worker_end = time.time() + info.requested = status.requested + info.completed = status.completed + info.errored = status.errored + info.canceled = status.canceled + info.request_start = status.request_start + info.request_end = status.request_end + result = WorkerProcessResult( + type_="request_complete", + request=request, + response=response, + info=info, + ) + asyncio.create_task(self.send_result(results_queue, result)) + + def process_loop_synchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + process_id: int, + ): + async def _process_runner(): + while ( + process_request := await self.get_request(requests_queue) + ) is not None: + dequeued_time = time.time() + + await self.resolve_scheduler_request( + request=process_request.request, + queued_time=process_request.queued_time, + dequeued_time=dequeued_time, + 
start_time=process_request.start_time, + timeout_time=process_request.timeout_time, + results_queue=results_queue, + process_id=process_id, + ) + + try: + asyncio.run(_process_runner()) + except Exception as exc: # noqa: BLE001 + logger.error( + f"Error in worker process {process_id}: {exc}", + exc_info=True, + stack_info=True, + ) + + def process_loop_asynchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + max_concurrency: Optional[int], + process_id: int, + ): + async def _process_runner(): + pending = asyncio.Semaphore(max_concurrency) if max_concurrency else None + + while ( + process_request := await self.get_request(requests_queue) + ) is not None: + dequeued_time = time.time() + + if pending: + await pending.acquire() + + def _task_done(_: asyncio.Task): + nonlocal pending + if pending: + pending.release() + + task = asyncio.create_task( + self.resolve_scheduler_request( + request=process_request.request, + queued_time=process_request.queued_time, + dequeued_time=dequeued_time, + start_time=process_request.start_time, + timeout_time=process_request.timeout_time, + results_queue=results_queue, + process_id=process_id, + ) + ) + task.add_done_callback(_task_done) + await asyncio.sleep(0) # enable start task immediately + + try: + asyncio.run(_process_runner()) + except Exception as exc: # noqa: BLE001 + logger.error( + f"Error in worker process {process_id}: {exc}", + exc_info=True, + stack_info=True, + ) + + +class GenerativeRequestsWorkerDescription(WorkerDescription): + type_: Literal["generative_requests_worker"] = "generative_requests_worker" # type: ignore[assignment] + backend_type: BackendType + backend_target: str + backend_model: str + backend_info: Dict[str, Any] = Field( + default_factory=dict, + ) + + +class GenerativeRequestsWorker(RequestsWorker[GenerationRequest, ResponseSummary]): + """ + A class that handles the execution of requests using a backend. + This class is responsible for sending requests to the backend, + handling responses, and managing errors. + + :param backend: The backend to use for handling requests. + This should be an instance of Backend such as an OpenAIHTTPBackend. + """ + + def __init__(self, backend: Backend): + self.backend = backend + + @property + def description(self) -> GenerativeRequestsWorkerDescription: + """ + Get the description of the worker. + :return: The description of the worker. + """ + return GenerativeRequestsWorkerDescription( + backend_type=self.backend.type_, + backend_target=self.backend.target, + backend_model=self.backend.model or "None", + backend_info=self.backend.info, + ) + + async def prepare_multiprocessing(self): + """ + Prepare the worker for multiprocessing. + This is useful for workers that have instance state that can not + be shared across processes and should be cleared out and re-initialized + for each new process. 
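+        For this worker, preparation is delegated to the backend's own
+        prepare_multiprocessing hook.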
+ """ + await self.backend.prepare_multiprocessing() + + def process_loop_synchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + process_id: int, + ): + asyncio.run(self.backend.validate()) + super().process_loop_synchronous( + requests_queue=requests_queue, + results_queue=results_queue, + process_id=process_id, + ) + + def process_loop_asynchronous( + self, + requests_queue: multiprocessing.Queue, + results_queue: multiprocessing.Queue, + max_concurrency: Optional[int], + process_id: int, + ): + asyncio.run(self.backend.validate()) + super().process_loop_asynchronous( + requests_queue=requests_queue, + results_queue=results_queue, + max_concurrency=max_concurrency, + process_id=process_id, + ) + + async def resolve( + self, + request: GenerationRequest, + timeout_time: float, + ) -> Tuple[ResolveStatus, ResponseSummary]: + """ + Resolve a request by sending it to the backend and handling the response. + This method sends the request to the backend, waits for a response, + and handles any errors that may occur during the process. + + :param request: The request to resolve. + :param timeout_time: The time to wait for a response before timing out. + If timeout_time is math.inf, the request will not timeout. + :return: A ResponseSummary object containing the response from the backend. + If an error occurs, the ResponseSummary will contain the error message. + """ + resolve_start_time = time.time() + response = None + error: Optional[str] = None + status = ResolveStatus( + requested=False, + completed=False, + errored=False, + canceled=False, + request_start=-1, + request_end=-1, + ) + + try: + if timeout_time < time.time(): + raise asyncio.TimeoutError( + "The timeout time has already passed." + ) # exit early + + status.requested = True + request_func, request_kwargs = self._create_request_func_kwargs(request) + + async def _runner(): + # wrap function so we can enforce timeout and + # still return the latest state from the backend + async for resp in request_func(**request_kwargs): # type: ignore[operator] + nonlocal response + response = resp + + await asyncio.wait_for( + _runner(), + timeout=timeout_time - time.time() if timeout_time < math.inf else None, + ) + + if not response: + raise ValueError( + f"No response received for request: {request} " + f"and backend: {self.backend}" + ) + if not isinstance(response, ResponseSummary): + raise ValueError( + f"Received no ResponseSummary for request: {request} " + f"and backend: {self.backend}, received: {response}" + ) + + status.completed = True + except asyncio.TimeoutError: + error = "TimeoutError: The request timed out before completing." 
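+            # a timed-out request is flagged as both errored and canceled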
+ status.errored = True + status.canceled = True + except Exception as exc: # noqa: BLE001 + error = str(exc) + status.errored = True + + return self._handle_response( + status=status, + request=request, + response=response, + error=error, + resolve_start_time=resolve_start_time, + ) + + def _create_request_func_kwargs( + self, + request: GenerationRequest, + ) -> Tuple[ + AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None], + Dict[str, Any], + ]: + request_func: AsyncGenerator[ + Union[StreamingTextResponse, ResponseSummary], None + ] + request_kwargs: Dict[str, Any] + + if request.request_type == "text_completions": + request_func = self.backend.text_completions # type: ignore[assignment] + request_kwargs = { + "prompt": request.content, + "request_id": request.request_id, + "prompt_token_count": request.stats.get("prompt_tokens", None), + "output_token_count": request.constraints.get("output_tokens", None), + **request.params, + } + elif request.request_type == "chat_completions": + request_func = self.backend.chat_completions # type: ignore[assignment] + request_kwargs = { + "content": request.content, + "request_id": request.request_id, + "prompt_token_count": request.stats.get("prompt_tokens", None), + "output_token_count": request.constraints.get("output_tokens", None), + **request.params, + } + else: + raise ValueError( + f"Invalid request type: {request.request_type} for {request}" + ) + + return request_func, request_kwargs + + def _handle_response( + self, + status: ResolveStatus, + request: GenerationRequest, + response: Any, + error: Optional[str], + resolve_start_time: float, + ) -> Tuple[ResolveStatus, ResponseSummary]: + if response is None or not isinstance( + response, (ResponseSummary, StreamingTextResponse) + ): + # nothing received or invalid response, fill in defaults for error + if response: + error = str( + ValueError( + f"Invalid response: {type(response)} for request: {request}; " + ) + ) + (error or "") + + response = ResponseSummary( + value="", + request_args=RequestArgs( + target=self.backend.target, + headers={}, + payload={}, + ), + start_time=resolve_start_time, + end_time=status.request_end, + first_iter_time=None, + last_iter_time=None, + request_id=request.request_id, + error=error or "Unknown error", + ) + elif isinstance(response, StreamingTextResponse): + response = ResponseSummary( + value=response.value, + request_args=RequestArgs( + target=self.backend.target, + headers={}, + payload={}, + ), + start_time=response.start_time, + end_time=time.time(), + first_iter_time=response.first_iter_time, + last_iter_time=response.time if response.iter_count > 0 else None, + request_prompt_tokens=request.stats.get("prompt_tokens", None), + request_output_tokens=request.constraints.get("output_tokens", None), + response_prompt_tokens=None, + response_output_tokens=response.iter_count, + request_id=request.request_id, + error=error or "Unknown error", + ) + + response.error = error + status.request_start = response.start_time + status.request_end = response.end_time + + return status, response diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 2fdd8ca8..3620a3d3 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,40 +1,25 @@ -from .injector import create_report, inject_data -from .progress import BenchmarkReportProgress +from .colors import Colors +from .hf_transformers import ( + check_load_processor, +) +from .random import IntegerRangeSampler from .text import ( + 
EndlessTextCreator, clean_text, filter_text, - is_path, - is_path_like, - is_url, + is_puncutation, load_text, - load_text_lines, - parse_text_objects, - split_lines_by_punctuation, split_text, ) -from .transformers import ( - load_transformers_dataset, - resolve_transformers_dataset, - resolve_transformers_dataset_column, - resolve_transformers_dataset_split, -) __all__ = [ - "BenchmarkReportProgress", - "clean_text", - "create_report", + "IntegerRangeSampler", + "Colors", + "check_load_processor", "filter_text", - "inject_data", - "is_path", - "is_path_like", - "is_url", - "load_text", - "load_text_lines", - "load_transformers_dataset", - "parse_text_objects", - "resolve_transformers_dataset", - "resolve_transformers_dataset_column", - "resolve_transformers_dataset_split", - "split_lines_by_punctuation", + "clean_text", "split_text", + "load_text", + "is_puncutation", + "EndlessTextCreator", ] diff --git a/src/guidellm/utils/cli_params.py b/src/guidellm/utils/cli_params.py deleted file mode 100644 index 4e8800d2..00000000 --- a/src/guidellm/utils/cli_params.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -This module includes custom CLI parameters for the `click` package. -""" - -from typing import Any, Optional - -from click import Context, Parameter, ParamType - -__all__ = ["MAX_REQUESTS"] - - -class MaxRequestsType(ParamType): - """ - Catch the `dataset` string parameter to determine the behavior of the Scheduler. - """ - - name = "max_requests" - - def convert( - self, value: Any, param: Optional[Parameter], ctx: Optional[Context] - ) -> Any: - if isinstance(value, int): - return value - - try: - return int(value) - except ValueError: - if value == "dataset": - return value - else: - self.fail(f"{value} is not a valid integer or 'dataset'", param, ctx) - - -MAX_REQUESTS = MaxRequestsType() diff --git a/src/guidellm/utils/colors.py b/src/guidellm/utils/colors.py new file mode 100644 index 00000000..e4d60d52 --- /dev/null +++ b/src/guidellm/utils/colors.py @@ -0,0 +1,8 @@ +__all__ = ["Colors"] + + +class Colors: + INFO: str = "light_steel_blue" + PROGRESS: str = "dark_slate_gray1" + SUCCESS: str = "chartreuse1" + ERROR: str = "orange_red1" diff --git a/src/guidellm/utils/hf_transformers.py b/src/guidellm/utils/hf_transformers.py new file mode 100644 index 00000000..2c298d2f --- /dev/null +++ b/src/guidellm/utils/hf_transformers.py @@ -0,0 +1,35 @@ +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from transformers import AutoTokenizer, PreTrainedTokenizerBase # type: ignore[import] + +__all__ = [ + "check_load_processor", +] + + +def check_load_processor( + processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], + processor_args: Optional[Dict[str, Any]], + error_msg: str, +) -> PreTrainedTokenizerBase: + if processor is None: + raise ValueError(f"Processor/Tokenizer is required for {error_msg}.") + + try: + if isinstance(processor, (str, Path)): + loaded = AutoTokenizer.from_pretrained( + processor, + **(processor_args or {}), + ) + else: + loaded = processor + except Exception as err: + raise ValueError( + f"Failed to load processor/Tokenizer for {error_msg}." 
+ ) from err + + if not isinstance(loaded, PreTrainedTokenizerBase): + raise ValueError(f"Invalid processor/Tokenizer for {error_msg}.") + + return loaded diff --git a/src/guidellm/utils/injector.py b/src/guidellm/utils/injector.py deleted file mode 100644 index fb5216aa..00000000 --- a/src/guidellm/utils/injector.py +++ /dev/null @@ -1,70 +0,0 @@ -from pathlib import Path -from typing import Union - -from pydantic import BaseModel - -from guidellm.config import settings -from guidellm.utils.text import load_text - -__all__ = ["create_report", "inject_data"] - - -def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path: - """ - Creates a report from the model and saves it to the output path. - - :param model: the model to serialize and inject - :type model: BaseModel - :param output_path: the path, either a file or a directory, - to save the report to. If a directory, the report will be saved - as "report.html" inside of the directory. - :type output_path: str - :return: the path to the saved report - :rtype: str - """ - if not isinstance(output_path, Path): - output_path = Path(output_path) - - html_content = load_text(settings.report_generation.source) - report_content = inject_data( - model, - html_content, - settings.report_generation.report_html_match, - settings.report_generation.report_html_placeholder, - ) - - if not output_path.suffix: - # assume directory, save as report.html - output_path = output_path / "report.html" - - output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_text(report_content) - - return output_path - - -def inject_data( - model: BaseModel, - html: str, - match: str, - placeholder: str, -) -> str: - """ - Injects the data from the model into the HTML while replacing the placeholder. - - :param model: the model to serialize and inject - :type model: BaseModel - :param html: the html to inject the data into - :type html: str - :param match: the string to match in the html to find the placeholder - :type match: str - :param placeholder: the placeholder to replace with the model data - inside of the placeholder - :type placeholder: str - :return: the html with the model data injected - :rtype: str - """ - model_str = model.json() - inject_str = match.replace(placeholder, model_str) - - return html.replace(match, inject_str) diff --git a/src/guidellm/utils/progress.py b/src/guidellm/utils/progress.py deleted file mode 100644 index a1e1e798..00000000 --- a/src/guidellm/utils/progress.py +++ /dev/null @@ -1,199 +0,0 @@ -from datetime import datetime -from typing import List - -from loguru import logger -from rich.console import Group -from rich.live import Live -from rich.panel import Panel -from rich.progress import ( - BarColumn, - Progress, - SpinnerColumn, - TaskID, - TaskProgressColumn, - TextColumn, - TimeElapsedColumn, - TimeRemainingColumn, -) - -__all__ = ["BenchmarkReportProgress"] - - -class BenchmarkReportProgress: - """ - Manages the progress display for benchmarks and report generation using Rich. - - This class provides a visual representation of the benchmarking process - and report generation using Rich's progress bars and panels. - """ - - def __init__(self): - """ - Initialize the BenchmarkReportProgress with default settings. - - This method sets up the progress displays for both individual benchmarks - and the overall report, as well as initializing internal task management - structures. 
- """ - logger.info("Initializing BenchmarkReportProgress instance") - - self.benchmarks_progress = Progress( - TextColumn("[{task.fields[start_time_str]}]"), - SpinnerColumn(), - TaskProgressColumn(), - TextColumn("{task.description}"), - TextColumn(" "), - TextColumn( - "[bold cyan]({task.fields[req_per_sec]} req/sec avg)[/bold cyan]" - ), - ) - self.benchmarks_panel = Panel( - self.benchmarks_progress, - title="Benchmarks", - title_align="left", - expand=True, - ) - self.report_progress = Progress( - SpinnerColumn(), - TextColumn("Generating report..."), - BarColumn(bar_width=None), - TextColumn( - "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})" - ), - TextColumn("["), - TimeElapsedColumn(), - TextColumn("<"), - TimeRemainingColumn(), - TextColumn("]"), - ) - self.render_group = Group(self.benchmarks_panel, self.report_progress) - self.live = Live(self.render_group, redirect_stdout=True, redirect_stderr=True) - - self.report_task: TaskID = None # type: ignore # noqa: PGH003 - self.benchmark_tasks: List[TaskID] = [] - self.benchmark_tasks_started: List[bool] = [] - self.benchmark_tasks_completed: List[bool] = [] - self.benchmark_tasks_progress: List[float] = [] - - def start(self, task_descriptions: List[str]) -> None: - """ - Starts the live progress display and initializes benchmark tasks. - - :param task_descriptions: List of descriptions for each benchmark task. - :type task_descriptions: List[str] - """ - logger.info( - "Starting BenchmarkReportProgress with task descriptions: {}", - task_descriptions, - ) - self.live.start() - - for task_description in task_descriptions: - logger.debug("Adding task with description: {}", task_description) - task_id = self.benchmarks_progress.add_task( - task_description, - start=False, - total=None, - start_time_str="--:--:--", - req_per_sec="#.##", - ) - self.benchmark_tasks.append(task_id) - self.benchmark_tasks_started.append(False) - self.benchmark_tasks_completed.append(False) - self.benchmark_tasks_progress.append(0) - - self.report_task = self.report_progress.add_task( - "", - total=len(self.benchmark_tasks) * 100, # 100 points per report - completed_benchmarks=0, - total_benchmarks=len(task_descriptions), - ) - logger.info("Initialized {} benchmark tasks", len(task_descriptions)) - - def update_benchmark( - self, - index: int, - description: str, - completed: bool, - completed_count: int, - completed_total: int, - start_time: float, - req_per_sec: float, - ) -> None: - """ - Updates the progress of a specific benchmark task. - - :param index: Index of the benchmark task to update. - :type index: int - :param description: Description of the current benchmark task. - :type description: str - :param completed: Flag indicating if the benchmark is completed. - :type completed: bool - :param completed_count: Number of completed operations for the task. - :type completed_count: int - :param completed_total: Total number of operations for the task. - :type completed_total: int - :param start_time: Start time of the benchmark in timestamp format. - :type start_time: float - :param req_per_sec: Average requests per second. - :type req_per_sec: float - :raises ValueError: If trying to update a completed benchmark. 
- """ - - if self.benchmark_tasks_completed[index]: - err = ValueError(f"Benchmark {index} already completed") - logger.error("Error updating benchmark: {}", err) - raise err - - if not self.benchmark_tasks_started[index]: - self.benchmark_tasks_started[index] = True - self.benchmarks_progress.start_task(self.benchmark_tasks[index]) - logger.info("Starting benchmark task at index {}", index) - - if completed: - self.benchmark_tasks_completed[index] = True - self.benchmark_tasks_progress[index] = 100 - self.benchmarks_progress.stop_task(self.benchmark_tasks[index]) - logger.info("Completed benchmark task at index {}", index) - - self.benchmark_tasks_progress[index] = completed_count / completed_total * 100 - self.benchmarks_progress.update( - self.benchmark_tasks[index], - description=description, - total=completed_total, - completed=completed_count if not completed else completed_total, - req_per_sec=(f"{req_per_sec:.2f}" if req_per_sec else "#.##"), - start_time_str=( - datetime.fromtimestamp(start_time).strftime("%H:%M:%S") - if start_time - else "--:--:--" - ), - ) - logger.debug( - "Updated benchmark task at index {}: {}% complete", - index, - self.benchmark_tasks_progress[index], - ) - self.report_progress.update( - self.report_task, - total=len(self.benchmark_tasks) * 100, - completed=sum(self.benchmark_tasks_progress), - completed_benchmarks=sum(self.benchmark_tasks_completed), - total_benchmarks=len(self.benchmark_tasks), - ) - - def finish(self) -> None: - """ - Marks the overall report task as finished and stops the live display. - """ - logger.info("Finishing BenchmarkReportProgress") - self.report_progress.update( - self.report_task, - total=len(self.benchmark_tasks) * 100, - completed=len(self.benchmark_tasks) * 100, - completed_benchmarks=len(self.benchmark_tasks), - total_benchmarks=len(self.benchmark_tasks), - ) - self.report_progress.stop_task(self.report_task) - self.live.stop() - logger.info("BenchmarkReportProgress finished and live display stopped") diff --git a/src/guidellm/utils/random.py b/src/guidellm/utils/random.py new file mode 100644 index 00000000..fefef4f1 --- /dev/null +++ b/src/guidellm/utils/random.py @@ -0,0 +1,42 @@ +import random +from typing import Iterator, Optional + +__all__ = ["IntegerRangeSampler"] + + +class IntegerRangeSampler: + def __init__( + self, + average: int, + variance: Optional[int], + min_value: Optional[int], + max_value: Optional[int], + random_seed: int, + ): + self.average = average + self.variance = variance + self.min_value = min_value + self.max_value = max_value + self.seed = random_seed + self.rng = random.Random(random_seed) # noqa: S311 + + def __iter__(self) -> Iterator[int]: + calc_min = self.min_value + if calc_min is None: + calc_min = max( + 1, self.average - 5 * self.variance if self.variance else self.average + ) + calc_max = self.max_value + if calc_max is None: + calc_max = ( + self.average + 5 * self.variance if self.variance else self.average + ) + + while True: + if calc_min == calc_max: + yield calc_min + elif not self.variance: + yield self.rng.randint(calc_min, calc_max + 1) + else: + rand = self.rng.gauss(self.average, self.variance) + yield round(max(calc_min, min(calc_max, rand))) diff --git a/src/guidellm/utils/text.py b/src/guidellm/utils/text.py index f8c5038c..92a0284a 100644 --- a/src/guidellm/utils/text.py +++ b/src/guidellm/utils/text.py @@ -1,60 +1,26 @@ -import csv -import json +import gzip import re +from importlib.resources import as_file, files # type: ignore[attr-defined] from pathlib import 
Path -from typing import Any, Dict, List, Optional, Tuple, Union -from urllib.parse import urlparse +from typing import List, Optional, Union import ftfy -import requests -import yaml +import httpx from loguru import logger +from guidellm import data as package_data from guidellm.config import settings __all__ = [ - "clean_text", "filter_text", - "is_path", - "is_path_like", - "is_url", - "load_text", - "load_text_lines", - "parse_text_objects", - "split_lines_by_punctuation", + "clean_text", "split_text", + "load_text", + "is_puncutation", + "EndlessTextCreator", ] - -NAME_TITLES = [ - "Mr.", - "Mrs.", - "Ms.", - "Dr.", - "Prof.", - "Jr.", - "Sr.", - "St.", - "Lt.", - "Col.", - "Gen.", - "Rep.", - "Sen.", - "Gov.", - "Pres.", -] -SENTENCE_REGEX = r'[^.!?]*[.!?]["\']?\s*(?=[A-Z])' -MAX_EXTENSION_LENGTH = 8 MAX_PATH_LENGTH = 4096 -EXTENSION_TYPES = { - "csv": "csv", - "jsonl": "jsonl", - "json": "json", - "yaml": "yaml", - "yml": "yaml", - "txt": "txt", - "text": "txt", -} def filter_text( @@ -95,216 +61,17 @@ def filter_text( return text -def clean_text( - text: str, - fix_encoding: bool = True, - clean_whitespace: bool = False, - remove_empty_lines: bool = False, - force_new_line_punctuation: bool = False, -) -> str: - """ - Clean text by fixing encoding, cleaning whitespace, removing empty lines, - and forcing new line punctuation - - :param text: the text to clean - :param fix_encoding: True to fix the encoding of the text, False to leave as is - :param clean_whitespace: True to clean the whitespace in the text - (remove extra spaces, tabs, etc), False to leave as is - :param remove_empty_lines: True to remove empty lines from the text - (lines with only whitespace), False to leave as is - :param force_new_line_punctuation: True to force new lines at punctuation - (line ends in a period, exclamation point, or question mark), - False to leave as is - :return: The cleaned text - """ - - if fix_encoding: - text = ftfy.fix_text(text) - - if clean_whitespace: - text = "\n".join( - [re.sub(r"\s+", " ", line).strip() for line in text.splitlines()] - ) - - if remove_empty_lines: - text = "\n".join([line for line in text.splitlines() if line.strip()]) - - if force_new_line_punctuation: - # first remove any existing new lines - text = " ".join(line for line in text.splitlines() if line.strip()) - lines = split_lines_by_punctuation(text) - text = "\n".join(lines) - - return text - - -def split_lines_by_punctuation(text: str) -> List[str]: - """ - Split text into lines based on punctuation - - :param text: the text to split - :return: the list of lines - """ - - lines = [] - current_line = "" - skip_next = False - - for index, char in enumerate(text): - if skip_next: - skip_next = False - continue - - current_line += char - - if char not in [".", "!", "?"]: - # must match end of sentence punctuation - continue - - # if this is the character for a title, don't split - if any(current_line.endswith(title) for title in NAME_TITLES): - continue - - char_next_1 = text[index + 1] if index + 1 < len(text) else None - char_next_2 = text[index + 2] if index + 2 < len(text) else None - char_next_3 = text[index + 3] if index + 3 < len(text) else None - - next_is_space = char_next_1 and char_next_1.isspace() - next_is_quote_and_space = char_next_1 in ["'", '"'] and char_next_2 == " " - - # next character must be a space or a quote, otherwise skip - if not next_is_space and not next_is_quote_and_space: - continue - - # after this, next character must be an upper case letter - upper_char = char_next_3 if 
next_is_quote_and_space else char_next_2 - next_is_upper = upper_char and ( - upper_char.isupper() or upper_char in ["'", '"'] - ) +def clean_text(text: str) -> str: + return re.sub(r"\s+", " ", ftfy.fix_text(text)).strip() - if not next_is_upper: - continue - # if next char is a quote, add it and skip next - if next_is_quote_and_space: - current_line += text[index + 1] - skip_next = True +def split_text(text: str, split_punctuation: bool = False) -> List[str]: + text = clean_text(text) - lines.append(current_line.strip()) - current_line = "" + if split_punctuation: + return re.findall(r"[\w]+|[.,!?;]", text) - if current_line: - lines.append(current_line.strip()) - - return lines - - -def is_url(url: str) -> bool: - """ - Check if a string is a URL - - :param url: the string to check - :return: True if the string is a URL, False if not - """ - try: - result = urlparse(url) - return all([result.scheme, result.netloc]) - except Exception: # noqa: BLE001 - return False - - -def is_path(path: Any) -> bool: - """ - Check if a string is a path - - :param path: the string to check - :return: True if the string is a path, False if not - """ - if not isinstance(path, (str, Path)): - return False - - if isinstance(path, str): - path = Path(path) - - return path.exists() - - -def is_path_like(path: Any, enforce_file: bool = False) -> bool: - """ - Check if a string has a path like structure where it doesn't need to exist - - :param path: the string to check - :param enforce_file: True if the path should be a file, False if not - :return: True if the string is path like, False if not - """ - # if path isn't a str or Path, it's not a path - if not isinstance(path, (str, Path)): - return False - - if isinstance(path, Path): - path = str(path) - - # if text is too long, it's not a path (4096 for most linux setups) - if len(path) > MAX_PATH_LENGTH: - return False - - # if it starts with a URL scheme, it's not a path - if path.startswith(("http", "ftp")): - return False - - test_path = Path(path) - - # if it's supposed to be a file and there's no extension or - # the extension is too long, it's not a path - return not enforce_file or ( - bool(test_path.suffix) and len(test_path.suffix) <= MAX_EXTENSION_LENGTH - ) - - -def split_text(text: str) -> Tuple[List[str], List[str], List[int]]: - """ - Split text into words / tokens, the white space separators between words, - and the indices for each new line - - :param text: the text to split - :return: the words, the white space separators, and the new line indices - """ - if not text or not text.strip(): - return [], [], [] - - text = text.strip() - tokens = [] # type: List[str] - separators = [] # type: List[str] - new_lines = [0] - buffer = text[0] - is_token = not text[0].isspace() - - for char in text[1:]: - char_whitespace = char.isspace() - - if char == "\n": - new_lines.append(len(tokens) + 1) - - if char_whitespace and is_token: - tokens.append(buffer) - buffer = char - is_token = False - elif char_whitespace: - buffer += char - elif not char_whitespace and not is_token: - separators.append(buffer) - buffer = char - is_token = True - else: - buffer += char - - if buffer and is_token: - tokens.append(buffer) - separators.append(" ") - elif buffer: - separators.append(buffer) - - return tokens, separators, new_lines + return text.split() def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: @@ -324,132 +91,75 @@ def load_text(data: Union[str, Path], encoding: Optional[str] = None) -> str: return "" # check URLs - if 
isinstance(data, str) and data.startswith("http"): - response = requests.get(data, timeout=settings.request_timeout) - response.raise_for_status() - return response.text - - # check raw text - if isinstance(data, str) and not is_path_like(data, enforce_file=True): + if isinstance(data, str) and data.strip().startswith(("http", "ftp")): + with httpx.Client(timeout=settings.request_timeout) as client: + response = client.get(data.strip()) + response.raise_for_status() + return response.text + + # check package data + if isinstance(data, str) and data.startswith("data:"): + resource_path = files(package_data).joinpath(data[5:]) + with as_file(resource_path) as resource_file, gzip.open( + resource_file, "rt", encoding=encoding + ) as file: + return file.read() + + # check gzipped files + if isinstance(data, str) and data.endswith(".gz"): + with gzip.open(data, "rt", encoding=encoding) as file: + return file.read() + + # check if it's raw text by not being a path + if isinstance(data, str) and ( + len(data) > MAX_PATH_LENGTH or not Path(data).exists() + ): return data # assume local file if not isinstance(data, Path): data = Path(data) - if not data.exists(): + if not data.exists() or not data.is_file(): raise FileNotFoundError(f"File not found: {data}") - if not data.is_file(): - raise IsADirectoryError(f"Path is a directory: {data}") - return data.read_text(encoding=encoding) -def parse_text_objects(data: str, format_: str = "txt") -> List[Dict]: +def is_puncutation(text: str) -> bool: """ - Parse text data into a list of dictionaries based on the format given - (csv, jsonl, json, yaml, txt). - - :param data: the text data to parse - :param format_: the format of the data to parse: - 'csv', 'jsonl', 'json', 'yaml', 'txt' - :return: the list of dictionaries parsed from the data, if text - then each line is a dictionary with a single key 'text' - """ - if not isinstance(data, str): - raise ValueError(f"Unsupported data given of type: {type(data)}") - - if format_ == "csv": - reader = csv.DictReader(data.splitlines()) - columns = reader.fieldnames - return [{col: row[col] for col in columns} for row in reader] # type: ignore # noqa: PGH003 - - if format_ == "jsonl": - return [json.loads(line) for line in data.splitlines() if line] - - if format_ in ("json", "yaml"): - data = json.loads(data) if format_ == "json" else yaml.safe_load(data) - - if not data: - return [] - - if isinstance(data, dict) and len(data) == 1: - logger.debug("Getting first value from JSON/YAML object: {}", data) - data = list(data.values())[0] - elif isinstance(data, dict): - logger.debug("Converting JSON/YAML object to list: {}", data) - data = list(data.values()) - - if not isinstance(data, list) or not isinstance(data[0], dict): - raise ValueError(f"Unsupported data structure given: {data}") - - return data - - if format_ == "txt": - return [{"text": line} for line in data.splitlines() if line] + Check if the text is a punctuation - raise ValueError(f"Unsupported format given: {format_}") - - -def load_text_lines( - data: Union[str, Path, List[Dict]], - format_: Optional[str] = None, - filters: Optional[List[str]] = None, - encoding: Optional[str] = None, -) -> List[str]: + :param text: the text to check + :type text: str + :return: True if the text is a punctuation, False otherwise + :rtype: bool """ - Load text lines from a file or data object with optional filtering and formatting. 
- - - :param data: the data to load the text lines from - :param format_: the format of the data to load, if not provided will be inferred. - Supported formats: 'csv', 'jsonl', 'json', 'yaml', 'txt' - :param filters: the keys to filter the data by when loading in order of preference. - If not provided, will use the first key in the data object. - :param encoding: the encoding to use when reading the file - :return: the list of text lines - """ - logger.debug( - "Loading text lines with format {}, filters {}, encoding {} for data: {}", - format_, - filters, - encoding, - data, - ) - - if not data: - return [] - - if not format_ and isinstance(data, (str, Path)) and "." in str(data): - extension = str(data).split(".")[-1] - format_ = EXTENSION_TYPES.get(extension, "txt") - elif not format_: - format_ = "txt" + return len(text) == 1 and not text.isalnum() and not text.isspace() - # load the data if it's a path or URL - if isinstance(data, (Path, str)): - data = load_text(data, encoding=encoding) - data = clean_text(data) - # parse the data into a list of dictionaries based on the format - if isinstance(data, str): - data = parse_text_objects(data, format_) +class EndlessTextCreator: + def __init__( + self, + data: Union[str, Path], + filter_start: Optional[Union[str, int]] = None, + filter_end: Optional[Union[str, int]] = None, + ): + self.data = data + self.text = load_text(data) + self.filtered_text = filter_text(self.text, filter_start, filter_end) + self.words = split_text(self.filtered_text, split_punctuation=True) - if not isinstance(data, list): - raise ValueError(f"Unsupported data given of type: {type(data)}") + def create_text(self, start: int, length: int) -> str: + text = "" - if not isinstance(data[0], dict): - raise ValueError(f"Unsupported data item type given: {type(data[0])}") + for counter in range(length): + index = (start + counter) % len(self.words) + add_word = self.words[index] - # grab the first available filter key to use if preference order as provided - filter_ = list(data[0].keys())[0] - for filt in filters or []: - if filt not in data[0]: - continue + if counter != 0 and not is_puncutation(add_word): + text += " " - filter_ = filt - break + text += add_word - # extract the lines from the data - return [row[filter_] for row in data] if filter_ else [str(row) for row in data] + return text diff --git a/src/guidellm/utils/transformers.py b/src/guidellm/utils/transformers.py deleted file mode 100644 index 54057299..00000000 --- a/src/guidellm/utils/transformers.py +++ /dev/null @@ -1,151 +0,0 @@ -from pathlib import Path -from typing import List, Optional, Union - -from datasets import ( # type: ignore # noqa: PGH003 - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, - load_dataset, -) -from loguru import logger - -from guidellm.config import settings - -__all__ = [ - "load_transformers_dataset", - "resolve_transformers_dataset", - "resolve_transformers_dataset_column", - "resolve_transformers_dataset_split", -] - - -def load_transformers_dataset( - dataset: Union[ - str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset - ], - split: Optional[str] = None, - preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, - **kwargs, -) -> Union[Dataset, IterableDataset]: - """ - Load a dataset from a file or a script and resolve the preferred split. 
- - :param dataset: the dataset file or script to load - :param split: the dataset split to use - (overrides preferred_splits, must be in dataset) - :param preferred_splits: the preferred dataset splits to use - :param kwargs: additional keyword arguments to pass to the dataset loader - :return: the loaded dataset - """ - dataset = resolve_transformers_dataset(dataset, **kwargs) - - return resolve_transformers_dataset_split(dataset, split, preferred_splits) - - -def resolve_transformers_dataset( - dataset: Union[ - str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset - ], - **kwargs, -) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: - """ - Resolve the dataset from a file (csv, json, script) or a dataset name. - - :param dataset: the dataset file or script to load - :param kwargs: additional keyword arguments to pass to the dataset loader - :return: the loaded dataset - """ - if isinstance( - dataset, (DatasetDict, Dataset, IterableDatasetDict, IterableDataset) - ): - return dataset - - if not isinstance(dataset, (str, Path)): - raise ValueError(f"Invalid dataset type: {type(dataset)}") - - dataset = str(dataset) - - if dataset.endswith((".csv", ".json")): - logger.debug("Loading dataset from local path: {}", dataset) - extension = dataset.split(".")[-1] - - return load_dataset(extension, data_files=dataset, **kwargs) - - if dataset.endswith(".py"): - logger.debug("Loading dataset from local script: {}", dataset) - - return load_dataset(dataset, **kwargs) - - logger.debug("Loading dataset: {}", dataset) - - return load_dataset(dataset, **kwargs) - - -def resolve_transformers_dataset_split( - dataset: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset], - split: Optional[str] = None, - preferred_splits: Optional[List[str]] = settings.dataset.preferred_data_splits, -) -> Union[Dataset, IterableDataset]: - """ - Resolve the preferred split from a dataset dictionary. - - :param dataset: the dataset to resolve the split from - :param split: the dataset split to use - (overrides preferred_splits, must be in dataset) - :param preferred_splits: the preferred dataset splits to use - :return: the resolved dataset split - """ - if not isinstance(dataset, (DatasetDict, IterableDatasetDict)): - logger.debug("Dataset is not a dictionary, using default split") - return dataset - - if split: - if split not in dataset: - raise ValueError(f"Split '{split}' not found in dataset") - - return dataset[split] - - if preferred_splits: - for spl in preferred_splits: - if spl not in dataset: - continue - return dataset[spl] - - return list(dataset.values())[0] - - -def resolve_transformers_dataset_column( - dataset: Union[Dataset, IterableDataset], - column: Optional[str] = None, - preferred_columns: Optional[List[str]] = settings.dataset.preferred_data_columns, -) -> str: - """ - Resolve the preferred column from a dataset. 
- - :param dataset: the dataset to resolve the column from - :param column: the dataset column to use - (overrides preferred_columns, must be in dataset) - :param preferred_columns: the preferred dataset columns to use - :return: the resolved dataset column - """ - column_names = dataset.column_names - - if not column_names: - # grab from the first item - first_item = next(iter(dataset)) - column_names = list(first_item.keys()) - - if column: - if column not in column_names: - raise ValueError(f"Column '{column}' not found in dataset") - - return column - - if preferred_columns: - for col in preferred_columns: - if col not in column_names: - continue - return col - - return list(column_names)[0] diff --git a/tests/dummy/__init__.py b/tests/dummy/__init__.py deleted file mode 100644 index a0cccdbf..00000000 --- a/tests/dummy/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -The tests.dummy package package represents dummy data factories and test services. - -test.dummy.data.openai_model_factory - openai.types.Model test factory -test.dummy.data.openai_completion_factory - openai.types.Completion test factory -""" - -from . import data, services # noqa: F401 diff --git a/tests/dummy/data/pride_and_prejudice.txt b/tests/dummy/data/pride_and_prejudice.txt deleted file mode 100644 index 3b93b50a..00000000 --- a/tests/dummy/data/pride_and_prejudice.txt +++ /dev/null @@ -1,2015 +0,0 @@ -*** START OF THE PROJECT GUTENBERG EBOOK 1342 *** - - PAGE - -Frontispiece iv - -Title-page v - -Dedication vii - -Heading to Preface ix - -Heading to List of Illustrations xxv - -Heading to Chapter I. 1 - -“He came down to see the place” 2 - -Mr. and Mrs. Bennet 5 - -“I hope Mr. Bingley will like it” 6 - -“I’m the tallest” 9 - -“He rode a black horse” 10 - -“When the party entered” 12 - -“She is tolerable” 15 - -Heading to Chapter IV. 18 - -Heading to Chapter V. 22 - -“Without once opening his lips” 24 - -Tailpiece to Chapter V. 26 - -Heading to Chapter VI. 27 - -“The entreaties of several” 31 - -“A note for Miss Bennet” 36 - -“Cheerful prognostics” 40 - -“The apothecary came” 43 - -“Covering a screen” 45 - -“Mrs. Bennet and her two youngest girls” 53 - -Heading to Chapter X. 60 - -“No, no; stay where you are” 67 - -“Piling up the fire” 69 - -Heading to Chapter XII. 75 - -Heading to Chapter XIII. 78 - -Heading to Chapter XIV. 84 - -“Protested that he never read novels” 87 - -Heading to Chapter XV. 89 - -Heading to Chapter XVI. 95 - -“The officers of the ----shire” 97 - -“Delighted to see their dear friend again” 108 - -Heading to Chapter XVIII. 113 - -“Such very superior dancing is not often seen” 118 - -“To assure you in the most animated language” 132 - -Heading to Chapter XX. 139 - -“They entered the breakfast-room” 143 - -Heading to Chapter XXI. 146 - -“Walked back with them” 148 - -Heading to Chapter XXII. 154 - -“So much love and eloquence” 156 - -“Protested he must be entirely mistaken” 161 - -“Whenever she spoke in a low voice” 166 - -Heading to Chapter XXIV. 168 - -Heading to Chapter XXV. 175 - -“Offended two or three young ladies” 177 - -“Will you come and see me?” 181 - -“On the stairs” 189 - -“At the door” 194 - -“In conversation with the ladies” 198 - -“Lady Catherine,” said she, “you have given me a treasure” 200 - -Heading to Chapter XXX. 209 - -“He never failed to inform them” 211 - -“The gentlemen accompanied him” 213 - -Heading to Chapter XXXI. 215 - -Heading to Chapter XXXII. 221 - -“Accompanied by their aunt” 225 - -“On looking up” 228 - -Heading to Chapter XXXIV. 
235 - -“Hearing herself called” 243 - -Heading to Chapter XXXVI. 253 - -“Meeting accidentally in town” 256 - -“His parting obeisance” 261 - -“Dawson” 263 - -“The elevation of his feelings” 267 - -“They had forgotten to leave any message” 270 - -“How nicely we are crammed in!” 272 - -Heading to Chapter XL. 278 - -“I am determined never to speak of it again” 283 - -“When Colonel Miller’s regiment went away” 285 - -“Tenderly flirting” 290 - -The arrival of the Gardiners 294 - -“Conjecturing as to the date” 301 - -Heading to Chapter XLIV. 318 - -“To make herself agreeable to all” 321 - -“Engaged by the river” 327 - -Heading to Chapter XLVI. 334 - -“I have not an instant to lose” 339 - -“The first pleasing earnest of their welcome” 345 - -The Post 359 - -“To whom I have related the affair” 363 - -Heading to Chapter XLIX. 368 - -“But perhaps you would like to read it” 370 - -“The spiteful old ladies” 377 - -“With an affectionate smile” 385 - -“I am sure she did not listen” 393 - -“Mr. Darcy with him” 404 - -“Jane happened to look round” 415 - -“Mrs. Long and her nieces” 420 - -“Lizzy, my dear, I want to speak to you” 422 - -Heading to Chapter LVI. 431 - -“After a short survey” 434 - -“But now it comes out” 442 - -“The efforts of his aunt” 448 - -“Unable to utter a syllable” 457 - -“The obsequious civility” 466 - -Heading to Chapter LXI. 472 - -The End 476 - - - - -[Illustration: ·PRIDE AND PREJUDICE· - - - - -Chapter I.] - - -It is a truth universally acknowledged, that a single man in possession -of a good fortune must be in want of a wife. - -However little known the feelings or views of such a man may be on his -first entering a neighbourhood, this truth is so well fixed in the minds -of the surrounding families, that he is considered as the rightful -property of some one or other of their daughters. - -“My dear Mr. Bennet,” said his lady to him one day, “have you heard that -Netherfield Park is let at last?” - -Mr. Bennet replied that he had not. - -“But it is,” returned she; “for Mrs. Long has just been here, and she -told me all about it.” - -Mr. Bennet made no answer. - -“Do not you want to know who has taken it?” cried his wife, impatiently. - -“_You_ want to tell me, and I have no objection to hearing it.” - -[Illustration: - -“He came down to see the place” - -[_Copyright 1894 by George Allen._]] - -This was invitation enough. - -“Why, my dear, you must know, Mrs. Long says that Netherfield is taken -by a young man of large fortune from the north of England; that he came -down on Monday in a chaise and four to see the place, and was so much -delighted with it that he agreed with Mr. Morris immediately; that he is -to take possession before Michaelmas, and some of his servants are to be -in the house by the end of next week.” - -“What is his name?” - -“Bingley.” - -“Is he married or single?” - -“Oh, single, my dear, to be sure! A single man of large fortune; four or -five thousand a year. What a fine thing for our girls!” - -“How so? how can it affect them?” - -“My dear Mr. Bennet,” replied his wife, “how can you be so tiresome? You -must know that I am thinking of his marrying one of them.” - -“Is that his design in settling here?” - -“Design? Nonsense, how can you talk so! But it is very likely that he -_may_ fall in love with one of them, and therefore you must visit him as -soon as he comes.” - -“I see no occasion for that. You and the girls may go--or you may send -them by themselves, which perhaps will be still better; for as you are -as handsome as any of them, Mr. 
Bingley might like you the best of the -party.” - -“My dear, you flatter me. I certainly _have_ had my share of beauty, but -I do not pretend to be anything extraordinary now. When a woman has five -grown-up daughters, she ought to give over thinking of her own beauty.” - -“In such cases, a woman has not often much beauty to think of.” - -“But, my dear, you must indeed go and see Mr. Bingley when he comes into -the neighbourhood.” - -“It is more than I engage for, I assure you.” - -“But consider your daughters. Only think what an establishment it would -be for one of them. Sir William and Lady Lucas are determined to go, -merely on that account; for in general, you know, they visit no new -comers. Indeed you must go, for it will be impossible for _us_ to visit -him, if you do not.” - -“You are over scrupulous, surely. I dare say Mr. Bingley will be very -glad to see you; and I will send a few lines by you to assure him of my -hearty consent to his marrying whichever he chooses of the girls--though -I must throw in a good word for my little Lizzy.” - -“I desire you will do no such thing. Lizzy is not a bit better than the -others: and I am sure she is not half so handsome as Jane, nor half so -good-humoured as Lydia. But you are always giving _her_ the preference.” - -“They have none of them much to recommend them,” replied he: “they are -all silly and ignorant like other girls; but Lizzy has something more of -quickness than her sisters.” - -“Mr. Bennet, how can you abuse your own children in such a way? You take -delight in vexing me. You have no compassion on my poor nerves.” - -“You mistake me, my dear. I have a high respect for your nerves. They -are my old friends. I have heard you mention them with consideration -these twenty years at least.” - -“Ah, you do not know what I suffer.” - -“But I hope you will get over it, and live to see many young men of four -thousand a year come into the neighbourhood.” - -“It will be no use to us, if twenty such should come, since you will not -visit them.” - -“Depend upon it, my dear, that when there are twenty, I will visit them -all.” - -Mr. Bennet was so odd a mixture of quick parts, sarcastic humour, -reserve, and caprice, that the experience of three-and-twenty years had -been insufficient to make his wife understand his character. _Her_ mind -was less difficult to develope. She was a woman of mean understanding, -little information, and uncertain temper. When she was discontented, she -fancied herself nervous. The business of her life was to get her -daughters married: its solace was visiting and news. - -[Illustration: M^{r.} & M^{rs.} Bennet - -[_Copyright 1894 by George Allen._]] - - - - -[Illustration: - -“I hope Mr. Bingley will like it” - -[_Copyright 1894 by George Allen._]] - - - - -CHAPTER II. - - -[Illustration] - -Mr. Bennet was among the earliest of those who waited on Mr. Bingley. He -had always intended to visit him, though to the last always assuring his -wife that he should not go; and till the evening after the visit was -paid she had no knowledge of it. It was then disclosed in the following -manner. Observing his second daughter employed in trimming a hat, he -suddenly addressed her with,-- - -“I hope Mr. Bingley will like it, Lizzy.” - -“We are not in a way to know _what_ Mr. Bingley likes,” said her mother, -resentfully, “since we are not to visit.” - -“But you forget, mamma,” said Elizabeth, “that we shall meet him at the -assemblies, and that Mrs. Long has promised to introduce him.” - -“I do not believe Mrs. 
[... remainder of diff omitted: several hundred removed ("-") lines reproducing the plain text of "Pride and Prejudice" (Chapters II–X excerpt, including Gutenberg illustration captions), evidently a bundled sample-data text file rather than source code ...]
The perpetual -commendations of the lady either on his hand-writing, or on the evenness -of his lines, or on the length of his letter, with the perfect unconcern -with which her praises were received, formed a curious dialogue, and was -exactly in unison with her opinion of each. - -“How delighted Miss Darcy will be to receive such a letter!” - -He made no answer. - -“You write uncommonly fast.” - -“You are mistaken. I write rather slowly.” - -“How many letters you must have occasion to write in the course of a -year! Letters of business, too! How odious I should think them!” - -“It is fortunate, then, that they fall to my lot instead of to yours.” - -“Pray tell your sister that I long to see her.” - -“I have already told her so once, by your desire.” - -“I am afraid you do not like your pen. Let me mend it for you. I mend -pens remarkably well.” - -“Thank you--but I always mend my own.” - -“How can you contrive to write so even?” - -He was silent. - -“Tell your sister I am delighted to hear of her improvement on the harp, -and pray let her know that I am quite in raptures with her beautiful -little design for a table, and I think it infinitely superior to Miss -Grantley’s.” - -“Will you give me leave to defer your raptures till I write again? At -present I have not room to do them justice.” - -“Oh, it is of no consequence. I shall see her in January. But do you -always write such charming long letters to her, Mr. Darcy?” - -“They are generally long; but whether always charming, it is not for me -to determine.” - -“It is a rule with me, that a person who can write a long letter with -ease cannot write ill.” - -“That will not do for a compliment to Darcy, Caroline,” cried her -brother, “because he does _not_ write with ease. He studies too much -for words of four syllables. Do not you, Darcy?” - -“My style of writing is very different from yours.” - -“Oh,” cried Miss Bingley, “Charles writes in the most careless way -imaginable. He leaves out half his words, and blots the rest.” - -“My ideas flow so rapidly that I have not time to express them; by which -means my letters sometimes convey no ideas at all to my correspondents.” - -“Your humility, Mr. Bingley,” said Elizabeth, “must disarm reproof.” - -“Nothing is more deceitful,” said Darcy, “than the appearance of -humility. It is often only carelessness of opinion, and sometimes an -indirect boast.” - -“And which of the two do you call _my_ little recent piece of modesty?” - -“The indirect boast; for you are really proud of your defects in -writing, because you consider them as proceeding from a rapidity of -thought and carelessness of execution, which, if not estimable, you -think at least highly interesting. The power of doing anything with -quickness is always much prized by the possessor, and often without any -attention to the imperfection of the performance. When you told Mrs. -Bennet this morning, that if you ever resolved on quitting Netherfield -you should be gone in five minutes, you meant it to be a sort of -panegyric, of compliment to yourself; and yet what is there so very -laudable in a precipitance which must leave very necessary business -undone, and can be of no real advantage to yourself or anyone else?” - - - CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO. - TOOKS COURT, CHANCERY LANE, LONDON. 
- - -*** END OF THE PROJECT GUTENBERG EBOOK 1342 *** diff --git a/tests/dummy/data/transformers.py b/tests/dummy/data/transformers.py deleted file mode 100644 index 7d8911bb..00000000 --- a/tests/dummy/data/transformers.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Iterable - -from datasets import ( # type: ignore - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, -) - - -def create_sample_dataset( - column: str = "text", pattern: str = "sample text {}" -) -> Dataset: - return Dataset.from_dict({column: [pattern.format(ind) for ind in range(1, 4)]}) - - -def create_sample_iterable_dataset( - column: str = "text", pattern: str = "sample text {}" -) -> IterableDataset: - def _generator(): - for ind in range(1, 4): - yield {column: pattern.format(ind)} - - return IterableDataset.from_generator(_generator) - - -def create_sample_dataset_dict( - splits: Iterable[str] = ("train", "test"), - column: str = "text", - pattern: str = "sample text {}", -): - return DatasetDict( - { - split: create_sample_dataset(column=column, pattern=pattern) - for split in splits - } - ) - - -def create_sample_iterable_dataset_dict( - splits: Iterable[str] = ("train", "test"), - column: str = "text", - pattern: str = "sample text {}", -): - return IterableDatasetDict( - { - split: create_sample_iterable_dataset(column=column, pattern=pattern) - for split in splits - } - ) diff --git a/tests/dummy/services/__init__.py b/tests/dummy/services/__init__.py deleted file mode 100644 index 8c63c5c4..00000000 --- a/tests/dummy/services/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .requests import TestRequestGenerator - -__all__ = [ - "TestRequestGenerator", -] diff --git a/tests/dummy/services/requests.py b/tests/dummy/services/requests.py deleted file mode 100644 index e7e29402..00000000 --- a/tests/dummy/services/requests.py +++ /dev/null @@ -1,31 +0,0 @@ -from typing import Optional - -from guidellm.core import TextGenerationRequest -from guidellm.request import GenerationMode, RequestGenerator - - -class TestRequestGenerator(RequestGenerator): - """ - This class represents the Testing Request Generator. - The purpose - to be used for testing. 
- """ - - def __init__( - self, - tokenizer: Optional[str] = None, - mode: GenerationMode = "async", - async_queue_size: int = 50, - ): - super().__init__( - type_="test", - source="test", - tokenizer=tokenizer, - mode=mode, - async_queue_size=async_queue_size, - ) - - def create_item(self) -> TextGenerationRequest: - return TextGenerationRequest(prompt="Test prompt") - - def __len__(self) -> int: - raise NotImplementedError diff --git a/tests/e2e/test_guidellm.py b/tests/e2e/test_guidellm.py deleted file mode 100644 index 75ab2212..00000000 --- a/tests/e2e/test_guidellm.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -from guidellm.config import settings - - -@pytest.mark.smoke() -def test_import(): - assert settings diff --git a/tests/e2e/test_placeholder.py b/tests/e2e/test_placeholder.py new file mode 100644 index 00000000..d028e3f9 --- /dev/null +++ b/tests/e2e/test_placeholder.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.mark.smoke() +def test_placeholder(): + assert True diff --git a/tests/integration/test_guidellm.py b/tests/integration/test_guidellm.py deleted file mode 100644 index 75ab2212..00000000 --- a/tests/integration/test_guidellm.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -from guidellm.config import settings - - -@pytest.mark.smoke() -def test_import(): - assert settings diff --git a/tests/integration/test_placeholder.py b/tests/integration/test_placeholder.py new file mode 100644 index 00000000..d028e3f9 --- /dev/null +++ b/tests/integration/test_placeholder.py @@ -0,0 +1,6 @@ +import pytest + + +@pytest.mark.smoke() +def test_placeholder(): + assert True diff --git a/tests/unit/backend/test_backend.py b/tests/unit/backend/test_backend.py index 29a008e1..1c16d397 100644 --- a/tests/unit/backend/test_backend.py +++ b/tests/unit/backend/test_backend.py @@ -124,10 +124,13 @@ async def test_backend_chat_completions(mock_backend): @pytest.mark.smoke() -def test_backend_models(mock_backend): - assert mock_backend.available_models() == ["mock-model"] +@pytest.mark.asyncio() +async def test_backend_models(mock_backend): + models = await mock_backend.available_models() + assert models == ["mock-model"] @pytest.mark.smoke() -def test_backend_validate(mock_backend): - mock_backend.validate() +@pytest.mark.asyncio() +async def test_backend_validate(mock_backend): + await mock_backend.validate() diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index db03c259..0749e9db 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -42,24 +42,26 @@ def test_openai_http_backend_intialization(): @pytest.mark.smoke() -def test_openai_http_backend_available_models(httpx_openai_mock): +@pytest.mark.asyncio() +async def test_openai_http_backend_available_models(httpx_openai_mock): backend = OpenAIHTTPBackend(target="http://target.mock") - models = backend.available_models() + models = await backend.available_models() assert models == ["mock-model"] @pytest.mark.smoke() -def test_openai_http_backend_validate(httpx_openai_mock): +@pytest.mark.asyncio() +async def test_openai_http_backend_validate(httpx_openai_mock): backend = OpenAIHTTPBackend(target="http://target.mock", model="mock-model") - backend.validate() + await backend.validate() backend = OpenAIHTTPBackend(target="http://target.mock") - backend.validate() + await backend.validate() assert backend.model == "mock-model" backend = OpenAIHTTPBackend(target="http://target.mock", model="invalid-model") with pytest.raises(ValueError): - 
backend.validate() + await backend.validate() @pytest.mark.smoke() diff --git a/tests/unit/backend/test_response.py b/tests/unit/backend/test_response.py index 8de78925..c4773083 100644 --- a/tests/unit/backend/test_response.py +++ b/tests/unit/backend/test_response.py @@ -20,6 +20,9 @@ def test_streaming_response_types(): def test_streaming_text_response_default_initilization(): response = StreamingTextResponse( type_="start", + value="", + start_time=0.0, + first_iter_time=None, iter_count=0, delta="", time=0.0, @@ -31,13 +34,19 @@ def test_streaming_text_response_default_initilization(): def test_streaming_text_response_initialization(): response = StreamingTextResponse( type_="start", - iter_count=0, + value="Hello, world!", + start_time=0.0, + first_iter_time=0.0, + iter_count=1, delta="Hello, world!", time=1.0, request_id="123", ) assert response.type_ == "start" - assert response.iter_count == 0 + assert response.value == "Hello, world!" + assert response.start_time == 0.0 + assert response.first_iter_time == 0.0 + assert response.iter_count == 1 assert response.delta == "Hello, world!" assert response.time == 1.0 assert response.request_id == "123" @@ -47,6 +56,9 @@ def test_streaming_text_response_initialization(): def test_streaming_text_response_marshalling(): response = StreamingTextResponse( type_="start", + value="Hello, world!", + start_time=0.0, + first_iter_time=0.0, iter_count=0, delta="Hello, world!", time=1.0, @@ -117,7 +129,18 @@ def test_response_summary_default_initialization(): ), start_time=0.0, end_time=0.0, + first_iter_time=None, + last_iter_time=None, ) + assert summary.value == "Hello, world!" + assert summary.request_args.target == "http://example.com" + assert summary.request_args.headers == {} + assert summary.request_args.payload == {} + assert summary.start_time == 0.0 + assert summary.end_time == 0.0 + assert summary.first_iter_time is None + assert summary.last_iter_time is None + assert summary.iterations == 0 assert summary.request_prompt_tokens is None assert summary.request_output_tokens is None assert summary.response_prompt_tokens is None @@ -137,6 +160,8 @@ def test_response_summary_initialization(): start_time=1.0, end_time=2.0, iterations=3, + first_iter_time=1.0, + last_iter_time=2.0, request_prompt_tokens=5, request_output_tokens=10, response_prompt_tokens=5, @@ -150,6 +175,8 @@ def test_response_summary_initialization(): assert summary.start_time == 1.0 assert summary.end_time == 2.0 assert summary.iterations == 3 + assert summary.first_iter_time == 1.0 + assert summary.last_iter_time == 2.0 assert summary.request_prompt_tokens == 5 assert summary.request_output_tokens == 10 assert summary.response_prompt_tokens == 5 diff --git a/tests/unit/cli/__init__.py b/tests/unit/cli/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/cli/test_custom_type_params.py b/tests/unit/cli/test_custom_type_params.py deleted file mode 100644 index 1e66311d..00000000 --- a/tests/unit/cli/test_custom_type_params.py +++ /dev/null @@ -1,38 +0,0 @@ -import pytest -from click import BadParameter - -from guidellm.utils import cli_params - - -@pytest.fixture() -def max_requests_param_type(): - return cli_params.MaxRequestsType() - - -def test_valid_integer_input(max_requests_param_type): - assert max_requests_param_type.convert(10, None, None) == 10 - assert max_requests_param_type.convert("42", None, None) == 42 - - -def test_valid_dataset_input(max_requests_param_type): - assert max_requests_param_type.convert("dataset", None, None) 
== "dataset" - - -def test_invalid_string_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("invalid", None, None) - - -def test_invalid_float_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("10.5", None, None) - - -def test_invalid_non_numeric_string_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("abc", None, None) - - -def test_invalid_mixed_string_input(max_requests_param_type): - with pytest.raises(BadParameter): - max_requests_param_type.convert("123abc", None, None) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 2a31df5d..41c0fbf5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,11 +1,9 @@ import json -from pathlib import Path from typing import Any, AsyncIterable, Dict, List, Literal, Optional from unittest.mock import MagicMock, patch import httpx import pytest -import requests_mock import respx from guidellm.backend import ResponseSummary, StreamingTextResponse @@ -27,21 +25,6 @@ def _fake_tokenize(text: str) -> List[int]: yield mock_tokenizer -@pytest.fixture() -def mock_requests_pride_and_prejudice(): - text_path = ( - Path(__file__).parent.parent / "dummy" / "data" / "pride_and_prejudice.txt" - ) - text_content = text_path.read_text() - - with requests_mock.Mocker() as mock: - mock.get( - "https://www.gutenberg.org/files/1342/1342-0.txt", - text=text_content, - ) - yield mock - - @pytest.fixture() def mock_backend(request): params = request.param if hasattr(request, "param") else {} diff --git a/tests/unit/core/__init__.py b/tests/unit/core/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/core/test_distribution.py b/tests/unit/core/test_distribution.py deleted file mode 100644 index 95b7e923..00000000 --- a/tests/unit/core/test_distribution.py +++ /dev/null @@ -1,107 +0,0 @@ -import pytest - -from guidellm.core import Distribution - - -@pytest.mark.smoke() -def test_distribution_initialization(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert dist.data == data - - -@pytest.mark.smoke() -def test_distribution_statistics(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert dist.mean == 3.0 - assert dist.median == 3.0 - assert dist.variance == 2.0 - assert dist.std_deviation == pytest.approx(1.414213, rel=1e-5) - assert dist.min == 1 - assert dist.max == 5 - assert dist.range == 4 - assert dist.percentile(50) == 3.0 - assert dist.percentiles([25, 50, 75]) == pytest.approx([2.0, 3.0, 4.0]) - - -@pytest.mark.smoke() -def test_distribution_no_data(): - dist = Distribution(data=[]) - assert dist.mean == 0.0 - assert dist.median == 0.0 - assert dist.variance == 0.0 - assert dist.std_deviation == 0.0 - assert dist.min == 0.0 - assert dist.max == 0.0 - assert dist.range == 0.0 - assert dist.percentile(50) == 0.0 - assert dist.percentiles([25, 50, 75]) == [0.0, 0.0, 0.0] - - -@pytest.mark.sanity() -def test_distribution_add_data(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - new_data = [6, 7, 8] - dist.add_data(new_data) - - assert dist.data == data + new_data - - -@pytest.mark.sanity() -def test_distribution_remove_data(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - remove_data = [2, 4] - dist.remove_data(remove_data) - assert dist.data == [1, 3, 5] - - -@pytest.mark.regression() -def test_distribution_str(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert 
"Distribution({" in str(dist) - assert "'mean': 3.0" in str(dist) - assert "'median': 3.0" in str(dist) - assert "'variance': 2.0" in str(dist) - assert "'percentile_indices': [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]" in str( - dist - ) - assert ( - "'percentile_values': [1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96]" - in str(dist) - ) - assert "'min': 1" in str(dist) - assert "'max': 5" in str(dist) - assert "'range': 4" in str(dist) - - -@pytest.mark.regression() -def test_distribution_repr(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - assert repr(dist) == f"Distribution(data={dist.data})" - - -@pytest.mark.regression() -def test_distribution_json(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - json_str = dist.to_json() - assert f'"data":[{dist.data[0]}' in json_str - - dist_restored = Distribution.from_json(json_str) - assert dist_restored.data == data - - -@pytest.mark.regression() -def test_distribution_yaml(): - data = [1, 2, 3, 4, 5] - dist = Distribution(data=data) - yaml_str = dist.to_yaml() - assert f"data:\n- {dist.data[0]}" in yaml_str - - dist_restored = Distribution.from_yaml(yaml_str) - assert dist_restored.data == data diff --git a/tests/unit/core/test_report.py b/tests/unit/core/test_report.py deleted file mode 100644 index c9e4ef3a..00000000 --- a/tests/unit/core/test_report.py +++ /dev/null @@ -1,106 +0,0 @@ -import tempfile -from pathlib import Path - -import pytest - -from guidellm.core import ( - GuidanceReport, - TextGenerationBenchmark, - TextGenerationBenchmarkReport, - TextGenerationRequest, - TextGenerationResult, -) - - -@pytest.fixture() -def sample_benchmark_report() -> TextGenerationBenchmarkReport: - sample_request = TextGenerationRequest(prompt="sample prompt") - sample_result = TextGenerationResult( - request=sample_request, - prompt_token_count=2, - output="sample output", - output_token_count=2, - start_time=None, - end_time=None, - first_token_time=None, - last_token_time=None, - ) - sample_benchmark = TextGenerationBenchmark( - mode="asynchronous", - rate=1.0, - results=[sample_result], - errors=[], - concurrencies=[], - ) - return TextGenerationBenchmarkReport( - benchmarks=[sample_benchmark], args={"arg1": "value1"} - ) - - -def compare_guidance_reports(report1: GuidanceReport, report2: GuidanceReport) -> bool: - return report1.benchmarks == report2.benchmarks - - -@pytest.mark.smoke() -def test_guidance_report_initialization(): - report = GuidanceReport() - assert report.benchmarks == [] - - -@pytest.mark.smoke() -def test_guidance_report_initialization_with_params(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - assert report.benchmarks == [sample_benchmark_report] - - -@pytest.mark.sanity() -def test_guidance_report_print(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - report.print() # This will output to the console - - -@pytest.mark.sanity() -def test_guidance_report_json(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - json_str = report.to_json() - loaded_report = GuidanceReport.from_json(json_str) - assert compare_guidance_reports(report, loaded_report) - - -@pytest.mark.sanity() -def test_guidance_report_yaml(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - yaml_str = report.to_yaml() - loaded_report = GuidanceReport.from_yaml(yaml_str) - assert compare_guidance_reports(report, loaded_report) - - -@pytest.mark.sanity() -def 
test_guidance_report_save_load_file(sample_benchmark_report): - report = GuidanceReport(benchmarks=[sample_benchmark_report]) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "report.yaml" - report.save_file(file_path) - loaded_report = GuidanceReport.load_file(file_path) - assert compare_guidance_reports(report, loaded_report) - - -@pytest.mark.regression() -def test_empty_guidance_report(): - report = GuidanceReport() - assert len(report.benchmarks) == 0 - report.print() # Ensure it doesn't raise error with no benchmarks - - -@pytest.mark.regression() -def test_compare_guidance_reports(sample_benchmark_report): - report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) - report2 = GuidanceReport(benchmarks=[sample_benchmark_report]) - assert compare_guidance_reports(report1, report2) - - -@pytest.mark.regression() -def test_compare_guidance_reports_inequality(sample_benchmark_report): - report1 = GuidanceReport(benchmarks=[sample_benchmark_report]) - report2 = GuidanceReport(benchmarks=[]) - assert not compare_guidance_reports(report1, report2) diff --git a/tests/unit/core/test_request.py b/tests/unit/core/test_request.py deleted file mode 100644 index 8550eb28..00000000 --- a/tests/unit/core/test_request.py +++ /dev/null @@ -1,79 +0,0 @@ -import pytest - -from guidellm.core import TextGenerationRequest - - -@pytest.mark.smoke() -def test_text_generation_request_initialization(): - prompt = "Generate a story" - request = TextGenerationRequest(prompt=prompt) - assert request.prompt == prompt - assert request.prompt_token_count is None - assert request.output_token_count is None - assert request.params == {} - - -@pytest.mark.sanity() -def test_text_generation_request_initialization_with_params(): - prompt = "Generate a story" - prompt_token_count = 50 - output_token_count = 100 - params = {"temperature": 0.7} - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - params=params, - ) - assert request.prompt == prompt - assert request.prompt_token_count == prompt_token_count - assert request.output_token_count == output_token_count - assert request.params == params - - -@pytest.mark.regression() -def test_request_json(): - prompt = "Generate text" - prompt_token_count = 10 - output_token_count = 50 - params = {"temperature": 0.7} - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - params=params, - ) - json_str = request.to_json() - assert '"prompt":"Generate text"' in json_str - assert '"id":' in json_str - - request_restored = TextGenerationRequest.from_json(json_str) - assert request.id == request_restored.id - assert request_restored.prompt == prompt - assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.output_token_count == output_token_count - assert request_restored.params == params - - -@pytest.mark.regression() -def test_request_yaml(): - prompt = "Generate text" - prompt_token_count = 15 - output_token_count = 55 - params = {"temperature": 0.8} - request = TextGenerationRequest( - prompt=prompt, - prompt_token_count=prompt_token_count, - output_token_count=output_token_count, - params=params, - ) - yaml_str = request.to_yaml() - assert "prompt: Generate text" in yaml_str - assert "id:" in yaml_str - - request_restored = TextGenerationRequest.from_yaml(yaml_str) - assert request.id == request_restored.id - assert request_restored.prompt == prompt 
- assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.output_token_count == output_token_count - assert request_restored.params == params diff --git a/tests/unit/core/test_result.py b/tests/unit/core/test_result.py deleted file mode 100644 index ddd62d7f..00000000 --- a/tests/unit/core/test_result.py +++ /dev/null @@ -1,279 +0,0 @@ -import time - -import pytest - -from guidellm.core import ( - RequestConcurrencyMeasurement, - TextGenerationBenchmark, - TextGenerationBenchmarkReport, - TextGenerationError, - TextGenerationRequest, - TextGenerationResult, -) - - -def create_sample_request(): - return TextGenerationRequest(prompt="Hello, world!") - - -def create_sample_result(): - start_time = time.time() - - return TextGenerationResult( - request=create_sample_request(), - prompt_token_count=4, - output="Generated text", - output_token_count=3, - start_time=start_time, - end_time=start_time + 1.5, - first_token_time=start_time + 0.5, - last_token_time=start_time + 1.4, - ) - - -@pytest.mark.smoke() -def test_text_generation_result_default_initialization(): - result = TextGenerationResult(request=create_sample_request()) - assert result.request.prompt == "Hello, world!" - assert result.prompt_token_count is None - assert result.output == "" - assert result.output_token_count is None - assert result.start_time is None - assert result.end_time is None - assert result.first_token_time is None - assert result.last_token_time is None - - -@pytest.mark.smoke() -def test_text_generation_result_initialization(): - result = create_sample_result() - assert result.request.prompt == "Hello, world!" - assert result.prompt_token_count == 4 - assert result.output == "Generated text" - assert result.output_token_count == 3 - assert result.start_time >= 0.0 - assert result.end_time == result.start_time + 1.5 - assert result.first_token_time == result.start_time + 0.5 - assert result.last_token_time == result.start_time + 1.4 - - # computed fields - assert result.request_latency == 1.5 - assert result.time_to_first_token == 0.5 * 1000 - assert result.inter_token_latency == pytest.approx((1.4 - 0.5) * 1000 / 2) - assert result.output_tokens_per_second == pytest.approx(2 / (1.4 - 0.5)) - - -@pytest.mark.smoke() -def test_text_generation_result_marshalling(): - result = create_sample_result() - serialized = result.model_dump() - deserialized = TextGenerationResult.model_validate(serialized) - - for key, value in vars(result).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_text_generation_error_initialization(): - error = TextGenerationError( - request=create_sample_request(), message="Error message" - ) - assert error.request.prompt == "Hello, world!" 
- assert error.message == "Error message" - - -@pytest.mark.smoke() -def test_text_generation_error_marshalling(): - error = TextGenerationError( - request=create_sample_request(), message="Error message" - ) - serialized = error.model_dump() - deserialized = TextGenerationError.model_validate(serialized) - - for key, value in vars(error).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_request_concurrency_measurement_initialization(): - start_time = time.time() - measurement = RequestConcurrencyMeasurement( - time=start_time, - completed=8, - errored=2, - processing=3, - ) - assert measurement.time == start_time - assert measurement.completed == 8 - assert measurement.errored == 2 - assert measurement.processing == 3 - - -@pytest.mark.smoke() -def test_request_concurrency_measurement_marshalling(): - start_time = time.time() - measurement = RequestConcurrencyMeasurement( - time=start_time, - completed=8, - errored=2, - processing=3, - ) - serialized = measurement.model_dump() - deserialized = RequestConcurrencyMeasurement.model_validate(serialized) - - for key, value in vars(measurement).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_text_generation_benchmark_default_initialization(): - benchmark = TextGenerationBenchmark(mode="asynchronous") - assert benchmark.mode == "asynchronous" - assert benchmark.rate is None - assert benchmark.results == [] - assert benchmark.errors == [] - assert benchmark.concurrencies == [] - - # computed - assert benchmark.request_count == 0 - assert benchmark.error_count == 0 - assert benchmark.total_count == 0 - assert benchmark.start_time is None - assert benchmark.end_time is None - assert benchmark.duration == 0.0 - assert benchmark.completed_request_rate == 0.0 - assert benchmark.request_latency_distribution is not None - assert benchmark.request_latency == 0.0 - assert benchmark.request_latency_percentiles == {} - assert benchmark.ttft_distribution is not None - assert benchmark.time_to_first_token == 0.0 - assert benchmark.time_to_first_token_percentiles == {} - assert benchmark.itl_distribution is not None - assert benchmark.inter_token_latency == 0.0 - assert benchmark.inter_token_latency_percentiles == {} - assert benchmark.output_token_throughput == 0.0 - assert benchmark.prompt_token_distribution is not None - assert benchmark.prompt_token == 0.0 - assert benchmark.prompt_token_percentiles == {} - assert benchmark.output_token_distribution is not None - assert benchmark.output_token == 0.0 - assert benchmark.output_token_percentiles == {} - - -@pytest.mark.smoke() -def test_text_generation_benchmark_initialization(): - benchmark = TextGenerationBenchmark(mode="asynchronous", rate=10) - assert benchmark.mode == "asynchronous" - assert benchmark.rate == 10 - - for _ in range(5): - benchmark.request_started() - benchmark.request_completed(create_sample_result()) - time.sleep(1.5) - - for _ in range(2): - benchmark.request_started() - benchmark.request_completed( - TextGenerationError( - request=create_sample_request(), message="Error message" - ) - ) - - def _test_percentiles(percentiles, value=None): - assert len(percentiles) == 7 - assert list(percentiles.keys()) == ["1", "5", "10", "50", "90", "95", "99"] - - if value is None: - assert all(per >= 0.0 for per in percentiles.values()) - else: - assert all(per == pytest.approx(value) for per in percentiles.values()) - - assert len(benchmark.results) == 5 - assert len(benchmark.errors) == 2 - assert 
len(benchmark.concurrencies) == 14 - assert benchmark.request_count == 5 - assert benchmark.error_count == 2 - assert benchmark.total_count == 7 - assert benchmark.start_time == pytest.approx(time.time() - 1.5 * 5, abs=0.01) - assert benchmark.end_time == pytest.approx(time.time(), abs=0.01) - assert benchmark.duration == benchmark.end_time - benchmark.start_time # type: ignore - assert benchmark.completed_request_rate == pytest.approx(5 / benchmark.duration) - assert benchmark.request_latency_distribution is not None - assert benchmark.request_latency == pytest.approx(1.5) - _test_percentiles(benchmark.request_latency_percentiles, 1.5) - assert benchmark.ttft_distribution is not None - assert benchmark.time_to_first_token == pytest.approx(500) - _test_percentiles(benchmark.time_to_first_token_percentiles, 500) - assert benchmark.itl_distribution is not None - assert benchmark.inter_token_latency == pytest.approx(450) - _test_percentiles(benchmark.inter_token_latency_percentiles, 450) - assert benchmark.output_token_throughput == pytest.approx(3.0 / 1.5, abs=0.01) - assert benchmark.prompt_token_distribution is not None - assert benchmark.prompt_token == pytest.approx(4.0) - _test_percentiles(benchmark.prompt_token_percentiles, 4.0) - assert benchmark.output_token_distribution is not None - assert benchmark.output_token == pytest.approx(3.0) - _test_percentiles(benchmark.output_token_percentiles, 3.0) - - -@pytest.mark.smoke() -def test_text_generation_benchmark_marshalling(): - benchmark = TextGenerationBenchmark(mode="asynchronous", rate=10) - for _ in range(5): - benchmark.request_started() - benchmark.request_completed(create_sample_result()) - - for _ in range(2): - benchmark.request_started() - benchmark.request_completed( - TextGenerationError( - request=create_sample_request(), message="Error message" - ) - ) - - serialized = benchmark.model_dump() - deserialized = TextGenerationBenchmark.model_validate(serialized) - - for key, value in vars(benchmark).items(): - assert getattr(deserialized, key) == value - - -@pytest.mark.smoke() -def test_text_generation_benchmark_report_initialization(): - report = TextGenerationBenchmarkReport( - benchmarks=[ - TextGenerationBenchmark(mode="asynchronous", rate=10), - TextGenerationBenchmark(mode="asynchronous", rate=20), - ], - args={ - "backend_type": "http", - "target": "http://example.com", - "model": "test-model", - }, - ) - assert len(report.benchmarks) == 2 - assert report.args == { - "backend_type": "http", - "target": "http://example.com", - "model": "test-model", - } - - -@pytest.mark.smoke() -def test_text_generation_benchmark_report_marshalling(): - report = TextGenerationBenchmarkReport( - benchmarks=[ - TextGenerationBenchmark(mode="asynchronous", rate=10), - TextGenerationBenchmark(mode="asynchronous", rate=20), - ], - args={ - "backend_type": "http", - "target": "http://example.com", - "model": "test-model", - }, - ) - serialized = report.model_dump() - deserialized = TextGenerationBenchmarkReport.model_validate(serialized) - - for key, value in vars(report).items(): - assert getattr(deserialized, key) == value diff --git a/tests/unit/core/test_serializable.py b/tests/unit/core/test_serializable.py deleted file mode 100644 index ce0cec8a..00000000 --- a/tests/unit/core/test_serializable.py +++ /dev/null @@ -1,151 +0,0 @@ -import tempfile -from pathlib import Path - -import pytest - -from guidellm.core.serializable import Serializable - - -class ExampleModel(Serializable): - name: str - age: int - - -@pytest.mark.smoke() -def 
test_serializable_json(): - # to json - example = ExampleModel(name="John Doe", age=30) - json_str = example.to_json() - assert '"name":"John Doe"' in json_str - assert '"age":30' in json_str - - # from json - example = ExampleModel.from_json(json_str) - assert example.name == "John Doe" - assert example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_yaml(): - # to yaml - example = ExampleModel(name="John Doe", age=30) - yaml_str = example.to_yaml() - assert "name: John Doe" in yaml_str - assert "age: 30" in yaml_str - - # from yaml - example = ExampleModel.from_yaml(yaml_str) - assert example.name == "John Doe" - assert example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_file_json(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.json" - saved_path = example.save_file(file_path, "json") - assert Path(saved_path).exists() - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_file_yaml(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.yaml" - saved_path = example.save_file(file_path, "yaml") - assert Path(saved_path).exists() - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.smoke() -def test_serializable_file_without_extension(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir) - assert Path(saved_path).exists() - assert saved_path.endswith(".yaml") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.sanity() -def test_serializable_file_with_directory_json(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, "json") - assert Path(saved_path).exists() - assert saved_path.endswith(".json") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.sanity() -def test_serializable_file_with_directory_yaml(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - saved_path = example.save_file(temp_dir, "yaml") - assert Path(saved_path).exists() - assert saved_path.endswith(".yaml") - loaded_example = ExampleModel.load_file(saved_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.sanity() -def test_serializable_file_infer_extension(): - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - inferred_path = example.save_file(temp_dir, "json") - assert Path(inferred_path).exists() - assert inferred_path.endswith(".json") - loaded_example = ExampleModel.load_file(inferred_path) - assert loaded_example.name == "John Doe" - assert loaded_example.age == 30 - - -@pytest.mark.regression() -def test_serializable_file_invalid_extension(): - # to file - example = ExampleModel(name="John Doe", age=30) - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) / "example.txt" - with pytest.raises(ValueError, match="Unsupported file 
extension.*"): - example.save_file(invalid_file_path) - - # to directory - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) - with pytest.raises(ValueError, match="Unsupported file extension.*"): - example.save_file(invalid_file_path, type_="txt") # type: ignore - - # from file - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) / "example.txt" - with invalid_file_path.open("w") as file: - file.write("invalid content") - with pytest.raises(ValueError, match="Unsupported file extension.*"): - ExampleModel.load_file(invalid_file_path) - - -@pytest.mark.regression() -def test_serializable_load_missing_path(): - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) / "example.yaml" - with pytest.raises(FileNotFoundError): - ExampleModel.load_file(invalid_file_path) - - -@pytest.mark.regression() -def test_serializable_load_non_file_path(): - with tempfile.TemporaryDirectory() as temp_dir: - invalid_file_path = Path(temp_dir) - with pytest.raises(ValueError, match="Path is not a file.*"): - ExampleModel.load_file(invalid_file_path) diff --git a/tests/unit/executor/__init__.py b/tests/unit/executor/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/executor/test_executor.py b/tests/unit/executor/test_executor.py deleted file mode 100644 index 58c0a9d4..00000000 --- a/tests/unit/executor/test_executor.py +++ /dev/null @@ -1,542 +0,0 @@ -from typing import List, Optional, Union -from unittest.mock import create_autospec, patch - -import pytest - -from guidellm.backend import Backend -from guidellm.config import settings -from guidellm.core import ( - TextGenerationBenchmarkReport, -) -from guidellm.executor import ( - Executor, - ExecutorResult, - Profile, - ProfileGenerationMode, - ProfileGenerator, -) -from guidellm.request import RequestGenerator -from guidellm.scheduler import Scheduler, SchedulerResult - - -@pytest.fixture() -def mock_scheduler(): - with patch("guidellm.executor.executor.Scheduler") as mock_scheduler: - - def scheduler_constructor(*args, **kwargs): - mock_instance = create_autospec(Scheduler, instance=True) - mock_instance.args = args - mock_instance.kwargs = kwargs - num_requests = kwargs.get("max_number", 10) - - async def run(): - benchmark = create_autospec( - TextGenerationBenchmarkReport, instance=True - ) - benchmark.completed_request_rate = kwargs.get("rate", None) - yield SchedulerResult( - completed=False, - count_total=10, - count_completed=0, - benchmark=benchmark, - current_result=None, - ) - - for index in range(num_requests): - yield SchedulerResult( - completed=False, - count_total=10, - count_completed=index + 1, - benchmark=benchmark, - current_result=create_autospec( - TextGenerationBenchmarkReport, instance=True - ), - ) - - yield SchedulerResult( - completed=True, - count_total=num_requests, - count_completed=num_requests, - benchmark=benchmark, - current_result=None, - ) - - mock_instance.run.side_effect = run - - return mock_instance - - mock_scheduler.side_effect = scheduler_constructor - yield mock_scheduler - - -@pytest.mark.smoke() -def test_executor_result_instantiation(): - report = create_autospec(TextGenerationBenchmarkReport, instance=True) - scheduler_result = create_autospec(SchedulerResult, instance=True) - executor_result = ExecutorResult( - completed=True, - count_total=10, - count_completed=5, - generation_modes=["synchronous", "throughput", "constant"], - report=report, - 
scheduler_result=scheduler_result, - ) - - assert executor_result.completed is True - assert executor_result.count_total == 10 - assert executor_result.count_completed == 5 - assert executor_result.report == report - assert executor_result.scheduler_result == scheduler_result - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("sweep", None), - ("synchronous", None), - ("throughput", None), - ("constant", 10), - ("constant", [10, 20, 30]), - ("poisson", 10), - ("poisson", [10, 20, 30]), - ], -) -def test_executor_instantiation(mode, rate): - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode=mode, - rate=rate, - max_number=100, - max_duration=60.0, - ) - - assert executor.backend == backend - assert executor.request_generator == request_generator - assert executor.profile_generator is not None - assert isinstance(executor.profile_generator, ProfileGenerator) - assert executor.profile_generator.mode == mode - assert ( - executor.profile_generator.rates == rate - if not rate or isinstance(rate, list) - else [rate] - ) - assert executor.max_number == 100 - assert executor.max_duration == 60.0 - - -def _check_executor_result_base( - result: ExecutorResult, - expected_completed: bool, - expected_count_total: int, - expected_count_completed: int, - expected_generation_modes: List[ProfileGenerationMode], -): - assert result.completed == expected_completed - assert result.count_total == expected_count_total - assert result.count_completed == expected_count_completed - assert result.generation_modes == expected_generation_modes - - -def _check_executor_result_report( - result: ExecutorResult, - mode: ProfileGenerationMode, - rate: Optional[Union[float, List[float]]], - max_number: Optional[int], - max_duration: Optional[float], - benchmarks_count: int, -): - assert result.report is not None - assert isinstance(result.report, TextGenerationBenchmarkReport) - - # check args - for expected in ( - "backend_type", - "target", - "model", - "data_type", - "data", - "tokenizer", - "mode", - "rate", - "max_number", - "max_duration", - ): - assert expected in result.report.args - - assert result.report.args["mode"] == mode - assert ( - result.report.args["rate"] == rate - if rate is None or not isinstance(rate, (float, int)) - else [rate] - ) - assert result.report.args["max_number"] == max_number - assert result.report.args["max_duration"] == max_duration - - # check benchmarks - assert len(result.report.benchmarks) == benchmarks_count - for benchmark in result.report.benchmarks: - assert isinstance(benchmark, TextGenerationBenchmarkReport) - - -def _check_executor_result_scheduler( - result: ExecutorResult, - expected_scheduler_result: bool, - expected_generation_modes: List[ProfileGenerationMode], - expected_index: Optional[int], - expected_profile_mode: Optional[ProfileGenerationMode], - expected_profile_rate: Optional[float], -): - if not expected_scheduler_result: - assert result.scheduler_result is None - assert result.current_index is None - assert result.current_profile is None - - return - - assert result.scheduler_result is not None - assert isinstance(result.scheduler_result, SchedulerResult) - assert result.current_index == expected_index - assert result.current_profile is not None - assert isinstance(result.current_profile, Profile) - assert result.current_profile.load_gen_mode == expected_profile_mode - assert 
result.current_profile.load_gen_rate == expected_profile_rate - assert ( - result.current_profile.load_gen_mode - == expected_generation_modes[expected_index] # type: ignore - ) - - -@pytest.mark.smoke() -@pytest.mark.asyncio() -async def test_executor_run_sweep(mock_scheduler): - num_requests = 15 - - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode="sweep", - rate=None, - max_number=num_requests, - ) - - num_profiles = 2 + settings.num_sweep_profiles - generation_modes = ["synchronous", "throughput"] + [ - "constant" - ] * settings.num_sweep_profiles - generation_rates = [None, None] + list(range(2, settings.num_sweep_profiles + 2)) - output_rates = [1, settings.num_sweep_profiles + 1] + list( - range(2, settings.num_sweep_profiles + 2) - ) - - iterator = executor.run() - - # Check start result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=0, - expected_generation_modes=generation_modes, # type: ignore - ) - _check_executor_result_report( - result=result, - mode="sweep", - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=0, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, # type: ignore - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - for scheduler_index in range(num_profiles): - for request_index in range(num_requests + 2): - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - expected_generation_modes=generation_modes, # type: ignore - ) - _check_executor_result_report( - result=result, - mode="sweep", - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=True, - expected_generation_modes=generation_modes, # type: ignore - expected_index=scheduler_index, - expected_profile_mode=generation_modes[scheduler_index], # type: ignore - expected_profile_rate=generation_rates[scheduler_index], - ) - # set the rate for the benchmark for sweep profile generation - result.report.benchmarks[-1].completed_request_rate = output_rates[ # type: ignore - scheduler_index - ] - result.report.benchmarks[-1].request_count = num_requests # type: ignore - - # Check end result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=True, - expected_count_total=num_profiles, - expected_count_completed=num_profiles, - expected_generation_modes=generation_modes, # type: ignore - ) - _check_executor_result_report( - result=result, - mode="sweep", - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=num_profiles, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, # type: ignore - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - -@pytest.mark.smoke() -@pytest.mark.asyncio() 
-@pytest.mark.parametrize( - "mode", - [ - "synchronous", - "throughput", - ], -) -async def test_executor_run_non_rate_modes(mock_scheduler, mode): - num_requests = 15 - - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode=mode, - rate=None, - max_number=num_requests, - ) - - iterator = executor.run() - - # Check start result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=1, - expected_count_completed=0, - expected_generation_modes=[mode], - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=0, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=[mode], - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - for request_index in range(num_requests + 2): - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=1, - expected_count_completed=0 if request_index < num_requests + 1 else 1, - expected_generation_modes=[mode], - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=0 if request_index < num_requests + 1 else 1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=True, - expected_generation_modes=[mode], - expected_index=0, - expected_profile_mode=mode, - expected_profile_rate=None, - ) - - # Check end result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=True, - expected_count_total=1, - expected_count_completed=1, - expected_generation_modes=[mode], - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=None, - max_number=num_requests, - max_duration=None, - benchmarks_count=1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=[mode], - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - -@pytest.mark.smoke() -@pytest.mark.asyncio() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("constant", 10), - ("constant", [10, 20, 30]), - ("poisson", 10), - ("poisson", [10, 20, 30]), - ], -) -async def test_executor_run_rate_modes(mock_scheduler, mode, rate): - num_requests = 15 - - backend = create_autospec(Backend, instance=True) - request_generator = create_autospec(RequestGenerator, instance=True) - executor = Executor( - backend=backend, - request_generator=request_generator, - mode=mode, - rate=rate, - max_number=num_requests, - ) - - num_profiles = len(rate) if isinstance(rate, list) else 1 - generation_modes = [mode] * num_profiles - generation_rates = rate if isinstance(rate, list) else [rate] - - iterator = executor.run() - - # Check start result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=0, - expected_generation_modes=generation_modes, - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=rate, - max_number=num_requests, - max_duration=None, - benchmarks_count=0, - ) - _check_executor_result_scheduler( 
- result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) - - for scheduler_index in range(num_profiles): - for request_index in range(num_requests + 2): - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=False, - expected_count_total=num_profiles, - expected_count_completed=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - expected_generation_modes=generation_modes, - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=rate, - max_number=num_requests, - max_duration=None, - benchmarks_count=scheduler_index - if request_index < num_requests + 1 - else scheduler_index + 1, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=True, - expected_generation_modes=generation_modes, - expected_index=scheduler_index, - expected_profile_mode=generation_modes[scheduler_index], - expected_profile_rate=generation_rates[scheduler_index], - ) - - # Check end result - result = await iterator.__anext__() - _check_executor_result_base( - result=result, - expected_completed=True, - expected_count_total=num_profiles, - expected_count_completed=num_profiles, - expected_generation_modes=generation_modes, - ) - _check_executor_result_report( - result=result, - mode=mode, - rate=rate, - max_number=num_requests, - max_duration=None, - benchmarks_count=num_profiles, - ) - _check_executor_result_scheduler( - result=result, - expected_scheduler_result=False, - expected_generation_modes=generation_modes, - expected_index=None, - expected_profile_mode=None, - expected_profile_rate=None, - ) diff --git a/tests/unit/executor/test_profile_generator.py b/tests/unit/executor/test_profile_generator.py deleted file mode 100644 index 9c91d574..00000000 --- a/tests/unit/executor/test_profile_generator.py +++ /dev/null @@ -1,204 +0,0 @@ -from typing import get_args -from unittest.mock import create_autospec - -import pytest - -from guidellm import settings -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationBenchmarkReport, -) -from guidellm.executor import Profile, ProfileGenerationMode, ProfileGenerator - - -@pytest.mark.smoke() -def test_profile_generator_mode(): - assert set(get_args(ProfileGenerationMode)) == { - "sweep", - "synchronous", - "throughput", - "constant", - "poisson", - } - - -@pytest.mark.smoke() -def test_profile_instantiation(): - profile = Profile(load_gen_mode="constant", load_gen_rate=10) - assert profile.load_gen_mode == "constant" - assert profile.load_gen_rate == 10 - assert profile.args == {} - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("sweep", None), - ("synchronous", None), - ("throughput", None), - ("constant", 10), - ("constant", [10, 20, 30]), - ("poisson", 10), - ("poisson", [10, 20, 30]), - ], -) -def test_profile_generator_instantiation(mode, rate): - generator = ProfileGenerator(mode=mode, rate=rate) - assert generator.mode == mode - - if rate is None: - assert generator.rates is None - elif isinstance(rate, list): - assert generator.rates == rate - else: - assert generator.rates == [rate] - - if mode == "sweep": - assert len(generator) == settings.num_sweep_profiles + 2 - assert ( - generator.profile_generation_modes - == ["synchronous", "throughput"] - + ["constant"] * settings.num_sweep_profiles - ) - elif mode in ("throughput", "synchronous"): - assert len(generator) == 
1 - assert generator.profile_generation_modes == [mode] - else: - assert len(generator) == len(rate) if isinstance(rate, list) else 1 - assert generator.profile_generation_modes == [mode] * ( - len(rate) if isinstance(rate, list) else 1 - ) - - assert generator.generated_count == 0 - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - # invalid modes - ("invalid_mode", None), - # rates supplied for non-applicable modes - ("sweep", 10), - ("sweep", [10, 20, 30]), - ("synchronous", 10), - ("synchronous", [10, 20, 30]), - ("throughput", 10), - ("throughput", [10, 20, 30]), - # invalid rates supplied for applicable modes - ("constant", None), - ("constant", -1), - ("constant", 0), - ("poisson", None), - ("poisson", -1), - ("poisson", 0), - ], -) -def test_profile_generator_invalid_instantiation(mode, rate): - with pytest.raises(ValueError): - ProfileGenerator(mode=mode, rate=rate) - - -@pytest.mark.sanity() -def test_profile_generator_next_sweep(): - generator = ProfileGenerator(mode="sweep") - current_report = TextGenerationBenchmarkReport() - - for index in range(settings.num_sweep_profiles + 2): - profile: Profile = generator.next(current_report) # type: ignore - - if index == 0: - assert profile.load_gen_mode == "synchronous" - assert profile.load_gen_rate is None - mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) - mock_benchmark.completed_request_rate = 1 - current_report.add_benchmark(mock_benchmark) - elif index == 1: - assert profile.load_gen_mode == "throughput" - assert profile.load_gen_rate is None - mock_benchmark = create_autospec(TextGenerationBenchmark, instance=True) - mock_benchmark.completed_request_rate = 10 - current_report.add_benchmark(mock_benchmark) - else: - assert profile.load_gen_mode == "constant" - assert profile.load_gen_rate == index - - assert generator.generated_count == index + 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -def test_profile_generator_next_synchronous(): - generator = ProfileGenerator(mode="synchronous") - current_report = TextGenerationBenchmarkReport() - - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "synchronous" - assert profile.load_gen_rate is None - assert generator.generated_count == 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -def test_profile_generator_next_throughput(): - generator = ProfileGenerator(mode="throughput") - current_report = TextGenerationBenchmarkReport() - - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "throughput" - assert profile.load_gen_rate is None - assert generator.generated_count == 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - "rate", - [ - 10, - [10, 20, 30], - ], -) -def test_profile_generator_next_constant(rate): - generator = ProfileGenerator(mode="constant", rate=rate) - test_rates = rate if isinstance(rate, list) else [rate] - current_report = TextGenerationBenchmarkReport() - - for index, test_rate in enumerate(test_rates): - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "constant" - assert profile.load_gen_rate == test_rate - assert generator.generated_count == index + 1 - - for _ in range(3): - assert generator.next(current_report) is None - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - "rate", - 
[ - 10, - [10, 20, 30], - ], -) -def test_profile_generator_next_poisson(rate): - generator = ProfileGenerator(mode="poisson", rate=rate) - test_rates = rate if isinstance(rate, list) else [rate] - current_report = TextGenerationBenchmarkReport() - - for index, test_rate in enumerate(test_rates): - profile: Profile = generator.next(current_report) # type: ignore - assert profile.load_gen_mode == "poisson" - assert profile.load_gen_rate == test_rate - assert generator.generated_count == index + 1 - - for _ in range(3): - assert generator.next(current_report) is None diff --git a/tests/unit/mock_backend.py b/tests/unit/mock_backend.py index 9eb4d6ee..0e59e93e 100644 --- a/tests/unit/mock_backend.py +++ b/tests/unit/mock_backend.py @@ -36,10 +36,17 @@ def target(self) -> str: def model(self) -> Optional[str]: return self._model - def check_setup(self): + @property + def info(self) -> Dict[str, Any]: + return {} + + async def prepare_multiprocessing(self): + pass + + async def check_setup(self): pass - def available_models(self) -> List[str]: + async def available_models(self) -> List[str]: return [self.model] # type: ignore async def text_completions( # type: ignore @@ -97,24 +104,38 @@ async def _text_prompt_response_generator( yield StreamingTextResponse( type_="start", + value="", + start_time=start_time, + first_iter_time=None, iter_count=0, delta="", time=start_time, request_id=request_id, ) + first_iter_time = None + last_iter_time = None + for index, token in enumerate(tokens): if self._iter_delay: await asyncio.sleep(self._iter_delay) + if first_iter_time is None: + first_iter_time = time.time() + yield StreamingTextResponse( type_="iter", + value="".join(tokens[: index + 1]), + start_time=start_time, + first_iter_time=first_iter_time, iter_count=index + 1, delta=token, time=time.time(), request_id=request_id, ) + last_iter_time = time.time() + yield ResponseSummary( value="".join(tokens), request_args=RequestArgs( @@ -125,6 +146,8 @@ async def _text_prompt_response_generator( iterations=len(tokens), start_time=start_time, end_time=time.time(), + first_iter_time=first_iter_time, + last_iter_time=last_iter_time, request_prompt_tokens=prompt_token_count, request_output_tokens=output_token_count, response_prompt_tokens=len(prompt.split()) + prompt.count(" "), diff --git a/tests/dummy/data/__init__.py b/tests/unit/objects/__init__.py similarity index 100% rename from tests/dummy/data/__init__.py rename to tests/unit/objects/__init__.py diff --git a/tests/unit/objects/test_pydantic.py b/tests/unit/objects/test_pydantic.py new file mode 100644 index 00000000..a27fac5a --- /dev/null +++ b/tests/unit/objects/test_pydantic.py @@ -0,0 +1,43 @@ +import pytest +from pydantic import computed_field + +from guidellm.objects.pydantic import StandardBaseModel + + +class ExampleModel(StandardBaseModel): + name: str + age: int + + @computed_field # type: ignore[misc] + @property + def computed(self) -> str: + return self.name + " " + str(self.age) + + +@pytest.mark.smoke() +def test_standard_base_model_initialization(): + example = ExampleModel(name="John Doe", age=30) + assert example.name == "John Doe" + assert example.age == 30 + assert example.computed == "John Doe 30" + + +@pytest.mark.smoke() +def test_standard_base_model_invalid_initialization(): + with pytest.raises(ValueError): + ExampleModel(name="John Doe", age="thirty") # type: ignore[arg-type] + + +@pytest.mark.smoke() +def test_standard_base_model_marshalling(): + example = ExampleModel(name="John Doe", age=30) + serialized = 
example.model_dump() + assert serialized["name"] == "John Doe" + assert serialized["age"] == 30 + assert serialized["computed"] == "John Doe 30" + + serialized["computed"] = "Jane Doe 40" + deserialized = ExampleModel.model_validate(serialized) + assert deserialized.name == "John Doe" + assert deserialized.age == 30 + assert deserialized.computed == "John Doe 30" diff --git a/tests/unit/objects/test_statistics.py b/tests/unit/objects/test_statistics.py new file mode 100644 index 00000000..692db4b6 --- /dev/null +++ b/tests/unit/objects/test_statistics.py @@ -0,0 +1,693 @@ +import math +import time +from typing import List, Literal + +import numpy as np +import pytest + +from guidellm.objects import ( + DistributionSummary, + Percentiles, + RunningStats, + StatusDistributionSummary, + TimeRunningStats, +) + + +def create_default_percentiles() -> Percentiles: + return Percentiles( + p001=0.1, + p01=1.0, + p05=5.0, + p10=10.0, + p25=25.0, + p75=75.0, + p90=90.0, + p95=95.0, + p99=99.0, + p999=99.9, + ) + + +def create_default_distribution_summary() -> DistributionSummary: + return DistributionSummary( + mean=50.0, + median=50.0, + mode=50.0, + variance=835, + std_dev=math.sqrt(835), + min=0.0, + max=100.0, + count=1001, + total_sum=50050.0, + percentiles=create_default_percentiles(), + ) + + +@pytest.mark.smoke() +def test_percentiles_initialization(): + percentiles = create_default_percentiles() + assert percentiles.p001 == 0.1 + assert percentiles.p01 == 1.0 + assert percentiles.p05 == 5.0 + assert percentiles.p10 == 10.0 + assert percentiles.p25 == 25.0 + assert percentiles.p75 == 75.0 + assert percentiles.p90 == 90.0 + assert percentiles.p95 == 95.0 + assert percentiles.p99 == 99.0 + assert percentiles.p999 == 99.9 + + +@pytest.mark.smoke() +def test_percentiles_invalid_initialization(): + test_kwargs = { + "p001": 0.1, + "p01": 1.0, + "p05": 5.0, + "p10": 10.0, + "p25": 25.0, + "p75": 75.0, + "p90": 90.0, + "p95": 95.0, + "p99": 99.0, + "p999": 99.9, + } + test_missing_keys = list(test_kwargs.keys()) + + for missing_key in test_missing_keys: + kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} + with pytest.raises(ValueError): + Percentiles(**kwargs) + + +@pytest.mark.smoke() +def test_percentiles_marshalling(): + percentiles = create_default_percentiles() + serialized = percentiles.model_dump() + deserialized = Percentiles.model_validate(serialized) + + for key, value in vars(percentiles).items(): + assert getattr(deserialized, key) == value + + +@pytest.mark.smoke() +def test_distribution_summary_initilaization(): + distribution_summary = create_default_distribution_summary() + assert distribution_summary.mean == 50.0 + assert distribution_summary.median == 50.0 + assert distribution_summary.mode == 50.0 + assert distribution_summary.variance == 835 + assert distribution_summary.std_dev == math.sqrt(835) + assert distribution_summary.min == 0.0 + assert distribution_summary.max == 100.0 + assert distribution_summary.count == 1001 + assert distribution_summary.total_sum == 50050.0 + assert distribution_summary.percentiles.p001 == 0.1 + assert distribution_summary.percentiles.p01 == 1.0 + assert distribution_summary.percentiles.p05 == 5.0 + assert distribution_summary.percentiles.p10 == 10.0 + assert distribution_summary.percentiles.p25 == 25.0 + assert distribution_summary.percentiles.p75 == 75.0 + assert distribution_summary.percentiles.p90 == 90.0 + assert distribution_summary.percentiles.p95 == 95.0 + assert distribution_summary.percentiles.p99 == 99.0 + 
assert distribution_summary.percentiles.p999 == 99.9 + + +@pytest.mark.smoke() +def test_distribution_summary_invalid_initialization(): + test_kwargs = { + "mean": 50.0, + "median": 50.0, + "mode": 50.0, + "variance": 835, + "std_dev": math.sqrt(835), + "min": 0.0, + "max": 100.0, + "count": 1001, + "total_sum": 50050.0, + "percentiles": create_default_percentiles(), + } + test_missing_keys = list(test_kwargs.keys()) + for missing_key in test_missing_keys: + kwargs = {key: val for key, val in test_kwargs.items() if key != missing_key} + with pytest.raises(ValueError): + DistributionSummary(**kwargs) # type: ignore[arg-type] + + +@pytest.mark.smoke() +def test_distribution_summary_marshalling(): + distribution_summary = create_default_distribution_summary() + serialized = distribution_summary.model_dump() + deserialized = DistributionSummary.model_validate(serialized) + + for key, value in vars(distribution_summary).items(): + assert getattr(deserialized, key) == value + + +@pytest.mark.smoke() +def test_distribution_summary_from_distribution_function(): + values = [val / 10.0 for val in range(1001)] + distribution = [(val, 1.0) for val in values] + distribution_summary = DistributionSummary.from_distribution_function(distribution) + assert distribution_summary.mean == pytest.approx(np.mean(values)) + assert distribution_summary.median == pytest.approx(np.median(values)) + assert distribution_summary.mode == 0.0 + assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) + assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary.min == min(values) + assert distribution_summary.max == max(values) + assert distribution_summary.count == len(values) + assert distribution_summary.total_sum == sum(values) + assert distribution_summary.percentiles.p001 == pytest.approx( + np.percentile(values, 0.1) + ) + assert distribution_summary.percentiles.p01 == pytest.approx( + np.percentile(values, 1.0) + ) + assert distribution_summary.percentiles.p05 == pytest.approx( + np.percentile(values, 5.0) + ) + assert distribution_summary.percentiles.p10 == pytest.approx( + np.percentile(values, 10.0) + ) + assert distribution_summary.percentiles.p25 == pytest.approx( + np.percentile(values, 25.0) + ) + assert distribution_summary.percentiles.p75 == pytest.approx( + np.percentile(values, 75.0) + ) + assert distribution_summary.percentiles.p90 == pytest.approx( + np.percentile(values, 90.0) + ) + assert distribution_summary.percentiles.p95 == pytest.approx( + np.percentile(values, 95.0) + ) + assert distribution_summary.percentiles.p99 == pytest.approx( + np.percentile(values, 99.0) + ) + assert distribution_summary.percentiles.p999 == pytest.approx( + np.percentile(values, 99.9) + ) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_distribution_function( + distribution, include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == len(values) + + +def test_distribution_summary_from_values(): + values = [val / 10 for val in range(1001)] + distribution_summary = DistributionSummary.from_values(values) + assert distribution_summary.mean == pytest.approx(np.mean(values)) + assert distribution_summary.median == pytest.approx(np.median(values)) + assert distribution_summary.mode == 0.0 + assert distribution_summary.variance == pytest.approx(np.var(values, ddof=0)) 
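As a usage aside: the `from_values` factory exercised in the surrounding tests turns a raw list of samples into a full `DistributionSummary`. The sketch below is illustrative only; the sample data is hypothetical, and the only API assumed is the `from_values(values, weights=..., include_cdf=...)` signature and the fields asserted in these tests.

```python
from guidellm.objects import DistributionSummary

# Hypothetical per-request latencies (seconds) from a benchmark run.
latencies = [0.82, 1.10, 0.94, 1.41, 1.03, 1.27]

# Build a summary from raw values, mirroring DistributionSummary.from_values(...)
# as called in these tests; include_cdf=True also materializes the CDF.
summary = DistributionSummary.from_values(latencies, include_cdf=True)
print(summary.mean, summary.percentiles.p95)

# Weights bias the summary toward repeated observations, matching the
# weights=[2] * len(values) variant exercised below.
weighted = DistributionSummary.from_values(latencies, weights=[2] * len(latencies))
```

A side note on the fixture constants above: `create_default_distribution_summary` describes the 1001-point grid 0.0, 0.1, ..., 100.0, for which the population variance is (N² − 1)/12 · step² = (1001² − 1)/12 · 0.01 = 835, matching the hard-coded `variance=835` and `std_dev=math.sqrt(835)`.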
+ assert distribution_summary.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary.min == min(values) + assert distribution_summary.max == max(values) + assert distribution_summary.count == len(values) + assert distribution_summary.total_sum == sum(values) + assert distribution_summary.percentiles.p001 == pytest.approx( + np.percentile(values, 0.1) + ) + assert distribution_summary.percentiles.p01 == pytest.approx( + np.percentile(values, 1.0) + ) + assert distribution_summary.percentiles.p05 == pytest.approx( + np.percentile(values, 5.0) + ) + assert distribution_summary.percentiles.p10 == pytest.approx( + np.percentile(values, 10.0) + ) + assert distribution_summary.percentiles.p25 == pytest.approx( + np.percentile(values, 25.0) + ) + assert distribution_summary.percentiles.p75 == pytest.approx( + np.percentile(values, 75.0) + ) + assert distribution_summary.percentiles.p90 == pytest.approx( + np.percentile(values, 90.0) + ) + assert distribution_summary.percentiles.p95 == pytest.approx( + np.percentile(values, 95.0) + ) + assert distribution_summary.percentiles.p99 == pytest.approx( + np.percentile(values, 99.0) + ) + assert distribution_summary.percentiles.p999 == pytest.approx( + np.percentile(values, 99.9) + ) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_weights = DistributionSummary.from_values( + values, weights=[2] * len(values) + ) + assert distribution_summary_weights.mean == pytest.approx(np.mean(values)) + assert distribution_summary_weights.median == pytest.approx(np.median(values)) + assert distribution_summary_weights.mode == 0.0 + assert distribution_summary_weights.variance == pytest.approx( + np.var(values, ddof=0) + ) + assert distribution_summary_weights.std_dev == pytest.approx(np.std(values, ddof=0)) + assert distribution_summary_weights.min == min(values) + assert distribution_summary_weights.max == max(values) + assert distribution_summary_weights.count == len(values) + assert distribution_summary_weights.total_sum == sum(values) + assert distribution_summary_weights.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_values(values, include_cdf=True) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == len(values) + + +def test_distribution_summary_from_request_times_concurrency(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + distribution_summary = DistributionSummary.from_request_times( + requests, distribution_type="concurrency" + ) + assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) + assert distribution_summary.median == pytest.approx(10.0) + assert distribution_summary.mode == 10.0 + assert distribution_summary.variance == pytest.approx(0, abs=0.1) + assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) + assert distribution_summary.min == pytest.approx(1) + assert distribution_summary.max == pytest.approx(10.0) + assert distribution_summary.count == 10 + assert distribution_summary.total_sum == pytest.approx(55.0) + assert distribution_summary.percentiles.p001 == pytest.approx(10, abs=5) + assert distribution_summary.percentiles.p01 == pytest.approx(10) + assert distribution_summary.percentiles.p05 == pytest.approx(10) + assert distribution_summary.percentiles.p10 == pytest.approx(10) + assert 
distribution_summary.percentiles.p25 == pytest.approx(10) + assert distribution_summary.percentiles.p75 == pytest.approx(10) + assert distribution_summary.percentiles.p90 == pytest.approx(10) + assert distribution_summary.percentiles.p95 == pytest.approx(10) + assert distribution_summary.percentiles.p99 == pytest.approx(10) + assert distribution_summary.percentiles.p999 == pytest.approx(10) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_request_times( + requests, distribution_type="concurrency", include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 10 + + +def test_distribution_summary_from_request_times_rate(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + distribution_summary = DistributionSummary.from_request_times( + requests, distribution_type="rate" + ) + assert distribution_summary.mean == pytest.approx(10.0, abs=0.01) + assert distribution_summary.median == pytest.approx(10.0) + assert distribution_summary.mode == pytest.approx(10.0) + assert distribution_summary.variance == pytest.approx(0, abs=0.1) + assert distribution_summary.std_dev == pytest.approx(0, abs=0.3) + assert distribution_summary.min == pytest.approx(1.0) + assert distribution_summary.max == pytest.approx(10.0) + assert distribution_summary.count == 12 + assert distribution_summary.total_sum == pytest.approx(111.0) + assert distribution_summary.percentiles.p001 == pytest.approx(10.0, abs=0.5) + assert distribution_summary.percentiles.p01 == pytest.approx(10.0) + assert distribution_summary.percentiles.p05 == pytest.approx(10.0) + assert distribution_summary.percentiles.p10 == pytest.approx(10.0) + assert distribution_summary.percentiles.p25 == pytest.approx(10.0) + assert distribution_summary.percentiles.p75 == pytest.approx(10.0) + assert distribution_summary.percentiles.p90 == pytest.approx(10.0) + assert distribution_summary.percentiles.p95 == pytest.approx(10.0) + assert distribution_summary.percentiles.p99 == pytest.approx(10.0) + assert distribution_summary.percentiles.p999 == pytest.approx(10.0) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_request_times( + requests, distribution_type="rate", include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 12 + + +def test_distribution_summary_from_iterable_request_times(): + # create consistent timestamped values matching a rate of 10 per second + requests = [(val / 10, val / 10 + 1) for val in range(10001)] + # create 9 iterations for each request with first iter at start + 0.1 + # and spaced at 0.1 seconds apart + first_iter_times = [val / 10 + 0.1 for val in range(10001)] + iter_counts = [9 for _ in range(10001)] + first_iter_counts = [1 for _ in range(10001)] + + distribution_summary = DistributionSummary.from_iterable_request_times( + requests, first_iter_times, iter_counts, first_iter_counts + ) + assert distribution_summary.mean == pytest.approx(90.0, abs=0.1) + assert distribution_summary.median == pytest.approx(80.0) + assert distribution_summary.mode == pytest.approx(80.0) + assert distribution_summary.variance == pytest.approx(704.463, abs=0.001) + assert 
distribution_summary.std_dev == pytest.approx(26.541, abs=0.001) + assert distribution_summary.min == pytest.approx(0.0) + assert distribution_summary.max == pytest.approx(160.0) + assert distribution_summary.count == 44 + assert distribution_summary.total_sum == pytest.approx(3538.85, abs=0.01) + assert distribution_summary.percentiles.p001 == pytest.approx(80.0) + assert distribution_summary.percentiles.p01 == pytest.approx(80.0) + assert distribution_summary.percentiles.p05 == pytest.approx(80.0) + assert distribution_summary.percentiles.p10 == pytest.approx(80.0) + assert distribution_summary.percentiles.p25 == pytest.approx(80.0) + assert distribution_summary.percentiles.p75 == pytest.approx(80.0) + assert distribution_summary.percentiles.p90 == pytest.approx(160.0) + assert distribution_summary.percentiles.p95 == pytest.approx(160.0) + assert distribution_summary.percentiles.p99 == pytest.approx(160.0) + assert distribution_summary.percentiles.p999 == pytest.approx(160.0) + assert distribution_summary.cumulative_distribution_function is None + + distribution_summary_cdf = DistributionSummary.from_iterable_request_times( + requests, first_iter_times, iter_counts, first_iter_counts, include_cdf=True + ) + assert distribution_summary_cdf.cumulative_distribution_function is not None + assert len(distribution_summary_cdf.cumulative_distribution_function) == 44 + + +def test_status_distribution_summary_initialization(): + status_distribution_summary = StatusDistributionSummary( + total=create_default_distribution_summary(), + successful=create_default_distribution_summary(), + incomplete=create_default_distribution_summary(), + errored=create_default_distribution_summary(), + ) + assert status_distribution_summary.total.mean == 50.0 + assert status_distribution_summary.successful.mean == 50.0 + assert status_distribution_summary.incomplete.mean == 50.0 + assert status_distribution_summary.errored.mean == 50.0 + + +def test_status_distribution_summary_marshalling(): + status_distribution_summary = StatusDistributionSummary( + total=create_default_distribution_summary(), + successful=create_default_distribution_summary(), + incomplete=create_default_distribution_summary(), + errored=create_default_distribution_summary(), + ) + serialized = status_distribution_summary.model_dump() + deserialized = StatusDistributionSummary.model_validate(serialized) + + for key, value in vars(status_distribution_summary).items(): + for child_key, child_value in vars(value).items(): + assert getattr(getattr(deserialized, key), child_key) == child_value + + +def test_status_distribution_summary_from_values(): + value_types: List[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + values = [float(val % 3) for val in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_values( + value_types, values + ) + assert status_distribution_summary.total.count == len(values) + assert status_distribution_summary.total.mean == pytest.approx(np.mean(values)) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + np.mean( + [val for ind, val in enumerate(values) if value_types[ind] == "successful"] + ) + ) + assert status_distribution_summary.successful.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "successful"] + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert 
status_distribution_summary.incomplete.mean == pytest.approx( + np.mean( + [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] + ) + ) + assert status_distribution_summary.incomplete.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "incomplete"] + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx( + np.mean([val for ind, val in enumerate(values) if value_types[ind] == "error"]) + ) + assert status_distribution_summary.errored.count == len( + [val for ind, val in enumerate(values) if value_types[ind] == "error"] + ) + assert status_distribution_summary.errored.cumulative_distribution_function is None + + status_distribution_summary_cdf = StatusDistributionSummary.from_values( + value_types, values, include_cdf=True + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) + + +def test_status_distribution_summary_from_request_times(): + request_types: List[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + requests = [((val % 3) / 10, (val % 3) / 10 + 1) for val in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_request_times( + request_types, requests, distribution_type="concurrency" + ) + assert status_distribution_summary.total.mean == pytest.approx(2500.0, abs=0.01) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + 1000.0, abs=0.01 + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert status_distribution_summary.incomplete.mean == pytest.approx( + 1000.0, abs=0.01 + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx(1000.0, abs=0.01) + assert status_distribution_summary.errored.cumulative_distribution_function is None + + status_distribution_summary_cdf = StatusDistributionSummary.from_request_times( + request_types, requests, distribution_type="concurrency", include_cdf=True + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) + + +def test_status_distribution_summary_from_iterable_request_times(): + request_types: List[Literal["successful", "incomplete", "error"]] = [ + "successful", + "incomplete", + "error", + ] * 1000 + requests = [(val % 3 / 10, val % 3 / 10 + 1) for val in range(3000)] + first_iter_times = [val % 3 / 10 + 0.1 for val in range(3000)] + iter_counts = [9 for _ in range(3000)] + first_iter_counts = [1 for _ in range(3000)] + status_distribution_summary = StatusDistributionSummary.from_iterable_request_times( + request_types, 
+ requests, + first_iter_times, + iter_counts, + first_iter_counts, + ) + assert status_distribution_summary.total.mean == pytest.approx(21666.66, abs=0.01) + assert status_distribution_summary.total.cumulative_distribution_function is None + assert status_distribution_summary.successful.mean == pytest.approx( + 8000.0, abs=0.01 + ) + assert ( + status_distribution_summary.successful.cumulative_distribution_function is None + ) + assert status_distribution_summary.incomplete.mean == pytest.approx( + 8000.0, abs=0.01 + ) + assert ( + status_distribution_summary.incomplete.cumulative_distribution_function is None + ) + assert status_distribution_summary.errored.mean == pytest.approx(8000.0, abs=0.01) + assert status_distribution_summary.errored.cumulative_distribution_function is None + + status_distribution_summary_cdf = ( + StatusDistributionSummary.from_iterable_request_times( + request_types, + requests, + first_iter_times, + iter_counts, + first_iter_counts, + include_cdf=True, + ) + ) + assert ( + status_distribution_summary_cdf.total.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.successful.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.incomplete.cumulative_distribution_function + is not None + ) + assert ( + status_distribution_summary_cdf.errored.cumulative_distribution_function + is not None + ) + + +def test_running_stats_initialization(): + running_stats = RunningStats() + assert running_stats.start_time == pytest.approx(time.time(), abs=0.01) + assert running_stats.count == 0 + assert running_stats.total == 0 + assert running_stats.last == 0 + assert running_stats.mean == 0 + assert running_stats.rate == 0 + + +def test_running_stats_marshalling(): + running_stats = RunningStats() + serialized = running_stats.model_dump() + deserialized = RunningStats.model_validate(serialized) + + for key, value in vars(running_stats).items(): + assert getattr(deserialized, key) == value + + +def test_running_stats_update(): + running_stats = RunningStats() + running_stats.update(1) + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + assert running_stats.mean == 1 + time.sleep(1.0) + assert running_stats.rate == pytest.approx( + 1.0 / (time.time() - running_stats.start_time), abs=0.1 + ) + + running_stats.update(2) + assert running_stats.count == 2 + assert running_stats.total == 3 + assert running_stats.last == 2 + assert running_stats.mean == 1.5 + time.sleep(1) + assert running_stats.rate == pytest.approx( + 3 / (time.time() - running_stats.start_time), abs=0.1 + ) + + +def test_running_stats_add(): + running_stats = RunningStats() + mean = running_stats + 1 + assert mean == 1 + assert mean == running_stats.mean + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + + +def test_running_stats_iadd(): + running_stats = RunningStats() + running_stats += 1 + assert running_stats.count == 1 + assert running_stats.total == 1 + assert running_stats.last == 1 + assert running_stats.mean == 1 + + +def test_time_running_stats_initialization(): + time_running_stats = TimeRunningStats() + assert time_running_stats.start_time == pytest.approx(time.time(), abs=0.01) + assert time_running_stats.count == 0 + assert time_running_stats.total == 0 + assert time_running_stats.last == 0 + assert time_running_stats.mean == 0 + assert time_running_stats.rate == 0 + assert time_running_stats.total_ms == 0 + assert 
time_running_stats.last_ms == 0 + assert time_running_stats.mean_ms == 0 + assert time_running_stats.rate_ms == 0 + + +def test_time_running_stats_marshalling(): + time_running_stats = TimeRunningStats() + serialized = time_running_stats.model_dump() + deserialized = TimeRunningStats.model_validate(serialized) + + for key, value in vars(time_running_stats).items(): + assert getattr(deserialized, key) == value + + +def test_time_running_stats_update(): + time_running_stats = TimeRunningStats() + time_running_stats.update(1) + assert time_running_stats.count == 1 + assert time_running_stats.total == 1 + assert time_running_stats.last == 1 + assert time_running_stats.mean == 1 + assert time_running_stats.total_ms == 1000 + assert time_running_stats.last_ms == 1000 + assert time_running_stats.mean_ms == 1000 + time.sleep(1.0) + assert time_running_stats.rate == pytest.approx( + 1.0 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + assert time_running_stats.rate_ms == pytest.approx( + 1000 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + + time_running_stats.update(2) + assert time_running_stats.count == 2 + assert time_running_stats.total == 3 + assert time_running_stats.last == 2 + assert time_running_stats.mean == 1.5 + assert time_running_stats.total_ms == 3000 + assert time_running_stats.last_ms == 2000 + assert time_running_stats.mean_ms == 1500 + time.sleep(1) + assert time_running_stats.rate == pytest.approx( + 3 / (time.time() - time_running_stats.start_time), abs=0.1 + ) + assert time_running_stats.rate_ms == pytest.approx( + 3000 / (time.time() - time_running_stats.start_time), abs=0.1 + ) diff --git a/tests/unit/request/__init__.py b/tests/unit/request/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/request/test_base.py b/tests/unit/request/test_base.py deleted file mode 100644 index 73cf1b14..00000000 --- a/tests/unit/request/test_base.py +++ /dev/null @@ -1,160 +0,0 @@ -import re -import time -from typing import List -from unittest.mock import MagicMock, Mock, patch - -import pytest - -from guidellm.core import TextGenerationRequest -from tests.dummy.services import TestRequestGenerator - - -@pytest.mark.smoke() -def test_request_generator_sync_constructor(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - assert generator.mode == "sync" - assert generator.async_queue_size == 50 # Default value - - -@pytest.mark.smoke() -def test_request_generator_async_constructor(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="async", tokenizer="mock-tokenizer", async_queue_size=10 - ) - assert generator.mode == "async" - assert generator.async_queue_size == 10 - generator.stop() - - -@pytest.mark.smoke() -def test_request_generator_sync_iter(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - assert len(items) == 5 - assert items[0].prompt == "Test prompt" - - -@pytest.mark.smoke() -def test_request_generator_async_iter(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="async", tokenizer="mock-tokenizer") - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - generator.stop() - assert len(items) == 5 - assert items[0].prompt == "Test prompt" - - -@pytest.mark.smoke() -def test_request_generator_iter_calls_create_item(mock_auto_tokenizer): - generator = 
TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt"), - ) - - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - assert len(items) == 5 - generator.create_item.assert_called() - - -@pytest.mark.smoke() -def test_request_generator_async_iter_calls_create_item(mock_auto_tokenizer): - generator = TestRequestGenerator(mode="sync", tokenizer="mock-tokenizer") - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt"), - ) - - items = [] - for item in generator: - items.append(item) - if len(items) == 5: - break - - generator.stop() - assert len(items) == 5 - generator.create_item.assert_called() - - -@pytest.mark.sanity() -def test_request_generator_repr(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="sync", tokenizer="mock-tokenizer", async_queue_size=100 - ) - repr_str = repr(generator) - assert repr_str.startswith("RequestGenerator(") - assert "mode=sync" in repr_str - assert "async_queue_size=100" in repr_str - assert "tokenizer= List[int]: - tokens = re.findall(r"\w+|[^\w\s]", text) - return [0] * len(tokens) - - mock_tokenizer = MagicMock() - mock_tokenizer.tokenize = MagicMock(side_effect=_fake_tokenize) - - generator = TestRequestGenerator(tokenizer=mock_tokenizer) - assert generator.tokenizer == mock_tokenizer - - with patch( - "guidellm.request.base.AutoTokenizer", - ) as MockAutoTokenizer: # noqa: N806 - MockAutoTokenizer.from_pretrained.return_value = mock_tokenizer - generator = TestRequestGenerator(tokenizer="mock-tokenizer") - assert generator.tokenizer == mock_tokenizer - MockAutoTokenizer.from_pretrained.assert_called_with("mock-tokenizer") - - -@pytest.mark.regression() -def test_request_generator_populate_queue(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="async", tokenizer="mock-tokenizer", async_queue_size=2 - ) - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt") - ) - - time.sleep(0.2) # Allow some time for the queue to populate - generator.stop() - assert generator._queue.qsize() > 0 - - -@pytest.mark.regression() -def test_request_generator_async_stop_during_population(mock_auto_tokenizer): - generator = TestRequestGenerator( - mode="async", tokenizer="mock-tokenizer", async_queue_size=2 - ) - generator.create_item = Mock( # type: ignore - return_value=TextGenerationRequest(prompt="Mock prompt") - ) - - time.sleep(0.1) # Allow some time for the queue to start populating - generator.stop() - - # Ensure the stop event is set and thread is no longer alive - assert generator._stop_event.is_set() - assert not generator._thread.is_alive() diff --git a/tests/unit/request/test_emulated.py b/tests/unit/request/test_emulated.py deleted file mode 100644 index f6af1301..00000000 --- a/tests/unit/request/test_emulated.py +++ /dev/null @@ -1,373 +0,0 @@ -import json -import tempfile -from pathlib import Path -from typing import Tuple, Union - -import numpy as np -import pytest -from transformers import PreTrainedTokenizer # type: ignore - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.emulated import ( - EmulatedConfig, - EmulatedRequestGenerator, - EndlessTokens, -) - - -@pytest.mark.smoke() -def test_emulated_config_construction(): - config = EmulatedConfig( - prompt_tokens=10, - prompt_tokens_variance=2, - prompt_tokens_min=5, - prompt_tokens_max=15, - 
generated_tokens=20, - generated_tokens_variance=4, - generated_tokens_min=10, - generated_tokens_max=30, - ) - assert config.prompt_tokens == 10 - assert config.prompt_tokens_variance == 2 - assert config.prompt_tokens_min == 5 - assert config.prompt_tokens_max == 15 - assert config.generated_tokens == 20 - assert config.generated_tokens_variance == 4 - assert config.generated_tokens_min == 10 - assert config.generated_tokens_max == 30 - - -@pytest.mark.smoke() -def test_emulated_config_create_dict(): - config_dict = { - "prompt_tokens": 10, - "prompt_tokens_variance": 2, - "prompt_tokens_min": 5, - "prompt_tokens_max": 15, - "generated_tokens": 20, - "generated_tokens_variance": 4, - "generated_tokens_min": 10, - "generated_tokens_max": 30, - } - config = EmulatedConfig.create_config(config_dict) - assert config.prompt_tokens == 10 - assert config.prompt_tokens_variance == 2 - assert config.prompt_tokens_min == 5 - assert config.prompt_tokens_max == 15 - assert config.generated_tokens == 20 - assert config.generated_tokens_variance == 4 - assert config.generated_tokens_min == 10 - assert config.generated_tokens_max == 30 - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("base", "variance", "min_tokens", "max_tokens", "expected_range"), - [ - (10, 2, None, None, (1, 10 + 5 * 2)), - (10, 2, 5, 15, (5, 15)), - (10, None, 5, 15, (5, 15)), - (10, 2, 1, None, (1, 10 + 5 * 2)), - ], -) -def test_emulated_config_token_range( - base: int, - variance: int, - min_tokens: int, - max_tokens: int, - expected_range: Tuple[int, int], -): - assert ( - EmulatedConfig._token_range(base, variance, min_tokens, max_tokens) - == expected_range - ) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("base", "variance", "min_tokens", "max_tokens", "expected_range"), - [ - (10, None, None, None, (10, 10)), - (10, 5, None, None, (1, 10 + 5 * 2)), - (10, 5, 5, 15, (5, 15)), - (10, None, 5, 15, (5, 15)), - (10, 5, 2, None, (2, 10 + 5 * 2)), - (10, 5, None, 20, (1, 20)), - ], -) -def test_emulated_config_sample_tokens( - base: int, - variance: int, - min_tokens: int, - max_tokens: int, - expected_range: Tuple[int, int], -): - rng = np.random.default_rng() - - for _ in range(100): - token_count = EmulatedConfig._sample_tokens( - base, variance, min_tokens, max_tokens, rng - ) - assert token_count >= expected_range[0] - assert token_count <= expected_range[1] - - -@pytest.mark.sanity() -def test_emulated_config_create(): - test_dict = { - "prompt_tokens": 10, - "prompt_tokens_variance": 2, - "prompt_tokens_min": 5, - "prompt_tokens_max": 15, - "generated_tokens": 20, - "generated_tokens_variance": 4, - "generated_tokens_min": 10, - "generated_tokens_max": 30, - } - compare_config = EmulatedConfig(**test_dict) - - # test dict - test_config = EmulatedConfig.create_config(test_dict) - assert ( - test_config == compare_config - ), f"Dictionary creation failed: {test_config} != {compare_config}" - - # test json str - test_config = EmulatedConfig.create_config(json.dumps(test_dict)) - assert ( - test_config == compare_config - ), f"JSON string creation failed: {test_config} != {compare_config}" - - # test json file str path - with tempfile.TemporaryDirectory() as temp_dir: - test_path = Path(temp_dir) / "test.json" - test_path.write_text(json.dumps(test_dict)) - test_config = EmulatedConfig.create_config(str(test_path)) - assert ( - test_config == compare_config - ), f"JSON file path creation failed: {test_config} != {compare_config}" - - # test json file Path object - with tempfile.TemporaryDirectory() as 
temp_dir: - test_path = Path(temp_dir) / "test.json" - test_path.write_text(json.dumps(test_dict)) - test_config = EmulatedConfig.create_config(test_path) - assert ( - test_config == compare_config - ), f"JSON file Path object creation failed: {test_config} != {compare_config}" - - # test key value string - test_str = ( - f"prompt_tokens={test_dict['prompt_tokens']}, " - f"prompt_tokens_variance={test_dict['prompt_tokens_variance']}, " - f"prompt_tokens_min={test_dict['prompt_tokens_min']}, " - f"prompt_tokens_max={test_dict['prompt_tokens_max']}, " - f"generated_tokens={test_dict['generated_tokens']}, " - f"generated_tokens_variance={test_dict['generated_tokens_variance']}, " - f"generated_tokens_min={test_dict['generated_tokens_min']}, " - f"generated_tokens_max={test_dict['generated_tokens_max']}" - ) - test_config = EmulatedConfig.create_config(test_str) - assert ( - test_config == compare_config - ), f"Key value string creation failed: {test_config} != {compare_config}" - - -# EndlessTokens - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "expected_words", "expected_indices"), - [ - ( - "word1 word2 word3\nword4 word5", - ["word1", "word2", "word3", "word4", "word5"], - [0, 3], - ), - ( - "word1 word2\n word3 word4\n word5", - ["word1", "word2", "word3", "word4", "word5"], - [0, 2, 4], - ), - ], -) -def test_endless_data_words_construction(data, expected_words, expected_indices): - tokens = EndlessTokens(data) - assert tokens == expected_words - assert tokens.line_indices == expected_indices - - -@pytest.mark.smoke() -def test_endless_data_words_create_from_basic_file(): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "test.txt" - file_path.write_text("word1 word2 word3\nword4 word5") - - tokens = EndlessTokens(file_path) - assert tokens == ["word1", "word2", "word3", "word4", "word5"] - assert tokens.line_indices == [0, 3] - - tokens = EndlessTokens(str(file_path)) - assert tokens == ["word1", "word2", "word3", "word4", "word5"] - assert tokens.line_indices == [0, 3] - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "start", "length", "expected_text"), - [ - ("word1 word2 word3 word4", 0, 2, "word1 word2"), - ("word1 word2\nword3 word4", 1, 2, "word2\nword3"), - ( - "word1 word2\nword3 word4", - 1, - 6, - "word2\nword3 word4 word1 word2\nword3", - ), - ], -) -def test_endless_data_words_create_text(data, start, length, expected_text): - words = EndlessTokens(data) - text = words.create_text(start, length) - assert text == expected_text - - -# EmulatedRequestGenerator - - -@pytest.mark.smoke() -def test_emulated_request_generator_construction(mocker, mock_auto_tokenizer): - mocker.patch( - "guidellm.request.emulated.EmulatedConfig.create_config", - return_value=EmulatedConfig(prompt_tokens=10), - ) - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - generator = EmulatedRequestGenerator( - config="mock_config", tokenizer="mock-tokenizer", mode="sync" - ) - assert isinstance(generator._config, EmulatedConfig) - assert isinstance(generator._tokens, EndlessTokens) - - -@pytest.mark.smoke() -def test_emulated_request_generator_create_item(mocker): - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - mock_tokenizer = mocker.Mock(PreTrainedTokenizer) - mock_tokenizer.tokenize.return_value = ["word1", "word2"] - generator = EmulatedRequestGenerator( - config={ - "prompt_tokens": 10, - }, - tokenizer=mock_tokenizer, 
- mode="sync", - ) - item = generator.create_item() - assert isinstance(item, TextGenerationRequest) - - -@pytest.mark.smoke() -def test_emulated_request_generator_sample_prompt(mocker, mock_auto_tokenizer): - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - generator = EmulatedRequestGenerator( - config={"prompt_tokens": 3}, tokenizer="mock-tokenizer", mode="sync" - ) - prompt = generator.sample_prompt(3) - assert prompt == "word1 word2 word1" - - request = generator.create_item() - assert request.prompt_token_count == 3 - - -@pytest.mark.smoke() -def test_emulated_request_generator_random_seed(mocker, mock_auto_tokenizer): - mocker.patch( - "guidellm.request.emulated.EndlessTokens", - return_value=EndlessTokens("word1 word2"), - ) - - rand_gen = EmulatedRequestGenerator( - config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, - tokenizer="mock-tokenizer", - random_seed=42, - mode="sync", - ) - rand_gen_comp_pos = EmulatedRequestGenerator( - config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, - tokenizer="mock-tokenizer", - random_seed=42, - mode="sync", - ) - rand_gen_comp_neg = EmulatedRequestGenerator( - config={"prompt_tokens": 20, "prompt_tokens_variance": 10}, - tokenizer="mock-tokenizer", - random_seed=43, - mode="sync", - ) - - assert rand_gen.create_item().prompt == rand_gen_comp_pos.create_item().prompt - assert rand_gen.create_item().prompt != rand_gen_comp_neg.create_item().prompt - - -@pytest.mark.regression() -@pytest.mark.parametrize( - ("config_type", "config"), - [ - ("dict", {"prompt_tokens": 10, "generated_tokens": 20}), - ("dict", {"prompt_tokens": 10, "prompt_tokens_variance": 2}), - ( - "dict", - { - "prompt_tokens": 10, - "prompt_tokens_min": 5, - "prompt_tokens_max": 15, - "generated_tokens": 20, - }, - ), - ("json_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), - ("key_value_str", "prompt_tokens=10, generated_tokens=20"), - ("file_str", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), - ("file_path", json.dumps({"prompt_tokens": 10, "generated_tokens": 20})), - ], -) -def test_emulated_request_generator_lifecycle( - mock_requests_pride_and_prejudice, - mock_auto_tokenizer, - config_type: str, - config: Union[str, dict, Path], -): - if config_type in ["dict", "json_str", "key_value_str"]: - generator = EmulatedRequestGenerator(config, tokenizer="mock-tokenizer") - elif config_type in ["file_str", "file_path"]: - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "test.json" - file_path.write_text(config) # type: ignore - generator = EmulatedRequestGenerator( - str(file_path) if config_type == "file_str" else file_path, - tokenizer="mock-tokenizer", - ) - - for _ in range(5): - request = generator.create_item() - prompt_range = generator._config.prompt_tokens_range - outputs_range = generator._config.output_tokens_range - - assert request.prompt_token_count >= prompt_range[0] # type: ignore - assert request.prompt_token_count <= prompt_range[1] # type: ignore - - prompt_tokens = len(generator.tokenizer.tokenize(request.prompt)) - assert request.prompt_token_count == prompt_tokens - - if generator._config.generated_tokens: - assert len(outputs_range) == 2 - assert request.output_token_count >= outputs_range[0] # type: ignore - assert request.output_token_count <= outputs_range[1] # type: ignore diff --git a/tests/unit/request/test_file.py b/tests/unit/request/test_file.py deleted file mode 100644 index 69e538a1..00000000 --- 
a/tests/unit/request/test_file.py +++ /dev/null @@ -1,161 +0,0 @@ -import tempfile -from pathlib import Path - -import pytest - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.file import FileRequestGenerator - - -@pytest.mark.smoke() -def test_file_request_generator_constructor(mock_auto_tokenizer): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.txt" - file_path.write_text("This is a test.\nThis is another test.") - generator = FileRequestGenerator(file_path, tokenizer="mock-tokenizer") - assert generator._path == file_path - assert generator._data == ["This is a test.", "This is another test."] - assert generator._iterator is not None - - -@pytest.mark.smoke() -def test_file_request_generator_create_item(mock_auto_tokenizer): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / "example.txt" - file_path.write_text("This is a test.\nThis is another test.") - generator = FileRequestGenerator( - file_path, tokenizer="mock-tokenizer", mode="sync" - ) - request = generator.create_item() - assert isinstance(request, TextGenerationRequest) - assert request.prompt == "This is a test." - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("file_extension", "file_content"), - [ - ("txt", "Test content 1.\nTest content 2.\nTest content 3.\n"), - ( - "csv", - "text,label,extra\n" - "Test content 1.,1,extra 1\n" - "Test content 2.,2,extra 2\n" - "Test content 3.,3,extra 3\n", - ), - ( - "jsonl", - '{"text": "Test content 1."}\n' - '{"text": "Test content 2."}\n' - '{"text": "Test content 3."}\n', - ), - ( - "csv", - "prompt,text,extra\n" - "Test content 1., text 1, extra 1\n" - "Test content 2., text 2, extra 2\n" - "Test content 3., text 3, extra 3\n", - ), - ( - "json", - '[{"text": "Test content 1."}, ' - '{"text": "Test content 2."}, ' - '{"text": "Test content 3."}]\n', - ), - ( - "json", - '{"object_1": {"text": "Test content 1."}, ' - '"object_2": {"text": "Test content 2."}, ' - '"object_3": {"text": "Test content 3."}}\n', - ), - ( - "yaml", - "items:\n" - " - text: Test content 1.\n" - " - text: Test content 2.\n" - " - text: Test content 3.\n", - ), - ( - "yaml", - "object_1:\n text: Test content 1.\n" - "object_2:\n text: Test content 2.\n" - "object_3:\n text: Test content 3.\n", - ), - ], -) -def test_file_request_generator_file_types_lifecycle( - mock_auto_tokenizer, file_extension, file_content -): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / f"example.{file_extension}" - file_path.write_text(file_content) - generator = FileRequestGenerator(file_path, tokenizer="mock-tokenizer") - - for index, request in enumerate(generator): - assert isinstance(request, TextGenerationRequest) - assert request.prompt == f"Test content {index + 1}." 
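For orientation, the deleted `test_file.py` covered the legacy `FileRequestGenerator`, whose unit tests this PR removes. A rough sketch of the usage pattern those tests exercised is shown below; the file contents are illustrative, and only the constructor arguments and methods visible in the deleted tests are assumed.

```python
import tempfile
from pathlib import Path

# Legacy API; shown only to document what the deleted tests covered.
from guidellm.request.file import FileRequestGenerator

with tempfile.TemporaryDirectory() as temp_dir:
    data_file = Path(temp_dir) / "prompts.txt"
    data_file.write_text("This is a test.\nThis is another test.")

    # Construct with a file path and a tokenizer name, then pull requests one at a time.
    generator = FileRequestGenerator(data_file, tokenizer="mock-tokenizer", mode="sync")
    request = generator.create_item()
    print(request.prompt)  # "This is a test."
```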
- assert request.prompt_token_count == 3 - - if index == 2: - break - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("file_extension", "file_content"), - [ - ("txt", "Test content 1.\nTest content 2.\nTest content 3.\n"), - ( - "csv", - "text,label,extra\n" - "Test content 1.,1,extra 1\n" - "Test content 2.,2,extra 2\n" - "Test content 3.,3,extra 3\n", - ), - ( - "jsonl", - '{"text": "Test content 1."}\n' - '{"text": "Test content 2."}\n' - '{"text": "Test content 3."}\n', - ), - ( - "csv", - "prompt,text,extra\n" - "Test content 1., text 1, extra 1\n" - "Test content 2., text 2, extra 2\n" - "Test content 3., text 3, extra 3\n", - ), - ( - "json", - '[{"text": "Test content 1."}, ' - '{"text": "Test content 2."}, ' - '{"text": "Test content 3."}]\n', - ), - ( - "json", - '{"object_1": {"text": "Test content 1."}, ' - '"object_2": {"text": "Test content 2."}, ' - '"object_3": {"text": "Test content 3."}}\n', - ), - ( - "yaml", - "items:\n" - " - text: Test content 1.\n" - " - text: Test content 2.\n" - " - text: Test content 3.\n", - ), - ( - "yaml", - "object_1:\n text: Test content 1.\n" - "object_2:\n text: Test content 2.\n" - "object_3:\n text: Test content 3.\n", - ), - ], -) -def test_file_request_generator_len(mock_auto_tokenizer, file_extension, file_content): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = Path(temp_dir) / f"example.{file_extension}" - file_path.write_text(file_content) - generator = FileRequestGenerator(file_path, tokenizer="mock-tokenizer") - - assert len(generator) == 3 diff --git a/tests/unit/request/test_transformers.py b/tests/unit/request/test_transformers.py deleted file mode 100644 index d3b45325..00000000 --- a/tests/unit/request/test_transformers.py +++ /dev/null @@ -1,132 +0,0 @@ -from unittest.mock import patch - -import pytest - -from guidellm.core.request import TextGenerationRequest -from guidellm.request.transformers import TransformersDatasetRequestGenerator -from tests.dummy.data.transformers import ( - create_sample_dataset, - create_sample_dataset_dict, - create_sample_iterable_dataset, - create_sample_iterable_dataset_dict, -) - - -@pytest.mark.smoke() -def test_transformers_dataset_request_generator_constructor( - mock_auto_tokenizer, -): - dataset = create_sample_dataset() - with patch( - "guidellm.request.transformers.load_transformers_dataset", - return_value=dataset, - ), patch( - "guidellm.request.transformers.resolve_transformers_dataset_column", - return_value="text", - ): - generator = TransformersDatasetRequestGenerator( - dataset="dummy_dataset", - split="train", - column="text", - tokenizer="mock-tokenizer", - ) - assert generator._dataset == "dummy_dataset" - assert generator._split == "train" - assert generator._column == "text" - assert generator._hf_dataset == dataset - assert generator._hf_column == "text" - assert generator._hf_dataset_iterator is not None - - -@pytest.mark.smoke() -def test_transformers_dataset_request_generator_create_item( - mock_auto_tokenizer, -): - generator = TransformersDatasetRequestGenerator( - dataset=create_sample_dataset_dict(), - split="train", - column="text", - tokenizer="mock-tokenizer", - mode="sync", - ) - request = generator.create_item() - assert isinstance(request, TextGenerationRequest) - assert request.prompt == "sample text 1" - assert request.prompt_token_count == 3 - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset_dict(splits=["train"]), - ), - ( - 
"mock/directory/file.json", - create_sample_dataset(column="prompt"), - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - ), - (create_sample_dataset_dict(splits=["val", "train"], column="custom"), None), - (create_sample_dataset(), None), - (create_sample_iterable_dataset_dict(splits=["validation"]), None), - (create_sample_iterable_dataset(), None), - ], -) -def test_transformers_dataset_request_generator_lifecycle( - mock_auto_tokenizer, dataset_arg, dataset -): - with patch( - "guidellm.utils.transformers.load_dataset", - return_value=dataset, - ): - generator = TransformersDatasetRequestGenerator( - dataset=dataset_arg, tokenizer="mock-tokenizer", mode="sync" - ) - - for index, request in enumerate(generator): - assert isinstance(request, TextGenerationRequest) - assert request.prompt == f"sample text {index + 1}" - assert request.prompt_token_count == 3 - - if index == 2: - break - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset_dict(splits=["train"]), - ), - ( - "mock/directory/file.json", - create_sample_dataset(column="prompt"), - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - ), - (create_sample_dataset_dict(splits=["val", "train"], column="custom"), None), - (create_sample_dataset(), None), - ], -) -def test_transformers_dataset_request_generator_len( - mock_auto_tokenizer, dataset_arg, dataset -): - with patch( - "guidellm.utils.transformers.load_dataset", - return_value=dataset, - ): - generator = TransformersDatasetRequestGenerator( - dataset=dataset_arg, tokenizer="mock-tokenizer", mode="sync" - ) - - # Check if __len__ returns the correct length - assert len(generator) == 3 diff --git a/tests/unit/scheduler/__init__.py b/tests/unit/scheduler/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/scheduler/test_load_generator.py b/tests/unit/scheduler/test_load_generator.py deleted file mode 100644 index 6b84ee01..00000000 --- a/tests/unit/scheduler/test_load_generator.py +++ /dev/null @@ -1,153 +0,0 @@ -import time -from typing import get_args - -import pytest -from scipy.stats import kstest # type: ignore - -from guidellm.scheduler import LoadGenerationMode, LoadGenerator - - -@pytest.mark.smoke() -def test_load_generator_mode(): - assert set(get_args(LoadGenerationMode)) == { - "synchronous", - "constant", - "poisson", - "throughput", - } - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("constant", 10), - ("poisson", 5), - ("throughput", None), - ("synchronous", None), - ], -) -def test_load_generator_instantiation(mode, rate): - generator = LoadGenerator(mode=mode, rate=rate) - assert generator.mode == mode - assert generator.rate == rate - - -@pytest.mark.regression() -@pytest.mark.parametrize( - ("mode", "rate", "expected_error"), - [ - ("invalid_mode", None, ValueError), - ("constant", 0, ValueError), - ("poisson", -1, ValueError), - ], -) -def test_load_generator_invalid_instantiation(mode, rate, expected_error): - with pytest.raises(expected_error): - LoadGenerator(mode=mode, rate=rate) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate"), - [ - ("synchronous", None), - ("throughput", None), - ("constant", 1), - ("poisson", 5), - ], -) -def test_load_generator_times(mode, rate): - # first check that the proper method is called - generator = LoadGenerator(mode=mode, rate=rate) - func_name = 
f"{mode}_times" - assert hasattr(generator, func_name) - assert callable(getattr(generator, func_name)) - - call_count = 0 - - def _increment_call_count(): - nonlocal call_count - call_count += 1 - yield -1.0 - - setattr(generator, func_name, _increment_call_count) - for time_ in generator.times(): - assert time_ == -1.0 - break - assert call_count == 1 - - # now check that the method generates reasonable timestamps - generator = LoadGenerator(mode=mode, rate=rate) - start_time = time.time() - for index, time_ in enumerate(generator.times()): - if index > 10: - break - - if mode == "synchronous": - assert time_ == -1.0 - else: - assert time_ >= start_time - - -@pytest.mark.smoke() -def test_load_generator_invalid_times(): - generator = LoadGenerator(mode="synchronous") - - for index, time_ in enumerate(generator.synchronous_times()): - if index > 10: - break - - assert time_ == -1.0 - - -@pytest.mark.smoke() -def test_load_generator_throughput_times(): - generator = LoadGenerator(mode="throughput") - - for index, time_ in enumerate(generator.throughput_times()): - if index > 10: - break - - assert time_ <= time.time() - - -@pytest.mark.smoke() -@pytest.mark.parametrize("rate", [1, 10, 42]) -def test_load_generator_constant_times(rate): - generator = LoadGenerator(mode="constant", rate=rate) - start_time = time.time() - - for index, time_ in enumerate(generator.constant_times()): - if index > 10: - break - - assert time_ == pytest.approx(start_time + index / rate, rel=1e-5) - - -@pytest.mark.smoke() -@pytest.mark.flaky(reruns=5) -def test_load_generator_poisson_times(): - rate = 5 - generator = LoadGenerator(mode="poisson", rate=rate) - start_time = time.time() - - times = [] - prev_time = start_time - - for index, current_time in enumerate(generator.poisson_times()): - if index > 100: - break - - times.append(current_time - prev_time) - prev_time = current_time - - mean_inter_arrival_time = 1 / rate - - # Perform Kolmogorov-Smirnov test to compare the sample distribution - # to the expected exponential distribution - ks_statistic, p_value = kstest(times, "expon", args=(0, mean_inter_arrival_time)) - assert p_value > 0.025, ( - f"Poisson-generated inter-arrival times do not " - f"match the expected exponential distribution (p-value: {p_value})" - ) diff --git a/tests/unit/scheduler/test_scheduler.py b/tests/unit/scheduler/test_scheduler.py deleted file mode 100644 index d765280f..00000000 --- a/tests/unit/scheduler/test_scheduler.py +++ /dev/null @@ -1,199 +0,0 @@ -import random -from unittest.mock import create_autospec - -import pytest - -from guidellm.backend import Backend -from guidellm.core import ( - TextGenerationBenchmark, - TextGenerationRequest, - TextGenerationResult, -) -from guidellm.request import RequestGenerator -from guidellm.scheduler import ( - LoadGenerator, - Scheduler, - SchedulerResult, -) - - -@pytest.mark.smoke() -def test_scheduler_result_default_intialization(): - benchmark = create_autospec(TextGenerationBenchmark, instance=True) - scheduler_result = SchedulerResult( - completed=False, - count_total=0, - count_completed=0, - benchmark=benchmark, - ) - - assert scheduler_result.completed is False - assert scheduler_result.count_total == 0 - assert scheduler_result.count_completed == 0 - assert scheduler_result.benchmark == benchmark - assert scheduler_result.current_result is None - - -@pytest.mark.smoke() -def test_scheduler_result_initialization(): - benchmark = create_autospec(TextGenerationBenchmark, instance=True) - result = TextGenerationResult( - 
request=TextGenerationRequest(prompt="prompt"), output="Test output" - ) - scheduler_result = SchedulerResult( - completed=False, - count_total=10, - count_completed=5, - benchmark=benchmark, - current_result=result, - ) - - assert scheduler_result.completed is False - assert scheduler_result.count_total == 10 - assert scheduler_result.count_completed == 5 - assert scheduler_result.benchmark == benchmark - assert scheduler_result.current_result == result - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("mode", "rate", "max_number", "max_duration"), - [ - ("synchronous", None, 10, None), - ("throughput", 5.0, None, 60.0), - ("poisson", 10.0, 100, None), - ("constant", 1.0, None, 120.0), - ], -) -def test_scheduler_initialization(mode, rate, max_number, max_duration): - generator = create_autospec(RequestGenerator, instance=True) - backend = create_autospec(Backend, instance=True) - scheduler = Scheduler( - generator, - backend, - mode=mode, - rate=rate, - max_number=max_number, - max_duration=max_duration, - ) - - assert scheduler.generator == generator - assert scheduler.backend == backend - assert scheduler.mode == mode - assert scheduler.rate == rate - assert scheduler.max_number == max_number - assert scheduler.max_duration == max_duration - assert isinstance(scheduler.load_generator, LoadGenerator) - assert scheduler.benchmark_mode in {"synchronous", "asynchronous", "throughput"} - - -@pytest.mark.sanity() -@pytest.mark.parametrize( - ("mode", "rate", "max_number", "max_duration"), - [ - # invalid modes - ("invalid_mode", None, 10, None), - # invalid max settings - ("synchronous", None, None, None), - ("synchronous", None, -1, 10), - ("synchronous", None, 10, -1), - # invalid rate settings - ("constant", -1, None, 10), - ("constant", None, None, 10), - ("poisson", -1, None, 10), - ("poisson", None, None, 10), - ], -) -def test_scheduler_invalid_initialization( - mode, - rate, - max_number, - max_duration, -): - generator = create_autospec(RequestGenerator, instance=True) - backend = create_autospec(Backend, instance=True) - - with pytest.raises(ValueError): - Scheduler( - generator, - backend, - mode=mode, - rate=rate, - max_number=max_number, - max_duration=max_duration, - ) - - -@pytest.mark.sanity() -@pytest.mark.asyncio() -@pytest.mark.parametrize( - "mode", - [ - "synchronous", - "throughput", - "poisson", - "constant", - ], -) -async def test_scheduler_run_number(mode, mock_backend): - rate = 10.0 - max_number = 20 - generator = create_autospec(RequestGenerator, instance=True) - - # Mock the request generator and backend submit behavior - generator.__iter__.return_value = iter( - [TextGenerationRequest(prompt="Test", type_=random.choice(["text", "chat"]))] - * (max_number * 2) - ) - - scheduler = Scheduler( - generator, - mock_backend, - mode=mode, - rate=rate, - max_number=max_number, - ) - - run_count = 0 - count_completed = 0 - received_init = False - received_final = False - async for result in scheduler.run(): - run_count += 1 - - assert run_count <= max_number + 2 - assert result.count_total == max_number - assert result.benchmark is not None - assert isinstance(result.benchmark, TextGenerationBenchmark) - - if result.current_result is not None: - count_completed += 1 - - if run_count == 1: - assert not received_init - assert not received_final - assert count_completed == 0 - assert result.count_completed == 0 - assert not result.completed - assert result.current_result is None - received_init = True - elif run_count - 2 == max_number: - assert received_init - assert 
not received_final - assert count_completed == max_number - assert result.count_completed == max_number - assert result.completed - assert result.current_result is None - received_final = True - else: - assert received_init - assert not received_final - assert count_completed == run_count - 1 - assert result.count_completed == run_count - 1 - assert not result.completed - assert result.current_result is not None - assert isinstance(result.current_result, TextGenerationResult) - - assert received_init - assert received_final - assert count_completed == max_number diff --git a/tests/unit/test_type.py b/tests/unit/test_type.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/utils/__init__.py b/tests/unit/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/utils/test_injector.py b/tests/unit/utils/test_injector.py deleted file mode 100644 index 9a58575e..00000000 --- a/tests/unit/utils/test_injector.py +++ /dev/null @@ -1,70 +0,0 @@ -from pathlib import Path - -import pytest -from pydantic import BaseModel - -from guidellm.config import settings -from guidellm.utils.injector import create_report, inject_data - - -class ExampleModel(BaseModel): - name: str - version: str - - -@pytest.mark.smoke() -def test_inject_data(): - model = ExampleModel(name="Example App", version="1.0.0") - html = "window.report_data = {};" - expected_html = 'window.report_data = {"name":"Example App","version":"1.0.0"};' - - result = inject_data( - model, - html, - settings.report_generation.report_html_match, - settings.report_generation.report_html_placeholder, - ) - assert result == expected_html - - -@pytest.mark.smoke() -def test_create_report_to_file(tmpdir): - model = ExampleModel(name="Example App", version="1.0.0") - html_content = "window.report_data = {};" - expected_html_content = ( - 'window.report_data = {"name":"Example App","version":"1.0.0"};' - ) - - mock_html_path = tmpdir.join("template.html") - mock_html_path.write(html_content) - settings.report_generation.source = str(mock_html_path) - - output_path = tmpdir.join("output.html") - result_path = create_report(model, str(output_path)) - result_content = result_path.read_text() - - assert result_path == output_path - assert result_content == expected_html_content - - -@pytest.mark.smoke() -def test_create_report_to_directory(tmpdir): - model = ExampleModel(name="Example App", version="1.0.0") - html_content = "window.report_data = {};" - expected_html_content = ( - 'window.report_data = {"name":"Example App","version":"1.0.0"};' - ) - - mock_html_path = tmpdir.join("template.html") - mock_html_path.write(html_content) - settings.report_generation.source = str(mock_html_path) - - output_dir = tmpdir.mkdir("output_dir") - output_path = Path(output_dir) / "report.html" - result_path = create_report(model, str(output_dir)) - - with Path(result_path).open("r") as file: - result_content = file.read() - - assert result_path == output_path - assert result_content == expected_html_content diff --git a/tests/unit/utils/test_progress.py b/tests/unit/utils/test_progress.py deleted file mode 100644 index 637b2be2..00000000 --- a/tests/unit/utils/test_progress.py +++ /dev/null @@ -1,116 +0,0 @@ -import pytest - -from guidellm.utils import BenchmarkReportProgress - - -@pytest.fixture() -def benchmark_progress(): - return BenchmarkReportProgress() - - -@pytest.mark.smoke() -def test_initialization(benchmark_progress): - assert benchmark_progress.report_task is None - assert 
benchmark_progress.benchmark_tasks == [] - assert benchmark_progress.benchmark_tasks_started == [] - assert benchmark_progress.benchmark_tasks_completed == [] - assert benchmark_progress.benchmark_tasks_progress == [] - - -@pytest.mark.smoke() -def test_start_method(benchmark_progress): - descriptions = ["Benchmark 1", "Benchmark 2"] - benchmark_progress.start(descriptions) - - assert len(benchmark_progress.benchmark_tasks) == 2 - assert benchmark_progress.report_task is not None - - benchmark_progress.finish() - - -@pytest.mark.sanity() -def test_update_benchmark(benchmark_progress): - descriptions = ["Benchmark 1"] - benchmark_progress.start(descriptions) - - benchmark_progress.update_benchmark( - index=0, - description="Updating Benchmark 1", - completed=False, - completed_count=50, - completed_total=100, - start_time=0, - req_per_sec=10.5, - ) - assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 - - benchmark_progress.finish() - - -@pytest.mark.sanity() -def test_finish_method(benchmark_progress): - descriptions = ["Benchmark 1", "Benchmark 2"] - benchmark_progress.start(descriptions) - benchmark_progress.finish() - - assert benchmark_progress.report_progress.finished - - -@pytest.mark.regression() -def test_error_on_update_completed_benchmark(benchmark_progress): - descriptions = ["Benchmark 1"] - benchmark_progress.start(descriptions) - benchmark_progress.update_benchmark( - index=0, - description="Benchmark 1", - completed=True, - completed_count=100, - completed_total=100, - start_time=0, - req_per_sec=10.5, - ) - - with pytest.raises(ValueError, match="already completed"): - benchmark_progress.update_benchmark( - index=0, - description="Benchmark 1", - completed=False, - completed_count=50, - completed_total=100, - start_time=0, - req_per_sec=10.5, - ) - - benchmark_progress.finish() - - -@pytest.mark.regression() -def test_multiple_updates(benchmark_progress): - descriptions = ["Benchmark 1", "Benchmark 2"] - benchmark_progress.start(descriptions) - - # First update - benchmark_progress.update_benchmark( - index=0, - description="Updating Benchmark 1", - completed=False, - completed_count=50, - completed_total=100, - start_time=0, - req_per_sec=5.0, - ) - assert benchmark_progress.benchmark_tasks_progress[0] == 50.0 - - # Second update, same task - benchmark_progress.update_benchmark( - index=0, - description="Updating Benchmark 1", - completed=True, - completed_count=100, - completed_total=100, - start_time=0, - req_per_sec=5.0, - ) - assert benchmark_progress.benchmark_tasks_progress[0] == 100.0 - - benchmark_progress.finish() diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py deleted file mode 100644 index 1d89ee31..00000000 --- a/tests/unit/utils/test_text.py +++ /dev/null @@ -1,394 +0,0 @@ -from pathlib import Path -from unittest.mock import patch - -import pytest -import requests - -from guidellm.utils.text import ( - clean_text, - filter_text, - is_path, - is_path_like, - is_url, - load_text, - load_text_lines, - parse_text_objects, - split_lines_by_punctuation, - split_text, -) - - -@pytest.fixture() -def sample_text(): - return "This is a sample text.\nThis is another line!" 
- - -@pytest.fixture() -def sample_dict_data(): - return [{"text": "line 1"}, {"text": "line 2"}, {"text": "line 3"}] - - -@pytest.fixture() -def sample_csv_data(): - return "text\nline 1\nline 2\nline 3" - - -@pytest.fixture() -def sample_jsonl_data(): - return '{"text": "line 1"}\n{"text": "line 2"}\n{"text": "line 3"}' - - -@pytest.fixture() -def sample_yaml_data(): - return """ - text: - - line 1 - - line 2 - - line 3 - """ - - -@pytest.fixture() -def mock_response(): - response = requests.Response() - response.status_code = 200 - response._content = b"Mock content" - return response - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("text", "start", "end", "expected"), - [ - ("hello world", "hello", "world", "hello "), - ("hello world", "world", None, "world"), - ("hello world", None, "hello", ""), - ("hello world", None, None, "hello world"), - ], -) -def test_filter_text(text, start, end, expected): - assert filter_text(text, start, end) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ( - "text", - "fix_encoding", - "clean_whitespace", - "remove_empty_lines", - "force_new_line_punctuation", - "expected", - ), - [ - ( - "This is\ta test.\n New line.", - True, - True, - False, - False, - "This is a test.\nNew line.", - ), - ( - "This is\ta test.\n New line.", - True, - True, - True, - False, - "This is a test.\nNew line.", - ), - ( - "This is a test. New line.", - True, - False, - False, - True, - "This is a test.\nNew line.", - ), - ], -) -def test_clean_text( - text, - fix_encoding, - clean_whitespace, - remove_empty_lines, - force_new_line_punctuation, - expected, -): - assert ( - clean_text( - text, - fix_encoding, - clean_whitespace, - remove_empty_lines, - force_new_line_punctuation, - ) - == expected - ) - - -@pytest.mark.smoke() -def test_split_lines_by_punctuation(sample_text): - expected = ["This is a sample text.", "This is another line!"] - assert split_lines_by_punctuation(sample_text) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("url", "expected"), - [ - ("https://example.com", True), - ("ftp://example.com", True), - ("not a url", False), - ], -) -def test_is_url(url, expected): - assert is_url(url) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("path", "expected"), - [ - (str(Path(__file__)), True), - ("/non/existent/path", False), - ], -) -def test_is_path(path, expected): - assert is_path(path) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("path", "enforce_file", "expected"), - [ - (str(Path(__file__)), True, True), - ("/non/existent/path", False, True), - ("https://example.com", False, False), - ], -) -def test_is_path_like(path, enforce_file, expected): - assert is_path_like(path, enforce_file) == expected - - -@pytest.mark.smoke() -def test_split_text(sample_text): - words, separators, new_lines = split_text(sample_text) - assert words == [ - "This", - "is", - "a", - "sample", - "text.", - "This", - "is", - "another", - "line!", - ] - assert separators == [" ", " ", " ", " ", "\n", " ", " ", " ", " "] - assert new_lines == [0, 5] - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "format_", "expected"), - [ - ("text\nline 1\nline 2", "csv", [{"text": "line 1"}, {"text": "line 2"}]), - ( - '{"text": "line 1"}\n{"text": "line 2"}', - "jsonl", - [{"text": "line 1"}, {"text": "line 2"}], - ), - ], -) -def test_parse_text_objects(data, format_, expected): - assert parse_text_objects(data, format_) == expected - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - 
("data", "expected"), - [ - ("https://example.com", "Mock content"), - (str(Path(__file__)), Path(__file__).read_text()), - ], -) -def test_load_text(data, expected, mock_response): - with patch("requests.get", return_value=mock_response): - assert load_text(data) == expected - - -@pytest.mark.regression() -def test_load_text_file_not_found(): - with pytest.raises(FileNotFoundError): - load_text("/non/existent/file.txt") - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("data", "format_", "filters", "expected"), - [ - ("text\nline 1\nline 2", "csv", None, ["line 1", "line 2"]), - ('{"text": "line 1"}\n{"text": "line 2"}', "jsonl", None, ["line 1", "line 2"]), - ("text\nline 1\nline 2", "txt", None, ["text", "line 1", "line 2"]), - ], -) -def test_load_text_lines(data, format_, filters, expected): - assert load_text_lines(data, format_=format_, filters=filters) == expected - - -@pytest.mark.regression() -def test_load_text_lines_invalid_data(): - with pytest.raises(ValueError): - load_text_lines(123) # type: ignore - - -@pytest.mark.regression() -def test_parse_text_objects_invalid_format(): - with pytest.raises(ValueError): - parse_text_objects("text", format_="unsupported") - - -@pytest.mark.regression() -def test_parse_text_objects_invalid_data(): - with pytest.raises(ValueError): - parse_text_objects(123) # type: ignore - - -@pytest.mark.regression() -@pytest.mark.parametrize( - ("data", "format_", "filters", "expected"), - [ - ( - "text\nline 1\nline 2\n", - "csv", - ["text"], - ["line 1", "line 2"], - ), - ], -) -def test_load_text_lines_with_filters(data, format_, filters, expected): - assert load_text_lines(data, format_=format_, filters=filters) == expected - - -@pytest.mark.regression() -def test_is_path_with_symlink(tmp_path): - # Create a symlink to a temporary file - target_file = tmp_path / "target_file.txt" - target_file.write_text("Sample content") - symlink_path = tmp_path / "symlink" - symlink_path.symlink_to(target_file) - - assert is_path(str(symlink_path)) is True - - -@pytest.mark.regression() -def test_is_path_like_with_symlink(tmp_path): - # Create a symlink to a temporary file - target_file = tmp_path / "target_file.txt" - target_file.write_text("Sample content") - symlink_path = tmp_path / "symlink.file" - symlink_path.symlink_to(target_file) - - assert is_path_like(str(symlink_path), enforce_file=True) is True - - -@pytest.mark.regression() -def test_load_text_lines_empty(): - # Test loading text lines from an empty string - assert load_text_lines("") == [] - - -@pytest.mark.regression() -def test_split_text_with_empty_string(): - words, separators, new_lines = split_text("") - assert words == [] - assert separators == [] - assert new_lines == [] - - -@pytest.mark.regression() -def test_split_lines_by_punctuation_with_no_punctuation(): - text = "This is a test without punctuation" - assert split_lines_by_punctuation(text) == [text] - - -@pytest.mark.regression() -def test_is_path_invalid_type(): - assert not is_path(None) - assert not is_path(123) - assert not is_path(["not", "a", "path"]) - - -@pytest.mark.regression() -def test_is_path_like_invalid_type(): - assert not is_path_like(None, enforce_file=False) - assert not is_path_like(123, enforce_file=True) - assert not is_path_like(["not", "a", "path"], enforce_file=False) - - -@pytest.mark.regression() -def test_load_text_invalid_url(): - with pytest.raises(requests.ConnectionError): - load_text("http://invalid.url") - - -@pytest.mark.regression() -def test_parse_text_objects_empty_csv(): - assert 
parse_text_objects("text\n", "csv") == [] - - -@pytest.mark.regression() -def test_parse_text_objects_empty_jsonl(): - assert parse_text_objects("", "jsonl") == [] - - -@pytest.mark.regression() -def test_parse_text_objects_invalid_jsonl(): - with pytest.raises(ValueError): - parse_text_objects("{invalid_json}", "jsonl") - - -@pytest.mark.regression() -def test_parse_text_objects_empty_yaml(): - assert parse_text_objects("", "yaml") == [] - - -@pytest.mark.regression() -def test_clean_text_with_unicode(): - text = "This is a test with unicode: \u2013 \u2014" - cleaned_text = clean_text(text, fix_encoding=True, clean_whitespace=True) - assert cleaned_text == "This is a test with unicode: – —" - - -@pytest.mark.regression() -def test_split_lines_by_punctuation_with_multiple_punctuations(): - text = "First sentence. Second sentence? Third sentence!" - expected = ["First sentence.", "Second sentence?", "Third sentence!"] - assert split_lines_by_punctuation(text) == expected - - -@pytest.mark.regression() -def test_is_url_empty_string(): - assert not is_url("") - - -@pytest.mark.regression() -def test_load_text_invalid_data(): - with pytest.raises(TypeError): - load_text(123) # type: ignore - - -@pytest.mark.regression() -def test_load_text_lines_empty_format(): - data = "text\nline 1\nline 2" - assert load_text_lines(data, format_="") == ["text", "line 1", "line 2"] - - -@pytest.mark.regression() -def test_split_text_with_mixed_separators(): - text = "This\tis a test\nwith mixed separators." - words, separators, new_lines = split_text(text) - assert words == ["This", "is", "a", "test", "with", "mixed", "separators."] - assert separators == ["\t", " ", " ", "\n", " ", " ", " "] - assert new_lines == [0, 4] diff --git a/tests/unit/utils/test_transformers.py b/tests/unit/utils/test_transformers.py deleted file mode 100644 index 5153da3f..00000000 --- a/tests/unit/utils/test_transformers.py +++ /dev/null @@ -1,236 +0,0 @@ -from unittest.mock import patch - -import pytest -from datasets import ( # type: ignore - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, -) - -from guidellm.utils.transformers import ( - load_transformers_dataset, - resolve_transformers_dataset, - resolve_transformers_dataset_column, - resolve_transformers_dataset_split, -) -from tests.dummy.data.transformers import ( - create_sample_dataset, - create_sample_dataset_dict, - create_sample_iterable_dataset, - create_sample_iterable_dataset_dict, -) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset_dict(splits=["train"]), - "train", - None, - Dataset, - ), - ( - "mock/directory/file.json", - create_sample_dataset_dict(splits=["test"]), - None, - ("train", "test"), - Dataset, - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - None, - None, - Dataset, - ), - ( - create_sample_dataset_dict(splits=["val", "train"], column="custom"), - None, - "val", - None, - Dataset, - ), - ( - create_sample_dataset(), - None, - None, - None, - Dataset, - ), - ( - create_sample_iterable_dataset_dict(splits=["validation"]), - None, - None, - None, - IterableDataset, - ), - ( - create_sample_iterable_dataset(), - None, - "validation", - None, - IterableDataset, - ), - ], -) -def test_load_transformers_dataset( - dataset_arg, dataset, split, preferred_splits, expected_type -): - with patch( - "guidellm.utils.transformers.load_dataset", - 
return_value=dataset, - ): - loaded_dataset = load_transformers_dataset( - dataset_arg, split=split, preferred_splits=preferred_splits - ) - assert isinstance(loaded_dataset, expected_type) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset_arg", "dataset", "split", "preferred_splits", "expected_type"), - [ - ( - "mock/directory/file.csv", - create_sample_dataset(), - "train", - None, - Dataset, - ), - ( - "mock/directory/file.json", - create_sample_dataset_dict(splits=["test"]), - None, - ("train", "test"), - DatasetDict, - ), - ( - "mock/directory/file.py", - create_sample_dataset_dict(splits=["test"], column="output"), - None, - None, - DatasetDict, - ), - ( - "mock/directory/file.unk", - create_sample_dataset_dict(splits=["test"], column="output"), - None, - None, - DatasetDict, - ), - ( - create_sample_dataset_dict(splits=["val", "train"], column="custom"), - None, - "val", - None, - DatasetDict, - ), - ( - create_sample_dataset(), - None, - None, - None, - Dataset, - ), - ( - create_sample_iterable_dataset_dict(splits=["validation"]), - None, - None, - None, - IterableDatasetDict, - ), - ( - create_sample_iterable_dataset(), - None, - "validation", - None, - IterableDataset, - ), - ], -) -def test_resolve_transformers_dataset( - dataset_arg, dataset, split, preferred_splits, expected_type -): - with patch( - "guidellm.utils.transformers.load_dataset", - return_value=dataset, - ): - loaded_dataset = resolve_transformers_dataset( - dataset_arg, split=split, preferred_splits=preferred_splits - ) - assert isinstance(loaded_dataset, expected_type) - - -@pytest.mark.sanity() -def test_resolve_transformers_dataset_invalid(): - with pytest.raises(ValueError): - resolve_transformers_dataset(123) - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset", "split", "preferred_splits", "expected_type"), - [ - ( - create_sample_dataset(), - None, - None, - Dataset, - ), - ( - create_sample_iterable_dataset_dict(splits=["validation"]), - None, - None, - IterableDataset, - ), - ( - create_sample_iterable_dataset(), - "validation", - None, - IterableDataset, - ), - ], -) -def test_resolve_transformers_dataset_split( - dataset, split, preferred_splits, expected_type -): - loaded_dataset = resolve_transformers_dataset_split( - dataset, split=split, preferred_splits=preferred_splits - ) - assert isinstance(loaded_dataset, expected_type) - - -def test_resolve_transformers_dataset_split_missing(): - dataset = create_sample_dataset_dict() - with pytest.raises(ValueError): - resolve_transformers_dataset_split(dataset, split="missing") - - -@pytest.mark.smoke() -@pytest.mark.parametrize( - ("dataset", "column", "preferred_columns", "expected_column"), - [ - (create_sample_dataset(), None, None, "text"), - (create_sample_dataset(), "text", None, "text"), - (create_sample_dataset(), None, ["text"], "text"), - (create_sample_dataset(), None, ["data"], "text"), - (create_sample_iterable_dataset(), None, None, "text"), - ], -) -def test_resolve_transformers_dataset_column( - dataset, column, preferred_columns, expected_column -): - resolved_column = resolve_transformers_dataset_column( - dataset, column=column, preferred_columns=preferred_columns - ) - assert resolved_column == expected_column - - -def test_resolve_transformers_dataset_column_missing(): - dataset = create_sample_dataset() - with pytest.raises(ValueError): - resolve_transformers_dataset_column(dataset, column="missing")
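
Note: the removed tests/unit/scheduler/test_load_generator.py validated Poisson scheduling by running a Kolmogorov-Smirnov test on the generated inter-arrival gaps against an exponential distribution with mean 1/rate. If equivalent coverage is rebuilt against the new scheduler package, a minimal sketch of that statistical check could look like the following. This is only an illustration, assuming NumPy and SciPy are available and using a plain exponential sampler as a stand-in for the removed LoadGenerator.poisson_times(); it is not the project's API.

import numpy as np
from scipy.stats import kstest

rate = 5.0  # requests per second, matching the rate used in the removed test
rng = np.random.default_rng(seed=0)

# Poisson arrivals have exponentially distributed inter-arrival gaps
# with mean 1 / rate; sample 100 gaps as the removed test effectively did.
gaps = rng.exponential(scale=1.0 / rate, size=100)

# Compare the sample against expon(loc=0, scale=1/rate); a large p-value
# means the exponential hypothesis cannot be rejected.
ks_statistic, p_value = kstest(gaps, "expon", args=(0, 1.0 / rate))
assert p_value > 0.025, (
    f"inter-arrival times do not match the expected exponential "
    f"distribution (p-value: {p_value})"
)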