|
| 1 | +"""add llm and mux |
| 2 | +
|
| 3 | +Revision ID: 0f9b8edc8e46 |
| 4 | +Revises: 90d5471db49a |
| 5 | +Create Date: 2025-01-24 07:58:34.907908+00:00 |
| 6 | +
|
| 7 | +""" |
| 8 | + |
| 9 | +from typing import Sequence, Union |
| 10 | + |
| 11 | +from alembic import context, op |
| 12 | + |
| 13 | +# Alembic revision identifiers: this migration follows 90d5471db49a and has no branch labels or dependencies. |
| 14 | +revision: str = "0f9b8edc8e46" |
| 15 | +down_revision: Union[str, None] = "90d5471db49a" |
| 16 | +branch_labels: Union[str, Sequence[str], None] = None |
| 17 | +depends_on: Union[str, Sequence[str], None] = None |
| 18 | + |
| 19 | + |
def upgrade() -> None:
    """Create the provider endpoint, provider model and mux tables.

    Issues five DDL statements, in order, inside a
    ``context.begin_transaction()`` block: three ``CREATE TABLE`` statements
    (``provider_endpoints``, ``provider_models``, ``muxes``) followed by two
    ``CREATE INDEX`` statements on ``muxes``. Every statement uses
    ``IF NOT EXISTS``.
    """
    with context.begin_transaction():
        schema_statements = (
            # provider_endpoints: the provider endpoints that are available
            # for references, e.g. in Muxing. `endpoint` stores the endpoint
            # of the model. `auth_type` determines how `auth_blob` is
            # interpreted; the blob stores the credentials for the model as
            # either a JSON object or a string, and is ignored when
            # `auth_type` is `none`.
            # NOTE: This resource is not namespaced by a workspace, because
            # the models are shared across workspaces.
            # NOTE: The lack of `deleted_at` is intentional; this resource is
            # not soft-deleted.
            # TODO: Do we need a display name here? One option is to use
            # `name` as the display name and normalize it to a slug when it
            # is used as a reference.
            """
            CREATE TABLE IF NOT EXISTS provider_endpoints (
                id TEXT PRIMARY KEY, -- UUID stored as TEXT
                name TEXT NOT NULL UNIQUE,
                description TEXT NOT NULL DEFAULT '',
                provider_type TEXT NOT NULL, -- e.g. "openai", "anthropic", "vllm"
                endpoint TEXT NOT NULL DEFAULT '',
                auth_type TEXT NOT NULL DEFAULT 'none',
                auth_blob TEXT NOT NULL DEFAULT '',
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
            );
            """,
            # provider_models: the models that are available for a given
            # provider. `provider_endpoint_id` references the provider
            # endpoint the model belongs to, and `name` is the model name,
            # which should contain the version of the model.
            # NOTE: This is basically a cache of the models available for a
            # given provider. It should be refreshed periodically, and always
            # at the point of provider endpoint creation.
            """
            CREATE TABLE IF NOT EXISTS provider_models (
                provider_endpoint_id TEXT NOT NULL REFERENCES provider_endpoints(id)
                    ON DELETE CASCADE,
                name TEXT NOT NULL, -- this should contain the version of the model
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                deleted_at TIMESTAMP,
                PRIMARY KEY (provider_endpoint_id, name)
            );
            """,
            # muxes: the Muxing configuration. `provider_endpoint_id` and
            # `provider_model_name` together reference the `provider_models`
            # row that the Muxing configuration is for.
            # `matcher_type` determines the type of matcher used, e.g.
            # `file_glob` would be a matcher that uses file globbing to match
            # files if a file is detected in the prompt. `matcher_blob` stores
            # the configuration for the matcher, which can be a JSON object or
            # a string depending on `matcher_type`. In the initial
            # implementation it is simply a string matched against the prompt
            # file name (if a file is detected in the prompt).
            # `priority` orders the Muxing configurations: the lower the
            # number, the higher the priority. Prompts are matched against the
            # configurations in ascending order of priority.
            """
            CREATE TABLE IF NOT EXISTS muxes (
                id TEXT PRIMARY KEY, -- UUID stored as TEXT
                provider_endpoint_id TEXT NOT NULL,
                provider_model_name TEXT NOT NULL,
                workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE,
                matcher_type TEXT NOT NULL,
                matcher_blob TEXT NOT NULL,
                priority INTEGER NOT NULL,
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (provider_endpoint_id, provider_model_name)
                    REFERENCES provider_models(provider_endpoint_id, name) ON DELETE CASCADE
            );
            """,
            # Access pattern: `muxes` is queried to find the Muxing
            # configuration for a given prompt, and the initial search is by
            # `workspace_id`.
            "CREATE INDEX IF NOT EXISTS idx_muxes_workspace_id ON muxes (workspace_id);",
            # `muxes` is JOINed with `provider_models` to get the model
            # information, so the two referencing columns get an index too.
            """
            CREATE INDEX IF NOT EXISTS idx_muxes_provider_endpoint_id_provider_model_name
            ON muxes (provider_endpoint_id, provider_model_name);
            """,
        )
        # Order matters: each table is created before anything that
        # references it, and the indexes come after `muxes` exists.
        for statement in schema_statements:
            op.execute(statement)
| 126 | + |
| 127 | + |
def downgrade() -> None:
    """Drop the indexes and tables that ``upgrade`` creates.

    The statements run inside a ``context.begin_transaction()`` block, in
    this order: the two ``muxes`` indexes, then ``muxes``, then
    ``provider_models``, and finally ``provider_endpoints``. Every statement
    uses ``IF EXISTS``.
    """
    teardown_statements = (
        "DROP INDEX IF EXISTS idx_muxes_provider_endpoint_id_provider_model_name;",
        "DROP INDEX IF EXISTS idx_muxes_workspace_id;",
        # Referencing tables go before the tables they reference.
        "DROP TABLE IF EXISTS muxes;",
        "DROP TABLE IF EXISTS provider_models;",
        "DROP TABLE IF EXISTS provider_endpoints;",
    )
    with context.begin_transaction():
        for statement in teardown_statements:
            op.execute(statement)
0 commit comments