llamastack
diff --git a/‎.github/actions/setup-test-environment/action.yml‎
Lines changed: 26 additions & 0 deletions b/‎.github/actions/setup-test-environment/action.yml‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎.github/workflows/integration-tests.yml‎
Lines changed: 9 additions & 3 deletions b/‎.github/workflows/integration-tests.yml‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎client-sdks/stainless/config.yml‎
Lines changed: 6 additions & 0 deletions b/‎client-sdks/stainless/config.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎client-sdks/stainless/openapi.yml‎
Lines changed: 55 additions & 2 deletions b/‎client-sdks/stainless/openapi.yml‎
Lines changed: 55 additions & 2 deletions
diff --git a/‎docs/docs/api-deprecated/index.mdx‎
Lines changed: 62 additions & 0 deletions b/‎docs/docs/api-deprecated/index.mdx‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎docs/docs/api-experimental/index.mdx‎
Lines changed: 128 additions & 0 deletions b/‎docs/docs/api-experimental/index.mdx‎
Lines changed: 128 additions & 0 deletions
@@ -39,6 +39,32 @@ runs:
       if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
       uses: ./.github/actions/setup-vllm
 
+    - name: Start Postgres service  # runs only when the setup name contains 'postgres'
+      if: ${{ contains(inputs.setup, 'postgres') }}
+      shell: bash
+      run: |
+        sudo docker rm -f postgres-ci || true  # drop a stale container; a missing one is fine
+        sudo docker run -d --name postgres-ci \
+          -e POSTGRES_USER=llamastack \
+          -e POSTGRES_PASSWORD=llamastack \
+          -e POSTGRES_DB=llamastack \
+          -p 5432:5432 \
+          postgres:16
+        # Probe over TCP: the image's init-time temporary server is socket-only, so a socket probe can pass too early.
+        echo "Waiting for Postgres to become ready..."
+        for i in {1..30}; do  # up to 30 attempts, 2s apart
+          if sudo docker exec postgres-ci pg_isready -h 127.0.0.1 -p 5432 -U llamastack -d llamastack >/dev/null 2>&1; then
+            echo "Postgres is ready"
+            break
+          fi
+          if [ "$i" -eq 30 ]; then  # last attempt failed: dump container logs and fail the step
+            echo "Postgres failed to start in time"
+            sudo docker logs postgres-ci || true
+            exit 1
+          fi
+          sleep 2
+        done
+
     - name: Build Llama Stack
       shell: bash
       run: |
 
@@ -66,12 +66,12 @@ jobs:
   run-replay-mode-tests:
     needs: generate-matrix
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
 
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, docker, server]
+        client: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@@ -84,6 +84,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
 
       - name: Setup test environment
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
@@ -93,11 +94,16 @@ jobs:
           inference-mode: 'replay'
 
       - name: Run tests
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/run-and-record-tests
         env:
           OPENAI_API_KEY: dummy
         with:
-          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
+          stack-config: >-
+            ${{ matrix.config.stack_config
+                || (matrix.client == 'library' && 'ci-tests')
+                || (matrix.client == 'server' && 'server:ci-tests')
+                || 'docker:ci-tests' }}
           setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
           suite: ${{ matrix.config.suite }}
@@ -463,6 +463,12 @@ resources:
 settings:
   license: MIT
   unwrap_response_fields: [data]
+  file_header: |
+    Copyright (c) Meta Platforms, Inc. and affiliates.
+    All rights reserved.
+
+    This source code is licensed under the terms described in the LICENSE file in
+    the root directory of this source tree.
 
 openapi:
   transformations:
 
@@ -963,7 +963,7 @@ paths:
             Optional filter to control which routes are returned. Can be an API level
             ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
             or 'deprecated' to show deprecated routes across all levels. If not specified,
-            returns only non-deprecated v1 routes.
+            returns all non-deprecated routes.
           required: false
           schema:
             type: string
@@ -2691,7 +2691,8 @@ paths:
       responses:
         '200':
           description: >-
-            A VectorStoreFileContentResponse representing the file contents.
+            File contents, optionally with embeddings and metadata based on query
+            parameters.
           content:
             application/json:
               schema:
@@ -2726,6 +2727,20 @@ paths:
           required: true
           schema:
             type: string
+        - name: include_embeddings
+          in: query
+          description: >-
+            Whether to include embedding vectors in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
+        - name: include_metadata
+          in: query
+          description: >-
+            Whether to include chunk metadata in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
       deprecated: false
   /v1/vector_stores/{vector_store_id}/search:
     post:
@@ -6779,6 +6794,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
         input:
           type: array
           items:
@@ -7139,6 +7159,11 @@ components:
             (Optional) Additional fields to include in the response.
         max_infer_iters:
           type: integer
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response.
       additionalProperties: false
       required:
         - input
@@ -7224,6 +7249,11 @@ components:
           type: string
           description: >-
             (Optional) System message inserted into the model's context
+        max_tool_calls:
+          type: integer
+          description: >-
+            (Optional) Max number of total calls to built-in tools that can be processed
+            in a response
       additionalProperties: false
       required:
         - created_at
@@ -10235,6 +10265,8 @@ components:
       title: VectorStoreFileDeleteResponse
       description: >-
         Response from deleting a vector store file.
+    bool:
+      type: boolean
     VectorStoreContent:
       type: object
       properties:
@@ -10246,6 +10278,26 @@ components:
         text:
           type: string
           description: The actual text content
+        embedding:
+          type: array
+          items:
+            type: number
+          description: >-
+            Optional embedding vector for this content chunk
+        chunk_metadata:
+          $ref: '#/components/schemas/ChunkMetadata'
+          description: Optional chunk metadata
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: Optional user-defined metadata
       additionalProperties: false
       required:
         - type
@@ -10269,6 +10321,7 @@ components:
           description: Parsed content of the file
         has_more:
           type: boolean
+          default: false
           description: >-
             Indicates if there are more content pages to fetch
         next_page:
 
@@ -0,0 +1,62 @@
+---
+title: Deprecated APIs
+description: Legacy APIs that are being phased out
+sidebar_label: Deprecated
+sidebar_position: 1
+---
+
+# Deprecated APIs
+
+This section contains APIs that are being phased out in favor of newer, more standardized implementations. These APIs are maintained for backward compatibility but are not recommended for new projects.
+
+:::warning Deprecation Notice
+These APIs are deprecated and will be removed in future versions. Please migrate to the recommended alternatives listed below.
+:::
+
+## Migration Guide
+
+When using deprecated APIs, please refer to the migration guides provided for each API to understand how to transition to the supported alternatives.
+
+## Deprecated API List
+
+### Legacy Inference APIs
+Some older inference endpoints have been superseded by the standardized Inference API.
+
+**Migration Path:** Use the [Inference API](../api/) instead.
+
+### Legacy Vector Operations
+Older vector database operations have been replaced by the Vector IO API.
+
+**Migration Path:** Use the [Vector IO API](../api/) instead.
+
+### Legacy File Operations
+Older file management endpoints have been replaced by the Files API.
+
+**Migration Path:** Use the [Files API](../api/) instead.
+
+## Support Timeline
+
+Deprecated APIs will be supported according to the following timeline:
+
+- **Current Version**: Full support with deprecation warnings
+- **Next Major Version**: Limited support with migration notices
+- **Following Major Version**: Removal of deprecated APIs
+
+## Getting Help
+
+If you need assistance migrating from deprecated APIs:
+
+1. Check the specific migration guides for each API
+2. Review the [API Reference](../api/) for current alternatives
+3. Consult the [Community Forums](https://github.com/llamastack/llama-stack/discussions) for migration support
+4. Open an issue on GitHub for specific migration questions
+
+## Contributing
+
+If you find issues with deprecated APIs or have suggestions for improving the migration process, please contribute by:
+
+1. Opening an issue describing the problem
+2. Submitting a pull request with improvements
+3. Updating migration documentation
+
+For more information on contributing, see our [Contributing Guide](../contributing/).
@@ -0,0 +1,128 @@
+---
+title: Experimental APIs
+description: APIs in development with limited support
+sidebar_label: Experimental
+sidebar_position: 1
+---
+
+# Experimental APIs
+
+This section contains APIs that are currently in development and may have limited support or stability. These APIs are available for testing and feedback but should not be used in production environments.
+
+:::warning Experimental Notice
+These APIs are experimental and may change without notice. Use with caution and provide feedback to help improve them.
+:::
+
+## Current Experimental APIs
+
+### Batch Inference API
+Run inference on a dataset of inputs in batch mode for improved efficiency.
+
+- **Status:** In Development
+- **Provider Support:** Limited
+- **Use Case:** Large-scale inference operations
+
+**Features:**
+- Batch processing of multiple inputs
+- Optimized resource utilization
+- Progress tracking and monitoring
+
+### Batch Agents API
+Run agentic workflows on a dataset of inputs in batch mode.
+
+- **Status:** In Development
+- **Provider Support:** Limited
+- **Use Case:** Large-scale agent operations
+
+**Features:**
+- Batch agent execution
+- Parallel processing capabilities
+- Result aggregation and analysis
+
+### Synthetic Data Generation API
+Generate synthetic data for model development and testing.
+
+- **Status:** Early Development
+- **Provider Support:** Very Limited
+- **Use Case:** Training data augmentation
+
+**Features:**
+- Automated data generation
+- Quality control mechanisms
+- Customizable generation parameters
+
+### Batches API (OpenAI-compatible)
+OpenAI-compatible batch management for inference operations.
+
+- **Status:** In Development
+- **Provider Support:** Limited
+- **Use Case:** OpenAI batch processing compatibility
+
+**Features:**
+- OpenAI batch API compatibility
+- Job scheduling and management
+- Status tracking and monitoring
+
+## Getting Started with Experimental APIs
+
+### Prerequisites
+- Llama Stack server running with experimental features enabled
+- Appropriate provider configurations
+- Understanding of API limitations
+
+### Configuration
+Experimental APIs may require special configuration flags or provider settings. Check the specific API documentation for setup requirements.
+
+### Usage Guidelines
+1. **Testing Only**: Use experimental APIs for testing and development only
+2. **Monitor Changes**: Watch for updates and breaking changes
+3. **Provide Feedback**: Report issues and suggest improvements
+4. **Backup Data**: Always back up important data when using experimental features
+
+## Feedback and Contribution
+
+We encourage feedback on experimental APIs to help improve them:
+
+### Reporting Issues
+- Use GitHub issues with the "experimental" label
+- Include detailed error messages and reproduction steps
+- Specify the API version and provider being used
+
+### Feature Requests
+- Submit feature requests through GitHub discussions
+- Provide use cases and expected behavior
+- Consider contributing implementations
+
+### Testing
+- Test experimental APIs in your environment
+- Report performance issues and optimization opportunities
+- Share success stories and use cases
+
+## Migration to Stable APIs
+
+As experimental APIs mature, they will be moved to the stable API section. When this happens:
+
+1. **Announcement**: We'll announce the promotion in release notes
+2. **Migration Guide**: Detailed migration instructions will be provided
+3. **Deprecation Timeline**: Experimental versions will be deprecated with notice
+4. **Support**: Full support will be available for stable versions
+
+## Provider Support
+
+Experimental APIs may have limited provider support. Check the specific API documentation for:
+
+- Supported providers
+- Configuration requirements
+- Known limitations
+- Performance characteristics
+
+## Roadmap
+
+Experimental APIs are part of our ongoing development roadmap:
+
+- **Q1 2024**: Batch Inference API stabilization
+- **Q2 2024**: Batch Agents API improvements
+- **Q3 2024**: Synthetic Data Generation API expansion
+- **Q4 2024**: Batches API full OpenAI compatibility
+
+For the latest updates, follow our [GitHub releases](https://github.com/llamastack/llama-stack/releases) and [roadmap discussions](https://github.com/llamastack/llama-stack/discussions).