Commit fcf2306

Author: Andrej Simurka

    Azure inference supported

1 parent 23948fd · commit fcf2306

File tree

6 files changed: +292 −3 lines changed


.github/workflows/e2e_tests.yaml

Lines changed: 30 additions & 1 deletion

```diff
@@ -8,9 +8,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        environment: [ "ci"]
+        environment: [ "ci", "azure"]
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+      CLIENT_ID: ${{ secrets.CLIENT_ID }}
+      TENANT_ID: ${{ secrets.TENANT_ID }}
 
     steps:
       - uses: actions/checkout@v4
@@ -72,6 +75,32 @@ jobs:
 
           authentication:
             module: "noop"
+
+      - name: Get Azure API key (access token)
+        if: matrix.environment == 'azure'
+        id: azure_token
+        env:
+          CLIENT_ID: ${{ secrets.CLIENT_ID }}
+          CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
+          TENANT_ID: ${{ secrets.TENANT_ID }}
+        run: |
+          echo "Requesting Azure API token..."
+          RESPONSE=$(curl -s -X POST \
+            -H "Content-Type: application/x-www-form-urlencoded" \
+            -d "client_id=$CLIENT_ID&scope=https://cognitiveservices.azure.com/.default&client_secret=$CLIENT_SECRET&grant_type=client_credentials" \
+            "https://login.microsoftonline.com/$TENANT_ID/oauth2/v2.0/token")
+
+          echo "Response received. Extracting access_token..."
+          ACCESS_TOKEN=$(echo "$RESPONSE" | jq -r '.access_token')
+
+          if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" == "null" ]; then
+            echo "❌ Failed to obtain Azure access token. Response:"
+            echo "$RESPONSE"
+            exit 1
+          fi
+
+          echo "✅ Successfully obtained Azure access token."
+          echo "AZURE_API_KEY=$ACCESS_TOKEN" >> $GITHUB_ENV
 
       - name: Select and configure run.yaml
         env:
```

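The workflow's token step can also be reproduced outside CI. The sketch below mirrors the same OAuth2 client-credentials exchange in Python: the token endpoint, scope, and null-check come from the curl/jq calls in the diff, while the function names (`build_token_request`, `extract_token`) are hypothetical helpers for illustration.

```python
"""Sketch of the workflow's Azure token acquisition in Python.

Endpoint and scope are taken from the workflow diff; the helper
functions themselves are illustrative, not part of the repository.
"""
import json
import urllib.parse

SCOPE = "https://cognitiveservices.azure.com/.default"


def build_token_request(tenant_id: str, client_id: str, client_secret: str):
    """Return (url, form_body) for the OAuth2 client-credentials call."""
    url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    body = urllib.parse.urlencode({
        "client_id": client_id,
        "scope": SCOPE,
        "client_secret": client_secret,
        "grant_type": "client_credentials",
    })
    return url, body


def extract_token(response_text: str) -> str:
    """Mirror the workflow's `jq -r '.access_token'` plus null check."""
    token = json.loads(response_text).get("access_token")
    if not token or token == "null":
        raise RuntimeError(f"Failed to obtain Azure access token: {response_text}")
    return token
```

An actual request would POST `body` to `url` (for example with `urllib.request` or `requests`) and feed the response text to `extract_token`, then export the result as `AZURE_API_KEY`.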
README.md

Lines changed: 2 additions & 0 deletions

```diff
@@ -125,6 +125,8 @@ Lightspeed Core Stack (LCS) supports the large language models from the provider
 | OpenAI | gpt-5, gpt-4o, gpt4-turbo, gpt-4.1, o1, o3, o4 | Yes | remote::openai | [1](examples/openai-faiss-run.yaml) [2](examples/openai-pgvector-run.yaml) |
 | OpenAI | gpt-3.5-turbo, gpt-4 | No | remote::openai | |
 | RHAIIS (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhaiis.yaml) |
+| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) |
+| Azure | o1, o1-mini | No | remote::azure | |
 
 The "provider_type" is used in the llama stack configuration file when refering to the provider.
```

docker-compose.yaml

Lines changed: 2 additions & 0 deletions

```diff
@@ -12,6 +12,7 @@ services:
       - ./run.yaml:/opt/app-root/run.yaml:Z
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - AZURE_API_KEY=${AZURE_API_KEY}
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
       - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
       - RHAIIS_URL=${RHAIIS_URL}
@@ -36,6 +37,7 @@ services:
       - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - AZURE_API_KEY=${AZURE_API_KEY}
     depends_on:
       llama-stack:
         condition: service_healthy
```

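Compose only forwards `AZURE_API_KEY` (like the other keys) from the caller's shell; if it is unset there, the container sees an empty value. A hypothetical preflight helper, not part of the repository, could fail fast before `docker compose up`:

```python
"""Preflight check for environment variables the compose file forwards.

The variable names come from the diff; the helper itself is a
hypothetical convenience, not part of the repository.
"""
import os

REQUIRED = ["OPENAI_API_KEY", "AZURE_API_KEY"]


def missing_vars(required, environ=os.environ):
    """Return the subset of `required` that is unset or empty."""
    return [name for name in required if not environ.get(name)]


if __name__ == "__main__":
    missing = missing_vars(REQUIRED)
    if missing:
        raise SystemExit(
            f"Set these before `docker compose up`: {', '.join(missing)}"
        )
```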
docs/providers.md

Lines changed: 2 additions & 2 deletions

```diff
@@ -36,7 +36,7 @@ The tables below summarize each provider category, containing the following atri
 | meta-reference | inline | `accelerate`, `fairscale`, `torch`, `torchvision`, `transformers`, `zmq`, `lm-format-enforcer`, `sentence-transformers`, `torchao==0.8.0`, `fbgemm-gpu-genai==1.1.2` ||
 | sentence-transformers | inline | `torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu`, `sentence-transformers --no-deps` ||
 | anthropic | remote | `litellm` ||
-| azure | remote | `itellm` | |
+| azure | remote | | |
 | bedrock | remote | `boto3` ||
 | cerebras | remote | `cerebras_cloud_sdk` ||
 | databricks | remote |||
@@ -287,4 +287,4 @@ Red Hat providers:
 
 ---
 
-For a deeper understanding, see the [official llama-stack configuration documentation](https://llama-stack.readthedocs.io/en/latest/distributions/configuration.html).
+For a deeper understanding, see the [official llama-stack providers documentation](https://llamastack.github.io/docs/providers).
```

examples/azure-run.yaml

Lines changed: 128 additions & 0 deletions (new file)

```yaml
version: '2'
image_name: minimal-viable-llama-stack-configuration

apis:
  - agents
  - datasetio
  - eval
  - files
  - inference
  - post_training
  - safety
  - scoring
  - telemetry
  - tool_runtime
  - vector_io
benchmarks: []
container_image: null
datasets: []
external_providers_dir: null
inference_store:
  db_path: .llama/distributions/ollama/inference_store.db
  type: sqlite
logging: null
metadata_store:
  db_path: .llama/distributions/ollama/registry.db
  namespace: null
  type: sqlite
providers:
  files:
    - provider_id: localfs
      provider_type: inline::localfs
      config:
        storage_dir: /tmp/llama-stack-files
        metadata_store:
          type: sqlite
          db_path: .llama/distributions/ollama/files_metadata.db
  agents:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        persistence_store:
          db_path: .llama/distributions/ollama/agents_store.db
          namespace: null
          type: sqlite
        responses_store:
          db_path: .llama/distributions/ollama/responses_store.db
          type: sqlite
  datasetio:
    - provider_id: huggingface
      provider_type: remote::huggingface
      config:
        kvstore:
          db_path: .llama/distributions/ollama/huggingface_datasetio.db
          namespace: null
          type: sqlite
    - provider_id: localfs
      provider_type: inline::localfs
      config:
        kvstore:
          db_path: .llama/distributions/ollama/localfs_datasetio.db
          namespace: null
          type: sqlite
  eval:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        kvstore:
          db_path: .llama/distributions/ollama/meta_reference_eval.db
          namespace: null
          type: sqlite
  inference:
    - provider_id: azure
      provider_type: remote::azure
      config:
        api_key: ${env.AZURE_API_KEY}
        api_base: https://ols-test.openai.azure.com/
        api_version: 2024-02-15-preview
        api_type: ${env.AZURE_API_TYPE:=}
  post_training:
    - provider_id: huggingface
      provider_type: inline::huggingface-gpu
      config:
        checkpoint_format: huggingface
        device: cpu
        distributed_backend: null
        dpo_output_dir: "."
  safety:
    - provider_id: llama-guard
      provider_type: inline::llama-guard
      config:
        excluded_categories: []
  scoring:
    - provider_id: basic
      provider_type: inline::basic
      config: {}
    - provider_id: llm-as-judge
      provider_type: inline::llm-as-judge
      config: {}
    - provider_id: braintrust
      provider_type: inline::braintrust
      config:
        openai_api_key: '********'
  telemetry:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        service_name: 'lightspeed-stack-telemetry'
        sinks: sqlite
        sqlite_db_path: .llama/distributions/ollama/trace_store.db
  tool_runtime:
    - provider_id: model-context-protocol
      provider_type: remote::model-context-protocol
      config: {}
scoring_fns: []
server:
  auth: null
  host: null
  port: 8321
  quota: null
  tls_cafile: null
  tls_certfile: null
  tls_keyfile: null
shields: []
models:
  - model_id: gpt-4o-mini
    model_type: llm
    provider_id: azure
    provider_model_id: gpt-4o-mini
```

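Both run configurations rely on llama-stack's `${env.NAME}` substitution syntax, with `${env.AZURE_API_TYPE:=}` falling back to an empty default. As an illustration of those semantics only (a rough approximation, not llama-stack's actual resolver), the substitution can be sketched as:

```python
"""Illustrative re-implementation of ${env.NAME} / ${env.NAME:=default}
substitution as used in the run.yaml files above. This approximates the
observed syntax; it is not llama-stack's actual resolver.
"""
import os
import re

_ENV_REF = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::=([^}]*))?\}")


def resolve_env_refs(text: str, environ=os.environ) -> str:
    """Replace ${env.NAME} and ${env.NAME:=default} references in text.

    A missing variable with no default raises, mirroring the idea that
    required settings (like AZURE_API_KEY) must be provided.
    """
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        if name in environ:
            return environ[name]
        if default is not None:  # ":=default" present, possibly empty
            return default
        raise KeyError(f"environment variable {name} is not set")

    return _ENV_REF.sub(_sub, text)
```

Under this reading, `api_key: ${env.AZURE_API_KEY}` fails loudly when the token step has not populated `AZURE_API_KEY`, while `api_type: ${env.AZURE_API_TYPE:=}` silently resolves to an empty string.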
tests/e2e/configs/run-azure.yaml

Lines changed: 128 additions & 0 deletions (new file)

```yaml
version: '2'
image_name: minimal-viable-llama-stack-configuration

apis:
  - agents
  - datasetio
  - eval
  - files
  - inference
  - post_training
  - safety
  - scoring
  - telemetry
  - tool_runtime
  - vector_io
benchmarks: []
container_image: null
datasets: []
external_providers_dir: null
inference_store:
  db_path: .llama/distributions/ollama/inference_store.db
  type: sqlite
logging: null
metadata_store:
  db_path: .llama/distributions/ollama/registry.db
  namespace: null
  type: sqlite
providers:
  files:
    - provider_id: localfs
      provider_type: inline::localfs
      config:
        storage_dir: /tmp/llama-stack-files
        metadata_store:
          type: sqlite
          db_path: .llama/distributions/ollama/files_metadata.db
  agents:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        persistence_store:
          db_path: .llama/distributions/ollama/agents_store.db
          namespace: null
          type: sqlite
        responses_store:
          db_path: .llama/distributions/ollama/responses_store.db
          type: sqlite
  datasetio:
    - provider_id: huggingface
      provider_type: remote::huggingface
      config:
        kvstore:
          db_path: .llama/distributions/ollama/huggingface_datasetio.db
          namespace: null
          type: sqlite
    - provider_id: localfs
      provider_type: inline::localfs
      config:
        kvstore:
          db_path: .llama/distributions/ollama/localfs_datasetio.db
          namespace: null
          type: sqlite
  eval:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        kvstore:
          db_path: .llama/distributions/ollama/meta_reference_eval.db
          namespace: null
          type: sqlite
  inference:
    - provider_id: azure
      provider_type: remote::azure
      config:
        api_key: ${env.AZURE_API_KEY}
        api_base: https://ols-test.openai.azure.com/
        api_version: 2024-02-15-preview
        api_type: ${env.AZURE_API_TYPE:=}
  post_training:
    - provider_id: huggingface
      provider_type: inline::huggingface-gpu
      config:
        checkpoint_format: huggingface
        device: cpu
        distributed_backend: null
        dpo_output_dir: "."
  safety:
    - provider_id: llama-guard
      provider_type: inline::llama-guard
      config:
        excluded_categories: []
  scoring:
    - provider_id: basic
      provider_type: inline::basic
      config: {}
    - provider_id: llm-as-judge
      provider_type: inline::llm-as-judge
      config: {}
    - provider_id: braintrust
      provider_type: inline::braintrust
      config:
        openai_api_key: '********'
  telemetry:
    - provider_id: meta-reference
      provider_type: inline::meta-reference
      config:
        service_name: 'lightspeed-stack-telemetry'
        sinks: sqlite
        sqlite_db_path: .llama/distributions/ollama/trace_store.db
  tool_runtime:
    - provider_id: model-context-protocol
      provider_type: remote::model-context-protocol
      config: {}
scoring_fns: []
server:
  auth: null
  host: null
  port: 8321
  quota: null
  tls_cafile: null
  tls_certfile: null
  tls_keyfile: null
shields: []
models:
  - model_id: gpt-4o-mini
    model_type: llm
    provider_id: azure
    provider_model_id: gpt-4o-mini
```
