Commit 23948fd

Merge pull request #652 from are-ces/rhaiis-support
LCORE-378: Lightspeed core needs to fully support Red Hat AI Inference server LLM provider
2 parents 9285c1b + 46a68f5 commit 23948fd

6 files changed, +333 −8 lines
.github/workflows/e2e_tests_rhaiis.yaml

Lines changed: 175 additions & 0 deletions (new file)

```yaml
# .github/workflows/e2e_tests_rhaiis.yaml
name: RHAIIS E2E Tests

on:
  schedule:
    - cron: "0 0 * * *" # Runs once a day at midnight UTC
  workflow_dispatch:

jobs:
  e2e_tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        environment: [ "rhaiis" ]
    env:
      RHAIIS_URL: ${{ secrets.RHAIIS_URL }}
      RHAIIS_API_KEY: ${{ secrets.RHAIIS_API_KEY }}

    steps:
      - uses: actions/checkout@v4
        with:
          # On PR_TARGET → the fork (or same repo) that opened the PR.
          # On push → falls back to the current repository.
          repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}

          # On PR_TARGET → the PR head *commit* (reproducible).
          # On push → the pushed commit that triggered the workflow.
          ref: ${{ github.event.pull_request.head.ref || github.sha }}

          # Don't keep credentials when running untrusted PR code under PR_TARGET.
          persist-credentials: ${{ github.event_name != 'pull_request_target' }}

      - name: Verify actual git checkout result
        run: |
          echo "=== Git Status After Checkout ==="
          echo "Remote URLs:"
          git remote -v
          echo ""
          echo "Current branch: $(git branch --show-current 2>/dev/null || echo 'detached HEAD')"
          echo "Current commit: $(git rev-parse HEAD)"
          echo "Current commit message: $(git log -1 --oneline)"
          echo ""
          echo "=== Recent commits (should show setup-metrics commits) ==="
          git log --oneline -5

      - uses: 1arp/[email protected]
        with:
          path: '.'
          isAbsolutePath: false
          file: 'lightspeed-stack.yaml'
          content: |
            name: Lightspeed Core Service (LCS)
            service:
              host: 0.0.0.0
              port: 8080
              auth_enabled: false
              workers: 1
              color_log: true
              access_log: true
            llama_stack:
              # Uses a remote llama-stack service
              # The instance would have already been started with a llama-stack-run.yaml file
              use_as_library_client: false
              # Alternative for "as library use"
              # use_as_library_client: true
              # library_client_config_path: <path-to-llama-stack-run.yaml-file>
              url: http://llama-stack:8321
              api_key: xyzzy
            user_data_collection:
              feedback_enabled: true
              feedback_storage: "/tmp/data/feedback"
              transcripts_enabled: true
              transcripts_storage: "/tmp/data/transcripts"

            authentication:
              module: "noop"

      - name: Select and configure run.yaml
        env:
          CONFIG_ENVIRONMENT: ${{ matrix.environment || 'rhaiis' }}
        run: |
          CONFIGS_DIR="tests/e2e/configs"
          ENVIRONMENT="$CONFIG_ENVIRONMENT"

          echo "Looking for configurations in $CONFIGS_DIR/"

          # List available configurations
          if [ -d "$CONFIGS_DIR" ]; then
            echo "Available configurations:"
            ls -la "$CONFIGS_DIR"/*.yaml 2>/dev/null || echo "No YAML files found in $CONFIGS_DIR/"
          else
            echo "Configs directory '$CONFIGS_DIR' not found!"
            exit 1
          fi

          # Determine which config file to use
          CONFIG_FILE="$CONFIGS_DIR/run-$ENVIRONMENT.yaml"

          echo "Looking for: $CONFIG_FILE"

          if [ -f "$CONFIG_FILE" ]; then
            echo "Found config for environment: $ENVIRONMENT"
            cp "$CONFIG_FILE" run.yaml
          else
            echo "Configuration file not found: $CONFIG_FILE"
            echo "Available files in $CONFIGS_DIR:"
            ls -la "$CONFIGS_DIR/"
            exit 1
          fi

          # Update paths for container environment (relative -> absolute)
          sed -i 's|db_path: \.llama/distributions|db_path: /app-root/.llama/distributions|g' run.yaml
          sed -i 's|db_path: tmp/|db_path: /app-root/.llama/distributions/|g' run.yaml

          echo "Successfully configured for environment: $ENVIRONMENT"
          echo "Using configuration: $(basename "$CONFIG_FILE")"

      - name: Test RHAIIS connectivity
        env:
          RHAIIS_URL: ${{ secrets.RHAIIS_URL }}
          RHAIIS_API_KEY: ${{ secrets.RHAIIS_API_KEY }}
        run: |
          curl ${RHAIIS_URL}:8000/v1/models -H "Authorization: Bearer ${RHAIIS_API_KEY}"

      - name: Run service manually
        env:
          RHAIIS_URL: ${{ secrets.RHAIIS_URL }}
          RHAIIS_API_KEY: ${{ secrets.RHAIIS_API_KEY }}
        run: |
          docker compose version
          docker compose up -d

          # Check for errors and show logs if any services failed
          if docker compose ps | grep -E 'Exit|exited|stopped'; then
            echo "Some services failed to start - showing logs:"
            docker compose logs
            exit 1
          else
            echo "All services started successfully"
          fi

      - name: Wait for services
        run: |
          echo "Waiting for services to be healthy..."
          sleep 20 # adjust depending on boot time

      - name: Quick connectivity test
        run: |
          echo "Testing basic connectivity before full test suite..."
          curl -f http://localhost:8080/v1/models || {
            echo "❌ Basic connectivity failed - showing logs before running full tests"
            docker compose logs --tail=30
            exit 1
          }

      - name: Run e2e tests
        run: |
          echo "Installing test dependencies..."
          pip install uv
          uv sync

          echo "Running comprehensive e2e test suite..."
          make test-e2e

      - name: Show logs on failure
        if: failure()
        run: |
          echo "=== Test failure logs ==="
          echo "=== llama-stack logs ==="
          docker compose logs llama-stack

          echo ""
          echo "=== lightspeed-stack logs ==="
          docker compose logs lightspeed-stack
```
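
The connectivity step is the first thing to replay when this job fails on credentials. A minimal local sketch of that step, with placeholder values standing in for the repository secrets:

```bash
# Local replay of the workflow's "Test RHAIIS connectivity" step.
# Both values are placeholders; the real ones live in repository secrets.
export RHAIIS_URL="rhaiis.example.com"
export RHAIIS_API_KEY="changeme"

# Same request the workflow sends; a JSON model list in the response
# means the credentials and the network path to RHAIIS are good.
curl "${RHAIIS_URL}:8000/v1/models" \
  -H "Authorization: Bearer ${RHAIIS_API_KEY}"
```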

README.md

Lines changed: 5 additions & 0 deletions

````diff
@@ -22,6 +22,7 @@
 * [Configuration](#configuration)
 * [LLM Compatibility](#llm-compatibility)
 * [Set LLM provider and model](#set-llm-provider-and-model)
+* [Supported providers](#supported-providers)
 * [Integration with Llama Stack](#integration-with-llama-stack)
 * [Llama Stack as separate server](#llama-stack-as-separate-server)
 * [MCP Server and Tool Configuration](#mcp-server-and-tool-configuration)
@@ -123,6 +124,7 @@
 | -------- | ---------------------------------------------- | ------------ | -------------- | -------------------------------------------------------------------------- |
 | OpenAI | gpt-5, gpt-4o, gpt4-turbo, gpt-4.1, o1, o3, o4 | Yes | remote::openai | [1](examples/openai-faiss-run.yaml) [2](examples/openai-pgvector-run.yaml) |
 | OpenAI | gpt-3.5-turbo, gpt-4 | No | remote::openai | |
+| RHAIIS (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhaiis.yaml) |

 The "provider_type" is used in the llama stack configuration file when refering to the provider.

@@ -156,6 +158,9 @@ models:
   provider_model_id: gpt-4-turbo
 ```

+## Supported providers
+
+For a comprehensive list of supported providers, take a look [here](docs/providers.md).

 ## Integration with Llama Stack

````
docker-compose.yaml

Lines changed: 2 additions & 0 deletions

```diff
@@ -14,6 +14,8 @@ services:
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
       - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
+      - RHAIIS_URL=${RHAIIS_URL}
+      - RHAIIS_API_KEY=${RHAIIS_API_KEY}
     networks:
       - lightspeednet
     healthcheck:
```
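
Compose only forwards these variables; it does not define them. A sketch of a local bring-up matching the workflow's "Run service manually" step (values are placeholders, and the failure check is lifted from that step):

```bash
# The environment: entries above forward these into the container,
# so they must exist in the invoking shell first (placeholders).
export RHAIIS_URL="rhaiis.example.com"
export RHAIIS_API_KEY="changeme"

docker compose up -d

# Same post-startup failure check the workflow performs.
if docker compose ps | grep -E 'Exit|exited|stopped'; then
  docker compose logs
fi
```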

docs/providers.md

Lines changed: 7 additions & 1 deletion

```diff
@@ -55,9 +55,15 @@
 | tgi | remote | `huggingface_hub`, `aiohttp` ||
 | together | remote | `together` ||
 | vertexai | remote | `litellm`, `google-cloud-aiplatform` ||
-| vllm | remote | `openai` ||
 | watsonx | remote | `ibm_watsonx_ai` ||

+Red Hat providers:
+
+| Name | Version Tested | Type | Pip Dependencies | Supported in LCS |
+|---|---|---|---|:---:|
+| RHAIIS (vllm) | 3.2.3 (on RHEL 9.20250429.0.4) | remote | `openai` ||
+
+
 ---

 ## Agent Providers
```
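
The `openai` pip dependency is the tell: `remote::vllm` drives RHAIIS through vLLM's OpenAI-compatible HTTP API. A hedged way to confirm the server speaks that dialect before wiring it into llama-stack — the endpoint shape below is the standard OpenAI one, and host, key, and prompt are placeholders:

```bash
# Direct OpenAI-style chat request against RHAIIS, bypassing llama-stack.
curl "${RHAIIS_URL}:8000/v1/chat/completions" \
  -H "Authorization: Bearer ${RHAIIS_API_KEY}" \
  -H "Content-Type: application/json" \
  -d '{
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [{"role": "user", "content": "Say hello."}]
      }'
```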

tests/e2e/configs/run-rhaiis.yaml

Lines changed: 137 additions & 0 deletions (new file)

```yaml
version: '2'
image_name: rhaiis-configuration

apis:
- agents
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
benchmarks: []
container_image: null
datasets: []
external_providers_dir: null
inference_store:
  db_path: .llama/distributions/ollama/inference_store.db
  type: sqlite
logging: null
metadata_store:
  db_path: .llama/distributions/ollama/registry.db
  namespace: null
  type: sqlite
providers:
  files:
  - config:
      storage_dir: /tmp/llama-stack-files
      metadata_store:
        type: sqlite
        db_path: .llama/distributions/ollama/files_metadata.db
    provider_id: localfs
    provider_type: inline::localfs
  agents:
  - config:
      persistence_store:
        db_path: .llama/distributions/ollama/agents_store.db
        namespace: null
        type: sqlite
      responses_store:
        db_path: .llama/distributions/ollama/responses_store.db
        type: sqlite
    provider_id: meta-reference
    provider_type: inline::meta-reference
  datasetio:
  - config:
      kvstore:
        db_path: .llama/distributions/ollama/huggingface_datasetio.db
        namespace: null
        type: sqlite
    provider_id: huggingface
    provider_type: remote::huggingface
  - config:
      kvstore:
        db_path: .llama/distributions/ollama/localfs_datasetio.db
        namespace: null
        type: sqlite
    provider_id: localfs
    provider_type: inline::localfs
  eval:
  - config:
      kvstore:
        db_path: .llama/distributions/ollama/meta_reference_eval.db
        namespace: null
        type: sqlite
    provider_id: meta-reference
    provider_type: inline::meta-reference
  inference:
  - provider_id: sentence-transformers # Can be any embedding provider
    provider_type: inline::sentence-transformers
    config: {}
  - provider_id: vllm
    provider_type: remote::vllm
    config:
      url: http://${env.RHAIIS_URL}:8000/v1/
      api_token: ${env.RHAIIS_API_KEY}
      tls_verify: false
      max_tokens: 2048
  post_training:
  - config:
      checkpoint_format: huggingface
      device: cpu
      distributed_backend: null
      dpo_output_dir: "."
    provider_id: huggingface
    provider_type: inline::huggingface-gpu
  safety:
  - config:
      excluded_categories: []
    provider_id: llama-guard
    provider_type: inline::llama-guard
  scoring:
  - config: {}
    provider_id: basic
    provider_type: inline::basic
  - config: {}
    provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
  - config:
      openai_api_key: '********'
    provider_id: braintrust
    provider_type: inline::braintrust
  telemetry:
  - config:
      service_name: 'lightspeed-stack-telemetry'
      sinks: sqlite
      sqlite_db_path: .llama/distributions/ollama/trace_store.db
    provider_id: meta-reference
    provider_type: inline::meta-reference
  tool_runtime:
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
scoring_fns: []
server:
  auth: null
  host: null
  port: 8321
  quota: null
  tls_cafile: null
  tls_certfile: null
  tls_keyfile: null
shields: []
models:
- metadata:
    embedding_dimension: 768 # Depends on chosen model
  model_id: sentence-transformers/all-mpnet-base-v2 # Example embedding model
  provider_id: sentence-transformers
  provider_model_id: sentence-transformers/all-mpnet-base-v2 # Location of embedding model
  model_type: embedding
- model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: vllm
  model_type: llm
  provider_model_id: meta-llama/Llama-3.1-8B-Instruct
```
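
End to end, the pieces above compose into the same sequence the workflow runs. A local sketch, assuming Docker, the repo root as the working directory, and `RHAIIS_URL`/`RHAIIS_API_KEY` already exported:

```bash
# 1. Select the RHAIIS llama-stack config, as the workflow's
#    "Select and configure run.yaml" step does.
cp tests/e2e/configs/run-rhaiis.yaml run.yaml

# 2. Start llama-stack (port 8321) and lightspeed-stack (port 8080).
docker compose up -d
sleep 20  # crude readiness wait, same as the workflow

# 3. Smoke-test the service, then run the full e2e suite.
curl -f http://localhost:8080/v1/models
pip install uv && uv sync
make test-e2e
```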

0 commit comments