|
59 | 59 | "outputs": [], |
60 | 60 | "source": [ |
61 | 61 | "import requests, os, json\n", |
62 | | - "\n", |
63 | 62 | "RAMALAMA_HOST = os.environ.get('RAMALAMA_HOST', 'http://ai-starter-kit-ramalama:8080')\n", |
64 | 63 | "MODEL_NAME = \"qwen2.5:1.5b\"\n", |
65 | 64 | "\n", |
|
73 | 72 | "if check_ramalama():\n", |
74 | 73 | " print(\"RamaLama service is running\")\n", |
75 | 74 | " \n", |
| 75 | + " print(f\"\\nChecking model {MODEL_NAME}...\")\n", |
76 | 76 | " try:\n", |
77 | 77 | " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\")\n", |
78 | 78 | " models = r.json().get('data', [])\n", |
79 | | - " model_exists = any(m.get('id') == MODEL_NAME for m in models) \n", |
80 | | - " if model_exists:\n", |
81 | | - " print(f\"Model {MODEL_NAME} already available\")\n", |
| 79 | + " model_exists = any(m.get('id') == MODEL_NAME for m in models)\n", |
| 80 | + " \n", |
| 81 | + " if not model_exists:\n", |
| 82 | + "            print(f\"Model {MODEL_NAME} not loaded; triggering a pull with a minimal request...\")\n", |
| 83 | + " \n", |
| 84 | + " try:\n", |
| 85 | + " test_body = {\n", |
| 86 | + " \"model\": MODEL_NAME,\n", |
| 87 | + " \"messages\": [{\"role\": \"user\", \"content\": \"test\"}],\n", |
| 88 | + " \"max_tokens\": 1\n", |
| 89 | + " }\n", |
| 90 | + " r = requests.post(f\"{RAMALAMA_HOST}/v1/chat/completions\", json=test_body, timeout=300)\n", |
| 91 | + " \n", |
| 92 | + " if r.status_code == 200:\n", |
| 93 | + " print(f\"Model {MODEL_NAME} loaded successfully\")\n", |
| 94 | + " else:\n", |
| 95 | + " print(f\"Failed to load model. Status: {r.status_code}\")\n", |
| 96 | + " print(\"You may need to pull the model manually in the RamaLama deployment\")\n", |
| 97 | + " \n", |
| 98 | + " except requests.exceptions.Timeout:\n", |
| 99 | + " print(\"Model pull timed out. Large models may take longer.\")\n", |
| 100 | + " print(\"Check RamaLama logs to monitor progress\")\n", |
| 101 | + " except Exception as e:\n", |
| 102 | + " print(f\"Error pulling model: {e}\")\n", |
| 103 | + " print(\"You may need to pull the model manually in the RamaLama deployment\")\n", |
82 | 104 | " else:\n", |
83 | | - " print(f\"Model {MODEL_NAME} not found; ensure it's pulled in the deployment\")\n", |
| 105 | + " print(f\"Model {MODEL_NAME} already available\")\n", |
| 106 | + " \n", |
84 | 107 | " except Exception as e:\n", |
85 | 108 | " print(f\"Error checking model: {e}\")\n", |
86 | 109 | "else:\n", |
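
For reference, the check-and-warm-up flow this hunk adds can be lifted into a reusable helper. A minimal sketch, assuming the same OpenAI-compatible `/v1/models` and `/v1/chat/completions` routes the notebook targets; the `ensure_model` name is illustrative:

```python
import requests

def ensure_model(host: str, model: str, timeout: int = 300) -> bool:
    """Return True once `model` is available on an OpenAI-compatible server."""
    # List the models the server currently knows about.
    models = requests.get(f"{host}/v1/models", timeout=10).json().get("data", [])
    if any(m.get("id") == model for m in models):
        return True
    # Not listed: send a 1-token completion so a server that pulls on
    # first use (as RamaLama is assumed to here) loads the model.
    r = requests.post(
        f"{host}/v1/chat/completions",
        json={"model": model,
              "messages": [{"role": "user", "content": "test"}],
              "max_tokens": 1},
        timeout=timeout,
    )
    return r.status_code == 200
```

Called as e.g. `ensure_model(RAMALAMA_HOST, MODEL_NAME)` before starting the wrapper.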
|
107 | 130 | "source": [ |
108 | 131 | "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", |
109 | 132 | "\n", |
110 | | - "import os, threading, time, json\n", |
| 133 | + "import os, subprocess, time, json, requests\n", |
111 | 134 | "from pathlib import Path\n", |
112 | 135 | "\n", |
113 | 136 | "api_wrapper_code = '''\n", |
|
128 | 151 | " pass\n", |
129 | 152 | "\n", |
130 | 153 | "app = FastAPI()\n", |
131 | | - "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", |
| 154 | + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://ai-starter-kit-ramalama:8080\")\n", |
132 | 155 | "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", |
133 | 156 | "\n", |
134 | 157 | "@app.get(\"/v1/healthz\")\n", |
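
The diff elides the wrapper's endpoint bodies (notebook lines 135-205 are not shown). For orientation only, here is a hypothetical sketch of a health route plus chat passthrough in this shape; it is an assumption about the elided code, not the notebook's actual implementation:

```python
import os
import requests
from fastapi import FastAPI

app = FastAPI()
RAMALAMA_HOST = os.getenv("RAMALAMA_HOST", "http://ai-starter-kit-ramalama:8080")
MODEL_NAME = os.getenv("MODEL_NAME", "qwen2.5:1.5b")

@app.get("/v1/healthz")
def healthz():
    # Report whether the upstream RamaLama server is reachable.
    try:
        requests.get(f"{RAMALAMA_HOST}/v1/models", timeout=2)
        return {"status": "ok", "model": MODEL_NAME}
    except requests.RequestException as e:
        return {"status": "degraded", "error": str(e)}

@app.post("/v1/chat/completions")
def chat(payload: dict):
    # Forward the OpenAI-style payload to RamaLama unchanged,
    # defaulting the model so bare requests still work.
    payload.setdefault("model", MODEL_NAME)
    r = requests.post(f"{RAMALAMA_HOST}/v1/chat/completions", json=payload, timeout=300)
    return r.json()

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
```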
|
206 | 229 | "with open('/tmp/ramalama_wrapper.py', 'w') as f:\n", |
207 | 230 | " f.write(api_wrapper_code)\n", |
208 | 231 | "\n", |
209 | | - "def run_api():\n", |
210 | | - " subprocess.run([\"python\", \"/tmp/ramalama_wrapper.py\"], capture_output=True)\n", |
211 | | - "\n", |
212 | | - "import subprocess\n", |
213 | | - "api_process = subprocess.Popen(\n", |
214 | | - " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", |
215 | | - " env={**os.environ, \n", |
216 | | - " \"RAMALAMA_HOST\": os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\"),\n", |
217 | | - " \"MODEL_NAME\": MODEL_NAME,\n", |
218 | | - " \"MLFLOW_TRACKING_URI\": MLFLOW_URI},\n", |
219 | | - " stdout=subprocess.DEVNULL,\n", |
220 | | - " stderr=subprocess.DEVNULL\n", |
221 | | - ")\n", |
| 232 | + "print(\"Wrapper script created\")\n", |
| 233 | + "\n", |
| 234 | + "print(\"Killing existing wrapper processes...\")\n", |
| 235 | + "!pkill -f ramalama_wrapper.py 2>/dev/null || true\n", |
| 236 | + "time.sleep(2)\n", |
222 | 237 | "\n", |
223 | | - "time.sleep(3)\n", |
| 238 | + "log_file = '/tmp/ramalama_wrapper.log'\n", |
| 239 | + "!touch /tmp/ramalama_wrapper.log\n", |
| 240 | + "!chmod 666 /tmp/ramalama_wrapper.log\n", |
| 241 | + "print(\"Log file ready\")\n", |
224 | 242 | "\n", |
| 243 | + "MODEL_NAME = \"qwen2.5:1.5b\"\n", |
| 244 | + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", |
| 245 | + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://ai-starter-kit-ramalama:8080\")\n", |
| 246 | + "\n", |
| 247 | + "env_vars = {\n", |
| 248 | + " 'RAMALAMA_HOST': RAMALAMA_HOST,\n", |
| 249 | + " 'MODEL_NAME': MODEL_NAME,\n", |
| 250 | + " 'MLFLOW_TRACKING_URI': MLFLOW_URI\n", |
| 251 | + "}\n", |
| 252 | + "\n", |
| 253 | + "print(\"\\nEnvironment variables:\")\n", |
| 254 | + "for k, v in env_vars.items():\n", |
| 255 | + " print(f\" {k}={v}\")\n", |
| 256 | + "\n", |
| 257 | + "print(\"\\nStarting API wrapper...\")\n", |
| 258 | + "with open(log_file, 'w') as log:\n", |
| 259 | + " api_process = subprocess.Popen(\n", |
| 260 | + " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", |
| 261 | + " stdout=log,\n", |
| 262 | + " stderr=subprocess.STDOUT,\n", |
| 263 | + " env={**os.environ, **env_vars},\n", |
| 264 | + " start_new_session=True\n", |
| 265 | + " )\n", |
| 266 | + "\n", |
| 267 | + "print(f\"Process started with PID: {api_process.pid}\")\n", |
| 268 | + "\n", |
| 269 | + "time.sleep(2)\n", |
| 270 | + "if api_process.poll() is not None:\n", |
| 271 | + " print(f\"\\nERROR: Process died immediately with exit code {api_process.poll()}\")\n", |
| 272 | + " print(\"\\nLog contents:\")\n", |
| 273 | + " !cat /tmp/ramalama_wrapper.log\n", |
| 274 | + " raise Exception(\"API wrapper failed to start\")\n", |
| 275 | + "print(\"Process is running\")\n", |
| 276 | + "\n", |
| 277 | + "print(\"\\nWaiting for API to respond...\")\n", |
225 | 278 | "API_URL = \"http://localhost:8000\"\n", |
226 | | - "try:\n", |
227 | | - " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=5)\n", |
228 | | - " print(\"API Status:\", r.json())\n", |
229 | | - " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", |
230 | | - " print(f\"Health: {API_URL}/v1/healthz\")\n", |
231 | | - " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", |
232 | | - "except Exception as e:\n", |
233 | | - " print(f\"Warning: API wrapper not responding: {e}\")\n", |
234 | | - " print(\"You may need to run the wrapper manually\")" |
| 279 | + "api_ready = False\n", |
| 280 | + "\n", |
| 281 | + "for i in range(30):\n", |
| 282 | + " time.sleep(1)\n", |
| 283 | + " try:\n", |
| 284 | + " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=1)\n", |
| 285 | + " if r.status_code == 200:\n", |
| 286 | + " print(f\"\\nAPI is ready! Response: {r.json()}\")\n", |
| 287 | + " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", |
| 288 | + " print(f\"Health: {API_URL}/v1/healthz\")\n", |
| 289 | + " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", |
| 290 | + " api_ready = True\n", |
| 291 | + " break\n", |
| 292 | + " except requests.exceptions.ConnectionError:\n", |
| 293 | + " if i % 5 == 0:\n", |
| 294 | + " print(f\" Waiting for API... ({i}s)\")\n", |
| 295 | + " except Exception as e:\n", |
| 296 | + " if i % 10 == 0:\n", |
| 297 | + " print(f\" Unexpected error: {e}\")\n", |
| 298 | + "\n", |
| 299 | + "if not api_ready:\n", |
| 300 | + "    print(\"\\nAPI did not become ready within ~30 seconds\")\n", |
| 301 | + " print(\"\\nChecking if process is still alive...\")\n", |
| 302 | + " if api_process.poll() is not None:\n", |
| 303 | + " print(f\"Process died with exit code: {api_process.poll()}\")\n", |
| 304 | + " else:\n", |
| 305 | + " print(\"Process is still running but not responding\")\n", |
| 306 | + " \n", |
| 307 | + " print(\"\\nLast 50 lines of logs:\")\n", |
| 308 | + " !tail -50 /tmp/ramalama_wrapper.log\n", |
| 309 | + " \n", |
| 310 | + " print(\"\\nChecking if port 8000 is in use:\")\n", |
| 311 | + " !netstat -tlnp 2>/dev/null | grep 8000 || echo \"No process on port 8000\"\n", |
| 312 | + " \n", |
| 313 | + " print(\"\\nNote: You can re-run this cell - the API might just need more time to start\")" |
235 | 314 | ] |
236 | 315 | }, |
237 | 316 | { |
|
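Once the readiness loop reports the API is up, a quick smoke test against the wrapper might look like the following, assuming it forwards the standard OpenAI chat-completions request and response shapes:

```python
import requests

# One-shot request through the local wrapper started above.
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={"model": "qwen2.5:1.5b",
          "messages": [{"role": "user", "content": "Say hello in one word."}],
          "max_tokens": 16},
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```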