|
59 | 59 | "outputs": [], |
60 | 60 | "source": [ |
61 | 61 | "import requests, os, json\n", |
62 | | - "\n", |
63 | 62 | "RAMALAMA_HOST = os.environ.get('RAMALAMA_HOST', 'http://ai-starter-kit-ramalama:8080')\n", |
64 | 63 | "MODEL_NAME = \"qwen2.5:1.5b\"\n", |
65 | 64 | "\n", |
|
73 | 72 | "if check_ramalama():\n", |
74 | 73 | " print(\"RamaLama service is running\")\n", |
75 | 74 | " \n", |
| 75 | + " print(f\"\\nChecking model {MODEL_NAME}...\")\n", |
76 | 76 | " try:\n", |
77 | 77 | " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\")\n", |
78 | 78 | " models = r.json().get('data', [])\n", |
79 | | - " model_exists = any(m.get('id') == MODEL_NAME for m in models) \n", |
80 | | - " if model_exists:\n", |
81 | | - " print(f\"Model {MODEL_NAME} already available\")\n", |
| 79 | + " model_exists = any(m.get('id') == MODEL_NAME for m in models)\n", |
| 80 | + " \n", |
| 81 | + " if not model_exists:\n", |
| 82 | + "            print(f\"Model {MODEL_NAME} not loaded; triggering a pull with a minimal request...\")\n", |
| 83 | + " \n", |
| 84 | + " try:\n", |
| 85 | + " test_body = {\n", |
| 86 | + " \"model\": MODEL_NAME,\n", |
| 87 | + " \"messages\": [{\"role\": \"user\", \"content\": \"test\"}],\n", |
| 88 | + " \"max_tokens\": 1\n", |
| 89 | + " }\n", |
| 90 | + " r = requests.post(f\"{RAMALAMA_HOST}/v1/chat/completions\", json=test_body, timeout=300)\n", |
| 91 | + " \n", |
| 92 | + " if r.status_code == 200:\n", |
| 93 | + " print(f\"Model {MODEL_NAME} loaded successfully\")\n", |
| 94 | + " else:\n", |
| 95 | + " print(f\"Failed to load model. Status: {r.status_code}\")\n", |
| 96 | + " print(\"You may need to pull the model manually in the RamaLama deployment\")\n", |
| 97 | + " \n", |
| 98 | + " except requests.exceptions.Timeout:\n", |
| 99 | + " print(\"Model pull timed out. Large models may take longer.\")\n", |
| 100 | + " print(\"Check RamaLama logs to monitor progress\")\n", |
| 101 | + " except Exception as e:\n", |
| 102 | + " print(f\"Error pulling model: {e}\")\n", |
| 103 | + " print(\"You may need to pull the model manually in the RamaLama deployment\")\n", |
82 | 104 | " else:\n", |
83 | | - " print(f\"Model {MODEL_NAME} not found; ensure it's pulled in the deployment\")\n", |
| 105 | + " print(f\"Model {MODEL_NAME} already available\")\n", |
| 106 | + " \n", |
84 | 107 | " except Exception as e:\n", |
85 | 108 | " print(f\"Error checking model: {e}\")\n", |
86 | 109 | "else:\n", |
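
For reference, the check-and-warm-up flow this hunk adds can be lifted into a reusable helper. A minimal sketch, assuming the same OpenAI-compatible `/v1/models` and `/v1/chat/completions` routes the notebook targets; the `ensure_model` name is illustrative:

```python
import requests

def ensure_model(host: str, model: str, timeout: int = 300) -> bool:
    """Return True once `model` is available on an OpenAI-compatible server."""
    # List the models the server currently knows about.
    models = requests.get(f"{host}/v1/models", timeout=10).json().get("data", [])
    if any(m.get("id") == model for m in models):
        return True
    # Not listed: send a 1-token completion so a server that pulls on
    # first use (as RamaLama is assumed to here) loads the model.
    r = requests.post(
        f"{host}/v1/chat/completions",
        json={"model": model,
              "messages": [{"role": "user", "content": "test"}],
              "max_tokens": 1},
        timeout=timeout,
    )
    return r.status_code == 200
```

Called as e.g. `ensure_model(RAMALAMA_HOST, MODEL_NAME)` before starting the wrapper.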
|
107 | 130 | "source": [ |
108 | 131 | "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", |
109 | 132 | "\n", |
110 | | - "import os, threading, time, json\n", |
| 133 | + "import os, subprocess, time, json, requests\n", |
111 | 134 | "from pathlib import Path\n", |
112 | 135 | "\n", |
113 | 136 | "api_wrapper_code = '''\n", |
|
128 | 151 | " pass\n", |
129 | 152 | "\n", |
130 | 153 | "app = FastAPI()\n", |
131 | | - "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", |
| 154 | + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://ai-starter-kit-ramalama:8080\")\n", |
132 | 155 | "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", |
133 | 156 | "\n", |
134 | 157 | "@app.get(\"/v1/healthz\")\n", |
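
The diff elides the wrapper's endpoint bodies (notebook lines 135-205 are not shown). For orientation only, here is a hypothetical sketch of a health route plus chat passthrough in this shape; it is an assumption about the elided code, not the notebook's actual implementation:

```python
import os
import requests
from fastapi import FastAPI

app = FastAPI()
RAMALAMA_HOST = os.getenv("RAMALAMA_HOST", "http://ai-starter-kit-ramalama:8080")
MODEL_NAME = os.getenv("MODEL_NAME", "qwen2.5:1.5b")

@app.get("/v1/healthz")
def healthz():
    # Report whether the upstream RamaLama server is reachable.
    try:
        requests.get(f"{RAMALAMA_HOST}/v1/models", timeout=2)
        return {"status": "ok", "model": MODEL_NAME}
    except requests.RequestException as e:
        return {"status": "degraded", "error": str(e)}

@app.post("/v1/chat/completions")
def chat(payload: dict):
    # Forward the OpenAI-style payload to RamaLama unchanged,
    # defaulting the model so bare requests still work.
    payload.setdefault("model", MODEL_NAME)
    r = requests.post(f"{RAMALAMA_HOST}/v1/chat/completions", json=payload, timeout=300)
    return r.json()

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
```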
|
206 | 229 | "with open('/tmp/ramalama_wrapper.py', 'w') as f:\n", |
207 | 230 | " f.write(api_wrapper_code)\n", |
208 | 231 | "\n", |
209 | | - "def run_api():\n", |
210 | | - " subprocess.run([\"python\", \"/tmp/ramalama_wrapper.py\"], capture_output=True)\n", |
211 | | - "\n", |
212 | | - "import subprocess\n", |
213 | | - "api_process = subprocess.Popen(\n", |
214 | | - " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", |
215 | | - " env={**os.environ, \n", |
216 | | - " \"RAMALAMA_HOST\": os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\"),\n", |
217 | | - " \"MODEL_NAME\": MODEL_NAME,\n", |
218 | | - " \"MLFLOW_TRACKING_URI\": MLFLOW_URI},\n", |
219 | | - " stdout=subprocess.DEVNULL,\n", |
220 | | - " stderr=subprocess.DEVNULL\n", |
221 | | - ")\n", |
| 232 | + "print(\"Wrapper script created\")\n", |
| 233 | + "\n", |
| 234 | + "print(\"Killing existing wrapper processes...\")\n", |
| 235 | + "!pkill -f ramalama_wrapper.py 2>/dev/null || true\n", |
| 236 | + "time.sleep(2)\n", |
222 | 237 | "\n", |
223 | | - "time.sleep(3)\n", |
| 238 | + "log_file = '/tmp/ramalama_wrapper.log'\n", |
| 239 | + "!touch /tmp/ramalama_wrapper.log\n", |
| 240 | + "!chmod 666 /tmp/ramalama_wrapper.log\n", |
| 241 | + "print(\"Log file ready\")\n", |
224 | 242 | "\n", |
| 243 | + "MODEL_NAME = \"qwen2.5:1.5b\"\n", |
| 244 | + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", |
| 245 | + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://ai-starter-kit-ramalama:8080\")\n", |
| 246 | + "\n", |
| 247 | + "env_vars = {\n", |
| 248 | + " 'RAMALAMA_HOST': RAMALAMA_HOST,\n", |
| 249 | + " 'MODEL_NAME': MODEL_NAME,\n", |
| 250 | + " 'MLFLOW_TRACKING_URI': MLFLOW_URI\n", |
| 251 | + "}\n", |
| 252 | + "\n", |
| 253 | + "print(\"\\nEnvironment variables:\")\n", |
| 254 | + "for k, v in env_vars.items():\n", |
| 255 | + " print(f\" {k}={v}\")\n", |
| 256 | + "\n", |
| 257 | + "print(\"\\nStarting API wrapper...\")\n", |
| 258 | + "with open(log_file, 'w') as log:\n", |
| 259 | + " api_process = subprocess.Popen(\n", |
| 260 | + " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", |
| 261 | + " stdout=log,\n", |
| 262 | + " stderr=subprocess.STDOUT,\n", |
| 263 | + " env={**os.environ, **env_vars},\n", |
| 264 | + " start_new_session=True\n", |
| 265 | + " )\n", |
| 266 | + "\n", |
| 267 | + "print(f\"Process started with PID: {api_process.pid}\")\n", |
| 268 | + "\n", |
| 269 | + "time.sleep(2)\n", |
| 270 | + "if api_process.poll() is not None:\n", |
| 271 | + " print(f\"\\nERROR: Process died immediately with exit code {api_process.poll()}\")\n", |
| 272 | + " print(\"\\nLog contents:\")\n", |
| 273 | + " !cat /tmp/ramalama_wrapper.log\n", |
| 274 | + " raise Exception(\"API wrapper failed to start\")\n", |
| 275 | + "print(\"Process is running\")\n", |
| 276 | + "\n", |
| 277 | + "print(\"\\nWaiting for API to respond...\")\n", |
225 | 278 | "API_URL = \"http://localhost:8000\"\n", |
226 | | - "try:\n", |
227 | | - " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=5)\n", |
228 | | - " print(\"API Status:\", r.json())\n", |
229 | | - " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", |
230 | | - " print(f\"Health: {API_URL}/v1/healthz\")\n", |
231 | | - " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", |
232 | | - "except Exception as e:\n", |
233 | | - " print(f\"Warning: API wrapper not responding: {e}\")\n", |
234 | | - " print(\"You may need to run the wrapper manually\")" |
| 279 | + "api_ready = False\n", |
| 280 | + "\n", |
| 281 | + "for i in range(30):\n", |
| 282 | + " time.sleep(1)\n", |
| 283 | + " try:\n", |
| 284 | + " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=1)\n", |
| 285 | + " if r.status_code == 200:\n", |
| 286 | + " print(f\"\\nAPI is ready! Response: {r.json()}\")\n", |
| 287 | + " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", |
| 288 | + " print(f\"Health: {API_URL}/v1/healthz\")\n", |
| 289 | + " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", |
| 290 | + " api_ready = True\n", |
| 291 | + " break\n", |
| 292 | + " except requests.exceptions.ConnectionError:\n", |
| 293 | + " if i % 5 == 0:\n", |
| 294 | + " print(f\" Waiting for API... ({i}s)\")\n", |
| 295 | + " except Exception as e:\n", |
| 296 | + " if i % 10 == 0:\n", |
| 297 | + " print(f\" Unexpected error: {e}\")\n", |
| 298 | + "\n", |
| 299 | + "if not api_ready:\n", |
| 300 | + "    print(\"\\nAPI did not become ready within ~30 seconds\")\n", |
| 301 | + " print(\"\\nChecking if process is still alive...\")\n", |
| 302 | + " if api_process.poll() is not None:\n", |
| 303 | + " print(f\"Process died with exit code: {api_process.poll()}\")\n", |
| 304 | + " else:\n", |
| 305 | + " print(\"Process is still running but not responding\")\n", |
| 306 | + " \n", |
| 307 | + " print(\"\\nLast 50 lines of logs:\")\n", |
| 308 | + " !tail -50 /tmp/ramalama_wrapper.log\n", |
| 309 | + " \n", |
| 310 | + " print(\"\\nChecking if port 8000 is in use:\")\n", |
| 311 | + " !netstat -tlnp 2>/dev/null | grep 8000 || echo \"No process on port 8000\"\n", |
| 312 | + " \n", |
| 313 | + " print(\"\\nNote: You can re-run this cell - the API might just need more time to start\")" |
235 | 314 | ] |
236 | 315 | }, |
237 | 316 | { |
|
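Once the readiness loop reports the API is up, a quick smoke test against the wrapper might look like the following, assuming it forwards the standard OpenAI chat-completions request and response shapes:

```python
import requests

# One-shot request through the local wrapper started above.
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={"model": "qwen2.5:1.5b",
          "messages": [{"role": "user", "content": "Say hello in one word."}],
          "max_tokens": 16},
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```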