40 commits
f636e2c  Allow using vllm image (dhuangnm, Oct 29, 2025)
afbf811  fix a typo (dhuangnm, Oct 31, 2025)
a55e5c8  fix typo again (dhuangnm, Oct 31, 2025)
ceee681  fix an issue (dhuangnm, Oct 31, 2025)
bcc7a50  fix an issue (dhuangnm, Oct 31, 2025)
665cd1e  fix cmd string (dhuangnm, Oct 31, 2025)
4bf0dc1  fix an issue (dhuangnm, Oct 31, 2025)
59cea15  add debugging (dhuangnm, Oct 31, 2025)
be75c8d  don't delete run folder if using image (dhuangnm, Nov 3, 2025)
586dcc1  allow using pulled image or deployed runner (dhuangnm, Nov 5, 2025)
c1dde7f  fix a typo (dhuangnm, Nov 5, 2025)
ae9e526  remove extra ) (dhuangnm, Nov 5, 2025)
80352db  run vllm with podman (dhuangnm, Nov 5, 2025)
8461d03  fix error (dhuangnm, Nov 5, 2025)
5704e62  fix issues (dhuangnm, Nov 5, 2025)
098f561  fix path (dhuangnm, Nov 5, 2025)
d564408  improve output (dhuangnm, Nov 5, 2025)
5da7eee  fix typo (dhuangnm, Nov 5, 2025)
4cb2251  fix format (dhuangnm, Nov 5, 2025)
d2cb646  fix command (dhuangnm, Nov 5, 2025)
5cdb543  allow file to execute (dhuangnm, Nov 5, 2025)
6dc42c4  minor update (dhuangnm, Nov 5, 2025)
84634e0  copy file (dhuangnm, Nov 5, 2025)
57c99ac  fix issue (dhuangnm, Nov 5, 2025)
7cdedbb  run vllm in deployed pod (dhuangnm, Nov 7, 2025)
3951475  missed , (dhuangnm, Nov 7, 2025)
5c401fc  fix command (dhuangnm, Nov 7, 2025)
870b6ee  remove VLLM_VOLUME_MOUNT_DIR (dhuangnm, Nov 11, 2025)
d23bdf4  fix missing path (dhuangnm, Nov 11, 2025)
625c9db  clean up (dhuangnm, Nov 11, 2025)
264fdcb  final update (dhuangnm, Nov 13, 2025)
318bd3d  clean up (dhuangnm, Nov 13, 2025)
117ec9d  fix quality failures (dhuangnm, Nov 14, 2025)
8b41d5f  reorg test code and remove env var (dhuangnm, Nov 24, 2025)
1b2530e  fix error (dhuangnm, Nov 25, 2025)
3d889c6  fix another error (dhuangnm, Nov 25, 2025)
7e77202  fix style (dhuangnm, Nov 25, 2025)
7662699  clean up and fix format (dhuangnm, Nov 25, 2025)
abb6bab  fix format (dhuangnm, Nov 25, 2025)
de58b02  rename file to be rhaiis specific (dhuangnm, Nov 25, 2025)
7 changes: 7 additions & 0 deletions tests/e2e/vLLM/rhaiis-e2e-smoke.list
@@ -0,0 +1,7 @@
fp4_nvfp4.yaml
fp8_dynamic_per_token.yaml
kv_cache_gptq_tinyllama.yaml
sparse2of4_fp8_dynamic.yaml
w4a16_grouped_quant_asym_awq.yaml
w4a16_actorder_weight.yaml
int8_channel_weight_static_per_tensor_act.yaml
79 changes: 79 additions & 0 deletions tests/e2e/vLLM/run_tests_in_rhaiis.sh
@@ -0,0 +1,79 @@
#!/bin/bash

usage() {
    echo "Usage: $0 -c <config> -t <test> -s <save_dir>"
    exit 1
}

while getopts "c:t:s:" OPT; do
    case ${OPT} in
        c )
            CONFIG="$OPTARG"
            ;;
        t )
            TEST="$OPTARG"
            ;;
        s )
            SAVE_DIR="$OPTARG"
            ;;
        \? )
            exit 1
            ;;
    esac
done

if [[ -z "$CONFIG" || -z "$TEST" || -z "$SAVE_DIR" ]]; then
    echo "Error: -c, -t, and -s are required."
    usage
fi

script_path=$(dirname "${BASH_SOURCE[0]}")
if [ -d "$CONFIG" ]; then
    echo "Config is provided as a folder: $CONFIG"
    CONFIGS=`ls "$CONFIG"`
elif [ -f "$CONFIG" ]; then
    echo "Config is provided as a file: $CONFIG"
    CONFIGS=`cat "$CONFIG"`
fi

SUCCESS=0

# Parse list of configs and add save_dir
rm -rf $SAVE_DIR/configs
mkdir -p $SAVE_DIR/configs
for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|")
do
    FILE_NAME=$(basename $MODEL_CONFIG)
    CONFIG_FILE=$SAVE_DIR/configs/$FILE_NAME

    save_dir=$(cat $MODEL_CONFIG | grep 'save_dir:' | cut -d' ' -f2)
    model=$(cat $MODEL_CONFIG | grep 'model:' | cut -d'/' -f2)
    scheme=$(cat $MODEL_CONFIG | grep 'scheme:' | cut -d' ' -f2)

    # add or overwrite save_dir for each model
    if [[ -z "$save_dir" ]]; then
        { cat $MODEL_CONFIG; echo -e "\nsave_dir: $SAVE_DIR/$model-$scheme"; } > $CONFIG_FILE
    else
        { cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR/$save_dir"; } > $CONFIG_FILE
    fi

    echo "=== RUNNING MODEL: $CONFIG_FILE ==="
    cat $CONFIG_FILE

    LOCAL_SUCCESS=0
    export TEST_DATA_FILE="$CONFIG_FILE"
    pytest \
        --capture=tee-sys \
        "$TEST" || LOCAL_SUCCESS=$?

    if [[ $LOCAL_SUCCESS == 0 ]]; then
        echo "=== PASSED MODEL: $CONFIG_FILE ==="
    else
        echo "=== FAILED MODEL: $CONFIG_FILE ==="
    fi

    SUCCESS=$((SUCCESS + LOCAL_SUCCESS))

done

exit "$SUCCESS"
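
For reference, a minimal sketch of invoking this script from the repository root; the save directory /tmp/rhaiis-e2e is a placeholder, and -c may also point at a folder of config files instead of a list file:

```bash
# Run the RHAIIS smoke list through the vLLM e2e test, writing compressed
# models and the rewritten per-model configs under the -s directory.
bash tests/e2e/vLLM/run_tests_in_rhaiis.sh \
    -c tests/e2e/vLLM/rhaiis-e2e-smoke.list \
    -t tests/e2e/vLLM/test_vllm.py \
    -s /tmp/rhaiis-e2e
```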
95 changes: 79 additions & 16 deletions tests/e2e/vLLM/test_vllm.py
@@ -23,6 +23,9 @@
SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")
# vllm python environment: "same" runs vLLM in the current env, an existing path
# is used as a separate python interpreter, and any other value is treated as a
# deployed RHAIIS vllm image/runner to exec into
VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same")
IS_VLLM_IMAGE = False
if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()):
    IS_VLLM_IMAGE = True
TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm")
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
EXPECTED_SAVED_FILES = [
@@ -96,12 +99,8 @@ def set_up(self, test_data_file: str):
        ]
        self.api = HfApi()

    def test_vllm(self, test_data_file: str):
        # Run vLLM with saved model

    def compress_model(self, test_data_file: str):
        self.set_up(test_data_file)
        if not self.save_dir:
            self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
        oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
            model=self.model,
            model_class=self.model_class,
@@ -114,12 +113,17 @@ def test_vllm(self, test_data_file: str):
            recipe=self.recipe,
            quant_type=self.quant_type,
        )
        self.oneshot_model = oneshot_model
        self.tokenizer = tokenizer

        # check that session contains recipe
        self._check_session_contains_recipe()

    def save_compressed_model(self):
        logger.info("================= SAVING TO DISK ======================")
        self._save_compressed_model(oneshot_model=oneshot_model, tokenizer=tokenizer)
        self._save_compressed_model(
            oneshot_model=self.oneshot_model, tokenizer=self.tokenizer
        )

        recipe_path = os.path.join(self.save_dir, "recipe.yaml")

@@ -151,7 +155,15 @@ def test_vllm(self, test_data_file: str):
                folder_path=self.save_dir,
            )

        if VLLM_PYTHON_ENV.lower() == "same":
    def test_vllm(self, test_data_file: str):
        self.compress_model(test_data_file)

        self.save_compressed_model()

        # Run vLLM with saved model
        if IS_VLLM_IMAGE:
            logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========")
        elif VLLM_PYTHON_ENV.lower() == "same":
            logger.info("========== RUNNING vLLM in the same python env ==========")
        else:
            logger.info("========== RUNNING vLLM in a separate python env ==========")
@@ -198,17 +210,68 @@ def _run_vllm(self, logger):
        json_prompts = json.dumps(self.prompts)

        test_file_dir = os.path.dirname(os.path.abspath(__file__))
        run_file_path = os.path.join(test_file_dir, "run_vllm.py")

        logger.info("Run vllm in subprocess.Popen() using python env:")
        logger.info(self.vllm_env)
        if IS_VLLM_IMAGE:
            # generate python command to run in the vllm image
            RUN_SAVE_DIR = os.path.dirname(self.save_dir)
            run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py")
            shutil.copy(
                os.path.join(test_file_dir, "run_vllm.py"),
                os.path.join(RUN_SAVE_DIR, "run_vllm.py"),
            )
            cmds = [
                "python",
                run_file_path,
                f"'{json_scheme}'",
                f"'{json_llm_kwargs}'",
                f"'{json_prompts}'",
            ]
            vllm_cmd = " ".join(cmds)
            vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash")
            with open(vllm_bash, "w") as cf:
                cf.write(
                    f"""#!/bin/bash
export HF_HUB_OFFLINE=0
export VLLM_NO_USAGE_STATS=1
{vllm_cmd}
"""
                )
            os.chmod(vllm_bash, 0o755)
            logger.info(f"Wrote vllm cmd into {vllm_bash}:")
            logger.info("vllm image. Run vllm cmd with kubectl.")
            result = subprocess.Popen(
                [
                    "kubectl",
                    "exec",
                    "-it",
                    VLLM_PYTHON_ENV,
                    "-n",
                    "arc-runners",
                    "--",
                    "/bin/bash",
                    vllm_bash,
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
        else:
            run_file_path = os.path.join(test_file_dir, "run_vllm.py")
            logger.info("Run vllm in subprocess.Popen using python env:")
            logger.info(self.vllm_env)
            result = subprocess.Popen(
                [
                    self.vllm_env,
                    run_file_path,
                    json_scheme,
                    json_llm_kwargs,
                    json_prompts,
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )

        result = subprocess.Popen(
            [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        stdout, stderr = result.communicate()
        logger.info(stdout)

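
For context, a sketch of how VLLM_PYTHON_ENV selects the execution mode implemented above; the interpreter path and pod name are placeholders, not values defined in this PR:

```bash
# Mode 1 (default): run vLLM in the same python environment as the test.
export VLLM_PYTHON_ENV=same

# Mode 2: an existing path is used as a separate python interpreter for run_vllm.py.
# export VLLM_PYTHON_ENV=/opt/vllm-venv/bin/python    # placeholder path

# Mode 3: any other value is treated as a deployed RHAIIS runner; run-vllm.bash is
# written next to the saved model and executed via `kubectl exec -n arc-runners`.
# export VLLM_PYTHON_ENV=vllm-rhaiis-runner           # placeholder pod name

# TEST_DATA_FILE points at the model config, as exported by run_tests_in_rhaiis.sh.
export TEST_DATA_FILE=/tmp/rhaiis-e2e/configs/fp8_dynamic_per_token.yaml
pytest --capture=tee-sys tests/e2e/vLLM/test_vllm.py
```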