40 commits
f636e2c  Allow using vllm image (dhuangnm, Oct 29, 2025)
afbf811  fix a typo (dhuangnm, Oct 31, 2025)
a55e5c8  fix typo again (dhuangnm, Oct 31, 2025)
ceee681  fix an issue (dhuangnm, Oct 31, 2025)
bcc7a50  fix an issue (dhuangnm, Oct 31, 2025)
665cd1e  fix cmd string (dhuangnm, Oct 31, 2025)
4bf0dc1  fix an issue (dhuangnm, Oct 31, 2025)
59cea15  add debugging (dhuangnm, Oct 31, 2025)
be75c8d  don't delete run folder if using image (dhuangnm, Nov 3, 2025)
586dcc1  allow using pulled image or deployed runner (dhuangnm, Nov 5, 2025)
c1dde7f  fix a typo (dhuangnm, Nov 5, 2025)
ae9e526  remove extra ) (dhuangnm, Nov 5, 2025)
80352db  run vllm with podman (dhuangnm, Nov 5, 2025)
8461d03  fix error (dhuangnm, Nov 5, 2025)
5704e62  fix issues (dhuangnm, Nov 5, 2025)
098f561  fix path (dhuangnm, Nov 5, 2025)
d564408  improve output (dhuangnm, Nov 5, 2025)
5da7eee  fix typo (dhuangnm, Nov 5, 2025)
4cb2251  fix format (dhuangnm, Nov 5, 2025)
d2cb646  fix command (dhuangnm, Nov 5, 2025)
5cdb543  allow file to execute (dhuangnm, Nov 5, 2025)
6dc42c4  minor update (dhuangnm, Nov 5, 2025)
84634e0  copy file (dhuangnm, Nov 5, 2025)
57c99ac  fix issue (dhuangnm, Nov 5, 2025)
7cdedbb  run vllm in deployed pod (dhuangnm, Nov 7, 2025)
3951475  missed , (dhuangnm, Nov 7, 2025)
5c401fc  fix command (dhuangnm, Nov 7, 2025)
870b6ee  remove VLLM_VOLUME_MOUNT_DIR (dhuangnm, Nov 11, 2025)
d23bdf4  fix missing path (dhuangnm, Nov 11, 2025)
625c9db  clean up (dhuangnm, Nov 11, 2025)
264fdcb  final update (dhuangnm, Nov 13, 2025)
318bd3d  clean up (dhuangnm, Nov 13, 2025)
117ec9d  fix quality failures (dhuangnm, Nov 14, 2025)
8b41d5f  reorg test code and remove env var (dhuangnm, Nov 24, 2025)
1b2530e  fix error (dhuangnm, Nov 25, 2025)
3d889c6  fix another error (dhuangnm, Nov 25, 2025)
7e77202  fix style (dhuangnm, Nov 25, 2025)
7662699  clean up and fix format (dhuangnm, Nov 25, 2025)
abb6bab  fix format (dhuangnm, Nov 25, 2025)
de58b02  rename file to be rhaiis specific (dhuangnm, Nov 25, 2025)
7 changes: 7 additions & 0 deletions tests/e2e/vLLM/rhaiis-e2e-smoke.list
@@ -0,0 +1,7 @@
fp4_nvfp4.yaml
fp8_dynamic_per_token.yaml
kv_cache_gptq_tinyllama.yaml
sparse2of4_fp8_dynamic.yaml
w4a16_grouped_quant_asym_awq.yaml
w4a16_actorder_weight.yaml
int8_channel_weight_static_per_tensor_act.yaml
79 changes: 79 additions & 0 deletions tests/e2e/vLLM/run_tests_in_rhaiis.sh
@@ -0,0 +1,79 @@
#!/bin/bash

usage() {
    echo "Usage: $0 -c <config> -t <test> -s <save_dir>"
    exit 1
}

while getopts "c:t:s:" OPT; do
    case ${OPT} in
        c )
            CONFIG="$OPTARG"
            ;;
        t )
            TEST="$OPTARG"
            ;;
        s )
            SAVE_DIR="$OPTARG"
            ;;
        \? )
            exit 1
            ;;
    esac
done

if [[ -z "$CONFIG" || -z "$TEST" || -z "$SAVE_DIR" ]]; then
    echo "Error: -c, -t, and -s are required."
    usage
fi

script_path=$(dirname "${BASH_SOURCE[0]}")
if [ -d "$CONFIG" ]; then
    echo "Config is provided as a folder: $CONFIG"
    CONFIGS=`ls "$CONFIG"`
elif [ -f "$CONFIG" ]; then
    echo "Config is provided as a file: $CONFIG"
    CONFIGS=`cat "$CONFIG"`
fi

SUCCESS=0

# Parse list of configs and add save_dir
rm -rf $SAVE_DIR/configs
mkdir -p $SAVE_DIR/configs
for MODEL_CONFIG in $(echo -e "$CONFIGS" | sed "s|^|${script_path}/configs/|")
do
    FILE_NAME=$(basename $MODEL_CONFIG)
    CONFIG_FILE=$SAVE_DIR/configs/$FILE_NAME

    save_dir=$(cat $MODEL_CONFIG | grep 'save_dir:' | cut -d' ' -f2)
    model=$(cat $MODEL_CONFIG | grep 'model:' | cut -d'/' -f2)
    scheme=$(cat $MODEL_CONFIG | grep 'scheme:' | cut -d' ' -f2)

    # add or overwrite save_dir for each model
    if [[ -z "$save_dir" ]]; then
        { cat $MODEL_CONFIG; echo -e "\nsave_dir: $SAVE_DIR/$model-$scheme"; } > $CONFIG_FILE
    else
        { cat $MODEL_CONFIG | grep -v 'save_dir'; echo "save_dir: $SAVE_DIR/$save_dir"; } > $CONFIG_FILE
    fi

    echo "=== RUNNING MODEL: $CONFIG_FILE ==="
    cat $CONFIG_FILE

    LOCAL_SUCCESS=0
    export TEST_DATA_FILE="$CONFIG_FILE"
    pytest \
        --capture=tee-sys \
        "$TEST" || LOCAL_SUCCESS=$?

    if [[ $LOCAL_SUCCESS == 0 ]]; then
        echo "=== PASSED MODEL: $CONFIG_FILE ==="
    else
        echo "=== FAILED MODEL: $CONFIG_FILE ==="
    fi

    SUCCESS=$((SUCCESS + LOCAL_SUCCESS))

done

exit "$SUCCESS"
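
For reference, a minimal sketch of invoking this script from the repository root; the save directory /tmp/rhaiis-e2e is a placeholder, and -c may also point at a folder of config files instead of a list file:

```bash
# Run the RHAIIS smoke list through the vLLM e2e test, writing compressed
# models and the rewritten per-model configs under the -s directory.
bash tests/e2e/vLLM/run_tests_in_rhaiis.sh \
    -c tests/e2e/vLLM/rhaiis-e2e-smoke.list \
    -t tests/e2e/vLLM/test_vllm.py \
    -s /tmp/rhaiis-e2e
```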
95 changes: 79 additions & 16 deletions tests/e2e/vLLM/test_vllm.py
@@ -23,6 +23,9 @@
SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")
# vllm python environment: "same" runs vLLM in the current env, an existing path
# is used as a separate python interpreter, and any other value is treated as a
# deployed RHAIIS vllm image/runner to exec into
VLLM_PYTHON_ENV = os.environ.get("VLLM_PYTHON_ENV", "same")
IS_VLLM_IMAGE = False
if VLLM_PYTHON_ENV.lower() != "same" and (not Path(VLLM_PYTHON_ENV).exists()):
    IS_VLLM_IMAGE = True
TIMINGS_DIR = os.environ.get("TIMINGS_DIR", "timings/e2e-test_vllm")
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
EXPECTED_SAVED_FILES = [
@@ -96,12 +99,8 @@ def set_up(self, test_data_file: str):
        ]
        self.api = HfApi()

    def test_vllm(self, test_data_file: str):
        # Run vLLM with saved model

    def compress_model(self, test_data_file: str):
        self.set_up(test_data_file)
        if not self.save_dir:
            self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
        oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
            model=self.model,
            model_class=self.model_class,
@@ -114,12 +113,17 @@ def test_vllm(self, test_data_file: str):
            recipe=self.recipe,
            quant_type=self.quant_type,
        )
        self.oneshot_model = oneshot_model
        self.tokenizer = tokenizer

        # check that session contains recipe
        self._check_session_contains_recipe()

    def save_compressed_model(self):
        logger.info("================= SAVING TO DISK ======================")
        self._save_compressed_model(oneshot_model=oneshot_model, tokenizer=tokenizer)
        self._save_compressed_model(
            oneshot_model=self.oneshot_model, tokenizer=self.tokenizer
        )

        recipe_path = os.path.join(self.save_dir, "recipe.yaml")

@@ -151,7 +155,15 @@ def test_vllm(self, test_data_file: str):
                folder_path=self.save_dir,
            )

        if VLLM_PYTHON_ENV.lower() == "same":
    def test_vllm(self, test_data_file: str):
        self.compress_model(test_data_file)

        self.save_compressed_model()

        # Run vLLM with saved model
        if IS_VLLM_IMAGE:
            logger.info("========== RUNNING vLLM in RHAIIS vllm image ==========")
        elif VLLM_PYTHON_ENV.lower() == "same":
            logger.info("========== RUNNING vLLM in the same python env ==========")
        else:
            logger.info("========== RUNNING vLLM in a separate python env ==========")
@@ -198,17 +210,68 @@ def _run_vllm(self, logger):
        json_prompts = json.dumps(self.prompts)

        test_file_dir = os.path.dirname(os.path.abspath(__file__))
        run_file_path = os.path.join(test_file_dir, "run_vllm.py")

        logger.info("Run vllm in subprocess.Popen() using python env:")
        logger.info(self.vllm_env)
        if IS_VLLM_IMAGE:
            # generate python command to run in the vllm image
            RUN_SAVE_DIR = os.path.dirname(self.save_dir)
            run_file_path = os.path.join(RUN_SAVE_DIR, "run_vllm.py")
            shutil.copy(
                os.path.join(test_file_dir, "run_vllm.py"),
                os.path.join(RUN_SAVE_DIR, "run_vllm.py"),
            )
            cmds = [
                "python",
                run_file_path,
                f"'{json_scheme}'",
                f"'{json_llm_kwargs}'",
                f"'{json_prompts}'",
            ]
            vllm_cmd = " ".join(cmds)
            vllm_bash = os.path.join(RUN_SAVE_DIR, "run-vllm.bash")
            with open(vllm_bash, "w") as cf:
                cf.write(
                    f"""#!/bin/bash
export HF_HUB_OFFLINE=0
export VLLM_NO_USAGE_STATS=1
{vllm_cmd}
"""
                )
            os.chmod(vllm_bash, 0o755)
            logger.info(f"Wrote vllm cmd into {vllm_bash}:")
            logger.info("vllm image. Run vllm cmd with kubectl.")
            result = subprocess.Popen(
                [
                    "kubectl",
                    "exec",
                    "-it",
                    VLLM_PYTHON_ENV,
                    "-n",
                    "arc-runners",
                    "--",
                    "/bin/bash",
                    vllm_bash,
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
        else:
            run_file_path = os.path.join(test_file_dir, "run_vllm.py")
            logger.info("Run vllm in subprocess.Popen using python env:")
            logger.info(self.vllm_env)
            result = subprocess.Popen(
                [
                    self.vllm_env,
                    run_file_path,
                    json_scheme,
                    json_llm_kwargs,
                    json_prompts,
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )

        result = subprocess.Popen(
            [self.vllm_env, run_file_path, json_scheme, json_llm_kwargs, json_prompts],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        stdout, stderr = result.communicate()
        logger.info(stdout)

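
For context, a sketch of how VLLM_PYTHON_ENV selects the execution mode implemented above; the interpreter path and pod name are placeholders, not values defined in this PR:

```bash
# Mode 1 (default): run vLLM in the same python environment as the test.
export VLLM_PYTHON_ENV=same

# Mode 2: an existing path is used as a separate python interpreter for run_vllm.py.
# export VLLM_PYTHON_ENV=/opt/vllm-venv/bin/python    # placeholder path

# Mode 3: any other value is treated as a deployed RHAIIS runner; run-vllm.bash is
# written next to the saved model and executed via `kubectl exec -n arc-runners`.
# export VLLM_PYTHON_ENV=vllm-rhaiis-runner           # placeholder pod name

# TEST_DATA_FILE points at the model config, as exported by run_tests_in_rhaiis.sh.
export TEST_DATA_FILE=/tmp/rhaiis-e2e/configs/fp8_dynamic_per_token.yaml
pytest --capture=tee-sys tests/e2e/vLLM/test_vllm.py
```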