6161 PYTORCH_CUDA_ALLOC_CONF : ' expandable_segments:True'
6262 MAX_JOBS : 8
6363 RUNNER : 10.0.13.31
64+ XEON5 : 10.0.14.248
6465 LEGACY_TESTS : " models/test_internlm.py,models/test_internlm2_5.py,models/test_xverse.py"
6566 IGNORED_TEST_FILES : " test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py,test_bits_new.py"
6667 GPTQMODEL_FORCE_BUILD : 1
8586 echo "ref: ${{ env.ref }}"
8687 echo "artifact_id: ${{ github.event.inputs.artifact_id }}"
8788 echo "test_names: ${{ github.event.inputs.test_names }}"
89+ echo "exclusive-gpu: ${{ github.event.inputs.exclusive-gpu }}"
8890 echo "selected server: ${{ github.event.inputs.server }}"
8991
9092 - name : Select server
@@ -247,7 +249,7 @@ jobs:
247249
248250 - name : Upload source to local
249251 continue-on-error : true
250- run : curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.SOURCE_HASH }}" -F "file=@dist/gptqmodel_source.tar.gz" http://${{ needs.check-vm.outputs.ip }} /gpu/whl/upload
252+ run : curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.SOURCE_HASH }}" -F "file=@dist/gptqmodel_source.tar.gz" http://$RUNNER /gpu/whl/upload
251253
252254 - name : Upload source to github artifact
253255 uses : actions/upload-artifact@v4
@@ -277,7 +279,7 @@ jobs:
277279 - name : Upload wheel to local
278280 if : github.event.inputs.artifact_id == '' && !cancelled()
279281 continue-on-error : true
280- run : curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }} /gpu/whl/upload
282+ run : curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://$RUNNER /gpu/whl/upload
281283
282284 - name : Upload wheel to github artifact
283285 if : github.event.inputs.artifact_id == '' && !cancelled()
@@ -339,7 +341,7 @@ jobs:
339341 - name : Download source from local
340342 continue-on-error : true
341343 run : |
342- curl -s -O http://${{ needs.check-vm.outputs.ip }} /whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
344+ curl -s -O http://$RUNNER /whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
343345 ls -ahl .
344346 sha256=$(sha256sum $file_name)
345347 echo "sha256=$sha256"
@@ -363,14 +365,14 @@ jobs:
363365 - name : Download wheel from local
364366 continue-on-error : true
365367 run : |
366- file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }} /gpu/whl/download")
368+ file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://$RUNNER /gpu/whl/download")
367369
368370 echo "file_name=$file_name"
369371
370372 if echo "$file_name" | grep -q "gptqmodel"; then
371373 mkdir dist || true
372374 cd dist
373- curl -s -O http://${{ needs.check-vm.outputs.ip }} /whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
375+ curl -s -O http://$RUNNER /whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
374376 ls -ahl .
375377 sha256=$(sha256sum $file_name)
376378 echo "sha256=$sha256"
@@ -390,19 +392,19 @@ jobs:
390392 uv pip install tokenicer==0.0.4 -U
391393 uv pip install logbar==0.0.3 -U
392394 echo "===== install optimum bitblas parameterized uvicorn ====="
393- uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
395+ uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
394396 echo "===== install dist/whl ====="
395- uv pip install dist/*.whl -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
397+ uv pip install dist/*.whl -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
396398 echo "===== init test env ====="
397399 echo "===== install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 ====="
398- uv pip install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 -U -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
400+ uv pip install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 -U -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
399401 if [ "${{ matrix.test_script }}" == "models/test_xverse" ]; then
400402 echo "===== install tokenizers==0.15.2 ====="
401- uv pip install tokenizers==0.15.2 -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
403+ uv pip install tokenizers==0.15.2 -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
402404 fi
403405 if [ "${{ matrix.test_script }}" == "test_quant_formats" ] || [ "${{ matrix.test_script }}" == "test_perplexity" ]; then
404406 echo "===== install auto_round ====="
405- uv pip install auto_round -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
407+ uv pip install auto_round -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
406408 fi
407409
408410 - name : Find suitable GPU
@@ -412,12 +414,12 @@ jobs:
412414 gpu_id=-1
413415
414416 while [ "$gpu_id" -lt 0 ]; do
415- gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}")
417+ gpu_id=$(curl -s "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}")
416418
417419 if [ "$gpu_id" -lt 0 ]; then
418- echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
420+ echo "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
419421 echo "No available GPU, waiting 5 seconds..."
420- curl http://10.0.14.248 /gpu/status2
422+ curl http://$XEON5 /gpu/status2
421423 sleep 5
422424 else
423425 echo "Allocated GPU ID: $gpu_id"
@@ -429,7 +431,7 @@ jobs:
429431 echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
430432 echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
431433 echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
432- curl http://10.0.14.248 /gpu/status2
434+ curl http://$XEON5 /gpu/status2
433435
434436 - name : Run tests
435437 if : ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
@@ -438,11 +440,11 @@ jobs:
438440 pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
439441 execution_time=$(( $(date +%s) - start_time ))
440442 echo "$((execution_time / 60))m $((execution_time % 60))s"
441- curl "http://${{ needs.check-vm.outputs.ip }} /gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}"
443+ curl "http://$RUNNER /gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}"
442444
443445 - name : Release GPU
444446 if : always() && !contains(matrix.test_script, 'ipex')
445- run : curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"
447+ run : curl -X GET "http://$RUNNER/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"
446448
447449 - name : Clean cache
448450 if : always()
@@ -500,7 +502,7 @@ jobs:
500502 - name : Download source from local
501503 continue-on-error : true
502504 run : |
503- curl -s -O http://${{ needs.check-vm.outputs.ip }} /whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
505+ curl -s -O http://$RUNNER /whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
504506 ls -ahl .
505507 sha256=$(sha256sum $file_name)
506508 echo "sha256=$sha256"
@@ -524,14 +526,14 @@ jobs:
524526 - name : Download wheel from local
525527 continue-on-error : true
526528 run : |
527- file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }} /gpu/whl/download")
529+ file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://$RUNNER /gpu/whl/download")
528530
529531 echo "file_name=$file_name"
530532
531533 if echo "$file_name" | grep -q "gptqmodel"; then
532534 mkdir dist || true
533535 cd dist
534- curl -s -O http://${{ needs.check-vm.outputs.ip }} /whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
536+ curl -s -O http://$RUNNER /whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
535537 ls -ahl .
536538 sha256=$(sha256sum $file_name)
537539 echo "sha256=$sha256"
@@ -554,7 +556,7 @@ jobs:
554556 [ "${{ matrix.test_script }}" == "test_q4_bitblas" ] || \
555557 [ "${{ matrix.test_script }}" == "test_save_loaded_quantized_model" ]; then
556558 echo "===== install bitblas==0.0.1.dev13 ====="
557- uv pip install bitblas==0.0.1.dev13 -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
559+ uv pip install bitblas==0.0.1.dev13 -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
558560 fi
559561
560562 if [[ "${{ matrix.test_script }}" == *auto_round* ]]; then
@@ -563,7 +565,7 @@ jobs:
563565
564566 if [ "${{ matrix.test_script }}" == "models/test_cohere2" ] || [ "${{ matrix.test_script }}" == "models/test_gemma" ]; then
565567 echo "===== install transformers from git ====="
566- uv pip install -U transformers -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
568+ uv pip install -U transformers -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
567569 fi
568570
569571 if [[ "${{ matrix.test_script }}" == *xpu* ]]; then
@@ -575,24 +577,24 @@ jobs:
575577 if [[ "${{ matrix.test_script }}" == *ipex* ]] && [[ "${{ matrix.test_script }}" != *xpu* ]]; then
576578 uv pip uninstall torchvision torch flash_attn # fix ipex can't be used with torch+cu126
577579 uv pip install torchvision torch
578- uv pip install -U intel_extension_for_pytorch -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
580+ uv pip install -U intel_extension_for_pytorch -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
579581 fi
580582
581583 if [[ "${{ matrix.test_script }}" == *"mlx"* ]]; then
582- uv pip install mlx_lm --no-build-isolation -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
584+ uv pip install mlx_lm --no-build-isolation -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
583585 fi
584586
585587 if [[ "${{ matrix.test_script }}" == "test_modelscope" ]]; then
586588 echo "===== installing modelscope ====="
587- uv pip install modelscope --no-build-isolation -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
589+ uv pip install modelscope --no-build-isolation -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
588590 fi
589591
590592 uv pip install git+https://github.com/ModelCloud/Tokenicer -U
591593
592594 # ipex doesn't need to compile kernels. xpu can't install cuda package
593595 if [[ "${{ matrix.test_script }}" != *ipex* && "${{ matrix.test_script }}" != *xpu* ]]; then
594596 echo "===== install dist/whl ====="
595- uv pip install dist/*.whl -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
597+ uv pip install dist/*.whl -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
596598 else
597599 echo "===== install with local files for xpu env ====="
598600 export CUDA_VISIBLE_DEVICES=""
@@ -602,7 +604,7 @@ jobs:
602604
603605 if [ "${{ matrix.test_script }}" == "test_transformers" ]; then
604606 echo "===== install optimum from git ====="
605- uv pip install -U git+https://github.com/huggingface/optimum.git -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }}
607+ uv pip install -U git+https://github.com/huggingface/optimum.git -i http://$RUNNER /simple/ --trusted-host $RUNNER
606608 fi
607609
608610 if [[ "${{ matrix.test_script }}" == "test_sglang" ]]; then
@@ -617,12 +619,12 @@ jobs:
617619 gpu_id=-1
618620
619621 while [ "$gpu_id" -lt 0 ]; do
620- gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}")
622+ gpu_id=$(curl -s "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}")
621623
622624 if [ "$gpu_id" -lt 0 ]; then
623- echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
625+ echo "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
624626 echo "No available GPU, waiting 5 seconds..."
625- curl http://${{ needs.check-vm.outputs.ip }} /gpu/status2
627+ curl http://$XEON5 /gpu/status2
626628 sleep 5
627629 else
628630 echo "Allocated GPU ID: $gpu_id"
@@ -634,7 +636,7 @@ jobs:
634636 echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
635637 echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
636638 echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
637- curl http://${{ needs.check-vm.outputs.ip }} /gpu/status2
639+ curl http://$XEON5 /gpu/status2
638640
639641 - name : Run tests
640642 if : ${{ (!github.event.inputs.test_names || contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
@@ -653,11 +655,11 @@ jobs:
653655 pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
654656 execution_time=$(( $(date +%s) - start_time ))
655657 echo "$((execution_time / 60))m $((execution_time % 60))s"
656- curl "http://${{ needs.check-vm.outputs.ip }} /gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&test=${{ matrix.test_script }}"
658+ curl "http://$RUNNER /gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&test=${{ matrix.test_script }}"
657659
658660 - name : Release GPU
659661 if : always() && !contains(matrix.test_script, 'ipex') && !contains(matrix.test_script, 'xpu')
660- run : curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"
662+ run : curl -X GET "http://$RUNNER/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"
661663
662664 - name : Clean cache
663665 if : always()
@@ -667,15 +669,15 @@ jobs:
667669
668670 show-statistics :
669671 runs-on : [ self-hosted, xeon5 ]
670- if : github.event. inputs.exclusive-gpu == 'true'
672+ if : always() && inputs.exclusive-gpu
671673 container :
672674 image : modelcloud/gptqmodel:alpine-ci-v1
673675 needs :
674676 - legacy
675677 - torch
676678 steps :
677679 - name : Print statistics
678- run : curl "http://10.0.14.248 /gpu/get_vram_logs?id=${{ github.run_id }}"
680+ run : curl "http://$RUNNER /gpu/get_vram_logs?id=${{ github.run_id }}"
679681
680682 m4 :
681683 runs-on : [ self-hosted, m4 ]
@@ -714,16 +716,16 @@ jobs:
714716
715717 rm profile.sb || true
716718
717- curl -O http://${{ needs.check-vm.outputs.ip }} /scripts/m4/profile.sb
719+ curl -O http://$RUNNER /scripts/m4/profile.sb
718720
719721 echo "=== installing uv setuptools build"
720- pip install uv setuptools build -U -i http://${{ needs.check-vm.outputs.ip }} /simple --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
722+ pip install uv setuptools build -U -i http://$RUNNER /simple --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
721723
722724 echo "=== installing test tools"
723- uv pip install pytest parameterized vllm lm-eval device-smi mlx-lm -U -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
725+ uv pip install pytest parameterized vllm lm-eval device-smi mlx-lm -U -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
724726
725727 echo "=== installing gptqmodel"
726- uv pip install . --no-build-isolation -i http://${{ needs.check-vm.outputs.ip }} /simple/ --trusted-host ${{ needs.check-vm.outputs.ip }} --extra-index-url https://pypi.org/simple
728+ uv pip install . --no-build-isolation -i http://$RUNNER /simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
727729
728730 echo "replacing model path"
729731 find tests -name "*.py" -exec sed -i '' 's/\/monster\/data\/model/..\/..\/..\/monster/g' {} +
0 commit comments