2929 ref : ${{ github.event.inputs.ref }}
3030
3131 - name : Compile  # build the wheel that the later steps list and install from dist/
32+ shell : bash
3233 run : python setup.py bdist_wheel
3334
3435 - name : Show dist folder
@@ -45,145 +46,60 @@ jobs:
4546 runs-on : self-hosted
4647 container :
4748 image : modelcloud/gptqmodel:github-ci-v1
49+ strategy :
50+ fail-fast : false  # a failing matrix job does not cancel the remaining matrix jobs
51+ matrix :  # one job per entry; each `version` value is a test file name under tests/
52+ version : [ "test_perplexity.py", "test_lm_head.py", "test_q4_exallama.py", "test_q4_exallama_v2.py", "test_q4_marlin.py", "test_q4_triton.py", "test_repacking.py", "test_serialization.py", "test_sharded.py", "test_triton.py", "test_quant_formats.py", "test_q4_cuda.py", "test_q4_bitblas.py" ]
53+
4854 steps :
55+ - name : Checkout Codes  # check out the repository/ref supplied as workflow inputs
56+ uses : actions/checkout@v4
57+ with :
58+ repository : ${{ github.event.inputs.repo }}
59+ ref : ${{ github.event.inputs.ref }}
60+
61+ - name : Show folder  # list the workspace, then empty dist/ before the artifact download
62+ run : |
63+ ls -alh . || true
64+ ls -alh dist || true
65+ rm -rf dist/* || true  # `|| true` on each command keeps this step from failing the job
66+
4967 - name : Download artifact  # fetch the artifact named `dist` into the local dist/ directory
5068 uses : actions/download-artifact@v4
5169 with :
5270 name : dist
5371 path : dist
5472
5573 - name : Show dist folder  # informational listing only
56- run : ls -alh dist
74+ run : ls -alh dist || true  # `|| true`: a failed listing does not fail the step
5775
5876 - name : Install wheel  # install every wheel matched by dist/*.whl
77+ shell : bash
5978 run : |
60- # install only the last version
6179 pip install dist/*.whl
6280
6381 - name : Find suitable GPU
64- run : |
65- suitable_gpu=$(nvidia-smi -L | grep -E '4090' | awk -F': ' '{print $1}' | sed 's/GPU //g' | while read gpu_id
66- do
67- mem_total=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i $gpu_id)
68- mem_used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $gpu_id)
69- mem_used_pct=$((100 * mem_used / mem_total))
70- if [ $mem_used_pct -lt 2 ]; then # 2 -> 98% free
71- echo $gpu_id
72- break
73- fi
74- done)
75- if [ -z "$suitable_gpu" ]; then
76- echo "No suitable GPU found. Exiting with error."
77- exit 1
78- else
79- echo "CUDA_VISIBLE_DEVICES=$suitable_gpu" >> $GITHUB_ENV
80- echo "CUDA_VISIBLE_DEVICES set to $suitable_gpu"
81- fi
82-
83- - name : Run test_perplexity.py
84- id : test_perplexity
85- continue-on-error : true
86- run : pytest --durations=0 tests/test_perplexity.py
87-
88- - name : Run test_lm_head.py
89- id : test_lm_head
90- continue-on-error : true
91- run : pytest --durations=0 tests/test_lm_head.py
92-
93- - name : Run test_q4_exallama.py
94- id : test_q4_exallama
95- continue-on-error : true
96- run : pytest --durations=0 tests/test_q4_exallama.py
97-
98- - name : Run test_q4_exallama_v2.py
99- id : test_q4_exallama_v2
100- continue-on-error : true
101- run : pytest --durations=0 tests/test_q4_exallama_v2.py
102-
103- - name : Run test_q4_marlin.py
104- id : test_q4_marlin
105- continue-on-error : true
106- run : pytest --durations=0 tests/test_q4_marlin.py
107-
108- - name : Run test_q4_triton.py
109- id : test_q4_triton
110- continue-on-error : true
111- run : pytest --durations=0 tests/test_q4_triton.py
112-
113- - name : Run test_repacking.py
114- id : test_repacking
115- continue-on-error : true
116- run : pytest --durations=0 tests/test_repacking.py
117-
118- - name : Run test_serialization.py
119- id : test_serialization
120- continue-on-error : true
121- run : pytest --durations=0 tests/test_serialization.py
122-
123- - name : Run test_sharded.py
124- id : test_sharded
125- continue-on-error : true
126- run : pytest --durations=0 tests/test_sharded.py
127-
128- - name : Run test_triton.py
129- id : test_triton
130- continue-on-error : true
131- run : pytest --durations=0 tests/test_triton.py
132-
133- - name : Run test_quant_formats.py
134- id : test_quant_formats
135- continue-on-error : true
136- run : pytest --durations=0 tests/test_quant_formats.py
137-
138- - name : Run test_q4_cuda.py
139- id : test_q4_cuda
140- continue-on-error : true
141- run : pytest --durations=0 tests/test_q4_cuda.py
142-
143- - name : Run test_q4_bitblas.py
144- id : test_q4_bitblas
145- continue-on-error : true
146- run : pytest --durations=0 tests/test_q4_bitblas.py
147-
148- - name : Print results
14982 shell : bash
15083 run : |
151- declare -A step_outcomes
152- step_outcomes=(
153- [test_perplexity]="${{ steps.test_perplexity.outcome }}"
154- [test_lm_head]="${{ steps.test_lm_head.outcome }}"
155- [test_q4_exallama]="${{ steps.test_q4_exallama.outcome }}"
156- [test_q4_exallama_v2]="${{ steps.test_q4_exallama_v2.outcome }}"
157- [test_q4_marlin]="${{ steps.test_q4_marlin.outcome }}"
158- [test_q4_triton]="${{ steps.test_q4_triton.outcome }}"
159- [test_repacking]="${{ steps.test_repacking.outcome }}"
160- [test_serialization]="${{ steps.test_serialization.outcome }}"
161- [test_sharded]="${{ steps.test_sharded.outcome }}"
162- [test_triton]="${{ steps.test_triton.outcome }}"
163- [test_quant_formats]="${{ steps.test_quant_formats.outcome }}"
164- [test_q4_cuda]="${{ steps.test_q4_cuda.outcome }}"
165- [test_q4_bitblas]="${{ steps.test_q4_bitblas.outcome }}"
166- )
167-
168- max_length=0
169- for step in "${!step_outcomes[@]}"; do
170- length=${#step}
171- if [[ $length -gt $max_length ]]; then
172- max_length=$length
173- fi
174- done
175-
176- error_occurred=0
177- for step in "${!step_outcomes[@]}"; do
178- outcome="${step_outcomes[$step]}"
179- if [ "$outcome" == "success" ]; then
180- printf "\e[32m%-*s Result : %s\e[0m\n" $((max_length + 4)) "$step" "$outcome"
84+ gpu_id=-1
85+ # Poll the allocator until it answers with a non-negative GPU index.
86+ while [ "$gpu_id" -lt 0 ]; do
87+ gpu_id=$(curl -s "http://10.0.23.237/gpu/get?id=${{ github.run_id }}")
88+ [[ "$gpu_id" =~ ^-?[0-9]+$ ]] || gpu_id=-1  # empty or non-numeric reply (e.g. an HTTP error page): treat as "no GPU yet" so the -lt tests never see a non-integer
89+ if [ "$gpu_id" -lt 0 ]; then
90+ echo "No available GPU, waiting 5 seconds..."
91+ sleep 5
18192 else
182- printf "\e[31m%-*s Result : %s\e[0m\n" $((max_length + 4)) "$step" "$outcome"
183- error_occurred=1
93+ echo "Allocated GPU ID: $gpu_id"
18494 fi
18595 done
186-
187- if [ $error_occurred -eq 1 ]; then
188- exit 1
189- fi
96+ echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
97+ echo "CUDA_VISIBLE_DEVICES set to $gpu_id"
98+
99+ - name : Run tests
100+ run : pytest tests/${{ matrix.version }}  # the job matrix defines `version` (a test file name); `matrix.test_script` is not defined and would expand to an empty string, running all of tests/ in every job
101+
102+ - name : Release GPU
103+ if : always()  # run even when an earlier step failed or the job was cancelled
104+ shell : bash
105+ run : curl -X GET "http://10.0.23.237/gpu/release?id=${{ github.run_id }}&gpu=$CUDA_VISIBLE_DEVICES"  # NOTE(review): CUDA_VISIBLE_DEVICES is empty if the allocation step never wrote it to GITHUB_ENV - confirm the endpoint tolerates an empty gpu value
0 commit comments