diff --git a/.github/workflows/torch_compile_tests.yml b/.github/workflows/torch_compile_tests.yml
index c84bec97a1..7ac0a643c2 100644
--- a/.github/workflows/torch_compile_tests.yml
+++ b/.github/workflows/torch_compile_tests.yml
@@ -11,32 +11,42 @@ on:
         required: false
         default: false
 
+env:
+  RUN_SLOW: "yes"
+  IS_GITHUB_CI: "1"
+  # To be able to run tests on CUDA 12.2
+  NVIDIA_DISABLE_REQUIRE: "1"
+
 jobs:
   run_tests_with_compile:
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, single-gpu, nvidia-gpu, a10, ci]
     env:
       PEFT_DEBUG_WITH_TORCH_COMPILE: 1
+      CUDA_VISIBLE_DEVICES: "0"
+      TEST_TYPE: "single_gpu_huggingface/peft-gpu-bnb-latest:latest"
+    container:
+      image: "huggingface/peft-gpu-bnb-latest:latest"
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    defaults:
+      run:
+        shell: bash
     steps:
       - uses: actions/checkout@v4
         with:
           ref: ${{ github.event.inputs.branch }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.10"
-          cache: "pip"
-          cache-dependency-path: "setup.py"
-      - name: Install dependencies
+      - name: Pip install
         run: |
-          python -m pip install --upgrade pip
-          python -m pip install .[test]
-          python -m pip install bitsandbytes
+          source activate peft
+          pip install -e . --no-deps
+          pip install pytest-cov parameterized datasets scipy einops
+          pip install "pytest>=7.2.0,<8.0.0" # see: https://github.com/huggingface/transformers/blob/ce4fff0be7f6464d713f7ac3e0bbaafbc6959ae5/setup.py#L148C6-L148C26
           if [ "${{ github.event.inputs.pytorch_nightly }}" = "true" ]; then
             python -m pip install --upgrade --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
           fi
       - name: Test compile with pytest
         run: |
+          source activate peft
           echo "PEFT_DEBUG_WITH_TORCH_COMPILE=$PEFT_DEBUG_WITH_TORCH_COMPILE"
           git status
           make tests_torch_compile
diff --git a/tests/test_torch_compile.py b/tests/test_torch_compile.py
index 353a562148..818dcc1c43 100644
--- a/tests/test_torch_compile.py
+++ b/tests/test_torch_compile.py
@@ -49,6 +49,8 @@
     get_peft_model,
 )
 
+from .testing_utils import require_bitsandbytes
+
 
 # only run (very slow) torch.compile tests when explicitly asked to
 if os.environ.get("PEFT_DEBUG_WITH_TORCH_COMPILE") != "1":
@@ -269,6 +271,7 @@ def test_causal_lm_training_pytorch_compile(self, settings, tokenizer, data, tmp
         assert torch.allclose(output_after.logits, output_loaded.logits, atol=atol, rtol=rtol)
         assert (tokens_after == tokens_loaded).all()
 
+    @require_bitsandbytes
     @pytest.mark.xfail(strict=True)
     def test_causal_lm_training_lora_bnb_compile(self, tokenizer, data, tmp_path):
         r"""Train a bnb quantized LoRA model with torch.compile using PyTorch training loop"""
@@ -329,6 +332,7 @@ def test_causal_lm_training_lora_bnb_compile(self, tokenizer, data, tmp_path):
         assert torch.allclose(output_after.logits, output_loaded.logits, atol=atol, rtol=rtol)
 
     @pytest.mark.xfail(strict=True)
+    @require_bitsandbytes
     def test_causal_lm_multiple_lora_adapter_compile(self, tokenizer, data):
         torch.manual_seed(0)
         model = AutoModelForCausalLM.from_pretrained(
@@ -393,6 +397,7 @@ def test_causal_lm_disable_lora_adapter_compile(self, tokenizer, data):
         assert torch.allclose(output_base.logits, output_disabled.logits, atol=atol, rtol=rtol)
         assert not torch.allclose(output_base.logits, output_lora.logits, atol=atol, rtol=rtol)
 
+    @require_bitsandbytes
     def test_causal_lm_merging_lora_adapter_compile(self, tokenizer, data):
         # merge the adapter
         torch.manual_seed(0)
@@ -420,6 +425,7 @@ def test_causal_lm_merging_lora_adapter_compile(self, tokenizer, data):
         assert not torch.allclose(output_base.logits, output_lora.logits, atol=atol, rtol=rtol)
         assert torch.allclose(output_lora.logits, output_merged.logits, atol=atol, rtol=rtol)
 
+    @require_bitsandbytes
     def test_causal_lm_merging_multiple_lora_adapters_compile(self, tokenizer, data):
         # merge multiple adapters at once
         torch.manual_seed(0)
@@ -457,6 +463,7 @@ def test_causal_lm_merging_multiple_lora_adapters_compile(self, tokenizer, data)
         assert not torch.allclose(output_default.logits, output_merged.logits, atol=atol, rtol=rtol)
         assert not torch.allclose(output_other.logits, output_merged.logits, atol=atol, rtol=rtol)
 
+    @require_bitsandbytes
     @pytest.mark.xfail(strict=True)
     def test_causal_lm_merge_and_unload_lora_adapter_compile(self, tokenizer, data):
         torch.manual_seed(0)
@@ -485,6 +492,7 @@ def test_causal_lm_merge_and_unload_lora_adapter_compile(self, tokenizer, data):
         assert not torch.allclose(output_base.logits, output_lora.logits, atol=atol, rtol=rtol)
         assert torch.allclose(output_lora.logits, output_unloaded.logits, atol=atol, rtol=rtol)
 
+    @require_bitsandbytes
     @pytest.mark.xfail(strict=True)
     def test_causal_lm_mixed_batch_lora_adapter_compile(self, tokenizer, data):
         torch.manual_seed(0)
@@ -530,6 +538,7 @@ def test_causal_lm_mixed_batch_lora_adapter_compile(self, tokenizer, data):
         assert torch.allclose(output_default.logits[1], output_mixed.logits[1], atol=atol, rtol=rtol)
         assert torch.allclose(output_other.logits[2], output_mixed.logits[2], atol=atol, rtol=rtol)
 
+    @require_bitsandbytes
     def test_causal_lm_add_weighted_adapter_lora_adapter_compile(self, tokenizer, data):
         torch.manual_seed(0)
         model = AutoModelForCausalLM.from_pretrained(
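For reference, the bnb-dependent tests above are gated by a `require_bitsandbytes` decorator imported from `tests/testing_utils.py`. Below is a minimal sketch of what such a decorator could look like, assuming it skips the test when bitsandbytes is not installed; it is an illustration, not the repository's exact implementation.

```python
# Minimal sketch of a `require_bitsandbytes`-style decorator (assumption:
# it skips the decorated test when bitsandbytes is not importable).
# The real helper lives in tests/testing_utils.py and may differ.
import importlib.util

import pytest


def require_bitsandbytes(test_case):
    """Skip the decorated test unless bitsandbytes is importable."""
    bnb_available = importlib.util.find_spec("bitsandbytes") is not None
    return pytest.mark.skipif(not bnb_available, reason="test requires bitsandbytes")(test_case)
```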