Closed

Changes from all commits (565 commits)
1ed9877
🌐 [i18n-KO] Translated `gpt_neox_japanese.md` to Korean (#33894)
ahnjj Oct 9, 2024
2fe7778
🌐 [i18n-KO] Translated `rag.md` to Korean (#33989)
chhaewxn Oct 9, 2024
2e8de97
🌐 [i18n-KO] Translated `main_classes/quantization.md` to Korean (#33959)
fabxoe Oct 9, 2024
47da2c5
🌐 [i18n-KO] Translated `main_classes/configuration.md` to Korean (#3…
fabxoe Oct 9, 2024
1d45843
🌐 [i18n-KO] Translated `model_doc/mamba.md` to Korean (#33626)
fabxoe Oct 9, 2024
bb825dd
🌐 [i18n-KO] Translated `model_doc/autoformer.md` to Korean (#33574)
fabxoe Oct 9, 2024
acde6c7
🌐 [i18n-KO] Translated `model_doc/patchtsmixer.md` to Korean (#33587)
fabxoe Oct 9, 2024
dd43daf
🌐 [i18n-KO] Translated `model_doc/clip.md` to Korean (#33610)
fabxoe Oct 9, 2024
291e707
🌐 [i18n-KO] Translated `model_doc/paligemma.md` to Korean (#33612)
fabxoe Oct 9, 2024
adb14b9
🌐 [i18n-KO] Translated `model_doc/llama3.md` to Korean (#33635)
fabxoe Oct 9, 2024
48e8028
🌐 [i18n-KO] Translated `model_doc/mistral.md` to Korean (#33648)
fabxoe Oct 9, 2024
d6d07f9
🌐 [i18n-KO] Translated `model_doc/cohere.md` to Korean (#33885)
fabxoe Oct 9, 2024
cd9a3c4
🌐 [i18n-KO] Translated `model_doc/dbrx.md` to Korean (#33951)
fabxoe Oct 9, 2024
db5f117
🌐 [i18n-KO] Translated `model_doc/deberta-v2.md` to Korean (#33968)
fabxoe Oct 9, 2024
386401e
🌐 [i18n-KO] Translated `main_classes/onnx.md` to Korean (#33601)
fabxoe Oct 9, 2024
0d0ec1d
🌐 [i18n-KO] Translated `tokenization_utils.md` to Korean (#33813)
yijun-lee Oct 9, 2024
f0f8077
🌐 [i18n-KO] Translated `swin.md` to Korean (#33510)
mreraser Oct 9, 2024
c15d01f
🌐 [i18n-KO] Translated `file_utils.md` to Korean (#33803)
yijun-lee Oct 9, 2024
c674f2e
🌐 [i18n-KO] Translated `openai-gpt.md` to Korean (#33801)
yijun-lee Oct 9, 2024
5809b43
🌐 [i18n-KO] Translated `biogpt.md` to Korean (#33773)
yijun-lee Oct 9, 2024
109b1e7
🌐 [i18n-KO] Translated `blip.md` to Korean (#33515)
cjfghk5697 Oct 9, 2024
d31d076
🌐 [i18n-KO] Translated output.md to Korean (#33607)
4N3MONE Oct 9, 2024
6151bc4
🌐 [i18n-KO] Translated `image_processing_utils.md` to Korean (#33804)
yijun-lee Oct 9, 2024
698b36d
🌐 [i18n-KO] Translated `modular_transformers.md` to Korean (#33772)
yijun-lee Oct 9, 2024
e783f12
[`Patch helper`] update to not have to checkout main (#34006)
ArthurZucker Oct 9, 2024
faa0f63
Add gguf support for StableLM (#33793)
VladOS95-cyber Oct 9, 2024
cdee528
Fix Failed tests with mobile bert resize tokens embedding (#33950)
abuelnasr0 Oct 9, 2024
295a90c
Generate: remove most decoder-only LLMs `prepare_inputs_for_generatio…
gante Oct 9, 2024
5ee52ae
Mllama: fix tests (#34000)
zucchini-nlp Oct 9, 2024
4fb2870
Fix PIL dep for tests (#34028)
muellerzr Oct 9, 2024
48461c0
Make `pipeline` able to load `processor` (#32514)
qubvel Oct 9, 2024
36d410d
FEAT : Adding BitNet quantization method to HFQuantizer (#33410)
MekkCyber Oct 9, 2024
41794e6
🌐 [i18n-KO] Translated `model_doc/bart.md` to Korean (#33893)
fabxoe Oct 9, 2024
13929a0
🌐 [i18n-KO] Translated `model_doc/deberta.md` to Korean (#33967)
fabxoe Oct 9, 2024
03e6fa0
🌐 [i18n-KO] Translated `main_classes/keras_callbacks.md` to Korean (#…
fabxoe Oct 9, 2024
48b5420
🌐 [i18n-KO] Translated `model_doc/mamba2.md` to Korean (#33629)
fabxoe Oct 9, 2024
f0fbef1
🌐 [i18n-KO] Translated `main_classes/model.md` to Korean (#33606)
fabxoe Oct 9, 2024
32cc15c
🌐 [i18n-KO] Translated `model_doc/trajectory_transformer.md` to Korea…
fabxoe Oct 9, 2024
04f51c4
🌐 [i18n-KO] Translated `model_doc/time_series_transformer.md` to Kore…
fabxoe Oct 9, 2024
4ad9233
🌐 [i18n-KO] Translated `model_doc/informer.md` to Korean (#33585)
fabxoe Oct 9, 2024
8dca259
🌐 [i18n-KO] Translated `model_doc/graphormer.md` to Korean (#33569)
fabxoe Oct 9, 2024
6ac5f25
🌐 [i18n-KO] Translated `modeling_utils.md` to Korean (#33808)
yijun-lee Oct 9, 2024
61a6dce
🌐 [i18n-KO] Translated `main_classes/data_collator.md` to Korean (#33…
fabxoe Oct 9, 2024
973e606
🌐 [i18n-KO] Translated `model_doc/patchtst.md` to Korean (#33589)
fabxoe Oct 9, 2024
0354d44
🌐 [i18n-KO] Translated `text_generation.md` to Korean (#33777)
yijun-lee Oct 9, 2024
c02cf48
🌐 [i18n-KO] Translated `main_classes/callback.md` to Korean (#33572)
Jwaminju Oct 9, 2024
88d01d9
🌐 [i18n-KO] Translated `generation_utils.md` to Korean (#33818)
yijun-lee Oct 9, 2024
69b5ccb
Add Translate docs into Arabic - section files CONCEPTUAL GUIDES (#33…
AhmedAlmaghz Oct 9, 2024
a265600
add sdpa to OPT (#33298)
avishaiElmakies Oct 10, 2024
adea675
Phi3: fix attn for sliding window (#33586)
zucchini-nlp Oct 10, 2024
a84c413
HfArgumentParser: allow for hyhenated field names in long-options (#3…
djmarti Oct 10, 2024
66e08db
Fix pipelines tests (#34049)
qubvel Oct 10, 2024
c9afee5
Add gguf support for gpt2 (#34044)
VladOS95-cyber Oct 10, 2024
f8a260e
Sync QuestionAnsweringPipeline (#34039)
Rocketknight1 Oct 10, 2024
dda3f91
Specifying torch dtype in Qwen2VLForConditionalGeneration (#33953)
htahboub Oct 10, 2024
fb0c6b5
Universal Assisted Generation: Assisted generation with any assistant…
danielkorat Oct 10, 2024
4a3f1a6
check if eigenvalues of covariance matrix are complex. (#34037)
abuelnasr0 Oct 10, 2024
b2f09fb
[Docs] Update compressed_tensors.md (#33961)
mgoin Oct 10, 2024
a37a06a
Fix data_seed unused (#33731)
MekkCyber Oct 10, 2024
e7dfb91
[TESTS] ASR pipeline (#33925)
ylacombe Oct 10, 2024
8363fd8
Update Blip2 `is_pipeline_test_to_skip` method signature (#34067)
qubvel Oct 10, 2024
211f1d9
provide trust_remote_code for search feat extractor in model config (…
eaidova Oct 10, 2024
24b82f3
Small Fix to modular converter (#34051)
MekkCyber Oct 10, 2024
70b07d9
Default `synced_gpus` to `True` when using `FullyShardedDataParallel`…
Oct 10, 2024
7d97cca
Generate using exported model and enable gemma2-2b in ExecuTorch (#33…
guangy10 Oct 11, 2024
be9aeba
Idefics: fix position ids (#33907)
zucchini-nlp Oct 11, 2024
4b9bfd3
Update SSH workflow file (#34084)
ydshieh Oct 11, 2024
e878eaa
Tests: upcast `logits` to `float()` (#34042)
gante Oct 11, 2024
f052e94
Fix flax failures (#33912)
LysandreJik Oct 11, 2024
9dca0c9
Fix DAC slow tests (#34088)
ylacombe Oct 11, 2024
409dd2d
Fix failing conversion (#34010)
LysandreJik Oct 11, 2024
1c66be8
Fix PushToHubMixin when pusing to a PR revision (#34090)
Wauplin Oct 11, 2024
7b06473
avoid many failures for ImageGPT (#34071)
ydshieh Oct 11, 2024
3a24ba8
Fix NaNs in cost_matrix for mask2former (#34074)
ducha-aiki Oct 11, 2024
fd70464
Fix flaky tests (#34069)
zucchini-nlp Oct 11, 2024
37ac078
Generate: move `prepare_inputs_for_generation` in encoder-decoder llm…
gante Oct 11, 2024
80bee7b
Avoid many test failures for `LlavaNextVideoForConditionalGeneration`…
ydshieh Oct 11, 2024
144852f
refactor: benchmarks (#33896)
McPatate Oct 11, 2024
617b212
fix(ci): benchmarks dashboard was failing due to missing quotations (…
McPatate Oct 11, 2024
37ea040
Generate: Fix modern llm `generate` calls with `synced_gpus` (#34095)
gante Oct 12, 2024
7434c0e
Mistral-related models for QnA (#34045)
vasqu Oct 14, 2024
4c43917
Fix a typo (#34148)
PengWeixuan Oct 14, 2024
cb5ca32
Add GGUF for starcoder2 (#34094)
VladOS95-cyber Oct 14, 2024
013d3ac
Fixed error message in mllama (#34106)
dmgcsilva Oct 14, 2024
5114c9b
Specify that users should be careful with their own files (#34153)
LysandreJik Oct 14, 2024
fa3f2db
Add documentation for docker (#33156)
ArthurZucker Oct 14, 2024
dd4216b
Update README.md with Enterprise Hub (#34150)
gary149 Oct 15, 2024
23874f5
Idefics: enable generation tests (#34062)
zucchini-nlp Oct 15, 2024
293e627
Add sdpa for Vivit (#33757)
RUFFY-369 Oct 15, 2024
4de1bdb
Fix FSDP resume Initialization issue (#34032)
Itssshikhar Oct 15, 2024
5ee9e78
Fix default behaviour in TextClassificationPipeline for regression pr…
subhalingamd Oct 15, 2024
d314ce7
Generate: move `logits` to same device as `input_ids` (#34076)
gante Oct 15, 2024
6544271
Add support for inheritance from class with different suffix in modul…
yonigozlan Oct 15, 2024
d00f1ca
Fix optuna ddp hp search (#34073)
SunMarc Oct 15, 2024
0f49dea
[feat] LlavaNext add feature size check to avoid CUDA Runtime Error (…
laurentd-lunit Oct 15, 2024
67acb0b
🌐 [i18n-KO] Translated `vivit.md` to Korean (#33935)
mreraser Oct 15, 2024
8c33cf4
🌐 [i18n-KO] Translated `gemma2.md` to Korean (#33937)
yijun-lee Oct 15, 2024
554ed5d
🌐 [i18n-KO] Translated `trainer_utils.md` to Korean (#33817)
yijun-lee Oct 15, 2024
9d6998c
🌐 [i18n-KO] Translated `blip-2.md` to Korean (#33516)
cjfghk5697 Oct 15, 2024
d087165
IDEFICS: support inputs embeds (#34043)
zucchini-nlp Oct 16, 2024
9ba021e
Moshi integration (#33624)
ylacombe Oct 16, 2024
98bad9c
[fix] fix token healing tests and usage errors (#33931)
alpertunga-bile Oct 16, 2024
cc7d8b8
Revert `accelerate` error caused by `46d09af` (#34197)
steveepreston Oct 16, 2024
bd5dc10
Fix wrong name for llava onevision and qwen2_vl in tokenization auto …
yonigozlan Oct 16, 2024
3a10c61
Avoid using torch's Tensor or PIL's Image in chat template utils if n…
RezaRahemtola Oct 16, 2024
3f06f95
Revert "Fix FSDP resume Initialization issue" (#34193)
SunMarc Oct 16, 2024
6d2b203
Update `trainer._get_eval_sampler()` to support `group_by_length` arg…
larin92 Oct 17, 2024
aa3e35a
Fix warning message for fp32_cpu_offloading in bitsandbytes configs (…
amosyou Oct 17, 2024
fce1fcf
Ping team members for new failed tests in daily CI (#34171)
ydshieh Oct 17, 2024
b57c7bc
fix(Wav2Vec2ForCTC): torch export (#34023)
chrsmcgrr Oct 17, 2024
f2846ad
Fix for tokenizer.apply_chat_template with continue_final_message=Tru…
schoennenbeck Oct 17, 2024
7f50885
removes decord (#33987)
vrnvu Oct 17, 2024
9470c00
Llama3 and Llama2 are ExecuTorch compatible (#34101)
guangy10 Oct 17, 2024
1d2c29f
Fix bus error when using GPT2 on M1 macs (#34031)
chanind Oct 17, 2024
f51ac9e
Generate: visit non-llm `prepare_inputs_for_generation` (#34199)
gante Oct 17, 2024
c1c7e89
Fix Gradient Accumulation issue (#34191)
ArthurZucker Oct 17, 2024
7a06d07
Support Llama 3.2 conversion (text models) (#33778)
pcuenca Oct 17, 2024
6ba31a8
Enable users to use their own loss functions + deal with prefetching …
muellerzr Oct 17, 2024
b54109c
Fix-red-ci (#34230)
ArthurZucker Oct 17, 2024
5a5b590
BLIP: fix input expansion logic (#34225)
zucchini-nlp Oct 18, 2024
0437d6c
Fix broken test decorator `require_torch_up_to_2_accelerators` (#34201)
byi8220 Oct 18, 2024
e95ea47
Informative 2 (#34154)
LysandreJik Oct 18, 2024
6604764
add Glm (#33823)
Cyrilvallez Oct 18, 2024
e46e3bc
Fix UDOP dtype issue (#34180)
Rocketknight1 Oct 18, 2024
816f442
Only cast logits to float when computing loss (#34147)
Oct 18, 2024
ca541bd
Generation tests: don't rely on main input name (#34228)
zucchini-nlp Oct 21, 2024
24bdc94
Change Paligemma import logging to work with modular (#34211)
yonigozlan Oct 21, 2024
a412281
Add DetrImageProcessorFast (#34063)
yonigozlan Oct 21, 2024
f701b98
Add a doc section on writing generation prompts (#34248)
Rocketknight1 Oct 21, 2024
32590b5
Fix method name which changes in tutorial (#34252)
andimarafioti Oct 21, 2024
21d5025
Attn implementation for composite models (#32238)
zucchini-nlp Oct 22, 2024
5077bc0
VLM: add more modularity (#34175)
zucchini-nlp Oct 22, 2024
73d65e6
T5 compile compatibilty (#34089)
zucchini-nlp Oct 22, 2024
b644178
[docs] Fix GenerationConfig params (#34299)
stevhliu Oct 22, 2024
93352e8
Fix Korean doc _toctree.yml (#34293)
regisss Oct 22, 2024
681fc43
Sync video classification pipeline with huggingface_hub spec (#34288)
Rocketknight1 Oct 22, 2024
84b17e0
Update PR templates (#34065)
SunMarc Oct 22, 2024
eb6a734
[RT-DETR] Fix onnx inference bug for Optype (Where) (#33877)
YHallouard Oct 22, 2024
51e395d
Fix FA2 attention for models supporting sliding window (#34093)
Cyrilvallez Oct 22, 2024
104599d
Fix: tensor of examples of the same length triggers invalid stacking …
pbelcak Oct 22, 2024
c31a6ff
Add post_process_depth_estimation to image processors and support Zoe…
alex-bene Oct 22, 2024
7a08a77
Qwen2.5 is ExecuTorch Compatible (#34102)
guangy10 Oct 22, 2024
c14ccbc
Olmo is ExecuTorch Compatible (#34181)
guangy10 Oct 22, 2024
eef6b0b
Add option for running ffmpeg_microphone_live as a background process…
mikamerath Oct 22, 2024
96f67c0
Feature: Add `MLFLOW_MAX_LOG_PARAMS` to `MLflowCallback` (#34279)
cecheta Oct 22, 2024
e7c3fa7
Fix continue_final_message for image-text-to-text chat templates (#34…
yonigozlan Oct 22, 2024
a65a6ce
fix error in _get_eval_sampler when group_by_length enabled (#34237)
akakakakakaa Oct 22, 2024
688eeac
[docs] fix typo (#34235)
faaany Oct 22, 2024
4b14aa1
🌐 [i18n-KO] Translated `executorch.md` to Korean (#33888)
ahnjj Oct 22, 2024
b03dc0a
🌐 [i18n-KO] Translated `bert japanese.md` to Korean (#33890)
ahnjj Oct 22, 2024
644d528
🌐 [i18n-KO] Translated `model_doc/bartpho.md` to Korean (#33981)
Jwaminju Oct 22, 2024
049682a
Example doc for token classification of Llama and Dependent/Copied Mo…
h3110Fr13nd Oct 22, 2024
5ba85de
[docs] Fix Korean toctree (#34324)
stevhliu Oct 23, 2024
343c8cb
Added Deberta model type support (#34308)
FilipposVentirozos Oct 23, 2024
1fb575f
Support boolean tool args (#34208)
aymeric-roucher Oct 23, 2024
d9f7336
Enable Gradient Accumulation fix across all models + trainer fully in…
muellerzr Oct 23, 2024
c42b322
skip `test_pipeline_depth_estimation` temporarily (#34316)
ydshieh Oct 23, 2024
e50bf61
Fix red CI: benchmark script (#34351)
ArthurZucker Oct 23, 2024
b0f0c61
Add SynthID (watermerking by Google DeepMind) (#34350)
gante Oct 23, 2024
65753d6
Remove graph breaks for torch.compile() in flash_attention_forward wh…
Abhishek-TAMU Oct 24, 2024
0586381
Better defaults (#34026)
ArthurZucker Oct 24, 2024
f0e640a
Drop support for Python 3.8 (#34314)
ydshieh Oct 24, 2024
9643069
v4.47.0.dev0
ArthurZucker Oct 24, 2024
f0b3ef9
translated gguf.md into chinese (#34163)
blueingman Oct 24, 2024
b29c24f
CI: fix failures (#34371)
zucchini-nlp Oct 24, 2024
2112027
Zamba is an LM (#34342)
LysandreJik Oct 24, 2024
30c76d5
add code generation to natural language processing section (#34333)
furtnerthomas Oct 24, 2024
dd267fc
Add T5 GGUF loading support (#33389)
junejae Oct 24, 2024
6432ad8
Fix pil_torch_interpolation_mapping import in image_processing_detr_f…
yonigozlan Oct 24, 2024
450b9cb
Add code sample docstrings and checkpoint reference for GLM models (#…
h3110Fr13nd Oct 24, 2024
e288616
refactor: remove redundant if-condition and improve type correctness …
winstxnhdw Oct 24, 2024
fe35073
Ignore unsupported kwarg in ProcessorMixin call (#34285)
yonigozlan Oct 24, 2024
d9989e0
[PEFT] Add warning for missing key in LoRA adapter (#34068)
BenjaminBossan Oct 24, 2024
1c5918d
Fix `torch.fx` issue related to the new `loss_kwargs` keyword argumen…
michaelbenayoun Oct 24, 2024
4c6e0c9
Correct the new defaults (#34377)
Cyrilvallez Oct 24, 2024
a308d28
[auto. ping] Avoid sending empty info + add more team members (#34383)
ydshieh Oct 24, 2024
3d99f17
Fix glm (#34388)
Cyrilvallez Oct 24, 2024
940a6bd
Use non nested images and batched text Idefics2/3 (#34222)
yonigozlan Oct 25, 2024
5779bac
Fix onnx non-expotable inplace aten op (#34376)
IlyasMoutawwakil Oct 25, 2024
9f365fe
Fix right padding in LLaVA models (#34305)
zucchini-nlp Oct 25, 2024
2238553
no filter (#34391)
ydshieh Oct 25, 2024
8814043
SynthID: better example (#34372)
gante Oct 25, 2024
186b8dc
Tests: upgrade `test_eager_matches_sdpa_generate` (#34386)
gante Oct 25, 2024
e447185
Fix bnb training test failure (#34414)
matthewdouglas Oct 25, 2024
f73f5e6
Avoid check expected exception when it is on CUDA (#34408)
ydshieh Oct 25, 2024
6a62a6d
Fix typos in agents_advanced.md (#34405)
rudydel Oct 25, 2024
1d06379
[docs] Cache implementations (#34325)
stevhliu Oct 25, 2024
fddbd3c
Fix pix2struct (#34374)
IlyasMoutawwakil Oct 28, 2024
fc465bb
pin `tensorflow_probability<0.22` in docker files (#34381)
ydshieh Oct 28, 2024
9360f18
Tiny update after #34383 (#34404)
ydshieh Oct 28, 2024
92bcdff
Fix batch size handling in prediction_loop for DataLoaderShard (#34343)
zeus2611 Oct 28, 2024
8b3b9b4
exclude fsdp from delay_optimizer_creation (#34140)
eljandoubi Oct 28, 2024
c175343
New option called `"best"` for `args.save_strategy`. (#31817)
seanswyi Oct 28, 2024
fc1ae7f
[docs] update input documentation for MAMBA2 and MISTRAL models to in…
h3110Fr13nd Oct 28, 2024
1f7539c
🌐 [i18n-KO] Translated `model_doc/barthez.md` to Korean (#33980)
Jwaminju Oct 28, 2024
084e946
Apply linting to the important code blocks to make it readable (#34449)
ShubhamJagtap2000 Oct 28, 2024
a17f287
[i18n-ar] Translated file : `docs/source/ar/fast_tokenizers.md` into …
AhmedAlmaghz Oct 28, 2024
d21dbd1
enable average tokens across devices (#34373)
techkang Oct 28, 2024
6cc4a67
feat: run benchmarks on A100 (#34287)
McPatate Oct 28, 2024
a769ed4
Add `post_process_depth_estimation` for GLPN (#34413)
alex-bene Oct 28, 2024
fe76b60
LLaVA: latency issues (#34460)
zucchini-nlp Oct 29, 2024
808d6c5
Generation: fix test (#34369)
zucchini-nlp Oct 29, 2024
63ca6d9
Fix CI (#34458)
zucchini-nlp Oct 29, 2024
655bec2
use a tinymodel to test generation config which aviod timeout (#34482)
techkang Oct 29, 2024
a183519
🚨🚨🚨 [SuperPoint] Fix keypoint coordinate output and add post processi…
sbucaille Oct 29, 2024
439334c
Simplify running tests in a subprocess (#34213)
ydshieh Oct 29, 2024
626c610
Fix perplexity computation in perplexity.md (#34387)
Framartin Oct 29, 2024
9e3d704
Fixes for Modular Converter on Windows (#34266)
hlky Oct 29, 2024
004530a
Fix regression loading dtype (#34409)
SunMarc Oct 29, 2024
5392f12
Bert is ExecuTorch compatible (#34424)
guangy10 Oct 29, 2024
8755dd2
manual `head_dim` for `mixtral` model (#34281)
wavy-jung Oct 29, 2024
0ab0a42
fix-qwen2vl-no-position_ids (#33487)
simonJJJ Oct 29, 2024
56c45d5
Bug fix for drop path decay rate in swin transformer (#34291)
abhi-glitchhg Oct 29, 2024
34620e8
MobileBERT is ExecuTorch compatible (#34473)
guangy10 Oct 29, 2024
f339042
Albert is ExecuTorch compatible (#34476)
guangy10 Oct 29, 2024
e9ad460
Adding `optimizer_cls_and_kwargs` to `Trainer.__init__` (#34358)
apoorvkh Oct 29, 2024
4e2e880
Bump werkzeug from 3.0.3 to 3.0.6 in /examples/research_projects/deci…
dependabot[bot] Oct 29, 2024
f55595b
Fix performance in get_imports regexp (#34298)
AlekseyLobanov Oct 29, 2024
e4449bb
fix incorrect warning (#34416)
yonigozlan Oct 29, 2024
9bee9ff
Un-deprecate timeout arg in pipelines (#34382)
Rocketknight1 Oct 29, 2024
cd27761
Roberta is ExecuTorch compatible (#34425)
guangy10 Oct 30, 2024
25a9fc5
Fix format mistake in string repr of tokenizer objects (#34493)
gpetho Oct 30, 2024
0f764a5
Mllama: update docs (#34334)
zucchini-nlp Oct 30, 2024
913330c
VLMs: fix number of image tokens (#34332)
zucchini-nlp Oct 30, 2024
8a734ea
Tests: move `generate` tests to the right mixin and delete redundant …
gante Oct 30, 2024
241d790
fix pixtral processor (#34486)
molbap Oct 30, 2024
eab6c49
Use torch 2.5 in scheduled CI (#34465)
ydshieh Oct 30, 2024
5251fe6
Add GGUF for Mamba (#34200)
VladOS95-cyber Oct 30, 2024
9f06fb0
Fix super tiny extra space typo (#34440)
fzyzcjy Oct 30, 2024
48872fd
Add Image Processor Fast RT-DETR (#34354)
yonigozlan Oct 30, 2024
405b562
UPDATE Documentation for #TRANSLATING.md Documentation into Multiple …
anshumangahlot Oct 30, 2024
f385316
enable QA bf16 pipeline (#34483)
jiqing-feng Oct 31, 2024
1b86772
Fix: img size mismatch caused by incorrect unpadding in LLaVA-Next (#…
jp1924 Oct 31, 2024
dca93ca
Fix step shifting when accumulate gradient (#33673)
kibitzing Oct 31, 2024
ab98f0b
avoid calling `gc.collect` and `cuda.empty_cache` (#34514)
ydshieh Oct 31, 2024
4ca004e
Qwen2VL: skip base `input_ids`-`inputs_embeds` equivalence check (#34…
gante Oct 31, 2024
b5919e1
fix(DPT,Depth-Anything) Address expected_slice errors inside inferenc…
philkuz Oct 31, 2024
294c170
feat: add benchmarks pg indexes (#34536)
McPatate Oct 31, 2024
114dd81
make `test_eager_matches_sdpa_inference `less flaky (#34512)
ydshieh Oct 31, 2024
c443d8d
Bug Fix for issue #34294 (#34295)
fpgaminer Oct 31, 2024
203e270
Add image text to text pipeline (#34170)
yonigozlan Oct 31, 2024
df8640c
[CLIPSeg] Make interpolate_pos_encoding default to True (#34419)
NielsRogge Oct 31, 2024
2801d7b
update doc (#34478)
jiqing-feng Oct 31, 2024
b53e44e
[i18n-ar] Translated file : `docs/source/ar/multilingual.md` into Ara…
AhmedAlmaghz Oct 31, 2024
6beb3f1
Blip: get/set input embeddings correctly (#34152)
zucchini-nlp Nov 1, 2024
4cc0813
BLIP: enable generation tests (#34174)
zucchini-nlp Nov 1, 2024
86701f2
:red_circle: :red_circle: fix `query_pre_attn_scalar` different of `…
molbap Nov 1, 2024
e2ac16b
Large modular logic refactoring (#34487)
Cyrilvallez Nov 1, 2024
33868a0
[i18n-HI] Translated accelerate page to Hindi (#34443)
karthik-script Nov 1, 2024
b37a978
Merge remote-tracking branch 'zucchini-nlp/train_predict' into train_…
pdufour Nov 2, 2024
81 changes: 62 additions & 19 deletions .circleci/config.yml
@@ -47,13 +47,13 @@ jobs:

- run:
name: "Retrieve Artifact Paths"
env:
CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
# [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
# `CIRCLE_TOKEN` is defined as an environment variables set within a context, see `https://circleci.com/docs/contexts/`
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
curl -o test_preparation/artifacts.json ${url}
curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN"
- run:
name: "Prepare pipeline parameters"
command: |
@@ -82,22 +82,49 @@ jobs:
parallelism: 1
steps:
- checkout
- run: uv pip install -e .
- run: |
mkdir test_preparation
echo -n "tests" > test_preparation/test_list.txt
echo -n "all" > test_preparation/examples_test_list.txt
echo -n "tests/repo_utils" > test_preparation/test_repo_utils.txt
- run: uv pip install -U -e .
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
- run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt
- run: python utils/tests_fetcher.py --filter_tests
- run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: |
echo -n "tests" > test_list.txt
python utils/tests_fetcher.py --filter_tests
mv test_list.txt test_preparation/filtered_test_list.txt
- run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt
if [ ! -s test_preparation/generated_config.yml ]; then
echo "No tests to run, exiting early!"
circleci-agent step halt
fi

- store_artifacts:
path: test_preparation/generated_config.txt
path: test_preparation

- run:
name: "Retrieve Artifact Paths"
env:
CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
curl -o test_preparation/artifacts.json ${url}
- run:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py

# To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
# We used:

# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job

- store_artifacts:
path: test_preparation/transformed_artifacts.json
- store_artifacts:
path: test_preparation/artifacts.json
- continuation/continue:
configuration_path: test_preparation/generated_config.yml
parameters: test_preparation/transformed_artifacts.json
configuration_path: test_preparation/generated_config.yml

check_code_quality:
working_directory: ~/transformers
@@ -110,7 +110,7 @@ jobs:
parallelism: 1
steps:
- checkout
- run: uv pip install -e .
- run: uv pip install -e ".[quality]"
- run:
name: Show installed libraries and their versions
command: pip freeze | tee installed.txt
@@ -135,13 +162,14 @@
parallelism: 1
steps:
- checkout
- run: uv pip install -e .
- run: uv pip install -e ".[quality]"
- run:
name: Show installed libraries and their versions
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: python utils/check_copies.py
- run: python utils/check_modular_conversion.py
- run: python utils/check_table.py
- run: python utils/check_dummies.py
- run: python utils/check_repo.py
@@ -158,13 +186,28 @@ workflows:
version: 2
setup_and_quality:
when:
not: <<pipeline.parameters.nightly>>
and:
- equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
- not: <<pipeline.parameters.nightly>>
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_tests

setup_and_quality_2:
when:
not:
equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_tests:
# [reference] https://circleci.com/docs/contexts/
context:
- TRANSFORMERS_CONTEXT

nightly:
when: <<pipeline.parameters.nightly>>
jobs:
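The comment block in the config.yml diff above explains why the artifact links are passed to the continuation orb as flat pipeline parameters: the generated_config.yaml would otherwise grow too large, and nested dicts cannot be passed, so one test_file_... parameter is created per job. What follows is only a hedged sketch of what utils/process_test_artifacts.py could look like under those constraints; the artifacts.json field names ("path", "url") and the test_file_ prefix are assumptions for illustration, not read from this diff.

# Hypothetical sketch: flatten the CircleCI artifacts listing into one
# string-valued parameter per job, so the continuation orb can accept it
# without nested structures.
import json

def transform_artifacts(src="test_preparation/artifacts.json",
                        dst="test_preparation/transformed_artifacts.json"):
    with open(src) as f:
        items = json.load(f).get("items", [])
    params = {}
    for item in items:
        # Assumed shape: every artifact entry exposes a "path" and a "url".
        name = item["path"].split("/")[-1].replace(".txt", "")
        params[f"test_file_{name}"] = item["url"]  # flat key/value, not nested
    with open(dst, "w") as f:
        json.dump(params, f, indent=2)

if __name__ == "__main__":
    transform_artifacts()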
11 changes: 10 additions & 1 deletion .circleci/create_circleci_config.py
@@ -312,6 +312,15 @@ def job_name(self):
)


non_model_job = CircleCIJob(
"non_model",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
pytest_num_workers=8,
)


# We also include a `dummy.py` file in the files to be doc-tested to prevent edge case failure. Otherwise, the pytest
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
# the bash output redirection.)
@@ -336,7 +345,7 @@ def job_name(self):
pytest_num_workers=1,
)

REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job] # fmt: skip
REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
REPO_UTIL_TESTS = [repo_utils_job]
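For the new non_model job added above, the marker string presumably ends up as pytest's -m expression so that only tests not marked generate are collected; the helper below is a sketch of that assumption, not the actual command assembly inside create_circleci_config.py.

# Sketch (assumed): how a CircleCIJob marker plausibly maps to a pytest call.
import shlex

def build_pytest_command(marker, num_workers):
    parts = ["python", "-m", "pytest", "-n", str(num_workers)]
    if marker:
        parts += ["-m", marker]  # e.g. -m "not generate" for the non_model job
    return shlex.join(parts)

print(build_pytest_command("not generate", 8))
# python -m pytest -n 8 -m 'not generate'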
12 changes: 0 additions & 12 deletions .coveragerc

This file was deleted.

10 changes: 5 additions & 5 deletions .github/ISSUE_TEMPLATE/bug-report.yml
@@ -37,25 +37,25 @@ body:
Models:

- text models: @ArthurZucker
- vision models: @amyeroberts
- speech models: @sanchit-gandhi
- vision models: @amyeroberts, @qubvel
- speech models: @ylacombe, @eustlb
- graph models: @clefourrier

Library:

- flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Narsil
- pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- tokenizers: @ArthurZucker and @itazap
- trainer: @muellerzr @SunMarc

Integrations:

- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber

Documentation: @stevhliu

9 changes: 5 additions & 4 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -40,25 +40,26 @@ members/contributors who may be interested in your PR.
Models:

- text models: @ArthurZucker
- vision models: @amyeroberts
- speech models: @sanchit-gandhi
- vision models: @amyeroberts, @qubvel
- speech models: @ylacombe, @eustlb
- graph models: @clefourrier

Library:

- flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- pipelines: @Narsil
- pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @muellerzr and @SunMarc
- chat templates: @Rocketknight1

Integrations:

- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber

Documentation: @stevhliu

76 changes: 54 additions & 22 deletions .github/workflows/benchmark.yml
@@ -1,42 +1,74 @@
name: Self-hosted runner (benchmark)

on:
schedule:
- cron: "17 2 * * *"
workflow_call:
push:
branches: [main]
pull_request:
types: [ opened, labeled, reopened, synchronize ]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

env:
HF_HOME: /mnt/cache
TF_FORCE_GPU_ALLOW_GROWTH: true


jobs:
benchmark:
name: Benchmark
runs-on: [single-gpu, nvidia-gpu, a10, ci]
strategy:
matrix:
group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus]
runs-on:
group: ${{ matrix.group }}
if: |
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
(github.event_name == 'push' && github.ref == 'refs/heads/main')
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
image: huggingface/transformers-pytorch-gpu
options: --gpus all --privileged --ipc host
steps:
- name: Update clone
working-directory: /transformers
- name: Get repo
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha || github.sha }}

- name: Install libpq-dev & psql
run: |
git fetch && git checkout ${{ github.sha }}
apt update
apt install -y libpq-dev postgresql-client

- name: Install benchmark script dependencies
run: python3 -m pip install -r benchmark/requirements.txt

- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]"

- name: Benchmark (daily)
if: github.event_name == 'schedule'
working-directory: /transformers
- name: Run database init script
run: |
python3 -m pip install optimum-benchmark>=0.3.0
HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
psql -f benchmark/init_db.sql
env:
PGDATABASE: metrics
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}

- name: Benchmark (merged to main event)
if: github.event_name == 'push' && github.ref_name == 'main'
working-directory: /transformers
- name: Run benchmark
run: |
python3 -m pip install optimum-benchmark>=0.3.0
HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results_merge_event --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
git config --global --add safe.directory /__w/transformers/transformers
if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
commit_id=$(echo "${{ github.event.pull_request.head.sha }}")
elif [ "$GITHUB_EVENT_NAME" = "push" ]; then
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Enable this to see debug logs
# HF_HUB_VERBOSITY: debug
# TRANSFORMERS_VERBOSITY: debug
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
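The benchmark step above derives the branch name, commit id, and a commit message truncated to 70 characters before calling benchmark/llama.py with Postgres credentials taken from secrets. Below is a hedged sketch of running the same step outside CI; the database values are placeholders standing in for the CI secrets, and a reachable Postgres instance plus the repository's benchmark/ scripts are assumed.

# Sketch of reproducing the "Run benchmark" step locally (assumptions noted above).
import os
import subprocess

def git(*args):
    return subprocess.check_output(["git", *args], text=True).strip()

env = {
    **os.environ,
    "PGHOST": "localhost",               # secrets.TRANSFORMERS_BENCHMARKS_PGHOST in CI
    "PGUSER": "transformers_benchmarks",
    "PGPASSWORD": "<placeholder>",        # never hard-code a real secret
}

branch = git("rev-parse", "--abbrev-ref", "HEAD")
commit_id = git("rev-parse", "HEAD")
commit_msg = git("show", "-s", "--format=%s")[:70]  # same 70-char cut as the workflow

subprocess.run(["python3", "benchmark/llama.py", branch, commit_id, commit_msg],
               env=env, check=True)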