diff --git a/.circleci/config.yml b/.circleci/config.yml
index 483e315e260c..9c414901c4f5 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -47,13 +47,13 @@ jobs:
- run:
name: "Retrieve Artifact Paths"
- env:
- CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
+ # [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts
+          # `CIRCLE_TOKEN` is defined as an environment variable set within a context, see `https://circleci.com/docs/contexts/`
command: |
project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
job_number=${CIRCLE_BUILD_NUM}
url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
- curl -o test_preparation/artifacts.json ${url}
+ curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN"
- run:
name: "Prepare pipeline parameters"
command: |
@@ -82,22 +82,49 @@ jobs:
parallelism: 1
steps:
- checkout
- - run: uv pip install -e .
- - run: |
- mkdir test_preparation
- echo -n "tests" > test_preparation/test_list.txt
- echo -n "all" > test_preparation/examples_test_list.txt
- echo -n "tests/repo_utils" > test_preparation/test_repo_utils.txt
+ - run: uv pip install -U -e .
+ - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
+ - run: mkdir -p test_preparation
+ - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt
+ - run: python utils/tests_fetcher.py --filter_tests
+ - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- run: |
- echo -n "tests" > test_list.txt
- python utils/tests_fetcher.py --filter_tests
- mv test_list.txt test_preparation/filtered_test_list.txt
- - run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation
- - run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt
+ if [ ! -s test_preparation/generated_config.yml ]; then
+ echo "No tests to run, exiting early!"
+ circleci-agent step halt
+ fi
+
- store_artifacts:
- path: test_preparation/generated_config.txt
+ path: test_preparation
+
+ - run:
+ name: "Retrieve Artifact Paths"
+ env:
+ CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
+ command: |
+ project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
+ job_number=${CIRCLE_BUILD_NUM}
+ url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts"
+ curl -o test_preparation/artifacts.json ${url}
+ - run:
+ name: "Prepare pipeline parameters"
+ command: |
+ python utils/process_test_artifacts.py
+
+      # To avoid an overly long generated_config.yml on the continuation orb, we pass the links to the artifacts as parameters.
+      # Otherwise the list of tests would have been too long; being explicit is good, but here it became a limitation.
+      # We used:
+
+      # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
+      # We could not pass a nested dict, which is why we create a test_file_... parameter for every single job
+
+ - store_artifacts:
+ path: test_preparation/transformed_artifacts.json
+ - store_artifacts:
+ path: test_preparation/artifacts.json
- continuation/continue:
- configuration_path: test_preparation/generated_config.yml
+ parameters: test_preparation/transformed_artifacts.json
+ configuration_path: test_preparation/generated_config.yml
check_code_quality:
working_directory: ~/transformers
@@ -110,7 +137,7 @@ jobs:
parallelism: 1
steps:
- checkout
- - run: uv pip install -e .
+ - run: uv pip install -e ".[quality]"
- run:
name: Show installed libraries and their versions
command: pip freeze | tee installed.txt
@@ -135,13 +162,14 @@ jobs:
parallelism: 1
steps:
- checkout
- - run: uv pip install -e .
+ - run: uv pip install -e ".[quality]"
- run:
name: Show installed libraries and their versions
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: python utils/check_copies.py
+ - run: python utils/check_modular_conversion.py
- run: python utils/check_table.py
- run: python utils/check_dummies.py
- run: python utils/check_repo.py
@@ -158,13 +186,28 @@ workflows:
version: 2
setup_and_quality:
when:
-      not: <<pipeline.parameters.nightly>>
+      and:
+        - equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
+        - not: <<pipeline.parameters.nightly>>
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_tests
+ setup_and_quality_2:
+ when:
+ not:
+        equal: [<<pipeline.project.git_url>>, https://github.com/huggingface/transformers]
+ jobs:
+ - check_circleci_user
+ - check_code_quality
+ - check_repository_consistency
+ - fetch_tests:
+ # [reference] https://circleci.com/docs/contexts/
+ context:
+ - TRANSFORMERS_CONTEXT
+
nightly:
when: <<pipeline.parameters.nightly>>
jobs:
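
Note on the config.yml changes above: the "Prepare pipeline parameters" step relies on `utils/process_test_artifacts.py` to turn the raw CircleCI artifacts listing into a flat parameter map for the continuation orb. Below is a minimal sketch of what such a script could look like; the `test_file_...` naming scheme and the output schema are assumptions based on the comments in the diff, not the actual script.

import json
import re


def transform(artifacts_path="test_preparation/artifacts.json",
              output_path="test_preparation/transformed_artifacts.json"):
    # The CircleCI v2 artifacts endpoint returns {"items": [{"path": ..., "url": ...}, ...]}.
    with open(artifacts_path) as f:
        artifacts = json.load(f)

    parameters = {}
    for item in artifacts.get("items", []):
        # e.g. "test_preparation/torch_test_list.txt" -> "test_file_torch_test_list"
        stem = item["path"].rsplit("/", 1)[-1].rsplit(".", 1)[0]
        parameters["test_file_" + re.sub(r"\W", "_", stem)] = item["url"]

    # Flat string parameters only: a nested dict cannot be passed to the continuation pipeline.
    with open(output_path, "w") as f:
        json.dump(parameters, f, indent=2)


if __name__ == "__main__":
    transform()
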
diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
index d8d3e7d86cf3..7ccf5ec96cec 100644
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -312,6 +312,15 @@ def job_name(self):
)
+non_model_job = CircleCIJob(
+ "non_model",
+ docker_image=[{"image": "huggingface/transformers-torch-light"}],
+ marker="not generate",
+ parallelism=6,
+ pytest_num_workers=8,
+)
+
+
# We also include a `dummy.py` file in the files to be doc-tested to prevent edge case failure. Otherwise, the pytest
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
# the bash output redirection.)
@@ -336,7 +345,7 @@ def job_name(self):
pytest_num_workers=1,
)
-REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job] # fmt: skip
+REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
REPO_UTIL_TESTS = [repo_utils_job]
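
For context on the new `non_model_job` above: it selects tests via the pytest marker expression `not generate` and fans them out over 6 parallel executors with 8 workers each. A minimal sketch, assuming a simplified `Job` class rather than the real `CircleCIJob`, of how a marker and worker count translate into a pytest invocation:

from dataclasses import dataclass


@dataclass
class Job:
    name: str
    marker: str | None = None
    pytest_num_workers: int = 8

    def pytest_command(self, test_list_file: str) -> str:
        cmd = f"python3 -m pytest -n {self.pytest_num_workers}"
        if self.marker:
            cmd += f' -m "{self.marker}"'
        return f"{cmd} $(cat {test_list_file})"


print(Job("non_model", marker="not generate").pytest_command("test_list.txt"))
# python3 -m pytest -n 8 -m "not generate" $(cat test_list.txt)
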
diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 9a1103b8af3d..000000000000
--- a/.coveragerc
+++ /dev/null
@@ -1,12 +0,0 @@
-[run]
-source=transformers
-omit =
- # skip convertion scripts from testing for now
- */convert_*
- */__main__.py
-[report]
-exclude_lines =
- pragma: no cover
- raise
- except
- register_parameter
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
index 7415ca71d466..ecc795ae6325 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -37,17 +37,17 @@ body:
Models:
- text models: @ArthurZucker
- - vision models: @amyeroberts
- - speech models: @sanchit-gandhi
+ - vision models: @amyeroberts, @qubvel
+ - speech models: @ylacombe, @eustlb
- graph models: @clefourrier
Library:
- flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- - pipelines: @Narsil
+ - pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- - tokenizers: @ArthurZucker
+ - tokenizers: @ArthurZucker and @itazap
- trainer: @muellerzr @SunMarc
Integrations:
@@ -55,7 +55,7 @@ body:
- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- - quantization (bitsandbytes, autogpt): @SunMarc
+ - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
Documentation: @stevhliu
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index cf638dc59255..ee7a7eaae113 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -40,25 +40,26 @@ members/contributors who may be interested in your PR.
Models:
- text models: @ArthurZucker
-- vision models: @amyeroberts
-- speech models: @sanchit-gandhi
+- vision models: @amyeroberts, @qubvel
+- speech models: @ylacombe, @eustlb
- graph models: @clefourrier
Library:
- flax: @sanchit-gandhi
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
-- pipelines: @Narsil
+- pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @muellerzr and @SunMarc
+- chat templates: @Rocketknight1
Integrations:
- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
-- quantization (bitsandbytes, autogpt): @SunMarc
+- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
Documentation: @stevhliu
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index cb9a3d7b7974..eaa4b3b2f824 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -1,42 +1,74 @@
name: Self-hosted runner (benchmark)
on:
- schedule:
- - cron: "17 2 * * *"
- workflow_call:
+ push:
+ branches: [main]
+ pull_request:
+ types: [ opened, labeled, reopened, synchronize ]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
env:
HF_HOME: /mnt/cache
- TF_FORCE_GPU_ALLOW_GROWTH: true
-
jobs:
benchmark:
name: Benchmark
- runs-on: [single-gpu, nvidia-gpu, a10, ci]
+ strategy:
+ matrix:
+ group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus]
+ runs-on:
+ group: ${{ matrix.group }}
+ if: |
+ (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
+ (github.event_name == 'push' && github.ref == 'refs/heads/main')
container:
- image: huggingface/transformers-all-latest-gpu
- options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+ image: huggingface/transformers-pytorch-gpu
+ options: --gpus all --privileged --ipc host
steps:
- - name: Update clone
- working-directory: /transformers
+ - name: Get repo
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event.pull_request.head.sha || github.sha }}
+
+ - name: Install libpq-dev & psql
run: |
- git fetch && git checkout ${{ github.sha }}
+ apt update
+ apt install -y libpq-dev postgresql-client
+
+ - name: Install benchmark script dependencies
+ run: python3 -m pip install -r benchmark/requirements.txt
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
- run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+ run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]"
- - name: Benchmark (daily)
- if: github.event_name == 'schedule'
- working-directory: /transformers
+ - name: Run database init script
run: |
- python3 -m pip install optimum-benchmark>=0.3.0
- HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
+ psql -f benchmark/init_db.sql
+ env:
+ PGDATABASE: metrics
+ PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
+ PGUSER: transformers_benchmarks
+ PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
- - name: Benchmark (merged to main event)
- if: github.event_name == 'push' && github.ref_name == 'main'
- working-directory: /transformers
+ - name: Run benchmark
run: |
- python3 -m pip install optimum-benchmark>=0.3.0
- HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results_merge_event --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
+ git config --global --add safe.directory /__w/transformers/transformers
+ if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
+ commit_id=$(echo "${{ github.event.pull_request.head.sha }}")
+ elif [ "$GITHUB_EVENT_NAME" = "push" ]; then
+ commit_id=$GITHUB_SHA
+ fi
+ commit_msg=$(git show -s --format=%s | cut -c1-70)
+ python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
+ env:
+ HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+ # Enable this to see debug logs
+ # HF_HUB_VERBOSITY: debug
+ # TRANSFORMERS_VERBOSITY: debug
+ PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
+ PGUSER: transformers_benchmarks
+ PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
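
The rewritten benchmark workflow above installs libpq/psql, initialises the `metrics` Postgres database from `benchmark/init_db.sql`, and then runs `benchmark/llama.py` with the branch, commit id, and commit message; the `PGHOST`/`PGUSER`/`PGPASSWORD` variables are picked up by libpq. A minimal sketch of how a result row could be written, assuming a hypothetical `benchmarks` table (the real schema lives in `benchmark/init_db.sql`):

import sys

import psycopg2  # relies on libpq reading PGHOST, PGUSER, PGPASSWORD, PGDATABASE from the environment


def record_measurement(branch: str, commit_id: str, commit_msg: str, tokens_per_sec: float) -> None:
    # Hypothetical `benchmarks` table; column names are assumptions for illustration only.
    with psycopg2.connect() as conn, conn.cursor() as cur:
        cur.execute(
            "INSERT INTO benchmarks (branch, commit_id, commit_message, tokens_per_sec) "
            "VALUES (%s, %s, %s, %s)",
            (branch, commit_id, commit_msg, tokens_per_sec),
        )


if __name__ == "__main__":
    branch, commit_id, commit_msg = sys.argv[1:4]
    record_measurement(branch, commit_id, commit_msg, tokens_per_sec=0.0)
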
diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index df772db773e2..c21faf2d7479 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -20,7 +20,8 @@ concurrency:
jobs:
latest-docker:
name: "Latest PyTorch + TensorFlow [dev]"
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -68,7 +69,8 @@ jobs:
latest-torch-deepspeed-docker:
name: "Latest PyTorch + DeepSpeed"
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -104,7 +106,8 @@ jobs:
# Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
latest-torch-deepspeed-docker-for-push-ci-daily-build:
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -145,7 +148,8 @@ jobs:
name: "Doc builder"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -180,7 +184,8 @@ jobs:
name: "Latest PyTorch [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -215,7 +220,8 @@ jobs:
latest-pytorch-amd:
name: "Latest PyTorch (AMD) [dev]"
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -265,7 +271,8 @@ jobs:
name: "Latest TensorFlow [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -300,7 +307,8 @@ jobs:
latest-pytorch-deepspeed-amd:
name: "PyTorch + DeepSpeed (AMD) [dev]"
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -350,7 +358,8 @@ jobs:
name: "Latest Pytorch + Quantization [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
diff --git a/.github/workflows/build-nightly-ci-docker-images.yml b/.github/workflows/build-nightly-ci-docker-images.yml
index 0b1b7df5f8a2..4b00a6d3fae3 100644
--- a/.github/workflows/build-nightly-ci-docker-images.yml
+++ b/.github/workflows/build-nightly-ci-docker-images.yml
@@ -13,7 +13,8 @@ concurrency:
jobs:
latest-with-torch-nightly-docker:
name: "Nightly PyTorch + Stable TensorFlow"
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -40,7 +41,8 @@ jobs:
nightly-torch-deepspeed-docker:
name: "Nightly PyTorch + DeepSpeed"
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -62,4 +64,4 @@ jobs:
build-args: |
REF=main
push: true
- tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu
\ No newline at end of file
+ tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu
diff --git a/.github/workflows/build-past-ci-docker-images.yml b/.github/workflows/build-past-ci-docker-images.yml
index 6ee60b8a6b60..c4f0b78986ca 100644
--- a/.github/workflows/build-past-ci-docker-images.yml
+++ b/.github/workflows/build-past-ci-docker-images.yml
@@ -16,7 +16,8 @@ jobs:
fail-fast: false
matrix:
version: ["1.13", "1.12", "1.11"]
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
@@ -60,7 +61,8 @@ jobs:
fail-fast: false
matrix:
version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"]
- runs-on: [intel-cpu, 8-cpu, ci]
+ runs-on:
+ group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml
index e3e3b5f2df37..c55638ded149 100644
--- a/.github/workflows/build_documentation.yml
+++ b/.github/workflows/build_documentation.yml
@@ -1,6 +1,7 @@
name: Build documentation
on:
+ workflow_dispatch:
push:
branches:
- main
@@ -15,7 +16,7 @@ jobs:
commit_sha: ${{ github.sha }}
package: transformers
notebook_folder: transformers_doc
- languages: de en es fr hi it ko pt tr zh ja te
+ languages: ar de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
index c8d073ea3468..f698f860b2f9 100644
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -14,5 +14,5 @@ jobs:
commit_sha: ${{ github.event.pull_request.head.sha }}
pr_number: ${{ github.event.number }}
package: transformers
- languages: de en es fr hi it ko pt tr zh ja te
+ languages: ar de en es fr hi it ko pt tr zh ja te
custom_container: huggingface/transformers-doc-builder
diff --git a/.github/workflows/check_failed_model_tests.yml b/.github/workflows/check_failed_model_tests.yml
new file mode 100644
index 000000000000..f3ea8646900a
--- /dev/null
+++ b/.github/workflows/check_failed_model_tests.yml
@@ -0,0 +1,129 @@
+name: Process failed tests
+
+on:
+ workflow_call:
+ inputs:
+ docker:
+ required: true
+ type: string
+ start_sha:
+ required: true
+ type: string
+
+
+env:
+ HF_HOME: /mnt/cache
+ TRANSFORMERS_IS_CI: yes
+ OMP_NUM_THREADS: 8
+ MKL_NUM_THREADS: 8
+ RUN_SLOW: yes
+  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
+ # This token is created under the bot `hf-transformers-bot`.
+ HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+ SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+ TF_FORCE_GPU_ALLOW_GROWTH: true
+ RUN_PT_TF_CROSS_TESTS: 1
+ CUDA_VISIBLE_DEVICES: 0,1
+
+
+jobs:
+ run_models_gpu:
+ name: " "
+ runs-on:
+ group: aws-g4dn-2xlarge-cache
+ container:
+ image: ${{ inputs.docker }}
+ options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ name: ci_results_run_models_gpu
+ path: /transformers/ci_results_run_models_gpu
+
+ - name: Update clone
+ working-directory: /transformers
+ run: git fetch && git checkout ${{ github.sha }}
+
+ - name: Get target commit
+ working-directory: /transformers/utils
+ run: |
+ echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV
+
+ - name: Checkout to `start_sha`
+ working-directory: /transformers
+ run: git fetch && git checkout ${{ inputs.start_sha }}
+
+ - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+ working-directory: /transformers
+ run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+ - name: NVIDIA-SMI
+ run: |
+ nvidia-smi
+
+ - name: Environment
+ working-directory: /transformers
+ run: |
+ python3 utils/print_env.py
+
+ - name: Show installed libraries and their versions
+ working-directory: /transformers
+ run: pip freeze
+
+ - name: Check failed tests
+ working-directory: /transformers
+ run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
+
+ - name: Show results
+ working-directory: /transformers
+ run: |
+ ls -l new_model_failures_with_bad_commit.json
+ cat new_model_failures_with_bad_commit.json
+
+ - name: Checkout back
+ working-directory: /transformers
+ run: |
+ git checkout ${{ inputs.start_sha }}
+
+ - name: Process report
+ shell: bash
+ working-directory: /transformers
+ env:
+ TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
+ run: |
+ python3 utils/process_bad_commit_report.py
+
+ - name: Process report
+ shell: bash
+ working-directory: /transformers
+ env:
+ TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
+ run: |
+ {
+            echo 'REPORT_TEXT<<EOF'
+            python3 utils/process_bad_commit_report.py
+            echo EOF
+          } >> "$GITHUB_ENV"
+
+ - name: Send processed report
+ if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
+ uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
+ with:
+ # Slack channel id, channel name, or user id to post message.
+ # See also: https://api.slack.com/methods/chat.postMessage#channels
+ channel-id: '#transformers-ci-feedback-tests'
+ # For posting a rich message using Block Kit
+ payload: |
+ {
+ "blocks": [
+ {
+ "type": "section",
+ "text": {
+ "type": "mrkdwn",
+ "text": "${{ env.REPORT_TEXT }}"
+ }
+ }
+ ]
+ }
+ env:
+ SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
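
The new workflow above hands `utils/check_bad_commit.py` a start commit (the current run) and an end commit (the last daily CI run) together with the list of newly failing tests, and expects back the commit that introduced each failure. A rough sketch of one way such a search could work, assuming a simple linear walk over the commit range (the real script may well differ):

import subprocess


def first_bad_commit(start_commit: str, end_commit: str, test: str) -> str | None:
    # Commits after end_commit (last daily CI run) up to start_commit (current), oldest first.
    commits = subprocess.run(
        ["git", "rev-list", "--reverse", f"{end_commit}..{start_commit}"],
        capture_output=True, text=True, check=True,
    ).stdout.split()
    for commit in commits:
        subprocess.run(["git", "checkout", commit], check=True, capture_output=True)
        result = subprocess.run(["python3", "-m", "pytest", "-x", test], capture_output=True)
        if result.returncode != 0:
            return commit  # first commit at which the test fails
    return None
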
diff --git a/.github/workflows/doctest_job.yml b/.github/workflows/doctest_job.yml
index 98be985292e3..eb62b797b8eb 100644
--- a/.github/workflows/doctest_job.yml
+++ b/.github/workflows/doctest_job.yml
@@ -27,7 +27,8 @@ jobs:
fail-fast: false
matrix:
split_keys: ${{ fromJson(inputs.split_keys) }}
- runs-on: [single-gpu, nvidia-gpu, t4, ci]
+ runs-on:
+ group: aws-g4dn-2xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
diff --git a/.github/workflows/doctests.yml b/.github/workflows/doctests.yml
index 4b515c741a3a..472b07684ed1 100644
--- a/.github/workflows/doctests.yml
+++ b/.github/workflows/doctests.yml
@@ -14,7 +14,8 @@ env:
jobs:
setup:
name: Setup
- runs-on: [single-gpu, nvidia-gpu, t4, ci]
+ runs-on:
+ group: aws-g4dn-2xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -85,4 +86,4 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: doc_test_results
- path: doc_test_results
\ No newline at end of file
+ path: doc_test_results
diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml
index 454d03f42456..001e2c531d9b 100644
--- a/.github/workflows/model_jobs.yml
+++ b/.github/workflows/model_jobs.yml
@@ -41,7 +41,8 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
- runs-on: ['${{ inputs.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+ runs-on:
+ group: '${{ inputs.machine_type }}'
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -97,25 +98,42 @@ jobs:
working-directory: /transformers
run: pip freeze
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ inputs.machine_type }}"
+
+ if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ inputs.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Run all tests on GPU
working-directory: /transformers
- run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+ run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+ run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Run test
shell: bash
run: |
- mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
- echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
- echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
+ mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+ echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+ echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
- path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+ name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
diff --git a/.github/workflows/model_jobs_amd.yml b/.github/workflows/model_jobs_amd.yml
new file mode 100644
index 000000000000..a7e6c7b1ccd5
--- /dev/null
+++ b/.github/workflows/model_jobs_amd.yml
@@ -0,0 +1,129 @@
+name: model jobs
+
+on:
+ workflow_call:
+ inputs:
+ folder_slices:
+ required: true
+ type: string
+ machine_type:
+ required: true
+ type: string
+ slice_id:
+ required: true
+ type: number
+ runner:
+ required: true
+ type: string
+ docker:
+ required: true
+ type: string
+
+env:
+ HF_HOME: /mnt/cache
+ TRANSFORMERS_IS_CI: yes
+ OMP_NUM_THREADS: 8
+ MKL_NUM_THREADS: 8
+ RUN_SLOW: yes
+  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
+ # This token is created under the bot `hf-transformers-bot`.
+ HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+ SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+ TF_FORCE_GPU_ALLOW_GROWTH: true
+ RUN_PT_TF_CROSS_TESTS: 1
+ CUDA_VISIBLE_DEVICES: 0,1
+
+jobs:
+ run_models_gpu:
+ name: " "
+ strategy:
+ max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
+ fail-fast: false
+ matrix:
+ folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
+ runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+ container:
+ image: ${{ inputs.docker }}
+ options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+ steps:
+ - name: Echo input and matrix info
+ shell: bash
+ run: |
+ echo "${{ inputs.folder_slices }}"
+ echo "${{ matrix.folders }}"
+ echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
+
+ - name: Echo folder ${{ matrix.folders }}
+ shell: bash
+ # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+ # set the artifact folder names (because the character `/` is not allowed).
+ run: |
+ echo "${{ matrix.folders }}"
+ matrix_folders=${{ matrix.folders }}
+ matrix_folders=${matrix_folders/'models/'/'models_'}
+ echo "$matrix_folders"
+ echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+ - name: Update clone
+ working-directory: /transformers
+ run: git fetch && git checkout ${{ github.sha }}
+
+ - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+ working-directory: /transformers
+ run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+ - name: Update / Install some packages (for Past CI)
+ if: ${{ contains(inputs.docker, '-past-') }}
+ working-directory: /transformers
+ run: |
+ python3 -m pip install -U datasets
+
+ - name: Update / Install some packages (for Past CI)
+ if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
+ working-directory: /transformers
+ run: |
+ python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
+
+ - name: ROCM-SMI
+ run: |
+ rocm-smi
+
+ - name: ROCM-INFO
+ run: |
+ rocminfo | grep "Agent" -A 14
+
+ - name: Show ROCR environment
+ run: |
+ echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+ - name: Environment
+ working-directory: /transformers
+ run: |
+ python3 utils/print_env.py
+
+ - name: Show installed libraries and their versions
+ working-directory: /transformers
+ run: pip freeze
+
+ - name: Run all tests on GPU
+ working-directory: /transformers
+ run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
+
+ - name: Failure short reports
+ if: ${{ failure() }}
+ continue-on-error: true
+ run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+
+ - name: Run test
+ shell: bash
+ run: |
+ mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+ echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+ echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
+
+ - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+ if: ${{ always() }}
+ uses: actions/upload-artifact@v4
+ with:
+ name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+ path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
diff --git a/.github/workflows/push-important-models.yml b/.github/workflows/push-important-models.yml
index 41bcd43fcc6f..1887af0f4c5b 100644
--- a/.github/workflows/push-important-models.yml
+++ b/.github/workflows/push-important-models.yml
@@ -52,7 +52,8 @@ jobs:
test_modified_files:
needs: get_modified_models
name: Slow & FA2 tests
- runs-on: [single-gpu, nvidia-gpu, a10, ci]
+ runs-on:
+ group: aws-g5-4xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
diff --git a/.github/workflows/self-pr-slow-ci.yml b/.github/workflows/self-pr-slow-ci.yml
index 2287b5e3f315..43fcecd8def2 100644
--- a/.github/workflows/self-pr-slow-ci.yml
+++ b/.github/workflows/self-pr-slow-ci.yml
@@ -65,8 +65,9 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
- machine_type: [single-gpu, multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci]
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -93,12 +94,27 @@ jobs:
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
- run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+ run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision
- name: NVIDIA-SMI
run: |
nvidia-smi
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Environment
working-directory: /transformers
run: |
@@ -113,23 +129,23 @@ jobs:
run: |
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
echo $CUDA_VISIBLE_DEVICES
- python3 -m pytest -v -rsfE --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+ python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+ run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Make sure report directory exists
shell: bash
run: |
- mkdir -p /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
- echo "hello" > /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
- echo "${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
+ mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+ echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+ echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+ name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index b328f65d34a5..940495c28753 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -32,8 +32,9 @@ jobs:
name: Setup
strategy:
matrix:
- machine_type: [single-gpu, multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -131,8 +132,9 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
- machine_type: [single-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
+ machine_type: [aws-g4dn-2xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -162,6 +164,23 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Update clone using environment variables
working-directory: /transformers
run: |
@@ -203,19 +222,19 @@ jobs:
- name: Run all non-slow selected tests on GPU
working-directory: /transformers
run: |
- python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
+ python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+ run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+ name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_multi_gpu:
name: Model tests
@@ -226,8 +245,9 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
- machine_type: [multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
+ machine_type: [aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -257,6 +277,23 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Update clone using environment variables
working-directory: /transformers
run: |
@@ -300,19 +337,19 @@ jobs:
MKL_SERVICE_FORCE_INTEL: 1
working-directory: /transformers
run: |
- python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
+ python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+ run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+ name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_torch_cuda_extensions_single_gpu:
name: Torch CUDA extension tests
@@ -321,8 +358,9 @@ jobs:
strategy:
fail-fast: false
matrix:
- machine_type: [single-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
+ machine_type: [aws-g4dn-2xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -352,6 +390,23 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /workspace/transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Update clone using environment variables
working-directory: /workspace/transformers
run: |
@@ -392,19 +447,19 @@ jobs:
working-directory: /workspace/transformers
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
run: |
- python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
+ python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
+ run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
- path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+ name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+ path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
run_tests_torch_cuda_extensions_multi_gpu:
name: Torch CUDA extension tests
@@ -413,8 +468,9 @@ jobs:
strategy:
fail-fast: false
matrix:
- machine_type: [multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, push-ci]
+ machine_type: [aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -444,6 +500,23 @@ jobs:
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /workspace/transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Update clone using environment variables
working-directory: /workspace/transformers
run: |
@@ -484,19 +557,19 @@ jobs:
working-directory: /workspace/transformers
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
run: |
- python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
+ python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
+ run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
- path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+ name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+ path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
send_results:
name: Send results to webhook
diff --git a/.github/workflows/self-scheduled-amd-mi210-caller.yml b/.github/workflows/self-scheduled-amd-mi210-caller.yml
index 6abba6894aaf..1c79b38a314e 100644
--- a/.github/workflows/self-scheduled-amd-mi210-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml
@@ -10,11 +10,46 @@ on:
- run_amd_scheduled_ci_caller*
jobs:
- run_amd_ci:
- name: AMD mi210
- if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
+ model-ci:
+ name: Model CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
- gpu_flavor: mi210
+ job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi210
+ docker: huggingface/transformers-pytorch-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi210
+ secrets: inherit
+
+ torch-pipeline:
+ name: Torch pipeline CI
+ uses: ./.github/workflows/self-scheduled-amd.yml
+ with:
+ job: run_pipelines_torch_gpu
+ slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi210
+ docker: huggingface/transformers-pytorch-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi210
+ secrets: inherit
+
+ example-ci:
+ name: Example CI
+ uses: ./.github/workflows/self-scheduled-amd.yml
+ with:
+ job: run_examples_gpu
+ slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi210
+ docker: huggingface/transformers-pytorch-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi210
+ secrets: inherit
+
+ deepspeed-ci:
+ name: DeepSpeed CI
+ uses: ./.github/workflows/self-scheduled-amd.yml
+ with:
+ job: run_torch_cuda_extensions_gpu
+ slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi210
+ docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
diff --git a/.github/workflows/self-scheduled-amd-mi250-caller.yml b/.github/workflows/self-scheduled-amd-mi250-caller.yml
index 36365d4a67f1..fd1513057163 100644
--- a/.github/workflows/self-scheduled-amd-mi250-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml
@@ -10,11 +10,46 @@ on:
- run_amd_scheduled_ci_caller*
jobs:
- run_amd_ci:
- name: AMD mi250
- if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller')))
+ model-ci:
+ name: Model CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
- gpu_flavor: mi250
+ job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi250
+ docker: huggingface/transformers-pytorch-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi250
+ secrets: inherit
+
+ torch-pipeline:
+ name: Torch pipeline CI
+ uses: ./.github/workflows/self-scheduled-amd.yml
+ with:
+ job: run_pipelines_torch_gpu
+ slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi250
+ docker: huggingface/transformers-pytorch-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi250
+ secrets: inherit
+
+ example-ci:
+ name: Example CI
+ uses: ./.github/workflows/self-scheduled-amd.yml
+ with:
+ job: run_examples_gpu
+ slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi250
+ docker: huggingface/transformers-pytorch-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi250
+ secrets: inherit
+
+ deepspeed-ci:
+ name: DeepSpeed CI
+ uses: ./.github/workflows/self-scheduled-amd.yml
+ with:
+ job: run_torch_cuda_extensions_gpu
+ slack_report_channel: "#transformers-ci-daily-amd"
+ runner: mi250
+ docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
+ ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
diff --git a/.github/workflows/self-scheduled-amd-mi300-caller.yml b/.github/workflows/self-scheduled-amd-mi300-caller.yml
deleted file mode 100644
index a9e7b934c34b..000000000000
--- a/.github/workflows/self-scheduled-amd-mi300-caller.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: Self-hosted runner (AMD mi300 scheduled CI caller)
-
-on:
- workflow_run:
- workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
- branches: ["main"]
- types: [completed]
- push:
- branches:
- - run_amd_scheduled_ci_caller*
-
-jobs:
- run_amd_ci:
- name: AMD mi300
- needs: build-docker-containers
- if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))
- uses: ./.github/workflows/self-scheduled-amd.yml
- with:
- gpu_flavor: mi300
- slack_report_channel: "#transformers-ci-daily-amd"
- secrets: inherit
diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml
index f3b17bfbffb0..47f92cd6a2b0 100644
--- a/.github/workflows/self-scheduled-amd.yml
+++ b/.github/workflows/self-scheduled-amd.yml
@@ -3,10 +3,23 @@ name: Self-hosted runner (scheduled-amd)
# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the
# CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes
# us towards the limit of allowed jobs on GitHub Actions.
+
on:
workflow_call:
inputs:
- gpu_flavor:
+ job:
+ required: true
+ type: string
+ slack_report_channel:
+ required: true
+ type: string
+ runner:
+ required: true
+ type: string
+ docker:
+ required: true
+ type: string
+ ci_event:
required: true
type: string
@@ -18,7 +31,7 @@ env:
RUN_SLOW: yes
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-
+ NUM_SLICES: 2
# Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running.
# This is done so that we avoid parallelizing the scheduled tests, to leave available
@@ -42,7 +55,7 @@ jobs:
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
- runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+ runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -50,25 +63,29 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
+
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
+
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
setup:
+ if: contains(fromJSON('["run_models_gpu"]'), inputs.job)
name: Setup
needs: check_runners
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
- runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+ runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
- matrix: ${{ steps.set-matrix.outputs.matrix }}
+ folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
+ slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
steps:
- name: Update clone
working-directory: /transformers
@@ -90,7 +107,8 @@ jobs:
name: Identify models to test
working-directory: /transformers/tests
run: |
- echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT
+ echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
+ echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- name: ROCM-SMI
run: |
@@ -99,6 +117,7 @@ jobs:
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
+
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
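
The "Identify models to test" step above now emits `folder_slices` via `utils/split_model_tests.py --num_splits $NUM_SLICES`, and each matrix `slice_id` consumes one slice. A minimal sketch, assuming the splitter simply chunks the sorted model test folders (the real script may differ):

import argparse
import os

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_splits", type=int, default=2)
    args = parser.parse_args()

    tests_dir = os.path.join(os.path.dirname(__file__), "..", "tests")
    folders = sorted(
        f"models/{d}"
        for d in os.listdir(os.path.join(tests_dir, "models"))
        if os.path.isdir(os.path.join(tests_dir, "models", d))
    )
    per_slice = -(-len(folders) // args.num_splits)  # ceiling division
    slices = [folders[i : i + per_slice] for i in range(0, len(folders), per_slice)]
    print(slices)  # consumed via fromJSON(...) in the workflow
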
@@ -108,99 +127,38 @@ jobs:
run: |
python3 utils/print_env.py
- run_models_gpu_single_gpu:
+ run_models_gpu:
+ if: ${{ inputs.job == 'run_models_gpu' }}
name: Single GPU tests
+ needs: setup
strategy:
max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
fail-fast: false
matrix:
- folders: ${{ fromJson(needs.setup.outputs.matrix) }}
- machine_type: [single-gpu]
- runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
- container:
- image: huggingface/transformers-pytorch-amd-gpu
- options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
- needs: setup
- steps:
- - name: Echo folder ${{ matrix.folders }}
- shell: bash
- # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
- # set the artifact folder names (because the character `/` is not allowed).
- run: |
- echo "${{ matrix.folders }}"
- matrix_folders=${{ matrix.folders }}
- matrix_folders=${matrix_folders/'models/'/'models_'}
- echo "$matrix_folders"
- echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
- - name: Update clone
- working-directory: /transformers
- run: git fetch && git checkout ${{ github.sha }}
-
- - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
- working-directory: /transformers
- run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
- - name: ROCM-SMI
- run: |
- rocm-smi
- - name: ROCM-INFO
- run: |
- rocminfo | grep "Agent" -A 14
- - name: Show ROCR environment
- run: |
- echo "ROCR: $ROCR_VISIBLE_DEVICES"
-
- - name: Environment
- working-directory: /transformers
- run: |
- python3 utils/print_env.py
-
- - name: Show installed libraries and their versions
- working-directory: /transformers
- run: pip freeze
-
- - name: Run all tests on GPU
- working-directory: /transformers
- run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
-
- - name: Failure short reports
- if: ${{ failure() }}
- continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
- if: ${{ always() }}
- uses: actions/upload-artifact@v4
- with:
- name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+ machine_type: [single-gpu, multi-gpu]
+ slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
+ uses: ./.github/workflows/model_jobs_amd.yml
+ with:
+ folder_slices: ${{ needs.setup.outputs.folder_slices }}
+ machine_type: ${{ matrix.machine_type }}
+ slice_id: ${{ matrix.slice_id }}
+ runner: ${{ inputs.runner }}
+ docker: ${{ inputs.docker }}
+ secrets: inherit
- run_models_gpu_multi_gpu:
- name: Multi GPU tests
+ run_pipelines_torch_gpu:
+ if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
+ name: PyTorch pipelines
+ needs: check_runners
strategy:
- max-parallel: 1
fail-fast: false
matrix:
- folders: ${{ fromJson(needs.setup.outputs.matrix) }}
- machine_type: [multi-gpu]
- runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+ machine_type: [single-gpu, multi-gpu]
+ runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
- image: huggingface/transformers-pytorch-amd-gpu
+ image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
- needs: setup
steps:
- - name: Echo folder ${{ matrix.folders }}
- shell: bash
- # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
- # set the artifact folder names (because the character `/` is not allowed).
- run: |
- echo "${{ matrix.folders }}"
- matrix_folders=${{ matrix.folders }}
- matrix_folders=${matrix_folders/'models/'/'models_'}
- echo "$matrix_folders"
- echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
@@ -212,9 +170,11 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
+
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
+
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -228,33 +188,35 @@ jobs:
working-directory: /transformers
run: pip freeze
- - name: Run all tests on GPU
+ - name: Run all pipeline tests on GPU
working-directory: /transformers
- run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
+ run: |
+ python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+ run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+ - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+ name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+ path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
run_examples_gpu:
- name: Examples tests
+ if: ${{ inputs.job == 'run_examples_gpu' }}
+ name: Examples directory
+ needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu]
- runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
+ runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
- image: huggingface/transformers-pytorch-amd-gpu
+ image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
- needs: setup
steps:
- name: Update clone
working-directory: /transformers
@@ -267,9 +229,11 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
+
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
+
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
@@ -301,73 +265,17 @@ jobs:
name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
- run_pipelines_torch_gpu:
- name: PyTorch pipelines tests
- strategy:
- fail-fast: false
- matrix:
- machine_type: [single-gpu, multi-gpu]
- runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
- container:
- image: huggingface/transformers-pytorch-amd-gpu
- options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
- needs: setup
- steps:
- - name: Update clone
- working-directory: /transformers
- run: git fetch && git checkout ${{ github.sha }}
-
- - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
- working-directory: /transformers
- run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
- - name: ROCM-SMI
- run: |
- rocm-smi
- - name: ROCM-INFO
- run: |
- rocminfo | grep "Agent" -A 14
- - name: Show ROCR environment
- run: |
- echo "ROCR: $ROCR_VISIBLE_DEVICES"
-
- - name: Environment
- working-directory: /transformers
- run: |
- python3 utils/print_env.py
-
- - name: Show installed libraries and their versions
- working-directory: /transformers
- run: pip freeze
-
- - name: Run all pipeline tests on GPU
- working-directory: /transformers
- run: |
- python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
-
- - name: Failure short reports
- if: ${{ failure() }}
- continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
-
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
- if: ${{ always() }}
- uses: actions/upload-artifact@v4
- with:
- name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
-
run_torch_cuda_extensions_gpu:
+ if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
name: Torch ROCm deepspeed tests
+ needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
-
- runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
- needs: setup
+ runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
- image: huggingface/transformers-pytorch-deepspeed-amd-gpu
+ image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
@@ -381,6 +289,7 @@ jobs:
- name: ROCM-SMI
run: |
rocm-smi
+
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
@@ -414,107 +323,27 @@ jobs:
name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
- run_extract_warnings:
- name: Extract warnings in CI artifacts
- runs-on: ubuntu-22.04
- if: always()
- needs: [
- check_runner_status,
- check_runners,
- setup,
- run_models_gpu_single_gpu,
- run_models_gpu_multi_gpu,
- run_examples_gpu,
- run_pipelines_torch_gpu,
- run_torch_cuda_extensions_gpu
- ]
- steps:
- - name: Checkout transformers
- uses: actions/checkout@v4
- with:
- fetch-depth: 2
-
- - name: Install transformers
- run: pip install transformers
-
- - name: Show installed libraries and their versions
- run: pip freeze
-
- - name: Create output directory
- run: mkdir warnings_in_ci
-
- - uses: actions/download-artifact@v4
- with:
- path: warnings_in_ci
-
- - name: Show artifacts
- run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
- working-directory: warnings_in_ci
-
- - name: Extract warnings in CI artifacts
- run: |
- python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
- echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
-
- - name: Upload artifact
- if: ${{ always() }}
- uses: actions/upload-artifact@v4
- with:
- name: warnings_in_ci
- path: warnings_in_ci/selected_warnings.json
-
send_results:
- name: Send results to webhook
- runs-on: ubuntu-22.04
- if: always()
+ name: Slack Report
needs: [
check_runner_status,
check_runners,
setup,
- run_models_gpu_single_gpu,
- run_models_gpu_multi_gpu,
- run_examples_gpu,
+ run_models_gpu,
run_pipelines_torch_gpu,
- run_torch_cuda_extensions_gpu,
- run_extract_warnings
+ run_examples_gpu,
+ run_torch_cuda_extensions_gpu
]
- steps:
- - name: Preliminary job status
- shell: bash
- # For the meaning of these environment variables, see the job `Setup`
- run: |
- echo "Runner availability: ${{ needs.check_runner_status.result }}"
- echo "Runner status: ${{ needs.check_runners.result }}"
- echo "Setup status: ${{ needs.setup.result }}"
-
- - uses: actions/checkout@v4
- - uses: actions/download-artifact@v4
- - name: Send message to Slack
- env:
- CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
- CI_SLACK_CHANNEL_ID_DAILY_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }}
- CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
- CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }}
- ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
- CI_EVENT: Scheduled CI (AMD) - ${{ inputs.gpu_flavor }}
- CI_SHA: ${{ github.sha }}
- CI_WORKFLOW_REF: ${{ github.workflow_ref }}
- RUNNER_STATUS: ${{ needs.check_runner_status.result }}
- RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
- SETUP_STATUS: ${{ needs.setup.result }}
- # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
- # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
- run: |
- sudo apt-get install -y curl
- pip install huggingface_hub
- pip install slack_sdk
- pip show slack_sdk
- python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
-
- # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- - name: Failure table artifacts
- if: ${{ always() }}
- uses: actions/upload-artifact@v4
- with:
- name: test_failure_tables
- path: test_failure_tables
+ if: ${{ always() }}
+ uses: ./.github/workflows/slack-report.yml
+ with:
+ job: ${{ inputs.job }}
+ # This would be `skipped` if `setup` is skipped.
+ setup_status: ${{ needs.setup.result }}
+ slack_report_channel: ${{ inputs.slack_report_channel }}
+ # This would be an empty string if `setup` is skipped.
+ folder_slices: ${{ needs.setup.outputs.folder_slices }}
+ quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
+ ci_event: ${{ inputs.ci_event }}
+
+ secrets: inherit
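For context on the `folder_slices` / `slice_ids` outputs used above: the setup job splits the model test folders into `NUM_SLICES` groups, and the matrix then fans out one reusable `model_jobs_amd.yml` call per (machine_type, slice_id) pair. Below is a minimal Python sketch of what such a splitter could look like; the actual `utils/split_model_tests.py` is not part of this diff, so the folder list and the splitting strategy here are assumptions for illustration only.

# A minimal sketch, assuming the splitter simply partitions the model test folders
# into NUM_SLICES chunks (the real utils/split_model_tests.py may differ).
import argparse
import json

def split(folders, num_splits):
    # Round-robin distribution over exactly `num_splits` chunks, so every slice_id
    # produced by `range(NUM_SLICES)` has a matching entry in `folder_slices`.
    return [folders[i::num_splits] for i in range(num_splits)]

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_splits", type=int, required=True)
    args = parser.parse_args()
    # Hypothetical folder list; the real script derives it from the tests/models/* directories.
    folders = [f"models/{m}" for m in ["bert", "gpt2", "llama", "t5", "vit"]]
    # Printed as a JSON-style list so the workflow can consume it with `fromJSON(...)`.
    print(json.dumps(split(folders, args.num_splits)))

Run with `--num_splits 2`, this prints something like `[["models/bert", "models/llama", "models/vit"], ["models/gpt2", "models/t5"]]`, which is the shape the job matrices above expect.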
diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml
index a4c5d2023485..75ea3bb24bc7 100644
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@@ -2,6 +2,9 @@ name: Self-hosted runner (scheduled)
on:
+ repository_dispatch:
+ schedule:
+ - cron: "17 2 * * *"
push:
branches:
- run_scheduled_ci*
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index b056759aa773..353fb59843e4 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -50,8 +50,9 @@ jobs:
name: Setup
strategy:
matrix:
- machine_type: [single-gpu, multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -83,7 +84,7 @@ jobs:
run: |
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
-
+
- id: set-matrix-quantization
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
name: Identify quantization method to test
@@ -102,7 +103,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- machine_type: [single-gpu, multi-gpu]
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs.yml
with:
@@ -119,8 +120,9 @@ jobs:
strategy:
fail-fast: false
matrix:
- machine_type: [single-gpu, multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-pytorch-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -146,22 +148,39 @@ jobs:
working-directory: /transformers
run: pip freeze
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
- python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
+ python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
+ run: cat /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+ name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
run_pipelines_tf_gpu:
if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
@@ -169,8 +188,9 @@ jobs:
strategy:
fail-fast: false
matrix:
- machine_type: [single-gpu, multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-tensorflow-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -197,22 +217,39 @@ jobs:
working-directory: /transformers
run: pip freeze
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
- python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
+ python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
- name: Failure short reports
if: ${{ always() }}
run: |
- cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
+ cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports
+ name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
@@ -220,8 +257,9 @@ jobs:
strategy:
fail-fast: false
matrix:
- machine_type: [single-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+ machine_type: [aws-g4dn-2xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -247,23 +285,40 @@ jobs:
working-directory: /transformers
run: pip freeze
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
- python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch
+ python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
+ run: cat /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
+ name: ${{ env.machine_type }}_run_examples_gpu_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports
run_torch_cuda_extensions_gpu:
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
@@ -271,8 +326,9 @@ jobs:
strategy:
fail-fast: false
matrix:
- machine_type: [single-gpu, multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -326,22 +382,39 @@ jobs:
working-directory: ${{ inputs.working-directory-prefix }}/transformers
run: pip freeze
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Run all tests on GPU
working-directory: ${{ inputs.working-directory-prefix }}/transformers
run: |
- python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
+ python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
+ run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
- path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+ name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+ path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
run_quantization_torch_gpu:
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
@@ -352,8 +425,9 @@ jobs:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
- machine_type: [single-gpu, multi-gpu]
- runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
+ machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+ runs-on:
+ group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -388,22 +462,39 @@ jobs:
working-directory: /transformers
run: pip freeze
+ - name: Set `machine_type` for report and artifact names
+ working-directory: /transformers
+ shell: bash
+ run: |
+ echo "${{ matrix.machine_type }}"
+
+ if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+ machine_type=single-gpu
+ elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+ machine_type=multi-gpu
+ else
+ machine_type=${{ matrix.machine_type }}
+ fi
+
+ echo "$machine_type"
+ echo "machine_type=$machine_type" >> $GITHUB_ENV
+
- name: Run quantization tests on GPU
working-directory: /transformers
run: |
- python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+ python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
- run: cat /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+ run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
+ - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
- name: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
- path: /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
+ name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
+ path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
run_extract_warnings:
# Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
@@ -471,3 +562,13 @@ jobs:
ci_event: ${{ inputs.ci_event }}
secrets: inherit
+
+ check_new_model_failures:
+ if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
+ name: Check new model failures
+ needs: send_results
+ uses: ./.github/workflows/check_failed_model_tests.yml
+ with:
+ docker: ${{ inputs.docker }}
+ start_sha: ${{ github.sha }}
+ secrets: inherit
\ No newline at end of file
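The "Set `machine_type` for report and artifact names" step repeated in every job above maps the new AWS runner-group names back to the legacy `single-gpu` / `multi-gpu` labels, so report folders, artifact names, and the downstream Slack notification keep their old naming scheme. The same mapping, written in Python purely for illustration (the workflow itself does this in bash):

# Illustrative Python equivalent of the repeated bash "Set `machine_type`" step
# (assumption: shown only to make the mapping explicit; not part of the workflow).
RUNNER_GROUP_TO_MACHINE_TYPE = {
    "aws-g4dn-2xlarge-cache": "single-gpu",   # one T4 GPU
    "aws-g4dn-12xlarge-cache": "multi-gpu",   # several T4 GPUs
}

def normalize_machine_type(runner_group: str) -> str:
    # Fall back to the raw value, exactly like the `else` branch in the bash step.
    return RUNNER_GROUP_TO_MACHINE_TYPE.get(runner_group, runner_group)

assert normalize_machine_type("aws-g4dn-2xlarge-cache") == "single-gpu"
assert normalize_machine_type("aws-g4dn-12xlarge-cache") == "multi-gpu"

Duplicating the bash version in each job keeps every job self-contained at the cost of repetition; a shared composite action would be the natural refactor if more runner groups are added.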
diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml
index 26f888776005..02b022698b0c 100644
--- a/.github/workflows/ssh-runner.yml
+++ b/.github/workflows/ssh-runner.yml
@@ -26,9 +26,38 @@ env:
RUN_PT_TF_CROSS_TESTS: 1
jobs:
+ get_runner:
+ name: "Get runner to use"
+ runs-on: ubuntu-22.04
+ outputs:
+ RUNNER: ${{ steps.set_runner.outputs.RUNNER }}
+ steps:
+ - name: Get runner to use
+ shell: bash
+ run: |
+ if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
+ echo "RUNNER=aws-g4dn-2xlarge-cache" >> $GITHUB_ENV
+ elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
+ echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
+ elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
+ echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
+ elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
+ echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
+ else
+ echo "RUNNER=" >> $GITHUB_ENV
+ fi
+
+ - name: Set runner to use
+ id: set_runner
+ run: |
+ echo ${{ env.RUNNER }}
+ echo "RUNNER=${{ env.RUNNER }}" >> $GITHUB_OUTPUT
+
ssh_runner:
name: "SSH"
- runs-on: ["${{ github.event.inputs.num_gpus }}-gpu", nvidia-gpu, "${{ github.event.inputs.runner_type }}", ci]
+ needs: get_runner
+ runs-on:
+ group: ${{ needs.get_runner.outputs.RUNNER }}
container:
image: ${{ github.event.inputs.docker_image }}
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -53,12 +82,33 @@ jobs:
- name: NVIDIA-SMI
run: |
nvidia-smi
-
+
+ - name: Store Slack infos
+        # Because SSH access can be enabled dynamically when the workflow fails, store the Slack info now so it can be retrieved later during the wait-for-SSH step
+ shell: bash
+ run: |
+ echo "${{ github.actor }}"
+ github_actor=${{ github.actor }}
+ github_actor=${github_actor/'-'/'_'}
+ echo "$github_actor"
+ echo "github_actor=$github_actor" >> $GITHUB_ENV
+
+ - name: Store Slack infos
+        # Because SSH access can be enabled dynamically when the workflow fails, store the Slack info now so it can be retrieved later during the wait-for-SSH step
+ shell: bash
+ run: |
+ echo "${{ env.github_actor }}"
+ if [ "${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" != "" ]; then
+ echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" >> $GITHUB_ENV
+ else
+ echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
+ fi
+
- name: Tailscale # In order to be able to SSH when a test fails
uses: huggingface/tailscale-action@main
with:
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
- slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
+ slackChannel: ${{ env.SLACKCHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true
sshTimeout: 15m
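The new `get_runner` job turns the two workflow-dispatch inputs (`num_gpus`, `runner_type`) into a runner-group name that the `ssh_runner` job then targets via `runs-on: group`. The same selection, expressed as a Python lookup table purely for illustration (the workflow uses a bash if/elif chain):

# Illustrative lookup table for the `get_runner` job
# (assumption: Python used only for illustration; not part of the workflow).
RUNNERS = {
    ("single", "t4"): "aws-g4dn-2xlarge-cache",
    ("multi", "t4"): "aws-g4dn-12xlarge-cache",
    ("single", "a10"): "aws-g5-4xlarge-cache",
    ("multi", "a10"): "aws-g5-12xlarge-cache",
}

def pick_runner(num_gpus: str, runner_type: str) -> str:
    # Empty string mirrors the final `else` branch (unknown combination -> no runner group).
    return RUNNERS.get((num_gpus, runner_type), "")

print(pick_runner("multi", "a10"))  # aws-g5-12xlarge-cache

The Slack steps added in the same file then look up a per-user channel via `secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')]` and fall back to `SLACK_CIFEEDBACK_CHANNEL` when no per-user secret exists.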
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index d0dfeb8b4b71..65eaf755ab3a 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -9,6 +9,8 @@ jobs:
name: Close Stale Issues
if: github.repository == 'huggingface/transformers'
runs-on: ubuntu-22.04
+ permissions:
+ issues: write
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4d62a44ab250..9eeea9971540 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -132,7 +132,7 @@ You will need basic `git` proficiency to contribute to
manual. Type `git --help` in a shell and enjoy! If you prefer books, [Pro
Git](https://git-scm.com/book/en/v2) is a very good reference.
-You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:
+You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:
1. Fork the [repository](https://github.com/huggingface/transformers) by
clicking on the **[Fork](https://github.com/huggingface/transformers/fork)** button on the repository's page. This creates a copy of the code
diff --git a/Makefile b/Makefile
index cfa40b7bd6ee..710c555b74f6 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ autogenerate_code: deps_table_update
repo-consistency:
python utils/check_copies.py
+ python utils/check_modular_conversion.py
python utils/check_table.py
python utils/check_dummies.py
python utils/check_repo.py
@@ -53,7 +54,6 @@ quality:
@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
ruff check $(check_dirs) setup.py conftest.py
ruff format --check $(check_dirs) setup.py conftest.py
- python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
python utils/check_doc_toc.py
python utils/check_docstrings.py --check_all
@@ -62,7 +62,6 @@ quality:
# Format source code automatically and check if there are any problems left that need manual fixing
extra_style_checks:
- python utils/custom_init_isort.py
python utils/sort_auto_mappings.py
python utils/check_doc_toc.py --fix_and_overwrite
@@ -82,6 +81,7 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
fix-copies:
python utils/check_copies.py --fix_and_overwrite
+ python utils/check_modular_conversion.py --fix_and_overwrite
python utils/check_table.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite
python utils/check_doctest_list.py --fix_and_overwrite
diff --git a/README.md b/README.md
index f80835534496..c748e6750662 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,8 @@ limitations under the License.
Français |
Deutsch |
Tiếng Việt |
- العربية |
+ العربية |
+ اردو |
@@ -127,10 +128,10 @@ incredible projects built in the vicinity of transformers.
If you own or use a project that you believe should be part of the list, please open a PR to add it!
-## If you are looking for custom support from the Hugging Face team
+## Serious about AI in your organisation? Build faster with the Hugging Face Enterprise Hub.
-
-
+
+
## Quick tour
@@ -248,7 +249,7 @@ The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/sta
### With pip
-This repository is tested on Python 3.8+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+.
+This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+.
You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
diff --git a/SECURITY.md b/SECURITY.md
index fcb8b9b6f18f..431b17a85042 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -36,5 +36,4 @@ Please inspect the code of the tools before passing them to the Agent to protect
## Reporting a Vulnerability
-🤗 Please feel free to submit vulnerability reports to our private bug bounty program at https://hackerone.com/hugging_face. You'll need to request access to the program by emailing security@huggingface.co.
-Note that you'll need to be invited to our program, so send us a quick email at security@huggingface.co if you've found a vulnerability.
+Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.
diff --git a/benchmark/grafana_dashboard.json b/benchmark/grafana_dashboard.json
new file mode 100644
index 000000000000..3d579f7b3687
--- /dev/null
+++ b/benchmark/grafana_dashboard.json
@@ -0,0 +1,2364 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "grafana",
+ "uid": "-- Grafana --"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": 1,
+ "links": [
+ {
+ "asDropdown": false,
+ "icon": "external link",
+ "includeVars": false,
+ "keepTime": false,
+ "tags": [],
+ "targetBlank": false,
+ "title": "Go to data",
+ "tooltip": "Go to data",
+ "type": "link",
+ "url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
+ }
+ ],
+ "liveNow": true,
+ "panels": [
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "left",
+ "cellOptions": {
+ "type": "auto"
+ },
+ "inspect": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "gpu_name"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 196
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "left"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 407
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "commit_message"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 581
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "commit_id"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 379
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 5,
+ "options": {
+ "cellHeight": "sm",
+ "footer": {
+ "countRows": false,
+ "fields": "",
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": []
+ },
+ "pluginVersion": "11.2.2",
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [
+ {
+ "name": "commit_id",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_name",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50,
+ "whereJsonTree": {
+ "children1": [
+ {
+ "id": "baaa8aaa-89ab-4cde-b012-31922f96de3f",
+ "properties": {
+ "field": "commit_id",
+ "fieldSrc": "field",
+ "operator": "equal",
+ "value": [
+ "${commit}"
+ ],
+ "valueError": [
+ null
+ ],
+ "valueSrc": [
+ "value"
+ ],
+ "valueType": [
+ "text"
+ ]
+ },
+ "type": "rule"
+ }
+ ],
+ "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+ "type": "group"
+ },
+ "whereString": "commit_id = '${commit}'"
+ },
+ "table": "benchmarks"
+ }
+ ],
+ "transparent": true,
+ "type": "table"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "id": 13,
+ "panels": [],
+ "title": "Eager Forward Pass",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 7
+ },
+ "id": 7,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "auto",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "pluginVersion": "11.2.2",
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "First eager forward pass",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 7
+ },
+ "id": 9,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "auto",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Second eager forward pass",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 18
+ },
+ "id": 16,
+ "panels": [],
+ "title": "Time to next token",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 19
+ },
+ "id": 17,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "always",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Time to first token",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 19
+ },
+ "id": 18,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "always",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Time to second token",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 30
+ },
+ "id": 19,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "always",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Time to third token",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 30
+ },
+ "id": 20,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "always",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Time to subsequent next tokens mean",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 41
+ },
+ "id": 14,
+ "panels": [],
+ "title": "Compiled Generate",
+ "type": "row"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 42
+ },
+ "id": 8,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "always",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "First compile generate",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 42
+ },
+ "id": 10,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "auto",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Second compile generate",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "scheme",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 0,
+ "y": 53
+ },
+ "id": 11,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "auto",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Third compile generate",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "continuous-YlBl"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "fillOpacity": 80,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineWidth": 0,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 11,
+ "w": 12,
+ "x": 12,
+ "y": 53
+ },
+ "id": 12,
+ "options": {
+ "barRadius": 0.05,
+ "barWidth": 0.8,
+ "fullHighlight": false,
+ "groupWidth": 0.7,
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": false
+ },
+ "orientation": "auto",
+ "showValue": "auto",
+ "stacking": "none",
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ },
+ "xTickLabelRotation": 0,
+ "xTickLabelSpacing": 0
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50
+ }
+ }
+ ],
+ "title": "Fourth compile generate",
+ "transformations": [
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {},
+ "sort": [
+ {
+ "field": "time"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": true,
+ "type": "barchart"
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 64
+ },
+ "id": 15,
+ "panels": [
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": 60000,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 65
+ },
+ "id": 1,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT\n d.cpu_util,\n d.time\nFROM\n benchmarks AS b\n JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id\nWHERE\n branch = '${branch}';",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [
+ {
+ "name": "cpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "\"time\"",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50,
+ "whereJsonTree": {
+ "children1": [
+ {
+ "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+ "properties": {
+ "field": "commit_id",
+ "fieldSrc": "field",
+ "operator": "equal",
+ "value": [
+ "${commit}"
+ ],
+ "valueError": [
+ null
+ ],
+ "valueSrc": [
+ "value"
+ ],
+ "valueType": [
+ "text"
+ ]
+ },
+ "type": "rule"
+ }
+ ],
+ "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+ "type": "group"
+ },
+ "whereString": "commit_id = '${commit}'"
+ },
+ "table": "measurements"
+ }
+ ],
+ "title": "CPU Utilization",
+ "transparent": true,
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": 60000,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 65
+ },
+ "id": 4,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT\n b.commit_id,\n d.gpu_util,\n d.time\nFROM\n benchmarks AS b\n JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id\nWHERE\n branch = '${branch}';",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [
+ {
+ "name": "cpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "\"time\"",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50,
+ "whereJsonTree": {
+ "children1": [
+ {
+ "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+ "properties": {
+ "field": "commit_id",
+ "fieldSrc": "field",
+ "operator": "equal",
+ "value": [
+ "${commit}"
+ ],
+ "valueError": [
+ null
+ ],
+ "valueSrc": [
+ "value"
+ ],
+ "valueType": [
+ "text"
+ ]
+ },
+ "type": "rule"
+ }
+ ],
+ "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+ "type": "group"
+ },
+ "whereString": "commit_id = '${commit}'"
+ },
+ "table": "measurements"
+ }
+ ],
+ "title": "GPU Utilization",
+ "transparent": true,
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": 60000,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "decmbytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 74
+ },
+ "id": 2,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT d.mem_megabytes, d.time FROM benchmarks AS b JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id WHERE branch = '${branch}';",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [
+ {
+ "name": "cpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "\"time\"",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50,
+ "whereJsonTree": {
+ "children1": [
+ {
+ "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+ "properties": {
+ "field": "commit_id",
+ "fieldSrc": "field",
+ "operator": "equal",
+ "value": [
+ "${commit}"
+ ],
+ "valueError": [
+ null
+ ],
+ "valueSrc": [
+ "value"
+ ],
+ "valueType": [
+ "text"
+ ]
+ },
+ "type": "rule"
+ }
+ ],
+ "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+ "type": "group"
+ },
+ "whereString": "commit_id = '${commit}'"
+ },
+ "table": "measurements"
+ }
+ ],
+ "title": "Memory usage",
+ "transparent": true,
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "default": true,
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisBorderShow": false,
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "barWidthFactor": 0.6,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "insertNulls": 60000,
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "decmbytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 74
+ },
+ "id": 3,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "bdz2yss7sxo1sc"
+ },
+ "editorMode": "code",
+ "format": "table",
+ "rawQuery": true,
+ "rawSql": "SELECT\n d.gpu_mem_megabytes,\n d.time\nFROM\n benchmarks AS b\n JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id\nWHERE\n branch = '${branch}';",
+ "refId": "A",
+ "sql": {
+ "columns": [
+ {
+ "parameters": [
+ {
+ "name": "cpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_util",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "gpu_mem_megabytes",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ },
+ {
+ "parameters": [
+ {
+ "name": "\"time\"",
+ "type": "functionParameter"
+ }
+ ],
+ "type": "function"
+ }
+ ],
+ "groupBy": [
+ {
+ "property": {
+ "type": "string"
+ },
+ "type": "groupBy"
+ }
+ ],
+ "limit": 50,
+ "whereJsonTree": {
+ "children1": [
+ {
+ "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+ "properties": {
+ "field": "commit_id",
+ "fieldSrc": "field",
+ "operator": "equal",
+ "value": [
+ "${commit}"
+ ],
+ "valueError": [
+ null
+ ],
+ "valueSrc": [
+ "value"
+ ],
+ "valueType": [
+ "text"
+ ]
+ },
+ "type": "rule"
+ }
+ ],
+ "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+ "type": "group"
+ },
+ "whereString": "commit_id = '${commit}'"
+ },
+ "table": "measurements"
+ }
+ ],
+ "title": "GPU memory usage",
+ "transparent": true,
+ "type": "timeseries"
+ }
+ ],
+ "title": "Usage metrics",
+ "type": "row"
+ }
+ ],
+ "refresh": "",
+ "schemaVersion": 39,
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "main",
+ "value": "main"
+ },
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "definition": "SELECT DISTINCT branch FROM benchmarks;",
+ "description": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "branch",
+ "multi": false,
+ "name": "branch",
+ "options": [],
+ "query": "SELECT DISTINCT branch FROM benchmarks;",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "1729701492845",
+ "value": "1729701492845"
+ },
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "definition": "SELECT created_at - INTERVAL '5 secs' FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id ASC LIMIT 1;",
+ "description": "",
+ "hide": 2,
+ "includeAll": false,
+ "multi": false,
+ "name": "StartTime",
+ "options": [],
+ "query": "SELECT created_at - INTERVAL '5 secs' FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id ASC LIMIT 1;",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "1730120430069",
+ "value": "1730120430069"
+ },
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "definition": "SELECT time + INTERVAL '5 secs' FROM benchmarks AS b JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id WHERE branch = '${branch}' ORDER BY b.benchmark_id DESC, d.measurement_id DESC LIMIT 1;",
+ "description": "",
+ "hide": 2,
+ "includeAll": false,
+ "multi": false,
+ "name": "EndTime",
+ "options": [],
+ "query": "SELECT time + INTERVAL '5 secs' FROM benchmarks AS b JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id WHERE branch = '${branch}' ORDER BY b.benchmark_id DESC, d.measurement_id DESC LIMIT 1;",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "NVIDIA A10G",
+ "value": "NVIDIA A10G"
+ },
+ "datasource": {
+ "type": "grafana-postgresql-datasource",
+ "uid": "be28nkzirtb0gd"
+ },
+ "definition": "SELECT DISTINCT gpu_name FROM benchmarks;",
+ "hide": 0,
+ "includeAll": false,
+ "label": "GPU",
+ "multi": false,
+ "name": "gpu_name",
+ "options": [],
+ "query": "SELECT DISTINCT gpu_name FROM benchmarks;",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "type": "query"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "10",
+ "value": "10"
+ },
+ "description": "The number of commits to display, going from most recent to the nth commit.",
+ "hide": 0,
+ "label": "Last # of commits",
+ "name": "last_n_commits",
+ "options": [
+ {
+ "selected": true,
+ "text": "10",
+ "value": "10"
+ }
+ ],
+ "query": "10",
+ "skipUrlSync": false,
+ "type": "textbox"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "hidden": false
+ },
+ "timezone": "browser",
+ "title": "Transformers benchmarks",
+ "uid": "fdz33iyzln9c0a",
+ "version": 4,
+ "weekStart": ""
+}
diff --git a/benchmark/init_db.sql b/benchmark/init_db.sql
new file mode 100644
index 000000000000..573cc11518e8
--- /dev/null
+++ b/benchmark/init_db.sql
@@ -0,0 +1,33 @@
+CREATE TABLE IF NOT EXISTS benchmarks (
+ benchmark_id SERIAL PRIMARY KEY,
+ branch VARCHAR(255),
+ commit_id VARCHAR(72),
+ commit_message VARCHAR(70),
+ gpu_name VARCHAR(255),
+ created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
+);
+
+CREATE INDEX IF NOT EXISTS benchmarks_benchmark_id_idx ON benchmarks (benchmark_id);
+
+CREATE INDEX IF NOT EXISTS benchmarks_branch_idx ON benchmarks (branch);
+
+CREATE TABLE IF NOT EXISTS device_measurements (
+ measurement_id SERIAL PRIMARY KEY,
+ benchmark_id int REFERENCES benchmarks (benchmark_id),
+ cpu_util double precision,
+ mem_megabytes double precision,
+ gpu_util double precision,
+ gpu_mem_megabytes double precision,
+ time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
+);
+
+CREATE INDEX IF NOT EXISTS device_measurements_benchmark_id_idx ON device_measurements (benchmark_id);
+
+CREATE TABLE IF NOT EXISTS model_measurements (
+ measurement_id SERIAL PRIMARY KEY,
+ benchmark_id int REFERENCES benchmarks (benchmark_id),
+ measurements jsonb,
+ time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
+);
+
+CREATE INDEX IF NOT EXISTS model_measurements_benchmark_id_idx ON model_measurements (benchmark_id);
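+
+-- Illustrative usage (placeholder values, not executed by this init script): a benchmark run is
+-- registered first, then device and model measurements reference it via `benchmark_id`.
+-- INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name)
+--     VALUES ('main', 'abc1234', 'example commit message', 'NVIDIA A10G') RETURNING benchmark_id;
+-- INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes)
+--     VALUES (1, 12.5, 2048.0, 75.0, 16384.0);
+-- INSERT INTO model_measurements (benchmark_id, measurements)
+--     VALUES (1, '{"model_load_time": 12.3}');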
diff --git a/benchmark/llama.py b/benchmark/llama.py
new file mode 100644
index 000000000000..4a2c57422e6f
--- /dev/null
+++ b/benchmark/llama.py
@@ -0,0 +1,408 @@
+import argparse
+import json
+import logging
+import os
+import sys
+from statistics import mean
+from threading import Event, Thread
+from time import perf_counter, sleep
+from typing import Optional
+import gpustat
+import psutil
+import psycopg2
+import torch
+
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
+from psycopg2.extras import Json
+from psycopg2.extensions import register_adapter
+
+
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+handler = logging.StreamHandler(sys.stdout)
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
+os.environ["TOKENIZERS_PARALLELISM"] = "1"
+torch.set_float32_matmul_precision("high")
+register_adapter(dict, Json)
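+
+# Illustrative invocation (the values are placeholders; in practice a CI job passes them):
+#   python benchmark/llama.py "$BRANCH_NAME" "$COMMIT_SHA" "$COMMIT_MESSAGE"
+# The script expects a reachable PostgreSQL database named `metrics`, initialized with
+# benchmark/init_db.sql (see the `psycopg2.connect("dbname=metrics")` calls below).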
+
+
+def parse_arguments():
+ """
+ Parse command line arguments for the benchmarking CLI.
+ """
+    parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers library.")
+
+ parser.add_argument(
+ "branch",
+ type=str,
+ help="The branch name on which the benchmarking is performed.",
+ )
+
+ parser.add_argument(
+ "commit_id",
+ type=str,
+ help="The commit hash on which the benchmarking is performed.",
+ )
+
+ parser.add_argument(
+ "commit_msg",
+ type=str,
+ help="The commit message associated with the commit, truncated to 70 characters.",
+ )
+
+ args = parser.parse_args()
+
+ return args.branch, args.commit_id, args.commit_msg
+
+
+def collect_metrics(benchmark_id, continue_metric_collection):
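+    # Runs in a background thread: roughly every 10 ms (see the `sleep(0.01)` below) it samples
+    # the benchmark process' CPU/RAM usage and the GPU utilization/memory, and inserts one row
+    # per sample into `device_measurements` until the main thread sets `continue_metric_collection`.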
+ p = psutil.Process(os.getpid())
+ conn = psycopg2.connect("dbname=metrics")
+ cur = conn.cursor()
+ while not continue_metric_collection.is_set():
+ with p.oneshot():
+ cpu_util = p.cpu_percent()
+ mem_megabytes = p.memory_info().rss / (1024 * 1024)
+ gpu_stats = gpustat.GPUStatCollection.new_query()
+ gpu_util = gpu_stats[0]["utilization.gpu"]
+ gpu_mem_megabytes = gpu_stats[0]["memory.used"]
+ cur.execute(
+ "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
+ (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
+ )
+ sleep(0.01)
+ conn.commit()
+ conn.close()
+
+
+def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
+ continue_metric_collection = Event()
+ metrics_thread = None
+ try:
+ gpu_stats = gpustat.GPUStatCollection.new_query()
+ gpu_name = gpu_stats[0]["name"]
+ conn = psycopg2.connect("dbname=metrics")
+ cur = conn.cursor()
+ cur.execute(
+ "INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
+ (branch, commit_id, commit_msg, gpu_name),
+ )
+ conn.commit()
+ benchmark_id = cur.fetchone()[0]
+ logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
+ metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
+ metrics_thread.start()
+ logger.info("started background thread to fetch device metrics")
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
+
+ device = "cuda"
+ ckpt = "meta-llama/Llama-2-7b-hf"
+
+ logger.info("downloading weights")
+ # This is to avoid counting download in model load time measurement
+ model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
+ gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
+ logger.info("loading model")
+ start = perf_counter()
+ model = AutoModelForCausalLM.from_pretrained(
+ ckpt, torch_dtype=torch.float16, generation_config=gen_config
+ ).eval()
+ model.to(device)
+ torch.cuda.synchronize()
+ end = perf_counter()
+ model_load_time = end - start
+ logger.info(f"loaded model in: {model_load_time}s")
+
+ tokenizer = AutoTokenizer.from_pretrained(ckpt)
+
+ prompt = "Why dogs are so cute?"
+ inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+            # Specify the max length (including both the prompt and the response)
+            # When calling `generate` with `cache_implementation="static"` later, this is also used to create a `StaticCache` object
+            # with sequence length = `max_length`. The longer it is, the more you can re-use it.
+ seq_length = inputs["input_ids"].shape[1]
+ model.generation_config.max_length = seq_length + num_tokens_to_generate
+ batch_size = inputs["input_ids"].shape[0]
+
+ # Copied from the gpt-fast repo
+ def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
+ q = torch.empty_like(probs_sort).exponential_(1)
+ return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
+
+ def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
+ logits = logits / max(temperature, 1e-5)
+
+ if top_k is not None:
+ v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+ pivot = v.select(-1, -1).unsqueeze(-1)
+ logits = torch.where(logits < pivot, -float("Inf"), logits)
+ probs = torch.nn.functional.softmax(logits, dim=-1)
+ return probs
+
+ def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
+ probs = logits_to_probs(logits[:, -1], temperature, top_k)
+ idx_next = multinomial_sample_one_no_sync(probs)
+ return idx_next, probs
+
+ def decode_one_token(model, cur_token, cache_position, past_key_values):
+ logits = model(
+ cur_token,
+ cache_position=cache_position,
+ past_key_values=past_key_values,
+ return_dict=False,
+ use_cache=True,
+ )[0]
+ new_token = sample(logits, temperature=0.6, top_k=5)[0]
+ return new_token
+
+ #########
+ # Eager #
+ #########
+ with torch.no_grad():
+ past_key_values = StaticCache(
+ model.config,
+ batch_size=batch_size,
+ device=device,
+ dtype=torch.float16,
+ max_cache_len=seq_length + num_tokens_to_generate,
+ )
+ cache_position = torch.arange(seq_length, device=device)
+ start = perf_counter()
+ model(
+ **inputs,
+ cache_position=cache_position,
+ past_key_values=past_key_values,
+ return_dict=False,
+ use_cache=True,
+ )
+ end = perf_counter()
+ first_eager_fwd_pass_time = end - start
+ logger.info(f"completed first eager fwd pass in: {first_eager_fwd_pass_time}s")
+ start = perf_counter()
+ output = model.generate(**inputs, do_sample=False)
+ end = perf_counter()
+ first_eager_generate_time = end - start
+ logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
+ logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+ past_key_values = StaticCache(
+ model.config,
+ batch_size=batch_size,
+ device=device,
+ dtype=torch.float16,
+ max_cache_len=seq_length + num_tokens_to_generate,
+ )
+ cache_position = torch.arange(seq_length, device=device)
+ start = perf_counter()
+ model(
+ **inputs,
+ cache_position=cache_position,
+ past_key_values=past_key_values,
+ return_dict=False,
+ use_cache=True,
+ )
+ end = perf_counter()
+ second_eager_fwd_pass_time = end - start
+ logger.info(f"completed second eager fwd pass in: {second_eager_fwd_pass_time}s")
+ start = perf_counter()
+            output = model.generate(**inputs, do_sample=False)
+ end = perf_counter()
+ second_eager_generate_time = end - start
+ logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
+ logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+ torch.compiler.reset()
+
+ ################
+ # Forward pass #
+ ################
+
+            # `torch.compile(model, ...)` is not recommended, as it also compiles callbacks
+            # and the full generate loop. We recommend compiling only the forward pass for now.
+            # "reduce-overhead" will use cudagraphs.
+ generated_ids = torch.zeros(
+ (batch_size, num_tokens_to_generate + seq_length), dtype=torch.int, device=device
+ )
+
+ generated_ids[:, :seq_length] = inputs["input_ids"]
+ decode_one_token = torch.compile(decode_one_token, mode="reduce-overhead", fullgraph=True)
+ # model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
+ # TODO use decode_one_token(model, input_id.clone(), cache_position) for verification
+ past_key_values = StaticCache(
+ model.config,
+ batch_size=batch_size,
+ device=device,
+ dtype=torch.float16,
+ max_cache_len=seq_length + num_tokens_to_generate + 10,
+ )
+ cache_position = torch.arange(seq_length, device=device)
+ all_generated_tokens = []
+ ### First compile, prefill
+ start = perf_counter()
+ next_token = decode_one_token(
+ model, inputs["input_ids"], cache_position=cache_position, past_key_values=past_key_values
+ )
+ torch.cuda.synchronize()
+ end = perf_counter()
+ time_to_first_token = end - start
+ logger.info(f"completed first compile generation in: {time_to_first_token}s")
+ cache_position += 1
+ all_generated_tokens += next_token.clone().detach().cpu().tolist()
+
+ cache_position = torch.tensor([seq_length], device=device)
+ ### First compile, decoding
+ start = perf_counter()
+ next_token = decode_one_token(
+ model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
+ )
+ torch.cuda.synchronize()
+ end = perf_counter()
+ time_to_second_token = end - start
+            logger.info(f"completed second compile generation in: {time_to_second_token}s")
+ cache_position += 1
+ all_generated_tokens += next_token.clone().detach().cpu().tolist()
+
+ ### Second compile, decoding
+ start = perf_counter()
+ next_token = decode_one_token(
+ model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
+ )
+ torch.cuda.synchronize()
+ end = perf_counter()
+ time_to_third_token = end - start
+            logger.info(f"completed third compile generation in: {time_to_third_token}s")
+ cache_position += 1
+ all_generated_tokens += next_token.clone().detach().cpu().tolist()
+
+ ### Using cuda graphs decoding
+
+ start = perf_counter()
+ for _ in range(1, num_tokens_to_generate):
+ all_generated_tokens += next_token.clone().detach().cpu().tolist()
+ next_token = decode_one_token(
+ model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
+ )
+ cache_position += 1
+ torch.cuda.synchronize()
+ end = perf_counter()
+ mean_time_to_next_token = (end - start) / num_tokens_to_generate
+            logger.info(f"mean time per generated token (cuda graphs decoding): {mean_time_to_next_token}s")
+ logger.info(f"generated: {tokenizer.batch_decode(all_generated_tokens)}")
+
+ ####################
+ # Generate compile #
+ ####################
+ torch.compiler.reset()
+            # we will not compile the full generate call as it's too intensive, though we measure the full forward!
+
+ past_key_values = StaticCache(
+ model.config,
+ batch_size=batch_size,
+ device=device,
+ dtype=torch.float16,
+ max_cache_len=seq_length + 128,
+ )
+
+ # 1st call
+ start = perf_counter()
+ output = model.generate(**inputs, past_key_values=past_key_values)
+ torch.cuda.synchronize()
+ end = perf_counter()
+ first_compile_generate_time = end - start
+ logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
+ logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+ past_key_values = StaticCache(
+ model.config,
+ batch_size=batch_size,
+ device=device,
+ dtype=torch.float16,
+ max_cache_len=seq_length + 128,
+ )
+ # 2nd call
+ start = perf_counter()
+ output = model.generate(**inputs, past_key_values=past_key_values)
+ torch.cuda.synchronize()
+ end = perf_counter()
+ second_compile_generate_time = end - start
+ logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
+ logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+ past_key_values = StaticCache(
+ model.config,
+ batch_size=batch_size,
+ device=device,
+ dtype=torch.float16,
+ max_cache_len=seq_length + 128,
+ )
+
+            # 3rd call
+            start = perf_counter()
+            output = model.generate(**inputs, past_key_values=past_key_values)
+            torch.cuda.synchronize()
+            end = perf_counter()
+            third_compile_generate_time = end - start
+            logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
+ logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+ past_key_values = StaticCache(
+ model.config,
+ batch_size=batch_size,
+ device=device,
+ dtype=torch.float16,
+ max_cache_len=seq_length + 128,
+ )
+            # 4th call
+            start = perf_counter()
+            output = model.generate(**inputs, past_key_values=past_key_values)
+            torch.cuda.synchronize()
+            end = perf_counter()
+            fourth_compile_generate_time = end - start
+            logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
+ logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+ cur.execute(
+ """
+ INSERT INTO model_measurements (
+ benchmark_id,
+ measurements
+ ) VALUES (%s, %s)
+ """,
+ (
+ benchmark_id,
+ {
+ "model_load_time": model_load_time,
+ "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
+ "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
+ "first_eager_generate_time_secs": first_eager_generate_time,
+ "second_eager_generate_time_secs": second_eager_generate_time,
+ "time_to_first_token_secs": time_to_first_token,
+ "time_to_second_token_secs": time_to_second_token,
+ "time_to_third_token_secs": time_to_third_token,
+ "time_to_next_token_mean_secs": mean_time_to_next_token,
+ "first_compile_generate_time_secs": first_compile_generate_time,
+ "second_compile_generate_time_secs": second_compile_generate_time,
+ "third_compile_generate_time_secs": third_compile_generate_time,
+ "fourth_compile_generate_time_secs": fourth_compile_generate_time,
+ },
+ ),
+ )
+ conn.commit()
+ conn.close()
+ except Exception as e:
+ logger.error(f"Caught exception: {e}")
+ continue_metric_collection.set()
+ if metrics_thread is not None:
+ metrics_thread.join()
+
+
+if __name__ == "__main__":
+ branch, commit_id, commit_msg = parse_arguments()
+ run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20)
diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
new file mode 100644
index 000000000000..50e9dfaddfa4
--- /dev/null
+++ b/benchmark/requirements.txt
@@ -0,0 +1,5 @@
+gpustat==1.1.1
+psutil==6.0.0
+psycopg2==2.9.9
+torch>=2.4.0
+hf_transfer
\ No newline at end of file
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 000000000000..2a71ab6fb6ec
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,9 @@
+# Dockerfiles for `transformers`
+
+In this folder you will find various Dockerfiles and some subfolders.
+- Dockerfiles (e.g. `consistency.dockerfile`) placed directly under `~/docker` are used for our "fast" CIs. You should be able to use them for tasks that only need a CPU. For example, `torch-light` is a very lightweight container (703MiB).
+- Subfolders contain Dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. For instance, `~/docker/transformers-pytorch-gpu` includes additional dependencies that let us run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs).
+
+Note that in both cases you still need to run `uv pip install -e .`, which should take around 5 seconds. We do it outside the Dockerfile because of our CI's needs: we check out a new branch each time, and the `transformers` code is updated accordingly.
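+
+As a rough sketch (the image tag and mount path below are placeholders; adapt them to your setup):
+
+```bash
+# build one of the "fast" CPU images from the repository root
+docker build -f docker/torch-light.dockerfile -t transformers-torch-light .
+# start a container with your checkout mounted
+docker run -it -v "$(pwd)":/workspace -w /workspace transformers-torch-light bash
+# then, inside the container, install the checked-out transformers in editable mode
+uv pip install -e .
+```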
+
+We are open to contributions, and we invite the community to create Dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs:
\ No newline at end of file
diff --git a/docker/consistency.dockerfile b/docker/consistency.dockerfile
index 21e0de8830d6..1f09626d8904 100644
--- a/docker/consistency.dockerfile
+++ b/docker/consistency.dockerfile
@@ -13,4 +13,4 @@ RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transforme
RUN git lfs install
RUN pip uninstall -y transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
+RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
\ No newline at end of file
diff --git a/docker/torch-light.dockerfile b/docker/torch-light.dockerfile
index 524a68fd5540..710a599abbe9 100644
--- a/docker/torch-light.dockerfile
+++ b/docker/torch-light.dockerfile
@@ -6,6 +6,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
+RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]"
RUN pip uninstall -y transformers
\ No newline at end of file
diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile
index 9c5e3c914157..b597f5a73fb5 100644
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
@@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).
-ARG PYTORCH='2.4.0'
+ARG PYTORCH='2.5.1'
# (not always a valid torch version)
ARG INTEL_TORCH_EXT='2.3.0'
# Example: `cu102`, `cu113`, etc.
@@ -26,7 +26,7 @@ RUN git clone https://github.com/huggingface/transformers && cd transformers &&
# 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
# 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`.
# Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions).
-RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 tensorflow_text tensorflow_probability && python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
+RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 "tensorflow_text<2.16" "tensorflow_probability<0.22" && python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
RUN python3 -m pip uninstall -y flax jax
@@ -43,7 +43,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
# For video model testing
-RUN python3 -m pip install --no-cache-dir decord av==9.2.0
+RUN python3 -m pip install --no-cache-dir av==9.2.0
# Some slow tests require bnb
RUN python3 -m pip install --no-cache-dir bitsandbytes
diff --git a/docker/transformers-pytorch-gpu/Dockerfile b/docker/transformers-pytorch-gpu/Dockerfile
index 2c1f153eef27..f22d77b9372d 100644
--- a/docker/transformers-pytorch-gpu/Dockerfile
+++ b/docker/transformers-pytorch-gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
@@ -11,7 +11,7 @@ ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
# If set to nothing, will install the latest version
-ARG PYTORCH='2.4.0'
+ARG PYTORCH='2.5.1'
ARG TORCH_VISION=''
ARG TORCH_AUDIO=''
# Example: `cu102`, `cu113`, etc.
diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile
index 6d94dbee5aa0..53e66662f9ee 100755
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
@@ -9,12 +9,12 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).
-ARG PYTORCH='2.2.1'
+ARG PYTORCH='2.4.1'
# Example: `cu102`, `cu113`, etc.
ARG CUDA='cu118'
RUN apt update
-RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python python3-pip ffmpeg
+RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
RUN python3 -m pip install --no-cache-dir --upgrade pip
ARG REF=main
@@ -53,10 +53,10 @@ RUN python3 -m pip install --no-cache-dir gguf
# Add autoawq for quantization testing
# >=v0.2.3 needed for compatibility with torch 2.2.1
-RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl
+RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp310-cp310-linux_x86_64.whl
# Add quanto for quantization testing
-RUN python3 -m pip install --no-cache-dir quanto
+RUN python3 -m pip install --no-cache-dir optimum-quanto
# Add eetq for quantization testing
RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git
diff --git a/docker/transformers-tensorflow-gpu/Dockerfile b/docker/transformers-tensorflow-gpu/Dockerfile
index adccee1ace49..378491a6c600 100644
--- a/docker/transformers-tensorflow-gpu/Dockerfile
+++ b/docker/transformers-tensorflow-gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
+FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
@@ -18,7 +18,7 @@ RUN [ ${#TENSORFLOW} -gt 0 ] && VERSION='tensorflow=='$TENSORFLOW'.*' || VERSIO
RUN python3 -m pip uninstall -y torch flax
RUN python3 -m pip install -U "itsdangerous<2.1.0"
-RUN python3 -m pip install --no-cache-dir -U tensorflow_probability
+RUN python3 -m pip install --no-cache-dir -U "tensorflow_probability<0.22"
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
diff --git a/docs/README.md b/docs/README.md
index 7dbcefc0483c..bb54d7004130 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -276,14 +276,14 @@ building the return.
Here's an example of a single value return:
-```
+```python
Returns:
`List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
```
Here's an example of a tuple return, comprising several objects:
-```
+```python
Returns:
`tuple(torch.FloatTensor)` comprising various elements depending on the configuration ([`BertConfig`]) and inputs:
- ** loss** (*optional*, returned when `masked_lm_labels` is provided) `torch.FloatTensor` of shape `(1,)` --
@@ -322,10 +322,9 @@ includes an example of how to transcribe speech to text in the
The syntax for Example docstrings can look as follows:
-```
+```python
Example:
- ```python
>>> from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
>>> from datasets import load_dataset
>>> import torch
@@ -347,7 +346,6 @@ The syntax for Example docstrings can look as follows:
>>> transcription = processor.batch_decode(predicted_ids)
>>> transcription[0]
'MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL'
- ```
```
The docstring should give a minimal, clear example of how the respective model
diff --git a/docs/TRANSLATING.md b/docs/TRANSLATING.md
index 49747821f476..64dced450987 100644
--- a/docs/TRANSLATING.md
+++ b/docs/TRANSLATING.md
@@ -1,57 +1,70 @@
-### Translating the Transformers documentation into your language
+# Translating the Transformers documentation into your language
-As part of our mission to democratize machine learning, we'd love to make the Transformers library available in many more languages! Follow the steps below if you want to help translate the documentation into your language 🙏.
+As part of our mission to democratize machine learning, we aim to make the Transformers library available in many more languages! Follow the steps below to help translate the documentation into your language.
-**🗞️ Open an issue**
+## Open an Issue
-To get started, navigate to the [Issues](https://github.com/huggingface/transformers/issues) page of this repo and check if anyone else has opened an issue for your language. If not, open a new issue by selecting the "Translation template" from the "New issue" button.
+1. Navigate to the Issues page of this repository.
+2. Check if anyone has already opened an issue for your language.
+3. If not, create a new issue by selecting the "Translation template" from the "New issue" button.
+4. Post a comment indicating which chapters you’d like to work on, and we’ll add your name to the list.
-Once an issue exists, post a comment to indicate which chapters you'd like to work on, and we'll add your name to the list.
+## Fork the Repository
+1. First, fork the Transformers repo by clicking the Fork button in the top-right corner.
+2. Clone your fork to your local machine for editing with the following command:
-**🍴 Fork the repository**
+ ```bash
+ git clone https://github.com/YOUR-USERNAME/transformers.git
+ ```
+
+ Replace `YOUR-USERNAME` with your GitHub username.
-First, you'll need to [fork the Transformers repo](https://docs.github.com/en/get-started/quickstart/fork-a-repo). You can do this by clicking on the **Fork** button on the top-right corner of this repo's page.
+## Copy-paste the English version with a new language code
-Once you've forked the repo, you'll want to get the files on your local machine for editing. You can do that by cloning the fork with Git as follows:
+The documentation files are organized in the following directory:
-```bash
-git clone https://github.com/YOUR-USERNAME/transformers.git
-```
+- **docs/source**: This contains all documentation materials organized by language.
-**📋 Copy-paste the English version with a new language code**
+To copy the English version to your new language directory:
-The documentation files are in one leading directory:
+1. Navigate to your fork of the repository:
-- [`docs/source`](https://github.com/huggingface/transformers/tree/main/docs/source): All the documentation materials are organized here by language.
+ ```bash
+ cd ~/path/to/transformers/docs
+ ```
-You'll only need to copy the files in the [`docs/source/en`](https://github.com/huggingface/transformers/tree/main/docs/source/en) directory, so first navigate to your fork of the repo and run the following:
+ Replace `~/path/to` with your actual path.
-```bash
-cd ~/path/to/transformers/docs
-cp -r source/en source/LANG-ID
-```
+2. Run the following command:
-Here, `LANG-ID` should be one of the ISO 639-1 or ISO 639-2 language codes -- see [here](https://www.loc.gov/standards/iso639-2/php/code_list.php) for a handy table.
+ ```bash
+ cp -r source/en source/LANG-ID
+ ```
-**✍️ Start translating**
+ Replace `LANG-ID` with the appropriate ISO 639-1 or ISO 639-2 language code (see [this table](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) for reference).
-The fun part comes - translating the text!
+## Start translating
-The first thing we recommend is translating the part of the `_toctree.yml` file that corresponds to your doc chapter. This file is used to render the table of contents on the website.
+Begin translating the text!
-> 🙋 If the `_toctree.yml` file doesn't yet exist for your language, you can create one by copy-pasting from the English version and deleting the sections unrelated to your chapter. Just make sure it exists in the `docs/source/LANG-ID/` directory!
+1. Start with the `_toctree.yml` file that corresponds to your documentation chapter. This file is essential for rendering the table of contents on the website.
-The fields you should add are `local` (with the name of the file containing the translation; e.g. `autoclass_tutorial`), and `title` (with the title of the doc in your language; e.g. `Load pretrained instances with an AutoClass`) -- as a reference, here is the `_toctree.yml` for [English](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml):
+ - If the `_toctree.yml` file doesn’t exist for your language, create one by copying the English version and removing unrelated sections.
+ - Ensure it is placed in the `docs/source/LANG-ID/` directory.
-```yaml
-- sections:
- - local: pipeline_tutorial # Do not change this! Use the same name for your .md file
- title: Pipelines for inference # Translate this!
- ...
- title: Tutorials # Translate this!
-```
+ Here’s an example structure for the `_toctree.yml` file:
-Once you have translated the `_toctree.yml` file, you can start translating the [MDX](https://mdxjs.com/) files associated with your docs chapter.
+ ```yaml
+ - sections:
+ - local: pipeline_tutorial # Keep this name for your .md file
+ title: Pipelines for Inference # Translate this
+ ...
+ title: Tutorials # Translate this
+ ```
-> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu.
+2. Once you’ve translated the `_toctree.yml`, move on to translating the associated MDX files.
+
+## Collaborate and share
+
+If you'd like assistance with your translation, open an issue and tag `@stevhliu`. Feel free to share resources or glossaries to ensure consistent terminology.
diff --git a/docs/source/ar/_config.py b/docs/source/ar/_config.py
new file mode 100644
index 000000000000..f49e4e473196
--- /dev/null
+++ b/docs/source/ar/_config.py
@@ -0,0 +1,14 @@
+# docstyle-ignore
+INSTALL_CONTENT = """
+# Transformers installation
+! pip install transformers datasets evaluate accelerate
+# To install from source instead of the last release, comment the command above and uncomment the following one.
+# ! pip install git+https://github.com/huggingface/transformers.git
+"""
+
+notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
+black_avoid_patterns = {
+ "{processor_class}": "FakeProcessorClass",
+ "{model_class}": "FakeModelClass",
+ "{object_class}": "FakeObjectClass",
+}
diff --git a/docs/source/ar/_toctree.yml b/docs/source/ar/_toctree.yml
new file mode 100644
index 000000000000..e66ed3381e2c
--- /dev/null
+++ b/docs/source/ar/_toctree.yml
@@ -0,0 +1,892 @@
+- sections:
+ - local: index
+ title: 🤗 المحولات
+ - local: quicktour
+ title: جولة سريعة
+ - local: installation
+ title: التثبيت
+ title: البدء
+- sections:
+ - local: pipeline_tutorial
+ title: تشغيل الاستنتاج باستخدام خطوط الأنابيب
+ - local: autoclass_tutorial
+ title: كتابة تعليمات برمجية متكيفه باستخدام AutoClass
+ - local: preprocessing
+ title: معالجة البيانات مسبقًا
+ - local: training
+ title: ضبط نموذج مسبق التدريب
+ - local: run_scripts
+ title: التدريب باستخدام نص برمجي
+ - local: accelerate
+ title: إعداد تدريب موزع باستخدام 🤗 Accelerate
+ - local: peft
+ title: تحميل النماذج المخصصة وتدريبها باستخدام 🤗 PEFT
+ - local: model_sharing
+ title: مشاركة نموذجك
+ - local: agents
+ title: الوكلاء
+ - local: llm_tutorial
+ title: التوليد باستخدام LLMs
+ - local: conversations
+ title: الدردشة مع المحولات
+ title: البرامج التعليمية
+# - sections:
+# - isExpanded: false
+# sections:
+# - local: tasks/sequence_classification
+# title: تصنيف النصوص
+# - local: tasks/token_classification
+# title: تصنيف الرموز
+# - local: tasks/question_answering
+# title: الإجابة على الأسئلة
+# - local: tasks/language_modeling
+# title: نمذجة اللغة السببية
+# - local: tasks/masked_language_modeling
+# title: نمذجة اللغة المقنعة
+# - local: tasks/translation
+# title: الترجمة
+# - local: tasks/summarization
+# title: التلخيص
+# - local: tasks/multiple_choice
+# title: الاختيار المتعدد
+# title: معالجة اللغات الطبيعية
+# - isExpanded: false
+# sections:
+# - local: tasks/audio_classification
+# title: تصنيف الصوت
+# - local: tasks/asr
+# title: التعرف التلقائي على الكلام
+# title: الصوت
+# - isExpanded: false
+# sections:
+# - local: tasks/image_classification
+# title: تصنيف الصور
+# - local: tasks/semantic_segmentation
+# title: تجزئة الصور
+# - local: tasks/video_classification
+# title: تصنيف الفيديو
+# - local: tasks/object_detection
+# title: اكتشاف الأشياء
+# - local: tasks/zero_shot_object_detection
+# title: اكتشاف الأشياء بدون تدريب
+# - local: tasks/zero_shot_image_classification
+# title: تصنيف الصور بدون تدريب
+# - local: tasks/monocular_depth_estimation
+# title: تقدير العمق
+# - local: tasks/image_to_image
+# title: صورة إلى صورة
+# - local: tasks/image_feature_extraction
+# title: استخراج ميزات الصورة
+# - local: tasks/mask_generation
+# title: توليد القناع
+# - local: tasks/knowledge_distillation_for_image_classification
+# title: التقليل المعرفي للرؤية الحاسوبية
+# title: الرؤية الحاسوبية
+# - isExpanded: false
+# sections:
+# - local: tasks/image_captioning
+# title: وصف الصور Image captioning
+# - local: tasks/document_question_answering
+# title: الإجابة على أسئلة المستندات
+# - local: tasks/visual_question_answering
+# title: الإجابة على الأسئلة المرئية
+# - local: tasks/text-to-speech
+# title: تحويل النص إلى كلام
+# title: المتعددة الوسائط
+# - isExpanded: false
+# sections:
+# - local: generation_strategies
+# title: تخصيص استراتيجية التوليد
+# - local: kv_cache
+# title: أفضل الممارسات للتوليد باستخدام ذاكرة التخزين المؤقت
+# title: التوليد
+# - isExpanded: false
+# sections:
+# - local: tasks/idefics
+# title: مهام الصور مع IDEFICS
+# - local: tasks/prompting
+# title: دليل إرشادي لمحفزات النماذج اللغوية الكبيرة
+# title: الإرشاد
+# title: أدلة المهام
+- sections:
+ - local: fast_tokenizers
+ title: استخدم مجزئيات النصوص السريعة من 🤗 Tokenizers
+ - local: multilingual
+ title: الاستدلال باستخدام نماذج متعددة اللغات
+ - local: create_a_model
+ title: استخدام واجهات برمجة التطبيقات الخاصة بالنموذج
+ - local: custom_models
+ title: مشاركة نموذج مخصص
+ - local: chat_templating
+ title: قوالب لنماذج الدردشة
+# - local: trainer
+# title: المدرب
+# - local: sagemaker
+# title: تشغيل التدريب على Amazon SageMaker
+# - local: serialization
+# title: التصدير إلى ONNX
+ - local: tflite
+ title: التصدير إلى TFLite
+# - local: torchscript
+# title: التصدير إلى TorchScript
+# - local: benchmarks
+# title: المعايير
+# - local: notebooks
+# title: دفاتر الملاحظات مع الأمثلة
+# - local: community
+# title: موارد المجتمع
+# - local: troubleshooting
+# title: استكشاف الأخطاء وإصلاحها
+ - local: gguf
+ title: التوافق مع ملفات GGUF
+ title: أدلة المطورين
+# - sections:
+# - local: quantization/overview
+# title: نظرة عامة
+# - local: quantization/bitsandbytes
+# title: bitsandbytes
+# - local: quantization/gptq
+# title: GPTQ
+# - local: quantization/awq
+# title: AWQ
+# - local: quantization/aqlm
+# title: AQLM
+# - local: quantization/quanto
+# title: Quanto
+# - local: quantization/eetq
+# title: EETQ
+# - local: quantization/hqq
+# title: HQQ
+# - local: quantization/optimum
+# title: Optimum
+# - local: quantization/contribute
+# title: المساهمة بطريقة جديدة للتكميم
+# title: أساليب التكميم
+# - sections:
+# - local: performance
+# title: الأداء-نظرة عامة
+# - local: llm_optims
+# title: تحسين الاستدلال LLM
+# - sections:
+# - local: perf_train_gpu_one
+# title: استخدام عدة وحدات معالجة رسوميات (GPUs) بشكل متوازٍ
+# - local: perf_train_gpu_many
+# title: وحدات معالجة الرسومات (GPU) متعددة والتوازي
+# - local: fsdp
+# title: Fully Sharded Data Parallel
+# - local: deepspeed
+# title: DeepSpeed
+# - local: perf_train_cpu
+# title: التدريب الفعال على وحدة المعالجة المركزية (CPU)
+# - local: perf_train_cpu_many
+# title: التدريب الموزع لوحدة المعالجة المركزية (CPU)
+# - local: perf_train_tpu_tf
+# title: التدريب على (TPU) باستخدام TensorFlow
+# - local: perf_train_special
+# title: تدريب PyTorch على Apple silicon
+# - local: perf_hardware
+# title: الأجهزة المخصصة للتدريب
+# - local: hpo_train
+# title: البحث عن المعاملات المثلى باستخدام واجهة برمجة تطبيقات المدرب
+# title: تقنيات التدريب الفعال
+# - sections:
+# - local: perf_infer_cpu
+# title: الإستدلال على وحدة المعالجة المركزية (CPU)
+# - local: perf_infer_gpu_one
+# title: الإستدلال على وحدة معالجة الرسومات (GPU)
+# title: تحسين الاستدلال
+# - local: big_models
+# title: إنشاء نموذج كبير
+# - local: debugging
+# title: تصحيح الأخطاء البرمجية
+# - local: tf_xla
+# title: تكامل XLA لنماذج TensorFlow
+# - local: perf_torch_compile
+# title: تحسين الاستدلال باستخدام `torch.compile()`
+# title: الأداء وقابلية التوسع
+# - sections:
+# - local: contributing
+# title: كيفية المساهمة في 🤗 المحولات؟
+# - local: add_new_model
+# title: كيفية إضافة نموذج إلى 🤗 المحولات؟
+# - local: add_new_pipeline
+# title: كيفية إضافة خط أنابيب إلى 🤗 المحولات؟
+# - local: testing
+# title: الاختبار
+# - local: pr_checks
+# title: التحقق من طلب السحب
+# title: المساهمة
+- sections:
+ - local: philosophy
+ title: الفلسفة
+ - local: glossary
+ title: قاموس المصطلحات (قائمة الكلمات)
+ - local: task_summary
+ title: ما الذي يمكن أن تفعله 🤗 المحولات
+ - local: tasks_explained
+ title: كيف تحل المحولات المهام
+ - local: model_summary
+ title: عائلة نماذج المحول
+ - local: tokenizer_summary
+ title: ملخص برنامج مقسم النصوص (tokenizers)
+ - local: attention
+ title: الانتباه Attention
+ - local: pad_truncation
+ title: الحشو والتقليم
+ - local: bertology
+ title: BERTology
+ - local: perplexity
+ title: حيرة النماذج ذات الطول الثابت
+ - local: pipeline_webserver
+ title: خطوط الأنابيب للاستدلال على خادم الويب
+ - local: model_memory_anatomy
+ title: تشريح تدريب النموذج
+ - local: llm_tutorial_optimization
+ title: الاستفادة القصوى من LLMs
+ title: أطر مفاهيمية
+# - sections:
+# - sections:
+# - local: main_classes/agent
+# title: الوكلاء والأدوات
+# - local: model_doc/auto
+# title: فئات يتم إنشاؤها ديناميكيًا
+# - local: main_classes/backbones
+# title: العمود الفقري
+# - local: main_classes/callback
+# title: عمليات الاسترجاع
+# - local: main_classes/configuration
+# title: التكوين
+# - local: main_classes/data_collator
+# title: مجمع البيانات
+# - local: main_classes/keras_callbacks
+# title: استدعاءات Keras
+# - local: main_classes/logging
+# title: التسجيل
+# - local: main_classes/model
+# title: النماذج
+# - local: main_classes/text_generation
+# title: توليد النصوص
+# - local: main_classes/onnx
+# title: ONNX
+# - local: main_classes/optimizer_schedules
+# title: التحسين
+# - local: main_classes/output
+# title: مخرجات النموذج
+# - local: main_classes/pipelines
+# title: خطوط الأنابيب
+# - local: main_classes/processors
+# title: المعالجات
+# - local: main_classes/quantization
+# title: التكميم
+# - local: main_classes/tokenizer
+# title: برنامج مقسم النصوص
+# - local: main_classes/trainer
+# title: المدرب
+# - local: main_classes/deepspeed
+# title: DeepSpeed
+# - local: main_classes/feature_extractor
+# title: مستخرج الميزات
+# - local: main_classes/image_processor
+# title: معالج الصور
+# title: الفئات الرئيسية
+# - sections:
+# - isExpanded: false
+# sections:
+# - local: model_doc/albert
+# title: ALBERT
+# - local: model_doc/bart
+# title: BART
+# - local: model_doc/barthez
+# title: BARThez
+# - local: model_doc/bartpho
+# title: BARTpho
+# - local: model_doc/bert
+# title: BERT
+# - local: model_doc/bert-generation
+# title: BertGeneration
+# - local: model_doc/bert-japanese
+# title: BertJapanese
+# - local: model_doc/bertweet
+# title: Bertweet
+# - local: model_doc/big_bird
+# title: BigBird
+# - local: model_doc/bigbird_pegasus
+# title: BigBirdPegasus
+# - local: model_doc/biogpt
+# title: BioGpt
+# - local: model_doc/blenderbot
+# title: Blenderbot
+# - local: model_doc/blenderbot-small
+# title: Blenderbot Small
+# - local: model_doc/bloom
+# title: BLOOM
+# - local: model_doc/bort
+# title: BORT
+# - local: model_doc/byt5
+# title: ByT5
+# - local: model_doc/camembert
+# title: CamemBERT
+# - local: model_doc/canine
+# title: CANINE
+# - local: model_doc/codegen
+# title: CodeGen
+# - local: model_doc/code_llama
+# title: CodeLlama
+# - local: model_doc/cohere
+# title: Cohere
+# - local: model_doc/convbert
+# title: ConvBERT
+# - local: model_doc/cpm
+# title: CPM
+# - local: model_doc/cpmant
+# title: CPMANT
+# - local: model_doc/ctrl
+# title: CTRL
+# - local: model_doc/dbrx
+# title: DBRX
+# - local: model_doc/deberta
+# title: DeBERTa
+# - local: model_doc/deberta-v2
+# title: DeBERTa-v2
+# - local: model_doc/dialogpt
+# title: DialoGPT
+# - local: model_doc/distilbert
+# title: DistilBERT
+# - local: model_doc/dpr
+# title: DPR
+# - local: model_doc/electra
+# title: ELECTRA
+# - local: model_doc/encoder-decoder
+# title: Encoder Decoder Models
+# - local: model_doc/ernie
+# title: ERNIE
+# - local: model_doc/ernie_m
+# title: ErnieM
+# - local: model_doc/esm
+# title: ESM
+# - local: model_doc/falcon
+# title: Falcon
+# - local: model_doc/fastspeech2_conformer
+# title: FastSpeech2Conformer
+# - local: model_doc/flan-t5
+# title: FLAN-T5
+# - local: model_doc/flan-ul2
+# title: FLAN-UL2
+# - local: model_doc/flaubert
+# title: FlauBERT
+# - local: model_doc/fnet
+# title: FNet
+# - local: model_doc/fsmt
+# title: FSMT
+# - local: model_doc/funnel
+# title: Funnel Transformer
+# - local: model_doc/fuyu
+# title: Fuyu
+# - local: model_doc/gemma
+# title: Gemma
+# - local: model_doc/openai-gpt
+# title: GPT
+# - local: model_doc/gpt_neo
+# title: GPT Neo
+# - local: model_doc/gpt_neox
+# title: GPT NeoX
+# - local: model_doc/gpt_neox_japanese
+# title: GPT NeoX Japanese
+# - local: model_doc/gptj
+# title: GPT-J
+# - local: model_doc/gpt2
+# title: GPT2
+# - local: model_doc/gpt_bigcode
+# title: GPTBigCode
+# - local: model_doc/gptsan-japanese
+# title: GPTSAN Japanese
+# - local: model_doc/gpt-sw3
+# title: GPTSw3
+# - local: model_doc/herbert
+# title: HerBERT
+# - local: model_doc/ibert
+# title: I-BERT
+# - local: model_doc/jamba
+# title: Jamba
+# - local: model_doc/jetmoe
+# title: JetMoe
+# - local: model_doc/jukebox
+# title: Jukebox
+# - local: model_doc/led
+# title: LED
+# - local: model_doc/llama
+# title: LLaMA
+# - local: model_doc/llama2
+# title: Llama2
+# - local: model_doc/llama3
+# title: Llama3
+# - local: model_doc/longformer
+# title: Longformer
+# - local: model_doc/longt5
+# title: LongT5
+# - local: model_doc/luke
+# title: LUKE
+# - local: model_doc/m2m_100
+# title: M2M100
+# - local: model_doc/madlad-400
+# title: MADLAD-400
+# - local: model_doc/mamba
+# title: Mamba
+# - local: model_doc/marian
+# title: MarianMT
+# - local: model_doc/markuplm
+# title: MarkupLM
+# - local: model_doc/mbart
+# title: MBart and MBart-50
+# - local: model_doc/mega
+# title: MEGA
+# - local: model_doc/megatron-bert
+# title: MegatronBERT
+# - local: model_doc/megatron_gpt2
+# title: MegatronGPT2
+# - local: model_doc/mistral
+# title: Mistral
+# - local: model_doc/mixtral
+# title: Mixtral
+# - local: model_doc/mluke
+# title: mLUKE
+# - local: model_doc/mobilebert
+# title: MobileBERT
+# - local: model_doc/mpnet
+# title: MPNet
+# - local: model_doc/mpt
+# title: MPT
+# - local: model_doc/mra
+# title: MRA
+# - local: model_doc/mt5
+# title: MT5
+# - local: model_doc/mvp
+# title: MVP
+# - local: model_doc/nezha
+# title: NEZHA
+# - local: model_doc/nllb
+# title: NLLB
+# - local: model_doc/nllb-moe
+# title: NLLB-MoE
+# - local: model_doc/nystromformer
+# title: Nyströmformer
+# - local: model_doc/olmo
+# title: OLMo
+# - local: model_doc/open-llama
+# title: Open-Llama
+# - local: model_doc/opt
+# title: OPT
+# - local: model_doc/pegasus
+# title: Pegasus
+# - local: model_doc/pegasus_x
+# title: PEGASUS-X
+# - local: model_doc/persimmon
+# title: Persimmon
+# - local: model_doc/phi
+# title: Phi
+# - local: model_doc/phi3
+# title: Phi-3
+# - local: model_doc/phobert
+# title: PhoBERT
+# - local: model_doc/plbart
+# title: PLBart
+# - local: model_doc/prophetnet
+# title: ProphetNet
+# - local: model_doc/qdqbert
+# title: QDQBert
+# - local: model_doc/qwen2
+# title: Qwen2
+# - local: model_doc/qwen2_moe
+# title: Qwen2MoE
+# - local: model_doc/rag
+# title: RAG
+# - local: model_doc/realm
+# title: REALM
+# - local: model_doc/recurrent_gemma
+# title: RecurrentGemma
+# - local: model_doc/reformer
+# title: Reformer
+# - local: model_doc/rembert
+# title: RemBERT
+# - local: model_doc/retribert
+# title: RetriBERT
+# - local: model_doc/roberta
+# title: RoBERTa
+# - local: model_doc/roberta-prelayernorm
+# title: RoBERTa-PreLayerNorm
+# - local: model_doc/roc_bert
+# title: RoCBert
+# - local: model_doc/roformer
+# title: RoFormer
+# - local: model_doc/rwkv
+# title: RWKV
+# - local: model_doc/splinter
+# title: Splinter
+# - local: model_doc/squeezebert
+# title: SqueezeBERT
+# - local: model_doc/stablelm
+# title: StableLm
+# - local: model_doc/starcoder2
+# title: Starcoder2
+# - local: model_doc/switch_transformers
+# title: SwitchTransformers
+# - local: model_doc/t5
+# title: T5
+# - local: model_doc/t5v1.1
+# title: T5v1.1
+# - local: model_doc/tapex
+# title: TAPEX
+# - local: model_doc/transfo-xl
+# title: Transformer XL
+# - local: model_doc/ul2
+# title: UL2
+# - local: model_doc/umt5
+# title: UMT5
+# - local: model_doc/xmod
+# title: X-MOD
+# - local: model_doc/xglm
+# title: XGLM
+# - local: model_doc/xlm
+# title: XLM
+# - local: model_doc/xlm-prophetnet
+# title: XLM-ProphetNet
+# - local: model_doc/xlm-roberta
+# title: XLM-RoBERTa
+# - local: model_doc/xlm-roberta-xl
+# title: XLM-RoBERTa-XL
+# - local: model_doc/xlm-v
+# title: XLM-V
+# - local: model_doc/xlnet
+# title: XLNet
+# - local: model_doc/yoso
+# title: YOSO
+# title: Text models
+# - isExpanded: false
+# sections:
+# - local: model_doc/beit
+# title: BEiT
+# - local: model_doc/bit
+# title: BiT
+# - local: model_doc/conditional_detr
+# title: Conditional DETR
+# - local: model_doc/convnext
+# title: ConvNeXT
+# - local: model_doc/convnextv2
+# title: ConvNeXTV2
+# - local: model_doc/cvt
+# title: CVT
+# - local: model_doc/deformable_detr
+# title: Deformable DETR
+# - local: model_doc/deit
+# title: DeiT
+# - local: model_doc/depth_anything
+# title: Depth Anything
+# - local: model_doc/deta
+# title: DETA
+# - local: model_doc/detr
+# title: DETR
+# - local: model_doc/dinat
+# title: DiNAT
+# - local: model_doc/dinov2
+# title: DINOV2
+# - local: model_doc/dit
+# title: DiT
+# - local: model_doc/dpt
+# title: DPT
+# - local: model_doc/efficientformer
+# title: EfficientFormer
+# - local: model_doc/efficientnet
+# title: EfficientNet
+# - local: model_doc/focalnet
+# title: FocalNet
+# - local: model_doc/glpn
+# title: GLPN
+# - local: model_doc/imagegpt
+# title: ImageGPT
+# - local: model_doc/levit
+# title: LeViT
+# - local: model_doc/mask2former
+# title: Mask2Former
+# - local: model_doc/maskformer
+# title: MaskFormer
+# - local: model_doc/mobilenet_v1
+# title: MobileNetV1
+# - local: model_doc/mobilenet_v2
+# title: MobileNetV2
+# - local: model_doc/mobilevit
+# title: MobileViT
+# - local: model_doc/mobilevitv2
+# title: MobileViTV2
+# - local: model_doc/nat
+# title: NAT
+# - local: model_doc/poolformer
+# title: PoolFormer
+# - local: model_doc/pvt
+# title: Pyramid Vision Transformer (PVT)
+# - local: model_doc/pvt_v2
+# title: Pyramid Vision Transformer v2 (PVTv2)
+# - local: model_doc/regnet
+# title: RegNet
+# - local: model_doc/resnet
+# title: ResNet
+# - local: model_doc/segformer
+# title: SegFormer
+# - local: model_doc/seggpt
+# title: SegGpt
+# - local: model_doc/superpoint
+# title: SuperPoint
+# - local: model_doc/swiftformer
+# title: SwiftFormer
+# - local: model_doc/swin
+# title: Swin Transformer
+# - local: model_doc/swinv2
+# title: Swin Transformer V2
+# - local: model_doc/swin2sr
+# title: Swin2SR
+# - local: model_doc/table-transformer
+# title: Table Transformer
+# - local: model_doc/upernet
+# title: UperNet
+# - local: model_doc/van
+# title: VAN
+# - local: model_doc/vit
+# title: Vision Transformer (ViT)
+# - local: model_doc/vit_hybrid
+# title: ViT Hybrid
+# - local: model_doc/vitdet
+# title: ViTDet
+# - local: model_doc/vit_mae
+# title: ViTMAE
+# - local: model_doc/vitmatte
+# title: ViTMatte
+# - local: model_doc/vit_msn
+# title: ViTMSN
+# - local: model_doc/yolos
+# title: YOLOS
+# title: Vision models
+# - isExpanded: false
+# sections:
+# - local: model_doc/audio-spectrogram-transformer
+# title: Audio Spectrogram Transformer
+# - local: model_doc/bark
+# title: Bark
+# - local: model_doc/clap
+# title: CLAP
+# - local: model_doc/encodec
+# title: EnCodec
+# - local: model_doc/hubert
+# title: Hubert
+# - local: model_doc/mctct
+# title: MCTCT
+# - local: model_doc/mms
+# title: MMS
+# - local: model_doc/musicgen
+# title: MusicGen
+# - local: model_doc/musicgen_melody
+# title: MusicGen Melody
+# - local: model_doc/pop2piano
+# title: Pop2Piano
+# - local: model_doc/seamless_m4t
+# title: Seamless-M4T
+# - local: model_doc/seamless_m4t_v2
+# title: SeamlessM4T-v2
+# - local: model_doc/sew
+# title: SEW
+# - local: model_doc/sew-d
+# title: SEW-D
+# - local: model_doc/speech_to_text
+# title: Speech2Text
+# - local: model_doc/speech_to_text_2
+# title: Speech2Text2
+# - local: model_doc/speecht5
+# title: SpeechT5
+# - local: model_doc/unispeech
+# title: UniSpeech
+# - local: model_doc/unispeech-sat
+# title: UniSpeech-SAT
+# - local: model_doc/univnet
+# title: UnivNet
+# - local: model_doc/vits
+# title: VITS
+# - local: model_doc/wav2vec2
+# title: Wav2Vec2
+# - local: model_doc/wav2vec2-bert
+# title: Wav2Vec2-BERT
+# - local: model_doc/wav2vec2-conformer
+# title: Wav2Vec2-Conformer
+# - local: model_doc/wav2vec2_phoneme
+# title: Wav2Vec2Phoneme
+# - local: model_doc/wavlm
+# title: WavLM
+# - local: model_doc/whisper
+# title: Whisper
+# - local: model_doc/xls_r
+# title: XLS-R
+# - local: model_doc/xlsr_wav2vec2
+# title: XLSR-Wav2Vec2
+# title: Audio models
+# - isExpanded: false
+# sections:
+# - local: model_doc/timesformer
+# title: TimeSformer
+# - local: model_doc/videomae
+# title: VideoMAE
+# - local: model_doc/vivit
+# title: ViViT
+# title: Video models
+# - isExpanded: false
+# sections:
+# - local: model_doc/align
+# title: ALIGN
+# - local: model_doc/altclip
+# title: AltCLIP
+# - local: model_doc/blip
+# title: BLIP
+# - local: model_doc/blip-2
+# title: BLIP-2
+# - local: model_doc/bridgetower
+# title: BridgeTower
+# - local: model_doc/bros
+# title: BROS
+# - local: model_doc/chinese_clip
+# title: Chinese-CLIP
+# - local: model_doc/clip
+# title: CLIP
+# - local: model_doc/clipseg
+# title: CLIPSeg
+# - local: model_doc/clvp
+# title: CLVP
+# - local: model_doc/data2vec
+# title: Data2Vec
+# - local: model_doc/deplot
+# title: DePlot
+# - local: model_doc/donut
+# title: Donut
+# - local: model_doc/flava
+# title: FLAVA
+# - local: model_doc/git
+# title: GIT
+# - local: model_doc/grounding-dino
+# title: Grounding DINO
+# - local: model_doc/groupvit
+# title: GroupViT
+# - local: model_doc/idefics
+# title: IDEFICS
+# - local: model_doc/idefics2
+# title: Idefics2
+# - local: model_doc/instructblip
+# title: InstructBLIP
+# - local: model_doc/kosmos-2
+# title: KOSMOS-2
+# - local: model_doc/layoutlm
+# title: LayoutLM
+# - local: model_doc/layoutlmv2
+# title: LayoutLMV2
+# - local: model_doc/layoutlmv3
+# title: LayoutLMV3
+# - local: model_doc/layoutxlm
+# title: LayoutXLM
+# - local: model_doc/lilt
+# title: LiLT
+# - local: model_doc/llava
+# title: Llava
+# - local: model_doc/llava_next
+# title: LLaVA-NeXT
+# - local: model_doc/lxmert
+# title: LXMERT
+# - local: model_doc/matcha
+# title: MatCha
+# - local: model_doc/mgp-str
+# title: MGP-STR
+# - local: model_doc/nougat
+# title: Nougat
+# - local: model_doc/oneformer
+# title: OneFormer
+# - local: model_doc/owlvit
+# title: OWL-ViT
+# - local: model_doc/owlv2
+# title: OWLv2
+# - local: model_doc/paligemma
+# title: PaliGemma
+# - local: model_doc/perceiver
+# title: Perceiver
+# - local: model_doc/pix2struct
+# title: Pix2Struct
+# - local: model_doc/sam
+# title: Segment Anything
+# - local: model_doc/siglip
+# title: SigLIP
+# - local: model_doc/speech-encoder-decoder
+# title: Speech Encoder Decoder Models
+# - local: model_doc/tapas
+# title: TAPAS
+# - local: model_doc/trocr
+# title: TrOCR
+# - local: model_doc/tvlt
+# title: TVLT
+# - local: model_doc/tvp
+# title: TVP
+# - local: model_doc/udop
+# title: UDOP
+# - local: model_doc/video_llava
+# title: VideoLlava
+# - local: model_doc/vilt
+# title: ViLT
+# - local: model_doc/vipllava
+# title: VipLlava
+# - local: model_doc/vision-encoder-decoder
+# title: Vision Encoder Decoder Models
+# - local: model_doc/vision-text-dual-encoder
+# title: Vision Text Dual Encoder
+# - local: model_doc/visual_bert
+# title: VisualBERT
+# - local: model_doc/xclip
+# title: X-CLIP
+# title: Multimodal models
+# - isExpanded: false
+# sections:
+# - local: model_doc/decision_transformer
+# title: محول القرار
+# - local: model_doc/trajectory_transformer
+# title: محول المسار
+# title: نماذج التعلم التعزيزية
+# - isExpanded: false
+# sections:
+# - local: model_doc/autoformer
+# title: Autoformer
+# - local: model_doc/informer
+# title: Informer
+# - local: model_doc/patchtsmixer
+# title: PatchTSMixer
+# - local: model_doc/patchtst
+# title: PatchTST
+# - local: model_doc/time_series_transformer
+# title: محول السلاسل الزمنية
+# title: نماذج السلاسل الزمنية
+# - isExpanded: false
+# sections:
+# - local: model_doc/graphormer
+# title: Graphormer
+# title: نماذج الرسم البياني
+# title: النماذج
+# - sections:
+# - local: internal/modeling_utils
+# title: الطبقات المخصصة والمرافق
+# - local: internal/pipelines_utils
+# title: مرافق خطوط الأنابيب
+# - local: internal/tokenization_utils
+# title: مرافق مقسم النصوص
+# - local: internal/trainer_utils
+# title: مرافق المدرب
+# - local: internal/generation_utils
+# title: مرافق التوليد
+# - local: internal/image_processing_utils
+# title: مرافق معالجة الصور
+# - local: internal/audio_utils
+# title: مرافق معالجة الصوت
+# - local: internal/file_utils
+# title: مرافق عامة
+# - local: internal/time_series_utils
+# title: مرافق السلاسل الزمنية
+# title: مساعدون داخليون
+# title: API
diff --git a/docs/source/ar/accelerate.md b/docs/source/ar/accelerate.md
new file mode 100644
index 000000000000..486c1efe59af
--- /dev/null
+++ b/docs/source/ar/accelerate.md
@@ -0,0 +1,120 @@
+# التدريب الموزع باستخدام 🤗 Accelerate
+
+
+مع تزايد حجم النماذج اللغوية، برز التوازي كأحد الاستراتيجيات لتدريب نماذج أكبر على أجهزة محدودة وتسريع عملية التدريب بمقدار كبير. أنشأنا في Hugging Face مكتبة [Accelerate](https://huggingface.co/docs/accelerate) لمساعدة المستخدمين على تدريب أي نموذج من Transformers بسهولة على أي نوع من الإعدادات الموزعة، سواء كان ذلك على عدة وحدات معالجة رسومات (GPUs) على جهاز واحد أو على عدة وحدات معالجة رسومات موزعة على عدة أجهزة. في هذا الدليل، ستتعلم كيفية تخصيص حلقة تدريب PyTorch الأصلية لتمكين التدريب في بيئة موزعة.
+
+## الإعداد
+
+ابدأ بتثبيت 🤗 Accelerate:
+
+```bash
+pip install accelerate
+```
+
+ثم قم باستيراد وإنشاء كائن [`~accelerate.Accelerator`]. سيقوم [`~accelerate.Accelerator`] تلقائيًا باكتشاف نوع الإعداد الموزع الخاص بك وتهيئة جميع المكونات اللازمة للتدريب. لن تحتاج إلى وضع نموذجك على جهاز بشكل معين.
+
+```py
+>>> from accelerate import Accelerator
+
+>>> accelerator = Accelerator()
+```
+
+## الاستعداد للتسريع
+
+الخطوة التالية هي تمرير جميع كائنات التدريب ذات الصلة إلى دالة الإعداد [`~accelerate.Accelerator.prepare`]. ويشمل ذلك كائنات DataLoader للتدريب والتقييم، والنموذج، ومُحَسِّن المعاملات (optimizer):
+
+```py
+>>> train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(
+... train_dataloader, eval_dataloader, model, optimizer
+... )
+```
+
+## الخلفي Backward
+
+الإضافة الأخيرة هي استبدال الدالة المعتادة `loss.backward()` في حلقة التدريب الخاصة بك بدالة [`~accelerate.Accelerator.backward`] في 🤗 Accelerate:
+
+```py
+>>> for epoch in range(num_epochs):
+... for batch in train_dataloader:
+... outputs = model(**batch)
+... loss = outputs.loss
+... accelerator.backward(loss)
+
+... optimizer.step()
+... lr_scheduler.step()
+... optimizer.zero_grad()
+... progress_bar.update(1)
+```
+
+كما يمكنك أن ترى في الكود التالي، فأنت بحاجة فقط إلى إضافة أربعة أسطر من الكود إلى حلقة التدريب الخاصة بك لتمكين التدريب الموزع!
+
+```diff
++ from accelerate import Accelerator
+ from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler
+
++ accelerator = Accelerator()
+
+ model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
+ optimizer = AdamW(model.parameters(), lr=3e-5)
+
+- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+- model.to(device)
+
++ train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(
++ train_dataloader, eval_dataloader, model, optimizer
++ )
+
+ num_epochs = 3
+ num_training_steps = num_epochs * len(train_dataloader)
+ lr_scheduler = get_scheduler(
+ "linear",
+ optimizer=optimizer,
+ num_warmup_steps=0,
+ num_training_steps=num_training_steps
+ )
+
+ progress_bar = tqdm(range(num_training_steps))
+
+ model.train()
+ for epoch in range(num_epochs):
+ for batch in train_dataloader:
+- batch = {k: v.to(device) for k, v in batch.items()}
+ outputs = model(**batch)
+ loss = outputs.loss
+- loss.backward()
++ accelerator.backward(loss)
+ optimizer.step()
+ lr_scheduler.step()
+ optimizer.zero_grad()
+ progress_bar.update(1)
+```
+
+## تدريب
+
+بمجرد إضافة أسطر الكود ذات الصلة، شغّل التدريب في نص برمجي أو دفتر ملاحظات مثل Colaboratory.
+
+### التدريب باستخدام نص برمجي
+
+إذا كنت تشغل التدريب الخاص بك من نص برمجي، فقم بتشغيل الأمر التالي لإنشاء وحفظ ملف تكوين:
+
+```bash
+accelerate config
+```
+
+ثم قم بتشغيل التدريب الخاص بك باستخدام:
+
+```bash
+accelerate launch train.py
+```
+
+### التدريب باستخدام دفتر ملاحظات
+
+يمكن أيضًا تشغيل 🤗 Accelerate في دفاتر الملاحظات إذا كنت تخطط لاستخدام وحدات معالجة الموتّرات (TPUs) في Colaboratory. قم بتغليف كل الكود المسؤول عن التدريب في دالة، ومررها إلى [`~accelerate.notebook_launcher`]:
+
+```py
+>>> from accelerate import notebook_launcher
+
+>>> notebook_launcher(training_function)
+```
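+
+فيما يلي رسم توضيحي مبسط لما قد تبدو عليه دالة التدريب الممررة إلى [`~accelerate.notebook_launcher`]. هذا مجرد مثال افتراضي يجمع الخطوات السابقة: يفترض أن `model` و`optimizer` و`train_dataloader` معرّفة مسبقًا في الدفتر، وأن `num_processes=8` يناسب وحدة TPU بثماني أنوية.
+
+```py
+>>> from accelerate import Accelerator, notebook_launcher
+
+>>> def training_function():
+...     # أنشئ المُسرِّع داخل الدالة حتى تعمل كل عملية (process) بشكل صحيح
+...     accelerator = Accelerator()
+...     dataloader, prepared_model, prepared_optimizer = accelerator.prepare(
+...         train_dataloader, model, optimizer
+...     )
+...     prepared_model.train()
+...     for batch in dataloader:
+...         loss = prepared_model(**batch).loss
+...         accelerator.backward(loss)
+...         prepared_optimizer.step()
+...         prepared_optimizer.zero_grad()
+
+>>> notebook_launcher(training_function, num_processes=8)
+```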
+
+للحصول على مزيد من المعلومات حول 🤗 Accelerate وميزاته الغنية، يرجى الرجوع إلى [الوثائق](https://huggingface.co/docs/accelerate).
\ No newline at end of file
diff --git a/docs/source/ar/agents.md b/docs/source/ar/agents.md
new file mode 100644
index 000000000000..92b2a4715f6f
--- /dev/null
+++ b/docs/source/ar/agents.md
@@ -0,0 +1,539 @@
+# الوكلاء والأدوات
+
+[[open-in-colab]]
+
+### ما هو الوكيل؟
+
+يمكن للنماذج اللغوية الكبيرة (LLMs) التي تم تدريبها على [نمذجة اللغة السببية](./tasks/language_modeling) التعامل مع مجموعة واسعة من المهام، ولكنها غالبًا ما تواجه صعوبات في المهام الأساسية مثل المنطق والحساب والبحث. وعندما يتم استدعاؤها في مجالات لا تؤدي فيها أداءً جيدًا، فإنها غالبًا ما تفشل في توليد الإجابة التي نتوقعها منها.
+
+يتمثل أحد النهج للتغلب على هذا القصور في إنشاء "وكيل".
+
+الوكيل هو نظام يستخدم LLM كمحرك له، ولديه حق الوصول إلى وظائف تسمى "أدوات".
+
+هذه "الأدوات" هي وظائف لأداء مهمة، وتحتوي على جميع الأوصاف اللازمة للوكيل لاستخدامها بشكل صحيح.
+
+يمكن برمجة الوكيل للقيام بما يلي:
+- وضع سلسلة من الإجراءات/الأدوات وتشغيلها جميعًا في نفس الوقت، مثل [`CodeAgent`]
+- التخطيط للإجراءات/الأدوات وتنفيذها واحدة تلو الأخرى والانتظار حتى انتهاء كل إجراء قبل إطلاق التالي، مثل [`ReactJsonAgent`]
+
+### أنواع الوكلاء
+
+#### الوكيل البرمجي (Code agent)
+
+يتبع هذا الوكيل خطوات محددة: أولًا، يخطط لسلسلة من الإجراءات التي يريد تنفيذها، ثم يولّد شفرة Python لتنفيذ جميع الإجراءات دفعة واحدة. وهو يتعامل بشكل أصلي مع أنواع مختلفة من المدخلات والمخرجات للأدوات التي يستخدمها، وبالتالي فهو الخيار الموصى به للمهام متعددة الوسائط.
+
+#### وكلاء التفاعل
+
+هذا هو الوكيل الذي يتم اللجوء إليه لحل مهام الاستدلال، حيث يجعل إطار ReAct ([Yao et al.، 2022](https://huggingface.co/papers/2210.03629)) التفكير بالاستناد إلى الملاحظات السابقة أمرًا فعالًا حقًا.
+
+نقوم بتنفيذ إصدارين من وكيل ReAct:
+- [`ReactJsonAgent`] يقوم بتوليد استدعاءات الأدوات كـ JSON في إخراجها.
+- [`ReactCodeAgent`] هو نوع جديد من ReactJsonAgent يقوم بتوليد استدعاءات أدواته كمقاطع من التعليمات البرمجية، والتي تعمل بشكل جيد حقًا مع LLMs التي تتمتع بأداء قوي في البرمجة.
+
+> [!TIP]
+> اقرأ منشور المدونة [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) لمعرفة المزيد عن وكيل ReAct.
+
+
+
+على سبيل المثال، إليك كيف يشق وكيل ReAct Code طريقه نحو حل السؤال التالي.
+
+```py3
+>>> agent.run(
+... "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?",
+... )
+=====New task=====
+How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?
+====Agent is executing the code below:
+bert_blocks = search(query="number of blocks in BERT base encoder")
+print("BERT blocks:", bert_blocks)
+====
+Print outputs:
+BERT blocks: twelve encoder blocks
+
+====Agent is executing the code below:
+attention_layer = search(query="number of layers in Attention is All You Need")
+print("Attention layers:", attention_layer)
+====
+Print outputs:
+Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture.
+
+====Agent is executing the code below:
+bert_blocks = 12
+attention_layers = 6
+diff = bert_blocks - attention_layers
+print("Difference in blocks:", diff)
+final_answer(diff)
+====
+
+Print outputs:
+Difference in blocks: 6
+
+Final answer: 6
+```
+
+### كيف يمكنني بناء وكيل؟
+
+لتهيئة وكيل، تحتاج إلى هذه الوسائط:
+
+- نموذج لغوي كبير (LLM) يشكل المحرك الأساسي للوكيل. الوكيل نفسه ليس النموذج اللغوي، بل هو برنامج يستخدم النموذج اللغوي كمحرك له.
+- موجه النظام (system prompt): هذه هي التعليمات التي يتم إعطاؤها للنموذج اللغوي لإنشاء مخرجاته.
+- صندوق أدوات (toolbox) يختار الوكيل منه الأدوات لتنفيذها
+- محلل (parser) لاستخراج الأدوات التي يجب استدعاؤها من مخرجات النموذج اللغوي LLM والوسائط التي يجب تمريرها إليها
+
+عند تهيئة نظام الوكيل، يتم استخدام سمات الأداة لإنشاء وصف للأداة، ثم يتم دمجها في موجه النظام الخاص `system_prompt` للوكيل لإعلامه بالأدوات التي يمكنه استخدامها ولماذا.
+
+للبدء، يرجى تثبيت `agents` الإضافية لتثبيت جميع التبعيات الافتراضية.
+
+```bash
+pip install transformers[agents]
+```
+
+قم ببناء محرك LLM الخاص بك من خلال تعريف دالة `llm_engine` التي تقبل قائمة من [الرسائل](./chat_templating) وتعيد النص. يجب أن تقبل هذه الدالة القابلة للاستدعاء أيضًا معامل `stop_sequences` يشير إلى متى يجب التوقف عن التوليد.
+
+```python
+from huggingface_hub import login, InferenceClient
+
+login("")
+
+client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct")
+
+def llm_engine(messages, stop_sequences=["Task"]) -> str:
+ response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
+ answer = response.choices[0].message.content
+ return answer
+```
+
+يمكنك استخدام أي طريقة `llm_engine` طالما أنها:
+1. أن تتبع تنسيق [الرسائل](./chat_templating.md) لمدخلاتها (`List[Dict[str, str]]`) وتعيد `str`
+2. أن تتوقف عن توليد المخرجات عند التسلسلات الممررة في معامل `stop_sequences`
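+
+على سبيل التوضيح فقط، إليك رسم مبسط لمحرك LLM محلي يحقق الشرطين السابقين باستخدام `pipeline` من Transformers بدلًا من `InferenceClient`. اسم النموذج هنا مجرد افتراض ويمكنك استبداله بأي نموذج دردشة يدعم قوالب المحادثة، كما أن اقتطاع تسلسلات التوقف يتم هنا بعد التوليد كتقريب مبسط:
+
+```python
+from transformers import pipeline
+
+# افتراض: نموذج دردشة يدعم قوالب المحادثة
+chat_pipeline = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta")
+
+def local_llm_engine(messages, stop_sequences=["Task"]) -> str:
+    # 1. يقبل قائمة رسائل بالتنسيق List[Dict[str, str]] ويعيد نصًا
+    result = chat_pipeline(messages, max_new_tokens=500)
+    answer = result[0]["generated_text"][-1]["content"]
+    # 2. تقريب مبسط: اقتطاع المخرجات عند أول تسلسل توقف ممرر في `stop_sequences`
+    for stop in stop_sequences:
+        if stop in answer:
+            answer = answer.split(stop)[0]
+    return answer
+```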
+
+تحتاج أيضًا إلى معامل `tools` الذي يقبل قائمة من الأدوات (Tools). يمكنك توفير قائمة فارغة لـ `tools`، مع استخدام صندوق الأدوات الافتراضي عبر المعامل الاختياري `add_base_tools=True`.
+
+الآن يمكنك إنشاء وكيل، مثل [`CodeAgent`]، وتشغيله. ولتسهيل الأمر، نقدم أيضًا فئة [`HfEngine`] التي تستخدم `huggingface_hub.InferenceClient` ضمنيًا.
+
+```python
+from transformers import CodeAgent, HfEngine
+
+llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
+agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+
+agent.run(
+ "Could you translate this sentence from French, say it out loud and return the audio.",
+ sentence="Où est la boulangerie la plus proche?",
+)
+```
+
+هذه الميزة ستكون مفيدة في حالة الحاجة الملحة! يمكنك حتى ترك معامل `llm_engine` غير محدد، وسيتم إنشاء [`HfEngine`] بشكل تلقائي.
+
+```python
+from transformers import CodeAgent
+
+agent = CodeAgent(tools=[], add_base_tools=True)
+
+agent.run(
+ "Could you translate this sentence from French, say it out loud and give me the audio.",
+ sentence="Où est la boulangerie la plus proche?",
+)
+```
+
+لاحظ أننا استخدمنا معامل "sentence" إضافي: يمكنك تمرير النص كمعامل إضافي إلى النموذج.
+
+يمكنك أيضًا استخدام هذا للإشارة إلى مسار الملفات المحلية أو البعيدة للنموذج لاستخدامها:
+
+```py
+from transformers import ReactCodeAgent
+
+agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+
+agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
+```
+
+
+يتم تحديد موجه النظام ومحلل المخرجات تلقائيًا، ولكن يمكنك فحصهما بسهولة عن طريق الاطلاع على `system_prompt_template` الخاص بوكيلك.
+
+```python
+print(agent.system_prompt_template)
+```
+
+من المهم أن تشرح بأكبر قدر ممكن من الوضوح المهمة التي تريد تنفيذها.
+كل عملية [`~Agent.run`] مستقلة، وبما أن الوكيل مدعوم من LLM، فقد تؤدي الاختلافات الطفيفة في موجهك إلى نتائج مختلفة تمامًا.
+يمكنك أيضًا تشغيل وكيل بشكل متتالي لمهام مختلفة: في كل مرة يتم فيها إعادة تهيئة سمتي `agent.task` و`agent.logs`.
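+
+على سبيل المثال، يمكن تشغيل الوكيل نفسه (المُهيأ في الأمثلة السابقة) على مهمتين مختلفتين على التوالي، وتُعاد تهيئة `agent.task` و`agent.logs` عند كل استدعاء:
+
+```python
+agent.run("What is the result of 2 to the power of 3.7384?")
+agent.run("Could you translate 'How are you?' into Spanish?")
+```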
+
+
+#### تنفيذ التعليمات البرمجية
+
+يقوم مفسر Python بتنفيذ التعليمات البرمجية على مجموعة من المدخلات التي يتم تمريرها جنبًا إلى جنب مع أدواتك.
+يجب أن يكون هذا الأمر آمنًا لأن الوظائف الوحيدة التي يمكن استدعاؤها هي الأدوات التي قدمتها (خاصة إذا كانت أدوات من Hugging Face فقط) ووظيفة الطباعة، لذا فأنت مقيد بالفعل بما يمكن تنفيذه.
+
+لا يسمح مفسر Python افتراضيًا أيضًا بعمليات الاستيراد خارج قائمة آمنة، لذا فإن أكثر الهجمات وضوحًا لا ينبغي أن تشكل مشكلة.
+يمكنك أيضًا الإذن باستيرادات إضافية عن طريق تمرير الوحدات النمطية المصرح بها كقائمة من السلاسل في معامل `additional_authorized_imports` عند تهيئة [`ReactCodeAgent`] أو [`CodeAgent`]:
+
+```py
+>>> from transformers import ReactCodeAgent
+
+>>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
+>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
+
+(...)
+'Hugging Face – Blog'
+```
+
+سيتم إيقاف التنفيذ عند أي رمز يحاول تنفيذ عملية غير قانونية أو إذا كان هناك خطأ Python عادي في التعليمات البرمجية التي تم إنشاؤها بواسطة الوكيل.
+
+> [!WARNING]
+> يمكن لـ LLM توليد شفرة برمجية عشوائية سيتم تنفيذها بعد ذلك: لا تقم باستدعاء أي دوال غير آمنة!
+
+### موجه النظام
+
+يولّد الوكيل، أو بالأحرى النموذج اللغوي LLM الذي يقود الوكيل، مخرجاته بناءً على موجه النظام. يمكن تخصيص موجه النظام وتصميمه للمهام المقصودة. على سبيل المثال، تحقق من موجه النظام لـ [`ReactCodeAgent`] (الإصدار أدناه مبسط قليلاً).
+
+```text
+You will be given a task to solve as best you can.
+You have access to the following tools:
+<<tool_descriptions>>
+
+To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
+
+At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use.
+Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '/End code' sequence.
+During each intermediate step, you can use 'print()' to save whatever important information you will then need.
+These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step.
+
+In the end you have to return a final answer using the `final_answer` tool.
+
+Here are a few examples using notional tools:
+---
+{examples}
+
+The above examples were using notional tools that might not exist for you. You only have access to those tools:
+<<tool_names>>
+You also can perform computations in the python code you generate.
+
+Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```' sequence. You MUST provide at least the 'Code:' sequence to move forward.
+
+Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks.
+Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result.
+
+Remember to make sure that variables you use are all defined.
+
+Now Begin!
+```
+
+يتضمن موجه النظام:
+- *مقدمة* تشرح كيف يجب أن يتصرف الوكيل والأدوات التي يجب عليه استخدامها.
+- وصف لجميع الأدوات التي يتم تحديدها بواسطة الرمز `<<tool_descriptions>>` الذي يتم استبداله ديناميكيًا في وقت التشغيل بالأدوات التي يحددها المستخدم أو يختارها.
+ - يأتي وصف الأداة من سمات الأداة، `name`، و`description`، و`inputs` و`output_type`، وقالب `jinja2` بسيط يمكنك تحسينه.
+- شكل المخرج المتوقع.
+
+يمكنك تحسين موجه النظام، على سبيل المثال، عن طريق إضافة شرح لتنسيق المخرجات.
+
+للحصول على أقصى قدر من المرونة، يمكنك الكتابة فوق قالب موجه النظام بالكامل عن طريق تمرير موجه مخصص كمعامل إلى معلمة `system_prompt`.
+
+```python
+from transformers import ReactJsonAgent
+from transformers.agents import PythonInterpreterTool
+
+agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
+```
+
+> [!WARNING]
+> يرجى التأكد من تحديد سلسلة `<<tool_descriptions>>` في مكان ما في `template` حتى يكون الوكيل على علم
+بالأدوات المتاحة.
+
+
+### فحص تشغيل الوكيل
+
+فيما يلي بعض السمات المفيدة لفحص ما حدث بعد التشغيل:
+- تخزن `agent.logs` سجلات مفصلة للوكيل. في كل خطوة من تشغيل الوكيل، يتم تخزين كل شيء في قاموس يُلحق بعد ذلك بـ `agent.logs`.
+- يؤدي تشغيل `agent.write_inner_memory_from_logs()` إلى إنشاء الذاكرة الداخلية للوكيل من سجلاته على شكل قائمة من رسائل الدردشة ليطلع عليها نموذج LLM. تمر هذه الطريقة عبر كل خطوة من سجل الوكيل ولا تخزن سوى ما يهمها كرسالة: على سبيل المثال، ستحفظ موجه النظام والمهمة في رسالتين منفصلتين، ثم تخزن لكل خطوة مخرج LLM كرسالة، ومخرج استدعاء الأداة كرسالة أخرى. استخدم هذا إذا كنت تريد عرضًا عامًا لما حدث، مع ملاحظة أن هذه الطريقة لن تنسخ كل سجل.
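+
+على سبيل التوضيح، إليك رسم مبسط لفحص هذه السجلات بعد تشغيل سابق للوكيل (يفترض وجود وكيل باسم `agent` تم تشغيله كما في الأمثلة أعلاه):
+
+```python
+# كل عنصر في `agent.logs` قاموس يصف خطوة واحدة من خطوات التشغيل
+for step_log in agent.logs:
+    print(step_log)
+
+# إعادة بناء الذاكرة الداخلية كقائمة من رسائل الدردشة
+messages = agent.write_inner_memory_from_logs()
+for message in messages:
+    print(message["role"], ":", message["content"][:200])
+```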
+
+## الأدوات
+
+الأداة هي عبارة عن وظيفة أساسية يستخدمها الوكيل لتنفيذ مهمة محددة.
+
+يمكنك على سبيل المثال التحقق من [`PythonInterpreterTool`]: لديه اسم ووصف ووصف للمدخلات ونوع للمخرج، وطريقة `__call__` التي تقوم بتنفيذ المهمة المطلوبة.
+
+عند تهيئة الوكيل، يتم استخدام سمات الأداة لتوليد وصف للأداة يتم تضمينه في موجه النظام الخاص بالوكيل. يتيح هذا للوكيل معرفة الأدوات التي يمكنه استخدامها ولماذا.
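+
+على سبيل المثال، يمكنك فحص هذه السمات على أداة جاهزة مثل [`PythonInterpreterTool`] (مثال توضيحي بسيط):
+
+```python
+from transformers.agents import PythonInterpreterTool
+
+tool = PythonInterpreterTool()
+# السمات المستخدمة لتوليد وصف الأداة داخل موجه النظام
+print(tool.name)
+print(tool.description)
+print(tool.inputs)
+print(tool.output_type)
+```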
+
+### صندوق الأدوات الافتراضي
+
+يأتي Transformers مع صندوق أدوات افتراضي لتمكين الوكلاء، والذي يمكنك إضافته إلى وكيلك عند التهيئة باستخدام معامل `add_base_tools = True`:
+
+- **الإجابة على أسئلة المستند**: الإجابة على سؤال حول المستند (مثل ملف PDF) بتنسيق صورة ([Donut](./model_doc/donut))
+- **الإجابة على أسئلة الصور**: الإجابة على سؤال حول صورة ([VILT](./model_doc/vilt))
+- **تحويل الكلام إلى نص**: تفريغ الكلام إلى نص ([Whisper](./model_doc/whisper))
+- **النص إلى كلام**: تحويل النص إلى كلام ([SpeechT5](./model_doc/speecht5))
+- **الترجمة**: ترجمة جملة معينة من لغة المصدر إلى لغة الهدف.
+- **مفسر كود Python**: تشغيل كود Python الذي تم إنشاؤه بواسطة LLM في بيئة آمنة. لن يتم إضافة هذه الأداة إلى [`ReactJsonAgent`] إلا إذا استخدمت `add_base_tools=True`، نظرًا لأن الأدوات المستندة إلى التعليمات البرمجية يمكنها بالفعل تنفيذ كود Python
+
+يمكنك استخدام أداة يدويًا عن طريق استدعاء دالة [`load_tool`] وتحديد مهمة لتنفيذها.
+
+```python
+from transformers import load_tool
+
+tool = load_tool("text-to-speech")
+audio = tool("This is a text to speech tool")
+```
+
+### إنشاء أداة جديدة
+
+يمكنك إنشاء أداتك الخاصة لتغطية حالات الاستخدام التي لا تغطيها الأدوات الافتراضية من Hugging Face.
+على سبيل المثال، دعنا نقوم بإنشاء أداة تعرض النموذج الأكثر تنزيلًا لمهمة معينة من Hub.
+
+سوف نبدأ بالكود التالي.
+
+```python
+from huggingface_hub import list_models
+
+task = "text-classification"
+
+model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
+print(model.id)
+```
+
+يمكن تحويل هذه الشيفرة إلى فئة ترث من الفئة العليا [`Tool`].
+
+تحتاج الأداة المخصصة إلى:
+
+- خاصية `name`، والتي تمثل اسم الأداة نفسها. عادةً ما يصف الاسم وظيفتها. بما أن الكود يعيد النموذج الأكثر تنزيلًا لمهمة ما، فلنسمّها `model_download_counter`.
+- تستخدم خاصية `description` لملء موجه نظام الوكيل.
+- خاصية `inputs`، وهي عبارة عن قاموس بمفاتيح "type" و"description". يحتوي على معلومات تساعد مفسر Python على اتخاذ خيارات مستنيرة بشأن المدخلات.
+- خاصية `output_type`، والتي تحدد نوع المخرج.
+- طريقة `forward` والتي تحتوي على الكود الذي سيتم تنفيذه للحصول على النتيجة النهائية.
+
+```python
+from transformers import Tool
+from huggingface_hub import list_models
+
+class HFModelDownloadsTool(Tool):
+ name = "model_download_counter"
+ description = (
+ "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. "
+ "It returns the name of the checkpoint."
+ )
+
+ inputs = {
+ "task": {
+ "type": "text",
+ "description": "the task category (such as text-classification, depth-estimation, etc)",
+ }
+ }
+ output_type = "text"
+
+ def forward(self, task: str):
+ model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
+ return model.id
+```
+
+الآن بعد أن أصبحت فئة `HfModelDownloadsTool` المخصصة جاهزة، يمكنك حفظها في ملف باسم `model_downloads.py` واستيرادها للاستخدام.
+
+```python
+from model_downloads import HFModelDownloadsTool
+
+tool = HFModelDownloadsTool()
+```
+
+يمكنك أيضًا مشاركة أداتك المخصصة في Hub عن طريق استدعاء [`~Tool.push_to_hub`] على الأداة. تأكد من أنك قمت بإنشاء مستودع لها على Hub وأنك تستخدم رمز وصول بصلاحية الكتابة.
+
+```python
+tool.push_to_hub("{your_username}/hf-model-downloads")
+```
+
+قم بتحميل الأداة باستخدام دالة [`~Tool.load_tool`] ومررها إلى معلمة `tools` في الوكيل الخاص بك.
+
+```python
+from transformers import load_tool, CodeAgent
+
+model_download_tool = load_tool("m-ric/hf-model-downloads")
+agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine)
+agent.run(
+ "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
+)
+```
+
+ستحصل على ما يلي:
+
+```text
+======== New task ========
+Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?
+==== Agent is executing the code below:
+most_downloaded_model = model_download_counter(task="text-to-video")
+print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.")
+====
+```
+
+والناتج:
+
+`"النموذج الأكثر تنزيلًا لمهمة `text-to-video` هو ByteDance/AnimateDiff-Lightning."`
+
+### إدارة صندوق أدوات الوكيل الخاص بك
+
+إذا كنت قد قمت بتهيئة وكيل، فمن غير الملائم إعادة تهيئته من البداية لإضافة أداة جديدة ترغب في استخدامها. باستخدام مكتبة Transformers، يمكنك إدارة صندوق أدوات الوكيل بإضافة أو استبدال أداة موجودة.
+
+دعنا نضيف الأداة `model_download_tool` إلى وكيل تم تهيئته مسبقًا باستخدام صندوق الأدوات الافتراضي.
+
+```python
+from transformers import CodeAgent
+
+agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+agent.toolbox.add_tool(model_download_tool)
+```
+
+الآن يمكننا الاستفادة من الأداة الجديدة وأداة تحويل النص إلى كلام السابقة:
+
+```python
+agent.run(
+    "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?"
+)
+```
+
+| **Audio** |
+|------------------------------------------------------------------------------------------------------------------------------------------------------|
+|