jameshennessytempus
diff --git a/‎.circleci/config.yml‎
Lines changed: 17 additions & 0 deletions b/‎.circleci/config.yml‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎.circleci/create_circleci_config.py‎
Lines changed: 78 additions & 17 deletions b/‎.circleci/create_circleci_config.py‎
Lines changed: 78 additions & 17 deletions
diff --git a/‎.github/ISSUE_TEMPLATE/bug-report.yml‎
Lines changed: 35 additions & 39 deletions b/‎.github/ISSUE_TEMPLATE/bug-report.yml‎
Lines changed: 35 additions & 39 deletions
@@ -9,6 +9,19 @@ parameters:
         default: false
 
 jobs:
+    # Guard job: fails unless CIRCLE_PROJECT_USERNAME is `huggingface` (i.e. not a personal account)
+    check_circleci_user:
+        docker:
+            - image: cimg/python:3.7.12
+        parallelism: 1
+        steps:
+            - run: echo "$CIRCLE_PROJECT_USERNAME"
+            - run: |
+                if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
+                    exit 0
+                else
+                    echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit 1
+                fi
     # Fetch the tests to run
     fetch_tests:
         working_directory: ~/transformers
@@ -161,23 +174,27 @@ jobs:
             - run: python utils/check_repo.py
             - run: python utils/check_inits.py
             - run: python utils/check_config_docstrings.py
+            - run: python utils/check_doctest_list.py
             - run: make deps_table_check_updated
             - run: python utils/tests_fetcher.py --sanity_check
             - run: python utils/update_metadata.py --check-only
+            - run: python utils/check_task_guides.py
 
 workflows:
     version: 2
     setup_and_quality:
         when:
             not: <<pipeline.parameters.nightly>>
         jobs:
+            - check_circleci_user
             - check_code_quality
             - check_repository_consistency
             - fetch_tests
 
     nightly:
         when: <<pipeline.parameters.nightly>>
         jobs:
+            - check_circleci_user
             - check_code_quality
             - check_repository_consistency
             - fetch_all_tests
@@ -15,7 +15,9 @@
 
 import argparse
 import copy
+import glob
 import os
+import random
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 
@@ -25,7 +27,6 @@
 COMMON_ENV_VARIABLES = {"OMP_NUM_THREADS": 1, "TRANSFORMERS_IS_CI": True, "PYTEST_TIMEOUT": 120}
 COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "s": None}
 DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.7.12"}]
-TORCH_SCATTER_INSTALL = "pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.12.0+cpu.html"
 
 
 @dataclass
@@ -59,6 +60,8 @@ def __post_init__(self):
             self.pytest_options = {}
         if isinstance(self.tests_to_run, str):
             self.tests_to_run = [self.tests_to_run]
+        if self.parallelism is None:
+            self.parallelism = 1
 
     def to_dict(self):
         job = {
@@ -100,10 +103,57 @@ def to_dict(self):
             f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
         )
         test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
-        if self.tests_to_run is None:
-            test_command += " << pipeline.parameters.tests_to_run >>"
+        if self.parallelism == 1:
+            if self.tests_to_run is None:
+                test_command += " << pipeline.parameters.tests_to_run >>"
+            else:
+                test_command += " " + " ".join(self.tests_to_run)
         else:
-            test_command += " " + " ".join(self.tests_to_run)
+            # An explicit test list is needed: `pipeline.parameters.tests_to_run` is only available at job runtime
+            tests = self.tests_to_run
+            if tests is None:
+                folder = os.environ["test_preparation_dir"]
+                test_file = os.path.join(folder, "filtered_test_list.txt")
+                if os.path.exists(test_file):
+                    with open(test_file) as f:
+                        tests = f.read().split(" ")
+
+            # expand the test list
+            if tests == ["tests"]:
+                tests = [os.path.join("tests", x) for x in os.listdir("tests")]
+            expanded_tests = []
+            for test in tests:
+                if test.endswith(".py"):
+                    expanded_tests.append(test)
+                elif test == "tests/models":
+                    expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
+                elif test == "tests/pipelines":
+                    expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
+                else:
+                    expanded_tests.append(test)
+            # Avoid long tests always being collected together
+            random.shuffle(expanded_tests)
+            tests = " ".join(expanded_tests)
+
+            # Each executor to run ~10 tests. Count the list entries: `tests` is a space-joined string at
+            # this point, so `len(tests)` would count characters rather than tests.
+            n_executors = max(len(expanded_tests) // 10, 1)
+            # Never launch more executors than the parallelism configured for this job
+            n_executors = min(n_executors, self.parallelism)
+            job["parallelism"] = n_executors
+
+            # Need to be newline separated for the command `circleci tests split` below
+            command = f'echo {tests} | tr " " "\\n" >> tests.txt'
+            steps.append({"run": {"name": "Get tests", "command": command}})
+
+            command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
+            steps.append({"run": {"name": "Split tests", "command": command}})
+
+            steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
+            steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
+
+            test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+            test_command += " $(cat splitted_tests.txt)"
         if self.marker is not None:
             test_command += f" -m {self.marker}"
         test_command += " | tee tests_output.txt"
@@ -127,7 +177,6 @@ def job_name(self):
         "git lfs install",
         "pip install --upgrade pip",
         "pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
-        TORCH_SCATTER_INSTALL,
         "pip install tensorflow_probability",
         "pip install git+https://github.com/huggingface/accelerate",
     ],
@@ -143,7 +192,6 @@ def job_name(self):
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
         "pip install --upgrade pip",
         "pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
-        TORCH_SCATTER_INSTALL,
         "pip install git+https://github.com/huggingface/accelerate",
     ],
     marker="is_pt_flax_cross_test",
@@ -157,9 +205,9 @@ def job_name(self):
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time",
         "pip install --upgrade pip",
         "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
-        TORCH_SCATTER_INSTALL,
         "pip install git+https://github.com/huggingface/accelerate",
     ],
+    parallelism=1,
     pytest_num_workers=3,
 )
 
@@ -172,6 +220,7 @@ def job_name(self):
         "pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
         "pip install tensorflow_probability",
     ],
+    parallelism=1,
     pytest_options={"rA": None},
 )
 
@@ -183,6 +232,7 @@ def job_name(self):
         "pip install --upgrade pip",
         "pip install .[flax,testing,sentencepiece,flax-speech,vision]",
     ],
+    parallelism=1,
     pytest_options={"rA": None},
 )
 
@@ -192,8 +242,7 @@ def job_name(self):
     install_steps=[
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
         "pip install --upgrade pip",
-        "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
-        TORCH_SCATTER_INSTALL,
+        "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video]",
     ],
     pytest_options={"rA": None},
     tests_to_run="tests/pipelines/"
@@ -204,7 +253,7 @@ def job_name(self):
     "pipelines_tf",
     install_steps=[
         "pip install --upgrade pip",
-        "pip install .[sklearn,tf-cpu,testing,sentencepiece]",
+        "pip install .[sklearn,tf-cpu,testing,sentencepiece,vision]",
         "pip install tensorflow_probability",
     ],
     pytest_options={"rA": None},
@@ -303,18 +352,24 @@ def job_name(self):
 )
 
 
-layoutlm_job = CircleCIJob(
-    "layoutlmv2_and_v3",
+exotic_models_job = CircleCIJob(
+    "exotic_models",
     install_steps=[
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev",
         "pip install --upgrade pip",
         "pip install .[torch,testing,vision]",
         "pip install torchvision",
+        "pip install scipy",
         "pip install 'git+https://github.com/facebookresearch/detectron2.git'",
         "sudo apt install tesseract-ocr",
         "pip install pytesseract",
+        "pip install natten",
+    ],
+    tests_to_run=[
+        "tests/models/*layoutlmv*",
+        "tests/models/*nat",
+        "tests/models/deta",
     ],
-    tests_to_run="tests/models/*layoutlmv*",
     pytest_num_workers=1,
     pytest_options={"durations": 100},
 )
@@ -324,7 +379,7 @@ def job_name(self):
     "repo_utils",
     install_steps=[
         "pip install --upgrade pip",
-        "pip install .[all,quality,testing]",
+        "pip install .[quality,testing]",
     ],
     parallelism=None,
     pytest_num_workers=1,
@@ -341,7 +396,7 @@ def job_name(self):
     custom_tokenizers_job,
     hub_job,
     onnx_job,
-    layoutlm_job,
+    exotic_models_job,
 ]
 EXAMPLES_TESTS = [
     examples_torch_job,
@@ -357,6 +412,8 @@ def job_name(self):
 def create_circleci_config(folder=None):
     if folder is None:
         folder = os.getcwd()
+    # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
+    os.environ["test_preparation_dir"] = folder
     jobs = []
     all_test_file = os.path.join(folder, "test_list.txt")
     if os.path.exists(all_test_file):
@@ -379,14 +436,18 @@ def create_circleci_config(folder=None):
     example_file = os.path.join(folder, "examples_test_list.txt")
     if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
         jobs.extend(EXAMPLES_TESTS)
-    
+
     repo_util_file = os.path.join(folder, "test_repo_utils.txt")
     if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
         jobs.extend(REPO_UTIL_TESTS)
 
     if len(jobs) > 0:
         config = {"version": "2.1"}
-        config["parameters"] = {"tests_to_run": {"type": "string", "default": test_list}}
+        config["parameters"] = {
+            # Only used to accept the parameters from the trigger
+            "nightly": {"type": "boolean", "default": False},
+            "tests_to_run": {"type": "string", "default": test_list},
+        }
         config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
         config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
         with open(os.path.join(folder, "generated_config.yml"), "w") as f:
 
@@ -17,58 +17,54 @@ body:
       description: |
         Your issue will be replied to more quickly if you can figure out the right person to tag with @
         If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
+        
+        All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and
+        a core maintainer will ping the right person.
+        
         Please tag fewer than 3 people.
         
         Models:
 
-          - ALBERT, BERT, XLM, DeBERTa, DeBERTa-v2, ELECTRA, MobileBert, SqueezeBert: `@LysandreJik`
-          - T5, Pegasus, EncoderDecoder: `@patrickvonplaten`
-          - Blenderbot, MBART, BART, Marian, Pegasus: `@patil-suraj`
-          - Reformer, TransfoXL, XLNet, FNet: `@patrickvonplaten`
-          - Longformer, BigBird: `@ydshieh`
-          - FSMT: `@stas00`
-          - Funnel: `@sgugger`
-          - GPT-2, GPT: `@patil-suraj`, `@patrickvonplaten`, `@LysandreJik`
-          - RAG, DPR: `@patrickvonplaten`, `@lhoestq`
-          - TensorFlow: `@Rocketknight1`
-          - JAX/Flax: `@patil-suraj`
-          - TAPAS, LayoutLM, LayoutLMv2, LUKE, ViT, BEiT, DEiT, DETR, CANINE: `@NielsRogge`
-          - GPT-Neo, GPT-J, CLIP: `@patil-suraj`
-          - Wav2Vec2, HuBERT, UniSpeech, UniSpeechSAT, SEW, SEW-D: `@patrickvonplaten`, `@anton-l`
-          - SpeechEncoderDecoder, Speech2Text, Speech2Text2: `@sanchit-gandhi`, `@patrickvonplaten`, `@anton-l`
-          
-          If the model isn't in the list, ping `@LysandreJik` who will redirect you to the correct contributor.
-
+          - text models: @ArthurZucker and @younesbelkada
+          - vision models: @amyeroberts and @NielsRogge
+          - speech models: @sanchit-gandhi
+          - graph models: @clefourrier
+        
         Library:
-          - Benchmarks: `@patrickvonplaten`
-          - Deepspeed: `@stas00`
-          - Ray/raytune: `@richardliaw`, `@amogkam`
-          - Text generation: `@patrickvonplaten`, `@Narsil`, `@gante`
-          - Tokenizers: `@SaulLu`
-          - Trainer: `@sgugger`
-          - Pipelines: `@Narsil`
-          - Speech: `@patrickvonplaten`, `@anton-l`, `@sanchit-gandhi`
-          - Vision: `@NielsRogge`, `@sgugger`
-
-        Documentation: `@sgugger`, `@stevhliu`
-
+        
+          - flax: @sanchit-gandhi
+          - generate: @gante
+          - pipelines: @Narsil
+          - tensorflow: @gante and @Rocketknight1
+          - tokenizers: @ArthurZucker
+          - trainer: @sgugger
+        
+        Integrations:
+        
+          - deepspeed: HF Trainer: @stas00, Accelerate: @pacman100
+          - ray/raytune: @richardliaw, @amogkam
+        
+        Documentation: @sgugger, @stevhliu and @MKhalusova
+        
         Model hub:
 
           - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
-
+        
         HF projects:
-
+        
+          - accelerate: [different repo](https://github.com/huggingface/accelerate)
           - datasets: [different repo](https://github.com/huggingface/datasets)
+          - diffusers: [different repo](https://github.com/huggingface/diffusers)
           - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
+        
+        Maintained examples (not research project or legacy):
+        
+          - Flax: @sanchit-gandhi
+          - PyTorch: @sgugger
+          - TensorFlow: @Rocketknight1
 
-        Examples:
-
-          - maintained examples (not research project or legacy): `@sgugger`, `@patil-suraj`
-
-        For research projetcs, please ping the contributor directly. For example, on the following projects:
+        Research projects are not maintained and should be taken as is.
 
-          - research_projects/bert-loses-patience: `@JetRunner`
-          - research_projects/distillation: `@VictorSanh`
       placeholder: "@Username ..."
 
   - type: checkboxes