Skip to content

Commit c6c74b5

Browse files
Merge pull request huggingface#4 from huggingface/main
radio
2 parents 2163461 + 92ce53a commit c6c74b5

File tree

1,187 files changed

+134735
-24639
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,187 files changed

+134735
-24639
lines changed

.circleci/config.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,19 @@ parameters:
99
default: false
1010

1111
jobs:
12+
# Ensure running with CircleCI/huggingface
13+
check_circleci_user:
14+
docker:
15+
- image: cimg/python:3.7.12
16+
parallelism: 1
17+
steps:
18+
- run: echo $CIRCLE_PROJECT_USERNAME
19+
- run: |
20+
if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
21+
exit 0
22+
else
23+
echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit -1
24+
fi
1225
# Fetch the tests to run
1326
fetch_tests:
1427
working_directory: ~/transformers
@@ -161,23 +174,27 @@ jobs:
161174
- run: python utils/check_repo.py
162175
- run: python utils/check_inits.py
163176
- run: python utils/check_config_docstrings.py
177+
- run: python utils/check_doctest_list.py
164178
- run: make deps_table_check_updated
165179
- run: python utils/tests_fetcher.py --sanity_check
166180
- run: python utils/update_metadata.py --check-only
181+
- run: python utils/check_task_guides.py
167182

168183
workflows:
169184
version: 2
170185
setup_and_quality:
171186
when:
172187
not: <<pipeline.parameters.nightly>>
173188
jobs:
189+
- check_circleci_user
174190
- check_code_quality
175191
- check_repository_consistency
176192
- fetch_tests
177193

178194
nightly:
179195
when: <<pipeline.parameters.nightly>>
180196
jobs:
197+
- check_circleci_user
181198
- check_code_quality
182199
- check_repository_consistency
183200
- fetch_all_tests

.circleci/create_circleci_config.py

Lines changed: 78 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515

1616
import argparse
1717
import copy
18+
import glob
1819
import os
20+
import random
1921
from dataclasses import dataclass
2022
from typing import Any, Dict, List, Optional
2123

@@ -25,7 +27,6 @@
2527
COMMON_ENV_VARIABLES = {"OMP_NUM_THREADS": 1, "TRANSFORMERS_IS_CI": True, "PYTEST_TIMEOUT": 120}
2628
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "s": None}
2729
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.7.12"}]
28-
TORCH_SCATTER_INSTALL = "pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.12.0+cpu.html"
2930

3031

3132
@dataclass
@@ -59,6 +60,8 @@ def __post_init__(self):
5960
self.pytest_options = {}
6061
if isinstance(self.tests_to_run, str):
6162
self.tests_to_run = [self.tests_to_run]
63+
if self.parallelism is None:
64+
self.parallelism = 1
6265

6366
def to_dict(self):
6467
job = {
@@ -100,10 +103,57 @@ def to_dict(self):
100103
f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
101104
)
102105
test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
103-
if self.tests_to_run is None:
104-
test_command += " << pipeline.parameters.tests_to_run >>"
106+
if self.parallelism == 1:
107+
if self.tests_to_run is None:
108+
test_command += " << pipeline.parameters.tests_to_run >>"
109+
else:
110+
test_command += " " + " ".join(self.tests_to_run)
105111
else:
106-
test_command += " " + " ".join(self.tests_to_run)
112+
# We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime)
113+
tests = self.tests_to_run
114+
if tests is None:
115+
folder = os.environ["test_preparation_dir"]
116+
test_file = os.path.join(folder, "filtered_test_list.txt")
117+
if os.path.exists(test_file):
118+
with open(test_file) as f:
119+
tests = f.read().split(" ")
120+
121+
# expand the test list
122+
if tests == ["tests"]:
123+
tests = [os.path.join("tests", x) for x in os.listdir("tests")]
124+
expanded_tests = []
125+
for test in tests:
126+
if test.endswith(".py"):
127+
expanded_tests.append(test)
128+
elif test == "tests/models":
129+
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
130+
elif test == "tests/pipelines":
131+
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
132+
else:
133+
expanded_tests.append(test)
134+
# Avoid long tests always being collected together
135+
random.shuffle(expanded_tests)
136+
tests = " ".join(expanded_tests)
137+
138+
# Each executor to run ~10 tests
139+
n_executors = max(len(tests) // 10, 1)
140+
# Avoid empty test list on some executor(s) or launching too many executors
141+
if n_executors > self.parallelism:
142+
n_executors = self.parallelism
143+
job["parallelism"] = n_executors
144+
145+
# Need to be newline separated for the command `circleci tests split` below
146+
command = f'echo {tests} | tr " " "\\n" >> tests.txt'
147+
steps.append({"run": {"name": "Get tests", "command": command}})
148+
149+
command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
150+
steps.append({"run": {"name": "Split tests", "command": command}})
151+
152+
steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
153+
steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
154+
155+
test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
156+
test_command += " $(cat splitted_tests.txt)"
107157
if self.marker is not None:
108158
test_command += f" -m {self.marker}"
109159
test_command += " | tee tests_output.txt"
@@ -127,7 +177,6 @@ def job_name(self):
127177
"git lfs install",
128178
"pip install --upgrade pip",
129179
"pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
130-
TORCH_SCATTER_INSTALL,
131180
"pip install tensorflow_probability",
132181
"pip install git+https://github.com/huggingface/accelerate",
133182
],
@@ -143,7 +192,6 @@ def job_name(self):
143192
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
144193
"pip install --upgrade pip",
145194
"pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
146-
TORCH_SCATTER_INSTALL,
147195
"pip install git+https://github.com/huggingface/accelerate",
148196
],
149197
marker="is_pt_flax_cross_test",
@@ -157,9 +205,9 @@ def job_name(self):
157205
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time",
158206
"pip install --upgrade pip",
159207
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
160-
TORCH_SCATTER_INSTALL,
161208
"pip install git+https://github.com/huggingface/accelerate",
162209
],
210+
parallelism=1,
163211
pytest_num_workers=3,
164212
)
165213

@@ -172,6 +220,7 @@ def job_name(self):
172220
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
173221
"pip install tensorflow_probability",
174222
],
223+
parallelism=1,
175224
pytest_options={"rA": None},
176225
)
177226

@@ -183,6 +232,7 @@ def job_name(self):
183232
"pip install --upgrade pip",
184233
"pip install .[flax,testing,sentencepiece,flax-speech,vision]",
185234
],
235+
parallelism=1,
186236
pytest_options={"rA": None},
187237
)
188238

@@ -192,8 +242,7 @@ def job_name(self):
192242
install_steps=[
193243
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
194244
"pip install --upgrade pip",
195-
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
196-
TORCH_SCATTER_INSTALL,
245+
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video]",
197246
],
198247
pytest_options={"rA": None},
199248
tests_to_run="tests/pipelines/"
@@ -204,7 +253,7 @@ def job_name(self):
204253
"pipelines_tf",
205254
install_steps=[
206255
"pip install --upgrade pip",
207-
"pip install .[sklearn,tf-cpu,testing,sentencepiece]",
256+
"pip install .[sklearn,tf-cpu,testing,sentencepiece,vision]",
208257
"pip install tensorflow_probability",
209258
],
210259
pytest_options={"rA": None},
@@ -303,18 +352,24 @@ def job_name(self):
303352
)
304353

305354

306-
layoutlm_job = CircleCIJob(
307-
"layoutlmv2_and_v3",
355+
exotic_models_job = CircleCIJob(
356+
"exotic_models",
308357
install_steps=[
309358
"sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev",
310359
"pip install --upgrade pip",
311360
"pip install .[torch,testing,vision]",
312361
"pip install torchvision",
362+
"pip install scipy",
313363
"pip install 'git+https://github.com/facebookresearch/detectron2.git'",
314364
"sudo apt install tesseract-ocr",
315365
"pip install pytesseract",
366+
"pip install natten",
367+
],
368+
tests_to_run=[
369+
"tests/models/*layoutlmv*",
370+
"tests/models/*nat",
371+
"tests/models/deta",
316372
],
317-
tests_to_run="tests/models/*layoutlmv*",
318373
pytest_num_workers=1,
319374
pytest_options={"durations": 100},
320375
)
@@ -324,7 +379,7 @@ def job_name(self):
324379
"repo_utils",
325380
install_steps=[
326381
"pip install --upgrade pip",
327-
"pip install .[all,quality,testing]",
382+
"pip install .[quality,testing]",
328383
],
329384
parallelism=None,
330385
pytest_num_workers=1,
@@ -341,7 +396,7 @@ def job_name(self):
341396
custom_tokenizers_job,
342397
hub_job,
343398
onnx_job,
344-
layoutlm_job,
399+
exotic_models_job,
345400
]
346401
EXAMPLES_TESTS = [
347402
examples_torch_job,
@@ -357,6 +412,8 @@ def job_name(self):
357412
def create_circleci_config(folder=None):
358413
if folder is None:
359414
folder = os.getcwd()
415+
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
416+
os.environ["test_preparation_dir"] = folder
360417
jobs = []
361418
all_test_file = os.path.join(folder, "test_list.txt")
362419
if os.path.exists(all_test_file):
@@ -379,14 +436,18 @@ def create_circleci_config(folder=None):
379436
example_file = os.path.join(folder, "examples_test_list.txt")
380437
if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
381438
jobs.extend(EXAMPLES_TESTS)
382-
439+
383440
repo_util_file = os.path.join(folder, "test_repo_utils.txt")
384441
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
385442
jobs.extend(REPO_UTIL_TESTS)
386443

387444
if len(jobs) > 0:
388445
config = {"version": "2.1"}
389-
config["parameters"] = {"tests_to_run": {"type": "string", "default": test_list}}
446+
config["parameters"] = {
447+
# Only used to accept the parameters from the trigger
448+
"nightly": {"type": "boolean", "default": False},
449+
"tests_to_run": {"type": "string", "default": test_list},
450+
}
390451
config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
391452
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
392453
with open(os.path.join(folder, "generated_config.yml"), "w") as f:

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 35 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,58 +17,54 @@ body:
1717
description: |
1818
Your issue will be replied to more quickly if you can figure out the right person to tag with @
1919
If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
20+
21+
All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and
22+
a core maintainer will ping the right person.
23+
2024
Please tag fewer than 3 people.
2125
2226
Models:
2327
24-
- ALBERT, BERT, XLM, DeBERTa, DeBERTa-v2, ELECTRA, MobileBert, SqueezeBert: `@LysandreJik`
25-
- T5, Pegasus, EncoderDecoder: `@patrickvonplaten`
26-
- Blenderbot, MBART, BART, Marian, Pegasus: `@patil-suraj`
27-
- Reformer, TransfoXL, XLNet, FNet: `@patrickvonplaten`
28-
- Longformer, BigBird: `@ydshieh`
29-
- FSMT: `@stas00`
30-
- Funnel: `@sgugger`
31-
- GPT-2, GPT: `@patil-suraj`, `@patrickvonplaten`, `@LysandreJik`
32-
- RAG, DPR: `@patrickvonplaten`, `@lhoestq`
33-
- TensorFlow: `@Rocketknight1`
34-
- JAX/Flax: `@patil-suraj`
35-
- TAPAS, LayoutLM, LayoutLMv2, LUKE, ViT, BEiT, DEiT, DETR, CANINE: `@NielsRogge`
36-
- GPT-Neo, GPT-J, CLIP: `@patil-suraj`
37-
- Wav2Vec2, HuBERT, UniSpeech, UniSpeechSAT, SEW, SEW-D: `@patrickvonplaten`, `@anton-l`
38-
- SpeechEncoderDecoder, Speech2Text, Speech2Text2: `@sanchit-gandhi`, `@patrickvonplaten`, `@anton-l`
39-
40-
If the model isn't in the list, ping `@LysandreJik` who will redirect you to the correct contributor.
41-
28+
- text models: @ArthurZucker and @younesbelkada
29+
- vision models: @amyeroberts and @NielsRogge
30+
- speech models: @sanchit-gandhi
31+
- graph models: @clefourrier
32+
4233
Library:
43-
- Benchmarks: `@patrickvonplaten`
44-
- Deepspeed: `@stas00`
45-
- Ray/raytune: `@richardliaw`, `@amogkam`
46-
- Text generation: `@patrickvonplaten`, `@Narsil`, `@gante`
47-
- Tokenizers: `@SaulLu`
48-
- Trainer: `@sgugger`
49-
- Pipelines: `@Narsil`
50-
- Speech: `@patrickvonplaten`, `@anton-l`, `@sanchit-gandhi`
51-
- Vision: `@NielsRogge`, `@sgugger`
52-
53-
Documentation: `@sgugger`, `@stevhliu`
54-
34+
35+
- flax: @sanchit-gandhi
36+
- generate: @gante
37+
- pipelines: @Narsil
38+
- tensorflow: @gante and @Rocketknight1
39+
- tokenizers: @ArthurZucker
40+
- trainer: @sgugger
41+
42+
Integrations:
43+
44+
- deepspeed: HF Trainer: @stas00, Accelerate: @pacman100
45+
- ray/raytune: @richardliaw, @amogkam
46+
47+
Documentation: @sgugger, @stevhliu and @MKhalusova
48+
5549
Model hub:
5650
5751
- for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
58-
52+
5953
HF projects:
60-
54+
55+
- accelerate: [different repo](https://github.com/huggingface/accelerate)
6156
- datasets: [different repo](https://github.com/huggingface/datasets)
57+
- diffusers: [different repo](https://github.com/huggingface/diffusers)
6258
- rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
59+
60+
Maintained examples (not research project or legacy):
61+
62+
- Flax: @sanchit-gandhi
63+
- PyTorch: @sgugger
64+
- TensorFlow: @Rocketknight1
6365
64-
Examples:
65-
66-
- maintained examples (not research project or legacy): `@sgugger`, `@patil-suraj`
67-
68-
For research projects, please ping the contributor directly. For example, on the following projects:
66+
Research projects are not maintained and should be taken as is.
6967
70-
- research_projects/bert-loses-patience: `@JetRunner`
71-
- research_projects/distillation: `@VictorSanh`
7268
placeholder: "@Username ..."
7369

7470
- type: checkboxes

0 commit comments

Comments
 (0)