
Commit f7ceb0c

Merge branch 'main' into bamba-pr
2 parents 024072a + 9feae5f

19 files changed: +259 / -112 lines

.circleci/config.yml

Lines changed: 2 additions & 2 deletions

@@ -58,14 +58,14 @@ jobs:
           name: "Prepare pipeline parameters"
           command: |
             python utils/process_test_artifacts.py
-
+
     # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
     # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
     # We used:

     # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
     # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-
+
       - store_artifacts:
           path: test_preparation/transformed_artifacts.json
       - store_artifacts:

.circleci/create_circleci_config.py

Lines changed: 23 additions & 3 deletions

@@ -40,9 +40,22 @@ class EmptyJob:
     job_name = "empty"
 
     def to_dict(self):
+        steps = [{"run": 'ls -la'}]
+        if self.job_name == "collection_job":
+            steps.extend(
+                [
+                    "checkout",
+                    {"run": "pip install requests || true"},
+                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
+                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
+                    {"store_artifacts": {"path": "outputs"}},
+                    {"run": 'echo "All required jobs have now completed"'},
+                ]
+            )
+
         return {
             "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
-            "steps":["checkout"],
+            "steps": steps,
         }
 

@@ -352,6 +365,7 @@ def job_name(self):
 DOC_TESTS = [doc_test_job]
 ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip
 
+
 def create_circleci_config(folder=None):
     if folder is None:
         folder = os.getcwd()

@@ -361,7 +375,13 @@ def create_circleci_config(folder=None):
 
     if len(jobs) == 0:
         jobs = [EmptyJob()]
-    print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+    else:
+        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+        # Add a job waiting all the test jobs and aggregate their test summary files at the end
+        collection_job = EmptyJob()
+        collection_job.job_name = "collection_job"
+        jobs = [collection_job] + jobs
+
     config = {
         "version": "2.1",
         "parameters": {

@@ -371,7 +391,7 @@ def create_circleci_config(folder=None):
             **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
             **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
         },
-        "jobs" : {j.job_name: j.to_dict() for j in jobs}
+        "jobs": {j.job_name: j.to_dict() for j in jobs}
     }
     if "CIRCLE_TOKEN" in os.environ:
         # For private forked repo. (e.g. new model addition)
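
The collection_job added above acts as a barrier: it polls the CircleCI v2 API until every job in the workflow other than itself has stopped running, then gathers the per-job test reports into the "outputs" artifacts. Below is a minimal Python sketch of that polling step, using the requests package the job installs; the actual CI step shells out to curl and jq, and the helper name wait_for_other_jobs is illustrative only. CIRCLE_WORKFLOW_ID and CCI_TOKEN are assumed to be set in the environment, as in the CI step.

    import os
    import time

    import requests


    def wait_for_other_jobs(poll_seconds: int = 5) -> None:
        """Block until no job other than 'collection_job' is still running in this workflow."""
        url = f"https://circleci.com/api/v2/workflow/{os.environ['CIRCLE_WORKFLOW_ID']}/job"
        headers = {"Circle-Token": os.environ["CCI_TOKEN"]}
        while True:
            items = requests.get(url, headers=headers).json().get("items", [])
            still_running = [job for job in items if job["name"] != "collection_job" and job["status"] == "running"]
            if not still_running:
                return
            time.sleep(poll_seconds)


    if __name__ == "__main__":
        wait_for_other_jobs()
        print("All required jobs have now completed")
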
.github/workflows/self-push-amd-mi210-caller.yml

Lines changed: 25 additions & 25 deletions

@@ -1,25 +1,25 @@
-name: Self-hosted runner (AMD mi210 CI caller)
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (push-caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi210
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi210
-    secrets: inherit
+name: Self-hosted runner (AMD mi210 CI caller)
+
+on:
+  #workflow_run:
+  #  workflows: ["Self-hosted runner (push-caller)"]
+  #  branches: ["main"]
+  #  types: [completed]
+  push:
+    branches:
+      - run_amd_push_ci_caller*
+    paths:
+      - "src/**"
+      - "tests/**"
+      - ".github/**"
+      - "templates/**"
+      - "utils/**"
+
+jobs:
+  run_amd_ci:
+    name: AMD mi210
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
+    uses: ./.github/workflows/self-push-amd.yml
+    with:
+      gpu_flavor: mi210
+    secrets: inherit

.github/workflows/self-push-amd-mi250-caller.yml

Lines changed: 25 additions & 25 deletions

@@ -1,25 +1,25 @@
-name: Self-hosted runner (AMD mi250 CI caller)
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (push-caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi250
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi250
-    secrets: inherit
+name: Self-hosted runner (AMD mi250 CI caller)
+
+on:
+  #workflow_run:
+  #  workflows: ["Self-hosted runner (push-caller)"]
+  #  branches: ["main"]
+  #  types: [completed]
+  push:
+    branches:
+      - run_amd_push_ci_caller*
+    paths:
+      - "src/**"
+      - "tests/**"
+      - ".github/**"
+      - "templates/**"
+      - "utils/**"
+
+jobs:
+  run_amd_ci:
+    name: AMD mi250
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
+    uses: ./.github/workflows/self-push-amd.yml
+    with:
+      gpu_flavor: mi250
+    secrets: inherit

.github/workflows/self-push-amd-mi300-caller.yml

Lines changed: 4 additions & 4 deletions

@@ -1,10 +1,10 @@
 name: Self-hosted runner (AMD mi300 CI caller)
 
 on:
-  workflow_run:
-    workflows: ["Self-hosted runner (push-caller)"]
-    branches: ["main"]
-    types: [completed]
+  #workflow_run:
+  #  workflows: ["Self-hosted runner (push-caller)"]
+  #  branches: ["main"]
+  #  types: [completed]
   push:
     branches:
       - run_amd_push_ci_caller*

docs/source/en/main_classes/image_processor.md

Lines changed: 9 additions & 12 deletions

@@ -27,6 +27,7 @@ from transformers import AutoImageProcessor
 
 processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50", use_fast=True)
 ```
+Note that `use_fast` will be set to `True` by default in a future release.
 
 When using a fast image processor, you can also set the `device` argument to specify the device on which the processing should be done. By default, the processing is done on the same device as the inputs if the inputs are tensors, or on the CPU otherwise.
 

@@ -42,21 +43,17 @@ images_processed = processor(images, return_tensors="pt", device="cuda")
 Here are some speed comparisons between the base and fast image processors for the `DETR` and `RT-DETR` models, and how they impact overall inference time:
 
 <div class="flex">
-    <div>
-        <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_padded.png" />
-    </div>
-    <div>
-        <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_batched_compiled.png" />
-    </div>
+  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_padded.png" />
+</div>
+<div class="flex">
+  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_batched_compiled.png" />
 </div>
 
 <div class="flex">
-    <div>
-        <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_single.png" />
-    </div>
-    <div>
-        <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_batched.png" />
-    </div>
+  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_single.png" />
+</div>
+<div class="flex">
+  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_batched.png" />
 </div>
 
 These benchmarks were run on an [AWS EC2 g5.2xlarge instance](https://aws.amazon.com/ec2/instance-types/g5/), utilizing an NVIDIA A10G Tensor Core GPU.
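
Putting the two options documented above together, here is a small usage sketch; the image path is a placeholder, and the "cuda" device string assumes a GPU machine as in the docs snippet:

    from PIL import Image
    from transformers import AutoImageProcessor

    image = Image.open("example.jpg")  # placeholder image path

    # Fast, torchvision-based processor; preprocessing runs on the GPU.
    fast_processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50", use_fast=True)
    inputs = fast_processor(image, return_tensors="pt", device="cuda")

    # Explicitly keep the slow, numpy-based processor (the current default when use_fast is unset).
    slow_processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50", use_fast=False)
    inputs_slow = slow_processor(image, return_tensors="pt")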

src/transformers/audio_utils.py

Lines changed: 1 addition & 5 deletions

@@ -689,16 +689,12 @@ def spectrogram_batch(
     if hop_length <= 0:
         raise ValueError("hop_length must be greater than zero")
 
-    # Check the dimensions of the waveform
+    # Check the dimensions of the waveform , and if waveform is complex
     for waveform in waveform_list:
         if waveform.ndim != 1:
             raise ValueError(f"Input waveform must have only one dimension, shape is {waveform.shape}")
-
-    # Check if waveform is complex
-    for waveform in waveform_list:
         if np.iscomplexobj(waveform):
             raise ValueError("Complex-valued input waveforms are not currently supported")
-
     # Center pad the waveform
     if center:
         padding = [(int(frame_length // 2), int(frame_length // 2))]

src/transformers/models/auto/image_processing_auto.py

Lines changed: 45 additions & 15 deletions

@@ -175,7 +175,7 @@
 IMAGE_PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES)
 
 
-def image_processor_class_from_name(class_name: str):
+def get_image_processor_class_from_name(class_name: str):
     if class_name == "BaseImageProcessorFast":
         return BaseImageProcessorFast
 

@@ -368,7 +368,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
                 identifier allowed by git.
             use_fast (`bool`, *optional*, defaults to `False`):
                 Use a fast torchvision-base image processor if it is supported for a given model.
-                If a fast tokenizer is not available for a given model, a normal numpy-based image processor
+                If a fast image processor is not available for a given model, a normal numpy-based image processor
                 is returned instead.
             return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                 If `False`, then this function returns just the final image processor object. If `True`, then this

@@ -416,6 +416,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
             kwargs["token"] = use_auth_token
 
         config = kwargs.pop("config", None)
+        # TODO: @yoni, change in v4.48 (use_fast set to True by default)
        use_fast = kwargs.pop("use_fast", None)
         trust_remote_code = kwargs.pop("trust_remote_code", None)
         kwargs["_from_auto"] = True

@@ -451,42 +452,71 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
             if not is_timm_config_dict(config_dict):
                 raise initial_exception
 
-        image_processor_class = config_dict.get("image_processor_type", None)
+        image_processor_type = config_dict.get("image_processor_type", None)
         image_processor_auto_map = None
         if "AutoImageProcessor" in config_dict.get("auto_map", {}):
             image_processor_auto_map = config_dict["auto_map"]["AutoImageProcessor"]
 
         # If we still don't have the image processor class, check if we're loading from a previous feature extractor config
         # and if so, infer the image processor class from there.
-        if image_processor_class is None and image_processor_auto_map is None:
+        if image_processor_type is None and image_processor_auto_map is None:
             feature_extractor_class = config_dict.pop("feature_extractor_type", None)
             if feature_extractor_class is not None:
-                image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
+                image_processor_type = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
             if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):
                 feature_extractor_auto_map = config_dict["auto_map"]["AutoFeatureExtractor"]
                 image_processor_auto_map = feature_extractor_auto_map.replace("FeatureExtractor", "ImageProcessor")
 
         # If we don't find the image processor class in the image processor config, let's try the model config.
-        if image_processor_class is None and image_processor_auto_map is None:
+        if image_processor_type is None and image_processor_auto_map is None:
             if not isinstance(config, PretrainedConfig):
                 config = AutoConfig.from_pretrained(
                     pretrained_model_name_or_path,
                     trust_remote_code=trust_remote_code,
                     **kwargs,
                 )
             # It could be in `config.image_processor_type``
-            image_processor_class = getattr(config, "image_processor_type", None)
+            image_processor_type = getattr(config, "image_processor_type", None)
             if hasattr(config, "auto_map") and "AutoImageProcessor" in config.auto_map:
                 image_processor_auto_map = config.auto_map["AutoImageProcessor"]
 
-        if image_processor_class is not None:
-            # Update class name to reflect the use_fast option. If class is not found, None is returned.
-            if use_fast is not None:
-                if use_fast and not image_processor_class.endswith("Fast"):
-                    image_processor_class += "Fast"
-                elif not use_fast and image_processor_class.endswith("Fast"):
-                    image_processor_class = image_processor_class[:-4]
-            image_processor_class = image_processor_class_from_name(image_processor_class)
+        image_processor_class = None
+        # TODO: @yoni, change logic in v4.48 (when use_fast set to True by default)
+        if image_processor_type is not None:
+            # if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor.
+            if use_fast is None:
+                use_fast = image_processor_type.endswith("Fast")
+                if not use_fast:
+                    logger.warning_once(
+                        "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
+                        "`use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. "
+                        "This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
+                    )
+            # Update class name to reflect the use_fast option. If class is not found, we fall back to the slow version.
+            if use_fast and not is_torchvision_available():
+                logger.warning_once(
+                    "Using `use_fast=True` but `torchvision` is not available. Falling back to the slow image processor."
+                )
+                use_fast = False
+            if use_fast:
+                if not image_processor_type.endswith("Fast"):
+                    image_processor_type += "Fast"
+                for _, image_processors in IMAGE_PROCESSOR_MAPPING_NAMES.items():
+                    if image_processor_type in image_processors:
+                        break
+                else:
+                    image_processor_type = image_processor_type[:-4]
+                    use_fast = False
+                    logger.warning_once(
+                        "`use_fast` is set to `True` but the image processor class does not have a fast version. "
+                        " Falling back to the slow version."
+                    )
+                image_processor_class = get_image_processor_class_from_name(image_processor_type)
+            else:
+                image_processor_type = (
+                    image_processor_type[:-4] if image_processor_type.endswith("Fast") else image_processor_type
+                )
+                image_processor_class = get_image_processor_class_from_name(image_processor_type)
 
         has_remote_code = image_processor_auto_map is not None
         has_local_code = image_processor_class is not None or type(config) in IMAGE_PROCESSOR_MAPPING
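
The rewritten branch above reduces to a small decision rule for use_fast. Here is a condensed sketch of that rule pulled out of the from_pretrained plumbing: resolve_image_processor_type is a hypothetical helper, torchvision_available stands in for is_torchvision_available(), and the additional check that the fast class is actually registered in IMAGE_PROCESSOR_MAPPING_NAMES is omitted.

    from typing import Optional


    def resolve_image_processor_type(saved_type: str, use_fast: Optional[bool], torchvision_available: bool) -> str:
        """Return the image processor class name to load for a checkpoint saved with `saved_type`."""
        # If use_fast is unset, follow whatever processor the checkpoint was saved with.
        if use_fast is None:
            use_fast = saved_type.endswith("Fast")
        # Fast processors need torchvision; without it, fall back to the slow class.
        if use_fast and not torchvision_available:
            use_fast = False
        if use_fast:
            return saved_type if saved_type.endswith("Fast") else saved_type + "Fast"
        return saved_type[:-4] if saved_type.endswith("Fast") else saved_type


    # Examples:
    # resolve_image_processor_type("DetrImageProcessor", True, True)     -> "DetrImageProcessorFast"
    # resolve_image_processor_type("DetrImageProcessorFast", None, True) -> "DetrImageProcessorFast"
    # resolve_image_processor_type("DetrImageProcessor", None, True)     -> "DetrImageProcessor"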

src/transformers/models/blip/modeling_blip.py

Lines changed: 4 additions & 2 deletions

@@ -464,7 +464,8 @@ class BlipPreTrainedModel(PreTrainedModel):
     config_class = BlipConfig
     base_model_prefix = "blip"
     supports_gradient_checkpointing = True
-    _no_split_modules = ["BlipEncoderLayer"]
+    _no_split_modules = ["BlipEncoderLayer", "BlipTextEmbeddings"]
+    _skip_keys_device_placement = ["past_key_value"]
 
     def _init_weights(self, module):
         """Initialize the weights"""

@@ -1010,7 +1011,8 @@ def forward(
         text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)
 
         # cosine similarity as logits
-        logit_scale = self.logit_scale.exp()
+        logit_scale = self.logit_scale.exp().to(device=text_embeds.device)
+        image_embeds = image_embeds.to(device=text_embeds.device, dtype=text_embeds.dtype)
         logits_per_text = torch.matmul(text_embeds, image_embeds.t()) * logit_scale
         logits_per_image = logits_per_text.t()
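
The second hunk keeps the contrastive logits computation working when the text and image embeddings end up on different devices (for example under multi-GPU device maps). A self-contained sketch of the same pattern with dummy tensors; the 2.6592 initial logit-scale value is the usual CLIP-style default and is illustrative here:

    import torch

    # Dummy L2-normalized embeddings, possibly produced on different devices.
    text_embeds = torch.nn.functional.normalize(torch.randn(4, 512), dim=-1)
    image_embeds = torch.nn.functional.normalize(torch.randn(4, 512), dim=-1)

    # cosine similarity as logits, with explicit device/dtype alignment as in the change above
    logit_scale = torch.tensor(2.6592).exp().to(device=text_embeds.device)
    image_embeds = image_embeds.to(device=text_embeds.device, dtype=text_embeds.dtype)
    logits_per_text = torch.matmul(text_embeds, image_embeds.t()) * logit_scale
    logits_per_image = logits_per_text.t()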

src/transformers/models/blip/modeling_blip_text.py

Lines changed: 0 additions & 1 deletion

@@ -82,7 +82,6 @@ def forward(
         position_ids = self.position_ids[:, past_key_values_length : seq_length + past_key_values_length]
 
         if inputs_embeds is None:
-            input_ids = input_ids.to(self.word_embeddings.weight.device)
             inputs_embeds = self.word_embeddings(input_ids)
 
         embeddings = inputs_embeds
