
Commit 01b304e

Update accelerator connector messages after the addition of strategy (#9937)
1 parent 788f686 commit 01b304e
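
For context, every message touched by this commit nudges users from `accelerator=`/`distributed_backend=` toward the `strategy` argument added in #9937. A minimal before/after sketch, assuming PyTorch Lightning around the 1.5 release (the `gpus=2` value is illustrative, not taken from this commit):

    from pytorch_lightning import Trainer

    # Deprecated spelling: passing a distributed strategy through `accelerator`
    # now emits the updated rank_zero_deprecation message changed below.
    trainer = Trainer(accelerator="ddp", gpus=2)

    # Recommended spelling after the addition of `strategy`.
    trainer = Trainer(strategy="ddp", gpus=2)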

3 files changed: +30 -38 lines

pytorch_lightning/trainer/connectors/accelerator_connector.py

Lines changed: 26 additions & 33 deletions
@@ -244,12 +244,13 @@ def _validate_accelerator_and_devices(self) -> None:
             raise MisconfigurationException(
                 f"You passed `devices={self.devices}` but haven't specified"
                 " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping,"
-                f" got `accelerator={self.distributed_backend}`."
+                f" got `accelerator={self.distributed_backend!r}`."
             )

     def _validate_accelerator_type(self) -> None:
         if self._accelerator_type and self._accelerator_type != self._device_type:
-            raise MisconfigurationException(
+            # internal error: should not happen.
+            raise ValueError(
                 f"Mismatch between the requested accelerator type ({self._accelerator_type})"
                 f" and assigned device type ({self._device_type})."
             )
@@ -259,25 +260,16 @@ def _warn_if_devices_flag_ignored(self) -> None:
         if self.devices is None:
             return
         devices_warning = f"The flag `devices={self.devices}` will be ignored, as you have set"
-        if self.distributed_backend == "auto":
+        if self.distributed_backend in ("auto", DeviceType.TPU):
             if self.tpu_cores is not None:
                 rank_zero_warn(f"{devices_warning} `tpu_cores={self.tpu_cores}`")
-            elif self.ipus is not None:
-                rank_zero_warn(f"{devices_warning} `ipus={self.ipus}`")
-            elif self.gpus is not None:
-                rank_zero_warn(f"{devices_warning} `gpus={self.gpus}`")
-            elif self.num_processes != 1:
-                rank_zero_warn(f"{devices_warning} `num_processes={self.num_processes}`")
-        elif self.distributed_backend == DeviceType.TPU:
-            if self.tpu_cores is not None:
-                rank_zero_warn(f"{devices_warning} `tpu_cores={self.tpu_cores}`")
-        elif self.distributed_backend == DeviceType.IPU:
+        elif self.distributed_backend in ("auto", DeviceType.IPU):
             if self.ipus is not None:
                 rank_zero_warn(f"{devices_warning} `ipus={self.ipus}`")
-        elif self.distributed_backend == DeviceType.GPU:
+        elif self.distributed_backend in ("auto", DeviceType.GPU):
             if self.gpus is not None:
                 rank_zero_warn(f"{devices_warning} `gpus={self.gpus}`")
-        elif self.distributed_backend == DeviceType.CPU:
+        elif self.distributed_backend in ("auto", DeviceType.CPU):
             if self.num_processes != 1:
                 rank_zero_warn(f"{devices_warning} `num_processes={self.num_processes}`")

@@ -298,26 +290,27 @@ def _handle_accelerator_and_distributed_backend(
     ) -> None:
         if distributed_backend is not None:
             rank_zero_deprecation(
-                f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5."
-                f" Use `Trainer(strategy={distributed_backend})` instead."
+                f"`Trainer(distributed_backend={distributed_backend!r})` "
+                "has been deprecated and will be removed in v1.5."
+                f" Use `Trainer(strategy={distributed_backend!r})` instead."
             )
             if self.strategy is not None:
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})` but have"
-                    f" also passed `Trainer(distributed_backend={distributed_backend})`."
-                    f"HINT: Use just `Trainer(strategy={self.strategy})` instead."
+                    f"You have passed `Trainer(strategy={self.strategy!r})` but have"
+                    f" also passed `Trainer(distributed_backend={distributed_backend!r})`."
+                    f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
                 )

         if accelerator is not None and accelerator in list(DistributedType):
             rank_zero_deprecation(
-                f"Passing {accelerator} `strategy` to the `accelerator` flag in Trainer has been deprecated"
-                f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator})` instead."
+                f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated"
+                f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead."
             )
             if self.strategy is not None:
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})` but have"
-                    f" also passed `Trainer(accelerator={accelerator})`."
-                    f"HINT: Use just `Trainer(strategy={self.strategy})` instead."
+                    f"You have passed `Trainer(strategy={self.strategy!r})` but have"
+                    f" also passed `Trainer(accelerator={accelerator!r})`."
+                    f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
                 )

     def _set_training_type_plugin(self) -> None:
@@ -333,7 +326,7 @@ def handle_given_plugins(self) -> None:
         for plug in self.plugins:
             if self.strategy is not None and self._is_plugin_training_type(plug):
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})`"
+                    f"You have passed `Trainer(strategy={self.strategy!r})`"
                     f" and you can only specify one training type plugin, but you have passed {plug} as a plugin."
                 )
             if self._is_plugin_training_type(plug):
@@ -507,7 +500,7 @@ def _map_devices_to_accelerator(self, accelerator: str) -> bool:
         if accelerator == DeviceType.CPU:
             if not isinstance(self.devices, int):
                 raise MisconfigurationException(
-                    "The flag `devices` only supports integer for `accelerator='cpu'`,"
+                    "The flag `devices` must be an int with `accelerator='cpu'`,"
                     f" got `devices={self.devices}` instead."
                 )
             self.num_processes = self.devices
@@ -816,7 +809,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         elif self.num_gpus > 1 and not _use_cpu:
             rank_zero_warn(
                 "You requested multiple GPUs but did not specify a backend, e.g."
-                ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`. Setting `accelerator="ddp_spawn"` for you.'
+                ' `Trainer(strategy="dp"|"ddp"|"ddp2")`. Setting `strategy="ddp_spawn"` for you.'
             )
             self.distributed_backend = DistributedType.DDP_SPAWN

@@ -833,7 +826,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
                 self._distrib_type = DistributedType.DDP_SPAWN
             if self.num_gpus > 0:
                 rank_zero_warn(
-                    "You requested one or more GPUs, but set the backend to `ddp_cpu`. Training will not use GPUs."
+                    "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs."
                 )
                 self.parallel_device_ids = None
             if self.num_processes is None:
@@ -859,7 +852,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         if (self.num_nodes and self.num_nodes > 1) or (self.num_processes and self.num_processes > 1):
             if self._distrib_type in (DistributedType.DP, DistributedType.DDP2):
                 rank_zero_warn(
-                    f"{self._distrib_type} is not supported on CPUs, hence setting the distributed type to `ddp`."
+                    f"{self._distrib_type.value!r} is not supported on CPUs, hence setting `strategy='ddp'`."
                 )
                 self._distrib_type = DistributedType.DDP
         else:
@@ -887,8 +880,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         if self.num_nodes > 1 and not using_valid_distributed:
             # throw error to force user to choose a supported distributed type such as ddp or ddp2
             raise MisconfigurationException(
-                "Your chosen distributed type does not support num_nodes > 1. "
-                "Please set accelerator=ddp or accelerator=ddp2."
+                "Your chosen strategy does not support `num_nodes > 1`. Please set `strategy=('ddp'|'ddp2')`."
             )

     def _set_horovod_backend(self):
@@ -910,7 +902,8 @@ def check_interactive_compatibility(self):

         if _IS_INTERACTIVE and self._distrib_type is not None and not self._distrib_type.is_interactive_compatible():
             raise MisconfigurationException(
-                f"Selected distributed backend {self._distrib_type} is not compatible with an interactive"
+                f"`Trainer(strategy={self._distrib_type.value!r})` or"
+                f" `Trainer(accelerator={self._distrib_type.value!r})` is not compatible with an interactive"
                 " environment. Run your code as a script, or choose one of the compatible backends:"
                 f" {', '.join(DistributedType.interactive_compatible_types())}."
                 " In case you are spawning processes yourself, make sure to include the Trainer"

tests/accelerators/test_accelerator_connector.py

Lines changed: 4 additions & 4 deletions
@@ -447,10 +447,10 @@ def on_fit_start(self, trainer, pl_module):
 @mock.patch("pytorch_lightning.utilities._IS_INTERACTIVE", return_value=True)
 @mock.patch("torch.cuda.device_count", return_value=2)
 def test_ipython_incompatible_backend_error(*_):
-    with pytest.raises(MisconfigurationException, match="backend ddp is not compatible"):
+    with pytest.raises(MisconfigurationException, match=r"strategy='ddp'\)`.*is not compatible"):
         Trainer(accelerator="ddp", gpus=2)

-    with pytest.raises(MisconfigurationException, match="backend ddp2 is not compatible"):
+    with pytest.raises(MisconfigurationException, match=r"strategy='ddp2'\)`.*is not compatible"):
         Trainer(accelerator="ddp2", gpus=2)


@@ -615,14 +615,14 @@ def test_set_devices_if_none_gpu():

 def test_devices_with_cpu_only_supports_integer():

-    with pytest.raises(MisconfigurationException, match="The flag `devices` only supports integer"):
+    with pytest.raises(MisconfigurationException, match="The flag `devices` must be an int"):
         Trainer(accelerator="cpu", devices="1,3")


 @pytest.mark.parametrize("training_type", ["ddp2", "dp"])
 def test_unsupported_distrib_types_on_cpu(training_type):

-    with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting the distributed type to `ddp`."):
+    with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting `strategy='ddp"):
         trainer = Trainer(accelerator=training_type, num_processes=2)

     assert trainer._distrib_type == DistributedType.DDP
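
The updated `match=` arguments are regular expressions that pytest applies with `re.search` against the exception text, which is why the literal `)` after the quoted strategy name is escaped. A small sketch of that matching, using a hand-written message in the new format produced by `check_interactive_compatibility` above:

    import re

    message = (
        "`Trainer(strategy='ddp')` or `Trainer(accelerator='ddp')` is not compatible"
        " with an interactive environment."
    )
    assert re.search(r"strategy='ddp'\)`.*is not compatible", message)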

tests/accelerators/test_tpu.py

Lines changed: 0 additions & 1 deletion
@@ -222,7 +222,6 @@ def on_train_end(self, trainer, pl_module):

 @RunIf(tpu=True)
 def test_ddp_cpu_not_supported_on_tpus():
-
     with pytest.raises(MisconfigurationException, match="`accelerator='ddp_cpu'` is not supported on TPU machines"):
         Trainer(accelerator="ddp_cpu")
