@@ -244,12 +244,13 @@ def _validate_accelerator_and_devices(self) -> None:
             raise MisconfigurationException(
                 f"You passed `devices={self.devices}` but haven't specified"
                 " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping,"
-                f" got `accelerator={self.distributed_backend}`."
+                f" got `accelerator={self.distributed_backend!r}`."
             )

     def _validate_accelerator_type(self) -> None:
         if self._accelerator_type and self._accelerator_type != self._device_type:
-            raise MisconfigurationException(
+            # internal error: should not happen.
+            raise ValueError(
                 f"Mismatch between the requested accelerator type ({self._accelerator_type})"
                 f" and assigned device type ({self._device_type})."
             )
@@ -259,25 +260,16 @@ def _warn_if_devices_flag_ignored(self) -> None:
         if self.devices is None:
             return
         devices_warning = f"The flag `devices={self.devices}` will be ignored, as you have set"
-        if self.distributed_backend == "auto":
+        if self.distributed_backend in ("auto", DeviceType.TPU):
             if self.tpu_cores is not None:
                 rank_zero_warn(f"{devices_warning} `tpu_cores={self.tpu_cores}`")
-            elif self.ipus is not None:
-                rank_zero_warn(f"{devices_warning} `ipus={self.ipus}`")
-            elif self.gpus is not None:
-                rank_zero_warn(f"{devices_warning} `gpus={self.gpus}`")
-            elif self.num_processes != 1:
-                rank_zero_warn(f"{devices_warning} `num_processes={self.num_processes}`")
-        elif self.distributed_backend == DeviceType.TPU:
-            if self.tpu_cores is not None:
-                rank_zero_warn(f"{devices_warning} `tpu_cores={self.tpu_cores}`")
-        elif self.distributed_backend == DeviceType.IPU:
+        elif self.distributed_backend in ("auto", DeviceType.IPU):
             if self.ipus is not None:
                 rank_zero_warn(f"{devices_warning} `ipus={self.ipus}`")
-        elif self.distributed_backend == DeviceType.GPU:
+        elif self.distributed_backend in ("auto", DeviceType.GPU):
             if self.gpus is not None:
                 rank_zero_warn(f"{devices_warning} `gpus={self.gpus}`")
-        elif self.distributed_backend == DeviceType.CPU:
+        elif self.distributed_backend in ("auto", DeviceType.CPU):
             if self.num_processes != 1:
                 rank_zero_warn(f"{devices_warning} `num_processes={self.num_processes}`")

@@ -298,26 +290,27 @@ def _handle_accelerator_and_distributed_backend(
     ) -> None:
         if distributed_backend is not None:
             rank_zero_deprecation(
-                f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5."
-                f" Use `Trainer(strategy={distributed_backend})` instead."
+                f"`Trainer(distributed_backend={distributed_backend!r})` "
+                "has been deprecated and will be removed in v1.5."
+                f" Use `Trainer(strategy={distributed_backend!r})` instead."
             )
             if self.strategy is not None:
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})` but have"
-                    f" also passed `Trainer(distributed_backend={distributed_backend})`."
-                    f"HINT: Use just `Trainer(strategy={self.strategy})` instead."
+                    f"You have passed `Trainer(strategy={self.strategy!r})` but have"
+                    f" also passed `Trainer(distributed_backend={distributed_backend!r})`."
+                    f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
                 )

         if accelerator is not None and accelerator in list(DistributedType):
             rank_zero_deprecation(
-                f"Passing {accelerator} `strategy` to the `accelerator` flag in Trainer has been deprecated"
-                f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator})` instead."
+                f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated"
+                f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead."
             )
             if self.strategy is not None:
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})` but have"
-                    f" also passed `Trainer(accelerator={accelerator})`."
-                    f"HINT: Use just `Trainer(strategy={self.strategy})` instead."
+                    f"You have passed `Trainer(strategy={self.strategy!r})` but have"
+                    f" also passed `Trainer(accelerator={accelerator!r})`."
+                    f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
                 )

     def _set_training_type_plugin(self) -> None:
@@ -333,7 +326,7 @@ def handle_given_plugins(self) -> None:
         for plug in self.plugins:
             if self.strategy is not None and self._is_plugin_training_type(plug):
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})`"
+                    f"You have passed `Trainer(strategy={self.strategy!r})`"
                     f" and you can only specify one training type plugin, but you have passed {plug} as a plugin."
                 )
             if self._is_plugin_training_type(plug):
@@ -507,7 +500,7 @@ def _map_devices_to_accelerator(self, accelerator: str) -> bool:
         if accelerator == DeviceType.CPU:
             if not isinstance(self.devices, int):
                 raise MisconfigurationException(
-                    "The flag `devices` only supports integer for `accelerator='cpu'`,"
+                    "The flag `devices` must be an int with `accelerator='cpu'`,"
                     f" got `devices={self.devices}` instead."
                 )
             self.num_processes = self.devices
@@ -816,7 +809,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         elif self.num_gpus > 1 and not _use_cpu:
             rank_zero_warn(
                 "You requested multiple GPUs but did not specify a backend, e.g."
-                ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`. Setting `accelerator="ddp_spawn"` for you.'
+                ' `Trainer(strategy="dp"|"ddp"|"ddp2")`. Setting `strategy="ddp_spawn"` for you.'
             )
             self.distributed_backend = DistributedType.DDP_SPAWN

@@ -833,7 +826,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
             self._distrib_type = DistributedType.DDP_SPAWN
             if self.num_gpus > 0:
                 rank_zero_warn(
-                    "You requested one or more GPUs, but set the backend to `ddp_cpu`. Training will not use GPUs."
+                    "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs."
                 )
             self.parallel_device_ids = None
             if self.num_processes is None:
@@ -859,7 +852,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
             if (self.num_nodes and self.num_nodes > 1) or (self.num_processes and self.num_processes > 1):
                 if self._distrib_type in (DistributedType.DP, DistributedType.DDP2):
                     rank_zero_warn(
-                        f"{self._distrib_type} is not supported on CPUs, hence setting the distributed type to `ddp`."
+                        f"{self._distrib_type.value!r} is not supported on CPUs, hence setting `strategy='ddp'`."
                     )
                     self._distrib_type = DistributedType.DDP
                 else:
@@ -887,8 +880,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         if self.num_nodes > 1 and not using_valid_distributed:
             # throw error to force user to choose a supported distributed type such as ddp or ddp2
             raise MisconfigurationException(
-                "Your chosen distributed type does not support num_nodes > 1. "
-                "Please set accelerator=ddp or accelerator=ddp2."
+                "Your chosen strategy does not support `num_nodes > 1`. Please set `strategy=('ddp'|'ddp2')`."
             )

     def _set_horovod_backend(self):
@@ -910,7 +902,8 @@ def check_interactive_compatibility(self):

         if _IS_INTERACTIVE and self._distrib_type is not None and not self._distrib_type.is_interactive_compatible():
             raise MisconfigurationException(
-                f"Selected distributed backend {self._distrib_type} is not compatible with an interactive"
+                f"`Trainer(strategy={self._distrib_type.value!r})` or"
+                f" `Trainer(accelerator={self._distrib_type.value!r})` is not compatible with an interactive"
                 " environment. Run your code as a script, or choose one of the compatible backends:"
                 f" {', '.join(DistributedType.interactive_compatible_types())}."
                 " In case you are spawning processes yourself, make sure to include the Trainer"
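A minimal usage sketch of the flag combination these updated messages steer users toward, assuming a PyTorch Lightning release (1.5 or later) where the `strategy` argument exists; the specific values below are illustrative only:

from pytorch_lightning import Trainer

# Select the distributed strategy via `strategy` and the hardware via `accelerator`/`devices`;
# passing a strategy name through `accelerator=...` or `distributed_backend=...` is deprecated.
trainer = Trainer(accelerator="gpu", devices=2, strategy="ddp_spawn")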