2626
2727import typing as t
2828
29- from pydantic import BaseModel , Field , NonNegativeInt , PositiveInt
29+ from pydantic import BaseModel , Field , NonNegativeInt , PositiveInt , ValidationError
3030
3131import smartsim ._core .schemas .utils as _utils
32+ from smartsim .error .errors import SmartSimError
3233
3334# Black and Pylint disagree about where to put the `...`
3435# pylint: disable=multiple-statements
@@ -42,31 +43,50 @@ class DragonRequest(BaseModel): ...
class DragonRunPolicy(BaseModel):
    """Policy specifying hardware constraints when running a Dragon job"""

    device: t.Literal["cpu", "gpu", ""] = Field(default="")
    """Specify a device category on which to run the job. Uses system default
    if not specified"""
    cpu_affinity: t.List[NonNegativeInt] = Field(default_factory=list)
    """List of CPU indices to which the job should be pinned"""
    gpu_affinity: t.List[NonNegativeInt] = Field(default_factory=list)
    """List of GPU indices to which the job should be pinned"""

    @staticmethod
    def from_run_args(
        run_args: t.Dict[str, t.Union[int, str, float, None]]
    ) -> "DragonRunPolicy":
        """Create a DragonRunPolicy from a dictionary of run arguments

        The keys ``node-feature``, ``gpu-affinity`` and ``cpu-affinity`` are
        read; all other keys are ignored. Affinity values are interpreted as
        a comma-separated list of integer indices (a single integer is also
        accepted).

        :param run_args: run arguments to convert into a policy
        :returns: a policy populated from the run arguments, or a default
                  policy if none of the relevant arguments are supplied
        :raises SmartSimError: if an affinity value cannot be parsed as a
                               list of integers or the policy fails validation
        """

        def _parse_affinity(key: str) -> t.List[int]:
            """Convert the run argument stored at `key` into a list of ints"""
            raw_value = run_args.get(key, None)
            # compare against None explicitly: an integer affinity of `0`
            # is falsy but is a valid request to pin to device 0
            if raw_value is None:
                return []

            # strip BEFORE filtering so whitespace-only tokens (e.g. the
            # trailing element of "0, 1, ") are dropped instead of being
            # passed to `int`
            tokens = (token.strip() for token in str(raw_value).split(","))
            try:
                return [int(token) for token in tokens if token]
            except ValueError as ex:
                raise SmartSimError("Unable to build DragonRunPolicy") from ex

        features: str = str(run_args.get("node-feature", ""))

        # an empty device lets the system default be used
        device = ""
        if "gpu" in features:
            device = "gpu"
        elif "cpu" in features:
            device = "cpu"

        gpu_affinity = _parse_affinity("gpu-affinity")
        cpu_affinity = _parse_affinity("cpu-affinity")

        if not (device or cpu_affinity or gpu_affinity):
            return DragonRunPolicy()

        try:
            return DragonRunPolicy(
                device=device,
                cpu_affinity=cpu_affinity,
                gpu_affinity=gpu_affinity,
            )
        except ValidationError as ex:
            # e.g. negative indices rejected by `NonNegativeInt`
            raise SmartSimError("Unable to build DragonRunPolicy") from ex
7090
7191
7292class DragonRunRequestView (DragonRequest ):
0 commit comments