@@ -103,26 +103,6 @@ def job_client(self):
103
103
)
104
104
return self ._job_submission_client
105
105
106
- def evaluate_dispatch_priority (self ):
107
- priority_class = self .config .dispatch_priority
108
-
109
- try :
110
- config_check ()
111
- api_instance = client .CustomObjectsApi (api_config_handler ())
112
- priority_classes = api_instance .list_cluster_custom_object (
113
- group = "scheduling.k8s.io" ,
114
- version = "v1" ,
115
- plural = "priorityclasses" ,
116
- )
117
- except Exception as e : # pragma: no cover
118
- return _kube_api_error_handling (e )
119
-
120
- for pc in priority_classes ["items" ]:
121
- if pc ["metadata" ]["name" ] == priority_class :
122
- return pc ["value" ]
123
- print (f"Priority class { priority_class } is not available in the cluster" )
124
- return None
125
-
126
106
def validate_image_config (self ):
127
107
"""
128
108
Validates that the image configuration is not empty.
@@ -152,18 +132,6 @@ def create_app_wrapper(self):
152
132
self .validate_image_config ()
153
133
154
134
# Before attempting to create the cluster AW, let's evaluate the ClusterConfig
155
- if self .config .dispatch_priority :
156
- if not self .config .mcad :
157
- raise ValueError (
158
- "Invalid Cluster Configuration, cannot have dispatch priority without MCAD"
159
- )
160
- priority_val = self .evaluate_dispatch_priority ()
161
- if priority_val == None :
162
- raise ValueError (
163
- "Invalid Cluster Configuration, AppWrapper not generated"
164
- )
165
- else :
166
- priority_val = None
167
135
168
136
name = self .config .name
169
137
namespace = self .config .namespace
@@ -178,12 +146,10 @@ def create_app_wrapper(self):
178
146
workers = self .config .num_workers
179
147
template = self .config .template
180
148
image = self .config .image
181
- instascale = self .config .instascale
182
- mcad = self .config .mcad
149
+ appwrapper = self .config .appwrapper
183
150
instance_types = self .config .machine_types
184
151
env = self .config .envs
185
152
image_pull_secrets = self .config .image_pull_secrets
186
- dispatch_priority = self .config .dispatch_priority
187
153
write_to_file = self .config .write_to_file
188
154
verify_tls = self .config .verify_tls
189
155
local_queue = self .config .local_queue
@@ -202,13 +168,10 @@ def create_app_wrapper(self):
202
168
workers = workers ,
203
169
template = template ,
204
170
image = image ,
205
- instascale = instascale ,
206
- mcad = mcad ,
171
+ appwrapper = appwrapper ,
207
172
instance_types = instance_types ,
208
173
env = env ,
209
174
image_pull_secrets = image_pull_secrets ,
210
- dispatch_priority = dispatch_priority ,
211
- priority_val = priority_val ,
212
175
write_to_file = write_to_file ,
213
176
verify_tls = verify_tls ,
214
177
local_queue = local_queue ,
@@ -230,13 +193,13 @@ def up(self):
230
193
try :
231
194
config_check ()
232
195
api_instance = client .CustomObjectsApi (api_config_handler ())
233
- if self .config .mcad :
196
+ if self .config .appwrapper :
234
197
if self .config .write_to_file :
235
198
with open (self .app_wrapper_yaml ) as f :
236
199
aw = yaml .load (f , Loader = yaml .FullLoader )
237
200
api_instance .create_namespaced_custom_object (
238
201
group = "workload.codeflare.dev" ,
239
- version = "v1beta1 " ,
202
+ version = "v1beta2 " ,
240
203
namespace = namespace ,
241
204
plural = "appwrappers" ,
242
205
body = aw ,
@@ -245,7 +208,7 @@ def up(self):
245
208
aw = yaml .safe_load (self .app_wrapper_yaml )
246
209
api_instance .create_namespaced_custom_object (
247
210
group = "workload.codeflare.dev" ,
248
- version = "v1beta1 " ,
211
+ version = "v1beta2 " ,
249
212
namespace = namespace ,
250
213
plural = "appwrappers" ,
251
214
body = aw ,
@@ -284,10 +247,10 @@ def down(self):
284
247
try :
285
248
config_check ()
286
249
api_instance = client .CustomObjectsApi (api_config_handler ())
287
- if self .config .mcad :
250
+ if self .config .appwrapper :
288
251
api_instance .delete_namespaced_custom_object (
289
252
group = "workload.codeflare.dev" ,
290
- version = "v1beta1 " ,
253
+ version = "v1beta2 " ,
291
254
namespace = namespace ,
292
255
plural = "appwrappers" ,
293
256
name = self .app_wrapper_name ,
@@ -306,30 +269,28 @@ def status(
306
269
"""
307
270
ready = False
308
271
status = CodeFlareClusterStatus .UNKNOWN
309
- if self .config .mcad :
272
+ if self .config .appwrapper :
310
273
# check the app wrapper status
311
274
appwrapper = _app_wrapper_status (self .config .name , self .config .namespace )
312
275
if appwrapper :
313
276
if appwrapper .status in [
314
- AppWrapperStatus .RUNNING ,
315
- AppWrapperStatus .COMPLETED ,
316
- AppWrapperStatus .RUNNING_HOLD_COMPLETION ,
277
+ AppWrapperStatus .RESUMING ,
278
+ AppWrapperStatus .RESETTING ,
317
279
]:
318
280
ready = False
319
281
status = CodeFlareClusterStatus .STARTING
320
282
elif appwrapper .status in [
321
283
AppWrapperStatus .FAILED ,
322
- AppWrapperStatus .DELETED ,
323
284
]:
324
285
ready = False
325
286
status = CodeFlareClusterStatus .FAILED # should deleted be separate
326
287
return status , ready # exit early, no need to check ray status
327
288
elif appwrapper .status in [
328
- AppWrapperStatus .PENDING ,
329
- AppWrapperStatus .QUEUEING ,
289
+ AppWrapperStatus .SUSPENDED ,
290
+ AppWrapperStatus .SUSPENDING ,
330
291
]:
331
292
ready = False
332
- if appwrapper .status == AppWrapperStatus .PENDING :
293
+ if appwrapper .status == AppWrapperStatus .SUSPENDED :
333
294
status = CodeFlareClusterStatus .QUEUED
334
295
else :
335
296
status = CodeFlareClusterStatus .QUEUEING
@@ -501,7 +462,7 @@ def job_logs(self, job_id: str) -> str:
501
462
502
463
def from_k8_cluster_object (
503
464
rc ,
504
- mcad = True ,
465
+ appwrapper = True ,
505
466
write_to_file = False ,
506
467
verify_tls = True ,
507
468
):
@@ -534,11 +495,10 @@ def from_k8_cluster_object(
534
495
"resources"
535
496
]["limits" ]["nvidia.com/gpu" ]
536
497
),
537
- instascale = True if machine_types else False ,
538
498
image = rc ["spec" ]["workerGroupSpecs" ][0 ]["template" ]["spec" ]["containers" ][
539
499
0
540
500
]["image" ],
541
- mcad = mcad ,
501
+ appwrapper = appwrapper ,
542
502
write_to_file = write_to_file ,
543
503
verify_tls = verify_tls ,
544
504
local_queue = rc ["metadata" ]
@@ -597,15 +557,15 @@ def list_all_clusters(namespace: str, print_to_console: bool = True):
597
557
return clusters
598
558
599
559
600
- def list_all_queued (namespace : str , print_to_console : bool = True , mcad : bool = False ):
560
+ def list_all_queued (
561
+ namespace : str , print_to_console : bool = True , appwrapper : bool = False
562
+ ):
601
563
"""
602
564
Returns (and prints by default) a list of all currently queued-up Ray Clusters
603
565
in a given namespace.
604
566
"""
605
- if mcad :
606
- resources = _get_app_wrappers (
607
- namespace , filter = [AppWrapperStatus .RUNNING , AppWrapperStatus .PENDING ]
608
- )
567
+ if appwrapper :
568
+ resources = _get_app_wrappers (namespace , filter = [AppWrapperStatus .SUSPENDED ])
609
569
if print_to_console :
610
570
pretty_print .print_app_wrappers_status (resources )
611
571
else :
@@ -675,10 +635,10 @@ def get_cluster(
675
635
676
636
for rc in rcs ["items" ]:
677
637
if rc ["metadata" ]["name" ] == cluster_name :
678
- mcad = _check_aw_exists (cluster_name , namespace )
638
+ appwrapper = _check_aw_exists (cluster_name , namespace )
679
639
return Cluster .from_k8_cluster_object (
680
640
rc ,
681
- mcad = mcad ,
641
+ appwrapper = appwrapper ,
682
642
write_to_file = write_to_file ,
683
643
verify_tls = verify_tls ,
684
644
)
@@ -721,7 +681,7 @@ def _check_aw_exists(name: str, namespace: str) -> bool:
721
681
api_instance = client .CustomObjectsApi (api_config_handler ())
722
682
aws = api_instance .list_namespaced_custom_object (
723
683
group = "workload.codeflare.dev" ,
724
- version = "v1beta1 " ,
684
+ version = "v1beta2 " ,
725
685
namespace = namespace ,
726
686
plural = "appwrappers" ,
727
687
)
@@ -781,7 +741,7 @@ def _app_wrapper_status(name, namespace="default") -> Optional[AppWrapper]:
781
741
api_instance = client .CustomObjectsApi (api_config_handler ())
782
742
aws = api_instance .list_namespaced_custom_object (
783
743
group = "workload.codeflare.dev" ,
784
- version = "v1beta1 " ,
744
+ version = "v1beta2 " ,
785
745
namespace = namespace ,
786
746
plural = "appwrappers" ,
787
747
)
@@ -851,7 +811,7 @@ def _get_app_wrappers(
851
811
api_instance = client .CustomObjectsApi (api_config_handler ())
852
812
aws = api_instance .list_namespaced_custom_object (
853
813
group = "workload.codeflare.dev" ,
854
- version = "v1beta1 " ,
814
+ version = "v1beta2 " ,
855
815
namespace = namespace ,
856
816
plural = "appwrappers" ,
857
817
)
@@ -945,18 +905,14 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
945
905
946
906
947
907
def _map_to_app_wrapper (aw ) -> AppWrapper :
948
- if "status" in aw and "canrun" in aw [ "status" ] :
908
+ if "status" in aw :
949
909
return AppWrapper (
950
910
name = aw ["metadata" ]["name" ],
951
- status = AppWrapperStatus (aw ["status" ]["state" ].lower ()),
952
- can_run = aw ["status" ]["canrun" ],
953
- job_state = aw ["status" ]["queuejobstate" ],
911
+ status = AppWrapperStatus (aw ["status" ]["phase" ].lower ()),
954
912
)
955
913
return AppWrapper (
956
914
name = aw ["metadata" ]["name" ],
957
- status = AppWrapperStatus ("queueing" ),
958
- can_run = False ,
959
- job_state = "Still adding to queue" ,
915
+ status = AppWrapperStatus ("suspended" ),
960
916
)
961
917
962
918
0 commit comments