Skip to content

Commit 14969f2

Browse files
committed
Change Default namespace logic to use user's current namespace
Signed-off-by: Anish Asthana <[email protected]>
1 parent ba50edb commit 14969f2

File tree

4 files changed

+77
-50
lines changed

4 files changed

+77
-50
lines changed

src/codeflare_sdk/cluster/cluster.py

+8-20
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,14 @@ def create_app_wrapper(self):
6363
Called upon cluster object creation, creates an AppWrapper yaml based on
6464
the specifications of the ClusterConfiguration.
6565
"""
66+
67+
if self.config.namespace is None:
68+
self.config.namespace = oc.get_project_name()
69+
if type(self.config.namespace) is not str:
70+
raise TypeError(
71+
f"Namespace {self.config.namespace} is of type {type(self.config.namespace)}. Check your Kubernetes Authentication."
72+
)
73+
6674
name = self.config.name
6775
namespace = self.config.namespace
6876
min_cpu = self.config.min_cpus
@@ -285,26 +293,6 @@ def torchx_config(
285293
return to_return
286294

287295

288-
def get_current_namespace() -> str:
289-
"""
290-
Returns the user's current working namespace.
291-
"""
292-
try:
293-
namespace = oc.invoke("project", ["-q"]).actions()[0].out.strip()
294-
except oc.OpenShiftPythonException as osp: # pragma: no cover
295-
error_msg = osp.result.err()
296-
if (
297-
"do not have rights" in error_msg
298-
or "Missing or incomplete configuration" in error_msg
299-
):
300-
raise PermissionError(
301-
"Action not permitted, have you run auth.login() or cluster.up()?"
302-
)
303-
else:
304-
raise osp
305-
return namespace
306-
307-
308296
def list_all_clusters(namespace: str, print_to_console: bool = True):
309297
"""
310298
Returns (and prints by default) a list of all clusters in a given namespace.

src/codeflare_sdk/cluster/config.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from dataclasses import dataclass, field
2222
from .auth import Authentication
2323
import pathlib
24+
import openshift
2425

2526
dir = pathlib.Path(__file__).parent.parent.resolve()
2627

@@ -33,7 +34,7 @@ class ClusterConfiguration:
3334
"""
3435

3536
name: str
36-
namespace: str = "default"
37+
namespace: str = None
3738
head_info: list = field(default_factory=list)
3839
machine_types: list = field(default_factory=list) # ["m4.xlarge", "g4dn.xlarge"]
3940
min_cpus: int = 1

src/codeflare_sdk/job/jobs.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from typing import TYPE_CHECKING, Optional, Dict, List
1818
from pathlib import Path
1919

20+
import openshift as oc
2021
from torchx.components.dist import ddp
2122
from torchx.runner import get_runner
2223
from torchx.specs import AppHandle, parse_app_handle, AppDryRunInfo
@@ -114,6 +115,9 @@ def _missing_spec(self, spec: str):
114115
raise ValueError(f"Job definition missing arg: {spec}")
115116

116117
def _dry_run_no_cluster(self):
118+
if self.scheduler_args is not None:
119+
if self.scheduler_args.get("namespace") is None:
120+
self.scheduler_args["namespace"] = oc.get_project_name()
117121
return torchx_runner.dryrun(
118122
app=ddp(
119123
*self.script_args,
@@ -144,7 +148,7 @@ def _dry_run_no_cluster(self):
144148
else self._missing_spec("image"),
145149
),
146150
scheduler="kubernetes_mcad",
147-
cfg=self.scheduler_args if self.scheduler_args is not None else None,
151+
cfg=self.scheduler_args,
148152
workspace="",
149153
)
150154

tests/unit_test.py

+62-28
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from codeflare_sdk.cluster.cluster import (
2525
Cluster,
2626
ClusterConfiguration,
27-
get_current_namespace,
2827
list_all_clusters,
2928
list_all_queued,
3029
_copy_to_ray,
@@ -240,6 +239,23 @@ def test_cluster_creation():
240239
return cluster
241240

242241

242+
def test_default_cluster_creation(mocker):
243+
mocker.patch(
244+
"openshift.get_project_name",
245+
return_value="opendatahub",
246+
)
247+
default_config = ClusterConfiguration(
248+
name="unit-test-default-cluster",
249+
)
250+
cluster = Cluster(default_config)
251+
252+
assert cluster.app_wrapper_yaml == "unit-test-default-cluster.yaml"
253+
assert cluster.app_wrapper_name == "unit-test-default-cluster"
254+
assert cluster.config.namespace == "opendatahub"
255+
256+
return cluster
257+
258+
243259
def arg_check_apply_effect(*args):
244260
assert args[0] == "apply"
245261
assert args[1] == ["-f", "unit-test-cluster.yaml"]
@@ -496,14 +512,6 @@ def act_side_effect_list(self):
496512
return [self]
497513

498514

499-
def test_get_namespace(mocker):
500-
mocker.patch("openshift.invoke", side_effect=arg_side_effect)
501-
mock_res = mocker.patch.object(openshift.Result, "actions")
502-
mock_res.side_effect = lambda: act_side_effect_list(fake_res)
503-
vars = get_current_namespace()
504-
assert vars == "('project', ['-q'])"
505-
506-
507515
def get_selector(*args):
508516
selector = Selector({"operation": "selector", "status": 0, "actions": []})
509517
return selector
@@ -1593,22 +1601,6 @@ def test_wait_ready(mocker, capsys):
15931601
)
15941602

15951603

1596-
def test_cmd_line_generation():
1597-
os.system(
1598-
f"python3 {parent}/src/codeflare_sdk/utils/generate_yaml.py --name=unit-cmd-cluster --min-cpu=1 --max-cpu=1 --min-memory=2 --max-memory=2 --gpu=1 --workers=2 --template=src/codeflare_sdk/templates/new-template.yaml"
1599-
)
1600-
assert filecmp.cmp(
1601-
"unit-cmd-cluster.yaml", f"{parent}/tests/test-case-cmd.yaml", shallow=True
1602-
)
1603-
os.remove("unit-test-cluster.yaml")
1604-
os.remove("unit-cmd-cluster.yaml")
1605-
1606-
1607-
def test_cleanup():
1608-
os.remove("test.yaml")
1609-
os.remove("raytest2.yaml")
1610-
1611-
16121604
def test_jobdefinition_coverage():
16131605
abstract = JobDefinition()
16141606
cluster = Cluster(test_config_creation())
@@ -1673,7 +1665,6 @@ def test_DDPJobDefinition_dry_run():
16731665
assert type(ddp_job._scheduler) == type(str())
16741666

16751667
assert ddp_job.request.app_id.startswith("test")
1676-
assert ddp_job.request.working_dir.startswith("/tmp/torchx_workspace")
16771668
assert ddp_job.request.cluster_name == "unit-test-cluster"
16781669
assert ddp_job.request.requirements == "test"
16791670

@@ -1687,12 +1678,18 @@ def test_DDPJobDefinition_dry_run():
16871678
assert ddp_job._scheduler == "ray"
16881679

16891680

1690-
def test_DDPJobDefinition_dry_run_no_cluster():
1681+
def test_DDPJobDefinition_dry_run_no_cluster(mocker):
16911682
"""
16921683
Test that the dry run method returns the correct type: AppDryRunInfo,
16931684
that the attributes of the returned object are of the correct type,
16941685
and that the values from cluster and job definition are correctly passed.
16951686
"""
1687+
1688+
mocker.patch(
1689+
"openshift.get_project_name",
1690+
return_value="opendatahub",
1691+
)
1692+
16961693
ddp = test_DDPJobDefinition_creation()
16971694
ddp.image = "fake-image"
16981695
ddp_job = ddp._dry_run_no_cluster()
@@ -1750,12 +1747,18 @@ def test_DDPJobDefinition_dry_run_no_resource_args():
17501747
)
17511748

17521749

1753-
def test_DDPJobDefinition_dry_run_no_cluster_no_resource_args():
1750+
def test_DDPJobDefinition_dry_run_no_cluster_no_resource_args(mocker):
17541751
"""
17551752
Test that the dry run method returns the correct type: AppDryRunInfo,
17561753
that the attributes of the returned object are of the correct type,
17571754
and that the values from cluster and job definition are correctly passed.
17581755
"""
1756+
1757+
mocker.patch(
1758+
"openshift.get_project_name",
1759+
return_value="opendatahub",
1760+
)
1761+
17591762
ddp = test_DDPJobDefinition_creation()
17601763
try:
17611764
ddp._dry_run_no_cluster()
@@ -1806,6 +1809,10 @@ def test_DDPJobDefinition_submit(mocker):
18061809
"""
18071810
ddp_def = test_DDPJobDefinition_creation()
18081811
cluster = Cluster(test_config_creation())
1812+
mocker.patch(
1813+
"openshift.get_project_name",
1814+
return_value="opendatahub",
1815+
)
18091816
mocker.patch(
18101817
"codeflare_sdk.job.jobs.torchx_runner.schedule",
18111818
return_value="fake-dashboard-url",
@@ -1852,6 +1859,10 @@ def test_DDPJob_creation(mocker):
18521859
def test_DDPJob_creation_no_cluster(mocker):
18531860
ddp_def = test_DDPJobDefinition_creation()
18541861
ddp_def.image = "fake-image"
1862+
mocker.patch(
1863+
"openshift.get_project_name",
1864+
return_value="opendatahub",
1865+
)
18551866
mocker.patch(
18561867
"codeflare_sdk.job.jobs.torchx_runner.schedule",
18571868
return_value="fake-app-handle",
@@ -1898,6 +1909,10 @@ def arg_check_side_effect(*args):
18981909

18991910
def test_DDPJob_cancel(mocker):
19001911
ddp_job = test_DDPJob_creation_no_cluster(mocker)
1912+
mocker.patch(
1913+
"openshift.get_project_name",
1914+
return_value="opendatahub",
1915+
)
19011916
mocker.patch(
19021917
"codeflare_sdk.job.jobs.torchx_runner.cancel", side_effect=arg_check_side_effect
19031918
)
@@ -1916,3 +1931,22 @@ def parse_j(cmd):
19161931
max_worker = args[1]
19171932
gpu = args[3]
19181933
return f"{max_worker}x{gpu}"
1934+
1935+
1936+
# Make sure to keep this function and the following function at the end of the file
1937+
def test_cmd_line_generation():
1938+
os.system(
1939+
f"python3 {parent}/src/codeflare_sdk/utils/generate_yaml.py --name=unit-cmd-cluster --min-cpu=1 --max-cpu=1 --min-memory=2 --max-memory=2 --gpu=1 --workers=2 --template=src/codeflare_sdk/templates/new-template.yaml"
1940+
)
1941+
assert filecmp.cmp(
1942+
"unit-cmd-cluster.yaml", f"{parent}/tests/test-case-cmd.yaml", shallow=True
1943+
)
1944+
os.remove("unit-test-cluster.yaml")
1945+
os.remove("unit-test-default-cluster.yaml")
1946+
os.remove("unit-cmd-cluster.yaml")
1947+
1948+
1949+
# Make sure to always keep this function last
1950+
def test_cleanup():
1951+
os.remove("test.yaml")
1952+
os.remove("raytest2.yaml")

0 commit comments

Comments
 (0)