Skip to content

Change default namespace logic #85

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ dist/
.python-version
__pycache__/
.coverage
Pipfile
Pipfile.lock
51 changes: 28 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,31 @@ Can be installed via `pip`: `pip install codeflare-sdk`

## Development

For testing, make sure to have installed:
- `pytest`, `pytest-mock` (can both be installed with `pip`)
- The remaining dependencies located in `requirements.txt`
- To run the unit tests, run `pytest -v tests/unit_test.py`)
- Any new test functions/scripts can be added into the `tests` folder

NOTE: Functional tests coming soon, will live in `tests/func_test.py`

For checking code coverage while testing:
- Start by installing `coverage` (can be done via `pip`)
- Now instead when testing run `coverage run -m --source=src pytest tests/unit_test.py`
- To then view a code coverage report w/ missing lines, run `coverage report -m`

For formatting:
- Currently using black v22.3.0 for format checking
- To install, run `pip install black==22.3.0`
- To check file formatting, in top-level dir run `black --check .`
- To auto-reformat all files, remove the `--check` flag
- To reformat an individual file, run `black <filename>`

To build the python package:
- If poetry is not installed: `pip install poetry`
- `poetry build`
### Prerequisites

We recommend using Python 3.8 for development.
Install development specific dependencies:
`$ pip install poetry pytest pytest-mock coverage black==22.3.0`

Additional dependencies can be found in `requirements.txt`: `$ pip install -r requirements.txt`

### Testing

- To run the unit tests, run `pytest -v tests/unit_test.py`
- Any new test functions/scripts can be added into the `tests` folder
- NOTE: Functional tests coming soon, will live in `tests/func_test.py`

#### Code Coverage

- Run tests with the following command: `coverage run -m --source=src pytest tests/unit_test.py`
- To then view a code coverage report w/ missing lines, run `coverage report -m`

### Code Formatting

- To check file formatting, in top-level dir run `black --check .`
- To auto-reformat all files, remove the `--check` flag
- To reformat an individual file, run `black <filename>`

### Package Build

To build the python package: `$ poetry build`
28 changes: 8 additions & 20 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ def create_app_wrapper(self):
Called upon cluster object creation, creates an AppWrapper yaml based on
the specifications of the ClusterConfiguration.
"""

if self.config.namespace is None:
self.config.namespace = oc.get_project_name()
if type(self.config.namespace) is not str:
raise TypeError(
f"Namespace {self.config.namespace} is of type {type(self.config.namespace)}. Check your Kubernetes Authentication."
)

name = self.config.name
namespace = self.config.namespace
min_cpu = self.config.min_cpus
Expand Down Expand Up @@ -285,26 +293,6 @@ def torchx_config(
return to_return


def get_current_namespace() -> str:
"""
Returns the user's current working namespace.
"""
try:
namespace = oc.invoke("project", ["-q"]).actions()[0].out.strip()
except oc.OpenShiftPythonException as osp: # pragma: no cover
error_msg = osp.result.err()
if (
"do not have rights" in error_msg
or "Missing or incomplete configuration" in error_msg
):
raise PermissionError(
"Action not permitted, have you run auth.login() or cluster.up()?"
)
else:
raise osp
return namespace


def list_all_clusters(namespace: str, print_to_console: bool = True):
"""
Returns (and prints by default) a list of all clusters in a given namespace.
Expand Down
3 changes: 2 additions & 1 deletion src/codeflare_sdk/cluster/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from dataclasses import dataclass, field
from .auth import Authentication
import pathlib
import openshift

dir = pathlib.Path(__file__).parent.parent.resolve()

Expand All @@ -33,7 +34,7 @@ class ClusterConfiguration:
"""

name: str
namespace: str = "default"
namespace: str = None
head_info: list = field(default_factory=list)
machine_types: list = field(default_factory=list) # ["m4.xlarge", "g4dn.xlarge"]
min_cpus: int = 1
Expand Down
6 changes: 5 additions & 1 deletion src/codeflare_sdk/job/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import TYPE_CHECKING, Optional, Dict, List
from pathlib import Path

import openshift as oc
from torchx.components.dist import ddp
from torchx.runner import get_runner
from torchx.specs import AppHandle, parse_app_handle, AppDryRunInfo
Expand Down Expand Up @@ -114,6 +115,9 @@ def _missing_spec(self, spec: str):
raise ValueError(f"Job definition missing arg: {spec}")

def _dry_run_no_cluster(self):
if self.scheduler_args is not None:
if self.scheduler_args.get("namespace") is None:
self.scheduler_args["namespace"] = oc.get_project_name()
return torchx_runner.dryrun(
app=ddp(
*self.script_args,
Expand Down Expand Up @@ -144,7 +148,7 @@ def _dry_run_no_cluster(self):
else self._missing_spec("image"),
),
scheduler="kubernetes_mcad",
cfg=self.scheduler_args if self.scheduler_args is not None else None,
cfg=self.scheduler_args,
workspace="",
)

Expand Down
90 changes: 62 additions & 28 deletions tests/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from codeflare_sdk.cluster.cluster import (
Cluster,
ClusterConfiguration,
get_current_namespace,
list_all_clusters,
list_all_queued,
_copy_to_ray,
Expand Down Expand Up @@ -240,6 +239,23 @@ def test_cluster_creation():
return cluster


def test_default_cluster_creation(mocker):
mocker.patch(
"openshift.get_project_name",
return_value="opendatahub",
)
default_config = ClusterConfiguration(
name="unit-test-default-cluster",
)
cluster = Cluster(default_config)

assert cluster.app_wrapper_yaml == "unit-test-default-cluster.yaml"
assert cluster.app_wrapper_name == "unit-test-default-cluster"
assert cluster.config.namespace == "opendatahub"

return cluster


def arg_check_apply_effect(*args):
assert args[0] == "apply"
assert args[1] == ["-f", "unit-test-cluster.yaml"]
Expand Down Expand Up @@ -496,14 +512,6 @@ def act_side_effect_list(self):
return [self]


def test_get_namespace(mocker):
mocker.patch("openshift.invoke", side_effect=arg_side_effect)
mock_res = mocker.patch.object(openshift.Result, "actions")
mock_res.side_effect = lambda: act_side_effect_list(fake_res)
vars = get_current_namespace()
assert vars == "('project', ['-q'])"


def get_selector(*args):
selector = Selector({"operation": "selector", "status": 0, "actions": []})
return selector
Expand Down Expand Up @@ -1593,22 +1601,6 @@ def test_wait_ready(mocker, capsys):
)


def test_cmd_line_generation():
os.system(
f"python3 {parent}/src/codeflare_sdk/utils/generate_yaml.py --name=unit-cmd-cluster --min-cpu=1 --max-cpu=1 --min-memory=2 --max-memory=2 --gpu=1 --workers=2 --template=src/codeflare_sdk/templates/new-template.yaml"
)
assert filecmp.cmp(
"unit-cmd-cluster.yaml", f"{parent}/tests/test-case-cmd.yaml", shallow=True
)
os.remove("unit-test-cluster.yaml")
os.remove("unit-cmd-cluster.yaml")


def test_cleanup():
os.remove("test.yaml")
os.remove("raytest2.yaml")


def test_jobdefinition_coverage():
abstract = JobDefinition()
cluster = Cluster(test_config_creation())
Expand Down Expand Up @@ -1673,7 +1665,6 @@ def test_DDPJobDefinition_dry_run():
assert type(ddp_job._scheduler) == type(str())

assert ddp_job.request.app_id.startswith("test")
assert ddp_job.request.working_dir.startswith("/tmp/torchx_workspace")
assert ddp_job.request.cluster_name == "unit-test-cluster"
assert ddp_job.request.requirements == "test"

Expand All @@ -1687,12 +1678,18 @@ def test_DDPJobDefinition_dry_run():
assert ddp_job._scheduler == "ray"


def test_DDPJobDefinition_dry_run_no_cluster():
def test_DDPJobDefinition_dry_run_no_cluster(mocker):
"""
Test that the dry run method returns the correct type: AppDryRunInfo,
that the attributes of the returned object are of the correct type,
and that the values from cluster and job definition are correctly passed.
"""

mocker.patch(
"openshift.get_project_name",
return_value="opendatahub",
)

ddp = test_DDPJobDefinition_creation()
ddp.image = "fake-image"
ddp_job = ddp._dry_run_no_cluster()
Expand Down Expand Up @@ -1750,12 +1747,18 @@ def test_DDPJobDefinition_dry_run_no_resource_args():
)


def test_DDPJobDefinition_dry_run_no_cluster_no_resource_args():
def test_DDPJobDefinition_dry_run_no_cluster_no_resource_args(mocker):
"""
Test that the dry run method returns the correct type: AppDryRunInfo,
that the attributes of the returned object are of the correct type,
and that the values from cluster and job definition are correctly passed.
"""

mocker.patch(
"openshift.get_project_name",
return_value="opendatahub",
)

ddp = test_DDPJobDefinition_creation()
try:
ddp._dry_run_no_cluster()
Expand Down Expand Up @@ -1806,6 +1809,10 @@ def test_DDPJobDefinition_submit(mocker):
"""
ddp_def = test_DDPJobDefinition_creation()
cluster = Cluster(test_config_creation())
mocker.patch(
"openshift.get_project_name",
return_value="opendatahub",
)
mocker.patch(
"codeflare_sdk.job.jobs.torchx_runner.schedule",
return_value="fake-dashboard-url",
Expand Down Expand Up @@ -1852,6 +1859,10 @@ def test_DDPJob_creation(mocker):
def test_DDPJob_creation_no_cluster(mocker):
ddp_def = test_DDPJobDefinition_creation()
ddp_def.image = "fake-image"
mocker.patch(
"openshift.get_project_name",
return_value="opendatahub",
)
mocker.patch(
"codeflare_sdk.job.jobs.torchx_runner.schedule",
return_value="fake-app-handle",
Expand Down Expand Up @@ -1898,6 +1909,10 @@ def arg_check_side_effect(*args):

def test_DDPJob_cancel(mocker):
ddp_job = test_DDPJob_creation_no_cluster(mocker)
mocker.patch(
"openshift.get_project_name",
return_value="opendatahub",
)
mocker.patch(
"codeflare_sdk.job.jobs.torchx_runner.cancel", side_effect=arg_check_side_effect
)
Expand All @@ -1916,3 +1931,22 @@ def parse_j(cmd):
max_worker = args[1]
gpu = args[3]
return f"{max_worker}x{gpu}"


# Make sure to keep this function and the efollowing function at the end of the file
def test_cmd_line_generation():
os.system(
f"python3 {parent}/src/codeflare_sdk/utils/generate_yaml.py --name=unit-cmd-cluster --min-cpu=1 --max-cpu=1 --min-memory=2 --max-memory=2 --gpu=1 --workers=2 --template=src/codeflare_sdk/templates/new-template.yaml"
)
assert filecmp.cmp(
"unit-cmd-cluster.yaml", f"{parent}/tests/test-case-cmd.yaml", shallow=True
)
os.remove("unit-test-cluster.yaml")
os.remove("unit-test-default-cluster.yaml")
os.remove("unit-cmd-cluster.yaml")


# Make sure to always keep this function last
def test_cleanup():
os.remove("test.yaml")
os.remove("raytest2.yaml")