diff --git a/.gitignore b/.gitignore
index 846fc76e6..d6b60ccde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@ dist/
 .python-version
 __pycache__/
 .coverage
+Pipfile
+Pipfile.lock
diff --git a/README.md b/README.md
index 289892346..ac933263e 100644
--- a/README.md
+++ b/README.md
@@ -15,26 +15,31 @@ Can be installed via `pip`: `pip install codeflare-sdk`
 
 ## Development
 
-For testing, make sure to have installed:
- - `pytest`, `pytest-mock` (can both be installed with `pip`)
- - The remaining dependencies located in `requirements.txt`
- - To run the unit tests, run `pytest -v tests/unit_test.py`)
- - Any new test functions/scripts can be added into the `tests` folder
-
-NOTE: Functional tests coming soon, will live in `tests/func_test.py`
-
-For checking code coverage while testing:
- - Start by installing `coverage` (can be done via `pip`)
- - Now instead when testing run `coverage run -m --source=src pytest tests/unit_test.py`
- - To then view a code coverage report w/ missing lines, run `coverage report -m`
-
-For formatting:
- - Currently using black v22.3.0 for format checking
- - To install, run `pip install black==22.3.0`
- - To check file formatting, in top-level dir run `black --check .`
- - To auto-reformat all files, remove the `--check` flag
- - To reformat an individual file, run `black `
-
-To build the python package:
- - If poetry is not installed: `pip install poetry`
- - `poetry build`
+### Prerequisites
+
+We recommend using Python 3.8 for development.
+Install development specific dependencies:
+  `$ pip install poetry pytest pytest-mock coverage black==22.3.0`
+
+Additional dependencies can be found in `requirements.txt`: `$ pip install -r requirements.txt`
+
+### Testing
+
+- To run the unit tests, run `pytest -v tests/unit_test.py`
+- Any new test functions/scripts can be added into the `tests` folder
+- NOTE: Functional tests coming soon, will live in `tests/func_test.py`
+
+#### Code Coverage
+
+- Run tests with the following command: `coverage run -m --source=src pytest tests/unit_test.py`
+- To then view a code coverage report w/ missing lines, run `coverage report -m`
+
+### Code Formatting
+
+- To check file formatting, in top-level dir run `black --check .`
+- To auto-reformat all files, remove the `--check` flag
+- To reformat an individual file, run `black `
+
+### Package Build
+
+To build the python package: `$ poetry build`
diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py
index b727e0cbc..bc30cb3e6 100644
--- a/src/codeflare_sdk/cluster/cluster.py
+++ b/src/codeflare_sdk/cluster/cluster.py
@@ -63,6 +63,14 @@ def create_app_wrapper(self):
         Called upon cluster object creation, creates an AppWrapper yaml based on
         the specifications of the ClusterConfiguration.
         """
+
+        if self.config.namespace is None:
+            self.config.namespace = oc.get_project_name()
+            if type(self.config.namespace) is not str:
+                raise TypeError(
+                    f"Namespace {self.config.namespace} is of type {type(self.config.namespace)}. Check your Kubernetes Authentication."
+                )
+
         name = self.config.name
         namespace = self.config.namespace
         min_cpu = self.config.min_cpus
@@ -285,26 +293,6 @@ def torchx_config(
         return to_return
 
 
-def get_current_namespace() -> str:
-    """
-    Returns the user's current working namespace.
-    """
-    try:
-        namespace = oc.invoke("project", ["-q"]).actions()[0].out.strip()
-    except oc.OpenShiftPythonException as osp:  # pragma: no cover
-        error_msg = osp.result.err()
-        if (
-            "do not have rights" in error_msg
-            or "Missing or incomplete configuration" in error_msg
-        ):
-            raise PermissionError(
-                "Action not permitted, have you run auth.login() or cluster.up()?"
-            )
-        else:
-            raise osp
-    return namespace
-
-
 def list_all_clusters(namespace: str, print_to_console: bool = True):
     """
     Returns (and prints by default) a list of all clusters in a given namespace.
diff --git a/src/codeflare_sdk/cluster/config.py b/src/codeflare_sdk/cluster/config.py
index c3b4eb2e1..5d74639d9 100644
--- a/src/codeflare_sdk/cluster/config.py
+++ b/src/codeflare_sdk/cluster/config.py
@@ -21,6 +21,7 @@
 from dataclasses import dataclass, field
 from .auth import Authentication
 import pathlib
+import openshift
 
 dir = pathlib.Path(__file__).parent.parent.resolve()
 
@@ -33,7 +34,7 @@ class ClusterConfiguration:
     """
 
     name: str
-    namespace: str = "default"
+    namespace: str = None
     head_info: list = field(default_factory=list)
     machine_types: list = field(default_factory=list)  # ["m4.xlarge", "g4dn.xlarge"]
     min_cpus: int = 1
diff --git a/src/codeflare_sdk/job/jobs.py b/src/codeflare_sdk/job/jobs.py
index 74135980c..d7581538d 100644
--- a/src/codeflare_sdk/job/jobs.py
+++ b/src/codeflare_sdk/job/jobs.py
@@ -17,6 +17,7 @@
 from typing import TYPE_CHECKING, Optional, Dict, List
 from pathlib import Path
 
+import openshift as oc
 from torchx.components.dist import ddp
 from torchx.runner import get_runner
 from torchx.specs import AppHandle, parse_app_handle, AppDryRunInfo
@@ -114,6 +115,9 @@ def _missing_spec(self, spec: str):
         raise ValueError(f"Job definition missing arg: {spec}")
 
     def _dry_run_no_cluster(self):
+        if self.scheduler_args is not None:
+            if self.scheduler_args.get("namespace") is None:
+                self.scheduler_args["namespace"] = oc.get_project_name()
         return torchx_runner.dryrun(
             app=ddp(
                 *self.script_args,
@@ -144,7 +148,7 @@ def _dry_run_no_cluster(self):
                 else self._missing_spec("image"),
             ),
             scheduler="kubernetes_mcad",
-            cfg=self.scheduler_args if self.scheduler_args is not None else None,
+            cfg=self.scheduler_args,
             workspace="",
         )
 
diff --git a/tests/unit_test.py b/tests/unit_test.py
index 9338434a2..8f87b6de7 100644
--- a/tests/unit_test.py
+++ b/tests/unit_test.py
@@ -24,7 +24,6 @@
 from codeflare_sdk.cluster.cluster import (
     Cluster,
     ClusterConfiguration,
-    get_current_namespace,
     list_all_clusters,
     list_all_queued,
     _copy_to_ray,
@@ -240,6 +239,23 @@ def test_cluster_creation():
     return cluster
 
 
+def test_default_cluster_creation(mocker):
+    mocker.patch(
+        "openshift.get_project_name",
+        return_value="opendatahub",
+    )
+    default_config = ClusterConfiguration(
+        name="unit-test-default-cluster",
+    )
+    cluster = Cluster(default_config)
+
+    assert cluster.app_wrapper_yaml == "unit-test-default-cluster.yaml"
+    assert cluster.app_wrapper_name == "unit-test-default-cluster"
+    assert cluster.config.namespace == "opendatahub"
+
+    return cluster
+
+
 def arg_check_apply_effect(*args):
     assert args[0] == "apply"
     assert args[1] == ["-f", "unit-test-cluster.yaml"]
@@ -496,14 +512,6 @@ def act_side_effect_list(self):
     return [self]
 
 
-def test_get_namespace(mocker):
-    mocker.patch("openshift.invoke", side_effect=arg_side_effect)
-    mock_res = mocker.patch.object(openshift.Result, "actions")
-    mock_res.side_effect = lambda: act_side_effect_list(fake_res)
-    vars = get_current_namespace()
-    assert vars == "('project', ['-q'])"
-
-
 def get_selector(*args):
     selector = Selector({"operation": "selector", "status": 0, "actions": []})
     return selector
@@ -1593,22 +1601,6 @@ def test_wait_ready(mocker, capsys):
     )
 
 
-def test_cmd_line_generation():
-    os.system(
-        f"python3 {parent}/src/codeflare_sdk/utils/generate_yaml.py --name=unit-cmd-cluster --min-cpu=1 --max-cpu=1 --min-memory=2 --max-memory=2 --gpu=1 --workers=2 --template=src/codeflare_sdk/templates/new-template.yaml"
-    )
-    assert filecmp.cmp(
-        "unit-cmd-cluster.yaml", f"{parent}/tests/test-case-cmd.yaml", shallow=True
-    )
-    os.remove("unit-test-cluster.yaml")
-    os.remove("unit-cmd-cluster.yaml")
-
-
-def test_cleanup():
-    os.remove("test.yaml")
-    os.remove("raytest2.yaml")
-
-
 def test_jobdefinition_coverage():
     abstract = JobDefinition()
     cluster = Cluster(test_config_creation())
@@ -1673,7 +1665,6 @@ def test_DDPJobDefinition_dry_run():
     assert type(ddp_job._scheduler) == type(str())
 
     assert ddp_job.request.app_id.startswith("test")
-    assert ddp_job.request.working_dir.startswith("/tmp/torchx_workspace")
     assert ddp_job.request.cluster_name == "unit-test-cluster"
     assert ddp_job.request.requirements == "test"
 
@@ -1687,12 +1678,18 @@ def test_DDPJobDefinition_dry_run():
     assert ddp_job._scheduler == "ray"
 
 
-def test_DDPJobDefinition_dry_run_no_cluster():
+def test_DDPJobDefinition_dry_run_no_cluster(mocker):
     """
     Test that the dry run method returns the correct type: AppDryRunInfo,
     that the attributes of the returned object are of the correct type,
     and that the values from cluster and job definition are correctly passed.
     """
+
+    mocker.patch(
+        "openshift.get_project_name",
+        return_value="opendatahub",
+    )
+
     ddp = test_DDPJobDefinition_creation()
     ddp.image = "fake-image"
     ddp_job = ddp._dry_run_no_cluster()
@@ -1750,12 +1747,18 @@ def test_DDPJobDefinition_dry_run_no_resource_args():
     )
 
 
-def test_DDPJobDefinition_dry_run_no_cluster_no_resource_args():
+def test_DDPJobDefinition_dry_run_no_cluster_no_resource_args(mocker):
     """
     Test that the dry run method returns the correct type: AppDryRunInfo,
     that the attributes of the returned object are of the correct type,
     and that the values from cluster and job definition are correctly passed.
""" + + mocker.patch( + "openshift.get_project_name", + return_value="opendatahub", + ) + ddp = test_DDPJobDefinition_creation() try: ddp._dry_run_no_cluster() @@ -1806,6 +1809,10 @@ def test_DDPJobDefinition_submit(mocker): """ ddp_def = test_DDPJobDefinition_creation() cluster = Cluster(test_config_creation()) + mocker.patch( + "openshift.get_project_name", + return_value="opendatahub", + ) mocker.patch( "codeflare_sdk.job.jobs.torchx_runner.schedule", return_value="fake-dashboard-url", @@ -1852,6 +1859,10 @@ def test_DDPJob_creation(mocker): def test_DDPJob_creation_no_cluster(mocker): ddp_def = test_DDPJobDefinition_creation() ddp_def.image = "fake-image" + mocker.patch( + "openshift.get_project_name", + return_value="opendatahub", + ) mocker.patch( "codeflare_sdk.job.jobs.torchx_runner.schedule", return_value="fake-app-handle", @@ -1898,6 +1909,10 @@ def arg_check_side_effect(*args): def test_DDPJob_cancel(mocker): ddp_job = test_DDPJob_creation_no_cluster(mocker) + mocker.patch( + "openshift.get_project_name", + return_value="opendatahub", + ) mocker.patch( "codeflare_sdk.job.jobs.torchx_runner.cancel", side_effect=arg_check_side_effect ) @@ -1916,3 +1931,22 @@ def parse_j(cmd): max_worker = args[1] gpu = args[3] return f"{max_worker}x{gpu}" + + +# Make sure to keep this function and the efollowing function at the end of the file +def test_cmd_line_generation(): + os.system( + f"python3 {parent}/src/codeflare_sdk/utils/generate_yaml.py --name=unit-cmd-cluster --min-cpu=1 --max-cpu=1 --min-memory=2 --max-memory=2 --gpu=1 --workers=2 --template=src/codeflare_sdk/templates/new-template.yaml" + ) + assert filecmp.cmp( + "unit-cmd-cluster.yaml", f"{parent}/tests/test-case-cmd.yaml", shallow=True + ) + os.remove("unit-test-cluster.yaml") + os.remove("unit-test-default-cluster.yaml") + os.remove("unit-cmd-cluster.yaml") + + +# Make sure to always keep this function last +def test_cleanup(): + os.remove("test.yaml") + os.remove("raytest2.yaml")