Skip to content

Update unit tests for RHOAI ray image #598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions tests/test-case-custom-image.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
app.kubernetes.io/managed-by: test-prefix
labels:
controller-tools.k8s.io: '1.0'
kueue.x-k8s.io/queue-name: local-queue-default
testlabel: test
testlabel2: test
name: unit-test-cluster-custom-image
namespace: ns
spec:
autoscalerOptions:
idleTimeoutSeconds: 60
imagePullPolicy: Always
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 500m
memory: 512Mi
upscalingMode: Default
enableInTreeAutoscaling: false
headGroupSpec:
enableIngress: false
rayStartParams:
block: 'true'
dashboard-host: 0.0.0.0
num-gpus: '0'
resources: '"{}"'
serviceType: ClusterIP
template:
spec:
containers:
- image: quay.io/project-codeflare/ray:2.20.0-py39-cu118
imagePullPolicy: Always
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
name: ray-head
ports:
- containerPort: 6379
name: gcs
- containerPort: 8265
name: dashboard
- containerPort: 10001
name: client
resources:
limits:
cpu: 2
memory: 8G
requests:
cpu: 2
memory: 8G
volumeMounts:
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
imagePullSecrets:
- name: unit-test-pull-secret
volumes:
- configMap:
items:
- key: ca-bundle.crt
path: odh-trusted-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-trusted-ca-cert
- configMap:
items:
- key: odh-ca-bundle.crt
path: odh-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-ca-cert
rayVersion: 2.23.0
workerGroupSpecs:
- groupName: small-group-unit-test-cluster-custom-image
maxReplicas: 2
minReplicas: 2
rayStartParams:
block: 'true'
num-gpus: '7'
resources: '"{}"'
replicas: 2
template:
metadata:
annotations:
key: value
labels:
key: value
spec:
containers:
- image: quay.io/project-codeflare/ray:2.20.0-py39-cu118
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
name: machine-learning
resources:
limits:
cpu: 4
memory: 6G
nvidia.com/gpu: 7
requests:
cpu: 3
memory: 5G
nvidia.com/gpu: 7
volumeMounts:
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
imagePullSecrets:
- name: unit-test-pull-secret
volumes:
- configMap:
items:
- key: ca-bundle.crt
path: odh-trusted-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-trusted-ca-cert
- configMap:
items:
- key: odh-ca-bundle.crt
path: odh-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-ca-cert
63 changes: 55 additions & 8 deletions tests/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@ def test_config_creation():
assert config.worker_cpu_requests == 3 and config.worker_cpu_limits == 4
assert config.worker_memory_requests == "5G" and config.worker_memory_limits == "6G"
assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 7}
assert config.image == "quay.io/rhoai/ray:2.23.0-py39-cu121"
assert config.template == f"{parent}/src/codeflare_sdk/templates/base-template.yaml"
assert config.machine_types == ["cpu.small", "gpu.large"]
assert config.image_pull_secrets == ["unit-test-pull-secret"]
Expand Down Expand Up @@ -415,7 +414,6 @@ def test_cluster_creation_no_mcad_local_queue(mocker):
worker_extended_resource_requests={"nvidia.com/gpu": 7},
machine_types=["cpu.small", "gpu.large"],
image_pull_secrets=["unit-test-pull-secret"],
image="quay.io/rhoai/ray:2.23.0-py39-cu121",
write_to_file=True,
appwrapper=False,
local_queue="local-queue-default",
Expand Down Expand Up @@ -443,7 +441,6 @@ def test_default_cluster_creation(mocker):
)
default_config = ClusterConfiguration(
name="unit-test-default-cluster",
image="quay.io/rhoai/ray:2.23.0-py39-cu121",
appwrapper=True,
)
cluster = Cluster(default_config)
Expand All @@ -459,6 +456,61 @@ def test_default_cluster_creation(mocker):
assert cluster.config.namespace == "opendatahub"


def test_cluster_creation_with_custom_image(mocker):
    """Verify that an explicitly configured Ray image is rendered into the RayCluster YAML.

    The same cluster is built twice -- once by mutating the shared
    ``createClusterConfig()`` helper's result and once from an explicit
    ``ClusterConfiguration`` -- and each generated file is compared against
    the golden fixture ``tests/test-case-custom-image.yaml``.
    """
    cluster_name = "unit-test-cluster-custom-image"
    custom_image = "quay.io/project-codeflare/ray:2.20.0-py39-cu118"
    queue_name = "local-queue-default"
    expected_labels = {"testlabel": "test", "testlabel2": "test"}
    generated_yaml = f"{aw_dir}{cluster_name}.yaml"
    golden_yaml = f"{parent}/tests/test-case-custom-image.yaml"

    def assert_matches_golden_file(cluster):
        """Assert the cluster's name/path and that its YAML equals the fixture."""
        assert cluster.app_wrapper_yaml == generated_yaml
        assert cluster.app_wrapper_name == cluster_name
        # shallow=False forces a content comparison; with shallow=True,
        # filecmp may declare the files equal from os.stat() signatures alone.
        assert filecmp.cmp(generated_yaml, golden_yaml, shallow=False)

    # Stub out every Kubernetes API call made while building the cluster.
    mocker.patch("kubernetes.client.ApisApi.get_api_versions")
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
        return_value={"spec": {"domain": "apps.cluster.awsroute.org"}},
    )
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
        return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
    )
    # NOTE(review): presumably the source of the "test-prefix" managed-by
    # annotation in the golden file -- confirm against the SDK.
    mocker.patch("os.environ.get", return_value="test-prefix")

    # Case 1: start from the shared test config and override the image.
    config = createClusterConfig()
    config.name = cluster_name
    config.appwrapper = False
    config.image = custom_image
    config.local_queue = queue_name
    config.labels = dict(expected_labels)
    assert_matches_golden_file(Cluster(config))

    # Case 2: the same cluster described by an explicit ClusterConfiguration
    # (write_to_file=True, so the YAML is still written to disk).
    config = ClusterConfiguration(
        name=cluster_name,
        namespace="ns",
        num_workers=2,
        worker_cpu_requests=3,
        worker_cpu_limits=4,
        worker_memory_requests=5,
        worker_memory_limits=6,
        worker_extended_resource_requests={"nvidia.com/gpu": 7},
        machine_types=["cpu.small", "gpu.large"],
        image_pull_secrets=["unit-test-pull-secret"],
        image=custom_image,
        write_to_file=True,
        appwrapper=False,
        local_queue=queue_name,
        labels=dict(expected_labels),
    )
    assert_matches_golden_file(Cluster(config))


def test_gen_names_with_name(mocker):
mocker.patch.object(
uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001")
Expand Down Expand Up @@ -792,7 +844,6 @@ def test_ray_job_wrapping(mocker):
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
)
cluster = cluster = createClusterWithConfig(mocker)
cluster.config.image = "quay.io/rhoai/ray:2.23.0-py39-cu121"
mocker.patch(
"ray.job_submission.JobSubmissionClient._check_connection_and_version_with_url",
return_value="None",
Expand Down Expand Up @@ -910,7 +961,6 @@ def test_ray_details(mocker, capsys):
ClusterConfiguration(
name="raytest2",
namespace="ns",
image="quay.io/rhoai/ray:2.23.0-py39-cu121",
write_to_file=True,
appwrapper=True,
local_queue="local_default_queue",
Expand Down Expand Up @@ -2313,7 +2363,6 @@ def test_cluster_status(mocker):
ClusterConfiguration(
name="test",
namespace="ns",
image="quay.io/rhoai/ray:2.23.0-py39-cu121",
write_to_file=True,
appwrapper=True,
local_queue="local_default_queue",
Expand Down Expand Up @@ -2408,7 +2457,6 @@ def test_wait_ready(mocker, capsys):
ClusterConfiguration(
name="test",
namespace="ns",
image="quay.io/rhoai/ray:2.23.0-py39-cu121",
write_to_file=True,
appwrapper=True,
local_queue="local-queue-default",
Expand Down Expand Up @@ -2635,7 +2683,6 @@ def throw_if_getting_raycluster(group, version, namespace, plural):
cluster = Cluster(
ClusterConfiguration(
"test_cluster",
image="quay.io/rhoai/ray:2.23.0-py39-cu121",
write_to_file=False,
)
)
Expand Down
1 change: 0 additions & 1 deletion tests/unit_test_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ def createClusterConfig():
appwrapper=True,
machine_types=["cpu.small", "gpu.large"],
image_pull_secrets=["unit-test-pull-secret"],
image="quay.io/rhoai/ray:2.23.0-py39-cu121",
write_to_file=True,
)
return config
Expand Down