Skip to content

Added Custom Volume/Volume Mount Support #554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def create_app_wrapper(self):
write_to_file = self.config.write_to_file
local_queue = self.config.local_queue
labels = self.config.labels
volumes = self.config.volumes
volume_mounts = self.config.volume_mounts
return generate_appwrapper(
name=name,
namespace=namespace,
Expand All @@ -172,6 +174,8 @@ def create_app_wrapper(self):
write_to_file=write_to_file,
local_queue=local_queue,
labels=labels,
volumes=volumes,
volume_mounts=volume_mounts,
)

# creates a new cluster with the provided or default spec
Expand Down
2 changes: 2 additions & 0 deletions src/codeflare_sdk/cluster/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class ClusterConfiguration:
write_to_file: bool = False
verify_tls: bool = True
labels: dict = field(default_factory=dict)
volumes: list = field(default_factory=list)
volume_mounts: list = field(default_factory=list)

def __post_init__(self):
if not self.verify_tls:
Expand Down
22 changes: 22 additions & 0 deletions src/codeflare_sdk/utils/generate_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,20 @@ def update_image_pull_secrets(spec, image_pull_secrets):
]


def update_volume_mounts(spec, volume_mounts: list):
    """Append custom volume mounts to every container of a pod spec.

    Args:
        spec: Pod spec mapping (the ``template.spec`` section of a head or
            worker group). It is modified in place.
        volume_mounts: Volume mount objects to add. Each one is converted
            with ``ApiClient.sanitize_for_serialization`` before being
            appended. ``None`` or an empty list leaves ``spec`` untouched.
    """
    if not volume_mounts:
        return

    # One client is enough for all conversions; building it per item is waste.
    api_client = client.ApiClient()
    for container in spec.get("containers") or []:
        mounts = container.get("volumeMounts")
        if mounts is None:
            # Containers without a (non-null) volumeMounts entry get a
            # fresh list instead of raising KeyError/AttributeError.
            mounts = container["volumeMounts"] = []
        # Serialize once per container so containers never share the same
        # dict object (shared objects would be dumped as YAML aliases).
        mounts.extend(
            api_client.sanitize_for_serialization(volume_mount)
            for volume_mount in volume_mounts
        )


def update_volumes(spec, volumes: list):
    """Append custom volumes to a pod spec.

    Args:
        spec: Pod spec mapping (the ``template.spec`` section of a head or
            worker group). It is modified in place.
        volumes: Volume objects to add. Each one is converted with
            ``ApiClient.sanitize_for_serialization`` before being appended.
            ``None`` or an empty list leaves ``spec`` untouched.
    """
    if not volumes:
        return

    pod_volumes = spec.get("volumes")
    if pod_volumes is None:
        # A spec without a (non-null) volumes entry gets a fresh list
        # instead of raising KeyError/AttributeError.
        pod_volumes = spec["volumes"] = []

    # One client is enough for all conversions; building it per item is waste.
    api_client = client.ApiClient()
    pod_volumes.extend(
        api_client.sanitize_for_serialization(volume) for volume in volumes
    )


def update_env(spec, env):
containers = spec.get("containers")
for container in containers:
Expand Down Expand Up @@ -136,6 +150,8 @@ def update_nodes(
head_cpus,
head_memory,
head_gpus,
volumes,
volume_mounts,
):
head = cluster_yaml.get("spec").get("headGroupSpec")
head["rayStartParams"]["num-gpus"] = str(int(head_gpus))
Expand All @@ -150,6 +166,8 @@ def update_nodes(

for comp in [head, worker]:
spec = comp.get("template").get("spec")
update_volume_mounts(spec, volume_mounts)
update_volumes(spec, volumes)
update_image_pull_secrets(spec, image_pull_secrets)
update_image(spec, image)
update_env(spec, env)
Expand Down Expand Up @@ -280,6 +298,8 @@ def generate_appwrapper(
write_to_file: bool,
local_queue: Optional[str],
labels,
volumes: list[client.V1Volume],
volume_mounts: list[client.V1VolumeMount],
):
cluster_yaml = read_template(template)
appwrapper_name, cluster_name = gen_names(name)
Expand All @@ -299,6 +319,8 @@ def generate_appwrapper(
head_cpus,
head_memory,
head_gpus,
volumes,
volume_mounts,
)
augment_labels(cluster_yaml, labels)
notebook_annotations(cluster_yaml)
Expand Down
186 changes: 186 additions & 0 deletions tests/unit-test-volume-cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: '1.0'
kueue.x-k8s.io/queue-name: local-queue-default
name: unit-test-volume-cluster
namespace: opendatahub
spec:
autoscalerOptions:
idleTimeoutSeconds: 60
imagePullPolicy: Always
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 500m
memory: 512Mi
upscalingMode: Default
enableInTreeAutoscaling: false
headGroupSpec:
enableIngress: false
rayStartParams:
block: 'true'
dashboard-host: 0.0.0.0
num-gpus: '0'
serviceType: ClusterIP
template:
spec:
containers:
- image: quay.io/project-codeflare/ray:latest-py39-cu118
imagePullPolicy: Always
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
name: ray-head
ports:
- containerPort: 6379
name: gcs
- containerPort: 8265
name: dashboard
- containerPort: 10001
name: client
resources:
limits:
cpu: 2
memory: 8G
nvidia.com/gpu: 0
requests:
cpu: 2
memory: 8G
nvidia.com/gpu: 0
volumeMounts:
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
- mountPath: /home/ray/test1
name: test
- mountPath: /home/ray/test2
name: test2
- mountPath: /home/ray/test2
name: test3
imagePullSecrets: []
volumes:
- configMap:
items:
- key: ca-bundle.crt
path: odh-trusted-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-trusted-ca-cert
- configMap:
items:
- key: odh-ca-bundle.crt
path: odh-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-ca-cert
- emptyDir:
sizeLimit: 500Gi
name: test
- configMap:
items:
- key: test
path: /home/ray/test2/data.txt
name: config-map-test
name: test2
- name: test3
secret:
secretName: test-secret
rayVersion: 2.7.0
workerGroupSpecs:
- groupName: small-group-unit-test-volume-cluster
maxReplicas: 1
minReplicas: 1
rayStartParams:
block: 'true'
num-gpus: '0'
replicas: 1
template:
metadata:
annotations:
key: value
labels:
key: value
spec:
containers:
- image: quay.io/project-codeflare/ray:latest-py39-cu118
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
name: machine-learning
resources:
limits:
cpu: 1
memory: 2G
nvidia.com/gpu: 0
requests:
cpu: 1
memory: 2G
nvidia.com/gpu: 0
volumeMounts:
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
- mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
- mountPath: /etc/ssl/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
- mountPath: /home/ray/test1
name: test
- mountPath: /home/ray/test2
name: test2
- mountPath: /home/ray/test2
name: test3
imagePullSecrets: []
volumes:
- configMap:
items:
- key: ca-bundle.crt
path: odh-trusted-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-trusted-ca-cert
- configMap:
items:
- key: odh-ca-bundle.crt
path: odh-ca-bundle.crt
name: odh-trusted-ca-bundle
optional: true
name: odh-ca-cert
- emptyDir:
sizeLimit: 500Gi
name: test
- configMap:
items:
- key: test
path: /home/ray/test2/data.txt
name: config-map-test
name: test2
- name: test3
secret:
secretName: test-secret
62 changes: 62 additions & 0 deletions tests/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,68 @@ def test_default_cluster_creation(mocker):
assert cluster.config.namespace == "opendatahub"


def test_cluster_with_custom_volumes(mocker):
    """Check that custom volumes and volume mounts end up in the generated YAML.

    Builds a cluster from a configuration carrying three volumes and three
    volume mounts, then compares the generated resource with the stored
    ``unit-test-volume-cluster.yaml`` fixture.
    """
    # Stub out every call that would otherwise need a live cluster.
    mocker.patch("kubernetes.client.ApisApi.get_api_versions")
    mocker.patch(
        "codeflare_sdk.cluster.cluster.get_current_namespace",
        return_value="opendatahub",
    )
    mocker.patch(
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
        return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
    )

    from kubernetes.client import (
        V1Volume,
        V1VolumeMount,
        V1EmptyDirVolumeSource,
        V1ConfigMapVolumeSource,
        V1KeyToPath,
        V1SecretVolumeSource,
    )

    # One volume of each source kind exercised here: emptyDir, configMap, secret.
    empty_dir_volume = V1Volume(
        name="test",
        empty_dir=V1EmptyDirVolumeSource(size_limit="500Gi"),
    )
    config_map_volume = V1Volume(
        name="test2",
        config_map=V1ConfigMapVolumeSource(
            name="config-map-test",
            items=[V1KeyToPath(key="test", path="/home/ray/test2/data.txt")],
        ),
    )
    secret_volume = V1Volume(
        name="test3",
        secret=V1SecretVolumeSource(secret_name="test-secret"),
    )
    custom_volumes = [empty_dir_volume, config_map_volume, secret_volume]

    # NOTE(review): "test2" and "test3" share the mount path /home/ray/test2,
    # mirroring the fixture; possibly a copy-paste slip -- confirm intent.
    custom_mounts = [
        V1VolumeMount(mount_path="/home/ray/test1", name="test"),
        V1VolumeMount(mount_path="/home/ray/test2", name="test2"),
        V1VolumeMount(mount_path="/home/ray/test2", name="test3"),
    ]

    config = ClusterConfiguration(
        name="unit-test-volume-cluster",
        image="quay.io/project-codeflare/ray:latest-py39-cu118",
        volume_mounts=custom_mounts,
        volumes=custom_volumes,
    )
    cluster = Cluster(config)

    generated = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader)
    with open(
        f"{parent}/tests/unit-test-volume-cluster.yaml",
    ) as fixture:
        expected = yaml.load(fixture, Loader=yaml.FullLoader)

    assert generated == expected


def test_gen_names_with_name(mocker):
mocker.patch.object(
uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001")
Expand Down