diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py
index e5bbcd86a..015f15eda 100644
--- a/src/codeflare_sdk/cluster/cluster.py
+++ b/src/codeflare_sdk/cluster/cluster.py
@@ -152,6 +152,8 @@ def create_app_wrapper(self):
         write_to_file = self.config.write_to_file
         local_queue = self.config.local_queue
         labels = self.config.labels
+        volumes = self.config.volumes
+        volume_mounts = self.config.volume_mounts
         return generate_appwrapper(
             name=name,
             namespace=namespace,
@@ -172,6 +174,8 @@ def create_app_wrapper(self):
             write_to_file=write_to_file,
             local_queue=local_queue,
             labels=labels,
+            volumes=volumes,
+            volume_mounts=volume_mounts,
         )
 
     # creates a new cluster with the provided or default spec
diff --git a/src/codeflare_sdk/cluster/config.py b/src/codeflare_sdk/cluster/config.py
index 9e069c376..970673652 100644
--- a/src/codeflare_sdk/cluster/config.py
+++ b/src/codeflare_sdk/cluster/config.py
@@ -53,6 +53,8 @@ class ClusterConfiguration:
     write_to_file: bool = False
     verify_tls: bool = True
     labels: dict = field(default_factory=dict)
+    volumes: list = field(default_factory=list)
+    volume_mounts: list = field(default_factory=list)
 
     def __post_init__(self):
         if not self.verify_tls:
diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py
index 30edcd913..3192ae1bc 100755
--- a/src/codeflare_sdk/utils/generate_yaml.py
+++ b/src/codeflare_sdk/utils/generate_yaml.py
@@ -96,6 +96,42 @@ def update_image_pull_secrets(spec, image_pull_secrets):
     ]
 
 
+def update_volume_mounts(spec, volume_mounts: list):
+    """Append the user-supplied volume mounts to every container in ``spec``.
+
+    ``volume_mounts`` holds kubernetes ``V1VolumeMount`` objects. Each one is
+    serialized once per container, so containers never share a dict object.
+    A container without a ``volumeMounts`` list gets one created instead of
+    raising ``KeyError``.
+    """
+    if not volume_mounts:
+        return
+    # Build one client up front instead of one per mount per container.
+    api_client = client.ApiClient()
+    for container in spec.get("containers") or []:
+        mounts = container.get("volumeMounts")
+        if mounts is None:
+            mounts = container["volumeMounts"] = []
+        for volume_mount in volume_mounts:
+            mounts.append(api_client.sanitize_for_serialization(volume_mount))
+
+
+def update_volumes(spec, volumes: list):
+    """Append the user-supplied volumes to the pod ``spec``.
+
+    ``volumes`` holds kubernetes ``V1Volume`` objects. A spec without a
+    ``volumes`` list gets one created instead of raising ``KeyError``.
+    """
+    if not volumes:
+        return
+    api_client = client.ApiClient()
+    pod_volumes = spec.get("volumes")
+    if pod_volumes is None:
+        pod_volumes = spec["volumes"] = []
+    for volume in volumes:
+        pod_volumes.append(api_client.sanitize_for_serialization(volume))
+
+
 def update_env(spec, env):
     containers = spec.get("containers")
     for container in containers:
@@ -136,6 +172,8 @@ def update_nodes(
     head_cpus,
     head_memory,
     head_gpus,
+    volumes,
+    volume_mounts,
 ):
     head = cluster_yaml.get("spec").get("headGroupSpec")
     head["rayStartParams"]["num-gpus"] = str(int(head_gpus))
@@ -150,6 +188,8 @@ def update_nodes(
 
     for comp in [head, worker]:
         spec = comp.get("template").get("spec")
+        update_volume_mounts(spec, volume_mounts)
+        update_volumes(spec, volumes)
         update_image_pull_secrets(spec, image_pull_secrets)
         update_image(spec, image)
         update_env(spec, env)
@@ -280,6 +320,8 @@ def generate_appwrapper(
     write_to_file: bool,
     local_queue: Optional[str],
     labels,
+    volumes: list[client.V1Volume],
+    volume_mounts: list[client.V1VolumeMount],
 ):
     cluster_yaml = read_template(template)
     appwrapper_name, cluster_name = gen_names(name)
@@ -299,6 +341,8 @@ def generate_appwrapper(
         head_cpus,
         head_memory,
         head_gpus,
+        volumes,
+        volume_mounts,
     )
     augment_labels(cluster_yaml, labels)
     notebook_annotations(cluster_yaml)
diff --git a/tests/unit-test-volume-cluster.yaml b/tests/unit-test-volume-cluster.yaml
new file mode 100644
index 000000000..542f11833
--- /dev/null
+++ b/tests/unit-test-volume-cluster.yaml
@@ -0,0 +1,186 @@
+apiVersion: ray.io/v1
+kind: RayCluster
+metadata:
+  labels:
+    controller-tools.k8s.io: '1.0'
+    kueue.x-k8s.io/queue-name: local-queue-default
+  name: unit-test-volume-cluster
+  namespace: opendatahub
+spec:
+  autoscalerOptions:
+    idleTimeoutSeconds: 60
+    imagePullPolicy: Always
+    resources:
+      limits:
+        cpu: 500m
+        memory: 512Mi
+      requests:
+        cpu: 500m
+        memory: 512Mi
+    upscalingMode: Default
+  enableInTreeAutoscaling: false
+  headGroupSpec:
+    enableIngress: false
+    rayStartParams:
+      block: 'true'
+      dashboard-host: 0.0.0.0
+      num-gpus: '0'
+    serviceType: ClusterIP
+    template:
+      spec:
+        containers:
+        - image: quay.io/project-codeflare/ray:latest-py39-cu118
+          imagePullPolicy: Always
+          lifecycle:
+            preStop:
+              exec:
+                command:
+                - /bin/sh
+                - -c
+                - ray stop
+          name: ray-head
+          ports:
+          - containerPort: 6379
+            name: gcs
+          - containerPort: 8265
+            name: dashboard
+          - containerPort: 10001
+            name: client
+          resources:
+            limits:
+              cpu: 2
+              memory: 8G
+              nvidia.com/gpu: 0
+            requests:
+              cpu: 2
+              memory: 8G
+              nvidia.com/gpu: 0
+          volumeMounts:
+          - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
+            name: odh-trusted-ca-cert
+            subPath: odh-trusted-ca-bundle.crt
+          - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt
+            name: odh-trusted-ca-cert
+            subPath: odh-trusted-ca-bundle.crt
+          - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt
+            name: odh-ca-cert
+            subPath: odh-ca-bundle.crt
+          - mountPath: /etc/ssl/certs/odh-ca-bundle.crt
+            name: odh-ca-cert
+            subPath: odh-ca-bundle.crt
+          - mountPath: /home/ray/test1
+            name: test
+          - mountPath: /home/ray/test2
+            name: test2
+          - mountPath: /home/ray/test2
+            name: test3
+        imagePullSecrets: []
+        volumes:
+        - configMap:
+            items:
+            - key: ca-bundle.crt
+              path: odh-trusted-ca-bundle.crt
+            name: odh-trusted-ca-bundle
+            optional: true
+          name: odh-trusted-ca-cert
+        - configMap:
+            items:
+            - key: odh-ca-bundle.crt
+              path: odh-ca-bundle.crt
+            name: odh-trusted-ca-bundle
+            optional: true
+          name: odh-ca-cert
+        - emptyDir:
+            sizeLimit: 500Gi
+          name: test
+        - configMap:
+            items:
+            - key: test
+              path: /home/ray/test2/data.txt
+            name: config-map-test
+          name: test2
+        - name: test3
+          secret:
+            secretName: test-secret
+  rayVersion: 2.7.0
+  workerGroupSpecs:
+  - groupName: small-group-unit-test-volume-cluster
+    maxReplicas: 1
+    minReplicas: 1
+    rayStartParams:
+      block: 'true'
+      num-gpus: '0'
+    replicas: 1
+    template:
+      metadata:
+        annotations:
+          key: value
+        labels:
+          key: value
+      spec:
+        containers:
+        - image: quay.io/project-codeflare/ray:latest-py39-cu118
+          lifecycle:
+            preStop:
+              exec:
+                command:
+                - /bin/sh
+                - -c
+                - ray stop
+          name: machine-learning
+          resources:
+            limits:
+              cpu: 1
+              memory: 2G
+              nvidia.com/gpu: 0
+            requests:
+              cpu: 1
+              memory: 2G
+              nvidia.com/gpu: 0
+          volumeMounts:
+          - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
+            name: odh-trusted-ca-cert
+            subPath: odh-trusted-ca-bundle.crt
+          - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt
+            name: odh-trusted-ca-cert
+            subPath: odh-trusted-ca-bundle.crt
+          - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt
+            name: odh-ca-cert
+            subPath: odh-ca-bundle.crt
+          - mountPath: /etc/ssl/certs/odh-ca-bundle.crt
+            name: odh-ca-cert
+            subPath: odh-ca-bundle.crt
+          - mountPath: /home/ray/test1
+            name: test
+          - mountPath: /home/ray/test2
+            name: test2
+          - mountPath: /home/ray/test2
+            name: test3
+        imagePullSecrets: []
+        volumes:
+        - configMap:
+            items:
+            - key: ca-bundle.crt
+              path: odh-trusted-ca-bundle.crt
+            name: odh-trusted-ca-bundle
+            optional: true
+          name: odh-trusted-ca-cert
+        - configMap:
+            items:
+            - key: odh-ca-bundle.crt
+              path: odh-ca-bundle.crt
+            name: odh-trusted-ca-bundle
+            optional: true
+          name: odh-ca-cert
+        - emptyDir:
+            sizeLimit: 500Gi
+          name: test
+        - configMap:
+            items:
+            - key: test
+              path: /home/ray/test2/data.txt
+            name: config-map-test
+          name: test2
+        - name: test3
+          secret:
+            secretName: test-secret
diff --git a/tests/unit_test.py b/tests/unit_test.py
index bdd5ffadd..892d8fe27 100644
--- a/tests/unit_test.py
+++ b/tests/unit_test.py
@@ -444,6 +444,68 @@ def test_default_cluster_creation(mocker):
     assert cluster.config.namespace == "opendatahub"
 
 
+def test_cluster_with_custom_volumes(mocker):
+    mocker.patch("kubernetes.client.ApisApi.get_api_versions")
+    mocker.patch(
+        "codeflare_sdk.cluster.cluster.get_current_namespace",
+        return_value="opendatahub",
+    )
+    mocker.patch(
+        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
+        return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
+    )
+
+    from kubernetes.client import (
+        V1Volume,
+        V1VolumeMount,
+        V1EmptyDirVolumeSource,
+        V1ConfigMapVolumeSource,
+        V1KeyToPath,
+        V1SecretVolumeSource,
+    )
+
+    volume_mounts = [
+        V1VolumeMount(mount_path="/home/ray/test1", name="test"),
+        V1VolumeMount(
+            mount_path="/home/ray/test2",
+            name="test2",
+        ),
+        V1VolumeMount(
+            mount_path="/home/ray/test2",
+            name="test3",
+        ),
+    ]
+
+    volumes = [
+        V1Volume(
+            name="test",
+            empty_dir=V1EmptyDirVolumeSource(size_limit="500Gi"),
+        ),
+        V1Volume(
+            name="test2",
+            config_map=V1ConfigMapVolumeSource(
+                name="config-map-test",
+                items=[V1KeyToPath(key="test", path="/home/ray/test2/data.txt")],
+            ),
+        ),
+        V1Volume(name="test3", secret=V1SecretVolumeSource(secret_name="test-secret")),
+    ]
+
+    test_config = ClusterConfiguration(
+        name="unit-test-volume-cluster",
+        image="quay.io/project-codeflare/ray:latest-py39-cu118",
+        volume_mounts=volume_mounts,
+        volumes=volumes,
+    )
+    cluster = Cluster(test_config)
+    test_rc = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader)
+    with open(
+        f"{parent}/tests/unit-test-volume-cluster.yaml",
+    ) as f:
+        volume_rc = yaml.load(f, Loader=yaml.FullLoader)
+    assert test_rc == volume_rc
+
+
 def test_gen_names_with_name(mocker):
     mocker.patch.object(
         uuid, "uuid4", return_value=uuid.UUID("00000000-0000-0000-0000-000000000001")