diff --git a/docs/cluster-configuration.md b/docs/cluster-configuration.md
index bb058fa4e..7684db2ca 100644
--- a/docs/cluster-configuration.md
+++ b/docs/cluster-configuration.md
@@ -22,39 +22,11 @@ cluster = Cluster(ClusterConfiguration(
image="quay.io/project-codeflare/ray:latest-py39-cu118", # Mandatory Field
instascale=False, # Default False
machine_types=["m5.xlarge", "g4dn.xlarge"],
- ingress_domain="example.com" # Default None, Mandatory for Vanilla Kubernetes Clusters - ingress_domain is ignored on OpenShift Clusters as a route is created.
- local_interactive=False, # Default False
))
```
-Note: On OpenShift, the `ingress_domain` is only required when `local_interactive` is enabled. - This may change soon.
Upon creating a cluster configuration with `mcad=True` an appwrapper will be created featuring the Ray Cluster and any Routes, Ingresses or Secrets that are needed to be created along side it.
From there a user can call `cluster.up()` and `cluster.down()` to create and remove the appwrapper thus creating and removing the Ray Cluster.
In cases where `mcad=False` a yaml file will be created with the individual Ray Cluster, Route/Ingress and Secret included.
The Ray Cluster and service will be created by KubeRay directly and the other components will be individually created.
-
-## Ray Cluster Configuration in a Vanilla Kubernetes environment (Non-OpenShift)
-To create a Ray Cluster using the CodeFlare SDK in a Vanilla Kubernetes environment an `ingress_domain` must be passed in the Cluster Configuration.
-This is used for the creation of the Ray Dashboard and Client ingresses.
-
-`ingress_options` can be passed to create a custom Ray Dashboard ingress, `ingress_domain` is still a required variable for the Client route/ingress.
-An example of `ingress_options` would look like this.
-
-```
-ingress_options = {
- "ingresses": [
- {
- "ingressName": "",
- "port": ,
- "pathType": "",
- "path": "",
- "host":"",
- "annotations": {
- "foo": "bar",
- "foo": "bar",
- }
- }
- ]
-}
-```
diff --git a/src/codeflare_sdk.egg-info/SOURCES.txt b/src/codeflare_sdk.egg-info/SOURCES.txt
index d922d0dbe..42541f1d2 100644
--- a/src/codeflare_sdk.egg-info/SOURCES.txt
+++ b/src/codeflare_sdk.egg-info/SOURCES.txt
@@ -13,11 +13,9 @@ src/codeflare_sdk/cluster/cluster.py
src/codeflare_sdk/cluster/config.py
src/codeflare_sdk/cluster/model.py
src/codeflare_sdk/job/__init__.py
-src/codeflare_sdk/job/jobs.py
src/codeflare_sdk/job/ray_jobs.py
src/codeflare_sdk/utils/__init__.py
src/codeflare_sdk/utils/generate_cert.py
src/codeflare_sdk/utils/generate_yaml.py
src/codeflare_sdk/utils/kube_api_helpers.py
-src/codeflare_sdk/utils/openshift_oauth.py
src/codeflare_sdk/utils/pretty_print.py
diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py
index 24cbf9a71..295332ae4 100644
--- a/src/codeflare_sdk/cluster/cluster.py
+++ b/src/codeflare_sdk/cluster/cluster.py
@@ -179,7 +179,6 @@ def create_app_wrapper(self):
mcad = self.config.mcad
instance_types = self.config.machine_types
env = self.config.envs
- local_interactive = self.config.local_interactive
image_pull_secrets = self.config.image_pull_secrets
dispatch_priority = self.config.dispatch_priority
write_to_file = self.config.write_to_file
@@ -203,7 +202,6 @@ def create_app_wrapper(self):
mcad=mcad,
instance_types=instance_types,
env=env,
- local_interactive=local_interactive,
image_pull_secrets=image_pull_secrets,
dispatch_priority=dispatch_priority,
priority_val=priority_val,
@@ -479,13 +477,6 @@ def from_k8_cluster_object(
verify_tls=True,
):
config_check()
- if (
- rc["metadata"]["annotations"]["sdk.codeflare.dev/local_interactive"]
- == "True"
- ):
- local_interactive = True
- else:
- local_interactive = False
machine_types = (
rc["metadata"]["labels"]["orderedinstance"].split("_")
if "orderedinstance" in rc["metadata"]["labels"]
@@ -526,7 +517,6 @@ def from_k8_cluster_object(
image=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][
0
]["image"],
- local_interactive=local_interactive,
mcad=mcad,
write_to_file=write_to_file,
verify_tls=verify_tls,
@@ -534,11 +524,8 @@ def from_k8_cluster_object(
return Cluster(cluster_config)
def local_client_url(self):
- if self.config.local_interactive == True:
- ingress_domain = _get_ingress_domain(self)
- return f"ray://{ingress_domain}"
- else:
- return "None"
+ ingress_domain = _get_ingress_domain(self)
+ return f"ray://{ingress_domain}"
def _component_resources_up(
self, namespace: str, api_instance: client.CustomObjectsApi
@@ -678,13 +665,6 @@ def _delete_resources(
plural="rayclusters",
name=name,
)
- elif resource["kind"] == "Secret":
- name = resource["metadata"]["name"]
- secret_instance = client.CoreV1Api(api_config_handler())
- secret_instance.delete_namespaced_secret(
- namespace=namespace,
- name=name,
- )
def _create_resources(yamls, namespace: str, api_instance: client.CustomObjectsApi):
@@ -697,12 +677,6 @@ def _create_resources(yamls, namespace: str, api_instance: client.CustomObjectsA
plural="rayclusters",
body=resource,
)
- elif resource["kind"] == "Secret":
- secret_instance = client.CoreV1Api(api_config_handler())
- secret_instance.create_namespaced_secret(
- namespace=namespace,
- body=resource,
- )
def _check_aw_exists(name: str, namespace: str) -> bool:
diff --git a/src/codeflare_sdk/cluster/config.py b/src/codeflare_sdk/cluster/config.py
index f6bcac89c..e4d046f93 100644
--- a/src/codeflare_sdk/cluster/config.py
+++ b/src/codeflare_sdk/cluster/config.py
@@ -49,7 +49,6 @@ class ClusterConfiguration:
mcad: bool = False
envs: dict = field(default_factory=dict)
image: str = ""
- local_interactive: bool = False
image_pull_secrets: list = field(default_factory=list)
dispatch_priority: str = None
write_to_file: bool = False
diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml
index 5f6036ac8..356e3494e 100644
--- a/src/codeflare_sdk/templates/base-template.yaml
+++ b/src/codeflare_sdk/templates/base-template.yaml
@@ -40,8 +40,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
- annotations:
- sdk.codeflare.dev/local_interactive: "False"
labels:
workload.codeflare.dev/appwrapper: "aw-kuberay"
controller-tools.k8s.io: "1.0"
@@ -117,20 +115,7 @@ spec:
- "aw-kuberay"
containers:
# The Ray head pod
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: "0"
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- name: ray-head
+ - name: ray-head
image: quay.io/project-codeflare/ray:latest-py39-cu118
imagePullPolicy: Always
ports:
@@ -154,12 +139,6 @@ spec:
memory: "8G"
nvidia.com/gpu: 0
volumeMounts:
- - name: ca-vol
- mountPath: "/home/ray/workspace/ca"
- readOnly: true
- - name: server-cert
- mountPath: "/home/ray/workspace/tls"
- readOnly: true
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
@@ -172,30 +151,7 @@ spec:
- mountPath: /etc/ssl/certs/odh-ca-bundle.crt
name: odh-ca-cert
subPath: odh-ca-bundle.crt
- initContainers:
- - command:
- - sh
- - -c
- - cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = rayclient-deployment-name-$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).server-name">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext
- image: quay.io/project-codeflare/ray:latest-py39-cu118
- name: create-cert
- # securityContext:
- # runAsUser: 1000
- # runAsGroup: 1000
- volumeMounts:
- - name: ca-vol
- mountPath: "/home/ray/workspace/ca"
- readOnly: true
- - name: server-cert
- mountPath: "/home/ray/workspace/tls"
- readOnly: false
volumes:
- - name: ca-vol
- secret:
- secretName: ca-secret-deployment-name
- optional: false
- - name: server-cert
- emptyDir: {}
- name: odh-trusted-ca-cert
configMap:
name: odh-trusted-ca-bundle
@@ -250,40 +206,9 @@ spec:
operator: In
values:
- "aw-kuberay"
- initContainers:
- # the env var $RAY_IP is set by the operator if missing, with the value of the head service name
- - name: create-cert
- image: quay.io/project-codeflare/ray:latest-py39-cu118
- command:
- - sh
- - -c
- - cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext
- # securityContext:
- # runAsUser: 1000
- # runAsGroup: 1000
- volumeMounts:
- - name: ca-vol
- mountPath: "/home/ray/workspace/ca"
- readOnly: true
- - name: server-cert
- mountPath: "/home/ray/workspace/tls"
- readOnly: false
containers:
- name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc'
image: quay.io/project-codeflare/ray:latest-py39-cu118
- env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: "0"
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
# environment variables to set in the container.Optional.
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
lifecycle:
@@ -300,12 +225,6 @@ spec:
memory: "12G"
nvidia.com/gpu: "1"
volumeMounts:
- - name: ca-vol
- mountPath: "/home/ray/workspace/ca"
- readOnly: true
- - name: server-cert
- mountPath: "/home/ray/workspace/tls"
- readOnly: true
- mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt
name: odh-trusted-ca-cert
subPath: odh-trusted-ca-bundle.crt
@@ -319,12 +238,6 @@ spec:
name: odh-ca-cert
subPath: odh-ca-bundle.crt
volumes:
- - name: ca-vol
- secret:
- secretName: ca-secret-deployment-name
- optional: false
- - name: server-cert
- emptyDir: {}
- name: odh-trusted-ca-cert
configMap:
name: odh-trusted-ca-bundle
@@ -339,15 +252,3 @@ spec:
- key: odh-ca-bundle.crt
path: odh-ca-bundle.crt
optional: true
- - replicas: 1
- generictemplate:
- apiVersion: v1
- data:
- ca.crt: generated_crt
- ca.key: generated_key
- kind: Secret
- metadata:
- name: ca-secret-deployment-name
- labels:
- # allows me to return name of service that Ray operator creates
- odh-ray-cluster-service: deployment-name-head-svc
diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py
index 2088b9102..95c3d04f0 100755
--- a/src/codeflare_sdk/utils/generate_yaml.py
+++ b/src/codeflare_sdk/utils/generate_yaml.py
@@ -85,20 +85,6 @@ def update_names(yaml, item, appwrapper_name, cluster_name, namespace):
lower_meta["labels"]["workload.codeflare.dev/appwrapper"] = appwrapper_name
lower_meta["name"] = cluster_name
lower_meta["namespace"] = namespace
- lower_spec = item.get("generictemplate", {}).get("spec")
- if is_openshift_cluster():
- cookie_secret_env_var = {
- "name": "COOKIE_SECRET",
- "valueFrom": {
- "secretKeyRef": {
- "key": "cookie_secret",
- "name": f"{cluster_name}-oauth-config",
- }
- },
- }
- lower_spec["headGroupSpec"]["template"]["spec"]["containers"][0]["env"].append(
- cookie_secret_env_var
- )
def update_labels(yaml, instascale, instance_types):
@@ -280,106 +266,10 @@ def update_nodes(
update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu)
-def update_ca_secret(ca_secret_item, cluster_name, namespace):
- from . import generate_cert
-
- metadata = ca_secret_item.get("generictemplate", {}).get("metadata")
- metadata["name"] = f"ca-secret-{cluster_name}"
- metadata["namespace"] = namespace
- metadata["labels"]["odh-ray-cluster-service"] = f"{cluster_name}-head-svc"
- data = ca_secret_item.get("generictemplate", {}).get("data")
- data["ca.key"], data["ca.crt"] = generate_cert.generate_ca_cert(365)
-
-
-def enable_local_interactive(resources, cluster_name, namespace): # pragma: no cover
- from ..cluster.cluster import _get_ingress_domain
-
- ca_secret_item = resources["resources"].get("GenericItems")[1]
- item = resources["resources"].get("GenericItems")[0]
- update_ca_secret(ca_secret_item, cluster_name, namespace)
- # update_ca_secret_volumes
- item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"]["volumes"][0][
- "secret"
- ]["secretName"] = f"ca-secret-{cluster_name}"
- item["generictemplate"]["spec"]["workerGroupSpecs"][0]["template"]["spec"][
- "volumes"
- ][0]["secret"]["secretName"] = f"ca-secret-{cluster_name}"
- # update_tls_env
- item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"]["containers"][
- 0
- ]["env"][1]["value"] = "1"
- item["generictemplate"]["spec"]["workerGroupSpecs"][0]["template"]["spec"][
- "containers"
- ][0]["env"][1]["value"] = "1"
- # update_init_container
- command = item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"][
- "initContainers"
- ][0].get("command")[2]
-
- command = command.replace("deployment-name", cluster_name)
-
- domain = "" ## FIX - We can't retrieve ingress domain - move init container to CFO
-
- command = command.replace("server-name", domain)
- item["generictemplate"]["metadata"]["annotations"][
- "sdk.codeflare.dev/local_interactive"
- ] = "True"
-
- item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"][
- "initContainers"
- ][0].get("command")[2] = command
-
-
def del_from_list_by_name(l: list, target: typing.List[str]) -> list:
return [x for x in l if x["name"] not in target]
-def disable_raycluster_tls(resources):
- generic_template_spec = resources["GenericItems"][0]["generictemplate"]["spec"]
-
- headGroupTemplateSpec = generic_template_spec["headGroupSpec"]["template"]["spec"]
- headGroupTemplateSpec["volumes"] = del_from_list_by_name(
- headGroupTemplateSpec.get("volumes", []),
- ["ca-vol", "server-cert"],
- )
-
- c: dict
- for c in generic_template_spec["headGroupSpec"]["template"]["spec"]["containers"]:
- c["volumeMounts"] = del_from_list_by_name(
- c.get("volumeMounts", []), ["ca-vol", "server-cert"]
- )
-
- if "initContainers" in generic_template_spec["headGroupSpec"]["template"]["spec"]:
- del generic_template_spec["headGroupSpec"]["template"]["spec"]["initContainers"]
-
- for workerGroup in generic_template_spec.get("workerGroupSpecs", []):
- workerGroupSpec = workerGroup["template"]["spec"]
- workerGroupSpec["volumes"] = del_from_list_by_name(
- workerGroupSpec.get("volumes", []),
- ["ca-vol", "server-cert"],
- )
- for c in workerGroup["template"]["spec"].get("containers", []):
- c["volumeMounts"] = del_from_list_by_name(
- c.get("volumeMounts", []), ["ca-vol", "server-cert"]
- )
-
- del generic_template_spec["workerGroupSpecs"][0]["template"]["spec"][
- "initContainers"
- ]
-
- updated_items = []
- for i in resources["GenericItems"][:]:
- if "rayclient-deployment-ingress" in i["generictemplate"]["metadata"]["name"]:
- continue
- if "rayclient-deployment-route" in i["generictemplate"]["metadata"]["name"]:
- continue
- if "ca-secret-deployment-name" in i["generictemplate"]["metadata"]["name"]:
- continue
- updated_items.append(i)
-
- resources["GenericItems"] = updated_items
-
-
def write_user_appwrapper(user_yaml, output_file_name):
# Create the directory if it doesn't exist
directory_path = os.path.dirname(output_file_name)
@@ -392,75 +282,6 @@ def write_user_appwrapper(user_yaml, output_file_name):
print(f"Written to: {output_file_name}")
-def enable_openshift_oauth(user_yaml, cluster_name, namespace):
- config_check()
- k8_client = api_config_handler() or client.ApiClient()
- tls_mount_location = "/etc/tls/private"
- oauth_port = 8443
- oauth_sa_name = f"{cluster_name}-oauth-proxy"
- tls_secret_name = f"{cluster_name}-proxy-tls-secret"
- tls_volume_name = "proxy-tls-secret"
- port_name = "oauth-proxy"
- oauth_sidecar = _create_oauth_sidecar_object(
- namespace,
- tls_mount_location,
- oauth_port,
- oauth_sa_name,
- tls_volume_name,
- port_name,
- )
- tls_secret_volume = client.V1Volume(
- name=tls_volume_name,
- secret=client.V1SecretVolumeSource(secret_name=tls_secret_name),
- )
- # allows for setting value of Cluster object when initializing object from an existing AppWrapper on cluster
- user_yaml["metadata"]["annotations"] = user_yaml["metadata"].get("annotations", {})
- ray_headgroup_pod = user_yaml["spec"]["resources"]["GenericItems"][0][
- "generictemplate"
- ]["spec"]["headGroupSpec"]["template"]["spec"]
- ray_headgroup_pod["serviceAccount"] = oauth_sa_name
- ray_headgroup_pod["volumes"] = ray_headgroup_pod.get("volumes", [])
-
- # we use a generic api client here so that the serialization function doesn't need to be mocked for unit tests
- ray_headgroup_pod["volumes"].append(
- client.ApiClient().sanitize_for_serialization(tls_secret_volume)
- )
- ray_headgroup_pod["containers"].append(
- client.ApiClient().sanitize_for_serialization(oauth_sidecar)
- )
-
-
-def _create_oauth_sidecar_object(
- namespace: str,
- tls_mount_location: str,
- oauth_port: int,
- oauth_sa_name: str,
- tls_volume_name: str,
- port_name: str,
-) -> client.V1Container:
- return client.V1Container(
- args=[
- f"--https-address=:{oauth_port}",
- "--provider=openshift",
- f"--openshift-service-account={oauth_sa_name}",
- "--upstream=http://localhost:8265",
- f"--tls-cert={tls_mount_location}/tls.crt",
- f"--tls-key={tls_mount_location}/tls.key",
- "--cookie-secret=$(COOKIE_SECRET)",
- f'--openshift-delegate-urls={{"/":{{"resource":"pods","namespace":"{namespace}","verb":"get"}}}}',
- ],
- image="registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366",
- name="oauth-proxy",
- ports=[client.V1ContainerPort(container_port=oauth_port, name=port_name)],
- resources=client.V1ResourceRequirements(limits=None, requests=None),
- volume_mounts=[
- client.V1VolumeMount(
- mount_path=tls_mount_location, name=tls_volume_name, read_only=True
- )
- ],
- )
-
-
def get_default_kueue_name(namespace: str):
# If the local queue is set, use it. Otherwise, try to use the default queue.
try:
@@ -568,7 +389,6 @@ def generate_appwrapper(
mcad: bool,
instance_types: list,
env,
- local_interactive: bool,
image_pull_secrets: list,
dispatch_priority: str,
priority_val: int,
@@ -619,14 +439,6 @@ def generate_appwrapper(
head_gpus,
)
- if local_interactive:
- enable_local_interactive(resources, cluster_name, namespace)
- else:
- disable_raycluster_tls(resources["resources"])
-
- if is_openshift_cluster():
- enable_openshift_oauth(user_yaml, cluster_name, namespace)
-
directory_path = os.path.expanduser("~/.codeflare/resources/")
outfile = os.path.join(directory_path, appwrapper_name + ".yaml")
diff --git a/tests/test-case-bad.yaml b/tests/test-case-bad.yaml
index aeccf5194..6e969e01b 100644
--- a/tests/test-case-bad.yaml
+++ b/tests/test-case-bad.yaml
@@ -32,8 +32,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
- annotations:
- sdk.codeflare.dev/local_interactive: 'False'
labels:
workload.codeflare.dev/appwrapper: unit-test-cluster
controller-tools.k8s.io: '1.0'
diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls
index e13752a44..aaf9324e6 100644
--- a/tests/test-case-no-mcad.yamls
+++ b/tests/test-case-no-mcad.yamls
@@ -2,8 +2,6 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
- annotations:
- sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
kueue.x-k8s.io/queue-name: local-queue-default
@@ -41,20 +39,7 @@ spec:
values:
- unit-test-cluster-ray
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
imagePullPolicy: Always
lifecycle:
preStop:
@@ -136,20 +121,7 @@ spec:
values:
- unit-test-cluster-ray
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
lifecycle:
preStop:
exec:
diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml
index 10e161dee..a4d6e68f2 100644
--- a/tests/test-case-prio.yaml
+++ b/tests/test-case-prio.yaml
@@ -32,8 +32,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
- annotations:
- sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
workload.codeflare.dev/appwrapper: prio-test-cluster
@@ -71,20 +69,7 @@ spec:
values:
- prio-test-cluster
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
imagePullPolicy: Always
lifecycle:
preStop:
@@ -167,20 +152,7 @@ spec:
values:
- prio-test-cluster
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
lifecycle:
preStop:
exec:
diff --git a/tests/test-case.yaml b/tests/test-case.yaml
index 78d2e4a54..b97d12a49 100644
--- a/tests/test-case.yaml
+++ b/tests/test-case.yaml
@@ -31,8 +31,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
- annotations:
- sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
workload.codeflare.dev/appwrapper: unit-test-cluster
@@ -70,20 +68,7 @@ spec:
values:
- unit-test-cluster
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
imagePullPolicy: Always
lifecycle:
preStop:
@@ -165,20 +150,7 @@ spec:
values:
- unit-test-cluster
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
lifecycle:
preStop:
exec:
diff --git a/tests/test-default-appwrapper.yaml b/tests/test-default-appwrapper.yaml
index ecab5eac6..c390f619b 100644
--- a/tests/test-default-appwrapper.yaml
+++ b/tests/test-default-appwrapper.yaml
@@ -29,8 +29,6 @@ spec:
apiVersion: ray.io/v1
kind: RayCluster
metadata:
- annotations:
- sdk.codeflare.dev/local_interactive: 'False'
labels:
controller-tools.k8s.io: '1.0'
workload.codeflare.dev/appwrapper: unit-test-default-cluster
@@ -59,20 +57,7 @@ spec:
template:
spec:
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
imagePullPolicy: Always
lifecycle:
preStop:
@@ -144,20 +129,7 @@ spec:
key: value
spec:
containers:
- - env:
- - name: MY_POD_IP
- valueFrom:
- fieldRef:
- fieldPath: status.podIP
- - name: RAY_USE_TLS
- value: '0'
- - name: RAY_TLS_SERVER_CERT
- value: /home/ray/workspace/tls/server.crt
- - name: RAY_TLS_SERVER_KEY
- value: /home/ray/workspace/tls/server.key
- - name: RAY_TLS_CA_CERT
- value: /home/ray/workspace/tls/ca.crt
- image: quay.io/project-codeflare/ray:latest-py39-cu118
+ - image: quay.io/project-codeflare/ray:latest-py39-cu118
lifecycle:
preStop:
exec:
diff --git a/tests/unit_test.py b/tests/unit_test.py
index 935cdd100..3892f8e57 100644
--- a/tests/unit_test.py
+++ b/tests/unit_test.py
@@ -74,7 +74,7 @@
gen_names,
is_openshift_cluster,
read_template,
- enable_local_interactive,
+ write_components,
)
import openshift
@@ -257,7 +257,6 @@ def test_config_creation():
assert config.image_pull_secrets == ["unit-test-pull-secret"]
assert config.dispatch_priority == None
assert config.mcad == True
- assert config.local_interactive == False
def test_cluster_creation(mocker):
@@ -326,6 +325,7 @@ def test_cluster_creation_no_mcad(mocker):
config.write_to_file = True
config.mcad = False
cluster = Cluster(config)
+
assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml"
assert cluster.app_wrapper_name == "unit-test-cluster-ray"
assert filecmp.cmp(
@@ -370,23 +370,18 @@ def test_cluster_creation_no_mcad_local_queue(mocker):
machine_types=["cpu.small", "gpu.large"],
image_pull_secrets=["unit-test-pull-secret"],
image="quay.io/project-codeflare/ray:latest-py39-cu118",
- write_to_file=False,
+ write_to_file=True,
mcad=False,
local_queue="local-queue-default",
)
cluster = Cluster(config)
- test_resources = []
- expected_resources = []
- test_aw = yaml.load_all(cluster.app_wrapper_yaml, Loader=yaml.FullLoader)
- for resource in test_aw:
- test_resources.append(resource)
- with open(
+ assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml"
+ assert cluster.app_wrapper_name == "unit-test-cluster-ray"
+ assert filecmp.cmp(
+ f"{aw_dir}unit-test-cluster-ray.yaml",
f"{parent}/tests/test-case-no-mcad.yamls",
- ) as f:
- default_aw = yaml.load_all(f, Loader=yaml.FullLoader)
- for resource in default_aw:
- expected_resources.append(resource)
- assert test_resources == expected_resources
+ shallow=True,
+ )
def test_cluster_creation_priority(mocker):
@@ -425,7 +420,8 @@ def test_default_cluster_creation(mocker):
mcad=True,
)
cluster = Cluster(default_config)
- test_aw = yaml.safe_load(cluster.app_wrapper_yaml)
+ test_aw = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader)
+
with open(
f"{parent}/tests/test-default-appwrapper.yaml",
) as f:
@@ -534,16 +530,12 @@ def test_cluster_up_down(mocker):
def test_cluster_up_down_no_mcad(mocker):
+ mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
+ mocker.patch("kubernetes.client.ApisApi.get_api_versions")
mocker.patch(
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
)
- mocker.patch("kubernetes.client.ApisApi.get_api_versions")
- mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
- mocker.patch(
- "kubernetes.client.CustomObjectsApi.get_cluster_custom_object",
- return_value={"spec": {"domain": "apps.cluster.awsroute.org"}},
- )
mocker.patch(
"kubernetes.client.CustomObjectsApi.create_namespaced_custom_object",
side_effect=arg_check_apply_effect,
@@ -552,6 +544,12 @@ def test_cluster_up_down_no_mcad(mocker):
"kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object",
side_effect=arg_check_del_effect,
)
+ mocker.patch(
+ "kubernetes.client.CoreV1Api.create_namespaced_secret",
+ )
+ mocker.patch(
+ "kubernetes.client.CoreV1Api.delete_namespaced_secret",
+ )
mocker.patch(
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
return_value={"items": []},
@@ -678,7 +676,6 @@ def test_local_client_url(mocker):
cluster_config = ClusterConfiguration(
name="unit-test-cluster-localinter",
namespace="ns",
- local_interactive=True,
write_to_file=True,
)
cluster = Cluster(cluster_config)
@@ -994,9 +991,6 @@ def get_ray_obj(group, version, namespace, plural, cls=None):
"metadata": {
"creationTimestamp": "2024-03-05T09:55:37Z",
"generation": 1,
- "annotations": {
- "sdk.codeflare.dev/local_interactive": "True",
- },
"labels": {
"appwrapper.mcad.ibm.com": "quicktest",
"controller-tools.k8s.io": "1.0",
@@ -1806,9 +1800,6 @@ def get_aw_obj(group, version, namespace, plural):
"apiVersion": "ray.io/v1",
"kind": "RayCluster",
"metadata": {
- "annotations": {
- "sdk.codeflare.dev/local_interactive": "False"
- },
"labels": {
"workload.codeflare.dev/appwrapper": "quicktest1",
"controller-tools.k8s.io": "1.0",
@@ -2136,9 +2127,6 @@ def get_aw_obj(group, version, namespace, plural):
"apiVersion": "ray.io/v1",
"kind": "RayCluster",
"metadata": {
- "annotations": {
- "sdk.codeflare.dev/local_interactive": "False"
- },
"labels": {
"workload.codeflare.dev/appwrapper": "quicktest2",
"controller-tools.k8s.io": "1.0",
@@ -2450,7 +2438,6 @@ def custom_side_effect(group, version, namespace, plural, **kwargs):
assert cluster_config.min_cpus == 1 and cluster_config.max_cpus == 1
assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2
assert cluster_config.num_gpus == 0
- assert cluster_config.local_interactive == True
assert (
cluster_config.image
== "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103"
@@ -2484,7 +2471,6 @@ def test_get_cluster(mocker):
assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2
assert cluster_config.num_gpus == 0
assert cluster_config.instascale
- assert cluster_config.local_interactive
assert (
cluster_config.image
== "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103"
@@ -3014,151 +3000,6 @@ def test_export_env():
)
-# def test_enable_local_interactive(mocker):
-# template = f"{parent}/src/codeflare_sdk/templates/base-template.yaml"
-# user_yaml = read_template(template)
-# aw_spec = user_yaml.get("spec", None)
-# cluster_name = "test-enable-local"
-# namespace = "default"
-# ingress_domain = "mytest.domain"
-# mocker.patch("kubernetes.client.ApisApi.get_api_versions")
-# mocker.patch(
-# "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=False
-# )
-# volume_mounts = [
-# {"name": "ca-vol", "mountPath": "/home/ray/workspace/ca", "readOnly": True},
-# {
-# "name": "server-cert",
-# "mountPath": "/home/ray/workspace/tls",
-# "readOnly": False,
-# },
-# ]
-# volumes = [
-# {
-# "name": "ca-vol",
-# "secret": {"secretName": "ca-secret-test-enable-local"},
-# "optional": False,
-# },
-# {"name": "server-cert", "emptyDir": {}},
-# {
-# "name": "odh-trusted-ca-cert",
-# "configMap": {
-# "name": "odh-trusted-ca-bundle",
-# "items": [
-# {"key": "ca-bundle.crt", "path": "odh-trusted-ca-bundle.crt"}
-# ],
-# "optional": True,
-# },
-# },
-# {
-# "name": "odh-ca-cert",
-# "configMap": {
-# "name": "odh-trusted-ca-bundle",
-# "items": [{"key": "odh-ca-bundle.crt", "path": "odh-ca-bundle.crt"}],
-# "optional": True,
-# },
-# },
-# ]
-# tls_env = [
-# {"name": "RAY_USE_TLS", "value": "1"},
-# {"name": "RAY_TLS_SERVER_CERT", "value": "/home/ray/workspace/tls/server.crt"},
-# {"name": "RAY_TLS_SERVER_KEY", "value": "/home/ray/workspace/tls/server.key"},
-# {"name": "RAY_TLS_CA_CERT", "value": "/home/ray/workspace/tls/ca.crt"},
-# ]
-# assert aw_spec != None
-# enable_local_interactive(aw_spec, cluster_name, namespace, ingress_domain)
-# head_group_spec = aw_spec["resources"]["GenericItems"][0]["generictemplate"][
-# "spec"
-# ]["headGroupSpec"]
-# worker_group_spec = aw_spec["resources"]["GenericItems"][0]["generictemplate"][
-# "spec"
-# ]["workerGroupSpecs"]
-# ca_secret = aw_spec["resources"]["GenericItems"][1]["generictemplate"]
-# # At a minimal, make sure the following items are presented in the appwrapper spec.resources.
-# # 1. headgroup has the initContainers command to generated TLS cert from the mounted CA cert.
-# # Note: In this particular command, the DNS.5 in [alt_name] must match the exposed local_client_url: rayclient-{cluster_name}.{namespace}.{ingress_domain}
-# assert (
-# head_group_spec["template"]["spec"]["initContainers"][0]["command"][2]
-# == f"cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf \"authorityKeyIdentifier=keyid,issuer\\nbasicConstraints=CA:FALSE\\nsubjectAltName = @alt_names\\n[alt_names]\\nDNS.1 = 127.0.0.1\\nDNS.2 = localhost\\nDNS.3 = ${{FQ_RAY_IP}}\\nDNS.4 = $(awk 'END{{print $1}}' /etc/hosts)\\nDNS.5 = rayclient-{cluster_name}-$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).{ingress_domain}\">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext"
-# )
-# assert (
-# head_group_spec["template"]["spec"]["initContainers"][0]["volumeMounts"]
-# == volume_mounts
-# )
-# assert head_group_spec["template"]["spec"]["volumes"] == volumes
-
-# # 2. workerGroupSpec has the initContainers command to generated TLS cert from the mounted CA cert.
-# assert (
-# worker_group_spec[0]["template"]["spec"]["initContainers"][0]["command"][2]
-# == "cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf \"authorityKeyIdentifier=keyid,issuer\\nbasicConstraints=CA:FALSE\\nsubjectAltName = @alt_names\\n[alt_names]\\nDNS.1 = 127.0.0.1\\nDNS.2 = localhost\\nDNS.3 = ${FQ_RAY_IP}\\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext"
-# )
-# assert (
-# worker_group_spec[0]["template"]["spec"]["initContainers"][0]["volumeMounts"]
-# == volume_mounts
-# )
-# assert worker_group_spec[0]["template"]["spec"]["volumes"] == volumes
-
-# # 3. Required Envs to enable TLS encryption between head and workers
-# for i in range(len(tls_env)):
-# assert (
-# head_group_spec["template"]["spec"]["containers"][0]["env"][i + 1]["name"]
-# == tls_env[i]["name"]
-# )
-# assert (
-# head_group_spec["template"]["spec"]["containers"][0]["env"][i + 1]["value"]
-# == tls_env[i]["value"]
-# )
-# assert (
-# worker_group_spec[0]["template"]["spec"]["containers"][0]["env"][i + 1][
-# "name"
-# ]
-# == tls_env[i]["name"]
-# )
-# assert (
-# worker_group_spec[0]["template"]["spec"]["containers"][0]["env"][i + 1][
-# "value"
-# ]
-# == tls_env[i]["value"]
-# )
-
-# # 4. Secret with ca.crt and ca.key
-# assert ca_secret["kind"] == "Secret"
-# assert ca_secret["data"]["ca.crt"] != None
-# assert ca_secret["data"]["ca.key"] != None
-# assert ca_secret["metadata"]["name"] == f"ca-secret-{cluster_name}"
-# assert ca_secret["metadata"]["namespace"] == namespace
-
-
-def test_gen_app_wrapper_with_oauth(mocker: MockerFixture):
- mocker.patch("kubernetes.client.ApisApi.get_api_versions")
- mocker.patch(
- "codeflare_sdk.cluster.cluster.get_current_namespace",
- return_value="opendatahub",
- )
- mocker.patch(
- "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=True
- )
- write_user_appwrapper = MagicMock()
- mocker.patch(
- "codeflare_sdk.utils.generate_yaml.write_user_appwrapper", write_user_appwrapper
- )
- Cluster(
- ClusterConfiguration(
- "test_cluster",
- image="quay.io/project-codeflare/ray:latest-py39-cu118",
- write_to_file=True,
- mcad=True,
- )
- )
- user_yaml = write_user_appwrapper.call_args.args[0]
- assert any(
- container["name"] == "oauth-proxy"
- for container in user_yaml["spec"]["resources"]["GenericItems"][0][
- "generictemplate"
- ]["spec"]["headGroupSpec"]["template"]["spec"]["containers"]
- )
-
-
"""
Ray Jobs tests
"""