Skip to content

Add GPU to ClusterConfiguration #527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion demo-notebooks/additional-demos/hf_interactive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,13 @@
"# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
"cluster = Cluster(ClusterConfiguration(name='hfgputest', \n",
" namespace=\"default\", # Update to your namespace\n",
+" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=1,\n",
" num_workers=1,\n",
" min_cpus=8, \n",
" max_cpus=8, \n",
" min_memory=16, \n",
" max_memory=16, \n",
-" num_gpus=4,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
3 changes: 2 additions & 1 deletion demo-notebooks/additional-demos/local_interactive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,13 @@
"\n",
"cluster = Cluster(ClusterConfiguration(namespace=namespace,\n",
" name=cluster_name,\n",
+" head_gpus=0, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=0,\n",
" num_workers=1,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=0,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
3 changes: 2 additions & 1 deletion demo-notebooks/additional-demos/ray_job_client.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='jobtest',\n",
" namespace='default', # Update to your namespace\n",
+" head_gpus=0, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=0,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=0,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
"))"
Expand Down
5 changes: 3 additions & 2 deletions demo-notebooks/guided-demos/0_basic_ray.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,15 @@
"# Create and configure our cluster object\n",
"# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
"cluster = Cluster(ClusterConfiguration(\n",
-" name='raytest',\n",
+" name='raytest', \n",
" namespace='default', # Update to your namespace\n",
+" head_gpus=0, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=0,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=0,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
4 changes: 2 additions & 2 deletions demo-notebooks/guided-demos/1_cluster_job_client.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='jobtest',\n",
" namespace='default', # Update to your namespace\n",
+" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=1,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=1,\n",
-" head_gpus=1,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
3 changes: 2 additions & 1 deletion demo-notebooks/guided-demos/2_basic_interactive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name=cluster_name,\n",
" namespace=namespace,\n",
+" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=1,\n",
" num_workers=2,\n",
" min_cpus=2,\n",
" max_cpus=2,\n",
" min_memory=8,\n",
" max_memory=8,\n",
-" num_gpus=1,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='raytest',\n",
" namespace='default', # Update to your namespace\n",
+" head_gpus=0, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=0,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=0,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='jobtest',\n",
" namespace='default', # Update to your namespace\n",
+" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=1,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=1,\n",
-" head_gpus=1,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name=cluster_name,\n",
" namespace=namespace,\n",
+" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=1,\n",
" num_workers=2,\n",
" min_cpus=2,\n",
" max_cpus=2,\n",
" min_memory=8,\n",
" max_memory=8,\n",
-" num_gpus=1,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
3 changes: 2 additions & 1 deletion demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='raytest',\n",
" namespace='default', # Update to your namespace\n",
+" head_gpus=0, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=0,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=0,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name='jobtest',\n",
" namespace='default', # Update to your namespace\n",
+" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=1,\n",
" num_workers=2,\n",
" min_cpus=1,\n",
" max_cpus=1,\n",
" min_memory=4,\n",
" max_memory=4,\n",
-" num_gpus=1,\n",
-" head_gpus=1,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources\n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,13 @@
"cluster = Cluster(ClusterConfiguration(\n",
" name=cluster_name,\n",
" namespace=namespace,\n",
+" head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
+" num_gpus=1,\n",
" num_workers=2,\n",
" min_cpus=2,\n",
" max_cpus=2,\n",
" min_memory=8,\n",
" max_memory=8,\n",
-" num_gpus=1,\n",
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n",
" write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n",
" # local_queue=\"local-queue-name\" # Specify the local queue manually\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/cluster-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ cluster = Cluster(ClusterConfiguration(
head_cpus=1, # Default 2
head_memory=1, # Default 8
head_gpus=0, # Default 0
+num_gpus=0, # Default 0
num_workers=1, # Default 1
min_cpus=1, # Default 1
max_cpus=1, # Default 1
min_memory=2, # Default 2
max_memory=2, # Default 2
-num_gpus=0, # Default 0
mcad=True, # Default True
image="quay.io/project-codeflare/ray:latest-py39-cu118", # Mandatory Field
machine_types=["m5.xlarge", "g4dn.xlarge"],
Expand Down