1
- name : e2e
1
+ name : Guided notebooks tests
2
2
3
3
on :
4
4
pull_request :
68
68
python-version : ' 3.9'
69
69
cache : ' pip' # caching pip dependencies
70
70
71
# Configures AWS credentials from repository secrets for region us-east-1.
# Later steps in this workflow call the `aws s3` CLI.
- name: Configure AWS Credentials
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
    aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    aws-region: us-east-1
77
+
71
78
- name : Setup NVidia GPU environment for KinD
72
79
uses : ./common/github-actions/nvidia-gpu-setup
73
80
76
83
77
84
# Installs the NVidia GPU operator via the repository-local action,
# passing enable-time-slicing as the string "true".
- name: Install NVidia GPU operator for KinD
  uses: ./common/github-actions/nvidia-gpu-operator
  with:
    enable-time-slicing: "true"
79
88
80
89
- name : Deploy CodeFlare stack
81
90
id : deploy
@@ -113,46 +122,107 @@ jobs:
113
122
kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user
114
123
kubectl config use-context sdk-user
115
124
116
# Creates the S3 bucket used by the notebook tests. The step id is what the
# "Delete AWS S3 bucket" step checks in its `if:` condition.
# NOTE(review): the bucket name is fixed, so overlapping runs of this workflow
# would share (and delete) the same bucket - confirm this is acceptable.
- name: Create AWS S3 bucket for tests
  id: s3-bucket
  run: aws s3 mb s3://rhoai-dw-sdk-test
120
129
121
# Installs the tooling used to execute the guided notebooks (poetry,
# papermill, ipython, ipykernel) and then the SDK with its test/docs groups.
- name: Setup Guided notebooks execution
  run: |
    echo "Installing papermill and dependencies..."
    pip install poetry papermill ipython ipykernel
    # papermill has problems using packages from a virtualenv, so tell poetry
    # not to create one
    poetry config virtualenvs.create false

    echo "Installing SDK..."
    poetry install --with test,docs
139
+
140
# Strips the token-based login/logout cells from 0_basic_ray.ipynb and then
# executes the notebook with papermill from the guided-demos directory.
# The papermill call was previously commented out, which made this step pass
# without ever running the notebook.
- name: Run 0_basic_ray.ipynb
  working-directory: demo-notebooks/guided-demos
  run: |
    set -euo pipefail

    # Remove login/logout cells, as KinD doesn't support authentication using token
    jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
    jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
    # Run notebook; the executed copy is written to 0_basic_ray_out.ipynb
    poetry run papermill 0_basic_ray.ipynb 0_basic_ray_out.ipynb --log-output --execution-timeout 600
150
+
151
# Prepares 1_cluster_job_client.ipynb for unattended execution and runs it
# with papermill from the guided-demos directory.
# The papermill call was previously commented out, which made this step pass
# without ever running the notebook.
- name: Run 1_cluster_job_client.ipynb
  working-directory: demo-notebooks/guided-demos
  run: |
    set -euo pipefail

    # Remove login/logout cells, as KinD doesn't support authentication using token
    jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
    jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
    # Replace async logs with waiting for job to finish, async logs don't work properly in papermill.
    # The replacement cell is read from .github/resources/wait_for_job_cell.json.
    JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
    jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
    # Run notebook; the executed copy is written to 1_cluster_job_client_out.ipynb
    poetry run papermill 1_cluster_job_client.ipynb 1_cluster_job_client_out.ipynb --log-output --execution-timeout 1200
164
+
165
# Rewrites 2_basic_interactive.ipynb so it can run outside the cluster against
# the test S3 bucket, then executes it with papermill.
- name: Run 2_basic_interactive.ipynb
  working-directory: demo-notebooks/guided-demos
  env:
    GRPC_DNS_RESOLVER: "native"
    # The AWS secrets reach the script through the environment instead of
    # being expanded into the script text by the ${{ }} template.
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  run: |
    set -euo pipefail

    # Remove login/logout cells, as KinD doesn't support authentication using token
    jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
    jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
    # Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
    sed -i "s/cluster_uri()/local_client_url()/" 2_basic_interactive.ipynb
    # Set explicit namespace as SDK need it (currently) to resolve local queues
    sed -i "s/head_cpus=1,/head_cpus=1, namespace='default',/" 2_basic_interactive.ipynb
    # Set env variables for AWS S3 bucket.
    # '|' is the sed delimiter here because an AWS secret key may contain '/',
    # which would otherwise terminate the replacement text early.
    sed -i "s|\\\\\"pip\\\\\"|\\\\\"env_vars\\\\\": {\\\\\"AWS_ACCESS_KEY_ID\\\\\": \\\\\"${AWS_ACCESS_KEY_ID}\\\\\", \\\\\"AWS_SECRET_ACCESS_KEY\\\\\": \\\\\"${AWS_SECRET_ACCESS_KEY}\\\\\", \\\\\"AWS_DEFAULT_REGION\\\\\": \\\\\"us-east-1\\\\\"}, \\\\\"pip\\\\\"|" 2_basic_interactive.ipynb
    # Configure persistent storage for Ray trainer
    sed -i "s/# run_config/run_config/" 2_basic_interactive.ipynb
    sed -i "s|storage path|s3://rhoai-dw-sdk-test/test/|" 2_basic_interactive.ipynb
    # Run notebook; the executed copy is written to 2_basic_interactive_out.ipynb
    # NOTE(review): the rewritten notebook and its output copy contain the AWS
    # keys in clear text - make sure neither file is uploaded as an artifact.
    poetry run papermill 2_basic_interactive.ipynb 2_basic_interactive_out.ipynb --log-output --execution-timeout 1200
186
+
187
# Cleanup: force-removes the test bucket. Runs even when earlier steps failed,
# as long as the bucket-creation step (id: s3-bucket) succeeded.
- name: Delete AWS S3 bucket
  if: always() && steps.s3-bucket.outcome == 'success'
  run: aws s3 rb s3://rhoai-dw-sdk-test --force
128
191
129
192
# Switches kubectl to the kind-cluster context before the log-printing steps.
# Runs whenever the deploy step succeeded, even if later steps failed.
- name: Switch to kind-cluster context to print logs
  if: always() && steps.deploy.outcome == 'success'
  run: |
    kubectl config use-context kind-cluster
132
195
133
# Dumps `kubectl describe` output for every pod in the default namespace.
# Runs whenever the deploy step succeeded, even if later steps failed.
- name: Print debug info
  if: always() && steps.deploy.outcome == 'success'
  run: |
    echo "Printing debug info "
    kubectl describe pods -n default
138
201
139
202
# Prints the full CodeFlare operator log and keeps a copy under TEMP_DIR.
# Runs whenever the deploy step succeeded, even if later steps failed.
- name: Print CodeFlare operator logs
  if: always() && steps.deploy.outcome == 'success'
  run: |
    echo "Printing CodeFlare operator logs"
    # --tail -1 asks for all log lines; tee both prints and saves them
    kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
207
+
208
# Prints the full Kueue controller log and keeps a copy under TEMP_DIR.
# Runs whenever the deploy step succeeded, even if later steps failed.
- name: Print Kueue operator logs
  if: always() && steps.deploy.outcome == 'success'
  run: |
    echo "Printing Kueue operator logs"
    # Pick the pod name (first column) from the rows mentioning kueue-controller
    kueue_pod=$(kubectl get pods -n kueue-system | awk '/kueue-controller/ {print $1}')
    kubectl logs -n kueue-system --tail -1 ${kueue_pod} | tee ${TEMP_DIR}/kueue.log
144
214
145
215
# Prints the full KubeRay operator log and keeps a copy under TEMP_DIR.
# Runs whenever the deploy step succeeded, even if later steps failed.
- name: Print KubeRay operator logs
  if: always() && steps.deploy.outcome == 'success'
  run: |
    echo "Printing KubeRay operator logs"
    # ${TEMP_DIR} must not contain whitespace inside the braces; bash rejects
    # "${TEMP_DIR }" as a bad substitution.
    kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
150
220
151
221
# Exports the logs of all KinD pods through the repository-local action.
# Runs whenever the deploy step succeeded, even if later steps failed.
- name: Export all KinD pod logs
  uses: ./common/github-actions/kind-export-logs
  if: always() && steps.deploy.outcome == 'success'
  with:
    # NOTE(review): `with:` values are passed to the action as-is, so this
    # relies on the action expanding ${TEMP_DIR} in a shell - confirm, or pass
    # ${{ env.TEMP_DIR }} instead. The stray space inside the braces is removed
    # because "${TEMP_DIR }" is not a valid shell expansion.
    output-directory: ${TEMP_DIR}
156
226
157
227
- name : Upload logs
158
228
uses : actions/upload-artifact@v4
@@ -161,4 +231,4 @@ jobs:
161
231
name : logs
162
232
retention-days : 10
163
233
path : |
164
- ${{ env.CODEFLARE_TEST_OUTPUT_DIR }}/**/*.log
234
+ ${{ env.TEMP_DIR }}/**/*.log
0 commit comments