1
- name : e2e
1
+ name : Guided notebooks tests
2
2
3
3
on :
4
4
pull_request :
76
76
77
77
- name : Install NVidia GPU operator for KinD
78
78
uses : ./common/github-actions/nvidia-gpu-operator
79
+ with :
80
+ enable-time-slicing : 'true'
79
81
80
82
- name : Deploy CodeFlare stack
81
83
id : deploy
@@ -113,46 +115,90 @@ jobs:
113
115
kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user
114
116
kubectl config use-context sdk-user
115
117
116
- - name : Run e2e tests
118
+ - name : Setup Guided notebooks execution
117
119
run : |
118
- export CODEFLARE_TEST_OUTPUT_DIR=${{ env.TEMP_DIR }}
119
- echo "CODEFLARE_TEST_OUTPUT_DIR=${CODEFLARE_TEST_OUTPUT_DIR}" >> $GITHUB_ENV
120
+ echo "Installing papermill and dependencies..."
121
+ pip install poetry papermill ipython ipykernel
122
+ # Disable virtualenv creation, as papermill has problems using packages installed in a virtualenv
123
+ poetry config virtualenvs.create false
120
124
121
- set -euo pipefail
122
- pip install poetry
125
+ echo "Installing SDK..."
123
126
poetry install --with test,docs
124
- echo "Running e2e tests..."
125
- poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1
127
+
128
+ - name : Run 0_basic_ray.ipynb
129
+ run : |
130
+ set -euo pipefail
131
+
132
+ # Remove login/logout cells, as KinD doesn't support authentication using token
133
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
134
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 0_basic_ray.ipynb > 0_basic_ray.ipynb.tmp && mv 0_basic_ray.ipynb.tmp 0_basic_ray.ipynb
135
+ # Run notebook
136
+ # poetry run papermill 0_basic_ray.ipynb 0_basic_ray_out.ipynb --log-output --execution-timeout 600
137
+ working-directory : demo-notebooks/guided-demos
138
+
139
+ - name : Run 1_cluster_job_client.ipynb
140
+ run : |
141
+ set -euo pipefail
142
+
143
+ # Remove login/logout cells, as KinD doesn't support authentication using token
144
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
145
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
146
+ # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
147
+ JOB_WAIT=$(jq -r '.' ${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json)
148
+ jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' 1_cluster_job_client.ipynb > 1_cluster_job_client.ipynb.tmp && mv 1_cluster_job_client.ipynb.tmp 1_cluster_job_client.ipynb
149
+ # Run notebook
150
+ # poetry run papermill 1_cluster_job_client.ipynb 1_cluster_job_client_out.ipynb --log-output --execution-timeout 1200
151
+ working-directory : demo-notebooks/guided-demos
152
+
153
+ - name : Run 2_basic_interactive.ipynb
154
+ run : |
155
+ set -euo pipefail
156
+
157
+ # Remove login/logout cells, as KinD doesn't support authentication using token
158
+ jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
159
+ jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' 2_basic_interactive.ipynb > 2_basic_interactive.ipynb.tmp && mv 2_basic_interactive.ipynb.tmp 2_basic_interactive.ipynb
160
+ # Rewrite cluster_uri() to local_client_url() to retrieve client URL available out of cluster, as the test is executed outside of cluster
161
+ sed -i "s/cluster_uri()/local_client_url()/" 2_basic_interactive.ipynb
162
+ # Run notebook
163
+ poetry run papermill 2_basic_interactive.ipynb 2_basic_interactive_out.ipynb --log-output --execution-timeout 1200
126
164
env :
127
165
GRPC_DNS_RESOLVER : "native"
166
+ working-directory : demo-notebooks/guided-demos
128
167
129
168
- name : Switch to kind-cluster context to print logs
130
169
if : always() && steps.deploy.outcome == 'success'
131
170
run : kubectl config use-context kind-cluster
132
171
133
- - name : Print Pytest output log
172
+ - name : Print debug info
134
173
if : always() && steps.deploy.outcome == 'success'
135
174
run : |
136
- echo "Printing Pytest output logs "
137
- cat ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log
175
+ echo "Printing debug info "
176
+ kubectl describe pods -n default
138
177
139
178
- name : Print CodeFlare operator logs
140
179
if : always() && steps.deploy.outcome == 'success'
141
180
run : |
142
181
echo "Printing CodeFlare operator logs"
143
- kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${CODEFLARE_TEST_OUTPUT_DIR}/codeflare-operator.log
182
+ kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log
183
+
184
+ - name : Print Kueue operator logs
185
+ if : always() && steps.deploy.outcome == 'success'
186
+ run : |
187
+ echo "Printing Kueue operator logs"
188
+ KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
189
+ kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log
144
190
145
191
- name : Print KubeRay operator logs
146
192
if : always() && steps.deploy.outcome == 'success'
147
193
run : |
148
194
echo "Printing KubeRay operator logs"
149
- kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${CODEFLARE_TEST_OUTPUT_DIR}/kuberay.log
195
+ kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log
150
196
151
197
- name : Export all KinD pod logs
152
198
uses : ./common/github-actions/kind-export-logs
153
199
if : always() && steps.deploy.outcome == 'success'
154
200
with :
155
- output-directory : ${CODEFLARE_TEST_OUTPUT_DIR}
201
+ output-directory : ${TEMP_DIR}
156
202
157
203
- name : Upload logs
158
204
uses : actions/upload-artifact@v4
@@ -161,4 +207,4 @@ jobs:
161
207
name : logs
162
208
retention-days : 10
163
209
path : |
164
- ${{ env.CODEFLARE_TEST_OUTPUT_DIR }}/**/*.log
210
+ ${{ env.TEMP_DIR }}/**/*.log
0 commit comments