diff --git a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml
index 98a2305c46..a58bd03dbf 100644
--- a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml
+++ b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml
@@ -266,6 +266,7 @@ stages:
               fi
             name: "testAsyncDelete"
             displayName: "Verify Async Delete when CNS is down"
+          - template: ../../templates/cilium-mtu-check.yaml
       - template: ../k8s-e2e/k8s-e2e-job-template.yaml
         parameters:
           sub: $(BUILD_VALIDATIONS_SERVICE_CONNECTION)
diff --git a/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e-step-template.yaml b/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e-step-template.yaml
index 9337ef48d5..7c1ec21f33 100644
--- a/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e-step-template.yaml
+++ b/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e-step-template.yaml
@@ -161,3 +161,5 @@ steps:
       fi
     name: "testAsyncDelete"
     displayName: "Verify Async Delete when CNS is down"
+
+  - template: ../../templates/cilium-mtu-check.yaml
diff --git a/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e.steps.yaml b/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e.steps.yaml
index dc49686b13..7f00f3e124 100644
--- a/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e.steps.yaml
+++ b/.pipelines/singletenancy/cilium-dualstack-overlay/cilium-dualstackoverlay-e2e.steps.yaml
@@ -170,3 +170,5 @@ steps:
       fi
     name: "testAsyncDelete"
     displayName: "Verify Async Delete when CNS is down"
+
+  - template: ../../templates/cilium-mtu-check.yaml
diff --git a/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml
index 7a81685b85..a9f024ae15 100644
--- a/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml
+++ b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e-step-template.yaml
@@ -199,6 +199,8 @@ steps:
     name: "testAsyncDelete"
     displayName: "Verify Async Delete when CNS is down"
 
+  - template: ../../templates/cilium-mtu-check.yaml
+
   - script: |
       ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test-output/
       echo $ARTIFACT_DIR
diff --git a/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e.steps.yaml b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e.steps.yaml
index 6856847c1e..bdedaa7901 100644
--- a/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e.steps.yaml
+++ b/.pipelines/singletenancy/cilium-overlay-withhubble/cilium-overlay-e2e.steps.yaml
@@ -196,6 +196,8 @@ steps:
       fi
     name: "testAsyncDelete"
     displayName: "Verify Async Delete when CNS is down"
+
+  - template: ../../templates/cilium-mtu-check.yaml
 
   - script: |
       ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test-output/
diff --git a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml
index 7444ef1d98..04730a16f0 100644
--- a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml
+++ b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml
@@ -248,3 +248,5 @@ steps:
       fi
     name: "testAsyncDelete"
     displayName: "Verify Async Delete when CNS is down"
+
+  - template: ../../templates/cilium-mtu-check.yaml
diff --git a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e.steps.yaml b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e.steps.yaml
index c82bafb9a8..39c2cae2a3 100644
--- a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e.steps.yaml
+++ b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e.steps.yaml
@@ -246,3 +246,5 @@ steps:
       fi
     name: "testAsyncDelete"
     displayName: "Verify Async Delete when CNS is down"
+
+  - template: ../../templates/cilium-mtu-check.yaml
diff --git a/.pipelines/templates/cilium-mtu-check.yaml b/.pipelines/templates/cilium-mtu-check.yaml
new file mode 100644
index 0000000000..6cf1967493
--- /dev/null
+++ b/.pipelines/templates/cilium-mtu-check.yaml
@@ -0,0 +1,7 @@
+steps:
+  - script: |
+      cd hack/scripts
+      chmod +x cilium-mtu-validation.sh
+      ./cilium-mtu-validation.sh
+    name: "CiliumMTUValidation"
+    displayName: "Run Cilium MTU Validation"
diff --git a/.pipelines/templates/cilium-tests.yaml b/.pipelines/templates/cilium-tests.yaml
index 6821e4ec56..e3d091bd3f 100644
--- a/.pipelines/templates/cilium-tests.yaml
+++ b/.pipelines/templates/cilium-tests.yaml
@@ -85,3 +85,5 @@ steps:
       fi
     name: "testAsyncDelete"
     displayName: "Verify Async Delete when CNS is down"
+
+  - template: ./cilium-mtu-check.yaml
diff --git a/hack/manifests/nginx.yaml b/hack/manifests/nginx.yaml
new file mode 100644
index 0000000000..2ecfc95a72
--- /dev/null
+++ b/hack/manifests/nginx.yaml
@@ -0,0 +1,29 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx
+  labels:
+    app: nginx
+  namespace: kube-system
+spec:
+  replicas: 4
+  selector:
+    matchLabels:
+      app: nginx
+  template:
+    metadata:
+      labels:
+        app: nginx
+    spec:
+      containers:
+      - name: nginx
+        image: mcr.microsoft.com/azurelinux/base/nginx:1
+        ports:
+        - containerPort: 80
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname  # KV: Key is hostname, value is each unique nodename
+        whenUnsatisfiable: ScheduleAnyway
+        labelSelector:
+          matchLabels:
+            app: nginx
diff --git a/hack/scripts/cilium-mtu-validation.sh b/hack/scripts/cilium-mtu-validation.sh
new file mode 100755
index 0000000000..9e8386b849
--- /dev/null
+++ b/hack/scripts/cilium-mtu-validation.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# Validates that the eth0 MTU is consistent on every node of a Cilium cluster.
+#
+# For each node, the eth0 MTU is read from three places and compared:
+#   1. the Cilium agent pod running on the node,
+#   2. an nginx test pod running on the node,
+#   3. the node itself, through a `kubectl debug node/...` pod.
+# The script exits 1 on the first lookup failure or MTU mismatch. The nginx
+# deployment and the node-debugger pods are removed on every exit path.
+set -uo pipefail
+
+NAMESPACE="kube-system"
+TIMEOUT="60s"
+# Resolve the manifest from this script's location so the caller's working
+# directory does not matter.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+NGINX_MANIFEST="$SCRIPT_DIR/../manifests/nginx.yaml"
+
+# Report $1 as plain text and as an Azure Pipelines error, then abort.
+fail() {
+    echo "$1"
+    echo "##[error]$1"
+    exit 1
+}
+
+# Succeeds when $1 is a non-empty string of decimal digits.
+is_number() {
+    [[ "$1" =~ ^[0-9]+$ ]]
+}
+
+# Print the name of the first pod on node $1 matching label selector $2.
+first_pod_on_node() {
+    kubectl get pods -n "$NAMESPACE" --field-selector "spec.nodeName=$1" -l "$2" \
+        -o jsonpath='{.items[0].metadata.name}'
+}
+
+# Print the eth0 MTU as seen from inside pod $1.
+pod_eth0_mtu() {
+    kubectl exec -n "$NAMESPACE" "$1" -- cat /sys/class/net/eth0/mtu 2>/dev/null | tr -d '\r'
+}
+
+# Print the eth0 MTU of node $1, read through a busybox debug pod.
+# kubectl may print status lines before the value, so keep only the last line.
+node_eth0_mtu() {
+    kubectl debug "node/$1" -it --image=busybox -- sh -c "cat /sys/class/net/eth0/mtu" 2>/dev/null \
+        | tail -n 1 | tr -d '\r'
+}
+
+# Remove everything this script created. Installed as an EXIT trap so it also
+# runs when a check fails part-way through.
+cleanup() {
+    kubectl delete deployment nginx -n "$NAMESPACE" --ignore-not-found
+    echo "Cleaned up nginx deployment"
+
+    # One node-debugger pod is left behind per `kubectl debug` call.
+    local debug_pods
+    debug_pods=$(kubectl get pods -o name | grep "node-debugger")
+    if [ -z "$debug_pods" ]; then
+        echo "No debug pod found"
+        return
+    fi
+    # Unquoted on purpose: each pod name must be its own argument. The delete
+    # itself waits for the pods to be gone, bounded by --timeout.
+    # shellcheck disable=SC2086
+    if ! kubectl delete $debug_pods --timeout="$TIMEOUT"; then
+        echo "Failed to clean up debug pod $debug_pods"
+    fi
+}
+trap cleanup EXIT
+
+echo "Deploy nginx pods for MTU testing"
+kubectl apply -f "$NGINX_MANIFEST" || fail "Failed to apply $NGINX_MANIFEST"
+kubectl wait --for=condition=available --timeout="$TIMEOUT" -n "$NAMESPACE" deployment/nginx \
+    || fail "nginx deployment did not become available"
+
+# Check node count
+node_count=$(kubectl get nodes --no-headers | wc -l)
+
+# in CNI release test scenario scale deployments to 3 * node count to get replicas on each node
+if [ "$node_count" -gt 1 ]; then
+    replicas=$((3 * node_count))
+    echo "Scaling nginx deployment to $replicas replicas"
+    kubectl scale deployment nginx --replicas="$replicas" -n "$NAMESPACE" \
+        || fail "Failed to scale nginx deployment"
+fi
+# Wait for every requested replica rather than the Available condition, which
+# may not yet reflect the scale-up when it is polled.
+kubectl rollout status deployment/nginx -n "$NAMESPACE" --timeout="$TIMEOUT" \
+    || fail "nginx deployment did not finish rolling out"
+
+echo "Checking MTU for pods in namespace: $NAMESPACE using Cilium agent and nginx MTU"
+
+nodes=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}')
+[ -n "$nodes" ] || fail "No nodes found"
+
+for node in $nodes; do
+    echo "Checking node: $node"
+
+    cilium_pod=$(first_pod_on_node "$node" "k8s-app=cilium")
+    [ -n "$cilium_pod" ] || fail "Failed to find Cilium agent pod on node $node"
+    cilium_mtu=$(pod_eth0_mtu "$cilium_pod")
+    is_number "$cilium_mtu" || fail "Failed to get MTU from Cilium agent pod on node $node"
+    echo "Cilium agent eth0 MTU: $cilium_mtu"
+
+    nginx_pod=$(first_pod_on_node "$node" "app=nginx")
+    [ -n "$nginx_pod" ] || fail "Failed to find nginx pod on node $node"
+    nginx_mtu=$(pod_eth0_mtu "$nginx_pod")
+    is_number "$nginx_mtu" || fail "Failed to get MTU from nginx pod on node $node"
+    echo "Nginx pod eth0 MTU: $nginx_mtu"
+
+    node_mtu=$(node_eth0_mtu "$node")
+    is_number "$node_mtu" || fail "Failed to get MTU from node $node"
+    echo "Node eth0 MTU: $node_mtu"
+
+    if [ "$cilium_mtu" -eq "$nginx_mtu" ] && [ "$nginx_mtu" -eq "$node_mtu" ]; then
+        echo "MTU validation passed for node $node"
+    else
+        echo "MTU validation failed for node $node"
+        echo "Cilium agent MTU: $cilium_mtu, Nginx pod MTU: $nginx_mtu, Node MTU: $node_mtu"
+        echo "##[error]MTU validation failed. MTUs do not match."
+        exit 1
+    fi
+
+    echo "----------------------------------------"
+done