Skip to content

Commit 4ba8490

Browse files
authored
Merge pull request #1855 from atlassian/issue/dev/ECO-963-run-bzt-in-session
Updated bzt_on_pod.sh to run inside tmux sessions to recover from net…
2 parents 0021883 + 76a2958 commit 4ba8490

File tree

1 file changed

+144
-19
lines changed

1 file changed

+144
-19
lines changed

app/util/k8s/bzt_on_pod.sh

Lines changed: 144 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ else
2828
exit 1
2929
fi
3030

31+
# Detect the optional --ci flag. The first positional argument is skipped,
# so only arguments from the second one onwards are inspected.
ci=false
for cli_arg in "${@:2}"; do
  [[ "$cli_arg" == "--ci" ]] || continue
  ci=true
  break
done
3139

3240
echo "INFO: Update kubeconfig"
3341
aws eks update-kubeconfig --name atlas-"$ENVIRONMENT_NAME"-cluster --region "$REGION"
@@ -43,27 +51,144 @@ fi
4351

4452
echo "INFO: Execution environment pod name: $exec_pod_name"
4553

46-
echo "INFO: Cleanup dc-app-performance-toolkit folder on the exec env pod"
47-
kubectl exec -it "$exec_pod_name" -n atlassian -- rm -rf /dc-app-performance-toolkit
48-
49-
echo "INFO: Copy latest dc-app-performance-toolkit folder to the exec env pod"
50-
start=$(date +%s)
51-
# tar only app folder, exclude results and util/k8s folder
52-
tar -czf dcapt.tar.gz -C dc-app-performance-toolkit --exclude results --exclude util/k8s app Dockerfile requirements.txt
53-
kubectl exec -it "$exec_pod_name" -n atlassian -- mkdir /dc-app-performance-toolkit
54-
cat dcapt.tar.gz | kubectl exec -i -n atlassian "$exec_pod_name" -- tar xzf - -C /dc-app-performance-toolkit
55-
rm -rf dcapt.tar.gz
56-
end=$(date +%s)
57-
runtime=$((end-start))
58-
echo "INFO: Copy finished in $runtime seconds"
59-
60-
if [[ $2 == "--docker_image_rebuild" ]]; then
61-
echo "INFO: Rebuild docker image"
62-
kubectl exec -it "$exec_pod_name" -n atlassian -- docker build -t $DCAPT_DOCKER_IMAGE dc-app-performance-toolkit
54+
# Make sure tmux is available in the pod; installs it with apk when the
# binary is not found.
echo "INFO: Ensuring tmux is available in the pod"
kubectl exec -i "$exec_pod_name" -n atlassian -- sh -c "command -v tmux || apk add --no-cache tmux"

# Detect --docker_image_rebuild anywhere after the first argument, so the
# flag keeps working when it is combined with other flags (for example
# "--ci --docker_image_rebuild") and is no longer tied to position 2.
docker_image_rebuild=false
for opt in "${@:2}"; do
  if [[ "$opt" == "--docker_image_rebuild" ]]; then
    docker_image_rebuild=true
    break
  fi
done

# Check whether a tmux session named bzt_session already exists in the pod.
# The remote shell prints the exit status of "tmux has-session", so "0"
# means the session exists. Only the session's existence is checked here.
session_exists=$(kubectl exec "$exec_pod_name" -n atlassian -- sh -c "tmux has-session -t bzt_session 2>/dev/null; echo \$?")

if [[ "$session_exists" == "0" ]]; then
  echo "INFO: Found existing tmux session 'bzt_session', attaching to it..."
else
  echo "INFO: Creating new tmux session and running setup inside it"

  # Build the archive locally: only the app folder, Dockerfile and
  # requirements.txt, excluding results and util/k8s.
  echo "INFO: Preparing dc-app-performance-toolkit archive"
  start=$(date +%s)
  tar -czf dcapt.tar.gz -C dc-app-performance-toolkit --exclude results --exclude util/k8s app Dockerfile requirements.txt

  # Copy the archive to the pod. Abort on failure: the remote setup script
  # deletes /dc-app-performance-toolkit before extracting, so starting it
  # without the archive would leave the pod with an empty toolkit folder.
  echo "INFO: Copying archive to pod"
  if ! kubectl cp dcapt.tar.gz atlassian/"$exec_pod_name":/tmp/dcapt.tar.gz; then
    rm -f dcapt.tar.gz
    echo "ERROR: Failed to copy dc-app-performance-toolkit archive to the pod" >&2
    exit 1
  fi
  rm -f dcapt.tar.gz
  end=$(date +%s)
  runtime=$((end-start))
  echo "INFO: Archive preparation and copy finished in $runtime seconds"

  # Script executed inside the tmux session. It is a double-quoted string,
  # so $1, $docker_image_rebuild and $DCAPT_DOCKER_IMAGE are expanded
  # locally, before the text is sent to the pod.
  setup_script="
    echo 'INFO: Starting setup inside tmux session'

    # Cleanup and recreate directory
    echo 'INFO: Cleanup dc-app-performance-toolkit folder'
    rm -rf /dc-app-performance-toolkit
    mkdir -p /dc-app-performance-toolkit

    # Extract the archive
    echo 'INFO: Extracting dc-app-performance-toolkit'
    tar xzf /tmp/dcapt.tar.gz -C /dc-app-performance-toolkit
    rm -f /tmp/dcapt.tar.gz

    # Docker image rebuild if requested
    if [ '$docker_image_rebuild' = 'true' ]; then
      echo 'INFO: Rebuilding docker image'
      docker build -t $DCAPT_DOCKER_IMAGE dc-app-performance-toolkit
    fi

    # Create a marker file to indicate setup is complete
    touch /tmp/dcapt_setup_complete

    echo 'INFO: Setup complete, starting bzt execution'
    # Run bzt
    docker run --shm-size=4g -v /dc-app-performance-toolkit:/dc-app-performance-toolkit $DCAPT_DOCKER_IMAGE '$1'
  "

  # Start a detached tmux session running the script, and mirror the pane
  # output to /tmp/bzt_session.log so it can be streamed without attaching.
  kubectl exec -i "$exec_pod_name" -n atlassian -- sh -c "
    rm -f /tmp/bzt_session.log
    tmux new-session -d -s bzt_session \"$setup_script\"
    tmux pipe-pane -t bzt_session \"cat > /tmp/bzt_session.log\"
  "

  echo "INFO: Tmux session 'bzt_session' created with setup and execution"
fi
64116

65-
echo "INFO: Run bzt on the exec env pod"
66-
kubectl exec -it "$exec_pod_name" -n atlassian -- docker run --shm-size=4g -v "/dc-app-performance-toolkit:/dc-app-performance-toolkit" $DCAPT_DOCKER_IMAGE "$1"
117+
#######################################
# Report whether the tmux session 'bzt_session' exists on the exec pod.
# Globals:   exec_pod_name (read)
# Arguments: none
# Outputs:   nothing
# Returns:   0 - session exists
#            1 - pod was reached and the session does not exist
#            2 - status could not be determined (kubectl produced no
#                output, e.g. because the connection to the pod failed)
#######################################
bzt_session_status() {
  local probe
  probe=$(kubectl exec "$exec_pod_name" -n atlassian -- sh -c "tmux has-session -t bzt_session 2>/dev/null; echo \$?" 2>/dev/null)
  case "$probe" in
    0) return 0 ;;
    "") return 2 ;;
    *) return 1 ;;
  esac
}

attempt=1
max_attempts=5
echo "INFO: Attaching to tmux session 'bzt_session'..."

while (( attempt <= max_attempts )); do
  echo "INFO: Attempt $attempt to attach to tmux session 'bzt_session'..."

  # Check if the session still exists before attempting to attach.
  session_status=0
  bzt_session_status || session_status=$?

  if (( session_status == 1 )); then
    echo "INFO: bzt session has finished or does not exist."
    break
  fi

  exit_code=0
  if (( session_status == 2 )); then
    # The pod could not be queried. Treat this like a dropped connection and
    # go through the retry path instead of assuming the run has finished.
    exit_code=1
  elif [[ "$ci" == "true" ]]; then
    # CI mode: stream logs from /tmp/bzt_session.log (non-interactive).
    # The remote loop polls every 5 seconds and returns once the tmux
    # session is gone, then stops the background tail.
    kubectl exec "$exec_pod_name" -n atlassian -- sh -c "
      tail -f /tmp/bzt_session.log &
      tail_pid=\$!

      while true; do
        sleep 5
        if ! tmux has-session -t bzt_session 2>/dev/null; then
          break
        fi
      done

      kill \$tail_pid 2>/dev/null || true
    " 2>/dev/null || exit_code=$?
  else
    # Interactive mode: attach to the tmux session.
    kubectl exec -it "$exec_pod_name" -n atlassian -- tmux attach-session -t bzt_session 2>/dev/null || exit_code=$?
  fi

  if (( exit_code == 0 )); then
    echo "INFO: Successfully detached from tmux session."
    # Double-check whether the session still exists after a clean exit.
    session_status=0
    bzt_session_status || session_status=$?
    if (( session_status == 1 )); then
      echo "INFO: bzt session has completed."
      break
    fi
    # Clean detachment but session not confirmed gone: keep monitoring
    # without consuming a retry attempt.
    echo "INFO: Session still active, continuing to monitor..."
  else
    # Network error or other failure of the kubectl exec call.
    echo "WARNING: Connection lost (exit code: $exit_code)"

    # Only stop if the pod positively reports that the session is gone.
    session_status=0
    bzt_session_status || session_status=$?
    if (( session_status == 1 )); then
      echo "INFO: bzt session has finished during disconnection."
      break
    fi

    if (( attempt == max_attempts )); then
      echo "ERROR: Reached maximum number of attempts ($max_attempts). Session may still be running."
      echo "ERROR: You can manually reconnect using: kubectl exec -it $exec_pod_name -n atlassian -- tmux attach-session -t bzt_session"
      break
    fi

    # Linearly increasing backoff between retries (3s, 4s, 5s, ...).
    sleep_time=$((2 + attempt))
    echo "INFO: Network error or disconnect detected, reconnecting to tmux session in $sleep_time seconds (attempt $((attempt+1)))..."
    sleep "$sleep_time"
    attempt=$((attempt+1))
  fi
done
191+
67192
sleep 10
68193

69194
echo "INFO: Copy results folder from the exec env pod to local"

0 commit comments

Comments
 (0)