Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/manual-compare-versions-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ jobs:
res_p99_time=$(compare "p99_time" "${{ needs.runBenchmarkForVersion1.outputs.p99_time }}" "${{ needs.runBenchmarkForVersion2.outputs.p99_time }}")
res_vm_rss_memory_usage=$(compare "vm_rss_memory_usage" "${{ needs.runBenchmarkForVersion1.outputs.vm_rss_memory_usage }}" "${{ needs.runBenchmarkForVersion2.outputs.vm_rss_memory_usage }}")
res_rss_anon_memory_usage=$(compare "rss_anon_memory_usage" "${{ needs.runBenchmarkForVersion1.outputs.rss_anon_memory_usage }}" "${{ needs.runBenchmarkForVersion2.outputs.rss_anon_memory_usage }}")
res_cpu_usage=$(compare "cpu_usage" "${{ needs.runBenchmarkForVersion1.outputs.cpu }}" "${{ needs.runBenchmarkForVersion2.outputs.cpu }}")
res_upload_time=$(compare "upload_time" "${{ needs.runBenchmarkForVersion1.outputs.upload_time }}" "${{ needs.runBenchmarkForVersion2.outputs.upload_time }}")
res_indexing_time=$(compare "indexing_time" "${{ needs.runBenchmarkForVersion1.outputs.indexing_time }}" "${{ needs.runBenchmarkForVersion2.outputs.indexing_time }}")

Expand All @@ -179,6 +180,7 @@ jobs:
echo "| p95_time | ${{ needs.runBenchmarkForVersion1.outputs.p95_time }} | ${{ needs.runBenchmarkForVersion2.outputs.p95_time }} | ${res_p95_time} |" >> $GITHUB_STEP_SUMMARY
echo "| p99_time | ${{ needs.runBenchmarkForVersion1.outputs.p99_time }} | ${{ needs.runBenchmarkForVersion2.outputs.p99_time }} | ${res_p99_time} |" >> $GITHUB_STEP_SUMMARY
echo "| vm_rss_memory_usage | ${{ needs.runBenchmarkForVersion1.outputs.vm_rss_memory_usage }} | ${{ needs.runBenchmarkForVersion2.outputs.vm_rss_memory_usage }} | ${res_vm_rss_memory_usage} |" >> $GITHUB_STEP_SUMMARY
echo "| rss_anon_memory_usage | ${{ needs.runBenchmarkForVersion1.outputs.rss_anon_memory_usage }} | ${{ needs.runBenchmarkForVersion2.outputs.rss_anon_memory_usage }} | ${res_rss_anon_memory_usage} |" >> $GITHUB_STEP_SUMMARY
echo "| rss_anon_memory_usage | ${{ needs.runBenchmarkForVersion1.outputs.rss_anon_memory_usage }} | ${{ needs.runBenchmarkForVersion2.outputs.rss_anon_memory_usage }} | ${res_rss_anon_memory_usage} |" >> $GITHUB_STEP_SUMMARY
echo "| cpu | ${{ needs.runBenchmarkForVersion1.outputs.cpu }} | ${{ needs.runBenchmarkForVersion2.outputs.cpu }} | ${res_cpu_usage} |" >> $GITHUB_STEP_SUMMARY
echo "| upload_time | ${{ needs.runBenchmarkForVersion1.outputs.upload_time }} | ${{ needs.runBenchmarkForVersion2.outputs.upload_time }} | ${res_upload_time} |" >> $GITHUB_STEP_SUMMARY
echo "| indexing_time | ${{ needs.runBenchmarkForVersion1.outputs.indexing_time }} | ${{ needs.runBenchmarkForVersion2.outputs.indexing_time }} | ${res_indexing_time} |" >> $GITHUB_STEP_SUMMARY
40 changes: 40 additions & 0 deletions tools/qdrant_collect_cpu_usage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
#
# Samples the utime counter of the qdrant process on the benchmark server and
# turns a start/end pair of samples into CPU seconds.
#
# Usage: qdrant_collect_cpu_usage.sh <start|end>
#   start  store the current sample in ./results/cpu/utime-<timestamp>.txt
#   end    subtract the newest stored start sample from the current sample and
#          write the result to ./results/cpu/cpu-usage-<timestamp>.txt
#
# Environment:
#   CLOUD_NAME       sub-directory holding get_public_ip.sh (default: hetzner)
#   SERVER_USERNAME  SSH user on the server (default: root)
#   SERVER_NAME      server whose public IP is looked up
#                    (default: benchmark-server-1)
#
# Exit status: 0 on success, non-zero on an unknown mode, a failed remote
# command, a non-numeric sample, or (in "end" mode) a missing start sample.

PS4='ts=$(date "+%Y-%m-%dT%H:%M:%SZ") level=DEBUG line=$LINENO file=$BASH_SOURCE '
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the most recently modified results/cpu/utime-*.txt, or an empty line
# when there is none. Uses a glob instead of parsing `ls` output.
newest_utime_file() {
  local candidate newest=""
  for candidate in results/cpu/utime-*.txt; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  printf '%s\n' "$newest"
}

# Examples: start or end
# Default to empty so that a missing argument is reported as an unknown mode
# instead of tripping `set -u`.
MODE=${1:-}

# Reject a bad mode before doing any remote work.
case "$MODE" in
  start | end) ;;
  *)
    echo "Unknown mode: $MODE"
    exit 1
    ;;
esac

CLOUD_NAME=${CLOUD_NAME:-"hetzner"}
SERVER_USERNAME=${SERVER_USERNAME:-"root"}

SCRIPT=$(realpath "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")

BENCH_SERVER_NAME=${SERVER_NAME:-"benchmark-server-1"}

IP_OF_THE_SERVER=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_public_ip.sh" "$BENCH_SERVER_NAME")

# Field 14 of /proc/<pid>/stat is utime: user-mode CPU time in clock ticks.
UTIME=$(ssh -tt -o ServerAliveInterval=10 -o ServerAliveCountMax=10 "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "cat /proc/\$(pidof qdrant)/stat | awk '{print \$14}'")
# Clean up any whitespace characters (e.g. the carriage return that a forced
# TTY typically appends).
UTIME=${UTIME//[[:space:]]/}

# The remote pipeline exits 0 even when `cat` fails (awk is its last stage),
# so an absent qdrant process shows up here as an empty or non-numeric value.
[[ "$UTIME" =~ ^[0-9]+$ ]] \
  || die "Could not read utime of qdrant on ${IP_OF_THE_SERVER}: got '${UTIME}'"

CURRENT_DATE=$(date +%Y-%m-%d-%H-%M-%S)

mkdir -p results/cpu

if [[ "$MODE" == "end" ]]; then
  echo "Calculate CPU usage (seconds) over period of time"
  UTIME_FILE=$(newest_utime_file)
  [[ -n "$UTIME_FILE" ]] \
    || die "No results/cpu/utime-*.txt found; run this script with 'start' first"

  # Only the first line is the start sample: every "end" run appends its own
  # sample below it, so reading the whole file would glue numbers together.
  # `read` returns non-zero on a missing trailing newline but still assigns.
  IFS= read -r UTIME_START < "$UTIME_FILE" || true
  UTIME_START=${UTIME_START//[[:space:]]/}
  [[ "$UTIME_START" =~ ^[0-9]+$ ]] \
    || die "Start sample in ${UTIME_FILE} is not a number: '${UTIME_START}'"

  echo "$UTIME" >> "${UTIME_FILE}"
  # NOTE(review): the divisor assumes 100 clock ticks per second on the
  # server — confirm with `getconf CLK_TCK` there.
  CPU=$(echo "scale=2; ($UTIME - $UTIME_START) / 100" | bc)
  echo "$CPU" > "./results/cpu/cpu-usage-${CURRENT_DATE}.txt"
else
  # MODE is "start" here; anything else was rejected above.
  echo "Store utime start value in ./results/cpu/utime-${CURRENT_DATE}.txt"
  echo "$UTIME" > "./results/cpu/utime-${CURRENT_DATE}.txt"
fi
8 changes: 7 additions & 1 deletion tools/run_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ trap 'handle_term' TERM

# Script, that runs benchmark within the GitHub Actions CI environment

BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}
# Possible values for BENCHMARK_STRATEGY: default, tenants, parallel and collection-reload
export BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}

SCRIPT=$(realpath "$0")
SCRIPT_PATH=$(dirname "$SCRIPT")
Expand All @@ -37,9 +38,14 @@ if [[ "$BENCHMARK_STRATEGY" == "collection-reload" ]]; then
export TELEMETRY_API_RESPONSE_FILE=$(ls -t results/telemetry-api-*.json | head -n 1)
else
# any other strategies are considered to have search & upload results
export TELEMETRY_API_RESPONSE_FILE=$(ls -t results/telemetry-api-*.json | head -n 1)
export SEARCH_RESULTS_FILE=$(find results/ -maxdepth 1 -type f -name '*-search-*.json' -printf '%T@ %p\n' | sort -nr | head -n 1 | cut -d' ' -f2-)
export UPLOAD_RESULTS_FILE=$(find results/ -maxdepth 1 -type f -name '*-upload-*.json' -printf '%T@ %p\n' | sort -nr | head -n 1 | cut -d' ' -f2-)

if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
export CPU_USAGE_FILE=$(ls -t results/cpu/cpu-usage-*.txt | head -n 1)
fi

if [[ "$BENCHMARK_STRATEGY" == "parallel" ]]; then
export PARALLEL_UPLOAD_RESULTS_FILE=$(ls -t results/parallel/*-upload-*.json | head -n 1)
export PARALLEL_SEARCH_RESULTS_FILE=$(ls -t results/parallel/*-search-*.json | head -n 1)
Expand Down
4 changes: 4 additions & 0 deletions tools/run_remote_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@ case "$BENCHMARK_STRATEGY" in

bash -x "${SCRIPT_PATH}/run_server_container.sh" "$SERVER_CONTAINER_NAME"

bash -x "${SCRIPT_PATH}/qdrant_collect_cpu_usage.sh" "start"

bash -x "${SCRIPT_PATH}/run_client_script.sh"

bash -x "${SCRIPT_PATH}/qdrant_collect_cpu_usage.sh" "end"

bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
;;
"tenants")
Expand Down
24 changes: 22 additions & 2 deletions tools/upload_parallel_results_postgres.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
# p99_time real,
# search_time real,
# no_upsert_search_time real,
# cpu real,
# cpu_telemetry real,
# );

PARALLEL_SEARCH_RESULTS_FILE=${PARALLEL_SEARCH_RESULTS_FILE:-""}
Expand Down Expand Up @@ -63,6 +65,13 @@ if [[ -z "$ROOT_API_RESPONSE_FILE" ]]; then
exit 1
fi

if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
if [[ -z "$CPU_USAGE_FILE" ]]; then
echo "CPU_USAGE_FILE is not set"
exit 1
fi
fi

RPS=NULL
MEAN_PRECISIONS=NULL
P95_TIME=NULL
Expand All @@ -71,6 +80,8 @@ UPLOAD_TIME=NULL
INDEXING_TIME=NULL
SEARCH_TIME=NULL
NO_UPSERT_SEARCH_TIME=NULL
CPU=NULL
CPU_TELEMETRY=NULL

RPS=$(jq -r '.results.rps' "$PARALLEL_SEARCH_RESULTS_FILE")
MEAN_PRECISIONS=$(jq -r '.results.mean_precisions' "$PARALLEL_SEARCH_RESULTS_FILE")
Expand All @@ -82,14 +93,20 @@ NO_UPSERT_SEARCH_TIME=$(jq -r '.results.total_time' "$SEARCH_RESULT_FILE")
UPLOAD_TIME=$(jq -r '.results.upload_time' "$PARALLEL_UPLOAD_RESULTS_FILE")
INDEXING_TIME=$(jq -r '.results.total_time' "$PARALLEL_UPLOAD_RESULTS_FILE")

if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
# Only this strategy produces cpu usage results files
CPU=$(cat "$CPU_USAGE_FILE" | tr -d '[:space:]')
fi
CPU_TELEMETRY=$(jq -r '.result.hardware.collection_data.benchmark.cpu' "$TELEMETRY_API_RESPONSE_FILE")

QDRANT_COMMIT=$(jq -r '.commit' "$ROOT_API_RESPONSE_FILE")

MEASURE_TIMESTAMP=${MEASURE_TIMESTAMP:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}


docker run --name "vector-db" --rm jbergknoff/postgresql-client "postgresql://qdrant:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:5432/postgres" -c "
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, search_time, no_upsert_search_time)
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${SEARCH_TIME}, ${NO_UPSERT_SEARCH_TIME});
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, search_time, no_upsert_search_time, cpu_telemetry, cpu)
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${SEARCH_TIME}, ${NO_UPSERT_SEARCH_TIME}, ${CPU_TELEMETRY}, ${CPU});
"

if [[ "$IS_CI_RUN" == "true" ]]; then
Expand All @@ -103,4 +120,7 @@ if [[ "$IS_CI_RUN" == "true" ]]; then

echo "upload_time=${UPLOAD_TIME}" >> "$GITHUB_OUTPUT"
echo "indexing_time=${INDEXING_TIME}" >> "$GITHUB_OUTPUT"

echo "cpu_telemetry=${CPU_TELEMETRY}" >> "$GITHUB_OUTPUT"
echo "cpu=${CPU}" >> "$GITHUB_OUTPUT"
fi
30 changes: 25 additions & 5 deletions tools/upload_results_postgres.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
# p95_time real,
# p99_time real,
# vm_rss_mem real,
# rss_anon_mem real
# collection_load_time_ms real
# rss_anon_mem real,
# collection_load_time_ms real,
# cpu real,
# cpu_telemetry real,
# );

SEARCH_RESULTS_FILE=${SEARCH_RESULTS_FILE:-""}
Expand Down Expand Up @@ -69,15 +71,25 @@ if [[ -z "$ROOT_API_RESPONSE_FILE" ]]; then
exit 1
fi

if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
if [[ -z "$CPU_USAGE_FILE" ]]; then
echo "CPU_USAGE_FILE is not set"
exit 1
fi
fi

COLLECTION_LOAD_TIME=NULL
RPS=NULL
MEAN_PRECISIONS=NULL
P95_TIME=NULL
P99_TIME=NULL
UPLOAD_TIME=NULL
INDEXING_TIME=NULL
CPU=NULL
CPU_TELEMETRY=NULL

if [[ "$BENCHMARK_STRATEGY" == "collection-reload" ]]; then
# this strategy does not produce search & upload results files
echo "BENCHMARK_STRATEGY is $BENCHMARK_STRATEGY, upload telemetry"
COLLECTION_LOAD_TIME=$(jq -r '.result.collections.collections[] | select(.id == "benchmark") | .init_time_ms' "$TELEMETRY_API_RESPONSE_FILE")
else
Expand All @@ -94,14 +106,19 @@ fi
VM_RSS_MEMORY_USAGE=$(cat "$VM_RSS_MEMORY_USAGE_FILE" | tr -d '[:space:]')
RSS_ANON_MEMORY_USAGE=$(cat "$RSS_ANON_MEMORY_USAGE_FILE" | tr -d '[:space:]')

if [[ "$BENCHMARK_STRATEGY" == "default" ]]; then
# Only this strategy produces cpu usage results files
CPU=$(cat "$CPU_USAGE_FILE" | tr -d '[:space:]')
fi
CPU_TELEMETRY=$(jq -r '.result.hardware.collection_data.benchmark.cpu' "$TELEMETRY_API_RESPONSE_FILE")

QDRANT_COMMIT=$(jq -r '.commit' "$ROOT_API_RESPONSE_FILE")

MEASURE_TIMESTAMP=${MEASURE_TIMESTAMP:-$(date -u +"%Y-%m-%dT%H:%M:%SZ")}


docker run --name "vector-db" --rm jbergknoff/postgresql-client "postgresql://qdrant:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:5432/postgres" -c "
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, vm_rss_mem, rss_anon_mem, collection_load_time_ms)
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${VM_RSS_MEMORY_USAGE}, ${RSS_ANON_MEMORY_USAGE}, ${COLLECTION_LOAD_TIME});
INSERT INTO ${POSTGRES_TABLE} (engine, branch, commit, dataset, measure_timestamp, upload_time, indexing_time, rps, mean_precisions, p95_time, p99_time, vm_rss_mem, rss_anon_mem, collection_load_time_ms, cpu_telemetry, cpu)
VALUES ('qdrant-ci', '${QDRANT_VERSION}', '${QDRANT_COMMIT}', '${DATASETS}', '${MEASURE_TIMESTAMP}', ${UPLOAD_TIME}, ${INDEXING_TIME}, ${RPS}, ${MEAN_PRECISIONS}, ${P95_TIME}, ${P99_TIME}, ${VM_RSS_MEMORY_USAGE}, ${RSS_ANON_MEMORY_USAGE}, ${COLLECTION_LOAD_TIME}, ${CPU_TELEMETRY}, ${CPU});
"

if [[ "$IS_CI_RUN" == "true" ]]; then
Expand All @@ -117,4 +134,7 @@ if [[ "$IS_CI_RUN" == "true" ]]; then

echo "upload_time=${UPLOAD_TIME}" >> "$GITHUB_OUTPUT"
echo "indexing_time=${INDEXING_TIME}" >> "$GITHUB_OUTPUT"

echo "cpu_telemetry=${CPU_TELEMETRY}" >> "$GITHUB_OUTPUT"
echo "cpu=${CPU}" >> "$GITHUB_OUTPUT"
fi