Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions b
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Run the dev.cmake invoke task with "-b Release"
# (presumably configures a Release build -- confirm against the task definition).
inv dev.cmake -b Release

# Run the dev.cc invoke task once for each of the three targets used by the
# distributed tests.
inv dev.cc faabric_dist_tests
inv dev.cc faabric_dist_test_server
inv dev.cc planner_server

76 changes: 76 additions & 0 deletions bb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/sh

# Name of the branch on the "lg" remote that bb() squash-merges into every
# benchmarked commit (used as lg/$bench_base).
bench_base=bb

# Run a command through ./bin/inv_wrapper.sh inside the "cli" compose service.
# Arguments: forwarded verbatim to the wrapper; "$@" is quoted so arguments
#            containing spaces or glob characters are not re-split.
# Returns:   the exit status of `docker compose exec`.
cli_inv() { docker compose exec -it cli ./bin/inv_wrapper.sh "$@"; }

# Run one test case of faabric_dist_tests inside the "cli" compose service.
# Arguments: $1 - value passed to the test binary as the EXP_NAME env variable
#            $2 - single argument handed to faabric_dist_tests
bench_named() {
  local exp_name="$1"
  local test_case="$2"
  docker compose exec -it cli \
    env EXP_NAME="$exp_name" \
    ./build/static/bin/faabric_dist_tests "$test_case"
}

# Run the local and the remote all-reduce benchmark, echoing only the
# bench_allreduce lines and saving them to 1.txt (local) and 2.txt (remote)
# in the current directory.
bench() {
  local scope
  local idx=0
  for scope in local remote; do
    idx=$((idx + 1))
    bench_named "$scope" "Bench MPI all reduce $scope" |
      grep bench_allreduce |
      tee "$idx.txt"
  done
}

# Install the linux-tools packages inside the "cli" service (through cli_inv).
# The previous second command was "apt update apt install -y ...", which
# passes "apt install ..." as arguments to `apt update` and installs nothing.
# NOTE: the kernel-specific package is pinned to 5.15.0-91-generic; it has to
# match the kernel the containers run on.
setup() {
  cli_inv apt update
  cli_inv apt install -y linux-tools-common linux-tools-generic linux-tools-5.15.0-91-generic
}

# Bring up a fresh "cli" service, build the dist-test targets in Release mode,
# start the dist-test server and run the benchmarks.
# The compose project is left running afterwards (the final `down` is disabled).
run() {
  local target

  docker compose down
  docker compose up --no-recreate -d cli
  FAABRIC_DOCKER="on" ./bin/wait_for_venv.sh

  cli_inv dev.cmake -b Release

  for target in faabric_dist_tests faabric_dist_test_server planner_server; do
    cli_inv dev.cc "$target"
  done
  ./dist-test/dev_server.sh

  bench

  # docker compose down
}

# Benchmark one commit: clone faabric into $HOME/bench/<commit>/faabric (once),
# check the commit out, squash-merge lg/$bench_base on top of it, then run
# setup and run.
# Globals:   HOME (read), bench_base (read); changes the current directory.
# Arguments: $1 - commit id to benchmark
# Returns:   non-zero as soon as clone/cd/checkout/fetch/merge fails, so a
#            broken tree is never built or benchmarked.
bb() {
  local commit="$1"
  echo "commit: $commit"
  local d="$HOME/bench/$commit/faabric"
  if [ ! -d "$d" ]; then
    git clone git@github.com:/faasm/faabric.git "$d" || return 1
  fi
  # Never run `git checkout -f` in whatever directory we happened to be in.
  cd "$d" || return 1
  # Exact-match test: a remote such as "lgx" must not count as "lg".
  if git remote | grep -qx lg; then
    echo "using exists lg"
  else
    git remote add lg git@github.com:lgarithm/faabric.git || return 1
  fi
  pwd
  git checkout -f "$commit" || return 1
  git fetch lg || return 1
  if ! git merge "lg/$bench_base" --squash --strategy-option ours; then
    echo "merge failed for $commit" >&2
    return 1
  fi
  echo "merged"
  git status
  echo "building ..."
  setup
  run
  echo "done"
}

# Run bb for every commit id listed in a file.
# Arguments: $1 - path to the id list; the first whitespace-separated field of
#                 each line is the id, and entries whose first field contains
#                 '#' are skipped.
# A `for` over the pre-computed list is kept on purpose (instead of
# `while read`): the commands started by bb may read stdin and would otherwise
# swallow the rest of the list.
all() {
  local ids="$1"
  local id
  for id in $(awk '$1 !~ /#/ { print $1 }' "$ids"); do
    echo "commit: $id"
    bb "$id"
    echo "done for $id"
  done
}

# inv docker.build -c openmpi-worker
# bb "7483943ede55cb90394eedaa670ec169239eeb0c" # main
# Entry point: benchmark every commit listed in ids.txt (resolved relative to
# the directory the script is started from).
all ids.txt

# bb "578c079e9a24d838485544a7039845f18864a08a" # merge failed
3 changes: 3 additions & 0 deletions capture.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Run ./t, keeping its stdout and stderr in separate log files.
./t 1> out.log 2> err.log

# The benchmark lines are read back from the stderr log; show them and keep a
# copy in result.log.
grep bench_allreduce < err.log | tee result.log
2 changes: 2 additions & 0 deletions clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Remove the venv and conan-cache entries from the current directory.
# sudo is used presumably because they are created as root from inside the
# containers -- confirm before running elsewhere.
sudo rm -fr venv conan-cache

51 changes: 51 additions & 0 deletions comment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/sh

# Copy the two benchmark logs of one commit from host "ko3" into logs/<commit>/.
# Arguments: $1 - commit id
# Returns:   non-zero if the target directory cannot be created or if either
#            copy fails (previously a failed 1.txt copy was masked by a
#            successful 2.txt copy). Both copies are always attempted.
down() {
  local commit="$1"
  echo "commit: $commit"
  local d="bench/$commit/faabric"
  local n
  local rc=0
  mkdir -p "logs/$commit" || return 1
  for n in 1 2; do
    scp "ko3:$d/$n.txt" "logs/$commit/$n.txt" || rc=1
  done
  return "$rc"
}

# Emit one comment body: print it and append it to comment.txt in the current
# directory. Posting through `gh` is currently disabled (see the line below).
# Arguments: $1 - comment text
cmt() {
  # gh pr comment 2 --body "$1" --repo https://github.com/lgarithm/faabric
  local text="$1"
  echo "$text" | tee -a comment.txt
}

# Print the comment text for one commit: the commit id followed by the
# contents of logs/<commit>/1.txt and logs/<commit>/2.txt, each preceded by an
# empty line and wrapped in a fenced code block.
# Arguments: $1 - commit id
body() {
  local commit="$1"
  local part

  echo "$commit"
  for part in 1 2; do
    echo
    echo '```'
    cat "logs/$commit/$part.txt"
    echo '```'
  done
}

# Download the logs of one commit and emit the matching comment.
# Arguments: $1 - commit id
# Returns:   non-zero, without emitting a comment, when the download fails;
#            otherwise the status of cmt. Previously a comment with empty
#            code blocks was emitted even when the logs could not be fetched.
f() {
  local commit="$1"
  echo "$commit"
  if ! down "$commit"; then
    echo "skip comment for $commit: log download failed" >&2
    return 1
  fi
  cmt "$(body "$commit")"
}

# Run f for every commit id listed in a file.
# Arguments: $1 - path to the id list; the first whitespace-separated field of
#                 each line is the id, and entries whose first field contains
#                 '#' are skipped.
main() {
  local ids="$1"
  local id
  for id in $(awk '$1 !~ /#/ { print $1 }' "$ids"); do
    echo "commit: $id"
    f "$id"
    echo "done for $id"
    echo
  done
}

# Entry point: process every commit listed in ids.txt (resolved relative to
# the directory the script is started from).
main ids.txt
# cmt "$(body "7483943ede55cb90394eedaa670ec169239eeb0c.log")"
# cmt "$(body "8d10fa2051ff19cb141515296874d7d2bf2f3235.log")"
1 change: 1 addition & 0 deletions dist-test/dev_server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pushd ${PROJ_ROOT} > /dev/null
export OVERRIDE_CPU_COUNT=4

if [[ -z "$1" ]]; then
#docker compose up -d dist-test-server openmpi-worker --scale openmpi-worker=2
docker compose up -d dist-test-server
elif [[ "$1" == "restart" ]]; then
docker compose restart dist-test-server
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,5 @@ services:
depends_on:
- planner
- redis
# openmpi-worker:
# image: faasm.azurecr.io/openmpi-worker:${FAABRIC_VERSION}
11 changes: 11 additions & 0 deletions download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/sh

# Copy result.log of one benchmarked commit from host "ko3" to ./<commit>.log.
# Arguments: $1 - commit id
# Returns:   the exit status of scp.
down() {
  local b="$1"
  echo "commit: $b"
  # Kept local so the helper does not leak a global named "d".
  local d="bench/$b/faabric"
  scp "ko3:$d/result.log" "$b.log"
}

# Fetch the result log of a single, hard-coded commit; switch the commented
# line to download a different one.
# down "7483943ede55cb90394eedaa670ec169239eeb0c"
down "8d10fa2051ff19cb141515296874d7d2bf2f3235"
15 changes: 15 additions & 0 deletions flame.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/sh

# Sample the running faabric_dist_tests process with perf for 60 seconds and
# render the collected stacks as a flame graph (kernel.svg).
# Requires perf and a ./FlameGraph checkout in the current directory.
#
# apt update
# apt install -y linux-tools-common linux-tools-generic linux-tools-$(uname -r)

# pidof prints PIDs separated by spaces, while `perf record -p` expects a
# comma-separated list; an empty result would make -p swallow the next option.
pids=$(pidof faabric_dist_tests | tr ' ' ',')
if [ -z "$pids" ]; then
  echo "flame.sh: faabric_dist_tests is not running" >&2
  exit 1
fi

perf record -F 99 -p "$pids" -g -- sleep 60 # -> perf.data

rm -fr out.perf
rm -fr out.folded
rm -fr kernel.svg

perf script -f >out.perf
./FlameGraph/stackcollapse-perf.pl out.perf >out.folded
./FlameGraph/flamegraph.pl out.folded >kernel.svg
5 changes: 5 additions & 0 deletions ids.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
d9810b0d5dec2ff36ee9fdbda015137bb145364d # main
26568f78c368256eae5aa3cb74c94bc737bbd85a # mpi-struct
8eb27e0985b9d87834d822676a104b74faafb9a1 # ptp-struct
0b0f1b117ea03c2e8e0641ab56a9c8e4e97954a7 # spinlock
7b17b30706166ee777d8c05379ef37ca74029eb3 # ptp-no-order
1 change: 1 addition & 0 deletions logs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.txt
22 changes: 22 additions & 0 deletions logs/mpi/1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
BGN ======================================== bench_allreduce local ========================================
bench_allreduce(np=4) took 0.0013s, total workload: 384000B, rate: 0.270GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.296GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.295GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.293GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.292GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.295GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.299GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.298GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.299GiB/s
bench_allreduce(np=4) took 0.0012s, total workload: 384000B, rate: 0.299GiB/s
bench_allreduce(np=4) took 0.1540s, total workload: 1.144GiB, rate: 7.425GiB/s
bench_allreduce(np=4) took 0.1205s, total workload: 1.144GiB, rate: 9.493GiB/s
bench_allreduce(np=4) took 0.1179s, total workload: 1.144GiB, rate: 9.700GiB/s
bench_allreduce(np=4) took 0.1204s, total workload: 1.144GiB, rate: 9.502GiB/s
bench_allreduce(np=4) took 0.1236s, total workload: 1.144GiB, rate: 9.254GiB/s
bench_allreduce(np=4) took 0.1316s, total workload: 1.144GiB, rate: 8.689GiB/s
bench_allreduce(np=4) took 0.1335s, total workload: 1.144GiB, rate: 8.566GiB/s
bench_allreduce(np=4) took 0.1324s, total workload: 1.144GiB, rate: 8.641GiB/s
bench_allreduce(np=4) took 0.1327s, total workload: 1.144GiB, rate: 8.620GiB/s
bench_allreduce(np=4) took 0.1321s, total workload: 1.144GiB, rate: 8.659GiB/s
END ======================================== bench_allreduce local ========================================
22 changes: 22 additions & 0 deletions logs/mpi/2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
BGN ======================================== bench_allreduce remote ========================================
bench_allreduce(np=4) took 0.0304s, total workload: 384000B, rate: 0.012GiB/s
bench_allreduce(np=4) took 0.0187s, total workload: 384000B, rate: 0.019GiB/s
bench_allreduce(np=4) took 0.0160s, total workload: 384000B, rate: 0.022GiB/s
bench_allreduce(np=4) took 0.0160s, total workload: 384000B, rate: 0.022GiB/s
bench_allreduce(np=4) took 0.0162s, total workload: 384000B, rate: 0.022GiB/s
bench_allreduce(np=4) took 0.0161s, total workload: 384000B, rate: 0.022GiB/s
bench_allreduce(np=4) took 0.0164s, total workload: 384000B, rate: 0.022GiB/s
bench_allreduce(np=4) took 0.0185s, total workload: 384000B, rate: 0.019GiB/s
bench_allreduce(np=4) took 0.0167s, total workload: 384000B, rate: 0.021GiB/s
bench_allreduce(np=4) took 0.0165s, total workload: 384000B, rate: 0.022GiB/s
bench_allreduce(np=4) took 0.1663s, total workload: 1.144GiB, rate: 6.877GiB/s
bench_allreduce(np=4) took 0.1554s, total workload: 1.144GiB, rate: 7.362GiB/s
bench_allreduce(np=4) took 0.1777s, total workload: 1.144GiB, rate: 6.436GiB/s
bench_allreduce(np=4) took 0.1231s, total workload: 1.144GiB, rate: 9.289GiB/s
bench_allreduce(np=4) took 0.1252s, total workload: 1.144GiB, rate: 9.136GiB/s
bench_allreduce(np=4) took 0.1198s, total workload: 1.144GiB, rate: 9.543GiB/s
bench_allreduce(np=4) took 0.1198s, total workload: 1.144GiB, rate: 9.549GiB/s
bench_allreduce(np=4) took 0.1191s, total workload: 1.144GiB, rate: 9.603GiB/s
bench_allreduce(np=4) took 0.1217s, total workload: 1.144GiB, rate: 9.400GiB/s
bench_allreduce(np=4) took 0.1209s, total workload: 1.144GiB, rate: 9.459GiB/s
END ======================================== bench_allreduce remote ========================================
142 changes: 142 additions & 0 deletions plot/parse-log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
import sys
import numpy as np


# Suffix removed from the rate field of a benchmark line before it is
# converted to float (see g()).
unit = 'GiB/s'
# Workload labels used as group keys; they match the "total workload" field
# of the benchmark log lines.
small = '384000B'
large = '1.144GiB'
# Order in which the per-workload columns are produced.
keys = [small, large]


def pad(s, n):
    """Right-pad `s` with spaces up to `n` characters.

    Strings that are already `n` characters or longer are returned unchanged.
    """
    return s.ljust(n)


def show_table(rows):
    """Print `rows` (lists of strings) as a left-aligned table.

    Each column is as wide as its longest cell. A horizontal rule is printed
    before the first and before the second row, i.e. around the header.
    The number of columns is taken from the first row.
    """
    widths = [0] * len(rows[0])
    for row in rows:
        for col, cell in enumerate(row):
            widths[col] = max(widths[col], len(cell))

    rule = '-' * (sum(widths) + 2 * len(widths))
    for idx, row in enumerate(rows):
        if idx < 2:
            print(rule)
        print(' '.join(cell.ljust(widths[col]) for col, cell in enumerate(row)))




def show(groups, name=''):
    """Print the mean of each workload group, one line per entry of `keys`.

    groups: mapping from workload label to a list of numbers.
    name:   optional heading printed first when non-empty.
    """
    if name:
        print(name)
    for key in keys:
        mean = np.mean(np.array(groups[key]))
        print('%10s : %0.3f' % (key, mean))


def to_row(groups):
    """Return the mean of every workload group as a '%0.3f' string.

    One cell is produced per entry of `keys`, in that order.
    """
    return ['%0.3f' % np.mean(np.array(groups[key])) for key in keys]


def g(filename):
    """Parse a benchmark log into {workload label: [rate, ...]}.

    Only lines whose first field starts with 'bench_allreduce' and that carry
    all eight whitespace-separated fields are used: field 5 is the workload
    label (trailing comma removed) and field 7 the rate (with the `unit`
    suffix removed). Blank, banner and truncated lines are skipped instead of
    raising IndexError.
    """
    groups = dict()
    # The context manager closes the log even if a rate fails to parse.
    with open(filename) as log:
        for line in log:
            parts = line.split()
            if len(parts) < 8 or not parts[0].startswith('bench_allreduce'):
                continue
            w = parts[5].replace(',', '')
            x = float(parts[7].replace(unit, ''))
            groups.setdefault(w, []).append(x)
    return groups


# Header row of the printed table. The first four columns follow the order
# produced by f(): local then remote, each with one cell per entry of `keys`
# (S = small workload, L = large workload); the last column is the commit id.
th = [
'local/S', 'local/L',
'remote/S', 'remote/L',
'commit',
]


def f(id):
    """Build the table row for one run.

    Reads logs/<id>/1.txt (local) and logs/<id>/2.txt (remote), prints the id,
    and returns the local means, then the remote means, then the id.
    """
    parsed = [g(f'logs/{id}/{part}.txt') for part in (1, 2)]

    print(id)
    row = []
    for groups in parsed:
        row += to_row(groups)
    row.append(id)
    return row



def parse_ids(filename = 'ids.txt'):
    """Read `filename` and return {commit id: name}.

    Each useful line has the form '<id> # <name>': the first field is the id
    and the third one the name. Lines with fewer than three fields are
    ignored, and so are comment lines, including indented ones (which used to
    be parsed as an id named '#'). For a repeated id the last name wins.
    """
    id_names = {}
    # The context manager closes the file; it used to be left open.
    with open(filename) as ids_file:
        for line in ids_file:
            parts = line.split()
            if len(parts) <= 2 or parts[0].startswith('#'):
                continue
            id_names[parts[0]] = parts[2]
    return id_names



def add_ratio(rows):
    """Annotate every row after the first with its ratio to the first row.

    rows[0] is the baseline and is returned unchanged. In the other rows each
    numeric cell becomes 'value (value/baseline)'. The id column is recognised
    by its baseline cell being 'mpi'; there the id gets ' # <name>' appended
    when parse_ids() knows a name for it.

    Ids without a name in ids.txt are kept as-is instead of raising KeyError,
    and a zero baseline yields a 'nan' ratio instead of ZeroDivisionError.
    """
    names = parse_ids()
    row0, tail = rows[0], rows[1:]
    new_rows = []
    for row in tail:
        cols = []
        for x, b in zip(row, row0):
            if b == 'mpi':
                name = names.get(x)
                cols.append((x + ' # ' + name) if name else x)
            else:
                x = float(x)
                b = float(b)
                ratio = x / b if b else float('nan')
                cols.append('%.3f (%.2f)' % (x, ratio))
        new_rows.append(cols)

    return [row0] + new_rows


def main(args):
    """Print the comparison table for the given run ids.

    The 'mpi' run is always placed first, so it is the baseline row that
    add_ratio() compares the other rows against.
    """
    rows = [f(id) for id in ['mpi'] + args]
    show_table([th] + add_ratio(rows))


# Run only when executed as a script, so importing this module (for example to
# reuse g() or parse_ids()) does not read logs or print a table.
if __name__ == '__main__':
    main(sys.argv[1:])
13 changes: 13 additions & 0 deletions plot/plot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh
set -e

# Print the commit ids listed in a file, one per line.
# Arguments: $1 - path to the id list; the first whitespace-separated field of
#                 each line is the id. Blank lines and entries whose first
#                 field contains '#' are skipped.
# The path is quoted so file names containing spaces work.
list_ids() {
  local ids="$1"
  awk 'NF && $1 !~ /#/ { print $1 }' "$ids"
}

# Run ./plot/parse-log.py with every id printed by list_ids as one argument.
# Arguments: $1 - path to the id list
all() {
  # Word splitting of the list_ids output is intentional: one word per id.
  set -- $(list_ids $1)
  ./plot/parse-log.py "$@"
}

# Entry point: print the table for the commits listed in ids.txt (resolved
# relative to the directory the script is started from).
all ids.txt
1 change: 1 addition & 0 deletions re
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Call the dist-test dev server script with the "restart" argument (that
# script runs `docker compose restart dist-test-server` for it).
./dist-test/dev_server.sh restart
Loading