-
-
Notifications
You must be signed in to change notification settings - Fork 739
Description
I wrote a quick script to test network performance between workers.
We try 3 approaches:
-
Make the scheduler transfer a DataFrame through task dependencies.
This includes client <--> scheduler <--> workers communication in the transfer time, so it isn't actually a good measure of bandwidth, but it is maybe still an interesting number, since it's closer to (but still an over-estimation of) the real-life bandwidth a data transfer would experience.
-
Add simple comms handlers and send the DataFrame between workers directly.
-
Use `iperf3` to test raw TCP network (and disk) performance, to get an upper bound on what we should expect from the network.
This script was a quick hack (DataFrame size doesn't match iperf3 transfer size, for instance) but others might still find it interesting.
The script
import time
from dask.utils import format_bytes
import distributed
from distributed.comm.addressing import parse_address, parse_host_port
from distributed.protocol import to_serialize
import numpy as np
import pandas as pd
import coiled
def test_tasks(client: distributed.Client):
    """Test network performance using tasks (scheduler forces a transfer).

    A DataFrame is created once on the first worker (``a``) and stored as the
    attribute ``distributed.GLOBAL_DF`` in that worker's process, so it is not
    regenerated for every sample. Each of the 15 rounds submits a task on
    ``a`` that returns the DataFrame, then times a no-op task pinned to the
    second worker (``b``) that depends on it. One line is printed per round
    with the DataFrame size, the elapsed time and the resulting throughput.

    Parameters
    ----------
    client:
        Client connected to a cluster with at least two workers. The workers
        are restarted at the beginning of the test.
    """
    client.wait_for_workers(2)
    client.restart()
    # Only the first two workers take part; any others are ignored.
    a, b, *_ = client.scheduler_info()["workers"]
    print(f"send: {a} recv: {b} - performance over task `get_data`")

    # Store data on a global variable so we don't have to recompute
    distributed.wait(
        client.submit(
            lambda: setattr(
                distributed, "GLOBAL_DF", pd.DataFrame(np.random.random((30_000, 1000)))
            ),
            workers=[a],
            pure=False,
        )
    )
    try:
        size = client.submit(
            lambda: distributed.GLOBAL_DF.memory_usage().sum(), workers=[a]
        ).result()

        for _ in range(15):
            # pure=False gives every round a fresh key, so the consumer below
            # never finds an already-computed dependency.
            dff = client.submit(
                lambda: distributed.GLOBAL_DF,
                workers=[a],
                pure=False,
            )
            # Make sure the source task has finished on `a` before the clock
            # starts; otherwise its scheduling and execution are billed to
            # the transfer.
            distributed.wait(dff)

            start = time.perf_counter()
            distributed.wait(client.submit(lambda df: None, dff, workers=[b]))
            elapsed = time.perf_counter() - start
            print(
                f"{format_bytes(size)}: {elapsed:.2f}sec, {format_bytes(size / elapsed)}/sec"
            )
    finally:
        # Clean up the global variable, even if a round failed
        distributed.wait(
            client.submit(
                lambda: delattr(distributed, "GLOBAL_DF"), workers=[a], pure=False
            )
        )
def test_handlers(client: distributed.Client):
    """Test network performance using pure comms handlers.

    A no-op ``stuff_receive`` handler is registered on the second worker
    (``b``). A coroutine started on the first worker (``a``) then sends the
    same serialized DataFrame to that handler over and over, recording how
    long each call takes. The client collects and prints the recorded
    timings every two seconds, eight times. Afterwards the sender is asked to
    stop and the handler is removed again.

    Parameters
    ----------
    client:
        Client connected to a cluster with at least two workers. The workers
        are restarted at the beginning of the test.
    """
    client.wait_for_workers(2)
    client.restart()
    # Only the first two workers take part; any others are ignored.
    a, b, *_ = client.scheduler_info()["workers"]
    print(f"send: {a} recv: {b} - performance over comms handler")

    def reset_sender(dask_worker: distributed.Worker):
        # Initialise the shared state synchronously, so that polling or
        # stopping never races with the start-up of `send`.
        dask_worker._send_size = 0
        dask_worker._send_times = []
        dask_worker._send_stop = False
        dask_worker._send_done = False

    async def send(dask_worker: distributed.Worker):
        try:
            df = pd.DataFrame(np.random.random((30_000, 1000)))
            dask_worker._send_size = df.memory_usage().sum()
            s = to_serialize(df)
            while not dask_worker._send_stop:
                start = time.perf_counter()
                await dask_worker.rpc(b).stuff_receive(data=s)
                elapsed = time.perf_counter() - start
                dask_worker._send_times.append(elapsed)
        finally:
            # Lets the client know that no further sends will be issued.
            dask_worker._send_done = True

    def add_receiver(dask_worker: distributed.Worker):
        def receive(comm, data=None):
            pass

        dask_worker.handlers["stuff_receive"] = receive

    def remove_receiver(dask_worker: distributed.Worker):
        dask_worker.handlers.pop("stuff_receive", None)

    def get_times(dask_worker: distributed.Worker):
        # Hand over the timings gathered so far and start a fresh batch.
        times = dask_worker._send_times
        dask_worker._send_times = []
        return dask_worker._send_size, times

    def stop_sender(dask_worker: distributed.Worker):
        dask_worker._send_stop = True

    def sender_done(dask_worker: distributed.Worker):
        return dask_worker._send_done

    client.run(add_receiver, workers=[b])
    client.run(reset_sender, workers=[a])
    # wait=False: the coroutine keeps running on the worker in the background.
    client.run(send, workers=[a], wait=False)

    try:
        for _ in range(8):
            time.sleep(2)
            size, times = client.run(get_times, workers=[a])[a]
            for t in times:
                print(f"{format_bytes(size)}: {t:.2f}sec, {format_bytes(size / t)}/sec")
    finally:
        # Stop the sender first and give it a bounded amount of time to
        # finish its in-flight call, so the handler is not removed underneath
        # a send that is still running.
        client.run(stop_sender, workers=[a])
        deadline = time.monotonic() + 10
        while time.monotonic() < deadline:
            if client.run(sender_done, workers=[a])[a]:
                break
            time.sleep(0.1)
        client.run(remove_receiver, workers=[b])
def test_iperf(client: distributed.Client):
    """Install iperf on workers and test network and disk performance with it.

    Three measurements are run between the first two workers (``a`` and
    ``b``), each with a one-shot ``iperf3`` server daemon on port 5001:

    1. B memory -> A memory
    2. B memory -> A disk (the server writes to the file ``iperf_out``)
    3. A disk -> B disk (the client reads ``iperf_out``, the server writes it)

    The client's output is printed after every measurement. If a client exits
    with a non-zero return code its stderr is printed and the remaining
    measurements are skipped. ``iperf_out`` is removed on all workers at the
    end, whether or not all measurements ran.

    Parameters
    ----------
    client:
        Client connected to a cluster with at least two workers. The workers
        are restarted at the beginning of the test.
    """
    import subprocess

    client.wait_for_workers(2)
    client.restart()

    try:
        client.run(
            subprocess.run,
            "iperf3 -v",
            shell=True,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        print("Installing iperf3 on workers from conda...")
        # -y: nobody is there to answer conda's confirmation prompt.
        client.run(
            subprocess.run,
            "conda install -y -c conda-forge iperf",
            shell=True,
            check=True,
        )

    # Only the first two workers take part; any others are ignored.
    a, b, *_ = client.scheduler_info()["workers"]
    a_ip = parse_host_port(parse_address(a)[1])[0]
    b_ip = parse_host_port(parse_address(b)[1])[0]

    def start_server(worker, extra_args="", check=False):
        # start iperf server (daemon & oneshot mode so `client.run` doesn't block)
        client.run(
            subprocess.run,
            f"iperf3 -s -D -1 -p 5001{extra_args}",
            shell=True,
            check=check,
            workers=[worker],
        )
        # Give the daemonized server a moment to start listening.
        time.sleep(0.5)

    def run_client(worker, server_ip, seconds, extra_args=""):
        # Run one measurement, print its output, and report whether it succeeded.
        result = client.run(
            subprocess.run,
            f"iperf3 -c {server_ip} -p 5001 -f M{extra_args} -t {seconds}",
            shell=True,
            capture_output=True,
            text=True,
            workers=[worker],
        )
        proc = result[worker]
        print(proc.stdout)
        if proc.returncode != 0:
            print(proc.stderr)
            return False
        return True

    print(f"A: {a} B: {b} - performance from iperf3")
    try:
        print("B memory -> A memory")
        start_server(a, check=True)
        if not run_client(b, a_ip, 30):
            return

        print("B memory -> A disk")
        # reference: https://fasterdata.es.net/performance-testing/network-troubleshooting-tools/iperf/disk-testing-using-iperf/
        start_server(a, " -F iperf_out")
        if not run_client(b, a_ip, 15):
            return

        print("A disk -> B disk")
        start_server(b, " -F iperf_out")
        if not run_client(a, b_ip, 30, " -F iperf_out"):
            return
    finally:
        # -f: a worker may have no iperf_out (early exit, or workers sharing
        # one working directory), which must not fail the cleanup.
        client.run(
            subprocess.run,
            "rm -f iperf_out",
            shell=True,
            check=True,
        )
if __name__ == "__main__":
    # Settings for the local two-worker cluster the benchmarks run against.
    local_cluster_options = dict(
        memory_limit=None,
        n_workers=2,
        processes=True,
        worker_class=distributed.Worker,
        threads_per_worker=1,
        scheduler_port=8786,
    )

    # To benchmark on Coiled instead, build the client from a cluster:
    #
    # cluster = coiled.Cluster(
    #     name="perf",
    #     software="gjoseph92/shuffleservice",
    #     n_workers=2,
    #     worker_cpu=2,
    #     worker_memory="2GiB",
    #     scheduler_cpu=1,
    #     scheduler_memory="2GiB",
    # )
    # with distributed.Client(cluster) as client:
    with distributed.Client(**local_cluster_options) as client:
        # Run the three benchmarks one after another on the same client.
        for benchmark in (test_tasks, test_handlers, test_iperf):
            benchmark(client)
Initial results:
On a Coiled cluster (docker on AWS EC2 VMs; don't know the exact instance type, but I requested 2CPU and 2GiB memory, so something low-end):
- task dependencies: 180-280MiB/sec
- comms handler: 290-330MiB/sec
- iperf3 raw TCP: 590 MBytes/sec
- iperf3 raw TCP -> disk: 44.2 MBytes/sec
- iperf3 disk -> TCP -> disk: 5.46 MBytes/sec
So dask's networking is only half as fast as raw TCP here. That's better than I expected actually.
Using comms handlers directly is faster, though not hugely. Also not surprising.
On these low-end EC2 nodes, networking is slow. And disk is very slow.
Full results
(env) gabe dask-playground/shuffle-service » python network.py
Using existing cluster: 'perf'
send: tls://10.6.20.175:33093 recv: tls://10.6.31.53:42065 - performance over task `get_data`
228.88 MiB: 1.12sec, 204.94 MiB/sec
228.88 MiB: 1.06sec, 216.87 MiB/sec
228.88 MiB: 1.14sec, 200.35 MiB/sec
228.88 MiB: 1.20sec, 190.22 MiB/sec
228.88 MiB: 1.23sec, 186.78 MiB/sec
228.88 MiB: 0.84sec, 272.11 MiB/sec
228.88 MiB: 0.87sec, 261.72 MiB/sec
228.88 MiB: 0.87sec, 264.40 MiB/sec
228.88 MiB: 0.82sec, 278.06 MiB/sec
228.88 MiB: 0.89sec, 256.02 MiB/sec
send: tls://10.6.20.175:43659 recv: tls://10.6.31.53:46709 - performance over comms handler
228.88 MiB: 0.79sec, 290.32 MiB/sec
228.88 MiB: 0.68sec, 336.06 MiB/sec
228.88 MiB: 0.85sec, 268.17 MiB/sec
228.88 MiB: 0.93sec, 245.97 MiB/sec
228.88 MiB: 0.68sec, 334.61 MiB/sec
228.88 MiB: 0.74sec, 308.71 MiB/sec
228.88 MiB: 0.70sec, 328.58 MiB/sec
228.88 MiB: 0.75sec, 303.90 MiB/sec
228.88 MiB: 0.79sec, 288.88 MiB/sec
228.88 MiB: 0.75sec, 304.38 MiB/sec
228.88 MiB: 0.73sec, 315.54 MiB/sec
228.88 MiB: 0.75sec, 303.72 MiB/sec
228.88 MiB: 0.72sec, 319.23 MiB/sec
228.88 MiB: 1.12sec, 204.12 MiB/sec
228.88 MiB: 0.77sec, 298.89 MiB/sec
228.88 MiB: 0.74sec, 307.82 MiB/sec
228.88 MiB: 0.78sec, 292.28 MiB/sec
228.88 MiB: 0.72sec, 318.10 MiB/sec
228.88 MiB: 0.85sec, 268.82 MiB/sec
228.88 MiB: 0.82sec, 279.50 MiB/sec
228.88 MiB: 0.74sec, 310.35 MiB/sec
228.88 MiB: 0.78sec, 294.62 MiB/sec
228.88 MiB: 0.77sec, 295.47 MiB/sec
228.88 MiB: 0.70sec, 327.29 MiB/sec
228.88 MiB: 0.78sec, 294.05 MiB/sec
228.88 MiB: 0.68sec, 335.36 MiB/sec
A: tls://10.6.20.175:45103 B: tls://10.6.31.53:39869 - performance from iperf3
B memory -> A memory
Connecting to host 10.6.20.175, port 5001
[ 5] local 10.6.31.53 port 58252 connected to 10.6.20.175 port 5001
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 597 MBytes 597 MBytes/sec 0 1.89 MBytes
[ 5] 1.00-2.00 sec 592 MBytes 592 MBytes/sec 0 2.44 MBytes
[ 5] 2.00-3.00 sec 589 MBytes 589 MBytes/sec 0 2.85 MBytes
[ 5] 3.00-4.00 sec 588 MBytes 587 MBytes/sec 0 3.00 MBytes
[ 5] 4.00-5.00 sec 590 MBytes 590 MBytes/sec 0 3.00 MBytes
[ 5] 5.00-6.00 sec 580 MBytes 580 MBytes/sec 46 2.26 MBytes
[ 5] 6.00-7.00 sec 594 MBytes 594 MBytes/sec 0 2.45 MBytes
[ 5] 7.00-8.00 sec 590 MBytes 590 MBytes/sec 2 1.97 MBytes
[ 5] 8.00-9.00 sec 591 MBytes 591 MBytes/sec 0 2.13 MBytes
[ 5] 9.00-10.00 sec 591 MBytes 591 MBytes/sec 0 2.24 MBytes
[ 5] 10.00-11.00 sec 588 MBytes 588 MBytes/sec 0 2.29 MBytes
[ 5] 11.00-12.00 sec 585 MBytes 585 MBytes/sec 15 1.66 MBytes
[ 5] 12.00-13.00 sec 588 MBytes 587 MBytes/sec 0 1.94 MBytes
[ 5] 13.00-14.00 sec 590 MBytes 590 MBytes/sec 0 2.13 MBytes
[ 5] 14.00-15.00 sec 592 MBytes 592 MBytes/sec 0 2.23 MBytes
[ 5] 15.00-16.00 sec 592 MBytes 593 MBytes/sec 0 2.27 MBytes
[ 5] 16.00-17.00 sec 592 MBytes 593 MBytes/sec 0 2.30 MBytes
[ 5] 17.00-18.00 sec 592 MBytes 592 MBytes/sec 8 1.78 MBytes
[ 5] 18.00-19.00 sec 592 MBytes 592 MBytes/sec 0 2.07 MBytes
[ 5] 19.00-20.00 sec 592 MBytes 593 MBytes/sec 0 2.17 MBytes
[ 5] 20.00-21.00 sec 590 MBytes 590 MBytes/sec 0 2.24 MBytes
[ 5] 21.00-22.00 sec 594 MBytes 594 MBytes/sec 0 2.30 MBytes
[ 5] 22.00-23.00 sec 586 MBytes 586 MBytes/sec 0 2.30 MBytes
[ 5] 23.00-24.00 sec 592 MBytes 593 MBytes/sec 0 2.30 MBytes
[ 5] 24.00-25.00 sec 594 MBytes 594 MBytes/sec 0 2.31 MBytes
[ 5] 25.00-26.00 sec 581 MBytes 581 MBytes/sec 0 2.36 MBytes
[ 5] 26.00-27.00 sec 592 MBytes 592 MBytes/sec 0 2.39 MBytes
[ 5] 27.00-28.00 sec 592 MBytes 593 MBytes/sec 0 2.62 MBytes
[ 5] 28.00-29.00 sec 592 MBytes 592 MBytes/sec 0 2.73 MBytes
[ 5] 29.00-30.00 sec 594 MBytes 594 MBytes/sec 0 2.73 MBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[ 5] 0.00-30.00 sec 17.3 GBytes 591 MBytes/sec 71 sender
[ 5] 0.00-30.00 sec 17.3 GBytes 590 MBytes/sec receiver
iperf Done.
B memory -> A disk
Connecting to host 10.6.20.175, port 5001
[ 5] local 10.6.31.53 port 58258 connected to 10.6.20.175 port 5001
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 48.8 MBytes 48.8 MBytes/sec 0 288 KBytes
[ 5] 1.00-2.00 sec 45.3 MBytes 45.3 MBytes/sec 0 297 KBytes
[ 5] 2.00-3.00 sec 46.0 MBytes 46.0 MBytes/sec 0 297 KBytes
[ 5] 3.00-4.00 sec 46.7 MBytes 46.7 MBytes/sec 0 315 KBytes
[ 5] 4.00-5.00 sec 46.6 MBytes 46.6 MBytes/sec 0 350 KBytes
[ 5] 5.00-6.00 sec 45.0 MBytes 45.0 MBytes/sec 0 350 KBytes
[ 5] 6.00-7.00 sec 44.5 MBytes 44.5 MBytes/sec 0 350 KBytes
[ 5] 7.00-8.00 sec 45.5 MBytes 45.5 MBytes/sec 0 350 KBytes
[ 5] 8.00-9.00 sec 45.2 MBytes 45.2 MBytes/sec 0 350 KBytes
[ 5] 9.00-10.00 sec 45.5 MBytes 45.5 MBytes/sec 0 350 KBytes
[ 5] 10.00-11.00 sec 45.5 MBytes 45.5 MBytes/sec 0 350 KBytes
[ 5] 11.00-12.00 sec 42.7 MBytes 42.7 MBytes/sec 0 350 KBytes
[ 5] 12.00-13.00 sec 45.0 MBytes 45.0 MBytes/sec 0 350 KBytes
[ 5] 13.00-14.00 sec 43.0 MBytes 43.0 MBytes/sec 0 350 KBytes
[ 5] 14.00-15.00 sec 41.5 MBytes 41.5 MBytes/sec 0 350 KBytes
[ 5] 15.00-16.00 sec 41.0 MBytes 41.0 MBytes/sec 0 350 KBytes
[ 5] 16.00-17.00 sec 43.6 MBytes 43.5 MBytes/sec 0 350 KBytes
[ 5] 17.00-18.00 sec 43.6 MBytes 43.6 MBytes/sec 0 350 KBytes
[ 5] 18.00-19.00 sec 45.6 MBytes 45.6 MBytes/sec 0 350 KBytes
[ 5] 19.00-20.00 sec 46.1 MBytes 46.1 MBytes/sec 0 350 KBytes
[ 5] 20.00-21.00 sec 45.8 MBytes 45.8 MBytes/sec 0 350 KBytes
[ 5] 21.00-22.00 sec 42.5 MBytes 42.5 MBytes/sec 0 350 KBytes
[ 5] 22.00-23.00 sec 43.3 MBytes 43.3 MBytes/sec 0 367 KBytes
[ 5] 23.00-24.00 sec 42.1 MBytes 42.1 MBytes/sec 0 367 KBytes
[ 5] 24.00-25.00 sec 43.1 MBytes 43.1 MBytes/sec 0 367 KBytes
[ 5] 25.00-26.00 sec 43.9 MBytes 43.9 MBytes/sec 0 385 KBytes
[ 5] 26.00-27.00 sec 43.1 MBytes 43.1 MBytes/sec 0 385 KBytes
[ 5] 27.00-28.00 sec 42.1 MBytes 42.1 MBytes/sec 0 385 KBytes
[ 5] 28.00-29.00 sec 42.1 MBytes 42.1 MBytes/sec 0 385 KBytes
[ 5] 29.00-30.00 sec 43.6 MBytes 43.6 MBytes/sec 0 385 KBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[ 5] 0.00-30.00 sec 1.30 GBytes 44.3 MBytes/sec 0 sender
[ 5] 0.00-30.00 sec 1.29 GBytes 44.2 MBytes/sec receiver
iperf Done.
A disk -> B disk
Connecting to host 10.6.31.53, port 5001
[ 5] local 10.6.20.175 port 59674 connected to 10.6.31.53 port 5001
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 9.75 MBytes 9.75 MBytes/sec 11 271 KBytes
[ 5] 1.00-2.00 sec 4.84 MBytes 4.84 MBytes/sec 11 271 KBytes
[ 5] 2.00-3.00 sec 5.79 MBytes 5.79 MBytes/sec 12 271 KBytes
[ 5] 3.00-4.00 sec 5.20 MBytes 5.20 MBytes/sec 11 271 KBytes
[ 5] 4.00-5.00 sec 5.02 MBytes 5.02 MBytes/sec 9 271 KBytes
[ 5] 5.00-6.00 sec 5.14 MBytes 5.14 MBytes/sec 11 271 KBytes
[ 5] 6.00-7.00 sec 4.48 MBytes 4.48 MBytes/sec 9 271 KBytes
[ 5] 7.00-8.00 sec 6.27 MBytes 6.27 MBytes/sec 13 271 KBytes
[ 5] 8.00-9.00 sec 7.29 MBytes 7.29 MBytes/sec 12 271 KBytes
[ 5] 9.00-10.00 sec 5.44 MBytes 5.44 MBytes/sec 12 271 KBytes
[ 5] 10.00-11.00 sec 5.91 MBytes 5.91 MBytes/sec 11 271 KBytes
[ 5] 11.00-12.00 sec 5.32 MBytes 5.32 MBytes/sec 11 271 KBytes
[ 5] 12.00-13.00 sec 5.14 MBytes 5.14 MBytes/sec 10 271 KBytes
[ 5] 13.00-14.00 sec 5.79 MBytes 5.79 MBytes/sec 12 271 KBytes
[ 5] 14.00-15.00 sec 5.26 MBytes 5.25 MBytes/sec 10 271 KBytes
[ 5] 15.00-16.00 sec 5.62 MBytes 5.62 MBytes/sec 13 271 KBytes
[ 5] 16.00-17.00 sec 5.50 MBytes 5.50 MBytes/sec 11 271 KBytes
[ 5] 17.00-18.00 sec 4.84 MBytes 4.84 MBytes/sec 10 271 KBytes
[ 5] 18.00-19.00 sec 5.14 MBytes 5.14 MBytes/sec 10 271 KBytes
[ 5] 19.00-20.00 sec 5.50 MBytes 5.50 MBytes/sec 10 271 KBytes
[ 5] 20.00-21.00 sec 4.66 MBytes 4.66 MBytes/sec 10 271 KBytes
[ 5] 21.00-22.00 sec 5.38 MBytes 5.37 MBytes/sec 10 271 KBytes
[ 5] 22.00-23.00 sec 5.38 MBytes 5.38 MBytes/sec 12 271 KBytes
[ 5] 23.00-24.00 sec 5.74 MBytes 5.74 MBytes/sec 11 271 KBytes
[ 5] 24.00-25.00 sec 4.00 MBytes 4.00 MBytes/sec 8 271 KBytes
[ 5] 25.00-26.00 sec 4.84 MBytes 4.84 MBytes/sec 10 271 KBytes
[ 5] 26.00-27.00 sec 5.02 MBytes 5.02 MBytes/sec 10 271 KBytes
[ 5] 27.00-28.00 sec 4.90 MBytes 4.90 MBytes/sec 9 271 KBytes
[ 5] 28.00-29.00 sec 5.74 MBytes 5.73 MBytes/sec 11 271 KBytes
[ 5] 29.00-30.00 sec 4.96 MBytes 4.96 MBytes/sec 11 271 KBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[ 5] 0.00-30.00 sec 164 MBytes 5.46 MBytes/sec 321 sender
Sent 164 MByte / 1.29 GByte (12%) of iperf_out
[ 5] 0.00-30.03 sec 162 MBytes 5.38 MBytes/sec receiver
iperf Done.
Locally on my mac:
- task dependencies: ~1.7 GiB/sec
- comms handler: ~2.4 GiB/sec
- raw TCP: 7.5 GiB/sec
Full results
(env) gabe dask-playground/shuffle-service » python network.py
/Users/gabe/dev/dask-playground/env/lib/python3.9/site-packages/pandas/compat/__init__.py:124: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.
warnings.warn(msg)
/Users/gabe/dev/dask-playground/env/lib/python3.9/site-packages/setuptools/distutils_patch.py:25: UserWarning: Distutils was imported before Setuptools. This usage is discouraged and may exhibit undesirable behaviors or errors. Please use Setuptools' objects directly or at least import Setuptools first.
warnings.warn(
send: tcp://127.0.0.1:57650 recv: tcp://127.0.0.1:57652 - performance over task `get_data`
228.88 MiB: 0.22sec, 1.02 GiB/sec
228.88 MiB: 0.13sec, 1.74 GiB/sec
228.88 MiB: 0.15sec, 1.53 GiB/sec
228.88 MiB: 0.12sec, 1.89 GiB/sec
228.88 MiB: 0.12sec, 1.86 GiB/sec
228.88 MiB: 0.12sec, 1.81 GiB/sec
228.88 MiB: 0.12sec, 1.82 GiB/sec
228.88 MiB: 0.12sec, 1.83 GiB/sec
228.88 MiB: 0.15sec, 1.51 GiB/sec
228.88 MiB: 0.13sec, 1.76 GiB/sec
228.88 MiB: 0.12sec, 1.91 GiB/sec
228.88 MiB: 0.11sec, 1.98 GiB/sec
228.88 MiB: 0.12sec, 1.83 GiB/sec
228.88 MiB: 0.12sec, 1.87 GiB/sec
228.88 MiB: 0.14sec, 1.62 GiB/sec
send: tcp://127.0.0.1:57650 recv: tcp://127.0.0.1:57652 - performance over comms handler
228.88 MiB: 0.17sec, 1.32 GiB/sec
228.88 MiB: 0.10sec, 2.33 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.13 GiB/sec
228.88 MiB: 0.10sec, 2.18 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.09sec, 2.41 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.10sec, 2.33 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.40 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.27 GiB/sec
228.88 MiB: 0.11sec, 2.11 GiB/sec
228.88 MiB: 0.11sec, 2.01 GiB/sec
228.88 MiB: 0.11sec, 2.09 GiB/sec
228.88 MiB: 0.11sec, 2.04 GiB/sec
228.88 MiB: 0.10sec, 2.24 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.27 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.10sec, 2.19 GiB/sec
228.88 MiB: 0.10sec, 2.24 GiB/sec
228.88 MiB: 0.10sec, 2.22 GiB/sec
228.88 MiB: 0.10sec, 2.22 GiB/sec
228.88 MiB: 0.10sec, 2.19 GiB/sec
228.88 MiB: 0.10sec, 2.24 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.12sec, 1.91 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.10sec, 2.27 GiB/sec
228.88 MiB: 0.11sec, 2.02 GiB/sec
228.88 MiB: 0.15sec, 1.48 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.10sec, 2.15 GiB/sec
228.88 MiB: 0.11sec, 1.98 GiB/sec
228.88 MiB: 0.10sec, 2.20 GiB/sec
228.88 MiB: 0.10sec, 2.15 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.11sec, 2.00 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.10sec, 2.19 GiB/sec
228.88 MiB: 0.11sec, 2.09 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.13sec, 1.72 GiB/sec
228.88 MiB: 0.11sec, 2.10 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.13 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.12sec, 1.81 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.14 GiB/sec
228.88 MiB: 0.10sec, 2.21 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.10sec, 2.21 GiB/sec
228.88 MiB: 0.12sec, 1.81 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.12sec, 1.92 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.22 GiB/sec
228.88 MiB: 0.13sec, 1.77 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.09sec, 2.40 GiB/sec
228.88 MiB: 0.12sec, 1.92 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.12sec, 1.87 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.40 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
A: tcp://127.0.0.1:57650 B: tcp://127.0.0.1:57652 - performance from iperf3
B memory -> A memory
Connecting to host 127.0.0.1, port 5001
[ 5] local 127.0.0.1 port 57689 connected to 127.0.0.1 port 5001
[ ID] Interval Transfer Bitrate
[ 5] 0.00-1.00 sec 7.39 GBytes 7566 MBytes/sec
[ 5] 1.00-2.00 sec 6.35 GBytes 6507 MBytes/sec
[ 5] 2.00-3.00 sec 7.18 GBytes 7355 MBytes/sec
[ 5] 3.00-4.00 sec 7.30 GBytes 7480 MBytes/sec
[ 5] 4.00-5.00 sec 6.87 GBytes 7039 MBytes/sec
[ 5] 5.00-6.00 sec 7.92 GBytes 8112 MBytes/sec
[ 5] 6.00-7.00 sec 7.86 GBytes 8054 MBytes/sec
[ 5] 7.00-8.00 sec 7.88 GBytes 8065 MBytes/sec
[ 5] 8.00-9.00 sec 7.61 GBytes 7795 MBytes/sec
[ 5] 9.00-10.00 sec 6.83 GBytes 6996 MBytes/sec
[ 5] 10.00-11.00 sec 7.15 GBytes 7324 MBytes/sec
[ 5] 11.00-12.00 sec 7.79 GBytes 7974 MBytes/sec
[ 5] 12.00-13.00 sec 7.80 GBytes 7989 MBytes/sec
[ 5] 13.00-14.00 sec 7.84 GBytes 8026 MBytes/sec
[ 5] 14.00-15.00 sec 7.86 GBytes 8044 MBytes/sec
[ 5] 15.00-16.00 sec 7.79 GBytes 7979 MBytes/sec
[ 5] 16.00-17.00 sec 7.92 GBytes 8110 MBytes/sec
[ 5] 17.00-18.00 sec 6.76 GBytes 6921 MBytes/sec
[ 5] 18.00-19.00 sec 7.45 GBytes 7627 MBytes/sec
[ 5] 19.00-20.00 sec 7.38 GBytes 7558 MBytes/sec
[ 5] 20.00-21.00 sec 7.52 GBytes 7699 MBytes/sec
[ 5] 21.00-22.00 sec 7.22 GBytes 7397 MBytes/sec
[ 5] 22.00-23.00 sec 7.26 GBytes 7432 MBytes/sec
[ 5] 23.00-24.00 sec 7.00 GBytes 7170 MBytes/sec
[ 5] 24.00-25.00 sec 7.19 GBytes 7362 MBytes/sec
[ 5] 25.00-26.00 sec 6.80 GBytes 6961 MBytes/sec
[ 5] 26.00-27.00 sec 6.91 GBytes 7072 MBytes/sec
[ 5] 27.00-28.00 sec 7.22 GBytes 7390 MBytes/sec
[ 5] 28.00-29.00 sec 7.41 GBytes 7591 MBytes/sec
[ 5] 29.00-30.00 sec 7.31 GBytes 7484 MBytes/sec
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate
[ 5] 0.00-30.00 sec 221 GBytes 7536 MBytes/sec sender
[ 5] 0.00-30.00 sec 221 GBytes 7536 MBytes/sec receiver
iperf Done.
Dask lags further behind a very fast network, but it's still much faster than a slow network! Does it need to be faster? How often are we actually bandwidth-constrained (versus being constrained by event loop/GIL/worker threads/things that can do something useful with that bandwidth)?