Skip to content

Testing network performance #5258

@gjoseph92

Description

@gjoseph92

I wrote a quick script to test network performance between workers.

We try 3 approaches:

  • Make the scheduler transfer a DataFrame through task dependencies.

    This includes client <--> scheduler <--> workers communication in the transfer time, so it isn't actually a good measure of bandwidth, but it is maybe still an interesting number, since it's closer to (but still an over-estimation of) a measure of the real-life bandwidth a data transfer would experience.

  • Add simple comms handlers and send the DataFrame between workers directly.

  • Use iperf3 to test raw TCP network (and disk) performance, to get an upper bound on what we should expect from the network

This script was a quick hack (DataFrame size doesn't match iperf3 transfer size, for instance) but others might still find it interesting.

The script
import time

from dask.utils import format_bytes
import distributed
from distributed.comm.addressing import parse_address, parse_host_port
from distributed.protocol import to_serialize
import numpy as np
import pandas as pd
import coiled


def test_tasks(client: distributed.Client):
    """Test network performance using tasks (scheduler forces a transfer).

    A random 30_000 x 1000 DataFrame is stored as ``distributed.GLOBAL_DF`` on
    the first worker. On each of 15 rounds, a task pinned to the first worker
    returns that DataFrame and a dependent no-op task pinned to the second
    worker forces it to be moved there. The elapsed time of the dependent task
    and the resulting throughput are printed per round.

    Parameters
    ----------
    client
        Client connected to a cluster with at least two workers. The cluster
        is restarted before the measurement.
    """
    client.wait_for_workers(2)
    client.restart()
    # Only the first two workers take part; any others are ignored.
    a, b, *_ = client.scheduler_info()["workers"]

    print(f"send: {a} recv: {b} - performance over task `get_data`")

    # Store data on a global variable so we don't have to recompute
    distributed.wait(
        client.submit(
            lambda: setattr(
                distributed, "GLOBAL_DF", pd.DataFrame(np.random.random((30_000, 1000)))
            ),
            workers=[a],
            pure=False,
        )
    )

    try:
        # pure=False: the task reads mutable module state, so its result must
        # never be reused from an identically-keyed earlier task.
        size = client.submit(
            lambda: distributed.GLOBAL_DF.memory_usage().sum(),
            workers=[a],
            pure=False,
        ).result()

        for _ in range(15):
            dff = client.submit(
                lambda: distributed.GLOBAL_DF,
                workers=[a],
                pure=False,
            )
            # Make sure the producer task has finished on `a` before the clock
            # starts, so its scheduling/execution is not counted as transfer.
            distributed.wait(dff)

            start = time.perf_counter()
            distributed.wait(client.submit(lambda df: None, dff, workers=[b]))
            elapsed = time.perf_counter() - start

            print(
                f"{format_bytes(size)}: {elapsed:.2f}sec, {format_bytes(size / elapsed)}/sec"
            )
    finally:
        # Clean up the global variable, even if a round above failed
        distributed.wait(
            client.submit(
                lambda: delattr(distributed, "GLOBAL_DF"), workers=[a], pure=False
            )
        )


def test_handlers(client: distributed.Client):
    """Test network performance using pure comms handlers.

    A no-op ``stuff_receive`` handler is installed on the second worker, and a
    coroutine is started on the first worker that repeatedly sends a random
    30_000 x 1000 DataFrame to that handler, recording how long each send
    takes. The recorded timings are collected every 2 seconds, 8 times, and
    printed together with the resulting throughput.

    When the measurement ends (or fails), the sending coroutine is asked to
    stop and the handler is removed again.

    Parameters
    ----------
    client
        Client connected to a cluster with at least two workers. The cluster
        is restarted before the measurement.
    """
    client.wait_for_workers(2)
    client.restart()
    # Only the first two workers take part; any others are ignored.
    a, b, *_ = client.scheduler_info()["workers"]

    print(f"send: {a} recv: {b} - performance over comms handler")

    async def send(dask_worker: distributed.Worker):
        df = pd.DataFrame(np.random.random((30_000, 1000)))
        dask_worker._send_size = df.memory_usage().sum()
        s = to_serialize(df)
        dask_worker._send_times = []
        dask_worker._send_stop = False
        dask_worker._send_done = False
        try:
            # Runs until `request_stop` flips the flag from the client side.
            while not dask_worker._send_stop:
                start = time.perf_counter()
                await dask_worker.rpc(b).stuff_receive(data=s)
                elapsed = time.perf_counter() - start
                dask_worker._send_times.append(elapsed)
        finally:
            dask_worker._send_done = True

    def add_receiver(dask_worker: distributed.Worker):
        def receive(comm, data=None):
            pass

        dask_worker.handlers["stuff_receive"] = receive

    def remove_receiver(dask_worker: distributed.Worker):
        dask_worker.handlers.pop("stuff_receive", None)

    def get_times(dask_worker: distributed.Worker):
        # Hand back the timings gathered so far and start a fresh list.
        times = dask_worker._send_times
        dask_worker._send_times = []

        return dask_worker._send_size, times

    def request_stop(dask_worker: distributed.Worker):
        # Ask the sender to exit after its current send; report whether it has.
        dask_worker._send_stop = True
        return getattr(dask_worker, "_send_done", False)

    client.run(add_receiver, workers=[b])
    client.run(send, workers=[a], wait=False)

    try:
        for _ in range(8):
            time.sleep(2)
            size, times = client.run(get_times, workers=[a])[a]
            for t in times:
                print(
                    f"{format_bytes(size)}: {t:.2f}sec, {format_bytes(size / t)}/sec"
                )
    finally:
        # Let an in-flight send finish before the handler disappears, so the
        # sender does not hit a missing handler on the receiving worker.
        # Bounded wait: give up after ~5 seconds and remove the handler anyway.
        for _ in range(50):
            if client.run(request_stop, workers=[a])[a]:
                break
            time.sleep(0.1)
        client.run(remove_receiver, workers=[b])


def test_iperf(client: distributed.Client):
    """Install iperf on workers and test network and disk performance with it.

    Three iperf3 runs are performed between the first two workers (A and B),
    each using a one-shot daemonized server on port 5001:

    1. B memory -> A memory
    2. B memory -> A disk (server writes to ``iperf_out``)
    3. A disk -> B disk (client reads ``iperf_out``, server writes ``iperf_out``)

    The client-side iperf3 output of every run is printed. If a run exits with
    a non-zero status its stderr is printed and the remaining runs are skipped.
    The ``iperf_out`` file is removed from all workers at the end, whether or
    not every run succeeded.

    Parameters
    ----------
    client
        Client connected to a cluster with at least two workers. The cluster
        is restarted before the measurement.
    """
    import subprocess

    client.wait_for_workers(2)
    client.restart()

    try:
        client.run(
            subprocess.run,
            "iperf3 -v",
            shell=True,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        print("Installing iperf3 on workers from conda...")
        # -y: there is no terminal on the workers to answer conda's
        # confirmation prompt.
        client.run(
            subprocess.run,
            "conda install -y -c conda-forge iperf",
            shell=True,
            check=True,
        )

    # Only the first two workers take part; any others are ignored.
    a, b, *_ = client.scheduler_info()["workers"]
    a_ip = parse_host_port(parse_address(a)[1])[0]
    b_ip = parse_host_port(parse_address(b)[1])[0]

    def run_stage(title, server, server_cmd, sender, sender_cmd):
        """Run one server/client iperf3 pair; return True if the client succeeded."""
        print(title)
        # start iperf server (daemon & oneshot mode so `client.run` doesn't block)
        client.run(subprocess.run, server_cmd, shell=True, check=True, workers=[server])
        # Give the daemonized server a moment to start listening.
        time.sleep(0.5)
        proc = client.run(
            subprocess.run,
            sender_cmd,
            shell=True,
            capture_output=True,
            text=True,
            workers=[sender],
        )[sender]
        print(proc.stdout)
        if proc.returncode != 0:
            print(proc.stderr)
            return False
        return True

    print(f"A: {a} B: {b} - performance from iperf3")

    # reference for the disk stages:
    # https://fasterdata.es.net/performance-testing/network-troubleshooting-tools/iperf/disk-testing-using-iperf/
    stages = [
        (
            "B memory -> A memory",
            a,
            "iperf3 -s -D -1 -p 5001",
            b,
            f"iperf3 -c {a_ip} -p 5001 -f M -t 30",
        ),
        (
            "B memory -> A disk",
            a,
            "iperf3 -s -D -1 -p 5001 -F iperf_out",
            b,
            f"iperf3 -c {a_ip} -p 5001 -f M -t 15",
        ),
        (
            "A disk -> B disk",
            b,
            "iperf3 -s -D -1 -p 5001 -F iperf_out",
            a,
            f"iperf3 -c {b_ip} -p 5001 -f M -F iperf_out -t 30",
        ),
    ]

    try:
        for stage in stages:
            if not run_stage(*stage):
                return
    finally:
        # -f: not every worker necessarily has the file (extra workers, an
        # early exit above, or workers sharing one directory).
        client.run(
            subprocess.run,
            "rm -f iperf_out",
            shell=True,
            check=True,
        )


if __name__ == "__main__":
    # Keyword arguments for the default, local run of the benchmarks.
    # NOTE(review): with no address given these are presumably forwarded to a
    # local cluster created by the client — confirm against distributed docs.
    local_cluster_options = dict(
        memory_limit=None,
        n_workers=2,
        processes=True,
        worker_class=distributed.Worker,
        threads_per_worker=1,
        scheduler_port=8786,
    )

    # Alternative: run against a Coiled cluster instead of the local one by
    # replacing the `with` statement below with:
    #
    # cluster = coiled.Cluster(
    #     name="perf",
    #     software="gjoseph92/shuffleservice",
    #     n_workers=2,
    #     worker_cpu=2,
    #     worker_memory="2GiB",
    #     scheduler_cpu=1,
    #     scheduler_memory="2GiB",
    # )
    # with distributed.Client(cluster) as client:
    with distributed.Client(**local_cluster_options) as client:
        # Run the three benchmarks in order, all on the same client.
        for benchmark in (test_tasks, test_handlers, test_iperf):
            benchmark(client)

Initial results:

On a Coiled cluster (docker on AWS EC2 VMs; don't know the exact instance type, but I requested 2CPU and 2GiB memory, so something low-end):

  • task dependencies: 180-280MiB/sec
  • comms handler: 290-330MiB/sec
  • iperf3 raw TCP: 590 MBytes/sec
  • iperf3 raw TCP -> disk: 44.2 MBytes/sec
  • iperf3 disk -> TCP -> disk: 5.46 MBytes/sec

So dask's networking is only half as fast as raw TCP here. That's better than I expected actually.
Using comms handlers directly is faster, though not hugely. Also not surprising.

On these low-end EC2 nodes, networking is slow. And disk is very slow.

Full results
(env) gabe dask-playground/shuffle-service » python network.py
Using existing cluster: 'perf'
send: tls://10.6.20.175:33093 recv: tls://10.6.31.53:42065 - performance over task `get_data`
228.88 MiB: 1.12sec, 204.94 MiB/sec
228.88 MiB: 1.06sec, 216.87 MiB/sec
228.88 MiB: 1.14sec, 200.35 MiB/sec
228.88 MiB: 1.20sec, 190.22 MiB/sec
228.88 MiB: 1.23sec, 186.78 MiB/sec
228.88 MiB: 0.84sec, 272.11 MiB/sec
228.88 MiB: 0.87sec, 261.72 MiB/sec
228.88 MiB: 0.87sec, 264.40 MiB/sec
228.88 MiB: 0.82sec, 278.06 MiB/sec
228.88 MiB: 0.89sec, 256.02 MiB/sec
send: tls://10.6.20.175:43659 recv: tls://10.6.31.53:46709 - performance over comms handler
228.88 MiB: 0.79sec, 290.32 MiB/sec
228.88 MiB: 0.68sec, 336.06 MiB/sec
228.88 MiB: 0.85sec, 268.17 MiB/sec
228.88 MiB: 0.93sec, 245.97 MiB/sec
228.88 MiB: 0.68sec, 334.61 MiB/sec
228.88 MiB: 0.74sec, 308.71 MiB/sec
228.88 MiB: 0.70sec, 328.58 MiB/sec
228.88 MiB: 0.75sec, 303.90 MiB/sec
228.88 MiB: 0.79sec, 288.88 MiB/sec
228.88 MiB: 0.75sec, 304.38 MiB/sec
228.88 MiB: 0.73sec, 315.54 MiB/sec
228.88 MiB: 0.75sec, 303.72 MiB/sec
228.88 MiB: 0.72sec, 319.23 MiB/sec
228.88 MiB: 1.12sec, 204.12 MiB/sec
228.88 MiB: 0.77sec, 298.89 MiB/sec
228.88 MiB: 0.74sec, 307.82 MiB/sec
228.88 MiB: 0.78sec, 292.28 MiB/sec
228.88 MiB: 0.72sec, 318.10 MiB/sec
228.88 MiB: 0.85sec, 268.82 MiB/sec
228.88 MiB: 0.82sec, 279.50 MiB/sec
228.88 MiB: 0.74sec, 310.35 MiB/sec
228.88 MiB: 0.78sec, 294.62 MiB/sec
228.88 MiB: 0.77sec, 295.47 MiB/sec
228.88 MiB: 0.70sec, 327.29 MiB/sec
228.88 MiB: 0.78sec, 294.05 MiB/sec
228.88 MiB: 0.68sec, 335.36 MiB/sec
A: tls://10.6.20.175:45103 B: tls://10.6.31.53:39869 - performance from iperf3
B memory -> A memory
Connecting to host 10.6.20.175, port 5001
[  5] local 10.6.31.53 port 58252 connected to 10.6.20.175 port 5001
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
[  5]   0.00-1.00   sec   597 MBytes   597 MBytes/sec    0   1.89 MBytes       
[  5]   1.00-2.00   sec   592 MBytes   592 MBytes/sec    0   2.44 MBytes       
[  5]   2.00-3.00   sec   589 MBytes   589 MBytes/sec    0   2.85 MBytes       
[  5]   3.00-4.00   sec   588 MBytes   587 MBytes/sec    0   3.00 MBytes       
[  5]   4.00-5.00   sec   590 MBytes   590 MBytes/sec    0   3.00 MBytes       
[  5]   5.00-6.00   sec   580 MBytes   580 MBytes/sec   46   2.26 MBytes       
[  5]   6.00-7.00   sec   594 MBytes   594 MBytes/sec    0   2.45 MBytes       
[  5]   7.00-8.00   sec   590 MBytes   590 MBytes/sec    2   1.97 MBytes       
[  5]   8.00-9.00   sec   591 MBytes   591 MBytes/sec    0   2.13 MBytes       
[  5]   9.00-10.00  sec   591 MBytes   591 MBytes/sec    0   2.24 MBytes       
[  5]  10.00-11.00  sec   588 MBytes   588 MBytes/sec    0   2.29 MBytes       
[  5]  11.00-12.00  sec   585 MBytes   585 MBytes/sec   15   1.66 MBytes       
[  5]  12.00-13.00  sec   588 MBytes   587 MBytes/sec    0   1.94 MBytes       
[  5]  13.00-14.00  sec   590 MBytes   590 MBytes/sec    0   2.13 MBytes       
[  5]  14.00-15.00  sec   592 MBytes   592 MBytes/sec    0   2.23 MBytes       
[  5]  15.00-16.00  sec   592 MBytes   593 MBytes/sec    0   2.27 MBytes       
[  5]  16.00-17.00  sec   592 MBytes   593 MBytes/sec    0   2.30 MBytes       
[  5]  17.00-18.00  sec   592 MBytes   592 MBytes/sec    8   1.78 MBytes       
[  5]  18.00-19.00  sec   592 MBytes   592 MBytes/sec    0   2.07 MBytes       
[  5]  19.00-20.00  sec   592 MBytes   593 MBytes/sec    0   2.17 MBytes       
[  5]  20.00-21.00  sec   590 MBytes   590 MBytes/sec    0   2.24 MBytes       
[  5]  21.00-22.00  sec   594 MBytes   594 MBytes/sec    0   2.30 MBytes       
[  5]  22.00-23.00  sec   586 MBytes   586 MBytes/sec    0   2.30 MBytes       
[  5]  23.00-24.00  sec   592 MBytes   593 MBytes/sec    0   2.30 MBytes       
[  5]  24.00-25.00  sec   594 MBytes   594 MBytes/sec    0   2.31 MBytes       
[  5]  25.00-26.00  sec   581 MBytes   581 MBytes/sec    0   2.36 MBytes       
[  5]  26.00-27.00  sec   592 MBytes   592 MBytes/sec    0   2.39 MBytes       
[  5]  27.00-28.00  sec   592 MBytes   593 MBytes/sec    0   2.62 MBytes       
[  5]  28.00-29.00  sec   592 MBytes   592 MBytes/sec    0   2.73 MBytes       
[  5]  29.00-30.00  sec   594 MBytes   594 MBytes/sec    0   2.73 MBytes       
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-30.00  sec  17.3 GBytes   591 MBytes/sec   71             sender
[  5]   0.00-30.00  sec  17.3 GBytes   590 MBytes/sec                  receiver

iperf Done.

B memory -> A disk
Connecting to host 10.6.20.175, port 5001
[  5] local 10.6.31.53 port 58258 connected to 10.6.20.175 port 5001
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
[  5]   0.00-1.00   sec  48.8 MBytes  48.8 MBytes/sec    0    288 KBytes       
[  5]   1.00-2.00   sec  45.3 MBytes  45.3 MBytes/sec    0    297 KBytes       
[  5]   2.00-3.00   sec  46.0 MBytes  46.0 MBytes/sec    0    297 KBytes       
[  5]   3.00-4.00   sec  46.7 MBytes  46.7 MBytes/sec    0    315 KBytes       
[  5]   4.00-5.00   sec  46.6 MBytes  46.6 MBytes/sec    0    350 KBytes       
[  5]   5.00-6.00   sec  45.0 MBytes  45.0 MBytes/sec    0    350 KBytes       
[  5]   6.00-7.00   sec  44.5 MBytes  44.5 MBytes/sec    0    350 KBytes       
[  5]   7.00-8.00   sec  45.5 MBytes  45.5 MBytes/sec    0    350 KBytes       
[  5]   8.00-9.00   sec  45.2 MBytes  45.2 MBytes/sec    0    350 KBytes       
[  5]   9.00-10.00  sec  45.5 MBytes  45.5 MBytes/sec    0    350 KBytes       
[  5]  10.00-11.00  sec  45.5 MBytes  45.5 MBytes/sec    0    350 KBytes       
[  5]  11.00-12.00  sec  42.7 MBytes  42.7 MBytes/sec    0    350 KBytes       
[  5]  12.00-13.00  sec  45.0 MBytes  45.0 MBytes/sec    0    350 KBytes       
[  5]  13.00-14.00  sec  43.0 MBytes  43.0 MBytes/sec    0    350 KBytes       
[  5]  14.00-15.00  sec  41.5 MBytes  41.5 MBytes/sec    0    350 KBytes       
[  5]  15.00-16.00  sec  41.0 MBytes  41.0 MBytes/sec    0    350 KBytes       
[  5]  16.00-17.00  sec  43.6 MBytes  43.5 MBytes/sec    0    350 KBytes       
[  5]  17.00-18.00  sec  43.6 MBytes  43.6 MBytes/sec    0    350 KBytes       
[  5]  18.00-19.00  sec  45.6 MBytes  45.6 MBytes/sec    0    350 KBytes       
[  5]  19.00-20.00  sec  46.1 MBytes  46.1 MBytes/sec    0    350 KBytes       
[  5]  20.00-21.00  sec  45.8 MBytes  45.8 MBytes/sec    0    350 KBytes       
[  5]  21.00-22.00  sec  42.5 MBytes  42.5 MBytes/sec    0    350 KBytes       
[  5]  22.00-23.00  sec  43.3 MBytes  43.3 MBytes/sec    0    367 KBytes       
[  5]  23.00-24.00  sec  42.1 MBytes  42.1 MBytes/sec    0    367 KBytes       
[  5]  24.00-25.00  sec  43.1 MBytes  43.1 MBytes/sec    0    367 KBytes       
[  5]  25.00-26.00  sec  43.9 MBytes  43.9 MBytes/sec    0    385 KBytes       
[  5]  26.00-27.00  sec  43.1 MBytes  43.1 MBytes/sec    0    385 KBytes       
[  5]  27.00-28.00  sec  42.1 MBytes  42.1 MBytes/sec    0    385 KBytes       
[  5]  28.00-29.00  sec  42.1 MBytes  42.1 MBytes/sec    0    385 KBytes       
[  5]  29.00-30.00  sec  43.6 MBytes  43.6 MBytes/sec    0    385 KBytes       
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-30.00  sec  1.30 GBytes  44.3 MBytes/sec    0             sender
[  5]   0.00-30.00  sec  1.29 GBytes  44.2 MBytes/sec                  receiver

iperf Done.

A disk -> B disk
Connecting to host 10.6.31.53, port 5001
[  5] local 10.6.20.175 port 59674 connected to 10.6.31.53 port 5001
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
[  5]   0.00-1.00   sec  9.75 MBytes  9.75 MBytes/sec   11    271 KBytes       
[  5]   1.00-2.00   sec  4.84 MBytes  4.84 MBytes/sec   11    271 KBytes       
[  5]   2.00-3.00   sec  5.79 MBytes  5.79 MBytes/sec   12    271 KBytes       
[  5]   3.00-4.00   sec  5.20 MBytes  5.20 MBytes/sec   11    271 KBytes       
[  5]   4.00-5.00   sec  5.02 MBytes  5.02 MBytes/sec    9    271 KBytes       
[  5]   5.00-6.00   sec  5.14 MBytes  5.14 MBytes/sec   11    271 KBytes       
[  5]   6.00-7.00   sec  4.48 MBytes  4.48 MBytes/sec    9    271 KBytes       
[  5]   7.00-8.00   sec  6.27 MBytes  6.27 MBytes/sec   13    271 KBytes       
[  5]   8.00-9.00   sec  7.29 MBytes  7.29 MBytes/sec   12    271 KBytes       
[  5]   9.00-10.00  sec  5.44 MBytes  5.44 MBytes/sec   12    271 KBytes       
[  5]  10.00-11.00  sec  5.91 MBytes  5.91 MBytes/sec   11    271 KBytes       
[  5]  11.00-12.00  sec  5.32 MBytes  5.32 MBytes/sec   11    271 KBytes       
[  5]  12.00-13.00  sec  5.14 MBytes  5.14 MBytes/sec   10    271 KBytes       
[  5]  13.00-14.00  sec  5.79 MBytes  5.79 MBytes/sec   12    271 KBytes       
[  5]  14.00-15.00  sec  5.26 MBytes  5.25 MBytes/sec   10    271 KBytes       
[  5]  15.00-16.00  sec  5.62 MBytes  5.62 MBytes/sec   13    271 KBytes       
[  5]  16.00-17.00  sec  5.50 MBytes  5.50 MBytes/sec   11    271 KBytes       
[  5]  17.00-18.00  sec  4.84 MBytes  4.84 MBytes/sec   10    271 KBytes       
[  5]  18.00-19.00  sec  5.14 MBytes  5.14 MBytes/sec   10    271 KBytes       
[  5]  19.00-20.00  sec  5.50 MBytes  5.50 MBytes/sec   10    271 KBytes       
[  5]  20.00-21.00  sec  4.66 MBytes  4.66 MBytes/sec   10    271 KBytes       
[  5]  21.00-22.00  sec  5.38 MBytes  5.37 MBytes/sec   10    271 KBytes       
[  5]  22.00-23.00  sec  5.38 MBytes  5.38 MBytes/sec   12    271 KBytes       
[  5]  23.00-24.00  sec  5.74 MBytes  5.74 MBytes/sec   11    271 KBytes       
[  5]  24.00-25.00  sec  4.00 MBytes  4.00 MBytes/sec    8    271 KBytes       
[  5]  25.00-26.00  sec  4.84 MBytes  4.84 MBytes/sec   10    271 KBytes       
[  5]  26.00-27.00  sec  5.02 MBytes  5.02 MBytes/sec   10    271 KBytes       
[  5]  27.00-28.00  sec  4.90 MBytes  4.90 MBytes/sec    9    271 KBytes       
[  5]  28.00-29.00  sec  5.74 MBytes  5.73 MBytes/sec   11    271 KBytes       
[  5]  29.00-30.00  sec  4.96 MBytes  4.96 MBytes/sec   11    271 KBytes       
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-30.00  sec   164 MBytes  5.46 MBytes/sec  321             sender
        Sent  164 MByte / 1.29 GByte (12%) of iperf_out
[  5]   0.00-30.03  sec   162 MBytes  5.38 MBytes/sec                  receiver

iperf Done.

Locally on my mac:

  • task dependencies: ~1.7 GiB/sec
  • comms handler: ~2.4 GiB/sec
  • raw TCP: 7.5 GiB/sec
Full results
(env) gabe dask-playground/shuffle-service » python network.py
/Users/gabe/dev/dask-playground/env/lib/python3.9/site-packages/pandas/compat/__init__.py:124: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.
  warnings.warn(msg)
/Users/gabe/dev/dask-playground/env/lib/python3.9/site-packages/setuptools/distutils_patch.py:25: UserWarning: Distutils was imported before Setuptools. This usage is discouraged and may exhibit undesirable behaviors or errors. Please use Setuptools' objects directly or at least import Setuptools first.
  warnings.warn(
send: tcp://127.0.0.1:57650 recv: tcp://127.0.0.1:57652 - performance over task `get_data`
228.88 MiB: 0.22sec, 1.02 GiB/sec
228.88 MiB: 0.13sec, 1.74 GiB/sec
228.88 MiB: 0.15sec, 1.53 GiB/sec
228.88 MiB: 0.12sec, 1.89 GiB/sec
228.88 MiB: 0.12sec, 1.86 GiB/sec
228.88 MiB: 0.12sec, 1.81 GiB/sec
228.88 MiB: 0.12sec, 1.82 GiB/sec
228.88 MiB: 0.12sec, 1.83 GiB/sec
228.88 MiB: 0.15sec, 1.51 GiB/sec
228.88 MiB: 0.13sec, 1.76 GiB/sec
228.88 MiB: 0.12sec, 1.91 GiB/sec
228.88 MiB: 0.11sec, 1.98 GiB/sec
228.88 MiB: 0.12sec, 1.83 GiB/sec
228.88 MiB: 0.12sec, 1.87 GiB/sec
228.88 MiB: 0.14sec, 1.62 GiB/sec
send: tcp://127.0.0.1:57650 recv: tcp://127.0.0.1:57652 - performance over comms handler
228.88 MiB: 0.17sec, 1.32 GiB/sec
228.88 MiB: 0.10sec, 2.33 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.13 GiB/sec
228.88 MiB: 0.10sec, 2.18 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.09sec, 2.41 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.10sec, 2.33 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.40 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.27 GiB/sec
228.88 MiB: 0.11sec, 2.11 GiB/sec
228.88 MiB: 0.11sec, 2.01 GiB/sec
228.88 MiB: 0.11sec, 2.09 GiB/sec
228.88 MiB: 0.11sec, 2.04 GiB/sec
228.88 MiB: 0.10sec, 2.24 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.27 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.10sec, 2.19 GiB/sec
228.88 MiB: 0.10sec, 2.24 GiB/sec
228.88 MiB: 0.10sec, 2.22 GiB/sec
228.88 MiB: 0.10sec, 2.22 GiB/sec
228.88 MiB: 0.10sec, 2.19 GiB/sec
228.88 MiB: 0.10sec, 2.24 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.12sec, 1.91 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.10sec, 2.27 GiB/sec
228.88 MiB: 0.11sec, 2.02 GiB/sec
228.88 MiB: 0.15sec, 1.48 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.10sec, 2.15 GiB/sec
228.88 MiB: 0.11sec, 1.98 GiB/sec
228.88 MiB: 0.10sec, 2.20 GiB/sec
228.88 MiB: 0.10sec, 2.15 GiB/sec
228.88 MiB: 0.10sec, 2.25 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.11sec, 2.00 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.10sec, 2.19 GiB/sec
228.88 MiB: 0.11sec, 2.09 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.13sec, 1.72 GiB/sec
228.88 MiB: 0.11sec, 2.10 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.13 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.16 GiB/sec
228.88 MiB: 0.12sec, 1.81 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.14 GiB/sec
228.88 MiB: 0.10sec, 2.21 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.10sec, 2.21 GiB/sec
228.88 MiB: 0.12sec, 1.81 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.12sec, 1.92 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.22 GiB/sec
228.88 MiB: 0.13sec, 1.77 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.23 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.09sec, 2.40 GiB/sec
228.88 MiB: 0.12sec, 1.92 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.10sec, 2.31 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.28 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.12sec, 1.87 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.26 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.40 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.32 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.09sec, 2.37 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
228.88 MiB: 0.10sec, 2.30 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.10sec, 2.34 GiB/sec
228.88 MiB: 0.09sec, 2.39 GiB/sec
228.88 MiB: 0.09sec, 2.38 GiB/sec
228.88 MiB: 0.10sec, 2.35 GiB/sec
228.88 MiB: 0.10sec, 2.29 GiB/sec
228.88 MiB: 0.09sec, 2.36 GiB/sec
A: tcp://127.0.0.1:57650 B: tcp://127.0.0.1:57652 - performance from iperf3
B memory -> A memory
Connecting to host 127.0.0.1, port 5001
[  5] local 127.0.0.1 port 57689 connected to 127.0.0.1 port 5001
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-1.00   sec  7.39 GBytes  7566 MBytes/sec                  
[  5]   1.00-2.00   sec  6.35 GBytes  6507 MBytes/sec                  
[  5]   2.00-3.00   sec  7.18 GBytes  7355 MBytes/sec                  
[  5]   3.00-4.00   sec  7.30 GBytes  7480 MBytes/sec                  
[  5]   4.00-5.00   sec  6.87 GBytes  7039 MBytes/sec                  
[  5]   5.00-6.00   sec  7.92 GBytes  8112 MBytes/sec                  
[  5]   6.00-7.00   sec  7.86 GBytes  8054 MBytes/sec                  
[  5]   7.00-8.00   sec  7.88 GBytes  8065 MBytes/sec                  
[  5]   8.00-9.00   sec  7.61 GBytes  7795 MBytes/sec                  
[  5]   9.00-10.00  sec  6.83 GBytes  6996 MBytes/sec                  
[  5]  10.00-11.00  sec  7.15 GBytes  7324 MBytes/sec                  
[  5]  11.00-12.00  sec  7.79 GBytes  7974 MBytes/sec                  
[  5]  12.00-13.00  sec  7.80 GBytes  7989 MBytes/sec                  
[  5]  13.00-14.00  sec  7.84 GBytes  8026 MBytes/sec                  
[  5]  14.00-15.00  sec  7.86 GBytes  8044 MBytes/sec                  
[  5]  15.00-16.00  sec  7.79 GBytes  7979 MBytes/sec                  
[  5]  16.00-17.00  sec  7.92 GBytes  8110 MBytes/sec                  
[  5]  17.00-18.00  sec  6.76 GBytes  6921 MBytes/sec                  
[  5]  18.00-19.00  sec  7.45 GBytes  7627 MBytes/sec                  
[  5]  19.00-20.00  sec  7.38 GBytes  7558 MBytes/sec                  
[  5]  20.00-21.00  sec  7.52 GBytes  7699 MBytes/sec                  
[  5]  21.00-22.00  sec  7.22 GBytes  7397 MBytes/sec                  
[  5]  22.00-23.00  sec  7.26 GBytes  7432 MBytes/sec                  
[  5]  23.00-24.00  sec  7.00 GBytes  7170 MBytes/sec                  
[  5]  24.00-25.00  sec  7.19 GBytes  7362 MBytes/sec                  
[  5]  25.00-26.00  sec  6.80 GBytes  6961 MBytes/sec                  
[  5]  26.00-27.00  sec  6.91 GBytes  7072 MBytes/sec                  
[  5]  27.00-28.00  sec  7.22 GBytes  7390 MBytes/sec                  
[  5]  28.00-29.00  sec  7.41 GBytes  7591 MBytes/sec                  
[  5]  29.00-30.00  sec  7.31 GBytes  7484 MBytes/sec                  
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate
[  5]   0.00-30.00  sec   221 GBytes  7536 MBytes/sec                  sender
[  5]   0.00-30.00  sec   221 GBytes  7536 MBytes/sec                  receiver

iperf Done.

Dask lags further behind a very fast network, but it's still much faster than a slow network! Does it need to be faster? How often are we actually bandwidth-constrained (versus being constrained by event loop/GIL/worker threads/things that can do something useful with that bandwidth)?

Metadata

Metadata

Assignees

No one assigned

    Labels

    discussion (Discussing a topic with no specific actions yet), networking

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions