diff --git a/.gitignore b/.gitignore index eaf84826..a3fa088f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,22 @@ -src/questdb/ingress.html src/questdb/ingress.c +src/questdb/*.html rustup-init.exe +# Linux Perf profiles +perf.data* +perf/*.svg + +# Atheris Crash/OOM and other files +fuzz-artifact/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class +# Parquet files generated as part of example runs +*.parquet + # C extensions *.so diff --git a/.vscode/settings.json b/.vscode/settings.json index a7d0fc7b..c710dcde 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,7 @@ { - "esbonio.sphinx.confDir": "" + "esbonio.sphinx.confDir": "", + "cmake.configureOnOpen": false, + "files.associations": { + "ingress_helper.h": "c" + } } \ No newline at end of file diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1015d247..0fe8b7a3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,9 +2,47 @@ Changelog ========= +1.1.0 (2023-01-04) +------------------ + +Features +~~~~~~~~ + +* High-performance ingestion of `Pandas `_ + dataframes into QuestDB via ILP. + We now support most Pandas column types. The logic is implemented in native + code and is orders of magnitude faster than iterating the dataframe + in Python and calling the ``Buffer.row()`` or ``Sender.row()`` methods: The + ``Buffer`` can be written from Pandas at hundreds of MiB/s per CPU core. + The new ``dataframe()`` method continues working with the ``auto_flush`` + feature. + See API documentation and examples for the new ``dataframe()`` method + available on both the ``Sender`` and ``Buffer`` classes. + +* New ``TimestampNanos.now()`` and ``TimestampMicros.now()`` methods. + *These are the new recommended way of getting the current timestamp.* + +* The Python GIL is now released during calls to ``Sender.flush()`` and when + ``auto_flush`` is triggered. This should improve throughput when using the + ``Sender`` from multiple threads. + +Errata +~~~~~~ + +* In previous releases the documentation for the ``from_datetime()`` methods of + the ``TimestampNanos`` and ``TimestampMicros`` types recommended calling + ``datetime.datetime.utcnow()`` to get the current timestamp. This is incorrect + as it will (confusingly) return an object with the local timezone instead of UTC. + This documentation has been corrected and now recommends calling + ``datetime.datetime.now(tz=datetime.timezone.utc)`` or (more efficiently) the + new ``TimestampNanos.now()`` and ``TimestampMicros.now()`` methods. + 1.0.2 (2022-10-31) ------------------ +Features +~~~~~~~~ + * Support for Python 3.11. * Updated to version 2.1.1 of the ``c-questdb-client`` library: @@ -14,13 +52,20 @@ Changelog 1.0.1 (2022-08-16) ------------------ +Features +~~~~~~~~ + +* As a matter of convenience, the ``Buffer.row`` method can now take ``None`` column + values. This has the same semantics as skipping the column altogether. + Closes `#3 `_. + +Bugfixes +~~~~~~~~ + * Fixed a major bug where Python ``int`` and ``float`` types were handled with 32-bit instead of 64-bit precision. This caused certain ``int`` values to be rejected and other ``float`` values to be rounded incorrectly. Closes `#13 `_. -* As a matter of convenience, the ``Buffer.row`` method can now take ``None`` column - values. This has the same semantics as skipping the column altogether. - Closes `#3 `_. * Fixed a minor bug where an error auto-flush caused a second clean-up error. Closes `#4 `_.
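Editor's note: the 1.1.0 entries above introduce ``dataframe()`` and ``TimestampNanos.now()``. The following is a minimal sketch of how the two fit together, based only on the features described in this changelog and the examples elsewhere in this patch; host, port, table and column names are illustrative and not taken from the diff.

```python
import pandas as pd
from questdb.ingress import Sender, TimestampNanos

# Illustrative data; any dataframe with supported column types works.
df = pd.DataFrame({
    'id': pd.Categorical(['toronto1', 'paris3']),
    'temperature': [20.0, 21.0]})

with Sender('localhost', 9009) as sender:
    # New in 1.1.0: serialize the whole dataframe in native code.
    # auto_flush behaves the same as for row().
    sender.dataframe(df, table_name='sensors')

    # TimestampNanos.now() is the newly recommended way to obtain the
    # current timestamp (rather than datetime.datetime.utcnow()).
    sender.row(
        'sensors',
        symbols={'id': 'madrid2'},
        columns={'temperature': 22.0},
        at=TimestampNanos.now())
```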
@@ -28,6 +73,9 @@ Changelog 1.0.0 (2022-07-15) ------------------ +Features +~~~~~~~~ + * First stable release. * Insert data into QuestDB via ILP. * Sender and Buffer APIs. @@ -38,6 +86,9 @@ Changelog 0.0.3 (2022-07-14) ------------------ +Features +~~~~~~~~ + * Initial set of features to connect to the database. * ``Buffer`` and ``Sender`` classes. * First release where ``pip install questdb`` should work. @@ -46,4 +97,7 @@ Changelog 0.0.1 (2022-07-08) ------------------ +Features +~~~~~~~~ + * First release on PyPI. diff --git a/README.rst b/README.rst index ae0e4947..a5dc9469 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,22 @@ The latest version of the library is 1.0.2. columns={'temperature': 20.0, 'humidity': 0.5}) sender.flush() +You can also send Pandas dataframes: + +.. code-block:: python + + import pandas as pd + from questdb.ingress import Sender + + df = pd.DataFrame({ + 'id': pd.Categorical(['toronto1', 'paris3']), + 'temperature': [20.0, 21.0], + 'humidity': [0.5, 0.6], + 'timestamp': pd.to_datetime(['2021-01-01', '2021-01-02'])}) + + with Sender('localhost', 9009) as sender: + sender.dataframe(df, table_name='sensors') + Docs ==== diff --git a/TODO.rst b/TODO.rst index 7bb28af2..cb07d6f6 100644 --- a/TODO.rst +++ b/TODO.rst @@ -6,8 +6,6 @@ TODO Build Tooling ============= -* **[HIGH]** Transition to Azure, move Linux arm to ARM pipeline without QEMU. - * **[MEDIUM]** Automate Apple Silicon as part of CI. * **[LOW]** Release to PyPI from CI. @@ -19,13 +17,3 @@ Docs * **[MEDIUM]** Examples should be tested as part of the unit tests (as they are in the C client). This is to ensure they don't "bit rot" as the code changes. - -* **[MEDIUM]** Document on a per-version basis. - -Development -=========== - -* **[HIGH]** Implement ``tabular()`` API in the buffer. - -* **[MEDIUM]** Implement ``pandas()`` API in the buffer. - *This can probably wait for a future release.* \ No newline at end of file diff --git a/c-questdb-client b/c-questdb-client index 2cd4e7fb..ad3776ef 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 2cd4e7fb3ad10b20a7de28527cdf18cf240b9634 +Subproject commit ad3776efb057d09a86a83e15c0f39ae40d75485b diff --git a/ci/cibuildwheel.yaml b/ci/cibuildwheel.yaml index 1f612720..d6fa6c32 100644 --- a/ci/cibuildwheel.yaml +++ b/ci/cibuildwheel.yaml @@ -68,7 +68,7 @@ stages: - bash: | set -o errexit python3 -m pip install --upgrade pip - pip3 install cibuildwheel==2.11.1 + python3 -m pip install cibuildwheel==2.11.2 displayName: Install dependencies - bash: cibuildwheel --output-dir wheelhouse . displayName: Build wheels @@ -83,7 +83,7 @@ stages: - bash: | set -o errexit python3 -m pip install --upgrade pip - pip3 install cibuildwheel==2.11.1 + python3 -m pip install cibuildwheel==2.11.2 displayName: Install dependencies - bash: cibuildwheel --output-dir wheelhouse . displayName: Build wheels @@ -100,7 +100,7 @@ stages: - bash: | set -o errexit python3 -m pip install --upgrade pip - pip3 install cibuildwheel==2.11.1 + python3 -m pip install cibuildwheel==2.11.2 displayName: Install dependencies - bash: cibuildwheel --output-dir wheelhouse . displayName: Build wheels @@ -117,7 +117,7 @@ stages: - bash: | set -o errexit python3 -m pip install --upgrade pip - pip3 install cibuildwheel==2.11.1 + python3 -m pip install cibuildwheel==2.11.2 displayName: Install dependencies - bash: cibuildwheel --output-dir wheelhouse .
displayName: Build wheels @@ -134,7 +134,7 @@ stages: - bash: | set -o errexit python3 -m pip install --upgrade pip - pip3 install cibuildwheel==2.11.1 + python3 -m pip install cibuildwheel==2.11.2 displayName: Install dependencies - bash: cibuildwheel --output-dir wheelhouse . displayName: Build wheels @@ -151,7 +151,7 @@ stages: - bash: | set -o errexit python3 -m pip install --upgrade pip - python3 -m pip install cibuildwheel==2.11.1 + python3 -m pip install cibuildwheel==2.11.2 displayName: Install dependencies - bash: cibuildwheel --output-dir wheelhouse . displayName: Build wheels @@ -165,8 +165,8 @@ stages: - task: UsePythonVersion@0 - bash: | set -o errexit - python -m pip install --upgrade pip - pip install cibuildwheel==2.11.1 + python3 -m pip install --upgrade pip + python3 -m pip install cibuildwheel==2.11.2 displayName: Install dependencies - bash: cibuildwheel --output-dir wheelhouse . displayName: Build wheels diff --git a/ci/pip_install_deps.py b/ci/pip_install_deps.py new file mode 100644 index 00000000..13e9f247 --- /dev/null +++ b/ci/pip_install_deps.py @@ -0,0 +1,74 @@ +import sys +import subprocess +import shlex +import textwrap +import platform + + +class UnsupportedDependency(Exception): + pass + + +def pip_install(package): + args = [ + sys.executable, + '-m', 'pip', 'install', + '--upgrade', + '--only-binary', ':all:', + package] + args_s = ' '.join(shlex.quote(arg) for arg in args) + sys.stderr.write(args_s + '\n') + res = subprocess.run( + args, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE) + if res.returncode == 0: + return + output = res.stdout.decode('utf-8') + if 'Could not find a version that satisfies the requirement' in output: + raise UnsupportedDependency(output) + else: + sys.stderr.write(output + '\n') + sys.exit(res.returncode) + + +def try_pip_install(package): + try: + pip_install(package) + except UnsupportedDependency as e: + msg = textwrap.indent(str(e), ' ' * 8) + sys.stderr.write(f' Ignored unsatisfiable dependency:\n{msg}\n') + + +def ensure_timezone(): + try: + import zoneinfo + if platform.system() == 'Windows': + pip_install('tzdata') # for zoneinfo + except ImportError: + pip_install('pytz') + + +def main(): + ensure_timezone() + try_pip_install('fastparquet>=2022.12.0') + try_pip_install('pandas') + try_pip_install('numpy') + try_pip_install('pyarrow') + + on_linux_is_glibc = ( + (not platform.system() == 'Linux') or + (platform.libc_ver()[0] == 'glibc')) + is_64bits = sys.maxsize > 2**32 + is_cpython = platform.python_implementation() == 'CPython' + if on_linux_is_glibc and is_64bits and is_cpython: + # Ensure that we've managed to install the expected dependencies. 
+ import pandas + import numpy + import pyarrow + if sys.version_info >= (3, 8): + import fastparquet + + +if __name__ == "__main__": + main() diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 78834b50..84f62c7e 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -28,7 +28,9 @@ stages: submodules: true - task: UsePythonVersion@0 - script: python3 --version - - script: python3 -m pip install cython + - script: | + python3 -m pip install cython + python3 ci/pip_install_deps.py displayName: Installing Python dependencies - script: python3 proj.py build displayName: "Build" diff --git a/dev_requirements.txt b/dev_requirements.txt index a5e835a7..c639c014 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,8 +1,12 @@ setuptools>=45.2.0 Cython>=0.29.32 wheel>=0.34.2 -cibuildwheel>=2.11.1 +cibuildwheel>=2.11.2 Sphinx>=5.0.2 sphinx-rtd-theme>=1.0.0 twine>=4.0.1 bump2version>=1.0.1 +pandas>=1.3.5 +numpy>=1.21.6 +pyarrow>=10.0.1 +fastparquet>=2022.12.0 diff --git a/docs/examples.rst b/docs/examples.rst index a1a4409e..b9e01081 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -5,6 +5,9 @@ Examples Basics ====== +Row-by-row Insertion +-------------------- + The following example connects to the database and sends two rows (lines). The connection is unauthenticated and the data is sent at the end of the @@ -18,7 +21,7 @@ Here the :class:`questdb.ingress.Sender` is constructed with just ``host`` and Authentication and TLS -====================== +---------------------- Continuing from the previous example, the connection is authenticated and also uses TLS. @@ -31,7 +34,7 @@ and ``tls`` arguments. Explicit Buffers -================ +---------------- For more advanced use cases where the same messages need to be sent to multiple questdb instances or you want to decouple serialization and sending (as may be @@ -48,7 +51,7 @@ all data is sent. Ticking Random Data and Timer-based Flush -========================================= +----------------------------------------- The following example somewhat mimics the behavior of a loop in an application. @@ -57,3 +60,57 @@ based on a timer if the auto-flushing logic was not triggered recently. .. literalinclude:: ../examples/random_data.py :language: python + + +Data Frames +=========== + +Pandas Basics +------------- + +The following example shows how to insert data from a Pandas DataFrame to the +``'trades'`` table. + +.. literalinclude:: ../examples/pandas_basic.py + :language: python + +For details on all options, see the +:func:`questdb.ingress.Buffer.dataframe` method. + + +``pd.Categorical`` and multiple tables +-------------------------------------- + +The next example shows some more advanced features inserting data from Pandas. + +* The data is sent to multiple tables. + +* It uses the ``pd.Categorical`` type to determine the table to insert and also + uses it for the sensor name. + +* Columns of type ``pd.Categorical`` are sent as ``SYMBOL`` types. + +* The ``at`` parameter is specified using a column index: -1 is the last column. + +.. literalinclude:: ../examples/pandas_advanced.py + :language: python + +After running this example, the rows will be split across the ``'humidity'``, +``'temp_c'`` and ``'voc_index'`` tables. + +For details on all options, see the +:func:`questdb.ingress.Buffer.dataframe` method. + +Loading Pandas from a Parquet File +---------------------------------- + +The following example shows how to load a Pandas DataFrame from a Parquet file. 
+ +The example also relies on the dataframe's index name to determine the table +name. + +.. literalinclude:: ../examples/pandas_parquet.py + :language: python + +For details on all options, see the +:func:`questdb.ingress.Buffer.dataframe` method. diff --git a/docs/installation.rst b/docs/installation.rst index 0d926ffe..7f450b3b 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -5,9 +5,9 @@ Installation The Python QuestDB client does not have any additional run-time dependencies and will run on any version of Python >= 3.7 on most platforms and architectures. -You can install it globally by running:: +You can install it (or update it) globally by running:: - python3 -m pip install questdb + python3 -m pip install -U questdb Or, from within a virtual environment:: @@ -20,6 +20,15 @@ If you're using poetry, you can add ``questdb`` as a dependency:: poetry add questdb +Note that the :func:`questdb.ingress.Buffer.dataframe` and the +:func:`questdb.ingress.Sender.dataframe` methods also require the following +dependencies to be installed: + +* ``pandas`` +* ``pyarrow`` +* ``numpy`` + + Verifying the Installation ========================== @@ -34,3 +43,16 @@ following statements from a ``python3`` interactive shell: >>> str(buf) 'test,a=b\n' + +If you also want to check if you can serialize from Pandas +(which requires additional dependencies): + +.. code-block:: python + + >>> import questdb.ingress + >>> import pandas as pd + >>> df = pd.DataFrame({'a': [1, 2]}) + >>> buf = questdb.ingress.Buffer() + >>> buf.dataframe(df, table_name='test') + >>> str(buf) + 'test a=1i\ntest a=2i\n' diff --git a/examples.manifest.yaml b/examples.manifest.yaml index 9379adbf..75a1e3fb 100644 --- a/examples.manifest.yaml +++ b/examples.manifest.yaml @@ -5,6 +5,9 @@ Python client library [docs](https://py-questdb-client.readthedocs.io/en/latest/) and [repo](https://github.com/questdb/py-questdb-client). + See more [examples](https://py-questdb-client.readthedocs.io/en/latest/examples.html), + including ingesting data from Pandas dataframes. + ``` python3 -m pip install questdb ``` @@ -15,6 +18,9 @@ Python client library [docs](https://py-questdb-client.readthedocs.io/en/latest/) and [repo](https://github.com/questdb/py-questdb-client). + See more [examples](https://py-questdb-client.readthedocs.io/en/latest/examples.html), + including ingesting data from Pandas dataframes. + ``` python3 -m pip install questdb ``` diff --git a/examples/pandas_advanced.py b/examples/pandas_advanced.py new file mode 100644 index 00000000..7b163cd4 --- /dev/null +++ b/examples/pandas_advanced.py @@ -0,0 +1,33 @@ +from questdb.ingress import Sender, IngressError + +import sys +import pandas as pd + + +def example(host: str = 'localhost', port: int = 9009): + df = pd.DataFrame({ + 'metric': pd.Categorical( + ['humidity', 'temp_c', 'voc_index', 'temp_c']), + 'sensor': pd.Categorical( + ['paris-01', 'london-02', 'london-01', 'paris-01']), + 'value': [ + 0.83, 22.62, 100.0, 23.62], + 'ts': [ + pd.Timestamp('2022-08-06 07:35:23.189062'), + pd.Timestamp('2022-08-06 07:35:23.189062'), + pd.Timestamp('2022-08-06 07:35:23.189062'), + pd.Timestamp('2022-08-06 07:35:23.189062')]}) + try: + with Sender(host, port) as sender: + sender.dataframe( + df, + table_name_col='metric', # Table name from 'metric' column. + symbols='auto', # Category columns as SYMBOL. (Default) + at=-1) # Last column contains the designated timestamps.
+ + except IngressError as e: + sys.stderr.write(f'Got error: {e}\n') + + +if __name__ == '__main__': + example() diff --git a/examples/pandas_basic.py b/examples/pandas_basic.py new file mode 100644 index 00000000..3c07d7fc --- /dev/null +++ b/examples/pandas_basic.py @@ -0,0 +1,29 @@ +from questdb.ingress import Sender, IngressError + +import sys +import pandas as pd + + +def example(host: str = 'localhost', port: int = 9009): + df = pd.DataFrame({ + 'pair': ['USDGBP', 'EURJPY'], + 'traded_price': [0.83, 142.62], + 'qty': [100, 400], + 'limit_price': [0.84, None], + 'timestamp': [ + pd.Timestamp('2022-08-06 07:35:23.189062', tz='UTC'), + pd.Timestamp('2022-08-06 07:35:23.189062', tz='UTC')]}) + try: + with Sender(host, port) as sender: + sender.dataframe( + df, + table_name='trades', # Table name to insert into. + symbols=['pair'], # Columns to be inserted as SYMBOL types. + at='timestamp') # Column containing the designated timestamps. + + except IngressError as e: + sys.stderr.write(f'Got error: {e}\n') + + +if __name__ == '__main__': + example() diff --git a/examples/pandas_parquet.py b/examples/pandas_parquet.py new file mode 100644 index 00000000..0d3b315d --- /dev/null +++ b/examples/pandas_parquet.py @@ -0,0 +1,43 @@ +from questdb.ingress import Sender +import pandas as pd + + +def write_parquet_file(): + df = pd.DataFrame({ + 'location': pd.Categorical( + ['BP-5541', 'UB-3355', 'SL-0995', 'BP-6653']), + 'provider': pd.Categorical( + ['BP Pulse', 'Ubitricity', 'Source London', 'BP Pulse']), + 'speed_kwh': pd.Categorical( + [50, 7, 7, 120]), + 'connector_type': pd.Categorical( + ['Type 2 & 2+CCS', 'Type 1 & 2', 'Type 1 & 2', 'Type 2 & 2+CCS']), + 'current_type': pd.Categorical( + ['dc', 'ac', 'ac', 'dc']), + 'price_pence': + [54, 34, 32, 59], + 'in_use': + [True, False, False, True], + 'ts': [ + pd.Timestamp('2022-12-30 12:15:00'), + pd.Timestamp('2022-12-30 12:16:00'), + pd.Timestamp('2022-12-30 12:18:00'), + pd.Timestamp('2022-12-30 12:19:00')]}) + name = 'ev_chargers' + df.index.name = name # We set the dataframe's index name here! + filename = f'{name}.parquet' + df.to_parquet(filename) + return filename + + +def example(host: str = 'localhost', port: int = 9009): + filename = write_parquet_file() + + df = pd.read_parquet(filename) + with Sender(host, port) as sender: + # Note: Table name is looked up from the dataframe's index name. + sender.dataframe(df, at='ts') + + +if __name__ == '__main__': + example() diff --git a/install_rust.py b/install_rust.py index a0adacca..d0de3696 100644 --- a/install_rust.py +++ b/install_rust.py @@ -1,4 +1,5 @@ import sys +sys.dont_write_bytecode = True import os import subprocess import pathlib diff --git a/perf/README.md b/perf/README.md new file mode 100644 index 00000000..a6ba49d8 --- /dev/null +++ b/perf/README.md @@ -0,0 +1,28 @@ +# Profiling with Linux Perf + +https://juanjose.garciaripoll.com/blog/profiling-code-with-linux-perf/index.html + +```bash +$ TEST_QUESTDB_PATCH_PATH=1 perf record -g --call-graph dwarf python3 test/benchmark.py -v TestBencharkPandas.test_string_encoding_1m +test_string_encoding_1m (__main__.TestBencharkPandas.test_string_encoding_1m) ... Time: 4.682273147998785, size: 4593750000 +ok + +---------------------------------------------------------------------- +Ran 1 test in 10.166s + +OK +[ perf record: Woken up 1341 times to write data ] +Warning: +Processed 54445 events and lost 91 chunks! + +Check IO/CPU overload! 
+ +[ perf record: Captured and wrote 405.575 MB perf.data (50622 samples) ] +``` + +# Rendering results + +```bash +$ perf script | python3 perf/gprof2dot.py --format=perf | dot -Tsvg > perf/profile_graph.svg +$ (cd perf && python3 -m http.server) +``` \ No newline at end of file diff --git a/perf/gprof2dot.py b/perf/gprof2dot.py new file mode 100644 index 00000000..99554b2a --- /dev/null +++ b/perf/gprof2dot.py @@ -0,0 +1,3555 @@ +#!/usr/bin/env python3 +# +# Copyright 2008-2017 Jose Fonseca +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# + +"""Generate a dot graph from the output of several profilers.""" + +__author__ = "Jose Fonseca et al" + + +import sys +import math +import os.path +import re +import textwrap +import optparse +import xml.parsers.expat +import collections +import locale +import json +import fnmatch + +# Python 2.x/3.x compatibility +if sys.version_info[0] >= 3: + PYTHON_3 = True + def compat_iteritems(x): return x.items() # No iteritems() in Python 3 + def compat_itervalues(x): return x.values() # No itervalues() in Python 3 + def compat_keys(x): return list(x.keys()) # keys() is a generator in Python 3 + basestring = str # No class basestring in Python 3 + unichr = chr # No unichr in Python 3 + xrange = range # No xrange in Python 3 +else: + PYTHON_3 = False + def compat_iteritems(x): return x.iteritems() + def compat_itervalues(x): return x.itervalues() + def compat_keys(x): return x.keys() + + + +######################################################################## +# Model + + +MULTIPLICATION_SIGN = unichr(0xd7) + + +def times(x): + return "%u%s" % (x, MULTIPLICATION_SIGN) + +def percentage(p): + return "%.02f%%" % (p*100.0,) + +def add(a, b): + return a + b + +def fail(a, b): + assert False + + +tol = 2 ** -23 + +def ratio(numerator, denominator): + try: + ratio = float(numerator)/float(denominator) + except ZeroDivisionError: + # 0/0 is undefined, but 1.0 yields more useful results + return 1.0 + if ratio < 0.0: + if ratio < -tol: + sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator)) + return 0.0 + if ratio > 1.0: + if ratio > 1.0 + tol: + sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator)) + return 1.0 + return ratio + + +class UndefinedEvent(Exception): + """Raised when attempting to get an event which is undefined.""" + + def __init__(self, event): + Exception.__init__(self) + self.event = event + + def __str__(self): + return 'unspecified event %s' % self.event.name + + +class Event(object): + """Describe a kind of event, and its basic operations.""" + + def __init__(self, name, null, aggregator, formatter = str): + self.name = name + self._null = null + self._aggregator = aggregator + self._formatter = formatter + + def __eq__(self, other): + return self is other + + def __hash__(self): + return id(self) + + def null(self): + return self._null + + def aggregate(self, val1, val2): + """Aggregate two event 
values.""" + assert val1 is not None + assert val2 is not None + return self._aggregator(val1, val2) + + def format(self, val): + """Format an event value.""" + assert val is not None + return self._formatter(val) + + +CALLS = Event("Calls", 0, add, times) +SAMPLES = Event("Samples", 0, add, times) +SAMPLES2 = Event("Samples", 0, add, times) + +# Count of samples where a given function was either executing or on the stack. +# This is used to calculate the total time ratio according to the +# straightforward method described in Mike Dunlavey's answer to +# stackoverflow.com/questions/1777556/alternatives-to-gprof, item 4 (the myth +# "that recursion is a tricky confusing issue"), last edited 2012-08-30: it's +# just the ratio of TOTAL_SAMPLES over the number of samples in the profile. +# +# Used only when totalMethod == callstacks +TOTAL_SAMPLES = Event("Samples", 0, add, times) + +TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')') +TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')') +TOTAL_TIME = Event("Total time", 0.0, fail) +TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage) + +labels = { + 'self-time': TIME, + 'self-time-percentage': TIME_RATIO, + 'total-time': TOTAL_TIME, + 'total-time-percentage': TOTAL_TIME_RATIO, +} +defaultLabelNames = ['total-time-percentage', 'self-time-percentage'] + +totalMethod = 'callratios' + + +class Object(object): + """Base class for all objects in profile which can store events.""" + + def __init__(self, events=None): + if events is None: + self.events = {} + else: + self.events = events + + def __hash__(self): + return id(self) + + def __eq__(self, other): + return self is other + + def __lt__(self, other): + return id(self) < id(other) + + def __contains__(self, event): + return event in self.events + + def __getitem__(self, event): + try: + return self.events[event] + except KeyError: + raise UndefinedEvent(event) + + def __setitem__(self, event, value): + if value is None: + if event in self.events: + del self.events[event] + else: + self.events[event] = value + + +class Call(Object): + """A call between functions. + + There should be at most one call object for every pair of functions. 
+ """ + + def __init__(self, callee_id): + Object.__init__(self) + self.callee_id = callee_id + self.ratio = None + self.weight = None + + +class Function(Object): + """A function.""" + + def __init__(self, id, name): + Object.__init__(self) + self.id = id + self.name = name + self.module = None + self.process = None + self.calls = {} + self.called = None + self.weight = None + self.cycle = None + self.filename = None + + def add_call(self, call): + if call.callee_id in self.calls: + sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id))) + self.calls[call.callee_id] = call + + def get_call(self, callee_id): + if not callee_id in self.calls: + call = Call(callee_id) + call[SAMPLES] = 0 + call[SAMPLES2] = 0 + call[CALLS] = 0 + self.calls[callee_id] = call + return self.calls[callee_id] + + _parenthesis_re = re.compile(r'\([^()]*\)') + _angles_re = re.compile(r'<[^<>]*>') + _const_re = re.compile(r'\s+const$') + + def stripped_name(self): + """Remove extraneous information from C++ demangled function names.""" + + name = self.name + + # Strip function parameters from name by recursively removing paired parenthesis + while True: + name, n = self._parenthesis_re.subn('', name) + if not n: + break + + # Strip const qualifier + name = self._const_re.sub('', name) + + # Strip template parameters from name by recursively removing paired angles + while True: + name, n = self._angles_re.subn('', name) + if not n: + break + + return name + + # TODO: write utility functions + + def __repr__(self): + return self.name + + def dump(self, sep1=",\n\t", sep2=":=", sep3="\n"): + """ Returns as a string all information available in this Function object + separators sep1:between entries + sep2:between attribute name and value, + sep3: inserted at end + """ + return sep1.join("".join(k,sep2,v) for (k,v) in sorted(self.__dict__.items())) + sep3 + +class Cycle(Object): + """A cycle made from recursive function calls.""" + + def __init__(self): + Object.__init__(self) + self.functions = set() + + def add_function(self, function): + assert function not in self.functions + self.functions.add(function) + if function.cycle is not None: + for other in function.cycle.functions: + if function not in self.functions: + self.add_function(other) + function.cycle = self + + +class Profile(Object): + """The whole profile.""" + + def __init__(self): + Object.__init__(self) + self.functions = {} + self.cycles = [] + + def add_function(self, function): + if function.id in self.functions: + sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id))) + self.functions[function.id] = function + + def add_cycle(self, cycle): + self.cycles.append(cycle) + + def validate(self): + """Validate the edges.""" + + for function in compat_itervalues(self.functions): + for callee_id in compat_keys(function.calls): + assert function.calls[callee_id].callee_id == callee_id + if callee_id not in self.functions: + sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name)) + del function.calls[callee_id] + + def find_cycles(self): + """Find cycles using Tarjan's strongly connected components algorithm.""" + + # Apply the Tarjan's algorithm successively until all functions are visited + stack = [] + data = {} + order = 0 + for function in compat_itervalues(self.functions): + order = self._tarjan(function, order, stack, data) + cycles = [] + for function in compat_itervalues(self.functions): + if 
function.cycle is not None and function.cycle not in cycles: + cycles.append(function.cycle) + self.cycles = cycles + if 0: + for cycle in cycles: + sys.stderr.write("Cycle:\n") + for member in cycle.functions: + sys.stderr.write("\tFunction %s\n" % member.name) + + def prune_root(self, roots, depth=-1): + visited = set() + frontier = set([(root_node, depth) for root_node in roots]) + while len(frontier) > 0: + node, node_depth = frontier.pop() + visited.add(node) + if node_depth == 0: + continue + f = self.functions[node] + newNodes = set(f.calls.keys()) - visited + frontier = frontier.union({(new_node, node_depth - 1) for new_node in newNodes}) + subtreeFunctions = {} + for n in visited: + f = self.functions[n] + newCalls = {} + for c in f.calls.keys(): + if c in visited: + newCalls[c] = f.calls[c] + f.calls = newCalls + subtreeFunctions[n] = f + self.functions = subtreeFunctions + + def prune_leaf(self, leafs, depth=-1): + edgesUp = collections.defaultdict(set) + for f in self.functions.keys(): + for n in self.functions[f].calls.keys(): + edgesUp[n].add(f) + # build the tree up + visited = set() + frontier = set([(leaf_node, depth) for leaf_node in leafs]) + while len(frontier) > 0: + node, node_depth = frontier.pop() + visited.add(node) + if node_depth == 0: + continue + newNodes = edgesUp[node] - visited + frontier = frontier.union({(new_node, node_depth - 1) for new_node in newNodes}) + downTree = set(self.functions.keys()) + upTree = visited + path = downTree.intersection(upTree) + pathFunctions = {} + for n in path: + f = self.functions[n] + newCalls = {} + for c in f.calls.keys(): + if c in path: + newCalls[c] = f.calls[c] + f.calls = newCalls + pathFunctions[n] = f + self.functions = pathFunctions + + def getFunctionIds(self, funcName): + function_names = {v.name: k for (k, v) in self.functions.items()} + return [function_names[name] for name in fnmatch.filter(function_names.keys(), funcName)] + + def getFunctionId(self, funcName): + for f in self.functions: + if self.functions[f].name == funcName: + return f + return False + + def printFunctionIds(self, selector=None, file=sys.stderr): + """ Print to file function entries selected by fnmatch.fnmatch like in + method getFunctionIds, with following extensions: + - selector starts with "%": dump all information available + - selector is '+' or '-': select all function entries + """ + if selector is None or selector in ("+", "*"): + v = ",\n".join(("%s:\t%s" % (kf,self.functions[kf].name) + for kf in self.functions.keys())) + else: + if selector[0]=="%": + selector=selector[1:] + function_info={k:v for (k,v) + in self.functions.items() + if fnmatch.fnmatch(v.name,selector)} + v = ",\n".join( ("%s\t({k})\t(%s)::\n\t%s" % (v.name,type(v),v.dump()) + for (k,v) in function_info.items() + )) + + else: + function_names = (v.name for v in self.functions.values()) + v = ",\n".join( ( nm for nm in fnmatch.filter(function_names,selector ))) + + file.write(v+"\n") + file.flush() + + class _TarjanData: + def __init__(self, order): + self.order = order + self.lowlink = order + self.onstack = False + + def _tarjan(self, function, order, stack, data): + """Tarjan's strongly connected components algorithm. 
+ + See also: + - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm + """ + + try: + func_data = data[function.id] + return order + except KeyError: + func_data = self._TarjanData(order) + data[function.id] = func_data + order += 1 + pos = len(stack) + stack.append(function) + func_data.onstack = True + for call in compat_itervalues(function.calls): + try: + callee_data = data[call.callee_id] + if callee_data.onstack: + func_data.lowlink = min(func_data.lowlink, callee_data.order) + except KeyError: + callee = self.functions[call.callee_id] + order = self._tarjan(callee, order, stack, data) + callee_data = data[call.callee_id] + func_data.lowlink = min(func_data.lowlink, callee_data.lowlink) + if func_data.lowlink == func_data.order: + # Strongly connected component found + members = stack[pos:] + del stack[pos:] + if len(members) > 1: + cycle = Cycle() + for member in members: + cycle.add_function(member) + data[member.id].onstack = False + else: + for member in members: + data[member.id].onstack = False + return order + + def call_ratios(self, event): + # Aggregate for incoming calls + cycle_totals = {} + for cycle in self.cycles: + cycle_totals[cycle] = 0.0 + function_totals = {} + for function in compat_itervalues(self.functions): + function_totals[function] = 0.0 + + # Pass 1: function_total gets the sum of call[event] for all + # incoming arrows. Same for cycle_total for all arrows + # that are coming into the *cycle* but are not part of it. + for function in compat_itervalues(self.functions): + for call in compat_itervalues(function.calls): + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if event in call.events: + function_totals[callee] += call[event] + if callee.cycle is not None and callee.cycle is not function.cycle: + cycle_totals[callee.cycle] += call[event] + else: + sys.stderr.write("call_ratios: No data for " + function.name + " call to " + callee.name + "\n") + + # Pass 2: Compute the ratios. Each call[event] is scaled by the + # function_total of the callee. Calls into cycles use the + # cycle_total, but not calls within cycles. + for function in compat_itervalues(self.functions): + for call in compat_itervalues(function.calls): + assert call.ratio is None + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if event in call.events: + if callee.cycle is not None and callee.cycle is not function.cycle: + total = cycle_totals[callee.cycle] + else: + total = function_totals[callee] + call.ratio = ratio(call[event], total) + else: + # Warnings here would only repeat those issued above. + call.ratio = 0.0 + + def integrate(self, outevent, inevent): + """Propagate function time ratio along the function calls. + + Must be called after finding the cycles. 
+ + See also: + - http://citeseer.ist.psu.edu/graham82gprof.html + """ + + # Sanity checking + assert outevent not in self + for function in compat_itervalues(self.functions): + assert outevent not in function + assert inevent in function + for call in compat_itervalues(function.calls): + assert outevent not in call + if call.callee_id != function.id: + assert call.ratio is not None + + # Aggregate the input for each cycle + for cycle in self.cycles: + total = inevent.null() + for function in compat_itervalues(self.functions): + total = inevent.aggregate(total, function[inevent]) + self[inevent] = total + + # Integrate along the edges + total = inevent.null() + for function in compat_itervalues(self.functions): + total = inevent.aggregate(total, function[inevent]) + self._integrate_function(function, outevent, inevent) + self[outevent] = total + + def _integrate_function(self, function, outevent, inevent): + if function.cycle is not None: + return self._integrate_cycle(function.cycle, outevent, inevent) + else: + if outevent not in function: + total = function[inevent] + for call in compat_itervalues(function.calls): + if call.callee_id != function.id: + total += self._integrate_call(call, outevent, inevent) + function[outevent] = total + return function[outevent] + + def _integrate_call(self, call, outevent, inevent): + assert outevent not in call + assert call.ratio is not None + callee = self.functions[call.callee_id] + subtotal = call.ratio *self._integrate_function(callee, outevent, inevent) + call[outevent] = subtotal + return subtotal + + def _integrate_cycle(self, cycle, outevent, inevent): + if outevent not in cycle: + + # Compute the outevent for the whole cycle + total = inevent.null() + for member in cycle.functions: + subtotal = member[inevent] + for call in compat_itervalues(member.calls): + callee = self.functions[call.callee_id] + if callee.cycle is not cycle: + subtotal += self._integrate_call(call, outevent, inevent) + total += subtotal + cycle[outevent] = total + + # Compute the time propagated to callers of this cycle + callees = {} + for function in compat_itervalues(self.functions): + if function.cycle is not cycle: + for call in compat_itervalues(function.calls): + callee = self.functions[call.callee_id] + if callee.cycle is cycle: + try: + callees[callee] += call.ratio + except KeyError: + callees[callee] = call.ratio + + for member in cycle.functions: + member[outevent] = outevent.null() + + for callee, call_ratio in compat_iteritems(callees): + ranks = {} + call_ratios = {} + partials = {} + self._rank_cycle_function(cycle, callee, ranks) + self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set()) + partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent) + + # Ensure `partial == max(partials.values())`, but with round-off tolerance + max_partial = max(partials.values()) + assert abs(partial - max_partial) <= 1e-7*max_partial + + assert abs(call_ratio*total - partial) <= 0.001*call_ratio*total + + return cycle[outevent] + + def _rank_cycle_function(self, cycle, function, ranks): + """Dijkstra's shortest paths algorithm. 
+ + See also: + - http://en.wikipedia.org/wiki/Dijkstra's_algorithm + """ + + import heapq + Q = [] + Qd = {} + p = {} + visited = set([function]) + + ranks[function] = 0 + for call in compat_itervalues(function.calls): + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if callee.cycle is cycle: + ranks[callee] = 1 + item = [ranks[callee], function, callee] + heapq.heappush(Q, item) + Qd[callee] = item + + while Q: + cost, parent, member = heapq.heappop(Q) + if member not in visited: + p[member]= parent + visited.add(member) + for call in compat_itervalues(member.calls): + if call.callee_id != member.id: + callee = self.functions[call.callee_id] + if callee.cycle is cycle: + member_rank = ranks[member] + rank = ranks.get(callee) + if rank is not None: + if rank > 1 + member_rank: + rank = 1 + member_rank + ranks[callee] = rank + Qd_callee = Qd[callee] + Qd_callee[0] = rank + Qd_callee[1] = member + heapq._siftdown(Q, 0, Q.index(Qd_callee)) + else: + rank = 1 + member_rank + ranks[callee] = rank + item = [rank, member, callee] + heapq.heappush(Q, item) + Qd[callee] = item + + def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited): + if function not in visited: + visited.add(function) + for call in compat_itervalues(function.calls): + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if callee.cycle is cycle: + if ranks[callee] > ranks[function]: + call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio + self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited) + + def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent): + if function not in partials: + partial = partial_ratio*function[inevent] + for call in compat_itervalues(function.calls): + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if callee.cycle is not cycle: + assert outevent in call + partial += partial_ratio*call[outevent] + else: + if ranks[callee] > ranks[function]: + callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent) + call_ratio = ratio(call.ratio, call_ratios[callee]) + call_partial = call_ratio*callee_partial + try: + call[outevent] += call_partial + except UndefinedEvent: + call[outevent] = call_partial + partial += call_partial + partials[function] = partial + try: + function[outevent] += partial + except UndefinedEvent: + function[outevent] = partial + return partials[function] + + def aggregate(self, event): + """Aggregate an event for the whole profile.""" + + total = event.null() + for function in compat_itervalues(self.functions): + try: + total = event.aggregate(total, function[event]) + except UndefinedEvent: + return + self[event] = total + + def ratio(self, outevent, inevent): + assert outevent not in self + assert inevent in self + for function in compat_itervalues(self.functions): + assert outevent not in function + assert inevent in function + function[outevent] = ratio(function[inevent], self[inevent]) + for call in compat_itervalues(function.calls): + assert outevent not in call + if inevent in call: + call[outevent] = ratio(call[inevent], self[inevent]) + self[outevent] = 1.0 + + def prune(self, node_thres, edge_thres, paths, color_nodes_by_selftime): + """Prune the profile""" + + # compute the prune ratios + for function in compat_itervalues(self.functions): + try: + function.weight = function[TOTAL_TIME_RATIO] + except UndefinedEvent: + pass + 
+ for call in compat_itervalues(function.calls): + callee = self.functions[call.callee_id] + + if TOTAL_TIME_RATIO in call: + # handle exact cases first + call.weight = call[TOTAL_TIME_RATIO] + else: + try: + # make a safe estimate + call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) + except UndefinedEvent: + pass + + # prune the nodes + for function_id in compat_keys(self.functions): + function = self.functions[function_id] + if function.weight is not None: + if function.weight < node_thres: + del self.functions[function_id] + + # prune file paths + for function_id in compat_keys(self.functions): + function = self.functions[function_id] + if paths and function.filename and not any(function.filename.startswith(path) for path in paths): + del self.functions[function_id] + elif paths and function.module and not any((function.module.find(path)>-1) for path in paths): + del self.functions[function_id] + + # prune the edges + for function in compat_itervalues(self.functions): + for callee_id in compat_keys(function.calls): + call = function.calls[callee_id] + if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres: + del function.calls[callee_id] + + if color_nodes_by_selftime: + weights = [] + for function in compat_itervalues(self.functions): + try: + weights.append(function[TIME_RATIO]) + except UndefinedEvent: + pass + max_ratio = max(weights or [1]) + + # apply rescaled weights for coloriung + for function in compat_itervalues(self.functions): + try: + function.weight = function[TIME_RATIO] / max_ratio + except (ZeroDivisionError, UndefinedEvent): + pass + + def dump(self): + for function in compat_itervalues(self.functions): + sys.stderr.write('Function %s:\n' % (function.name,)) + self._dump_events(function.events) + for call in compat_itervalues(function.calls): + callee = self.functions[call.callee_id] + sys.stderr.write(' Call %s:\n' % (callee.name,)) + self._dump_events(call.events) + for cycle in self.cycles: + sys.stderr.write('Cycle:\n') + self._dump_events(cycle.events) + for function in cycle.functions: + sys.stderr.write(' Function %s\n' % (function.name,)) + + def _dump_events(self, events): + for event, value in compat_iteritems(events): + sys.stderr.write(' %s: %s\n' % (event.name, event.format(value))) + + + +######################################################################## +# Parsers + + +class Struct: + """Masquerade a dictionary with a structure-like behavior.""" + + def __init__(self, attrs = None): + if attrs is None: + attrs = {} + self.__dict__['_attrs'] = attrs + + def __getattr__(self, name): + try: + return self._attrs[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + self._attrs[name] = value + + def __str__(self): + return str(self._attrs) + + def __repr__(self): + return repr(self._attrs) + + +class ParseError(Exception): + """Raised when parsing to signal mismatches.""" + + def __init__(self, msg, line): + Exception.__init__(self) + self.msg = msg + # TODO: store more source line information + self.line = line + + def __str__(self): + return '%s: %r' % (self.msg, self.line) + + +class Parser: + """Parser interface.""" + + stdinInput = True + multipleInput = False + + def __init__(self): + pass + + def parse(self): + raise NotImplementedError + + +class JsonParser(Parser): + """Parser for a custom JSON representation of profile data. + + See schema.json for details. 
+ """ + + + def __init__(self, stream): + Parser.__init__(self) + self.stream = stream + + def parse(self): + + obj = json.load(self.stream) + + assert obj['version'] == 0 + + profile = Profile() + profile[SAMPLES] = 0 + + fns = obj['functions'] + + for functionIndex in range(len(fns)): + fn = fns[functionIndex] + function = Function(functionIndex, fn['name']) + try: + function.module = fn['module'] + except KeyError: + pass + try: + function.process = fn['process'] + except KeyError: + pass + function[SAMPLES] = 0 + function.called = 0 + profile.add_function(function) + + for event in obj['events']: + callchain = [] + + for functionIndex in event['callchain']: + function = profile.functions[functionIndex] + callchain.append(function) + + # increment the call count of the first in the callchain + function = profile.functions[event['callchain'][0]] + function.called = function.called + 1 + + cost = event['cost'][0] + + callee = callchain[0] + callee[SAMPLES] += cost + profile[SAMPLES] += cost + + for caller in callchain[1:]: + try: + call = caller.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[SAMPLES2] = cost + caller.add_call(call) + else: + call[SAMPLES2] += cost + + callee = caller + + if False: + profile.dump() + + # compute derived data + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + +class LineParser(Parser): + """Base class for parsers that read line-based formats.""" + + def __init__(self, stream): + Parser.__init__(self) + self._stream = stream + self.__line = None + self.__eof = False + self.line_no = 0 + + def readline(self): + line = self._stream.readline() + if not line: + self.__line = '' + self.__eof = True + else: + self.line_no += 1 + line = line.rstrip('\r\n') + if not PYTHON_3: + encoding = self._stream.encoding + if encoding is None: + encoding = locale.getpreferredencoding() + line = line.decode(encoding) + self.__line = line + + def lookahead(self): + assert self.__line is not None + return self.__line + + def consume(self): + assert self.__line is not None + line = self.__line + self.readline() + return line + + def eof(self): + assert self.__line is not None + return self.__eof + + +XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4) + + +class XmlToken: + + def __init__(self, type, name_or_data, attrs = None, line = None, column = None): + assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF) + self.type = type + self.name_or_data = name_or_data + self.attrs = attrs + self.line = line + self.column = column + + def __str__(self): + if self.type == XML_ELEMENT_START: + return '<' + self.name_or_data + ' ...>' + if self.type == XML_ELEMENT_END: + return '' + if self.type == XML_CHARACTER_DATA: + return self.name_or_data + if self.type == XML_EOF: + return 'end of file' + assert 0 + + +class XmlTokenizer: + """Expat based XML tokenizer.""" + + def __init__(self, fp, skip_ws = True): + self.fp = fp + self.tokens = [] + self.index = 0 + self.final = False + self.skip_ws = skip_ws + + self.character_pos = 0, 0 + self.character_data = '' + + self.parser = xml.parsers.expat.ParserCreate() + self.parser.StartElementHandler = self.handle_element_start + self.parser.EndElementHandler = self.handle_element_end + self.parser.CharacterDataHandler = self.handle_character_data + + def handle_element_start(self, name, attributes): + self.finish_character_data() + line, column 
= self.pos() + token = XmlToken(XML_ELEMENT_START, name, attributes, line, column) + self.tokens.append(token) + + def handle_element_end(self, name): + self.finish_character_data() + line, column = self.pos() + token = XmlToken(XML_ELEMENT_END, name, None, line, column) + self.tokens.append(token) + + def handle_character_data(self, data): + if not self.character_data: + self.character_pos = self.pos() + self.character_data += data + + def finish_character_data(self): + if self.character_data: + if not self.skip_ws or not self.character_data.isspace(): + line, column = self.character_pos + token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column) + self.tokens.append(token) + self.character_data = '' + + def next(self): + size = 16*1024 + while self.index >= len(self.tokens) and not self.final: + self.tokens = [] + self.index = 0 + data = self.fp.read(size) + self.final = len(data) < size + self.parser.Parse(data, self.final) + if self.index >= len(self.tokens): + line, column = self.pos() + token = XmlToken(XML_EOF, None, None, line, column) + else: + token = self.tokens[self.index] + self.index += 1 + return token + + def pos(self): + return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber + + +class XmlTokenMismatch(Exception): + + def __init__(self, expected, found): + Exception.__init__(self) + self.expected = expected + self.found = found + + def __str__(self): + return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found)) + + +class XmlParser(Parser): + """Base XML document parser.""" + + def __init__(self, fp): + Parser.__init__(self) + self.tokenizer = XmlTokenizer(fp) + self.consume() + + def consume(self): + self.token = self.tokenizer.next() + + def match_element_start(self, name): + return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name + + def match_element_end(self, name): + return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name + + def element_start(self, name): + while self.token.type == XML_CHARACTER_DATA: + self.consume() + if self.token.type != XML_ELEMENT_START: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) + if self.token.name_or_data != name: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) + attrs = self.token.attrs + self.consume() + return attrs + + def element_end(self, name): + while self.token.type == XML_CHARACTER_DATA: + self.consume() + if self.token.type != XML_ELEMENT_END: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) + if self.token.name_or_data != name: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) + self.consume() + + def character_data(self, strip = True): + data = '' + while self.token.type == XML_CHARACTER_DATA: + data += self.token.name_or_data + self.consume() + if strip: + data = data.strip() + return data + + +class GprofParser(Parser): + """Parser for GNU gprof output. 
+ + See also: + - Chapter "Interpreting gprof's Output" from the GNU gprof manual + http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph + - File "cg_print.c" from the GNU gprof source code + http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src + """ + + def __init__(self, fp): + Parser.__init__(self) + self.fp = fp + self.functions = {} + self.cycles = {} + + def readline(self): + line = self.fp.readline() + if not line: + sys.stderr.write('error: unexpected end of file\n') + sys.exit(1) + line = line.rstrip('\r\n') + return line + + _int_re = re.compile(r'^\d+$') + _float_re = re.compile(r'^\d+\.\d+$') + + def translate(self, mo): + """Extract a structure from a match object, while translating the types in the process.""" + attrs = {} + groupdict = mo.groupdict() + for name, value in compat_iteritems(groupdict): + if value is None: + value = None + elif self._int_re.match(value): + value = int(value) + elif self._float_re.match(value): + value = float(value) + attrs[name] = (value) + return Struct(attrs) + + _cg_header_re = re.compile( + # original gprof header + r'^\s+called/total\s+parents\s*$|' + + r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' + + r'^\s+called/total\s+children\s*$|' + + # GNU gprof header + r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$' + ) + + _cg_ignore_re = re.compile( + # spontaneous + r'^\s+\s*$|' + # internal calls (such as "mcount") + r'^.*\((\d+)\)$' + ) + + _cg_primary_re = re.compile( + r'^\[(?P\d+)\]?' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?:(?P\d+)(?:\+(?P\d+))?)?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' + + r'\s\[(\d+)\]$' + ) + + _cg_parent_re = re.compile( + r'^\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+)(?:/(?P\d+))?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' + + r'\s\[(?P\d+)\]$' + ) + + _cg_child_re = _cg_parent_re + + _cg_cycle_header_re = re.compile( + r'^\[(?P\d+)\]?' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?:(?P\d+)(?:\+(?P\d+))?)?' + + r'\s+\d+)\sas\sa\swhole>' + + r'\s\[(\d+)\]$' + ) + + _cg_cycle_member_re = re.compile( + r'^\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+)(?:\+(?P\d+))?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' 
+ + r'\s\[(?P\d+)\]$' + ) + + _cg_sep_re = re.compile(r'^--+$') + + def parse_function_entry(self, lines): + parents = [] + children = [] + + while True: + if not lines: + sys.stderr.write('warning: unexpected end of entry\n') + line = lines.pop(0) + if line.startswith('['): + break + + # read function parent line + mo = self._cg_parent_re.match(line) + if not mo: + if self._cg_ignore_re.match(line): + continue + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + else: + parent = self.translate(mo) + parents.append(parent) + + # read primary line + mo = self._cg_primary_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + return + else: + function = self.translate(mo) + + while lines: + line = lines.pop(0) + + # read function subroutine line + mo = self._cg_child_re.match(line) + if not mo: + if self._cg_ignore_re.match(line): + continue + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + else: + child = self.translate(mo) + children.append(child) + + function.parents = parents + function.children = children + + self.functions[function.index] = function + + def parse_cycle_entry(self, lines): + + # read cycle header line + line = lines[0] + mo = self._cg_cycle_header_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + return + cycle = self.translate(mo) + + # read cycle member lines + cycle.functions = [] + for line in lines[1:]: + mo = self._cg_cycle_member_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + continue + call = self.translate(mo) + cycle.functions.append(call) + + self.cycles[cycle.cycle] = cycle + + def parse_cg_entry(self, lines): + if lines[0].startswith("["): + self.parse_cycle_entry(lines) + else: + self.parse_function_entry(lines) + + def parse_cg(self): + """Parse the call graph.""" + + # skip call graph header + while not self._cg_header_re.match(self.readline()): + pass + line = self.readline() + while self._cg_header_re.match(line): + line = self.readline() + + # process call graph entries + entry_lines = [] + while line != '\014': # form feed + if line and not line.isspace(): + if self._cg_sep_re.match(line): + self.parse_cg_entry(entry_lines) + entry_lines = [] + else: + entry_lines.append(line) + line = self.readline() + + def parse(self): + self.parse_cg() + self.fp.close() + + profile = Profile() + profile[TIME] = 0.0 + + cycles = {} + for index in self.cycles: + cycles[index] = Cycle() + + for entry in compat_itervalues(self.functions): + # populate the function + function = Function(entry.index, entry.name) + function[TIME] = entry.self + if entry.called is not None: + function.called = entry.called + if entry.called_self is not None: + call = Call(entry.index) + call[CALLS] = entry.called_self + function.called += entry.called_self + + # populate the function calls + for child in entry.children: + call = Call(child.index) + + assert child.called is not None + call[CALLS] = child.called + + if child.index not in self.functions: + # NOTE: functions that were never called but were discovered by gprof's + # static call graph analysis dont have a call graph entry so we need + # to add them here + missing = Function(child.index, child.name) + function[TIME] = 0.0 + function.called = 0 + profile.add_function(missing) + + function.add_call(call) + + profile.add_function(function) + + if entry.cycle is not None: + try: + cycle = cycles[entry.cycle] + except KeyError: 
+ sys.stderr.write('warning: entry missing\n' % entry.cycle) + cycle = Cycle() + cycles[entry.cycle] = cycle + cycle.add_function(function) + + profile[TIME] = profile[TIME] + function[TIME] + + for cycle in compat_itervalues(cycles): + profile.add_cycle(cycle) + + # Compute derived events + profile.validate() + profile.ratio(TIME_RATIO, TIME) + profile.call_ratios(CALLS) + profile.integrate(TOTAL_TIME, TIME) + profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) + + return profile + + +# Clone&hack of GprofParser for VTune Amplifier XE 2013 gprof-cc output. +# Tested only with AXE 2013 for Windows. +# - Use total times as reported by AXE. +# - In the absence of call counts, call ratios are faked from the relative +# proportions of total time. This affects only the weighting of the calls. +# - Different header, separator, and end marker. +# - Extra whitespace after function names. +# - You get a full entry for , which does not have parents. +# - Cycles do have parents. These are saved but unused (as they are +# for functions). +# - Disambiguated "unrecognized call graph entry" error messages. +# Notes: +# - Total time of functions as reported by AXE passes the val3 test. +# - CPU Time:Children in the input is sometimes a negative number. This +# value goes to the variable descendants, which is unused. +# - The format of gprof-cc reports is unaffected by the use of +# -knob enable-call-counts=true (no call counts, ever), or +# -show-as=samples (results are quoted in seconds regardless). +class AXEParser(Parser): + "Parser for VTune Amplifier XE 2013 gprof-cc report output." + + def __init__(self, fp): + Parser.__init__(self) + self.fp = fp + self.functions = {} + self.cycles = {} + + def readline(self): + line = self.fp.readline() + if not line: + sys.stderr.write('error: unexpected end of file\n') + sys.exit(1) + line = line.rstrip('\r\n') + return line + + _int_re = re.compile(r'^\d+$') + _float_re = re.compile(r'^\d+\.\d+$') + + def translate(self, mo): + """Extract a structure from a match object, while translating the types in the process.""" + attrs = {} + groupdict = mo.groupdict() + for name, value in compat_iteritems(groupdict): + if value is None: + value = None + elif self._int_re.match(value): + value = int(value) + elif self._float_re.match(value): + value = float(value) + attrs[name] = (value) + return Struct(attrs) + + _cg_header_re = re.compile( + '^Index |' + '^-----+ ' + ) + + _cg_footer_re = re.compile(r'^Index\s+Function\s*$') + + _cg_primary_re = re.compile( + r'^\[(?P\d+)\]?' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' + + r'\s+\[(\d+)\]' + + r'\s*$' + ) + + _cg_parent_re = re.compile( + r'^\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+\.\d+)?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' + + r'(?:\s+\[(?P\d+)\]\s*)?' + + r'\s*$' + ) + + _cg_child_re = _cg_parent_re + + _cg_cycle_header_re = re.compile( + r'^\[(?P\d+)\]?' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+\d+)\sas\sa\swhole>' + + r'\s+\[(\d+)\]' + + r'\s*$' + ) + + _cg_cycle_member_re = re.compile( + r'^\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+\.\d+)?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' 
+ + r'\s+\[(?P\d+)\]' + + r'\s*$' + ) + + def parse_function_entry(self, lines): + parents = [] + children = [] + + while True: + if not lines: + sys.stderr.write('warning: unexpected end of entry\n') + return + line = lines.pop(0) + if line.startswith('['): + break + + # read function parent line + mo = self._cg_parent_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry (1): %r\n' % line) + else: + parent = self.translate(mo) + if parent.name != '': + parents.append(parent) + + # read primary line + mo = self._cg_primary_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry (2): %r\n' % line) + return + else: + function = self.translate(mo) + + while lines: + line = lines.pop(0) + + # read function subroutine line + mo = self._cg_child_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry (3): %r\n' % line) + else: + child = self.translate(mo) + if child.name != '': + children.append(child) + + if function.name != '': + function.parents = parents + function.children = children + + self.functions[function.index] = function + + def parse_cycle_entry(self, lines): + + # Process the parents that were not there in gprof format. + parents = [] + while True: + if not lines: + sys.stderr.write('warning: unexpected end of cycle entry\n') + return + line = lines.pop(0) + if line.startswith('['): + break + mo = self._cg_parent_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry (6): %r\n' % line) + else: + parent = self.translate(mo) + if parent.name != '': + parents.append(parent) + + # read cycle header line + mo = self._cg_cycle_header_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry (4): %r\n' % line) + return + cycle = self.translate(mo) + + # read cycle member lines + cycle.functions = [] + for line in lines[1:]: + mo = self._cg_cycle_member_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry (5): %r\n' % line) + continue + call = self.translate(mo) + cycle.functions.append(call) + + cycle.parents = parents + self.cycles[cycle.cycle] = cycle + + def parse_cg_entry(self, lines): + if any("as a whole" in linelooper for linelooper in lines): + self.parse_cycle_entry(lines) + else: + self.parse_function_entry(lines) + + def parse_cg(self): + """Parse the call graph.""" + + # skip call graph header + line = self.readline() + while self._cg_header_re.match(line): + line = self.readline() + + # process call graph entries + entry_lines = [] + # An EOF in readline terminates the program without returning. + while not self._cg_footer_re.match(line): + if line.isspace(): + self.parse_cg_entry(entry_lines) + entry_lines = [] + else: + entry_lines.append(line) + line = self.readline() + + def parse(self): + sys.stderr.write('warning: for axe format, edge weights are unreliable estimates derived from function total times.\n') + self.parse_cg() + self.fp.close() + + profile = Profile() + profile[TIME] = 0.0 + + cycles = {} + for index in self.cycles: + cycles[index] = Cycle() + + for entry in compat_itervalues(self.functions): + # populate the function + function = Function(entry.index, entry.name) + function[TIME] = entry.self + function[TOTAL_TIME_RATIO] = entry.percentage_time / 100.0 + + # populate the function calls + for child in entry.children: + call = Call(child.index) + # The following bogus value affects only the weighting of + # the calls. 
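+                # Descriptive note (added): AXE gprof-cc reports carry no call counts,
+                # so each call borrows the caller's total-time ratio as a stand-in
+                # weight (see the format notes above the AXEParser class).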
+ call[TOTAL_TIME_RATIO] = function[TOTAL_TIME_RATIO] + + if child.index not in self.functions: + # NOTE: functions that were never called but were discovered by gprof's + # static call graph analysis dont have a call graph entry so we need + # to add them here + # FIXME: Is this applicable? + missing = Function(child.index, child.name) + function[TIME] = 0.0 + profile.add_function(missing) + + function.add_call(call) + + profile.add_function(function) + + if entry.cycle is not None: + try: + cycle = cycles[entry.cycle] + except KeyError: + sys.stderr.write('warning: entry missing\n' % entry.cycle) + cycle = Cycle() + cycles[entry.cycle] = cycle + cycle.add_function(function) + + profile[TIME] = profile[TIME] + function[TIME] + + for cycle in compat_itervalues(cycles): + profile.add_cycle(cycle) + + # Compute derived events. + profile.validate() + profile.ratio(TIME_RATIO, TIME) + # Lacking call counts, fake call ratios based on total times. + profile.call_ratios(TOTAL_TIME_RATIO) + # The TOTAL_TIME_RATIO of functions is already set. Propagate that + # total time to the calls. (TOTAL_TIME is neither set nor used.) + for function in compat_itervalues(profile.functions): + for call in compat_itervalues(function.calls): + if call.ratio is not None: + callee = profile.functions[call.callee_id] + call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO] + + return profile + + +class CallgrindParser(LineParser): + """Parser for valgrind's callgrind tool. + + See also: + - http://valgrind.org/docs/manual/cl-format.html + """ + + _call_re = re.compile(r'^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$') + + def __init__(self, infile): + LineParser.__init__(self, infile) + + # Textual positions + self.position_ids = {} + self.positions = {} + + # Numeric positions + self.num_positions = 1 + self.cost_positions = ['line'] + self.last_positions = [0] + + # Events + self.num_events = 0 + self.cost_events = [] + + self.profile = Profile() + self.profile[SAMPLES] = 0 + + def parse(self): + # read lookahead + self.readline() + + self.parse_key('version') + self.parse_key('creator') + while self.parse_part(): + pass + if not self.eof(): + sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no) + sys.stderr.write('%s\n' % self.lookahead()) + + # compute derived data + self.profile.validate() + self.profile.find_cycles() + self.profile.ratio(TIME_RATIO, SAMPLES) + self.profile.call_ratios(SAMPLES2) + self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return self.profile + + def parse_part(self): + if not self.parse_header_line(): + return False + while self.parse_header_line(): + pass + if not self.parse_body_line(): + return False + while self.parse_body_line(): + pass + return True + + def parse_header_line(self): + return \ + self.parse_empty() or \ + self.parse_comment() or \ + self.parse_part_detail() or \ + self.parse_description() or \ + self.parse_event_specification() or \ + self.parse_cost_line_def() or \ + self.parse_cost_summary() + + _detail_keys = set(('cmd', 'pid', 'thread', 'part')) + + def parse_part_detail(self): + return self.parse_keys(self._detail_keys) + + def parse_description(self): + return self.parse_key('desc') is not None + + def parse_event_specification(self): + event = self.parse_key('event') + if event is None: + return False + return True + + def parse_cost_line_def(self): + pair = self.parse_keys(('events', 'positions')) + if pair is None: + return False + key, value = pair + items = value.split() + if key == 'events': + self.num_events = len(items) + 
self.cost_events = items + if key == 'positions': + self.num_positions = len(items) + self.cost_positions = items + self.last_positions = [0]*self.num_positions + return True + + def parse_cost_summary(self): + pair = self.parse_keys(('summary', 'totals')) + if pair is None: + return False + return True + + def parse_body_line(self): + return \ + self.parse_empty() or \ + self.parse_comment() or \ + self.parse_cost_line() or \ + self.parse_position_spec() or \ + self.parse_association_spec() + + __subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)' + _cost_re = re.compile(r'^' + + __subpos_re + r'( +' + __subpos_re + r')*' + + r'( +\d+)*' + + '$') + + def parse_cost_line(self, calls=None): + line = self.lookahead().rstrip() + mo = self._cost_re.match(line) + if not mo: + return False + + function = self.get_function() + + if calls is None: + # Unlike other aspects, call object (cob) is relative not to the + # last call object, but to the caller's object (ob), so try to + # update it when processing a functions cost line + try: + self.positions['cob'] = self.positions['ob'] + except KeyError: + pass + + values = line.split() + assert len(values) <= self.num_positions + self.num_events + + positions = values[0 : self.num_positions] + events = values[self.num_positions : ] + events += ['0']*(self.num_events - len(events)) + + for i in range(self.num_positions): + position = positions[i] + if position == '*': + position = self.last_positions[i] + elif position[0] in '-+': + position = self.last_positions[i] + int(position) + elif position.startswith('0x'): + position = int(position, 16) + else: + position = int(position) + self.last_positions[i] = position + + events = [float(event) for event in events] + + if calls is None: + function[SAMPLES] += events[0] + self.profile[SAMPLES] += events[0] + else: + callee = self.get_callee() + callee.called += calls + + try: + call = function.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[CALLS] = calls + call[SAMPLES2] = events[0] + function.add_call(call) + else: + call[CALLS] += calls + call[SAMPLES2] += events[0] + + self.consume() + return True + + def parse_association_spec(self): + line = self.lookahead() + if not line.startswith('calls='): + return False + + _, values = line.split('=', 1) + values = values.strip().split() + calls = int(values[0]) + call_position = values[1:] + self.consume() + + self.parse_cost_line(calls) + + return True + + _position_re = re.compile(r'^(?P[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P\d+)\))?(?:\s*(?P.+))?') + + _position_table_map = { + 'ob': 'ob', + 'fl': 'fl', + 'fi': 'fl', + 'fe': 'fl', + 'fn': 'fn', + 'cob': 'ob', + 'cfl': 'fl', + 'cfi': 'fl', + 'cfe': 'fl', + 'cfn': 'fn', + 'jfi': 'fl', + } + + _position_map = { + 'ob': 'ob', + 'fl': 'fl', + 'fi': 'fl', + 'fe': 'fl', + 'fn': 'fn', + 'cob': 'cob', + 'cfl': 'cfl', + 'cfi': 'cfl', + 'cfe': 'cfl', + 'cfn': 'cfn', + 'jfi': 'jfi', + } + + def parse_position_spec(self): + line = self.lookahead() + + if line.startswith('jump=') or line.startswith('jcnd='): + self.consume() + return True + + mo = self._position_re.match(line) + if not mo: + return False + + position, id, name = mo.groups() + if id: + table = self._position_table_map[position] + if name: + self.position_ids[(table, id)] = name + else: + name = self.position_ids.get((table, id), '') + self.positions[self._position_map[position]] = name + + self.consume() + return True + + def parse_empty(self): + if self.eof(): + return False + line = self.lookahead() + if line.strip(): + return False + 
self.consume() + return True + + def parse_comment(self): + line = self.lookahead() + if not line.startswith('#'): + return False + self.consume() + return True + + _key_re = re.compile(r'^(\w+):') + + def parse_key(self, key): + pair = self.parse_keys((key,)) + if not pair: + return None + key, value = pair + return value + + def parse_keys(self, keys): + line = self.lookahead() + mo = self._key_re.match(line) + if not mo: + return None + key, value = line.split(':', 1) + if key not in keys: + return None + value = value.strip() + self.consume() + return key, value + + def make_function(self, module, filename, name): + # FIXME: module and filename are not being tracked reliably + #id = '|'.join((module, filename, name)) + id = name + try: + function = self.profile.functions[id] + except KeyError: + function = Function(id, name) + if module: + function.module = os.path.basename(module) + function[SAMPLES] = 0 + function.called = 0 + self.profile.add_function(function) + return function + + def get_function(self): + module = self.positions.get('ob', '') + filename = self.positions.get('fl', '') + function = self.positions.get('fn', '') + return self.make_function(module, filename, function) + + def get_callee(self): + module = self.positions.get('cob', '') + filename = self.positions.get('cfi', '') + function = self.positions.get('cfn', '') + return self.make_function(module, filename, function) + + def readline(self): + # Override LineParser.readline to ignore comment lines + while True: + LineParser.readline(self) + if self.eof() or not self.lookahead().startswith('#'): + break + + +class PerfParser(LineParser): + """Parser for linux perf callgraph output. + + It expects output generated with + + perf record -g + perf script | gprof2dot.py --format=perf + """ + + def __init__(self, infile): + LineParser.__init__(self, infile) + self.profile = Profile() + + def readline(self): + # Override LineParser.readline to ignore comment lines + while True: + LineParser.readline(self) + if self.eof() or not self.lookahead().startswith('#'): + break + + def parse(self): + # read lookahead + self.readline() + + profile = self.profile + profile[SAMPLES] = 0 + while not self.eof(): + self.parse_event() + + # compute derived data + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + if totalMethod == "callratios": + # Heuristic approach. TOTAL_SAMPLES is unused. + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + elif totalMethod == "callstacks": + # Use the actual call chains for functions. + profile[TOTAL_SAMPLES] = profile[SAMPLES] + profile.ratio(TOTAL_TIME_RATIO, TOTAL_SAMPLES) + # Then propagate that total time to the calls. + for function in compat_itervalues(profile.functions): + for call in compat_itervalues(function.calls): + if call.ratio is not None: + callee = profile.functions[call.callee_id] + call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO] + else: + assert False + + return profile + + def parse_event(self): + if self.eof(): + return + + line = self.consume() + assert line + + callchain = self.parse_callchain() + if not callchain: + return + + callee = callchain[0] + callee[SAMPLES] += 1 + self.profile[SAMPLES] += 1 + + for caller in callchain[1:]: + try: + call = caller.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[SAMPLES2] = 1 + caller.add_call(call) + else: + call[SAMPLES2] += 1 + + callee = caller + + # Increment TOTAL_SAMPLES only once on each function. 
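+        # Descriptive note (added): a function can appear several times in one
+        # callchain (recursion); collapsing the chain to a set below avoids
+        # counting the same sample more than once for that function.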
+ stack = set(callchain) + for function in stack: + function[TOTAL_SAMPLES] += 1 + + def parse_callchain(self): + callchain = [] + while self.lookahead(): + function = self.parse_call() + if function is None: + break + callchain.append(function) + if self.lookahead() == '': + self.consume() + return callchain + + call_re = re.compile(r'^\s+(?P
[0-9a-fA-F]+)\s+(?P.*)\s+\((?P.*)\)$') + addr2_re = re.compile(r'\+0x[0-9a-fA-F]+$') + + def parse_call(self): + line = self.consume() + mo = self.call_re.match(line) + assert mo + if not mo: + return None + + function_name = mo.group('symbol') + + # If present, amputate program counter from function name. + if function_name: + function_name = re.sub(self.addr2_re, '', function_name) + + if not function_name or function_name == '[unknown]': + function_name = mo.group('address') + + module = mo.group('module') + + function_id = function_name + ':' + module + + try: + function = self.profile.functions[function_id] + except KeyError: + function = Function(function_id, function_name) + function.module = os.path.basename(module) + function[SAMPLES] = 0 + function[TOTAL_SAMPLES] = 0 + self.profile.add_function(function) + + return function + + +class OprofileParser(LineParser): + """Parser for oprofile callgraph output. + + See also: + - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph + """ + + _fields_re = { + 'samples': r'(\d+)', + '%': r'(\S+)', + 'linenr info': r'(?P\(no location information\)|\S+:\d+)', + 'image name': r'(?P\S+(?:\s\(tgid:[^)]*\))?)', + 'app name': r'(?P\S+)', + 'symbol name': r'(?P\(no symbols\)|.+?)', + } + + def __init__(self, infile): + LineParser.__init__(self, infile) + self.entries = {} + self.entry_re = None + + def add_entry(self, callers, function, callees): + try: + entry = self.entries[function.id] + except KeyError: + self.entries[function.id] = (callers, function, callees) + else: + callers_total, function_total, callees_total = entry + self.update_subentries_dict(callers_total, callers) + function_total.samples += function.samples + self.update_subentries_dict(callees_total, callees) + + def update_subentries_dict(self, totals, partials): + for partial in compat_itervalues(partials): + try: + total = totals[partial.id] + except KeyError: + totals[partial.id] = partial + else: + total.samples += partial.samples + + def parse(self): + # read lookahead + self.readline() + + self.parse_header() + while self.lookahead(): + self.parse_entry() + + profile = Profile() + + reverse_call_samples = {} + + # populate the profile + profile[SAMPLES] = 0 + for _callers, _function, _callees in compat_itervalues(self.entries): + function = Function(_function.id, _function.name) + function[SAMPLES] = _function.samples + profile.add_function(function) + profile[SAMPLES] += _function.samples + + if _function.application: + function.process = os.path.basename(_function.application) + if _function.image: + function.module = os.path.basename(_function.image) + + total_callee_samples = 0 + for _callee in compat_itervalues(_callees): + total_callee_samples += _callee.samples + + for _callee in compat_itervalues(_callees): + if not _callee.self: + call = Call(_callee.id) + call[SAMPLES2] = _callee.samples + function.add_call(call) + + # compute derived data + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + def parse_header(self): + while not self.match_header(): + self.consume() + line = self.lookahead() + fields = re.split(r'\s\s+', line) + entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P\s+\[self\])?$' + self.entry_re = re.compile(entry_re) + self.skip_separator() + + def parse_entry(self): + callers = self.parse_subentries() + if self.match_primary(): + function = self.parse_subentry() + if 
function is not None: + callees = self.parse_subentries() + self.add_entry(callers, function, callees) + self.skip_separator() + + def parse_subentries(self): + subentries = {} + while self.match_secondary(): + subentry = self.parse_subentry() + subentries[subentry.id] = subentry + return subentries + + def parse_subentry(self): + entry = Struct() + line = self.consume() + mo = self.entry_re.match(line) + if not mo: + raise ParseError('failed to parse', line) + fields = mo.groupdict() + entry.samples = int(mo.group(1)) + if 'source' in fields and fields['source'] != '(no location information)': + source = fields['source'] + filename, lineno = source.split(':') + entry.filename = filename + entry.lineno = int(lineno) + else: + source = '' + entry.filename = None + entry.lineno = None + entry.image = fields.get('image', '') + entry.application = fields.get('application', '') + if 'symbol' in fields and fields['symbol'] != '(no symbols)': + entry.symbol = fields['symbol'] + else: + entry.symbol = '' + if entry.symbol.startswith('"') and entry.symbol.endswith('"'): + entry.symbol = entry.symbol[1:-1] + entry.id = ':'.join((entry.application, entry.image, source, entry.symbol)) + entry.self = fields.get('self', None) != None + if entry.self: + entry.id += ':self' + if entry.symbol: + entry.name = entry.symbol + else: + entry.name = entry.image + return entry + + def skip_separator(self): + while not self.match_separator(): + self.consume() + self.consume() + + def match_header(self): + line = self.lookahead() + return line.startswith('samples') + + def match_separator(self): + line = self.lookahead() + return line == '-'*len(line) + + def match_primary(self): + line = self.lookahead() + return not line[:1].isspace() + + def match_secondary(self): + line = self.lookahead() + return line[:1].isspace() + + +class HProfParser(LineParser): + """Parser for java hprof output + + See also: + - http://java.sun.com/developer/technicalArticles/Programming/HPROF.html + """ + + trace_re = re.compile(r'\t(.*)\((.*):(.*)\)') + trace_id_re = re.compile(r'^TRACE (\d+):$') + + def __init__(self, infile): + LineParser.__init__(self, infile) + self.traces = {} + self.samples = {} + + def parse(self): + # read lookahead + self.readline() + + while not self.lookahead().startswith('------'): self.consume() + while not self.lookahead().startswith('TRACE '): self.consume() + + self.parse_traces() + + while not self.lookahead().startswith('CPU'): + self.consume() + + self.parse_samples() + + # populate the profile + profile = Profile() + profile[SAMPLES] = 0 + + functions = {} + + # build up callgraph + for id, trace in compat_iteritems(self.traces): + if not id in self.samples: continue + mtime = self.samples[id][0] + last = None + + for func, file, line in trace: + if not func in functions: + function = Function(func, func) + function[SAMPLES] = 0 + profile.add_function(function) + functions[func] = function + + function = functions[func] + # allocate time to the deepest method in the trace + if not last: + function[SAMPLES] += mtime + profile[SAMPLES] += mtime + else: + c = function.get_call(last) + c[SAMPLES2] += mtime + + last = func + + # compute derived data + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + def parse_traces(self): + while self.lookahead().startswith('TRACE '): + self.parse_trace() + + def parse_trace(self): + l = self.consume() + mo = self.trace_id_re.match(l) + 
tid = mo.group(1) + last = None + trace = [] + + while self.lookahead().startswith('\t'): + l = self.consume() + match = self.trace_re.search(l) + if not match: + #sys.stderr.write('Invalid line: %s\n' % l) + break + else: + function_name, file, line = match.groups() + trace += [(function_name, file, line)] + + self.traces[int(tid)] = trace + + def parse_samples(self): + self.consume() + self.consume() + + while not self.lookahead().startswith('CPU'): + rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split() + self.samples[int(traceid)] = (int(count), method) + self.consume() + + +class SysprofParser(XmlParser): + + def __init__(self, stream): + XmlParser.__init__(self, stream) + + def parse(self): + objects = {} + nodes = {} + + self.element_start('profile') + while self.token.type == XML_ELEMENT_START: + if self.token.name_or_data == 'objects': + assert not objects + objects = self.parse_items('objects') + elif self.token.name_or_data == 'nodes': + assert not nodes + nodes = self.parse_items('nodes') + else: + self.parse_value(self.token.name_or_data) + self.element_end('profile') + + return self.build_profile(objects, nodes) + + def parse_items(self, name): + assert name[-1] == 's' + items = {} + self.element_start(name) + while self.token.type == XML_ELEMENT_START: + id, values = self.parse_item(name[:-1]) + assert id not in items + items[id] = values + self.element_end(name) + return items + + def parse_item(self, name): + attrs = self.element_start(name) + id = int(attrs['id']) + values = self.parse_values() + self.element_end(name) + return id, values + + def parse_values(self): + values = {} + while self.token.type == XML_ELEMENT_START: + name = self.token.name_or_data + value = self.parse_value(name) + assert name not in values + values[name] = value + return values + + def parse_value(self, tag): + self.element_start(tag) + value = self.character_data() + self.element_end(tag) + if value.isdigit(): + return int(value) + if value.startswith('"') and value.endswith('"'): + return value[1:-1] + return value + + def build_profile(self, objects, nodes): + profile = Profile() + + profile[SAMPLES] = 0 + for id, object in compat_iteritems(objects): + # Ignore fake objects (process names, modules, "Everything", "kernel", etc.) + if object['self'] == 0: + continue + + function = Function(id, object['name']) + function[SAMPLES] = object['self'] + profile.add_function(function) + profile[SAMPLES] += function[SAMPLES] + + for id, node in compat_iteritems(nodes): + # Ignore fake calls + if node['self'] == 0: + continue + + # Find a non-ignored parent + parent_id = node['parent'] + while parent_id != 0: + parent = nodes[parent_id] + caller_id = parent['object'] + if objects[caller_id]['self'] != 0: + break + parent_id = parent['parent'] + if parent_id == 0: + continue + + callee_id = node['object'] + + assert objects[caller_id]['self'] + assert objects[callee_id]['self'] + + function = profile.functions[caller_id] + + samples = node['self'] + try: + call = function.calls[callee_id] + except KeyError: + call = Call(callee_id) + call[SAMPLES2] = samples + function.add_call(call) + else: + call[SAMPLES2] += samples + + # Compute derived events + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + +class XPerfParser(Parser): + """Parser for CSVs generated by XPerf, from Microsoft Windows Performance Tools. 
+ """ + + def __init__(self, stream): + Parser.__init__(self) + self.stream = stream + self.profile = Profile() + self.profile[SAMPLES] = 0 + self.column = {} + + def parse(self): + import csv + reader = csv.reader( + self.stream, + delimiter = ',', + quotechar = None, + escapechar = None, + doublequote = False, + skipinitialspace = True, + lineterminator = '\r\n', + quoting = csv.QUOTE_NONE) + header = True + for row in reader: + if header: + self.parse_header(row) + header = False + else: + self.parse_row(row) + + # compute derived data + self.profile.validate() + self.profile.find_cycles() + self.profile.ratio(TIME_RATIO, SAMPLES) + self.profile.call_ratios(SAMPLES2) + self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return self.profile + + def parse_header(self, row): + for column in range(len(row)): + name = row[column] + assert name not in self.column + self.column[name] = column + + def parse_row(self, row): + fields = {} + for name, column in compat_iteritems(self.column): + value = row[column] + for factory in int, float: + try: + value = factory(value) + except ValueError: + pass + else: + break + fields[name] = value + + process = fields['Process Name'] + symbol = fields['Module'] + '!' + fields['Function'] + weight = fields['Weight'] + count = fields['Count'] + + if process == 'Idle': + return + + function = self.get_function(process, symbol) + function[SAMPLES] += weight * count + self.profile[SAMPLES] += weight * count + + stack = fields['Stack'] + if stack != '?': + stack = stack.split('/') + assert stack[0] == '[Root]' + if stack[-1] != symbol: + # XXX: some cases the sampled function does not appear in the stack + stack.append(symbol) + caller = None + for symbol in stack[1:]: + callee = self.get_function(process, symbol) + if caller is not None: + try: + call = caller.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[SAMPLES2] = count + caller.add_call(call) + else: + call[SAMPLES2] += count + caller = callee + + def get_function(self, process, symbol): + function_id = process + '!' + symbol + + try: + function = self.profile.functions[function_id] + except KeyError: + module, name = symbol.split('!', 1) + function = Function(function_id, name) + function.process = process + function.module = module + function[SAMPLES] = 0 + self.profile.add_function(function) + + return function + + +class SleepyParser(Parser): + """Parser for GNU gprof output. 
+ + See also: + - http://www.codersnotes.com/sleepy/ + - http://sleepygraph.sourceforge.net/ + """ + + stdinInput = False + + def __init__(self, filename): + Parser.__init__(self) + + from zipfile import ZipFile + + self.database = ZipFile(filename) + + self.symbols = {} + self.calls = {} + + self.profile = Profile() + + _symbol_re = re.compile( + r'^(?P\w+)' + + r'\s+"(?P[^"]*)"' + + r'\s+"(?P[^"]*)"' + + r'\s+"(?P[^"]*)"' + + r'\s+(?P\d+)$' + ) + + def openEntry(self, name): + # Some versions of verysleepy use lowercase filenames + for database_name in self.database.namelist(): + if name.lower() == database_name.lower(): + name = database_name + break + + return self.database.open(name, 'r') + + def parse_symbols(self): + for line in self.openEntry('Symbols.txt'): + line = line.decode('UTF-8').rstrip('\r\n') + + mo = self._symbol_re.match(line) + if mo: + symbol_id, module, procname, sourcefile, sourceline = mo.groups() + + function_id = ':'.join([module, procname]) + + try: + function = self.profile.functions[function_id] + except KeyError: + function = Function(function_id, procname) + function.module = module + function[SAMPLES] = 0 + self.profile.add_function(function) + + self.symbols[symbol_id] = function + + def parse_callstacks(self): + for line in self.openEntry('Callstacks.txt'): + line = line.decode('UTF-8').rstrip('\r\n') + + fields = line.split() + samples = float(fields[0]) + callstack = fields[1:] + + callstack = [self.symbols[symbol_id] for symbol_id in callstack] + + callee = callstack[0] + + callee[SAMPLES] += samples + self.profile[SAMPLES] += samples + + for caller in callstack[1:]: + try: + call = caller.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[SAMPLES2] = samples + caller.add_call(call) + else: + call[SAMPLES2] += samples + + callee = caller + + def parse(self): + profile = self.profile + profile[SAMPLES] = 0 + + self.parse_symbols() + self.parse_callstacks() + + # Compute derived events + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + +class PstatsParser: + """Parser python profiling statistics saved with te pstats module.""" + + stdinInput = False + multipleInput = True + + def __init__(self, *filename): + import pstats + try: + self.stats = pstats.Stats(*filename) + except ValueError: + if PYTHON_3: + sys.stderr.write('error: failed to load %s, maybe they are generated by different python version?\n' % ', '.join(filename)) + sys.exit(1) + import hotshot.stats + self.stats = hotshot.stats.load(filename[0]) + self.profile = Profile() + self.function_ids = {} + + def get_function_name(self, key): + filename, line, name = key + module = os.path.splitext(filename)[0] + module = os.path.basename(module) + return "%s:%d:%s" % (module, line, name) + + def get_function(self, key): + try: + id = self.function_ids[key] + except KeyError: + id = len(self.function_ids) + name = self.get_function_name(key) + function = Function(id, name) + function.filename = key[0] + self.profile.functions[id] = function + self.function_ids[key] = id + else: + function = self.profile.functions[id] + return function + + def parse(self): + self.profile[TIME] = 0.0 + self.profile[TOTAL_TIME] = self.stats.total_tt + for fn, (cc, nc, tt, ct, callers) in compat_iteritems(self.stats.stats): + callee = self.get_function(fn) + callee.called = nc + callee[TOTAL_TIME] = ct + callee[TIME] = tt + self.profile[TIME] += tt + 
self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct) + for fn, value in compat_iteritems(callers): + caller = self.get_function(fn) + call = Call(callee.id) + if isinstance(value, tuple): + for i in xrange(0, len(value), 4): + nc, cc, tt, ct = value[i:i+4] + if CALLS in call: + call[CALLS] += cc + else: + call[CALLS] = cc + + if TOTAL_TIME in call: + call[TOTAL_TIME] += ct + else: + call[TOTAL_TIME] = ct + + else: + call[CALLS] = value + call[TOTAL_TIME] = ratio(value, nc)*ct + + caller.add_call(call) + + if False: + self.stats.print_stats() + self.stats.print_callees() + + # Compute derived events + self.profile.validate() + self.profile.ratio(TIME_RATIO, TIME) + self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) + + return self.profile + +class DtraceParser(LineParser): + """Parser for linux perf callgraph output. + + It expects output generated with + + # Refer to https://github.com/brendangregg/FlameGraph#dtrace + # 60 seconds of user-level stacks, including time spent in-kernel, for PID 12345 at 97 Hertz + sudo dtrace -x ustackframes=100 -n 'profile-97 /pid == 12345/ { @[ustack()] = count(); } tick-60s { exit(0); }' -o out.user_stacks + + # The dtrace output + gprof2dot.py -f dtrace out.user_stacks + + # Notice: sometimes, the dtrace outputs format may be latin-1, and gprof2dot will fail to parse it. + # To solve this problem, you should use iconv to convert to UTF-8 explicitly. + # TODO: add an encoding flag to tell gprof2dot how to decode the profile file. + iconv -f ISO-8859-1 -t UTF-8 out.user_stacks | gprof2dot.py -f dtrace + """ + + def __init__(self, infile): + LineParser.__init__(self, infile) + self.profile = Profile() + + def readline(self): + # Override LineParser.readline to ignore comment lines + while True: + LineParser.readline(self) + if self.eof(): + break + + line = self.lookahead().strip() + if line.startswith('CPU'): + # The format likes: + # CPU ID FUNCTION:NAME + # 1 29684 :tick-60s + # Skip next line + LineParser.readline(self) + elif not line == '': + break + + + def parse(self): + # read lookahead + self.readline() + + profile = self.profile + profile[SAMPLES] = 0 + while not self.eof(): + self.parse_event() + + # compute derived data + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + if totalMethod == "callratios": + # Heuristic approach. TOTAL_SAMPLES is unused. + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + elif totalMethod == "callstacks": + # Use the actual call chains for functions. + profile[TOTAL_SAMPLES] = profile[SAMPLES] + profile.ratio(TOTAL_TIME_RATIO, TOTAL_SAMPLES) + # Then propagate that total time to the calls. + for function in compat_itervalues(profile.functions): + for call in compat_itervalues(function.calls): + if call.ratio is not None: + callee = profile.functions[call.callee_id] + call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO] + else: + assert False + + return profile + + def parse_event(self): + if self.eof(): + return + + callchain, count = self.parse_callchain() + if not callchain: + return + + callee = callchain[0] + callee[SAMPLES] += count + self.profile[SAMPLES] += count + + for caller in callchain[1:]: + try: + call = caller.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[SAMPLES2] = count + caller.add_call(call) + else: + call[SAMPLES2] += count + + callee = caller + + # Increment TOTAL_SAMPLES only once on each function. 
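+        # Descriptive note (added): dtrace emits each unique stack with an
+        # aggregated count, so the count is added once per distinct function
+        # in the stack, which also keeps recursive frames from double-counting.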
+ stack = set(callchain) + for function in stack: + function[TOTAL_SAMPLES] += count + + + def parse_callchain(self): + callchain = [] + count = 0 + while self.lookahead(): + function, count = self.parse_call() + if function is None: + break + callchain.append(function) + return callchain, count + + call_re = re.compile(r'^\s+(?P.*)`(?P.*)') + addr2_re = re.compile(r'\+0x[0-9a-fA-F]+$') + + def parse_call(self): + line = self.consume() + mo = self.call_re.match(line) + if not mo: + # The line must be the stack count + return None, int(line.strip()) + + function_name = mo.group('symbol') + + # If present, amputate program counter from function name. + if function_name: + function_name = re.sub(self.addr2_re, '', function_name) + + # if not function_name or function_name == '[unknown]': + # function_name = mo.group('address') + + module = mo.group('module') + + function_id = function_name + ':' + module + + try: + function = self.profile.functions[function_id] + except KeyError: + function = Function(function_id, function_name) + function.module = os.path.basename(module) + function[SAMPLES] = 0 + function[TOTAL_SAMPLES] = 0 + self.profile.add_function(function) + + return function, None + +formats = { + "axe": AXEParser, + "callgrind": CallgrindParser, + "hprof": HProfParser, + "json": JsonParser, + "oprofile": OprofileParser, + "perf": PerfParser, + "prof": GprofParser, + "pstats": PstatsParser, + "sleepy": SleepyParser, + "sysprof": SysprofParser, + "xperf": XPerfParser, + "dtrace": DtraceParser, +} + + +######################################################################## +# Output + + +class Theme: + + def __init__(self, + bgcolor = (0.0, 0.0, 1.0), + mincolor = (0.0, 0.0, 0.0), + maxcolor = (0.0, 0.0, 1.0), + fontname = "Arial", + fontcolor = "white", + nodestyle = "filled", + minfontsize = 10.0, + maxfontsize = 10.0, + minpenwidth = 0.5, + maxpenwidth = 4.0, + gamma = 2.2, + skew = 1.0): + self.bgcolor = bgcolor + self.mincolor = mincolor + self.maxcolor = maxcolor + self.fontname = fontname + self.fontcolor = fontcolor + self.nodestyle = nodestyle + self.minfontsize = minfontsize + self.maxfontsize = maxfontsize + self.minpenwidth = minpenwidth + self.maxpenwidth = maxpenwidth + self.gamma = gamma + self.skew = skew + + def graph_bgcolor(self): + return self.hsl_to_rgb(*self.bgcolor) + + def graph_fontname(self): + return self.fontname + + def graph_fontcolor(self): + return self.fontcolor + + def graph_fontsize(self): + return self.minfontsize + + def node_bgcolor(self, weight): + return self.color(weight) + + def node_fgcolor(self, weight): + if self.nodestyle == "filled": + return self.graph_bgcolor() + else: + return self.color(weight) + + def node_fontsize(self, weight): + return self.fontsize(weight) + + def node_style(self): + return self.nodestyle + + def edge_color(self, weight): + return self.color(weight) + + def edge_fontsize(self, weight): + return self.fontsize(weight) + + def edge_penwidth(self, weight): + return max(weight*self.maxpenwidth, self.minpenwidth) + + def edge_arrowsize(self, weight): + return 0.5 * math.sqrt(self.edge_penwidth(weight)) + + def fontsize(self, weight): + return max(weight**2 * self.maxfontsize, self.minfontsize) + + def color(self, weight): + weight = min(max(weight, 0.0), 1.0) + + hmin, smin, lmin = self.mincolor + hmax, smax, lmax = self.maxcolor + + if self.skew < 0: + raise ValueError("Skew must be greater than 0") + elif self.skew == 1.0: + h = hmin + weight*(hmax - hmin) + s = smin + weight*(smax - smin) + l = lmin + weight*(lmax - 
lmin) + else: + base = self.skew + h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0)) + s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0)) + l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0)) + + return self.hsl_to_rgb(h, s, l) + + def hsl_to_rgb(self, h, s, l): + """Convert a color from HSL color-model to RGB. + + See also: + - http://www.w3.org/TR/css3-color/#hsl-color + """ + + h = h % 1.0 + s = min(max(s, 0.0), 1.0) + l = min(max(l, 0.0), 1.0) + + if l <= 0.5: + m2 = l*(s + 1.0) + else: + m2 = l + s - l*s + m1 = l*2.0 - m2 + r = self._hue_to_rgb(m1, m2, h + 1.0/3.0) + g = self._hue_to_rgb(m1, m2, h) + b = self._hue_to_rgb(m1, m2, h - 1.0/3.0) + + # Apply gamma correction + r **= self.gamma + g **= self.gamma + b **= self.gamma + + return (r, g, b) + + def _hue_to_rgb(self, m1, m2, h): + if h < 0.0: + h += 1.0 + elif h > 1.0: + h -= 1.0 + if h*6 < 1.0: + return m1 + (m2 - m1)*h*6.0 + elif h*2 < 1.0: + return m2 + elif h*3 < 2.0: + return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0 + else: + return m1 + + +TEMPERATURE_COLORMAP = Theme( + mincolor = (2.0/3.0, 0.80, 0.25), # dark blue + maxcolor = (0.0, 1.0, 0.5), # satured red + gamma = 1.0 +) + +PINK_COLORMAP = Theme( + mincolor = (0.0, 1.0, 0.90), # pink + maxcolor = (0.0, 1.0, 0.5), # satured red +) + +GRAY_COLORMAP = Theme( + mincolor = (0.0, 0.0, 0.85), # light gray + maxcolor = (0.0, 0.0, 0.0), # black +) + +BW_COLORMAP = Theme( + minfontsize = 8.0, + maxfontsize = 24.0, + mincolor = (0.0, 0.0, 0.0), # black + maxcolor = (0.0, 0.0, 0.0), # black + minpenwidth = 0.1, + maxpenwidth = 8.0, +) + +PRINT_COLORMAP = Theme( + minfontsize = 18.0, + maxfontsize = 30.0, + fontcolor = "black", + nodestyle = "solid", + mincolor = (0.0, 0.0, 0.0), # black + maxcolor = (0.0, 0.0, 0.0), # black + minpenwidth = 0.1, + maxpenwidth = 8.0, +) + + +themes = { + "color": TEMPERATURE_COLORMAP, + "pink": PINK_COLORMAP, + "gray": GRAY_COLORMAP, + "bw": BW_COLORMAP, + "print": PRINT_COLORMAP, +} + + +def sorted_iteritems(d): + # Used mostly for result reproducibility (while testing.) + keys = compat_keys(d) + keys.sort() + for key in keys: + value = d[key] + yield key, value + + +class DotWriter: + """Writer for the DOT language. 
+ + See also: + - "The DOT Language" specification + http://www.graphviz.org/doc/info/lang.html + """ + + strip = False + wrap = False + + def __init__(self, fp): + self.fp = fp + + def wrap_function_name(self, name): + """Split the function name on multiple lines.""" + + if len(name) > 32: + ratio = 2.0/3.0 + height = max(int(len(name)/(1.0 - ratio) + 0.5), 1) + width = max(len(name)/height, 32) + # TODO: break lines in symbols + name = textwrap.fill(name, width, break_long_words=False) + + # Take away spaces + name = name.replace(", ", ",") + name = name.replace("> >", ">>") + name = name.replace("> >", ">>") # catch consecutive + + return name + + show_function_events = [TOTAL_TIME_RATIO, TIME_RATIO] + show_edge_events = [TOTAL_TIME_RATIO, CALLS] + + def graph(self, profile, theme): + self.begin_graph() + + fontname = theme.graph_fontname() + fontcolor = theme.graph_fontcolor() + nodestyle = theme.node_style() + + self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125) + self.attr('node', fontname=fontname, shape="box", style=nodestyle, fontcolor=fontcolor, width=0, height=0) + self.attr('edge', fontname=fontname) + + for _, function in sorted_iteritems(profile.functions): + labels = [] + if function.process is not None: + labels.append(function.process) + if function.module is not None: + labels.append(function.module) + + if self.strip: + function_name = function.stripped_name() + else: + function_name = function.name + + # dot can't parse quoted strings longer than YY_BUF_SIZE, which + # defaults to 16K. But some annotated C++ functions (e.g., boost, + # https://github.com/jrfonseca/gprof2dot/issues/30) can exceed that + MAX_FUNCTION_NAME = 4096 + if len(function_name) >= MAX_FUNCTION_NAME: + sys.stderr.write('warning: truncating function name with %u chars (%s)\n' % (len(function_name), function_name[:32] + '...')) + function_name = function_name[:MAX_FUNCTION_NAME - 1] + unichr(0x2026) + + if self.wrap: + function_name = self.wrap_function_name(function_name) + labels.append(function_name) + + for event in self.show_function_events: + if event in function.events: + label = event.format(function[event]) + labels.append(label) + if function.called is not None: + labels.append("%u%s" % (function.called, MULTIPLICATION_SIGN)) + + if function.weight is not None: + weight = function.weight + else: + weight = 0.0 + + label = '\n'.join(labels) + self.node(function.id, + label = label, + color = self.color(theme.node_bgcolor(weight)), + fontcolor = self.color(theme.node_fgcolor(weight)), + fontsize = "%.2f" % theme.node_fontsize(weight), + tooltip = function.filename, + ) + + for _, call in sorted_iteritems(function.calls): + callee = profile.functions[call.callee_id] + + labels = [] + for event in self.show_edge_events: + if event in call.events: + label = event.format(call[event]) + labels.append(label) + + if call.weight is not None: + weight = call.weight + elif callee.weight is not None: + weight = callee.weight + else: + weight = 0.0 + + label = '\n'.join(labels) + + self.edge(function.id, call.callee_id, + label = label, + color = self.color(theme.edge_color(weight)), + fontcolor = self.color(theme.edge_color(weight)), + fontsize = "%.2f" % theme.edge_fontsize(weight), + penwidth = "%.2f" % theme.edge_penwidth(weight), + labeldistance = "%.2f" % theme.edge_penwidth(weight), + arrowsize = "%.2f" % theme.edge_arrowsize(weight), + ) + + self.end_graph() + + def begin_graph(self): + self.write('digraph {\n') + + def end_graph(self): + self.write('}\n') + + def attr(self, 
what, **attrs): + self.write("\t") + self.write(what) + self.attr_list(attrs) + self.write(";\n") + + def node(self, node, **attrs): + self.write("\t") + self.id(node) + self.attr_list(attrs) + self.write(";\n") + + def edge(self, src, dst, **attrs): + self.write("\t") + self.id(src) + self.write(" -> ") + self.id(dst) + self.attr_list(attrs) + self.write(";\n") + + def attr_list(self, attrs): + if not attrs: + return + self.write(' [') + first = True + for name, value in sorted_iteritems(attrs): + if value is None: + continue + if first: + first = False + else: + self.write(", ") + self.id(name) + self.write('=') + self.id(value) + self.write(']') + + def id(self, id): + if isinstance(id, (int, float)): + s = str(id) + elif isinstance(id, basestring): + if id.isalnum() and not id.startswith('0x'): + s = id + else: + s = self.escape(id) + else: + raise TypeError + self.write(s) + + def color(self, rgb): + r, g, b = rgb + + def float2int(f): + if f <= 0.0: + return 0 + if f >= 1.0: + return 255 + return int(255.0*f + 0.5) + + return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)]) + + def escape(self, s): + if not PYTHON_3: + s = s.encode('utf-8') + s = s.replace('\\', r'\\') + s = s.replace('\n', r'\n') + s = s.replace('\t', r'\t') + s = s.replace('"', r'\"') + return '"' + s + '"' + + def write(self, s): + self.fp.write(s) + + + +######################################################################## +# Main program + + +def naturalJoin(values): + if len(values) >= 2: + return ', '.join(values[:-1]) + ' or ' + values[-1] + + else: + return ''.join(values) + + +def main(argv=sys.argv[1:]): + """Main program.""" + + global totalMethod + + formatNames = list(formats.keys()) + formatNames.sort() + + themeNames = list(themes.keys()) + themeNames.sort() + + labelNames = list(labels.keys()) + labelNames.sort() + + optparser = optparse.OptionParser( + usage="\n\t%prog [options] [file] ...") + optparser.add_option( + '-o', '--output', metavar='FILE', + type="string", dest="output", + help="output filename [stdout]") + optparser.add_option( + '-n', '--node-thres', metavar='PERCENTAGE', + type="float", dest="node_thres", default=0.5, + help="eliminate nodes below this threshold [default: %default]") + optparser.add_option( + '-e', '--edge-thres', metavar='PERCENTAGE', + type="float", dest="edge_thres", default=0.1, + help="eliminate edges below this threshold [default: %default]") + optparser.add_option( + '-f', '--format', + type="choice", choices=formatNames, + dest="format", default="prof", + help="profile format: %s [default: %%default]" % naturalJoin(formatNames)) + optparser.add_option( + '--total', + type="choice", choices=('callratios', 'callstacks'), + dest="totalMethod", default=totalMethod, + help="preferred method of calculating total time: callratios or callstacks (currently affects only perf format) [default: %default]") + optparser.add_option( + '-c', '--colormap', + type="choice", choices=themeNames, + dest="theme", default="color", + help="color map: %s [default: %%default]" % naturalJoin(themeNames)) + optparser.add_option( + '-s', '--strip', + action="store_true", + dest="strip", default=False, + help="strip function parameters, template parameters, and const modifiers from demangled C++ function names") + optparser.add_option( + '--color-nodes-by-selftime', + action="store_true", + dest="color_nodes_by_selftime", default=False, + help="color nodes by self time, rather than by total time (sum of self and descendants)") + optparser.add_option( + 
'--colour-nodes-by-selftime', + action="store_true", + dest="color_nodes_by_selftime", + help=optparse.SUPPRESS_HELP) + optparser.add_option( + '-w', '--wrap', + action="store_true", + dest="wrap", default=False, + help="wrap function names") + optparser.add_option( + '--show-samples', + action="store_true", + dest="show_samples", default=False, + help="show function samples") + optparser.add_option( + '--node-label', metavar='MEASURE', + type='choice', choices=labelNames, + action='append', + dest='node_labels', + help="measurements to on show the node (can be specified multiple times): %s [default: %s]" % ( + naturalJoin(labelNames), ', '.join(defaultLabelNames))) + # add option to show information on available entries () + optparser.add_option( + '--list-functions', + type="string", + dest="list_functions", default=None, + help="""\ +list functions available for selection in -z or -l, requires selector argument +( use '+' to select all). +Recall that the selector argument is used with Unix/Bash globbing/pattern matching, +and that entries are formatted '::'. When argument starts +with '%', a dump of all available information is performed for selected entries, + after removal of leading '%'. +""") + # add option to create subtree or show paths + optparser.add_option( + '-z', '--root', + type="string", + dest="root", default="", + help="prune call graph to show only descendants of specified root function") + optparser.add_option( + '-l', '--leaf', + type="string", + dest="leaf", default="", + help="prune call graph to show only ancestors of specified leaf function") + optparser.add_option( + '--depth', + type="int", + dest="depth", default=-1, + help="prune call graph to show only descendants or ancestors until specified depth") + # add a new option to control skew of the colorization curve + optparser.add_option( + '--skew', + type="float", dest="theme_skew", default=1.0, + help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Values > 1.0 give less variety to lower percentages") + # add option for filtering by file path + optparser.add_option( + '-p', '--path', action="append", + type="string", dest="filter_paths", + help="Filter all modules not in a specified path") + (options, args) = optparser.parse_args(argv) + + if len(args) > 1 and options.format != 'pstats': + optparser.error('incorrect number of arguments') + + try: + theme = themes[options.theme] + except KeyError: + optparser.error('invalid colormap \'%s\'' % options.theme) + + # set skew on the theme now that it has been picked. 
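+    # Descriptive note (added): skew < 1.0 spreads more colour variation across
+    # low percentages, while skew > 1.0 reserves it for the high end; see the
+    # --skew option help text above.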
+ if options.theme_skew: + theme.skew = options.theme_skew + + totalMethod = options.totalMethod + + try: + Format = formats[options.format] + except KeyError: + optparser.error('invalid format \'%s\'' % options.format) + + if Format.stdinInput: + if not args: + fp = sys.stdin + elif PYTHON_3: + fp = open(args[0], 'rt', encoding='UTF-8') + else: + fp = open(args[0], 'rt') + parser = Format(fp) + elif Format.multipleInput: + if not args: + optparser.error('at least a file must be specified for %s input' % options.format) + parser = Format(*args) + else: + if len(args) != 1: + optparser.error('exactly one file must be specified for %s input' % options.format) + parser = Format(args[0]) + + profile = parser.parse() + + if options.output is None: + if PYTHON_3: + output = open(sys.stdout.fileno(), mode='wt', encoding='UTF-8', closefd=False) + else: + output = sys.stdout + else: + if PYTHON_3: + output = open(options.output, 'wt', encoding='UTF-8') + else: + output = open(options.output, 'wt') + + dot = DotWriter(output) + dot.strip = options.strip + dot.wrap = options.wrap + + labelNames = options.node_labels or defaultLabelNames + dot.show_function_events = [labels[l] for l in labelNames] + if options.show_samples: + dot.show_function_events.append(SAMPLES) + + profile = profile + profile.prune(options.node_thres/100.0, options.edge_thres/100.0, options.filter_paths, options.color_nodes_by_selftime) + + if options.list_functions: + profile.printFunctionIds(selector=options.list_functions) + sys.exit(0) + + if options.root: + rootIds = profile.getFunctionIds(options.root) + if not rootIds: + sys.stderr.write('root node ' + options.root + ' not found (might already be pruned : try -e0 -n0 flags)\n') + sys.exit(1) + profile.prune_root(rootIds, options.depth) + if options.leaf: + leafIds = profile.getFunctionIds(options.leaf) + if not leafIds: + sys.stderr.write('leaf node ' + options.leaf + ' not found (maybe already pruned : try -e0 -n0 flags)\n') + sys.exit(1) + profile.prune_leaf(leafIds, options.depth) + + dot.graph(profile, theme) + + +if __name__ == '__main__': + main() diff --git a/proj.py b/proj.py index de09db79..c967cdfb 100755 --- a/proj.py +++ b/proj.py @@ -66,8 +66,15 @@ def build(): _run('python3', 'setup.py', 'build_ext', '--inplace') +@command +def build_fuzzing(): + _run('python3', 'setup.py', 'build_ext', '--inplace', + env={'TEST_QUESTDB_FUZZING': '1'}) + + @command def test(all=False, patch_path='1', *args): + _run('cargo', 'test', cwd=PROJ_ROOT / 'pystr-to-utf8') env = {'TEST_QUESTDB_PATCH_PATH': patch_path} if _arg2bool(all): env['TEST_QUESTDB_INTEGRATION'] = '1' @@ -75,6 +82,74 @@ def test(all=False, patch_path='1', *args): env=env) +@command +def test_fuzzing(*args): + import atheris + import pathlib + lib_path = pathlib.Path(atheris.path()) / 'asan_with_fuzzer.so' + if not lib_path.exists(): + sys.stderr.write(f'WARNING: {lib_path} not found\n') + sys.exit(42) + ld_preload = os.environ.get('LD_PRELOAD', '') + if ld_preload: + ld_preload += ':' + ld_preload += str(lib_path) + cmd = [ + 'python3', + 'test/test_dataframe_fuzz.py'] + list(args) + if not args: + cmd.extend([ + '-detect_leaks=0', + '-rss_limit_mb=32768', + '-artifact_prefix=fuzz-artifact/', + '-create_missing_dirs=1']) + _run(*cmd, env={'LD_PRELOAD': ld_preload}) + + +@command +def benchmark(*args): + env = {'TEST_QUESTDB_PATCH_PATH': '1'} + _run('python3', 'test/benchmark.py', '-v', *args, env=env) + + +@command +def gdb_test(*args): + env = {'TEST_QUESTDB_PATCH_PATH': '1'} + _run('gdb', '-ex', 'r', '--args', 
'python3', 'test/test.py', '-v', *args, + env=env) + + +@command +def rr_test(*args): + """ + Linux-only reverse debugger. + https://github.com/rr-debugger/rr + https://www.youtube.com/watch?v=61kD3x4Pu8I + + Install rr: + $ sudo apt install rr + $ sudo vim /proc/sys/kernel/perf_event_paranoid # set to -1 + """ + env = {'TEST_QUESTDB_PATCH_PATH': '1'} + try: + _run('rr', 'record', 'python3', 'test/test.py', '-v', *args, + env=env) + finally: + sys.stdout.flush() + sys.stderr.flush() + red = '\033[01;31m' + reset = '\033[0m' + sys.stderr.write(f'''\n{red} + Now first re-run marking stdout/stderr events with a unique ID: + $ rr -M replay -a + + Then re-run inside GDB, running up to a specific event: + $ rr replay -g $EVENT_ID + (rr) break ingress.c:9999 + (rr) continue # or step, next, etc.{reset}\n\n''') + + + @command def doc(http_serve=False, port=None): _run('python3', '-m', 'sphinx.cmd.build', @@ -112,6 +187,17 @@ def cibuildwheel(*args): *args) +@command +def repl(*args): + _run('python3', env={'PYTHONPATH': str(PROJ_ROOT / 'src')}) + + +@command +def example(name, *args): + _run('python3', 'examples/' + name + '.py', *args, + env={'PYTHONPATH': str(PROJ_ROOT / 'src')}) + + @command def cw(*args): cibuildwheel(args) @@ -128,6 +214,7 @@ def clean(): _rmtree(PROJ_ROOT / 'dist') _rmtree(PROJ_ROOT / 'c-questdb-client' / 'questdb-rs-ffi' / 'target') _rmtree(PROJ_ROOT / 'c-questdb-client' / 'build') + _rmtree(PROJ_ROOT / 'pystr-to-utf8' / 'target') _rmtree(PROJ_ROOT / 'src' / 'questdb.egg-info') _rmtree(PROJ_ROOT / 'venv') _rmtree(PROJ_ROOT / 'wheelhouse') diff --git a/pyproject.toml b/pyproject.toml index e01713a6..c7cb32e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ requires = [ # See: https://cibuildwheel.readthedocs.io/en/stable/options/#configuration-file build-verbosity = "3" before-build = "python {project}/install_rust.py" +before-test = "python {project}/ci/pip_install_deps.py" test-command = "python {project}/test/test.py -v" skip = [ # No 32-bit musl C native tool chain for Rust. diff --git a/pystr-to-utf8/Cargo.lock b/pystr-to-utf8/Cargo.lock new file mode 100644 index 00000000..5210f8ca --- /dev/null +++ b/pystr-to-utf8/Cargo.lock @@ -0,0 +1,242 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cbindgen" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6358dedf60f4d9b8db43ad187391afe959746101346fe51bb978126bec61dfb" +dependencies = [ + "heck", + "indexmap", + "log", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn", + "tempfile", + "toml", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "fastrand" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +dependencies = [ + "instant", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "indexmap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "itoa" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" + +[[package]] +name = "libc" +version = "0.2.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "proc-macro2" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pystr-to-utf8" +version = "0.1.0" +dependencies = [ + "cbindgen", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + 
+[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "ryu" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + +[[package]] +name = "serde" +version = "1.0.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "toml" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +dependencies = [ + "serde", +] + +[[package]] +name = "unicode-ident" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/pystr-to-utf8/Cargo.toml b/pystr-to-utf8/Cargo.toml new file mode 100644 index 00000000..4eb8f445 --- /dev/null +++ b/pystr-to-utf8/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "pystr-to-utf8" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +name = "pystr_to_utf8" +crate-type = ["staticlib"] + +[build-dependencies] +cbindgen = { version = "0.24.3", optional = true, default-features = false } \ No newline at end of file diff --git a/pystr-to-utf8/README.md b/pystr-to-utf8/README.md new file mode 100644 index 00000000..607f98b1 --- /dev/null +++ 
b/pystr-to-utf8/README.md @@ -0,0 +1,11 @@ +By default, when compiling, we don't re-generate the `.h` and `.pxd` files. +This is to speed up compile time. + +If you've updated the API, regenerate them by running: + +``` +$ cargo clean +$ cargo build --features cbindgen +``` + +Then make sure to commit the updated generated files. diff --git a/pystr-to-utf8/build.rs b/pystr-to-utf8/build.rs new file mode 100644 index 00000000..a93cb918 --- /dev/null +++ b/pystr-to-utf8/build.rs @@ -0,0 +1,51 @@ +#[cfg(feature = "cbindgen")] +extern crate cbindgen; + +#[cfg(feature = "cbindgen")] +const BAD_PXD: &str = " +cdef extern from *: + ctypedef bint bool + ctypedef struct va_list"; + +#[cfg(feature = "cbindgen")] +fn main() -> Result<(), Box> { + let crate_dir = std::env::var("CARGO_MANIFEST_DIR")?; + let bindings = cbindgen::generate(&crate_dir)?; + bindings.write_to_file("include/pystr_to_utf8.h"); + + let config = cbindgen::Config { + language: cbindgen::Language::Cython, + documentation: true, + cython: cbindgen::CythonConfig { + header: Some("\"pystr_to_utf8.h\"".to_owned()), + cimports: std::collections::BTreeMap::new()}, + usize_is_size_t: true, + ..Default::default() + }; + + let bindings = cbindgen::Builder::new() + .with_crate(&crate_dir) + .with_config(config) + .generate()?; + + // Instead of just writing out the file: + // bindings.write_to_file("include/pystr_to_utf8.pxd"); + // We need to do some post-processing to make it work our code. + // The default output is too opinionated and has unwanted typedefs. + let mut pxd = Vec::new(); + bindings.write(&mut pxd); + let pxd = String::from_utf8(pxd)?; + if !pxd.contains(BAD_PXD) { + panic!("cbindgen generated unexpected pxd: {}", pxd); + } + let pxd = pxd.replace(BAD_PXD, ""); + let pxd = pxd.replace("bool", "bint"); + let pxd = pxd.replace(";", ""); + // println!("{}", &pxd); + std::fs::write("../src/questdb/pystr_to_utf8.pxd", &pxd)?; + Ok(()) +} + +#[cfg(not(feature = "cbindgen"))] +fn main() {} + diff --git a/pystr-to-utf8/cbindgen.toml b/pystr-to-utf8/cbindgen.toml new file mode 100644 index 00000000..d9f4e5d0 --- /dev/null +++ b/pystr-to-utf8/cbindgen.toml @@ -0,0 +1,59 @@ +language = "C" + +header = """/******************************************************************************* + * ___ _ ____ ____ + * / _ \\ _ _ ___ ___| |_| _ \\| __ ) + * | | | | | | |/ _ \\/ __| __| | | | _ \\ + * | |_| | |_| | __/\\__ \\ |_| |_| | |_) | + * \\__\\_\\\\__,_|\\___||___/\\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2022 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/""" + +pragma_once = true + +autogen_warning = "// This header is auto-generated. Do not edit directly!" 
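+
+# Note: this file configures the generated C header (include/pystr_to_utf8.h).
+# The Cython .pxd binding is generated separately from build.rs, which sets
+# the cbindgen language to Cython and post-processes the output.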
+ +# A list of sys headers to #include (with angle brackets) +# default: [] +sys_includes = ["stdint.h", "stddef.h", "stdbool.h"] + +# A list of headers to #include (with quotes) +# default: [] +includes = [] # ["my_great_lib.h"] + +# Suppress cbindgen's default includes. +no_includes = true + +# #ifdef __cplusplus +# extern "C" { +# #endif // __cplusplus +cpp_compat = true + +# Code Style Options +braces = "NextLine" +line_length = 79 +tab_width = 4 +documentation = true +documentation_style = "doxy" + +# Codegen Options +style = "both" +usize_is_size_t = true + +[fn] +args = "vertical" diff --git a/pystr-to-utf8/include/pystr_to_utf8.h b/pystr-to-utf8/include/pystr_to_utf8.h new file mode 100644 index 00000000..cf783284 --- /dev/null +++ b/pystr-to-utf8/include/pystr_to_utf8.h @@ -0,0 +1,113 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2022 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +#pragma once + +// This header is auto-generated. Do not edit directly! + +#include +#include +#include + +typedef struct qdb_pystr_buf qdb_pystr_buf; + +typedef struct qdb_pystr_pos +{ + size_t chain; + size_t string; +} qdb_pystr_pos; + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * Prepare a new buffer. The buffer must be freed with `qdb_pystr_free`. + * The `qdb_ucsX_to_utf8` functions will write to this buffer. + */ +struct qdb_pystr_buf *qdb_pystr_buf_new(void); + +/** + * Get current position. Use in conjunction with `truncate`. + */ +struct qdb_pystr_pos qdb_pystr_buf_tell(const struct qdb_pystr_buf *b); + +/** + * Trim the buffer to the given position. Use in conjunction with `tell`. + */ +void qdb_pystr_buf_truncate(struct qdb_pystr_buf *b, + struct qdb_pystr_pos pos); + +/** + * Reset the converter's buffer to zero length. + */ +void qdb_pystr_buf_clear(struct qdb_pystr_buf *b); + +/** + * Free the buffer. Must be called after `qdb_pystr_buf_new`. + */ +void qdb_pystr_buf_free(struct qdb_pystr_buf *b); + +/** + * Convert a Py_UCS1 string to UTF-8. + * Returns a `buf_out` borrowed ptr of `size_out` len. + * The buffer is borrowed from `b`. + */ +void qdb_ucs1_to_utf8(struct qdb_pystr_buf *b, + size_t count, + const uint8_t *input, + size_t *size_out, + const char **buf_out); + +/** + * Convert a Py_UCS2 string to UTF-8. + * Returns a `buf_out` borrowed ptr of `size_out` len. + * The buffer is borrowed from `b`. + * In case of errors, returns `false` and bad_codepoint_out is set to the + * offending codepoint. 
+ */ +bool qdb_ucs2_to_utf8(struct qdb_pystr_buf *b, + size_t count, + const uint16_t *input, + size_t *size_out, + const char **buf_out, + uint32_t *bad_codepoint_out); + +/** + * Convert a Py_UCS4 string to UTF-8. + * Returns a `buf_out` borrowed ptr of `size_out` len. + * The buffer is borrowed from `b`. + * In case of errors, returns `false` and bad_codepoint_out is set to the + * offending codepoint. + */ +bool qdb_ucs4_to_utf8(struct qdb_pystr_buf *b, + size_t count, + const uint32_t *input, + size_t *size_out, + const char **buf_out, + uint32_t *bad_codepoint_out); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus diff --git a/pystr-to-utf8/src/lib.rs b/pystr-to-utf8/src/lib.rs new file mode 100644 index 00000000..aa58ac7c --- /dev/null +++ b/pystr-to-utf8/src/lib.rs @@ -0,0 +1,286 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2022 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +use std::ffi::c_char; +use std::slice::from_raw_parts; + +#[allow(non_camel_case_types)] +pub struct qdb_pystr_buf(Vec); + +#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct qdb_pystr_pos { + pub chain: usize, + pub string: usize +} + +/// Prepare a new buffer. The buffer must be freed with `qdb_pystr_free`. +/// The `qdb_ucsX_to_utf8` functions will write to this buffer. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_new() -> *mut qdb_pystr_buf { + Box::into_raw(Box::new(qdb_pystr_buf(Vec::new()))) +} + +/// Get current position. Use in conjunction with `truncate`. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_tell( + b: *const qdb_pystr_buf) -> qdb_pystr_pos { + let b = &*b; + let chain_pos = b.0.len(); + let string_pos = if chain_pos > 0 { + b.0[chain_pos - 1].len() + } else { + 0 + }; + qdb_pystr_pos { chain: chain_pos, string: string_pos } +} + +/// Trim the buffer to the given position. Use in conjunction with `tell`. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_truncate( + b: *mut qdb_pystr_buf, pos: qdb_pystr_pos) { + let b = &mut *b; + b.0.truncate(pos.chain); + if !b.0.is_empty() { + b.0[pos.chain - 1].truncate(pos.string); + } +} + +/// Reset the converter's buffer to zero length. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_clear(b: *mut qdb_pystr_buf) { + let b = &mut *b; + if !b.0.is_empty() { + b.0.truncate(1); + b.0[0].clear(); + } +} + +/// Free the buffer. Must be called after `qdb_pystr_buf_new`. 
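+/// Calling this with a `NULL` pointer is a no-op.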
+#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_free(b: *mut qdb_pystr_buf) { + if !b.is_null() { + drop(Box::from_raw(b)); + } +} + +const MIN_BUF_LEN: usize = 1024; + +/// A carefully crafted buffer with spare capacity for `len` bytes. +/// This is necessary to return "stable" addresses and avoid segfaults. +/// Rust is unaware we are borrowing its memory and could try to free it as +/// part of a reallocation if we were to use a `String` directly. +fn get_dest(chain: &mut Vec, len: usize) -> &mut String { + if !chain.is_empty() { + let last = chain.last_mut().unwrap(); + if last.capacity() - last.len() >= len { + return chain.last_mut().unwrap(); + } + } + chain.push(String::with_capacity(std::cmp::max(len, MIN_BUF_LEN))); + chain.last_mut().unwrap() +} + +#[inline(always)] +fn encode_loop<'a, 'b, T, F>( + utf8_mult: usize, + chain: &'a mut Vec, + buf: &'b [T], + get_char: F) -> Result<&'a str, u32> + where + F: Fn(T) -> Option, + T: Copy + Into +{ + let dest = get_dest(chain, utf8_mult * buf.len()); + let last = dest.len(); + // for &b in buf.iter() { + // // Checking for validity is not optional: + // // >>> for n in range(2 ** 16): + // // >>> chr(n).encode('utf-8') + // // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' + // // in position 0: surrogates not allowed + // match get_char(b) { + // Some(c) => dest.push(c), + // None => { + // dest.truncate(last); + // return Err(b.into()); + // } + // } + // } + // Ok(&dest[last..]) + unsafe { + let v = dest.as_mut_vec(); + v.set_len(v.capacity()); + let mut index = last; + + for &b in buf.iter() { + let c = match get_char(b) { + Some(c) => c, + None => { + v.set_len(last); + return Err(b.into()) + } + }; + let utf_c_len = c.len_utf8(); + match utf_c_len { + 1 => { + v[index] = c as u8; + }, + 2 => { + let mut codepoint_buf = [0; 4]; + let bytes = c + .encode_utf8(&mut codepoint_buf).as_bytes(); + *v.get_unchecked_mut(index) = + *bytes.get_unchecked(0); + *v.get_unchecked_mut(index + 1) = + *bytes.get_unchecked(1); + }, + 3 => { + let mut codepoint_buf = [0; 4]; + let bytes = c + .encode_utf8(&mut codepoint_buf).as_bytes(); + *v.get_unchecked_mut(index) = + *bytes.get_unchecked(0); + *v.get_unchecked_mut(index + 1) = + *bytes.get_unchecked(1); + *v.get_unchecked_mut(index + 2) = + *bytes.get_unchecked(2); + }, + 4 => { + let mut codepoint_buf = [0; 4]; + let bytes = c + .encode_utf8(&mut codepoint_buf).as_bytes(); + *v.get_unchecked_mut(index) = + *bytes.get_unchecked(0); + *v.get_unchecked_mut(index + 1) = + *bytes.get_unchecked(1); + *v.get_unchecked_mut(index + 2) = + *bytes.get_unchecked(2); + *v.get_unchecked_mut(index + 3) = + *bytes.get_unchecked(3); + }, + _ => unreachable!() + } + index += utf_c_len; + } + v.set_len(index); + } + Ok(&dest[last..]) +} + +/// Convert a Py_UCS1 string to UTF-8. +/// Returns a `buf_out` borrowed ptr of `size_out` len. +/// The buffer is borrowed from `b`. +#[no_mangle] +pub unsafe extern "C" fn qdb_ucs1_to_utf8( + b: *mut qdb_pystr_buf, + count: usize, input: *const u8, + size_out: *mut usize, buf_out: *mut *const c_char) { + let b = &mut *b; + let i = from_raw_parts(input, count); + + // len(chr(2 ** 8 - 1).encode('utf-8')) == 2 + let utf8_mult = 2; + let res = encode_loop( + utf8_mult, + &mut b.0, + i, + |c| Some(c as char)).unwrap(); + *size_out = res.len(); + *buf_out = res.as_ptr() as *const c_char; +} + +/// Convert a Py_UCS2 string to UTF-8. +/// Returns a `buf_out` borrowed ptr of `size_out` len. +/// The buffer is borrowed from `b`. 
+/// In case of errors, returns `false` and bad_codepoint_out is set to the +/// offending codepoint. +#[no_mangle] +pub unsafe extern "C" fn qdb_ucs2_to_utf8(b: *mut qdb_pystr_buf, + count: usize, + input: *const u16, + size_out: *mut usize, + buf_out: *mut *const c_char, + bad_codepoint_out: *mut u32) -> bool { + let b = &mut *b; + let i = from_raw_parts(input, count); + + // len(chr(2 ** 16 - 1).encode('utf-8')) == 3 + let utf8_mult = 3; + let res = encode_loop( + utf8_mult, + &mut b.0, + i, + |c| char::from_u32(c as u32)); + match res { + Ok(s) => { + *size_out = s.len(); + *buf_out = s.as_ptr() as *const c_char; + true + } + Err(bad) => { + *bad_codepoint_out = bad; + false + } + } +} + +/// Convert a Py_UCS4 string to UTF-8. +/// Returns a `buf_out` borrowed ptr of `size_out` len. +/// The buffer is borrowed from `b`. +/// In case of errors, returns `false` and bad_codepoint_out is set to the +/// offending codepoint. +#[no_mangle] +pub unsafe extern "C" fn qdb_ucs4_to_utf8(b: *mut qdb_pystr_buf, + count: usize, + input: *const u32, + size_out: *mut usize, + buf_out: *mut *const c_char, + bad_codepoint_out: *mut u32) -> bool { + let b = &mut *b; + let i = from_raw_parts(input, count); + + // Max 4 bytes allowed by RFC: https://www.rfc-editor.org/rfc/rfc3629#page-4 + let utf8_mult = 4; + let res = encode_loop( + utf8_mult, + &mut b.0, + i, + |c| char::from_u32(c)); + match res { + Ok(s) => { + *size_out = s.len(); + *buf_out = s.as_ptr() as *const c_char; + true + } + Err(bad) => { + *bad_codepoint_out = bad; + false + } + } +} + +#[cfg(test)] +mod tests; diff --git a/pystr-to-utf8/src/tests.rs b/pystr-to-utf8/src/tests.rs new file mode 100644 index 00000000..68da613e --- /dev/null +++ b/pystr-to-utf8/src/tests.rs @@ -0,0 +1,315 @@ +use super::*; + +struct Buf { + buf: *mut qdb_pystr_buf, +} + +impl Buf { + fn new() -> Self { + Self { + buf: unsafe { qdb_pystr_buf_new() }, + } + } + + fn chain(&self) -> &Vec { + unsafe { &(*self.buf).0 } + } + + fn chain_mut(&mut self) -> &mut Vec { + unsafe { &mut (*self.buf).0 } + } + + fn clear(&mut self) { + unsafe { qdb_pystr_buf_clear(self.buf) } + } + + fn tell(&self) -> qdb_pystr_pos { + unsafe { qdb_pystr_buf_tell(self.buf) } + } + + fn truncate(&mut self, pos: qdb_pystr_pos) { + unsafe { qdb_pystr_buf_truncate(self.buf, pos) } + } + + fn ucs1_to_utf8(&mut self, input: &[u8]) -> &'static str { + let mut size_out = 0; + let mut buf_out = std::ptr::null(); + unsafe { + qdb_ucs1_to_utf8( + self.buf, + input.len(), + input.as_ptr(), + &mut size_out, + &mut buf_out); + } + let slice = unsafe { + from_raw_parts(buf_out as *const u8, size_out) }; + std::str::from_utf8(slice).unwrap() + } + + fn ucs2_to_utf8(&mut self, input: &[u16]) -> Result<&'static str, u32> { + let mut size_out = 0; + let mut buf_out = std::ptr::null(); + let mut bad_codepoint = 0u32; + let ok = unsafe { + qdb_ucs2_to_utf8( + self.buf, + input.len(), + input.as_ptr(), + &mut size_out, + &mut buf_out, + &mut bad_codepoint) + }; + if ok { + let slice = unsafe { + from_raw_parts(buf_out as *const u8, size_out) }; + let msg = std::str::from_utf8(slice).unwrap(); + Ok(msg) + } else { + Err(bad_codepoint) + } + } + + fn ucs4_to_utf8(&mut self, input: &[u32]) -> Result<&'static str, u32> { + let mut size_out = 0; + let mut buf_out = std::ptr::null(); + let mut bad_codepoint = 0u32; + let ok = unsafe { + qdb_ucs4_to_utf8( + self.buf, + input.len(), + input.as_ptr(), + &mut size_out, + &mut buf_out, + &mut bad_codepoint) + }; + if ok { + let slice = unsafe { + from_raw_parts(buf_out as 
*const u8, size_out) }; + let msg = std::str::from_utf8(slice).unwrap(); + Ok(msg) + } else { + Err(bad_codepoint) + } + } +} + +impl Drop for Buf { + fn drop(&mut self) { + unsafe { + qdb_pystr_buf_free(self.buf); + } + } +} + +#[test] +fn test_empty() { + let b = Buf::new(); + assert_eq!(b.chain().len(), 0); + let pos = b.tell(); + assert_eq!(pos.chain, 0); + assert_eq!(pos.string, 0); +} + +#[test] +fn test_ucs1() { + let mut b = Buf::new(); + let s1 = b.ucs1_to_utf8(b"hello"); + assert_eq!(s1, "hello"); + assert_eq!(b.chain_mut().len(), 1); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + assert_eq!(b.chain()[0], "hello"); + assert_eq!(b.tell().chain, 1); + assert_eq!(b.tell().string, 5); + b.clear(); + assert_eq!(b.chain().len(), 1); + assert_eq!(b.chain()[0], ""); + let s2 = b.ucs1_to_utf8(b""); + assert_eq!(s2, ""); + assert_eq!(b.tell(), qdb_pystr_pos { chain: 1, string: 0 }); + assert_eq!(s2.as_ptr(), b.chain()[0].as_str().as_ptr()); + let s3 = b.ucs1_to_utf8(b"10\xb5"); + assert_eq!(s3, "10µ"); + assert_eq!(s3.len(), 4); // 3 bytes in UCS-1, 4 bytes in UTF-8. + assert_eq!(b.chain().len(), 1); + assert_eq!(s3.as_ptr(), unsafe { + b.chain()[0].as_str().as_ptr().add(s2.len()) + }); + assert_eq!(b.tell(), qdb_pystr_pos { + chain: 1, string: s2.len() + s3.len() }); +} + +#[test] +fn test_resize_and_truncate() { + let mut b = Buf::new(); + let s1 = b.ucs1_to_utf8(b"abcdefghijklmnopqrstuvwxyz"); + assert_eq!(s1, "abcdefghijklmnopqrstuvwxyz"); + assert_eq!(b.chain_mut().len(), 1); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + + let big_string = "hello world".repeat(1000); + assert!(big_string.len() > MIN_BUF_LEN); + let s2 = b.ucs1_to_utf8(big_string.as_bytes()); + assert_eq!(s2, big_string); + assert_eq!(b.chain_mut().len(), 2); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + assert_eq!(b.chain_mut()[1].as_str().as_ptr(), s2.as_ptr()); + assert_eq!(b.tell(), qdb_pystr_pos { chain: 2, string: 11000 }); + b.truncate(b.tell()); + assert_eq!(b.tell(), qdb_pystr_pos { chain: 2, string: 11000 }); + + let spare = b.chain_mut()[1].capacity() - b.chain_mut()[1].len(); + assert!(spare > 4); + + let test_string = "ab"; + let s3 = b.ucs1_to_utf8(test_string.as_bytes()); + assert_eq!(s3, test_string); + assert_eq!(b.chain_mut().len(), 2); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + assert_eq!(b.chain_mut()[1].as_str().as_ptr(), s2.as_ptr()); + assert_eq!(b.tell(), qdb_pystr_pos { + chain: 2, string: 11000 + test_string.len() }); +} + +#[test] +fn test_ucs2() { + let mut b = Buf::new(); + + // We first check code points within the ASCII range. + let s1 = b.ucs2_to_utf8( + &[0x61, 0x62, 0x63, 0x64, 0x65]).unwrap(); + assert_eq!(s1, "abcde"); + assert_eq!(s1.len(), 5); + + // Now chars outside ASCII range, but within UCS-1 range. + // These will yield two bytes each in UTF-8. + let s2 = b.ucs2_to_utf8( + &[0x00f0, 0x00e3, 0x00b5, 0x00b6]) + .unwrap(); + assert_eq!(s2, "ðãµ¶"); + assert_eq!(s2.len(), 8); + + // Now chars that actually require two bytes in UCS-2, but also fit in + // two bytes in UTF-8. + let s3 = b.ucs2_to_utf8( + &[0x0100, 0x069c]) + .unwrap(); + assert_eq!(s3, "Āڜ"); + assert_eq!(s3.len(), 4); + + // Now chars that require two bytes in UCS-2 and 3 bytes in UTF-8. + let s4 = b.ucs2_to_utf8( + &[0x569c, 0xa4c2]) + .unwrap(); + assert_eq!(s4, "嚜꓂"); + assert_eq!(s4.len(), 6); + + // Quick check that we're just writing to the same buffer. 
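+    // (s1..s4 total 5 + 8 + 4 + 6 = 23 bytes, well under MIN_BUF_LEN == 1024,
+    // so everything is still in the first chunk.)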
+ assert_eq!(b.tell(), qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4].iter().map(|s| s.len()).sum() }); + + // Now we finally check that errors are captured. + // For this, we use a code point which is valid in a Python string + // (in UCS-2), but which is not valid when encoded as UTF-8. + // >>> chr(0xd800).encode('utf-8') + // Traceback (most recent call last): + // File "", line 1, in + // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' + // in position 0: surrogates not allowed + let before_pos = b.tell(); + let s5 = b.ucs2_to_utf8(&[0x061, 0xd800]); + assert!(s5.is_err()); + assert_eq!(s5.unwrap_err(), 0xd800 as u32); + + // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, + // we also want to be sure that the buffer was not modified and appended to. + assert_eq!(b.tell(), before_pos); + + // Now we check that the buffer is still in a valid state. + let s6 = b.ucs2_to_utf8(&[0x062, 0x063]).unwrap(); + assert_eq!(s6, "bc"); + assert_eq!(b.tell(), qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4, s6].iter().map(|s| s.len()).sum() }); +} + +#[test] +fn test_ucs4() { + let mut b = Buf::new(); + + // We first check code points within the ASCII range. + let s1 = b.ucs4_to_utf8( + &[0x61, 0x62, 0x63, 0x64, 0x65]).unwrap(); + assert_eq!(s1, "abcde"); + assert_eq!(s1.len(), 5); + + // Now chars outside ASCII range, but within UCS-1 range. + // These will yield two bytes each in UTF-8. + let s2 = b.ucs4_to_utf8( + &[0x00f0, 0x00e3, 0x00b5, 0x00b6]) + .unwrap(); + assert_eq!(s2, "ðãµ¶"); + assert_eq!(s2.len(), 8); + + // Now chars that actually require two bytes in UCS-2, but also fit in + // two bytes in UTF-8. + let s3 = b.ucs4_to_utf8( + &[0x0100, 0x069c]) + .unwrap(); + assert_eq!(s3, "Āڜ"); + assert_eq!(s3.len(), 4); + + // Now chars that require two bytes in UCS-2 and 3 bytes in UTF-8. + let s4 = b.ucs4_to_utf8( + &[0x569c, 0xa4c2]) + .unwrap(); + assert_eq!(s4, "嚜꓂"); + assert_eq!(s4.len(), 6); + + // Now chars that require four bytes in UCS-4 and 4 bytes in UTF-8. + let s5 = b.ucs4_to_utf8( + &[0x1f4a9, 0x1f99e]) + .unwrap(); + assert_eq!(s5, "💩🦞"); + assert_eq!(s5.len(), 8); + + // Quick check that we're just writing to the same buffer. + assert_eq!(b.tell(), qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4, s5].iter().map(|s| s.len()).sum() }); + + // Now we finally check that errors are captured. + // For this, we use a code point which is valid in a Python string + // (in UCS-4), but which is not valid when encoded as UTF-8. + // >>> chr(0xd800).encode('utf-8') + // Traceback (most recent call last): + // File "", line 1, in + // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' + // in position 0: surrogates not allowed + let before_pos = b.tell(); + let s6 = b.ucs4_to_utf8(&[0x061, 0xd800]); + assert!(s6.is_err()); + assert_eq!(s6.unwrap_err(), 0xd800 as u32); + + // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, + // we also want to be sure that the buffer was not modified and appended to. + assert_eq!(b.tell(), before_pos); + + // We repeat the same with chars with code points higher than the u16 type. + let before_pos = b.tell(); + let s7 = b.ucs4_to_utf8(&[0x061, 0x110000]); + assert!(s7.is_err()); + assert_eq!(s7.unwrap_err(), 0x110000); + + // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, + // we also want to be sure that the buffer was not modified and appended to. 
+ assert_eq!(b.tell(), before_pos); + + // Now we check that the buffer is still in a valid state. + let s8 = b.ucs4_to_utf8(&[0x062, 0x063]).unwrap(); + assert_eq!(s8, "bc"); + assert_eq!(b.tell(), qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4, s5, s8].iter().map(|s| s.len()).sum() }); +} \ No newline at end of file diff --git a/setup.py b/setup.py index 3b0e93d1..f871e695 100755 --- a/setup.py +++ b/setup.py @@ -21,8 +21,18 @@ WIN_32BIT_CARGO_TARGET = 'i686-pc-windows-msvc' +INSTRUMENT_FUZZING = False +if os.environ.get('TEST_QUESTDB_FUZZING') == '1': + INSTRUMENT_FUZZING = True + ORIG_CC = os.environ.get('CC') + os.environ['CC'] = "clang" + ORIG_CXX = os.environ.get('CXX') + os.environ['CXX'] = "clang++" + + def ingress_extension(): - lib_name = None + lib_prefix = '' + lib_suffix = '' lib_paths = [] libraries = [] extra_compile_args = [] @@ -30,31 +40,50 @@ def ingress_extension(): extra_objects = [] questdb_rs_ffi_dir = PROJ_ROOT / 'c-questdb-client' / 'questdb-rs-ffi' + pystr_to_utf8_dir = PROJ_ROOT / 'pystr-to-utf8' questdb_client_lib_dir = None + pystr_to_utf8_lib_dir = None if PLATFORM == 'win32' and MODE == '32bit': questdb_client_lib_dir = \ questdb_rs_ffi_dir / 'target' / WIN_32BIT_CARGO_TARGET / 'release' + pystr_to_utf8_lib_dir = \ + pystr_to_utf8_dir / 'target' / WIN_32BIT_CARGO_TARGET / 'release' else: questdb_client_lib_dir = questdb_rs_ffi_dir / 'target' / 'release' + pystr_to_utf8_lib_dir = pystr_to_utf8_dir / 'target' / 'release' + if INSTRUMENT_FUZZING: + extra_compile_args.append('-fsanitize=fuzzer-no-link') + extra_link_args.append('-fsanitize=fuzzer-no-link') + else: + extra_compile_args.append('-flto') + extra_link_args.append('-flto') if PLATFORM == 'darwin': - lib_name = 'libquestdb_client.a' - extra_objects = [str(questdb_client_lib_dir / lib_name)] + lib_prefix = 'lib' + lib_suffix = '.a' extra_link_args.extend(['-framework', 'Security']) elif PLATFORM == 'win32': - lib_name = 'questdb_client.lib' - extra_objects = [str(questdb_client_lib_dir / lib_name)] + lib_prefix = '' + lib_suffix = '.lib' libraries.extend(['wsock32', 'ws2_32', 'AdvAPI32', 'bcrypt', 'UserEnv']) elif PLATFORM == 'linux': - lib_name = 'libquestdb_client.a' - extra_objects = [str(questdb_client_lib_dir / lib_name)] + lib_prefix = 'lib' + lib_suffix = '.a' else: raise NotImplementedError(f'Unsupported platform: {PLATFORM}') + extra_objects = [ + str(loc / f'{lib_prefix}{name}{lib_suffix}') + for loc, name in ( + (questdb_client_lib_dir, 'questdb_client'), + (pystr_to_utf8_lib_dir, 'pystr_to_utf8'))] + return Extension( "questdb.ingress", ["src/questdb/ingress.pyx"], - include_dirs=["c-questdb-client/include"], + include_dirs=[ + "c-questdb-client/include", + "pystr-to-utf8/include"], library_dirs=lib_paths, libraries=libraries, extra_compile_args=extra_compile_args, @@ -98,9 +127,25 @@ def cargo_build(): if PLATFORM == 'win32' and MODE == '32bit': cargo_args.append(f'--target={WIN_32BIT_CARGO_TARGET}') + env = os.environ.copy() + if INSTRUMENT_FUZZING: + if ORIG_CC is not None: + env['CC'] = ORIG_CC + else: + del env['CC'] + if ORIG_CXX is not None: + env['CXX'] = ORIG_CXX + else: + del env['CXX'] + subprocess.check_call( + cargo_args, + cwd=str(PROJ_ROOT / 'c-questdb-client' / 'questdb-rs-ffi'), + env=env) + subprocess.check_call( cargo_args, - cwd=str(PROJ_ROOT / 'c-questdb-client' / 'questdb-rs-ffi')) + cwd=str(PROJ_ROOT / 'pystr-to-utf8'), + env=env) class questdb_build_ext(build_ext): diff --git a/src/questdb/arrow_c_data_interface.h b/src/questdb/arrow_c_data_interface.h new file 
mode 100644 index 00000000..d58417e6 --- /dev/null +++ b/src/questdb/arrow_c_data_interface.h @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE + +#ifndef ARROW_C_STREAM_INTERFACE +#define ARROW_C_STREAM_INTERFACE + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. 
+ void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_STREAM_INTERFACE + +#ifdef __cplusplus +} +#endif diff --git a/src/questdb/arrow_c_data_interface.pxd b/src/questdb/arrow_c_data_interface.pxd new file mode 100644 index 00000000..8c0b5472 --- /dev/null +++ b/src/questdb/arrow_c_data_interface.pxd @@ -0,0 +1,38 @@ +from libc.stdint cimport int64_t + +cdef extern from "arrow_c_data_interface.h": + + cdef int ARROW_FLAG_DICTIONARY_ORDERED + cdef int ARROW_FLAG_NULLABLE + cdef int ARROW_FLAG_MAP_KEYS_SORTED + + cdef struct ArrowSchema: + # Array type description + const char* format + const char* name + const char* metadata + int64_t flags + int64_t n_children + ArrowSchema** children + ArrowSchema* dictionary + + # Release callback + void (*release)(ArrowSchema*) + # Opaque producer-specific data + void* private_data + + cdef struct ArrowArray: + # Array data description + int64_t length + int64_t null_count + int64_t offset + int64_t n_buffers + int64_t n_children + const void** buffers + ArrowArray** children + ArrowArray* dictionary + + # Release callback + void (*release)(ArrowArray*) + # Opaque producer-specific data + void* private_data diff --git a/src/questdb/dataframe.md b/src/questdb/dataframe.md new file mode 100644 index 00000000..70928f93 --- /dev/null +++ b/src/questdb/dataframe.md @@ -0,0 +1,661 @@ +# Pandas Integration High-level Overview + +## Goal + +We want to access data in a pandas dataframe from Cython efficiently. +To do this, we need to access its raw memory to traverse it efficiently. +The data held by a dataframe is organized in a columnar fashion. +Each column is a Series object in Python. +Each series object can be backed up by either a Numpy data-structure or +by an Arrow data-structure. + +## Accessing raw Numpy data +To access Numpy data we take the series, call its `.to_numpy()` method +and then access the numpy data as a `Py_buffer`. +* https://docs.python.org/3/c-api/buffer.html +* http://jakevdp.github.io/blog/2014/05/05/introduction-to-the-python-buffer-protocol/ + +## Accessing raw Arrow data +To access Arrow data we first need to unpack each chunk of data at the +Python level giving us `pyarrow` wrapper Array objects. +Each Arrow object in `pyarrow` has a `._export_to_c(..)` python method where we +can pass a Python ints with the addresses to a pre-allocated `ArrowArray` and +`ArrowSchema` C structures. +* https://arrow.apache.org/docs/python/integration/python_java.html + (Ignore the Java part, we just use the same approach for Python to C.) +* https://arrow.apache.org/docs/format/CDataInterface.html +* https://arrow.apache.org/docs/format/Columnar.html#format-columnar + +## Consolidating data access +Now that we've obtained all the pointers we can traverse through the data +without the aid of the Python interpreter (until we hit a Python string in a +Numpy array that is). + +The trouble is, though, that we're dealing with so many potential column types +numpy strides, arrow dictionaries and nullables that we risk having an +unmaintainable spaghetti mess of conditionals, special cases and downright +untestability. + +To tame this and maintain one's sanity we need to remember that we +don't need to support every type, data-structure et cetera that pandas, numpy +and arrow can throw at us: Instead we approach this by only accepting +one-dimensional arrays that support our basic ILP supported types _only_. 
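+
+As a rough, purely illustrative sketch (the real checks live in the Cython
+code and are more granular), the per-column gate amounts to something like
+the following; `_SUPPORTED_DTYPES` and `_check_series` are hypothetical names:
+
+```python
+import pandas as pd
+
+# Hypothetical whitelist; the real per-dtype mapping lives in the Cython code.
+_SUPPORTED_DTYPES = {
+    'bool', 'boolean', 'int64', 'Int64', 'float64', 'Float64',
+    'object', 'string', 'category', 'datetime64[ns]',
+}
+
+def _check_series(name, series: pd.Series):
+    # Reject anything that is not a 1-D column of an ILP-compatible type.
+    values = series.to_numpy()
+    if values.ndim != 1:
+        raise TypeError(f'column {name!r}: only 1-D data is supported')
+    if str(series.dtype) not in _SUPPORTED_DTYPES:
+        raise TypeError(f'column {name!r}: unsupported dtype {series.dtype}')
+```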
+ +We can also further simplify iteration via the introduction of a cursor: +a struct that is a mishmash of the simplified subsets of arrow and py buffers +that we actually care about. + +## Cherry-picking `Py_buffer` and `ArrowArray` features + +First, off the bat, we can exclude supporting some of these structs' fields: + +### `Py_buffer` +_Always one single `Py_buffer` per column. Not chunked._ + +* `void *buf`: Points to the start of our data. **`[NEEDED]`** +* `PyObject *obj`: No need to access Py object again. **`[IGNORED]`** +* `int readonly`: We never write. **`[IGNORED]`** +* `Py_ssize_t len`: We already have the row-count. **`[IGNORED]`** +* `Py_ssize_t itemsize`: It's enough to know our stride. **`[IGNORED]`** +* `int ndim`: We only support 1-D data. **`[VALIDATED]`** +* `Py_ssize_t *shape`: We only support 1-D data. **`[IGNORED]`** +* `Py_ssize_t *strides`: We only need the first value **`[SIMPLIFIED]`** +* `Py_ssize_t *suboffsets`: Numpy shouldn't be using this. **`[VALIDATED]`** +* `void *internal`: Says on the tin. **`[IGNORED]`** + +### `ArrowArray` +_Multiple of these `ArrowArray` structs per column. Chunked._ + +* `int64_t length`: We need it for the length of the chunk. **`[NEEDED]`** +* `int64_t null_count`: Needed as if == 0, null col may be NULL. **`[NEEDED]`** +* `int64_t offset`: Needed to determine number of skipped rows. **`[NEEDED]`** +* `int64_t n_buffers`: A function of the type, not needed. **`[IGNORED]`** +* `int64_t n_children`: A function of the type, not needed. **`[IGNORED]`** +* `const void** buffers`: Data, e.g. buffers[0] is validity bitvec. **`[NEEDED]`** +* `ArrowArray** children`: Needed only for strings where: **`[NEEDED]`** + * `buffers[0]` is nulls bitvec + * `buffers[1]` is int32 offsets buffer + * `children[0]` is ArrowArray of int8 + * See: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout +* `ArrowArray* dictionary`: Needed to support Pandas categories. + * This ends up being an array of strings, whilst the index is kept in the + parent `buffers[1]` with `buffers[0]` (possibly) as the validity bitmap. + +## Mapping Datatypes + +We can now start to remind ourselves of the destination data-types that we +actually need to support, and see how these map from source column data-types +in both of Numpy and Arrow. + +We need to extract: +* booleans +* 64-bit signed integers +* 64-bit floats +* UTF-8 string buffers +* Nanosecond-precision UTC unix epoch 64-bit signed int timestamps + +```python +import pandas as pd +import pyarrow as pa +``` + +### Booleans + +```python +>>> df = pd.DataFrame({ +... 'bool_col': [True, False, False, True], +... 'obj_bool_col': [True, False, None, False], +... 'nullable_bool_col': pd.array( +... [True, False, None, False], dtype="boolean")}) +``` + +#### Numpy-native representation. +```python +>>> df.dtypes['bool_col'] +dtype('bool') +>>> type(df.dtypes['bool_col']).mro() +[, , ] +>>> df.bool_col.to_numpy().dtype +dtype('bool') +``` + +#### Bools as Python objects +```python +>>> df.obj_bool_col +0 True +1 False +2 None +3 False +Name: obj_bool_col, dtype: object +``` + +It's unclear if this should be supported or not. We might want to and error out +as soon as we encounter either a `None` or a `pandas.NA` object. + +```python +>>> df.obj_bool_col.astype('bool') +0 True +1 False +2 False +3 False +Name: obj_bool_col, dtype: bool +``` + +Lastly, we have what appears to be an Arrow-backed representation. 
+```python +>>> df.dtypes['nullable_bool_col'] +BooleanDtype +>>> type(df.dtypes['nullable_bool_col']).mro() +[, , , ] +``` + +We can convert it and then access its contents: +``` +>>> arr1 = pa.Array.from_pandas(df.nullable_bool_col) +>>> arr1 + +[ + true, + false, + null, + false +] +>>> arr1._export_to_c(.... pointer_refs to ArrowArray and ArrowSchema) +``` + +This last type is represented as two bitmaps. +See: https://docs.rs/arrow-array/26.0.0/src/arrow_array/array/boolean_array.rs.html#107 + +We want to support this representation, but skip out on nulls. +We want to error out as soon as we see a `null`. + +### 64-bit signed integers + +From Numpy's side, we've got a fair few to deal with: +https://numpy.org/doc/stable/user/basics.types.html + +This is all your usual signed/unsigned integers with 8, 16, 32 and 64 bit width. + +The good news is that the default _is_ `int64`: + +```python +>>> df = pd.DataFrame({'n': [1, 2, 3, 4, 5]}) +>>> df.n +0 1 +1 2 +2 3 +3 4 +4 5 +Name: n, dtype: int64 + +>>> df.dtypes['n'] +dtype('int64') + +>>> type(df.dtypes['n']).mro() +[, , ] +``` + +Some of these are going to be in bounds, others out of bounds of 64-bit signed: +Validation needed. + +Pandas also provides its own (arrow-based) nullable integers. + +```python +>>> df2 = pd.DataFrame({'nullable_n': pd.array([1, 2, None], dtype=pd.Int64Dtype())}) +>>> df2.nullable_n +0 1 +1 2 +2 +Name: nullable_n, dtype: Int64 +>>> type(df2.dtypes['nullable_n']).mro() +[, , , , , ] +``` + +We also need to validate against potential byte-order issues as we're not going +to support this until someone asks: +https://pandas.pydata.org/pandas-docs/version/0.19.1/gotchas.html#byte-ordering-issues + +```python +>>> df3 = pd.DataFrame({'big_e': np.array([1, 2, 3, 4]).astype('>u4')}) +>>> df3.big_e +0 1 +1 2 +2 3 +3 4 +Name: big_e, dtype: uint32 +>>> type(df3.dtypes['big_e']).mro() +[, , ] +>>> df3.dtypes['big_e'].byteorder +'>' +``` + + +### 64-bit floats + +32-bit and 64-bit floats. They all support nullability. We will disallow 16-bit +floats. + +64-bit is default. + +```python +>>> df = pd.DataFrame({'a': [None, 1.0, 1.5, 2.0], 'b': pd.Series([None, 1.0, 1.5, 2.0], dtype='float32'), 'c': pd.Series([None, 1.0, 1.5, 2.0], dtype='float64')}) +>>> df + a b c +0 NaN NaN NaN +1 1.0 1.0 1.0 +2 1.5 1.5 1.5 +3 2.0 2.0 2.0 +>>> df.a +0 NaN +1 1.0 +2 1.5 +3 2.0 +Name: a, dtype: float64 +>>> df.b +0 NaN +1 1.0 +2 1.5 +3 2.0 +Name: b, dtype: float32 +>>> df.c +0 NaN +1 1.0 +2 1.5 +3 2.0 +Name: c, dtype: float64 +``` + +#### Arrow floats + +Pandas also has arrow-compatible floats. +These have an additional bitvector to represent nulls. + + + +#### 16-bit floats + +16-bit floats _do exist_ in Pandas, but we will disallow them: + +```python +>>> df = pd.DataFrame({'a': pd.Series([1.0, 1.5, 2.0], dtype='float16')}) +>>> df + a +0 1.0 +1 1.5 +2 2.0 +>>> df.a +0 1.0 +1 1.5 +2 2.0 +Name: a, dtype: float16 +``` + +### UTF-8 string buffers + +Strings are.. hard. Strings in dataframes are harder. + +#### Python Strings + +Numpy usually holds strings as Python objects. + +```python +>>> df = pd.DataFrame({'a': [ +... 'Strings', 'in', 'Pandas', 'are', 'objects', 'by', 'default']}) +>>> df.dtypes['a'] +dtype('O') +>>> type(df.dtypes['a']).mro() +[, , ] +``` + +Ouch. + +Python string objects internally hold buffers that, depending on need are +encoded as one of UCS-1, UCS-2 or UCS-4. These are variable-length arrays of +codepoints. One codepoint per array element. 
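+
+The width is picked per string, based on the largest codepoint it contains
+(CPython's PEP 393 flexible representation). A quick pure-Python way to see
+which bucket a string lands in:
+
+```python
+>>> max(map(ord, 'hello'))   # all codepoints <= 255: stored as UCS-1
+111
+>>> max(map(ord, 'héllo'))   # still <= 255: UCS-1
+233
+>>> max(map(ord, 'h🦞llo'))  # > 0xFFFF: stored as UCS-4
+129438
+```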
+ +In UCS-1 that's 1-byte elements - effectively `uint8_t`, so the highest code +point is `2 ** 8 - 1 == 255`, or in other words: + +```python +>>> chr(255) +'ÿ' +``` + +If a string contains a codepoint with a numeric value higher than this, it would +need UCS-2 or UCS-4. Such representations are backed by `uint16_t` or `uint32_t` +arrays. + +For example, the codepoint for a lobster is 129438. + +```python +>>> ord('🦞') +129438 +``` + +We _could_ ask Python to convert strings to UTF-8 for us, + +```python +>>> '🦞'.encode('utf-8') +b'\xf0\x9f\xa6\x9e' +``` + +but this would require invoking the Python interpreter and the creation of a +gargantuan amount of little temporary objects. + +This is such a common use case that we do the encoding in a supporting Rust +library. See `pystr-to-utf8` in the source tree. + +It accumulates strings in a address-stable buffer (internally a `Vec`) +and allows us to borrow its memory. + +As a side-note, we should also be ready to handle nulls here: + +```python +>>> df = pd.DataFrame({'a': ['interspersed', None, 'in', None, 'data']}) +>>> type(df.a[1]) + +``` + +#### Fixed-length strings + +Numpy also has some fixed-length strings via two datatypes: +* `S`: Bytes +* `U`: Unicode + +```python +>>> df = pd.DataFrame({ +... 'a': np.array(['fixed', 'len', 'strings'], dtype='S'), +... 'b': np.array(['example', 'with', 'unicode 🦞'], dtype='U')}) +>>> df + a b +0 b'fixed' example +1 b'len' with +2 b'strings' unicode 🦞 +``` + +It doesn't really matter much though. Their Pandas datatype is actually just +`'O'` (object). + +```python +>>> df.dtypes['a'] +dtype('O') +>>> df.dtypes['b'] +dtype('O') +>>> type(df.dtypes['b']) + +``` + +We should: +* reject the first one (because in Python3 bytes aren't strings) - We lack the powers to guess which text encoding was used. It's usually `latin-1`, but was it? + ```python + >>> type(df.a[0]) + + ``` +* Accept the second one without further optimisations: + ```python + >>> type(df.b[0]) + + ``` + +#### Pandas `string[object]` dtype + +Since the `'O'` dtype could hold anything (not just strings), Pandas introduced a new column type that ensures the column only holds strings. + +```python +>>> df = pd.DataFrame({'a': pd.Series(['another', None, 'str', 'example'], dtype='string')}) +>>> df + a +0 another +1 +2 str +3 example +>>> df.dtypes['a'] +string[python] +>>> type(df.dtypes['a']).mro() +[, , , ] +``` + +Note that by default the storage is still Python objects (sigh), +so our Rust-based conversion will come handy here as well. + +Note however that we need to handle nulls not as `None` objects, +but as `pandas.NA` objects. + +```python +>>> df.a[1] + +``` + +At other times, we end up with `nan` python float objects to represent nulls. +_Yay!_. + +#### Arrow-backed Strings + +Finally - as we would expect when obtaining a frame from something like Parquet - there's string columns in UTF-8-native format backed by Arrow. + +_note the different `dtype`:_ + +```python +df = pd.DataFrame({'a': pd.Series(['arrow', None, 'str', 'example'], dtype='string[pyarrow]')}) +``` + +``` +>>> df = pd.DataFrame({'a': pd.Series(['arrow', None, 'str', 'example'], dtype='string[pyarrow]')}) +>>> df + a +0 arrow +1 +2 str +3 example +>>> df.dtypes['a'] +string[pyarrow] +>>> type(df.dtypes['a']).mro() +[, , , ] +``` + +Note that these strings will always have indices based on `int32_t`. + +Arrow also has a `pyarrow.large_string()` type, but +pandas doesn't support it. + +#### Symbol-like Categorical Data + +Pandas supports categories. 
These are backed by Arrow. + +```python +>>> df = pd.DataFrame({'a': pd.Series( +... ['symbol', 'like', 'type', 'symbol', 'like', 'like', 'like', None], +... dtype='category')}) +>>> df + a +0 symbol +1 like +2 type +3 symbol +4 like +5 like +6 like +7 NaN +>>> df.dtypes['a'] +CategoricalDtype(categories=['like', 'symbol', 'type'], ordered=False) +>>> type(df.dtypes['a']).mro() +[, , , ] +``` + +This is how it's represented: + +```python +>>> pa.Array.from_pandas(df.a) + + +-- dictionary: + [ + "like", + "symbol", + "type" + ] +-- indices: + [ + 1, + 0, + 2, + 1, + 0, + 0, + 0, + null + ] +``` + +For this, we need the `dictionary` field in the `ArrowArray` struct. + +What's also neat is that we know the categories in advance _before_ running the +encoding. This means we can build up our `line_sender_utf8` objects in advance, +though they are all UTF-8 buffers already so.. little gain. + + +### Nanosecond-precision UTC unix epoch 64-bit signed int timestamps + +#### Timezone-free timestamp + +```python +>>> n1 = pd.Timestamp(dt.datetime.utcnow()) +>>> n2 = pd.Timestamp(dt.datetime.utcnow()) +>>> df = pd.DataFrame({'a': [n1, n2]}) +>>> df + a +0 2022-11-15 17:47:23.131445 +1 2022-11-15 17:47:26.943899 +``` + +The data is held as nanos since unix epoch as a 64-bit int. +```python +>>> df.dtypes['a'] +dtype('>> type(df.dtypes['a']).mro() +[, , ] +``` + +This matches our own designated timestamp representation and we just need to convert to micros for the rest of the columns. + +Null values _are_ supported. + +```python +>>> df = pd.DataFrame({'a': [n1, n2, None]}) +>>> df + a +0 2022-11-15 17:47:23.131445 +1 2022-11-15 17:47:26.943899 +2 NaT +``` + +Unclear what the sentinel value for `NaT` is yet, but we want to map it internally to 0 for the designated timestamp and to recognise it +and skip the column otherwise. + +#### Additionally, we can also have datetimes with a timezone + +```python +>>> ts = pd.Timestamp( +... year=2020, month=1, day=1, hour=12, minute=0, second=0, +... tz=zoneinfo.ZoneInfo('America/Los_Angeles')) +>>> df = pd.DataFrame({'a': [ts]}) +>>> df.dtypes['a'] +datetime64[ns, America/Los_Angeles] +>>> type(_) + +>>> df.dtypes['a'].tz +zoneinfo.ZoneInfo(key='America/Los_Angeles') +``` + +The good news here is that the timestamp is still held as UTC (regardless of +timezone), so no timezone conversion logic is required here. + +```python +>>> pa.Array.from_pandas(df.a) + +[ + 2020-01-01 20:00:00.000000000 +] +``` + +**Note**: We need PyArrow to access the buffer, or we need to convert to +`datetime64[ns]`. + + +## Strided Numpy Arrays + +Numpy arrays need not be contiguous. In Pandas, however, we +need not worry about this. + +If we construct a `(4, 3)`-shaped 2D numpy array + +```python +>>> import numpy as np +>>> a1 = np.array([[1, 10, 100], [2, 20, 200], [3, 30, 300], [4, 40, 400]]) +>>> a1 +array([[ 1, 10, 100], + [ 2, 20, 200], + [ 3, 30, 300], + [ 4, 40, 400]]) +>>> a1.dtype +dtype('int64') +``` + +and then select it's second column + +```python +>>> a2 = a1[:, 1] +>>> a2 +array([10, 20, 30, 40]) +``` + +We encounter a non-contiguous array. 
+ +```python +>>> a2.data + +>>> a2.data.contiguous +False +>>> a2.data.strides +(24,) +``` + +If we then wrap up the array in a dataframe and convert the series back to numpy + +```python +>>> df = pd.DataFrame({'a': a2}) +>>> df + a +0 10 +1 20 +2 30 +3 40 +>>> df.a +0 10 +1 20 +2 30 +3 40 +Name: a, dtype: int64 +>>> a3 = df.a.to_numpy() +``` + +We see that we get a new object back, and that the new object actually _is_ +contiguous. + +```python +>>> id(a2) +140389455034672 +>>> id(a3) +140388032511696 +>>> a3.data + +>>> a3.data.contiguous +True +``` + +For this reason, supporting strides is not necessary. + + +## Unified Cursor + +TO BE CONTINUED diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi new file mode 100644 index 00000000..62cfd365 --- /dev/null +++ b/src/questdb/dataframe.pxi @@ -0,0 +1,2275 @@ +# See: dataframe.md for technical overview. + +cdef struct auto_flush_t: + line_sender* sender + size_t watermark + + +cdef auto_flush_t auto_flush_blank(): + cdef auto_flush_t af + af.sender = NULL + af.watermark = 0 + return af + + +cdef struct col_chunks_t: + size_t n_chunks + ArrowArray* chunks # We calloc `n_chunks + 1` of these. + + +cdef struct col_cursor_t: + ArrowArray* chunk # Current chunk. + size_t chunk_index + size_t offset # i.e. the element index (not byte offset) + + +cdef enum col_target_t: + col_target_skip = 0 + col_target_table = 1 + col_target_symbol = 2 + col_target_column_bool = 3 + col_target_column_i64 = 4 + col_target_column_f64 = 5 + col_target_column_str = 6 + col_target_column_ts = 7 + col_target_at = 8 + + +cdef dict _TARGET_NAMES = { + col_target_t.col_target_skip: "skipped", + col_target_t.col_target_table: "table name", + col_target_t.col_target_symbol: "symbol", + col_target_t.col_target_column_bool: "boolean", + col_target_t.col_target_column_i64: "integer", + col_target_t.col_target_column_f64: "float", + col_target_t.col_target_column_str: "string", + col_target_t.col_target_column_ts: "timestamp", + col_target_t.col_target_at: "designated timestamp", +} + + +cdef enum col_source_t: + # Note: Hundreds digit set to 1 if GIL is required. + col_source_nulls = 0 + col_source_bool_pyobj = 101100 + col_source_bool_numpy = 102000 + col_source_bool_arrow = 103000 + col_source_int_pyobj = 201100 + col_source_u8_numpy = 202000 + col_source_i8_numpy = 203000 + col_source_u16_numpy = 204000 + col_source_i16_numpy = 205000 + col_source_u32_numpy = 206000 + col_source_i32_numpy = 207000 + col_source_u64_numpy = 208000 + col_source_i64_numpy = 209000 + col_source_u8_arrow = 210000 + col_source_i8_arrow = 211000 + col_source_u16_arrow = 212000 + col_source_i16_arrow = 213000 + col_source_u32_arrow = 214000 + col_source_i32_arrow = 215000 + col_source_u64_arrow = 216000 + col_source_i64_arrow = 217000 + col_source_float_pyobj = 301100 + col_source_f32_numpy = 302000 + col_source_f64_numpy = 303000 + col_source_f32_arrow = 304000 + col_source_f64_arrow = 305000 + col_source_str_pyobj = 401100 + col_source_str_arrow = 402000 + col_source_str_i8_cat = 403000 + col_source_str_i16_cat = 404000 + col_source_str_i32_cat = 405000 + col_source_dt64ns_numpy = 501000 + col_source_dt64ns_tz_arrow = 502000 + + +cdef bint col_source_needs_gil(col_source_t source): + # Check if hundreds digit is 1. 
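+    # e.g. col_source_bool_pyobj == 101100 -> 101100 // 100 % 10 == 1 (GIL),
+    #      col_source_bool_numpy == 102000 -> 102000 // 100 % 10 == 0 (no GIL).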
+ return source // 100 % 10 == 1 + + +cdef set _STR_SOURCES = { + col_source_t.col_source_str_pyobj, + col_source_t.col_source_str_arrow, + col_source_t.col_source_str_i8_cat, + col_source_t.col_source_str_i16_cat, + col_source_t.col_source_str_i32_cat, +} + + +cdef dict _PYOBJ_SOURCE_DESCR = { + col_source_t.col_source_bool_pyobj: "bool", + col_source_t.col_source_int_pyobj: "int", + col_source_t.col_source_float_pyobj: "float", + col_source_t.col_source_str_pyobj: "str", +} + + +cdef dict _TARGET_TO_SOURCES = { + col_target_t.col_target_skip: { + col_source_t.col_source_nulls, + }, + col_target_t.col_target_table: { + col_source_t.col_source_str_pyobj, + col_source_t.col_source_str_arrow, + col_source_t.col_source_str_i8_cat, + col_source_t.col_source_str_i16_cat, + col_source_t.col_source_str_i32_cat, + }, + col_target_t.col_target_symbol: { + col_source_t.col_source_str_pyobj, + col_source_t.col_source_str_arrow, + col_source_t.col_source_str_i8_cat, + col_source_t.col_source_str_i16_cat, + col_source_t.col_source_str_i32_cat, + }, + col_target_t.col_target_column_bool: { + col_source_t.col_source_bool_pyobj, + col_source_t.col_source_bool_numpy, + col_source_t.col_source_bool_arrow, + }, + col_target_t.col_target_column_i64: { + col_source_t.col_source_int_pyobj, + col_source_t.col_source_u8_numpy, + col_source_t.col_source_i8_numpy, + col_source_t.col_source_u16_numpy, + col_source_t.col_source_i16_numpy, + col_source_t.col_source_u32_numpy, + col_source_t.col_source_i32_numpy, + col_source_t.col_source_u64_numpy, + col_source_t.col_source_i64_numpy, + col_source_t.col_source_u8_arrow, + col_source_t.col_source_i8_arrow, + col_source_t.col_source_u16_arrow, + col_source_t.col_source_i16_arrow, + col_source_t.col_source_u32_arrow, + col_source_t.col_source_i32_arrow, + col_source_t.col_source_u64_arrow, + col_source_t.col_source_i64_arrow, + }, + col_target_t.col_target_column_f64: { + col_source_t.col_source_float_pyobj, + col_source_t.col_source_f32_numpy, + col_source_t.col_source_f64_numpy, + col_source_t.col_source_f32_arrow, + col_source_t.col_source_f64_arrow, + }, + col_target_t.col_target_column_str: { + col_source_t.col_source_str_pyobj, + col_source_t.col_source_str_arrow, + col_source_t.col_source_str_i8_cat, + col_source_t.col_source_str_i16_cat, + col_source_t.col_source_str_i32_cat, + }, + col_target_t.col_target_column_ts: { + col_source_t.col_source_dt64ns_numpy, + col_source_t.col_source_dt64ns_tz_arrow, + }, + col_target_t.col_target_at: { + col_source_t.col_source_dt64ns_numpy, + col_source_t.col_source_dt64ns_tz_arrow, + }, +} + + +# Targets associated with col_meta_target.field. +cdef tuple _FIELD_TARGETS = ( + col_target_t.col_target_skip, + col_target_t.col_target_column_bool, + col_target_t.col_target_column_i64, + col_target_t.col_target_column_f64, + col_target_t.col_target_column_str, + col_target_t.col_target_column_ts) + + +# Targets that map directly from a meta target. +cdef set _DIRECT_META_TARGETS = { + col_target_t.col_target_table, + col_target_t.col_target_symbol, + col_target_t.col_target_at, +} + + +# This is verbose, but.. +# * Enums give us constants. +# * Constants allow unfolding `if` statements into `switch` +# * Switch statements can be more heavily optimized by the C compiler. 
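+# Worked example of the composition used below:
+#   col_target_column_f64 (5) + col_source_f64_numpy (303000) == 303005,
+#   which is the value behind col_dispatch_code_column_f64__f64_numpy.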
+cdef enum col_dispatch_code_t: + col_dispatch_code_skip_nulls = \ + col_target_t.col_target_skip + col_source_t.col_source_nulls + + col_dispatch_code_table__str_pyobj = \ + col_target_t.col_target_table + col_source_t.col_source_str_pyobj + col_dispatch_code_table__str_arrow = \ + col_target_t.col_target_table + col_source_t.col_source_str_arrow + col_dispatch_code_table__str_i8_cat = \ + col_target_t.col_target_table + col_source_t.col_source_str_i8_cat + col_dispatch_code_table__str_i16_cat = \ + col_target_t.col_target_table + col_source_t.col_source_str_i16_cat + col_dispatch_code_table__str_i32_cat = \ + col_target_t.col_target_table + col_source_t.col_source_str_i32_cat + + col_dispatch_code_symbol__str_pyobj = \ + col_target_t.col_target_symbol + col_source_t.col_source_str_pyobj + col_dispatch_code_symbol__str_arrow = \ + col_target_t.col_target_symbol + col_source_t.col_source_str_arrow + col_dispatch_code_symbol__str_i8_cat = \ + col_target_t.col_target_symbol + col_source_t.col_source_str_i8_cat + col_dispatch_code_symbol__str_i16_cat = \ + col_target_t.col_target_symbol + col_source_t.col_source_str_i16_cat + col_dispatch_code_symbol__str_i32_cat = \ + col_target_t.col_target_symbol + col_source_t.col_source_str_i32_cat + + col_dispatch_code_column_bool__bool_pyobj = \ + col_target_t.col_target_column_bool + col_source_t.col_source_bool_pyobj + col_dispatch_code_column_bool__bool_numpy = \ + col_target_t.col_target_column_bool + col_source_t.col_source_bool_numpy + col_dispatch_code_column_bool__bool_arrow = \ + col_target_t.col_target_column_bool + col_source_t.col_source_bool_arrow + + col_dispatch_code_column_i64__int_pyobj = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_int_pyobj + col_dispatch_code_column_i64__u8_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u8_numpy + col_dispatch_code_column_i64__i8_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i8_numpy + col_dispatch_code_column_i64__u16_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u16_numpy + col_dispatch_code_column_i64__i16_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i16_numpy + col_dispatch_code_column_i64__u32_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u32_numpy + col_dispatch_code_column_i64__i32_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i32_numpy + col_dispatch_code_column_i64__u64_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u64_numpy + col_dispatch_code_column_i64__i64_numpy = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i64_numpy + col_dispatch_code_column_i64__u8_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u8_arrow + col_dispatch_code_column_i64__i8_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i8_arrow + col_dispatch_code_column_i64__u16_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u16_arrow + col_dispatch_code_column_i64__i16_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i16_arrow + col_dispatch_code_column_i64__u32_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u32_arrow + col_dispatch_code_column_i64__i32_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i32_arrow + col_dispatch_code_column_i64__u64_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_u64_arrow + 
col_dispatch_code_column_i64__i64_arrow = \ + col_target_t.col_target_column_i64 + col_source_t.col_source_i64_arrow + + col_dispatch_code_column_f64__float_pyobj = \ + col_target_t.col_target_column_f64 + col_source_t.col_source_float_pyobj + col_dispatch_code_column_f64__f32_numpy = \ + col_target_t.col_target_column_f64 + col_source_t.col_source_f32_numpy + col_dispatch_code_column_f64__f64_numpy = \ + col_target_t.col_target_column_f64 + col_source_t.col_source_f64_numpy + col_dispatch_code_column_f64__f32_arrow = \ + col_target_t.col_target_column_f64 + col_source_t.col_source_f32_arrow + col_dispatch_code_column_f64__f64_arrow = \ + col_target_t.col_target_column_f64 + col_source_t.col_source_f64_arrow + + col_dispatch_code_column_str__str_pyobj = \ + col_target_t.col_target_column_str + col_source_t.col_source_str_pyobj + col_dispatch_code_column_str__str_arrow = \ + col_target_t.col_target_column_str + col_source_t.col_source_str_arrow + col_dispatch_code_column_str__str_i8_cat = \ + col_target_t.col_target_column_str + col_source_t.col_source_str_i8_cat + col_dispatch_code_column_str__str_i16_cat = \ + col_target_t.col_target_column_str + col_source_t.col_source_str_i16_cat + col_dispatch_code_column_str__str_i32_cat = \ + col_target_t.col_target_column_str + col_source_t.col_source_str_i32_cat + + col_dispatch_code_column_ts__dt64ns_numpy = \ + col_target_t.col_target_column_ts + col_source_t.col_source_dt64ns_numpy + col_dispatch_code_column_ts__dt64ns_tz_arrow = \ + col_target_t.col_target_column_ts + \ + col_source_t.col_source_dt64ns_tz_arrow + + col_dispatch_code_at__dt64ns_numpy = \ + col_target_t.col_target_at + col_source_t.col_source_dt64ns_numpy + col_dispatch_code_at__dt64ns_tz_arrow = \ + col_target_t.col_target_at + col_source_t.col_source_dt64ns_tz_arrow + + +# Int values in order for sorting (as needed for API's sequential coupling). +cdef enum meta_target_t: + meta_target_table = col_target_t.col_target_table + meta_target_symbol = col_target_t.col_target_symbol + meta_target_field = col_target_t.col_target_column_bool + meta_target_at = col_target_t.col_target_at + + +cdef struct col_setup_t: + col_chunks_t chunks + size_t orig_index + Py_buffer pybuf + ArrowSchema arrow_schema # Schema of first chunk. + col_source_t source + meta_target_t meta_target + col_target_t target + + +cdef struct col_t: + col_dispatch_code_t dispatch_code # source + target. Determines serializer. + line_sender_column_name name + col_cursor_t cursor + col_setup_t* setup # Grouping to reduce size of struct. + + +cdef void col_t_release(col_t* col): + """ + Release a (possibly) initialized column. + + col_t objects are `calloc`ed, so uninitialized (or partially) initialized + objects will have their pointers and other values set to 0. + """ + cdef size_t chunk_index + cdef ArrowArray* chunk + + if Py_buffer_obj_is_set(&col.setup.pybuf): + PyBuffer_Release(&col.setup.pybuf) # Note: Sets `.pybuf.obj` to NULL. + + for chunk_index in range(col.setup.chunks.n_chunks): + chunk = &col.setup.chunks.chunks[chunk_index] + if chunk.release != NULL: + chunk.release(chunk) + memset(chunk, 0, sizeof(ArrowArray)) + + if col.setup.arrow_schema.release != NULL: + col.setup.arrow_schema.release(&col.setup.arrow_schema) + + free(col.setup.chunks.chunks) + col.setup.chunks.chunks = NULL + col.setup.chunks.n_chunks = 0 + + free(col.setup) + col.setup = NULL + + +# Calloc'd array of col_t. 
+cdef struct col_t_arr: + size_t size + col_t* d + + +cdef col_t_arr col_t_arr_blank(): + cdef col_t_arr arr + arr.size = 0 + arr.d = NULL + return arr + + +cdef col_t_arr col_t_arr_new(size_t size): + cdef col_t_arr arr + cdef size_t index + arr.size = size + arr.d = calloc(size, sizeof(col_t)) + for index in range(size): + arr.d[index].setup = calloc(1, sizeof(col_setup_t)) + return arr + + +cdef void col_t_arr_release(col_t_arr* arr): + cdef size_t index + if arr.d: + for index in range(arr.size): + col_t_release(&arr.d[index]) + free(arr.d) + arr.size = 0 + arr.d = NULL + + +cdef object _NUMPY = None # module object +cdef object _NUMPY_BOOL = None +cdef object _NUMPY_UINT8 = None +cdef object _NUMPY_INT8 = None +cdef object _NUMPY_UINT16 = None +cdef object _NUMPY_INT16 = None +cdef object _NUMPY_UINT32 = None +cdef object _NUMPY_INT32 = None +cdef object _NUMPY_UINT64 = None +cdef object _NUMPY_INT64 = None +cdef object _NUMPY_FLOAT32 = None +cdef object _NUMPY_FLOAT64 = None +cdef object _NUMPY_DATETIME64_NS = None +cdef object _NUMPY_OBJECT = None +cdef object _PANDAS = None # module object +cdef object _PANDAS_NA = None # pandas.NA +cdef object _PYARROW = None # module object, if available or None + +cdef int64_t _NAT = INT64_MIN # pandas NaT + + +cdef object _dataframe_may_import_deps(): + """" + Lazily import module dependencies on first use to avoid startup overhead. + + $ cat imp_test.py + import numpy + import pandas + import pyarrow + + $ time python3 ./imp_test.py + python3 ./imp_test.py 0.56s user 1.60s system 852% cpu 0.254 total + """ + global _NUMPY, _PANDAS, _PYARROW, _PANDAS_NA + global _NUMPY_BOOL + global _NUMPY_UINT8 + global _NUMPY_INT8 + global _NUMPY_UINT16 + global _NUMPY_INT16 + global _NUMPY_UINT32 + global _NUMPY_INT32 + global _NUMPY_UINT64 + global _NUMPY_INT64 + global _NUMPY_FLOAT32 + global _NUMPY_FLOAT64 + global _NUMPY_DATETIME64_NS + global _NUMPY_OBJECT + if _NUMPY is not None: + return + try: + import pandas + import numpy + import pyarrow + except ImportError as ie: + raise ImportError( + 'Missing dependencies: `pandas`, `numpy` and `pyarrow` must all ' + + 'be installed to use the `.dataframe()` method. ' + + 'See: https://py-questdb-client.readthedocs.io/' + + 'en/latest/installation.html.') from ie + _NUMPY = numpy + _NUMPY_BOOL = type(_NUMPY.dtype('bool')) + _NUMPY_UINT8 = type(_NUMPY.dtype('uint8')) + _NUMPY_INT8 = type(_NUMPY.dtype('int8')) + _NUMPY_UINT16 = type(_NUMPY.dtype('uint16')) + _NUMPY_INT16 = type(_NUMPY.dtype('int16')) + _NUMPY_UINT32 = type(_NUMPY.dtype('uint32')) + _NUMPY_INT32 = type(_NUMPY.dtype('int32')) + _NUMPY_UINT64 = type(_NUMPY.dtype('uint64')) + _NUMPY_INT64 = type(_NUMPY.dtype('int64')) + _NUMPY_FLOAT32 = type(_NUMPY.dtype('float32')) + _NUMPY_FLOAT64 = type(_NUMPY.dtype('float64')) + _NUMPY_DATETIME64_NS = type(_NUMPY.dtype('datetime64[ns]')) + _NUMPY_OBJECT = type(_NUMPY.dtype('object')) + _PANDAS = pandas + _PANDAS_NA = pandas.NA + _PYARROW = pyarrow + + +cdef object _dataframe_check_is_dataframe(object df): + if not isinstance(df, _PANDAS.DataFrame): + raise IngressError( + IngressErrorCode.InvalidApiCall, + f'Bad argument `df`: Expected {_fqn(_PANDAS.DataFrame)}, ' + + f'not an object of type {_fqn(type(df))}.') + + +cdef ssize_t _dataframe_resolve_table_name( + qdb_pystr_buf* b, + object df, + list pandas_cols, + col_t_arr* cols, + object table_name, + object table_name_col, + size_t col_count, + line_sender_table_name* name_out) except -2: + """ + Resolve the table name string or column. 
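+
+    Resolution order: an explicit `table_name` string wins, else
+    `table_name_col` (given as a column name or index), else the
+    dataframe's index name (`df.index.name`).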
+ + Returns -1 if the table name is a string, otherwise the column index. + """ + cdef size_t col_index = 0 + cdef PandasCol pandas_col + cdef col_t* col + if table_name is not None: + if table_name_col is not None: + raise ValueError( + 'Can specify only one of `table_name` or `table_name_col`.') + if isinstance(table_name, str): + try: + str_to_table_name_copy(b, table_name, name_out) + return -1 # Magic value for "no column index". + except IngressError as ie: + raise ValueError( + f'Bad argument `table_name`: {ie}') + else: + raise TypeError('Bad argument `table_name`: Must be str.') + elif table_name_col is not None: + if isinstance(table_name_col, str): + _dataframe_get_loc(df, table_name_col, 'table_name_col', &col_index) + elif isinstance(table_name_col, int): + _bind_col_index( + 'table_name_col', table_name_col, col_count, &col_index) + else: + raise TypeError( + 'Bad argument `table_name_col`: ' + + 'must be a column name (str) or index (int).') + pandas_col = pandas_cols[col_index] + col = &cols.d[col_index] + _dataframe_check_column_is_str( + 'Bad argument `table_name_col`: ', + pandas_col, + col.setup.source) + col.setup.meta_target = meta_target_t.meta_target_table + name_out.len = 0 + name_out.buf = NULL + return col_index + elif df.index.name: + if not isinstance(df.index.name, str): + raise TypeError( + 'Bad dataframe index name as table name: Expected str, ' + + f'not an object of type {_fqn(type(df.index.name))}.') + + # If the index has a name, use that as the table name. + try: + str_to_table_name_copy(b, df.index.name, name_out) + return -1 # Magic value for "no column index". + except IngressError as ie: + raise ValueError( + f'Bad dataframe index name as table name: {ie}') + else: + raise ValueError( + 'Must specify at least one of `table_name` or `table_name_col`, ' + + 'or set the dataframe index name (df.index.name = \'tbl_name\').') + + +cdef void_int _bind_col_index( + str arg_name, int col_num, size_t col_count, + size_t* col_index) except -1: + """ + Validate that `col_index` is in bounds for `col_count`. + This function also converts negative indicies (e.g. -1 for last column) to + positive indicies. + """ + cdef bint bad = False + cdef int orig_col_num = col_num + if col_num < 0: + col_num += col_count # Try convert negative offsets to positive ones. 
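+        # e.g. col_num == -1 with col_count == 4 maps to column index 3 (the last column).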
+ if col_num < 0: + bad = True + if (not bad) and (col_num >= col_count): + bad = True + if bad: + raise IndexError( + f'Bad argument `{arg_name}`: {orig_col_num} index out of range') + col_index[0] = col_num + + +cdef void_int _dataframe_check_column_is_str( + str err_msg_prefix, + PandasCol pandas_col, + col_source_t source) except -1: + cdef str inferred_descr = "" + if not source in _STR_SOURCES: + if isinstance(pandas_col.dtype, _NUMPY_OBJECT): + inferred_descr = f' (inferred type: {_PYOBJ_SOURCE_DESCR[source]})' + raise IngressError( + IngressErrorCode.BadDataFrame, + err_msg_prefix + + f'Bad dtype `{pandas_col.dtype}`{inferred_descr} for the ' + + f'{pandas_col.name!r} column: Must be a strings column.') + + +@cython.internal +cdef class PandasCol: + """Python object representing a column to parse .dataframe() arguments.""" + cdef str name + cdef object dtype + cdef object series + + def __init__( + self, + str name, + object dtype, + object series): + self.name = name + self.dtype = dtype + self.series = series + + +cdef void_int _dataframe_resolve_symbols( + object df, + list pandas_cols, + col_t_arr* cols, + ssize_t table_name_col, + ssize_t at_col, + object symbols) except -1: + cdef size_t col_index = 0 + cdef object symbol + cdef PandasCol pandas_col + cdef col_t* col + if symbols == 'auto': + for col_index in range(cols.size): + pandas_col = pandas_cols[col_index] + col = &cols.d[col_index] + if col.setup.meta_target == meta_target_t.meta_target_field: + if isinstance(pandas_col.dtype, _PANDAS.CategoricalDtype): + col.setup.meta_target = meta_target_t.meta_target_symbol + elif symbols is False: + pass + elif symbols is True: + for col_index in range(cols.size): + col = &cols.d[col_index] + if col.setup.source in _STR_SOURCES: + pandas_col = pandas_cols[col_index] + if col.setup.meta_target == meta_target_t.meta_target_field: + col.setup.meta_target = meta_target_t.meta_target_symbol + else: + if not isinstance(symbols, (tuple, list)): + raise TypeError( + f'Bad argument `symbols`: Must be a bool or a tuple or list '+ + 'of column names (str) or indices (int).') + for symbol in symbols: + if isinstance(symbol, str): + _dataframe_get_loc(df, symbol, 'symbols', &col_index) + elif isinstance(symbol, int): + _bind_col_index('symbol', symbol, cols.size, &col_index) + else: + raise TypeError( + f'Bad argument `symbols`: Elements must ' + + 'be a column name (str) or index (int).') + if (table_name_col >= 0) and (col_index == table_name_col): + raise ValueError( + f'Bad argument `symbols`: Cannot use the same column ' + + f'{symbol!r} as both the table_name and as a symbol.') + if (at_col >= 0) and (col_index == at_col): + raise ValueError( + f'Bad argument `symbols`: Cannot use the `at` column ' + + f'({df.columns[at_col]!r}) as a symbol column.') + pandas_col = pandas_cols[col_index] + col = &cols.d[col_index] + _dataframe_check_column_is_str( + 'Bad argument `symbols`: ', + pandas_col, + col.setup.source) + col.setup.meta_target = meta_target_t.meta_target_symbol + + +cdef void_int _dataframe_get_loc( + object df, str col_name, str arg_name, + size_t* col_index_out) except -1: + """ + Return the column index for `col_name`. + """ + try: + col_index_out[0] = df.columns.get_loc(col_name) + except KeyError: + raise KeyError( + f'Bad argument `{arg_name}`: ' + + f'Column {col_name!r} not found in the dataframe.') + + +# The values -2 and -1 are safe to use as a sentinel because the TimestampNanos +# type already validates that the value is >= 0. 
+cdef int64_t _AT_IS_SERVER_NOW = -2 +cdef int64_t _AT_IS_SET_BY_COLUMN = -1 + + +cdef str _SUPPORTED_DATETIMES = 'datetime64[ns] or datetime64[ns, tz]' + + +cdef object _dataframe_is_supported_datetime(object dtype): + if (isinstance(dtype, _NUMPY_DATETIME64_NS) and + (str(dtype) == 'datetime64[ns]')): + return True + if isinstance(dtype, _PANDAS.DatetimeTZDtype): + return dtype.unit == 'ns' + return False + + +cdef ssize_t _dataframe_resolve_at( + object df, + col_t_arr* cols, + object at, + size_t col_count, + int64_t* at_value_out) except -2: + cdef size_t col_index + cdef object dtype + cdef PandasCol pandas_col + cdef TimestampNanos at_nanos + if at is None: + at_value_out[0] = _AT_IS_SERVER_NOW + return -1 + elif isinstance(at, TimestampNanos): + at_nanos = at + at_value_out[0] = at_nanos._value + return -1 + elif isinstance(at, datetime): + if at.timestamp() < 0: + raise ValueError( + 'Bad argument `at`: Cannot use a datetime before the ' + + 'Unix epoch (1970-01-01 00:00:00).') + at_value_out[0] = datetime_to_nanos(at) + return -1 + elif isinstance(at, str): + _dataframe_get_loc(df, at, 'at', &col_index) + elif isinstance(at, int): + _bind_col_index('at', at, col_count, &col_index) + else: + raise TypeError( + f'Bad argument `at`: Unsupported type {_fqn(type(at))}. ' + + 'Must be one of: None, TimestampNanos, datetime, ' + + 'int (column index), str (colum name)') + dtype = df.dtypes[col_index] + if _dataframe_is_supported_datetime(dtype): + at_value_out[0] = _AT_IS_SET_BY_COLUMN + col = &cols.d[col_index] + col.setup.meta_target = meta_target_t.meta_target_at + return col_index + else: + raise TypeError( + f'Bad argument `at`: Bad dtype `{dtype}` ' + + f'for the {at!r} column: Must be a {_SUPPORTED_DATETIMES} column.') + + +cdef void_int _dataframe_alloc_chunks( + size_t n_chunks, col_t* col) except -1: + col.setup.chunks.n_chunks = n_chunks + col.setup.chunks.chunks = calloc( + col.setup.chunks.n_chunks + 1, # See `_dataframe_col_advance` on why +1. + sizeof(ArrowArray)) + if col.setup.chunks.chunks == NULL: + raise MemoryError() + + +cdef void _dataframe_free_mapped_arrow(ArrowArray* arr): + free(arr.buffers) + arr.buffers = NULL + arr.release = NULL + + +cdef void_int _dataframe_series_as_pybuf( + PandasCol pandas_col, col_t* col, str fallback_dtype=None) except -1: + cdef object nparr = pandas_col.series.to_numpy(dtype=fallback_dtype) + cdef ArrowArray* mapped + cdef int get_buf_ret + if not PyObject_CheckBuffer(nparr): + raise TypeError( + f'Bad column {pandas_col.name!r}: Expected a buffer, got ' + + f'{pandas_col.series!r} ({_fqn(type(pandas_col.series))})') + try: + # Note! We don't need to support numpy strides since Pandas doesn't. + # Also note that this guarantees a 1D buffer. + get_buf_ret = PyObject_GetBuffer(nparr, &col.setup.pybuf, PyBUF_SIMPLE) + except ValueError as ve: + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Bad column {pandas_col.name!r}: {ve}') from ve + except BufferError as be: + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Bad column {pandas_col.name!r}: Expected a buffer, got ' + + f'{pandas_col.series!r} ({_fqn(type(pandas_col.series))})') from be + _dataframe_alloc_chunks(1, col) + mapped = &col.setup.chunks.chunks[0] + + # Total number of elements. 
+ mapped.length = ( + col.setup.pybuf.len // col.setup.pybuf.itemsize) + mapped.null_count = 0 + mapped.offset = 0 + mapped.n_buffers = 2 + mapped.n_children = 0 + mapped.buffers = calloc(2, sizeof(const void*)) + mapped.buffers[0] = NULL + mapped.buffers[1] = col.setup.pybuf.buf + mapped.children = NULL + mapped.dictionary = NULL + mapped.release = _dataframe_free_mapped_arrow # to cleanup allocated array. + + +cdef void_int _dataframe_series_as_arrow( + PandasCol pandas_col, + col_t* col) except -1: + cdef object array + cdef list chunks + cdef size_t n_chunks + cdef size_t chunk_index + array = _PYARROW.Array.from_pandas(pandas_col.series) + if isinstance(array, _PYARROW.ChunkedArray): + chunks = array.chunks + else: + chunks = [array] + + n_chunks = len(chunks) + _dataframe_alloc_chunks(n_chunks, col) + + for chunk_index in range(n_chunks): + array = chunks[chunk_index] + if chunk_index == 0: + chunks[chunk_index]._export_to_c( + &col.setup.chunks.chunks[chunk_index], + &col.setup.arrow_schema) + else: + chunks[chunk_index]._export_to_c( + &col.setup.chunks.chunks[chunk_index]) + + +cdef const char* _ARROW_FMT_INT8 = "c" +cdef const char* _ARROW_FMT_INT16 = "s" +cdef const char* _ARROW_FMT_INT32 = "i" +cdef const char* _ARROW_FMT_SML_STR = "u" + + +cdef void_int _dataframe_category_series_as_arrow( + PandasCol pandas_col, col_t* col) except -1: + cdef const char* format + _dataframe_series_as_arrow(pandas_col, col) + format = col.setup.arrow_schema.format + if strncmp(format, _ARROW_FMT_INT8, 1) == 0: + col.setup.source = col_source_t.col_source_str_i8_cat + elif strncmp(format, _ARROW_FMT_INT16, 1) == 0: + col.setup.source = col_source_t.col_source_str_i16_cat + elif strncmp(format, _ARROW_FMT_INT32, 1) == 0: + col.setup.source = col_source_t.col_source_str_i32_cat + else: + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Bad column {pandas_col.name!r}: ' + + 'Unsupported arrow category index type. ' + + f'Got {(format).decode("utf-8")!r}.') + + format = col.setup.arrow_schema.dictionary.format + if strncmp(format, _ARROW_FMT_SML_STR, 1) != 0: + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Bad column {pandas_col.name!r}: ' + + 'Expected a category of strings, ' + + f'got a category of {pandas_col.series.dtype.categories.dtype}.') + + +cdef inline bint _dataframe_is_float_nan(PyObject* obj): + return PyFloat_CheckExact(obj) and isnan(PyFloat_AS_DOUBLE(obj)) + + +cdef inline bint _dataframe_is_null_pyobj(PyObject* obj): + return ( + (obj == Py_None) or + (obj == _PANDAS_NA) or + _dataframe_is_float_nan(obj)) + + +cdef void_int _dataframe_series_sniff_pyobj( + PandasCol pandas_col, col_t* col) except -1: + """ + Deduct the type of the object column. + Object columns can contain pretty much anything, but they usually don't. + We make an educated guess by finding the first non-null value in the column. 
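+    If every value is null, the column is marked `col_source_nulls` and the
+    serializer later skips it.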
+ """ + cdef size_t el_index + cdef size_t n_elements = len(pandas_col.series) + cdef PyObject** obj_arr + cdef PyObject* obj + _dataframe_series_as_pybuf(pandas_col, col) + obj_arr = (col.setup.pybuf.buf) + for el_index in range(n_elements): + obj = obj_arr[el_index] + if not _dataframe_is_null_pyobj(obj): + if PyBool_Check(obj): + col.setup.source = col_source_t.col_source_bool_pyobj + elif PyLong_CheckExact(obj): + col.setup.source = col_source_t.col_source_int_pyobj + elif PyFloat_CheckExact(obj): + col.setup.source = col_source_t.col_source_float_pyobj + elif PyUnicode_CheckExact(obj): + col.setup.source = col_source_t.col_source_str_pyobj + elif PyBytes_CheckExact(obj): + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Bad column {pandas_col.name!r}: ' + + 'Unsupported object column containing bytes.' + + 'If this is a string column, decode it first. ' + + 'See: https://stackoverflow.com/questions/40389764/') + else: + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Bad column {pandas_col.name!r}: ' + + f'Unsupported object column containing an object of type ' + + _fqn(type(obj)) + '.') + return 0 + + # We haven't returned yet, so we've hit an object column that + # exclusively has null values. We will just skip this column. + col.setup.source = col_source_t.col_source_nulls + + +cdef void_int _dataframe_resolve_source_and_buffers( + PandasCol pandas_col, col_t* col) except -1: + cdef object dtype = pandas_col.dtype + if isinstance(dtype, _NUMPY_BOOL): + col.setup.source = col_source_t.col_source_bool_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _PANDAS.BooleanDtype): + col.setup.source = col_source_t.col_source_bool_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _NUMPY_UINT8): + col.setup.source = col_source_t.col_source_u8_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_INT8): + col.setup.source = col_source_t.col_source_i8_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_UINT16): + col.setup.source = col_source_t.col_source_u16_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_INT16): + col.setup.source = col_source_t.col_source_i16_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_UINT32): + col.setup.source = col_source_t.col_source_u32_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_INT32): + col.setup.source = col_source_t.col_source_i32_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_UINT64): + col.setup.source = col_source_t.col_source_u64_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_INT64): + col.setup.source = col_source_t.col_source_i64_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _PANDAS.UInt8Dtype): + col.setup.source = col_source_t.col_source_u8_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.Int8Dtype): + col.setup.source = col_source_t.col_source_i8_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.UInt16Dtype): + col.setup.source = col_source_t.col_source_u16_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.Int16Dtype): + col.setup.source = col_source_t.col_source_i16_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.UInt32Dtype): + col.setup.source = 
col_source_t.col_source_u32_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.Int32Dtype): + col.setup.source = col_source_t.col_source_i32_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.UInt64Dtype): + col.setup.source = col_source_t.col_source_u64_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.Int64Dtype): + col.setup.source = col_source_t.col_source_i64_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _NUMPY_FLOAT32): + col.setup.source = col_source_t.col_source_f32_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _NUMPY_FLOAT64): + col.setup.source = col_source_t.col_source_f64_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif isinstance(dtype, _PANDAS.Float32Dtype): + col.setup.source = col_source_t.col_source_f32_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.Float64Dtype): + col.setup.source = col_source_t.col_source_f64_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _PANDAS.StringDtype): + if dtype.storage == 'pyarrow': + col.setup.source = col_source_t.col_source_str_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif dtype.storage == 'python': + col.setup.source = col_source_t.col_source_str_pyobj + _dataframe_series_as_pybuf(pandas_col, col) + else: + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Unknown string dtype storage: f{dtype.storage} ' + + f'for column {pandas_col.name} of dtype {dtype}.') + elif isinstance(dtype, _PANDAS.CategoricalDtype): + _dataframe_category_series_as_arrow(pandas_col, col) + elif (isinstance(dtype, _NUMPY_DATETIME64_NS) and + _dataframe_is_supported_datetime(dtype)): + col.setup.source = col_source_t.col_source_dt64ns_numpy + _dataframe_series_as_pybuf(pandas_col, col) + elif (isinstance(dtype, _PANDAS.DatetimeTZDtype) and + _dataframe_is_supported_datetime(dtype)): + col.setup.source = col_source_t.col_source_dt64ns_tz_arrow + _dataframe_series_as_arrow(pandas_col, col) + elif isinstance(dtype, _NUMPY_OBJECT): + _dataframe_series_sniff_pyobj(pandas_col, col) + else: + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Unsupported dtype {dtype} for column {pandas_col.name!r}. 
' + + 'Raise an issue if you think it should be supported: ' + + 'https://github.com/questdb/py-questdb-client/issues.') + + +cdef void_int _dataframe_resolve_target( + PandasCol pandas_col, col_t* col) except -1: + cdef col_target_t target + cdef set target_sources + if col.setup.meta_target in _DIRECT_META_TARGETS: + col.setup.target = col.setup.meta_target + return 0 + for target in _FIELD_TARGETS: + target_sources = _TARGET_TO_SOURCES[target] + if col.setup.source in target_sources: + col.setup.target = target + return 0 + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Could not map column source type (code {col.setup.source} for ' + + f'column {pandas_col.name!r} ' + + f' ({pandas_col.dtype}) to any ILP type.') + + +cdef void _dataframe_init_cursor(col_t* col): + col.cursor.chunk = col.setup.chunks.chunks + col.cursor.chunk_index = 0 + col.cursor.offset = col.cursor.chunk.offset + + +cdef void_int _dataframe_resolve_cols( + qdb_pystr_buf* b, + list pandas_cols, + col_t_arr* cols, + bint* any_cols_need_gil_out) except -1: + cdef size_t index + cdef size_t len_dataframe_cols = len(pandas_cols) + cdef PandasCol pandas_col + cdef col_t* col + any_cols_need_gil_out[0] = False + for index in range(len_dataframe_cols): + pandas_col = pandas_cols[index] + col = &cols.d[index] + + # The target is resolved in stages: + # * We first assign all column `.meta_target`s to be fields. + # * Then, depending on argument parsing some/none of the columns + # obtain a meta-target of "table", "symbol" or "at". + # * Finally, based on the source, any remaining "meta_target_field" + # columns are converted to the appropriate target. + # See: _dataframe_resolve_col_targets_and_dc(..). + col.setup.meta_target = meta_target_t.meta_target_field + + # We will sort columns later. The index will be used to achieve a stable + # sort among columns with the same `.meta_target`. + col.setup.orig_index = index + + _dataframe_resolve_source_and_buffers(pandas_col, col) + _dataframe_init_cursor(col) + if col_source_needs_gil(col.setup.source): + any_cols_need_gil_out[0] = True + + +cdef void_int _dataframe_resolve_cols_target_name_and_dc( + qdb_pystr_buf* b, + list pandas_cols, + col_t_arr* cols) except -1: + cdef size_t index + cdef col_t* col + cdef PandasCol pandas_col + for index in range(cols.size): + col = &cols.d[index] + pandas_col = pandas_cols[index] + _dataframe_resolve_target(pandas_col, col) + if col.setup.source not in _TARGET_TO_SOURCES[col.setup.target]: + raise ValueError( + f'Bad value: Column {pandas_col.name!r} ' + + f'({pandas_col.dtype}) is not ' + + f'supported as a {_TARGET_NAMES[col.setup.target]} column.') + col.dispatch_code = ( + col.setup.source + col.setup.target) + + # Since we don't need to send the column names for 'table' and + # 'at' columns, we don't need to validate and encode them as + # column names. This allows unsupported names for these columns. 
+ if ((col.setup.meta_target != meta_target_t.meta_target_table) and + (col.setup.meta_target != meta_target_t.meta_target_at)): + str_to_column_name_copy(b, pandas_col.name, &col.name) + + +cdef int _dataframe_compare_cols(const void* lhs, const void* rhs) nogil: + cdef col_t* lhs_col = lhs + cdef col_t* rhs_col = rhs + cdef int source_diff = lhs_col.setup.meta_target - rhs_col.setup.meta_target + if source_diff != 0: + return source_diff + return lhs_col.setup.orig_index - rhs_col.setup.orig_index + + +cdef void_int _dataframe_resolve_args( + object df, + object table_name, + object table_name_col, + object symbols, + object at, + qdb_pystr_buf* b, + size_t col_count, + line_sender_table_name* c_table_name_out, + int64_t* at_value_out, + col_t_arr* cols, + bint* any_cols_need_gil_out) except -1: + cdef ssize_t name_col + cdef ssize_t at_col + + cdef list pandas_cols = [ + PandasCol(name, df.dtypes[index], series) + for index, (name, series) in enumerate(df.items())] + _dataframe_resolve_cols(b, pandas_cols, cols, any_cols_need_gil_out) + name_col = _dataframe_resolve_table_name( + b, + df, + pandas_cols, + cols, + table_name, + table_name_col, + col_count, + c_table_name_out) + at_col = _dataframe_resolve_at(df, cols, at, col_count, at_value_out) + _dataframe_resolve_symbols(df, pandas_cols, cols, name_col, at_col, symbols) + _dataframe_resolve_cols_target_name_and_dc(b, pandas_cols, cols) + qsort(cols.d, col_count, sizeof(col_t), _dataframe_compare_cols) + + +cdef inline bint _dataframe_arrow_get_bool(col_cursor_t* cursor): + return ( + (cursor.chunk.buffers[1])[cursor.offset // 8] & + (1 << (cursor.offset % 8))) + + +cdef inline bint _dataframe_arrow_is_valid(col_cursor_t* cursor): + """Check if the value is set according to the validity bitmap.""" + return ( + cursor.chunk.null_count == 0 or + ( + (cursor.chunk.buffers[0])[cursor.offset // 8] & + (1 << (cursor.offset % 8)))) + + +cdef inline void _dataframe_arrow_get_cat_value( + col_cursor_t* cursor, + size_t key, + size_t* len_out, + const char** buf_out): + cdef int32_t* value_index_access + cdef int32_t value_begin + cdef uint8_t* value_char_access + value_index_access = cursor.chunk.dictionary.buffers[1] + value_begin = value_index_access[key] + len_out[0] = value_index_access[key + 1] - value_begin + value_char_access = cursor.chunk.dictionary.buffers[2] + buf_out[0] = &value_char_access[value_begin] + + +cdef inline bint _dataframe_arrow_get_cat_i8( + col_cursor_t* cursor, size_t* len_out, const char** buf_out): + cdef bint valid = _dataframe_arrow_is_valid(cursor) + cdef int8_t* key_access + cdef int8_t key + if valid: + key_access = cursor.chunk.buffers[1] + key = key_access[cursor.offset] + _dataframe_arrow_get_cat_value(cursor, key, len_out, buf_out) + return valid + + +cdef inline bint _dataframe_arrow_get_cat_i16( + col_cursor_t* cursor, size_t* len_out, const char** buf_out): + cdef bint valid = _dataframe_arrow_is_valid(cursor) + cdef int16_t* key_access + cdef int16_t key + if valid: + key_access = cursor.chunk.buffers[1] + key = key_access[cursor.offset] + _dataframe_arrow_get_cat_value(cursor, key, len_out, buf_out) + return valid + + +cdef inline bint _dataframe_arrow_get_cat_i32( + col_cursor_t* cursor, size_t* len_out, const char** buf_out): + cdef bint valid = _dataframe_arrow_is_valid(cursor) + cdef int32_t* key_access + cdef int32_t key + if valid: + key_access = cursor.chunk.buffers[1] + key = key_access[cursor.offset] + _dataframe_arrow_get_cat_value(cursor, key, len_out, buf_out) + return valid + + +cdef 
inline bint _dataframe_arrow_str( + col_cursor_t* cursor, + size_t* len_out, + const char** buf_out): + cdef int32_t* index_access + cdef uint8_t* char_access + cdef int32_t begin + cdef bint valid = _dataframe_arrow_is_valid(cursor) + if valid: + index_access = cursor.chunk.buffers[1] + char_access = cursor.chunk.buffers[2] + begin = index_access[cursor.offset] + len_out[0] = index_access[cursor.offset + 1] - begin + buf_out[0] = &char_access[begin] + return valid + + +cdef inline void_int _dataframe_cell_str_pyobj_to_utf8( + qdb_pystr_buf* b, + col_cursor_t* cursor, + bint* valid_out, + line_sender_utf8* utf8_out) except -1: + cdef PyObject** access = cursor.chunk.buffers[1] + cdef PyObject* cell = access[cursor.offset] + if PyUnicode_CheckExact(cell): + str_to_utf8(b, cell, utf8_out) + valid_out[0] = True + elif _dataframe_is_null_pyobj(cell): + valid_out[0] = False + else: + raise ValueError( + 'Expected a string, ' + + f'got an object of type {_fqn(type(cell))}.') + + +cdef void_int _dataframe_serialize_cell_table__str_pyobj( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col) except -1: + cdef line_sender_error* err = NULL + cdef PyObject** access = col.cursor.chunk.buffers[1] + cdef PyObject* cell = access[col.cursor.offset] + cdef line_sender_table_name c_table_name + if not PyUnicode_CheckExact(cell): + if _dataframe_is_null_pyobj(cell): + raise ValueError('Expected a table name, got a null value') + else: + raise ValueError( + 'Expected a table name (str object), ' + + f'got an object of type {_fqn(type(cell))}.') + str_to_table_name(b, cell, &c_table_name) + if not line_sender_buffer_table(ls_buf, c_table_name, &err): + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_table__str_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef size_t c_len + cdef const char* buf + cdef line_sender_table_name c_table_name + if _dataframe_arrow_str(&col.cursor, &c_len, &buf): + if not line_sender_table_name_init(&c_table_name, c_len, buf, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + if not line_sender_buffer_table(ls_buf, c_table_name, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + else: + _ensure_has_gil(gs) + raise ValueError('Table name cannot be null') + + +cdef void_int _dataframe_serialize_cell_table__str_i8_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef size_t c_len + cdef const char* c_buf + cdef line_sender_table_name c_table_name + if _dataframe_arrow_get_cat_i8(&col.cursor, &c_len, &c_buf): + if not line_sender_table_name_init(&c_table_name, c_len, c_buf, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + if not line_sender_buffer_table(ls_buf, c_table_name, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + else: + _ensure_has_gil(gs) + raise ValueError('Table name cannot be null') + + +cdef void_int _dataframe_serialize_cell_table__str_i16_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef size_t c_len + cdef const char* c_buf + cdef line_sender_table_name c_table_name + if _dataframe_arrow_get_cat_i16(&col.cursor, &c_len, &c_buf): + if not line_sender_table_name_init(&c_table_name, c_len, c_buf, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + if not line_sender_buffer_table(ls_buf, c_table_name, &err): + _ensure_has_gil(gs) + 
raise c_err_to_py(err) + else: + _ensure_has_gil(gs) + raise ValueError('Table name cannot be null') + + +cdef void_int _dataframe_serialize_cell_table__str_i32_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef size_t c_len + cdef const char* c_buf + cdef line_sender_table_name c_table_name + if _dataframe_arrow_get_cat_i32(&col.cursor, &c_len, &c_buf): + if not line_sender_table_name_init(&c_table_name, c_len, c_buf, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + if not line_sender_buffer_table(ls_buf, c_table_name, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + else: + _ensure_has_gil(gs) + raise ValueError('Table name cannot be null') + + +cdef void_int _dataframe_serialize_cell_symbol__str_pyobj( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = False + cdef line_sender_utf8 utf8 + _dataframe_cell_str_pyobj_to_utf8(b, &col.cursor, &valid, &utf8) + if valid and not line_sender_buffer_symbol(ls_buf, col.name, utf8, &err): + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_symbol__str_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_str(&col.cursor, &utf8.len, &utf8.buf): + if not line_sender_buffer_symbol(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_symbol__str_i8_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_get_cat_i8(&col.cursor, &utf8.len, &utf8.buf): + if not line_sender_buffer_symbol(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_symbol__str_i16_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_get_cat_i16(&col.cursor, &utf8.len, &utf8.buf): + if not line_sender_buffer_symbol(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_symbol__str_i32_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_get_cat_i32(&col.cursor, &utf8.len, &utf8.buf): + if not line_sender_buffer_symbol(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_bool__bool_pyobj( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col) except -1: + cdef line_sender_error* err = NULL + cdef PyObject** access = col.cursor.chunk.buffers[1] + cdef PyObject* cell = access[col.cursor.offset] + if PyBool_Check(cell): + if not line_sender_buffer_column_bool( + ls_buf, col.name, cell == Py_True, &err): + raise c_err_to_py(err) + elif _dataframe_is_null_pyobj(cell): + raise ValueError('Cannot insert null values into a boolean column.') + else: + raise ValueError( + 'Expected an object of type bool, got a ' + + _fqn(type(cell)) + '.') + + +cdef void_int _dataframe_serialize_cell_column_bool__bool_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + 
col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef uint8_t* access = col.cursor.chunk.buffers[1] + cdef uint8_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_bool(ls_buf, col.name, not not cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_bool__bool_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef bint value + if valid: + value = _dataframe_arrow_get_bool(&col.cursor) + if not line_sender_buffer_column_bool(ls_buf, col.name, value, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + else: + _ensure_has_gil(gs) + raise ValueError('Cannot insert null values into a boolean column.') + + +cdef void_int _dataframe_serialize_cell_column_i64__int_pyobj( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col) except -1: + cdef line_sender_error* err = NULL + cdef PyObject** access = col.cursor.chunk.buffers[1] + cdef PyObject* cell = access[col.cursor.offset] + cdef int64_t value + if PyLong_CheckExact(cell): + value = PyLong_AsLongLong(cell) + if not line_sender_buffer_column_i64(ls_buf, col.name, value, &err): + raise c_err_to_py(err) + elif _dataframe_is_null_pyobj(cell): + pass + else: + raise ValueError( + 'Expected an object of type int, got an object of type ' + + _fqn(type(cell)) + '.') + + +cdef void_int _dataframe_serialize_cell_column_i64__u8_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef uint8_t* access = col.cursor.chunk.buffers[1] + cdef uint8_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i8_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef int8_t* access = col.cursor.chunk.buffers[1] + cdef int8_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__u16_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef uint16_t* access = col.cursor.chunk.buffers[1] + cdef uint16_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i16_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef int16_t* access = col.cursor.chunk.buffers[1] + cdef int16_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__u32_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef uint32_t* access = col.cursor.chunk.buffers[1] + cdef uint32_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + 
_ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i32_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef int32_t* access = col.cursor.chunk.buffers[1] + cdef int32_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__u64_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef uint64_t* access = col.cursor.chunk.buffers[1] + cdef uint64_t cell = access[col.cursor.offset] + if cell > INT64_MAX: + _ensure_has_gil(gs) + raise OverflowError('uint64 value too large for int64 column type.') + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i64_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef int64_t* access = col.cursor.chunk.buffers[1] + cdef int64_t cell = access[col.cursor.offset] + if not line_sender_buffer_column_i64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__u8_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef uint8_t* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i8_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef int8_t* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__u16_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef uint16_t* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i16_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef int16_t* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__u32_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + 
cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef uint32_t* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i32_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef int32_t* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__u64_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef uint64_t* access + cdef uint64_t cell + if valid: + access = col.cursor.chunk.buffers[1] + cell = access[col.cursor.offset] + if cell > INT64_MAX: + _ensure_has_gil(gs) + raise OverflowError('uint64 value too large for int64 column type.') + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + cell, + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_i64__i64_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef int64_t* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_i64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_f64__float_pyobj( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col) except -1: + cdef line_sender_error* err = NULL + cdef PyObject** access = col.cursor.chunk.buffers[1] + cdef PyObject* cell = access[col.cursor.offset] + cdef double value + if PyFloat_CheckExact(cell): + value = PyFloat_AS_DOUBLE(cell) + if not line_sender_buffer_column_f64(ls_buf, col.name, value, &err): + raise c_err_to_py(err) + elif _dataframe_is_null_pyobj(cell): + pass + else: + raise ValueError( + 'Expected an object of type float, got an object of type ' + + _fqn(type(cell)) + '.') + + +cdef void_int _dataframe_serialize_cell_column_f64__f32_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + # Note: This is the C `float` type, not the Python `float` type. 
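+    # (A 32-bit float widens losslessly to the 64-bit ILP float column.)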
+ cdef float* access = col.cursor.chunk.buffers[1] + cdef float cell = access[col.cursor.offset] + if not line_sender_buffer_column_f64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_f64__f64_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef double* access = col.cursor.chunk.buffers[1] + cdef double cell = access[col.cursor.offset] + if not line_sender_buffer_column_f64(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_f64__f32_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef float* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_f64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_f64__f64_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef double* access + if valid: + access = col.cursor.chunk.buffers[1] + if not line_sender_buffer_column_f64( + ls_buf, + col.name, + access[col.cursor.offset], + &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_str__str_pyobj( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = False + cdef line_sender_utf8 utf8 + _dataframe_cell_str_pyobj_to_utf8(b, &col.cursor, &valid, &utf8) + if valid and not line_sender_buffer_column_str( + ls_buf, col.name, utf8, &err): + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_str__str_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_str(&col.cursor, &utf8.len, &utf8.buf): + if not line_sender_buffer_column_str(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_str__str_i8_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_get_cat_i8(&col.cursor, &utf8.len, &utf8.buf): + if not line_sender_buffer_column_str(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_str__str_i16_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_get_cat_i16(&col.cursor, &utf8.len, &utf8.buf): + if not line_sender_buffer_column_str(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_str__str_i32_cat( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + if _dataframe_arrow_get_cat_i32(&col.cursor, &utf8.len, &utf8.buf): 
+ if not line_sender_buffer_column_str(ls_buf, col.name, utf8, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_ts__dt64ns_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef int64_t* access = col.cursor.chunk.buffers[1] + cdef int64_t cell = access[col.cursor.offset] + if cell != _NAT: + cell //= 1000 # Convert from nanoseconds to microseconds. + if not line_sender_buffer_column_ts(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef int64_t cell + cdef int64_t* access + if valid: + access = col.cursor.chunk.buffers[1] + cell = access[col.cursor.offset] + cell //= 1000 # Convert from nanoseconds to microseconds. + if not line_sender_buffer_column_ts(ls_buf, col.name, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_at_dt64ns_numpy( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef int64_t* access = col.cursor.chunk.buffers[1] + cdef int64_t cell = access[col.cursor.offset] + if cell == _NAT: + if not line_sender_buffer_at_now(ls_buf, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + else: + # Note: ls_buf will validate against negative numbers. + if not line_sender_buffer_at(ls_buf, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell_at_dt64ns_tz_arrow( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef line_sender_error* err = NULL + cdef bint valid = _dataframe_arrow_is_valid(&col.cursor) + cdef int64_t* access + cdef int64_t cell + if valid: + access = col.cursor.chunk.buffers[1] + cell = access[col.cursor.offset] + # Note: ls_buf will validate against negative numbers. + if not line_sender_buffer_at(ls_buf, cell, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + else: + if not line_sender_buffer_at_now(ls_buf, &err): + _ensure_has_gil(gs) + raise c_err_to_py(err) + + +cdef void_int _dataframe_serialize_cell( + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + col_t* col, + PyThreadState** gs) except -1: + cdef col_dispatch_code_t dc = col.dispatch_code + # Note!: Code below will generate a `switch` statement. + # Ensure this happens! Don't break the `dc == ...` pattern. + if dc == col_dispatch_code_t.col_dispatch_code_skip_nulls: + pass # We skip a null column. Nothing to do. 
+ elif dc == col_dispatch_code_t.col_dispatch_code_table__str_pyobj: + _dataframe_serialize_cell_table__str_pyobj(ls_buf, b, col) + elif dc == col_dispatch_code_t.col_dispatch_code_table__str_arrow: + _dataframe_serialize_cell_table__str_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_table__str_i8_cat: + _dataframe_serialize_cell_table__str_i8_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_table__str_i16_cat: + _dataframe_serialize_cell_table__str_i16_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_table__str_i32_cat: + _dataframe_serialize_cell_table__str_i32_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_symbol__str_pyobj: + _dataframe_serialize_cell_symbol__str_pyobj(ls_buf, b, col) + elif dc == col_dispatch_code_t.col_dispatch_code_symbol__str_arrow: + _dataframe_serialize_cell_symbol__str_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_symbol__str_i8_cat: + _dataframe_serialize_cell_symbol__str_i8_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_symbol__str_i16_cat: + _dataframe_serialize_cell_symbol__str_i16_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_symbol__str_i32_cat: + _dataframe_serialize_cell_symbol__str_i32_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_bool__bool_pyobj: + _dataframe_serialize_cell_column_bool__bool_pyobj(ls_buf, b, col) + elif dc == col_dispatch_code_t.col_dispatch_code_column_bool__bool_numpy: + _dataframe_serialize_cell_column_bool__bool_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_bool__bool_arrow: + _dataframe_serialize_cell_column_bool__bool_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__int_pyobj: + _dataframe_serialize_cell_column_i64__int_pyobj(ls_buf, b, col) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u8_numpy: + _dataframe_serialize_cell_column_i64__u8_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__i8_numpy: + _dataframe_serialize_cell_column_i64__i8_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u16_numpy: + _dataframe_serialize_cell_column_i64__u16_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__i16_numpy: + _dataframe_serialize_cell_column_i64__i16_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u32_numpy: + _dataframe_serialize_cell_column_i64__u32_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__i32_numpy: + _dataframe_serialize_cell_column_i64__i32_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u64_numpy: + _dataframe_serialize_cell_column_i64__u64_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__i64_numpy: + _dataframe_serialize_cell_column_i64__i64_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u8_arrow: + _dataframe_serialize_cell_column_i64__u8_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__i8_arrow: + _dataframe_serialize_cell_column_i64__i8_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u16_arrow: + _dataframe_serialize_cell_column_i64__u16_arrow(ls_buf, b, col, gs) + elif dc == 
col_dispatch_code_t.col_dispatch_code_column_i64__i16_arrow: + _dataframe_serialize_cell_column_i64__i16_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u32_arrow: + _dataframe_serialize_cell_column_i64__u32_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__i32_arrow: + _dataframe_serialize_cell_column_i64__i32_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__u64_arrow: + _dataframe_serialize_cell_column_i64__u64_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_i64__i64_arrow: + _dataframe_serialize_cell_column_i64__i64_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_f64__float_pyobj: + _dataframe_serialize_cell_column_f64__float_pyobj(ls_buf, b, col) + elif dc == col_dispatch_code_t.col_dispatch_code_column_f64__f32_numpy: + _dataframe_serialize_cell_column_f64__f32_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_f64__f64_numpy: + _dataframe_serialize_cell_column_f64__f64_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_f64__f32_arrow: + _dataframe_serialize_cell_column_f64__f32_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_f64__f64_arrow: + _dataframe_serialize_cell_column_f64__f64_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_str__str_pyobj: + _dataframe_serialize_cell_column_str__str_pyobj(ls_buf, b, col) + elif dc == col_dispatch_code_t.col_dispatch_code_column_str__str_arrow: + _dataframe_serialize_cell_column_str__str_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_str__str_i8_cat: + _dataframe_serialize_cell_column_str__str_i8_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_str__str_i16_cat: + _dataframe_serialize_cell_column_str__str_i16_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_str__str_i32_cat: + _dataframe_serialize_cell_column_str__str_i32_cat(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_ts__dt64ns_numpy: + _dataframe_serialize_cell_column_ts__dt64ns_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_column_ts__dt64ns_tz_arrow: + _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_at__dt64ns_numpy: + _dataframe_serialize_cell_at_dt64ns_numpy(ls_buf, b, col, gs) + elif dc == col_dispatch_code_t.col_dispatch_code_at__dt64ns_tz_arrow: + _dataframe_serialize_cell_at_dt64ns_tz_arrow(ls_buf, b, col, gs) + else: + _ensure_has_gil(gs) + raise RuntimeError(f"Unknown column dispatch code: {dc}") + # See earlier note about switch statement generation. + # Don't add complex conditions above! + + +cdef void _dataframe_col_advance(col_t* col): + # Branchless version of: + # cdef bint new_chunk = cursor.offset == cursor.chunk.length + # if new_chunk == 0: + # cursor.chunk_index += 1 + # cursor.chunk += 1 # pointer advance + # + # if new_chunk: + # cursor.offset = cursor.chunk.offset + # else: + # cursor.offset += 1 + # + # (Checked with Godbolt, GCC -O3 code was rather "jumpy") + cdef col_cursor_t* cursor = &col.cursor + cdef size_t new_chunk # disguised bint. Either 0 or 1. 
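+    # For example, with a chunk of length 3 starting at offset 0: successive
+    # calls yield offsets 1 and 2; on the third call `offset == chunk.length`,
+    # so `new_chunk` is 1, the cursor steps onto the next chunk and `offset`
+    # resets to that chunk's starting offset. The extra blank chunk allocated
+    # at the end keeps that final step in bounds.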
+ cursor.offset += 1 + new_chunk = cursor.offset == cursor.chunk.length + cursor.chunk_index += new_chunk + cursor.chunk += new_chunk + # Note: We get away with this because we've allocated one extra blank chunk. + # This ensures that accessing `cursor.chunk.offset` doesn't segfault. + cursor.offset = ( + (new_chunk * cursor.chunk.offset) + + ((not new_chunk) * cursor.offset)) + + +cdef void_int _dataframe_handle_auto_flush( + auto_flush_t af, + line_sender_buffer* ls_buf, + PyThreadState** gs) except -1: + cdef line_sender_error* flush_err + cdef line_sender_error* marker_err + cdef bint flush_ok + cdef bint marker_ok + if (af.sender == NULL) or (line_sender_buffer_size(ls_buf) < af.watermark): + return 0 + + # Always temporarily release GIL during a flush. + had_gil = _ensure_doesnt_have_gil(gs) + flush_ok = line_sender_flush(af.sender, ls_buf, &flush_err) + if not flush_ok: + # To avoid flush reattempt on Sender.__exit__. + line_sender_buffer_clear(ls_buf) + + # Flushing will have cleared the marker: We need to set it again + # We need this also on error due to our error handling logic which will + # try to rewind the buffer on error and fail if the marker is unset. + marker_ok = line_sender_buffer_set_marker(ls_buf, &marker_err) + + if had_gil or (not flush_ok) or (not marker_ok): + _ensure_has_gil(gs) + + if not flush_ok: + raise c_err_to_py_fmt(flush_err, _FLUSH_FMT) + + # The flush error takes precedence over the marker error. + if not marker_ok: + raise c_err_to_py(marker_err) + + +# Every how many cells to release and re-acquire the Python GIL. +# +# We've done some perf testing with some mixed column dtypes. +# On a modern CPU we're doing over 8 million pandas cells per second. +# By default, `sys.getswitchinterval()` is 0.005 seconds. +# To accomodate this, we'd need to release the GIL every 40,000 cells. +# This will be divided by the column count to get the row gil blip interval. +cdef size_t _CELL_GIL_BLIP_INTERVAL = 40000 + + +cdef void_int _dataframe( + auto_flush_t af, + line_sender_buffer* ls_buf, + qdb_pystr_buf* b, + object df, + object table_name, + object table_name_col, + object symbols, + object at) except -1: + cdef size_t col_count + cdef line_sender_table_name c_table_name + cdef int64_t at_value = _AT_IS_SET_BY_COLUMN + cdef col_t_arr cols = col_t_arr_blank() + cdef bint any_cols_need_gil = False + cdef qdb_pystr_pos str_buf_marker + cdef size_t row_count + cdef line_sender_error* err = NULL + cdef size_t row_index + cdef size_t col_index + cdef col_t* col + cdef size_t row_gil_blip_interval + cdef PyThreadState* gs = NULL # GIL state. NULL means we have the GIL. + cdef bint had_gil + cdef bint was_serializing_cell = False + + _dataframe_may_import_deps() + _dataframe_check_is_dataframe(df) + row_count = len(df) + col_count = len(df.columns) + if (col_count == 0) or (row_count == 0): + return 0 # Nothing to do. + + try: + qdb_pystr_buf_clear(b) + cols = col_t_arr_new(col_count) + _dataframe_resolve_args( + df, + table_name, + table_name_col, + symbols, + at, + b, + col_count, + &c_table_name, + &at_value, + &cols, + &any_cols_need_gil) + + # We've used the str buffer up to a point for the headers. + # Instead of clearing it (which would clear the headers' memory) + # we will truncate (rewind) back to this position. + str_buf_marker = qdb_pystr_buf_tell(b) + line_sender_buffer_clear_marker(ls_buf) + + # On error, undo all added lines. 
+ if not line_sender_buffer_set_marker(ls_buf, &err): + raise c_err_to_py(err) + + row_gil_blip_interval = _CELL_GIL_BLIP_INTERVAL // col_count + if row_gil_blip_interval < 400: # ceiling reached at 100 columns + row_gil_blip_interval = 400 + try: + # Don't move this logic up! We need the GIL to execute a `try`. + # Also we can't have any other `try` blocks between here and the + # `finally` block. + if not any_cols_need_gil: + _ensure_doesnt_have_gil(&gs) + + for row_index in range(row_count): + if (gs == NULL) and (row_index % row_gil_blip_interval == 0): + # Release and re-acquire the GIL every so often. + # This is to allow other python threads to run. + # If we hold the GIL for too long, we can starve other + # threads, for example timing out network activity. + _ensure_doesnt_have_gil(&gs) + _ensure_has_gil(&gs) + + qdb_pystr_buf_truncate(b, str_buf_marker) + + # Table-name from `table_name` arg in Python. + if c_table_name.buf != NULL: + if not line_sender_buffer_table(ls_buf, c_table_name, &err): + _ensure_has_gil(&gs) + raise c_err_to_py(err) + + # Serialize columns cells. + # Note: Columns are sorted: table name, symbols, fields, at. + was_serializing_cell = True + for col_index in range(col_count): + col = &cols.d[col_index] + _dataframe_serialize_cell(ls_buf, b, col, &gs) # may raise + _dataframe_col_advance(col) + was_serializing_cell = False + + # Fixed "at" value (not from a column). + if at_value == _AT_IS_SERVER_NOW: + if not line_sender_buffer_at_now(ls_buf, &err): + _ensure_has_gil(&gs) + raise c_err_to_py(err) + elif at_value >= 0: + if not line_sender_buffer_at(ls_buf, at_value, &err): + _ensure_has_gil(&gs) + raise c_err_to_py(err) + + _dataframe_handle_auto_flush(af, ls_buf, &gs) + except Exception as e: + # It would be an internal bug for this to raise. + if not line_sender_buffer_rewind_to_marker(ls_buf, &err): + raise c_err_to_py(err) + + if (isinstance(e, IngressError) and + (e.code == IngressErrorCode.InvalidApiCall)): + # TODO: This should be allowed by the database. + # It currently isn't so we have to raise an error. + raise IngressError( + IngressErrorCode.BadDataFrame, + f'Bad dataframe row at index {row_index}: ' + + 'All values are nulls. '+ + 'Ensure at least one column is not null.') from e + elif was_serializing_cell: + raise IngressError( + IngressErrorCode.BadDataFrame, + 'Failed to serialize value of column ' + + repr(df.columns[col.setup.orig_index]) + + f' at row index {row_index} (' + + repr(df.iloc[row_index, col.setup.orig_index]) + + f'): {e} [dc={col.dispatch_code}]') from e + else: + raise + except Exception as e: + if not isinstance(e, IngressError): + raise IngressError( + IngressErrorCode.InvalidApiCall, + str(e)) from e + else: + raise + finally: + _ensure_has_gil(&gs) # Note: We need the GIL for cleanup. + line_sender_buffer_clear_marker(ls_buf) + col_t_arr_release(&cols) + qdb_pystr_buf_clear(b) diff --git a/src/questdb/extra_cpython.pxd b/src/questdb/extra_cpython.pxd new file mode 100644 index 00000000..3e794566 --- /dev/null +++ b/src/questdb/extra_cpython.pxd @@ -0,0 +1,70 @@ +# Custom definitions that aren't provided in the standard `cpython` module. 
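+# They give the Cython serializer borrowed, zero-copy access to CPython's
+# unicode internals (UCS1/UCS2/UCS4 buffers) and expose the GIL save/restore
+# calls used to release the GIL during serialization and flushing.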
+ +from libc.stdint cimport uint8_t, uint16_t, uint32_t +from cpython.object cimport PyObject + +cdef extern from "Python.h": + cdef PyObject* Py_None + cdef PyObject* Py_True + + ctypedef uint8_t Py_UCS1 # unicodeobject.h + ctypedef uint16_t Py_UCS2 + ctypedef uint32_t Py_UCS4 + + ctypedef unsigned int uint + + cdef enum PyUnicode_Kind: + PyUnicode_1BYTE_KIND + PyUnicode_2BYTE_KIND + PyUnicode_4BYTE_KIND + + # Note: Returning an `object` rather than `PyObject` as the function + # returns a new reference rather than borrowing an existing one. + object PyUnicode_FromKindAndData( + int kind, const void* buffer, Py_ssize_t size) + + # Ditto, see comment on why not returning a `PyObject` above. + str PyUnicode_FromStringAndSize( + const char* u, Py_ssize_t size) + + # Must be called before accessing data or is compact check. + int PyUnicode_READY(PyObject* o) except -1 + + # Is UCS1 and ascii (and therefore valid UTF-8). + bint PyUnicode_IS_COMPACT_ASCII(PyObject* o) + + # Get length. + Py_ssize_t PyUnicode_GET_LENGTH(PyObject* o) + + # Zero-copy access to string buffer. + int PyUnicode_KIND(PyObject* o) + Py_UCS1* PyUnicode_1BYTE_DATA(PyObject* o) + Py_UCS2* PyUnicode_2BYTE_DATA(PyObject* o) + Py_UCS4* PyUnicode_4BYTE_DATA(PyObject* o) + + Py_ssize_t PyBytes_GET_SIZE(object o) + + bint PyBytes_CheckExact(PyObject* o) + + char* PyBytes_AsString(object o) + + bint PyUnicode_CheckExact(PyObject* o) + + bint PyBool_Check(PyObject* o) + + bint PyLong_CheckExact(PyObject* o) + + bint PyFloat_CheckExact(PyObject* o) + + double PyFloat_AS_DOUBLE(PyObject* o) + + long long PyLong_AsLongLong(PyObject* o) except? -1 + + PyObject* PyErr_Occurred() + + ctypedef struct PyThreadState: + pass + + PyThreadState* PyEval_SaveThread() + + void PyEval_RestoreThread(PyThreadState* tstate) diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 4100899e..b676400c 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -30,58 +30,67 @@ API for fast data ingestion into QuestDB. """ -from libc.stdint cimport uint8_t, uint64_t, int64_t +# For prototypes: https://github.com/cython/cython/tree/master/Cython/Includes +from libc.stdint cimport uint8_t, uint64_t, int64_t, uint32_t, uintptr_t, \ + INT64_MAX, INT64_MIN +from libc.stdlib cimport malloc, calloc, realloc, free, abort, qsort +from libc.string cimport strncmp, memset +from libc.math cimport isnan +from libc.errno cimport errno from cpython.datetime cimport datetime -from cpython.bool cimport bool, PyBool_Check +from cpython.bool cimport bool from cpython.weakref cimport PyWeakref_NewRef, PyWeakref_GetObject from cpython.object cimport PyObject -from cpython.float cimport PyFloat_Check -from cpython.int cimport PyInt_Check -from cpython.unicode cimport PyUnicode_Check +from cpython.buffer cimport Py_buffer, PyObject_CheckBuffer, \ + PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE +from cpython.memoryview cimport PyMemoryView_FromMemory from .line_sender cimport * +from .pystr_to_utf8 cimport * +from .arrow_c_data_interface cimport * +from .extra_cpython cimport * +from .ingress_helper cimport * -cdef extern from "Python.h": - ctypedef uint8_t Py_UCS1 # unicodeobject.h +# An int we use only for error reporting. +# 0 is success. +# -1 is failure. 
+ctypedef int void_int - ctypedef unsigned int uint +import cython +include "dataframe.pxi" - cdef enum PyUnicode_Kind: - PyUnicode_1BYTE_KIND - PyUnicode_2BYTE_KIND - PyUnicode_4BYTE_KIND - # Note: Returning an `object` rather than `PyObject` as the function - # returns a new reference rather than borrowing an existing one. - object PyUnicode_FromKindAndData( - int kind, const void* buffer, Py_ssize_t size) +from enum import Enum +from typing import List, Tuple, Dict, Union, Any, Optional, Callable, \ + Iterable +import pathlib - # Ditto, see comment on why not returning a `PyObject` above. - str PyUnicode_FromStringAndSize( - const char* u, Py_ssize_t size) +import sys - # Must be called before accessing data or is compact check. - int PyUnicode_READY(object o) except -1 +# For `get_time_now_ns` and `get_time_now_us` functions. +IF UNAME_SYSNAME == 'Windows': + import time +ELSE: + from posix.time cimport timespec, clock_gettime, CLOCK_REALTIME - # Is UCS1 and ascii (and therefore valid UTF-8). - bint PyUnicode_IS_COMPACT_ASCII(object o) - # Get length. - Py_ssize_t PyUnicode_GET_LENGTH(object o) +cdef bint _has_gil(PyThreadState** gs): + return gs[0] == NULL - # Zero-copy access to buffer. - Py_UCS1* PyUnicode_1BYTE_DATA(object o) - Py_ssize_t PyBytes_GET_SIZE(object o) +cdef bint _ensure_doesnt_have_gil(PyThreadState** gs): + """Returns True if previously had the GIL, False otherwise.""" + if _has_gil(gs): + gs[0] = PyEval_SaveThread() + return True + return False - char* PyBytes_AsString(object o) +cdef void _ensure_has_gil(PyThreadState** gs): + if not _has_gil(gs): + PyEval_RestoreThread(gs[0]) + gs[0] = NULL -from enum import Enum -from typing import List, Tuple, Dict, Union, Any, Optional, Callable, Iterable -import pathlib - -import sys class IngressErrorCode(Enum): """Category of Error.""" @@ -93,6 +102,7 @@ class IngressErrorCode(Enum): InvalidTimestamp = line_sender_error_invalid_timestamp AuthError = line_sender_error_auth_error TlsError = line_sender_error_tls_error + BadDataFrame = line_sender_error_tls_error + 1 def __str__(self) -> str: """Return the name of the enum.""" @@ -160,53 +170,173 @@ cdef inline object c_err_to_py_fmt(line_sender_error* err, str fmt): return IngressError(tup[0], fmt.format(tup[1])) -cdef bytes str_to_utf8(str string, line_sender_utf8* utf8_out): +cdef object _utf8_decode_error( + PyObject* string, uint32_t bad_codepoint): + cdef str s = string + return IngressError( + IngressErrorCode.InvalidUtf8, + f'Invalid codepoint 0x{bad_codepoint:x} in string {s!r}: ' + + 'Cannot be encoded as UTF-8.') + + +cdef str _fqn(type obj): + if obj.__module__ == 'builtins': + return obj.__qualname__ + else: + return f'{obj.__module__}.{obj.__qualname__}' + + +cdef inline void_int _encode_utf8( + qdb_pystr_buf* b, + PyObject* string, + line_sender_utf8* utf8_out) except -1: + cdef uint32_t bad_codepoint = 0 + cdef size_t count = (PyUnicode_GET_LENGTH(string)) + cdef int kind = PyUnicode_KIND(string) + if kind == PyUnicode_1BYTE_KIND: + # No error handling for UCS1: All code points translate into valid UTF8. + qdb_ucs1_to_utf8( + b, + count, + PyUnicode_1BYTE_DATA(string), + &utf8_out.len, + &utf8_out.buf) + elif kind == PyUnicode_2BYTE_KIND: + if not qdb_ucs2_to_utf8( + b, + count, + PyUnicode_2BYTE_DATA(string), + &utf8_out.len, + &utf8_out.buf, + &bad_codepoint): + raise _utf8_decode_error(string, bad_codepoint) + elif kind == PyUnicode_4BYTE_KIND: + if not qdb_ucs4_to_utf8( + b, + count, + + # This cast is required and is possibly a Cython compiler bug. 
+ # It doesn't recognize that `const Py_UCS4*` + # is the same as `const uint32_t*`. + PyUnicode_4BYTE_DATA(string), + + &utf8_out.len, + &utf8_out.buf, + &bad_codepoint): + raise _utf8_decode_error(string, bad_codepoint) + else: + raise ValueError(f'Unknown UCS kind: {kind}.') + + +cdef void_int str_to_utf8( + qdb_pystr_buf* b, + PyObject* string, + line_sender_utf8* utf8_out) except -1: """ - Init the `utf8_out` object from the `string`. - If the string is held as a UCS1 and is purely ascii, then - the memory is borrowed. - Otherwise the string is first encoded to UTF-8 into a bytes object - and such bytes object is returned to transfer ownership and extend - the lifetime of the buffer pointed to by `utf8_out`. + Convert a Python string to a UTF-8 borrowed buffer. + This is done without allocating new Python `bytes` objects. + In case the string is an ASCII string, it's also generally zero-copy. + The `utf8_out` param will point to (borrow from) either the ASCII buffer + inside the original Python object or a part of memory allocated inside the + `b` buffer. + + If you need to use `utf8_out` without the GIL, call `qdb_pystr_buf_copy`. """ - # Note that we bypass `line_sender_utf8_init`. - cdef bytes owner = None + if not PyUnicode_CheckExact(string): + raise TypeError( + 'Expected a str object, not an object of type ' + + _fqn(type(string))) PyUnicode_READY(string) + + # We optimize the common case of ASCII strings. + # This avoid memory allocations and copies altogether. + # We get away with this because ASCII is a subset of UTF-8. if PyUnicode_IS_COMPACT_ASCII(string): utf8_out.len = (PyUnicode_GET_LENGTH(string)) utf8_out.buf = (PyUnicode_1BYTE_DATA(string)) - return owner - else: - owner = string.encode('utf-8') - utf8_out.len = (PyBytes_GET_SIZE(owner)) - utf8_out.buf = (PyBytes_AsString(owner)) - return owner + return 0 + + _encode_utf8(b, string, utf8_out) -cdef bytes str_to_table_name(str string, line_sender_table_name* name_out): + +cdef void_int str_to_utf8_copy( + qdb_pystr_buf* b, + PyObject* string, + line_sender_utf8* utf8_out) except -1: + """ + Variant of `str_to_utf8` that always copies the string to a new buffer. + + The resulting `utf8_out` can be used when not holding the GIL: + The pointed-to memory is owned by `b`. + """ + if not PyUnicode_CheckExact(string): + raise TypeError( + 'Expected a str object, not an object of type ' + + _fqn(type(string))) + + PyUnicode_READY(string) + _encode_utf8(b, string, utf8_out) + + +cdef void_int str_to_table_name( + qdb_pystr_buf* b, + PyObject* string, + line_sender_table_name* name_out) except -1: """ Python string to borrowed C table name. Also see `str_to_utf8`. """ cdef line_sender_error* err = NULL cdef line_sender_utf8 utf8 - cdef bytes owner = str_to_utf8(string, &utf8) + str_to_utf8(b, string, &utf8) + if not line_sender_table_name_init(name_out, utf8.len, utf8.buf, &err): + raise c_err_to_py(err) + + +cdef void_int str_to_table_name_copy( + qdb_pystr_buf* b, + PyObject* string, + line_sender_table_name* name_out) except -1: + """ + Python string to copied C table name. + Also see `str_to_utf8_copy`. 
+ """ + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + str_to_utf8_copy(b, string, &utf8) if not line_sender_table_name_init(name_out, utf8.len, utf8.buf, &err): raise c_err_to_py(err) - return owner -cdef bytes str_to_column_name(str string, line_sender_column_name* name_out): +cdef void_int str_to_column_name( + qdb_pystr_buf* b, + str string, + line_sender_column_name* name_out) except -1: """ Python string to borrowed C column name. Also see `str_to_utf8`. """ cdef line_sender_error* err = NULL cdef line_sender_utf8 utf8 - cdef bytes owner = str_to_utf8(string, &utf8) + str_to_utf8(b, string, &utf8) + if not line_sender_column_name_init(name_out, utf8.len, utf8.buf, &err): + raise c_err_to_py(err) + + +cdef void_int str_to_column_name_copy( + qdb_pystr_buf* b, + str string, + line_sender_column_name* name_out) except -1: + """ + Python string to copied C column name. + Also see `str_to_utf8_copy`. + """ + cdef line_sender_error* err = NULL + cdef line_sender_utf8 utf8 + str_to_utf8_copy(b, string, &utf8) if not line_sender_column_name_init(name_out, utf8.len, utf8.buf, &err): raise c_err_to_py(err) - return owner cdef int64_t datetime_to_micros(datetime dt): @@ -229,40 +359,79 @@ cdef int64_t datetime_to_nanos(datetime dt): (dt.microsecond * 1000)) +cdef int64_t _US_SEC = 1000000 +cdef int64_t _NS_US = 1000 + + +cdef int64_t get_time_now_us() except -1: + """ + Get the current time in microseconds. + """ + IF UNAME_SYSNAME == 'Windows': + return time.time_ns() // 1000 + ELSE: + # Note: Y2K38 bug on 32-bit systems, but we don't care. + cdef timespec ts + if clock_gettime(CLOCK_REALTIME, &ts) != 0: + raise OSError(errno, 'clock_gettime(CLOCK_REALTIME, &ts) failed') + return (ts.tv_sec) * _US_SEC + (ts.tv_nsec) // _NS_US + + +cdef int64_t _NS_SEC = 1000000000 + + +cdef int64_t get_time_now_ns() except -1: + """ + Get the current time in nanoseconds. + """ + IF UNAME_SYSNAME == 'Windows': + return time.time_ns() + ELSE: + # Note: Y2K38 bug on 32-bit systems, but we don't care. + cdef timespec ts + if clock_gettime(CLOCK_REALTIME, &ts) != 0: + raise OSError(errno, 'clock_gettime(CLOCK_REALTIME, &ts) failed') + return (ts.tv_sec) * _NS_SEC + (ts.tv_nsec) + + cdef class TimestampMicros: """ - A timestamp in microseconds since the UNIX epoch. + A timestamp in microseconds since the UNIX epoch (UTC). - You may construct a ``TimestampMicros`` from an integer or a ``datetime``. + You may construct a ``TimestampMicros`` from an integer or a + ``datetime.datetime``, or simply call the :func:`TimestampMicros.now` + method. .. code-block:: python - # Can't be negative. - TimestampMicros(1657888365426838016) - - # Careful with the timezeone! - TimestampMicros.from_datetime(datetime.datetime.utcnow()) + # Recommended way to get the current timestamp. + TimestampMicros.now() - When constructing from a ``datetime``, you should take extra care - to ensure that the timezone is correct. + # The above is equivalent to: + TimestampMicros(time.time_ns() // 1000) - For example, ``datetime.now()`` implies the `local` timezone which - is probably not what you want. + # You can provide a numeric timestamp too. It can't be negative. + TimestampMicros(1657888365426838) - When constructing the ``datetime`` object explicity, you pass in the - timezone to use. + ``TimestampMicros`` can also be constructed from a ``datetime.datetime`` + object. .. 
code-block:: python

        TimestampMicros.from_datetime(
-            datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc))
+            datetime.datetime.now(tz=datetime.timezone.utc))

+    We recommend that when using ``datetime`` objects, you explicitly pass in
+    the timezone to use. This is because ``datetime`` objects without an
+    associated timezone are assumed to be in the local timezone and it is easy
+    to make mistakes (e.g. passing ``datetime.datetime.utcnow()`` is a likely
+    bug).
     """
     cdef int64_t _value

     def __cinit__(self, value: int):
         if value < 0:
-            raise ValueError('value must positive integer.')
+            raise ValueError('value must be a positive integer.')
         self._value = value

     @classmethod
@@ -274,46 +443,60 @@ cdef class TimestampMicros:
             raise TypeError('dt must be a datetime object.')
         return cls(datetime_to_micros(dt))

+    @classmethod
+    def now(cls):
+        """
+        Construct a ``TimestampMicros`` from the current time as UTC.
+        """
+        cdef int64_t value = get_time_now_us()
+        return cls(value)
+
     @property
     def value(self) -> int:
-        """Number of microseconds."""
+        """Number of microseconds (Unix epoch timestamp, UTC)."""
         return self._value

+    def __repr__(self):
+        return f'TimestampMicros({self._value})'
+

 cdef class TimestampNanos:
     """
-    A timestamp in nanoseconds since the UNIX epoch.
+    A timestamp in nanoseconds since the UNIX epoch (UTC).

-    You may construct a ``TimestampNanos`` from an integer or a ``datetime``.
+    You may construct a ``TimestampNanos`` from an integer or a
+    ``datetime.datetime``, or simply call the :func:`TimestampNanos.now`
+    method.

     .. code-block:: python

-        # Can't be negative.
-        TimestampNanos(1657888365426838016)
-
-        # Careful with the timezeone!
-        TimestampNanos.from_datetime(datetime.datetime.utcnow())
+        # Recommended way to get the current timestamp.
+        TimestampNanos.now()

-    When constructing from a ``datetime``, you should take extra care
-    to ensure that the timezone is correct.
+        # The above is equivalent to:
+        TimestampNanos(time.time_ns())

-    For example, ``datetime.now()`` implies the `local` timezone which
-    is probably not what you want.
+        # You can provide a numeric timestamp too. It can't be negative.
+        TimestampNanos(1657888365426838016)

-    When constructing the ``datetime`` object explicity, you pass in the
-    timezone to use.
+    ``TimestampNanos`` can also be constructed from a ``datetime`` object.

     .. code-block:: python

-        TimestampMicros.from_datetime(
-            datetime.datetime(2000, 1, 1, tzinfo=datetime.timezone.utc))
+        TimestampNanos.from_datetime(
+            datetime.datetime.now(tz=datetime.timezone.utc))

+    We recommend that when using ``datetime`` objects, you explicitly pass in
+    the timezone to use. This is because ``datetime`` objects without an
+    associated timezone are assumed to be in the local timezone and it is easy
+    to make mistakes (e.g. passing ``datetime.datetime.utcnow()`` is a likely
+    bug).
     """
     cdef int64_t _value

     def __cinit__(self, value: int):
         if value < 0:
-            raise ValueError('value must positive integer.')
+            raise ValueError('value must be a positive integer.')
         self._value = value

     @classmethod
@@ -325,17 +508,28 @@ cdef class TimestampNanos:
             raise TypeError('dt must be a datetime object.')
         return cls(datetime_to_nanos(dt))

+    @classmethod
+    def now(cls):
+        """
+        Construct a ``TimestampNanos`` from the current time as UTC.
+ """ + cdef int64_t value = get_time_now_ns() + return cls(value) + @property def value(self) -> int: - """Number of nanoseconds.""" + """Number of nanoseconds (Unix epoch timestamp, UTC).""" return self._value + def __repr__(self): + return f'TimestampNanos({self.value})' + cdef class Sender cdef class Buffer -cdef int may_flush_on_row_complete(Buffer buffer, Sender sender) except -1: +cdef void_int may_flush_on_row_complete(Buffer buffer, Sender sender) except -1: if sender._auto_flush_enabled: if len(buffer) >= sender._auto_flush_watermark: sender.flush(buffer) @@ -406,6 +600,7 @@ cdef class Buffer: """ cdef line_sender_buffer* _impl + cdef qdb_pystr_buf* _b cdef size_t _init_capacity cdef size_t _max_name_len cdef object _row_complete_sender @@ -420,6 +615,7 @@ cdef class Buffer: cdef inline _cinit_impl(self, size_t init_capacity, size_t max_name_len): self._impl = line_sender_buffer_with_max_name_len(max_name_len) + self._b = qdb_pystr_buf_new() line_sender_buffer_reserve(self._impl, init_capacity) self._init_capacity = init_capacity self._max_name_len = max_name_len @@ -427,6 +623,7 @@ cdef class Buffer: def __dealloc__(self): self._row_complete_sender = None + qdb_pystr_buf_free(self._b) line_sender_buffer_free(self._impl) @property @@ -473,6 +670,7 @@ cdef class Buffer: ``sender.flush(buffer, clear=False)``. """ line_sender_buffer_clear(self._impl) + qdb_pystr_buf_clear(self._b) def __len__(self) -> int: """ @@ -491,12 +689,12 @@ cdef class Buffer: cdef const char* utf8 = line_sender_buffer_peek(self._impl, &size) return PyUnicode_FromStringAndSize(utf8, size) - cdef inline int _set_marker(self) except -1: + cdef inline void_int _set_marker(self) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_set_marker(self._impl, &err): raise c_err_to_py(err) - cdef inline int _rewind_to_marker(self) except -1: + cdef inline void_int _rewind_to_marker(self) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_rewind_to_marker(self._impl, &err): raise c_err_to_py(err) @@ -504,84 +702,82 @@ cdef class Buffer: cdef inline _clear_marker(self): line_sender_buffer_clear_marker(self._impl) - cdef inline int _table(self, str table_name) except -1: + cdef inline void_int _table(self, str table_name) except -1: cdef line_sender_error* err = NULL cdef line_sender_table_name c_table_name - cdef bytes owner = str_to_table_name(table_name, &c_table_name) + str_to_table_name( + self._cleared_b(), table_name, &c_table_name) if not line_sender_buffer_table(self._impl, c_table_name, &err): raise c_err_to_py(err) - return 0 - cdef inline int _symbol(self, str name, str value) except -1: + cdef inline qdb_pystr_buf* _cleared_b(self): + qdb_pystr_buf_clear(self._b) + return self._b + + cdef inline void_int _symbol(self, str name, str value) except -1: cdef line_sender_error* err = NULL cdef line_sender_column_name c_name cdef line_sender_utf8 c_value - cdef bytes owner_name = str_to_column_name(name, &c_name) - cdef bytes owner_value = str_to_utf8(value, &c_value) + str_to_column_name(self._cleared_b(), name, &c_name) + str_to_utf8(self._b, value, &c_value) if not line_sender_buffer_symbol(self._impl, c_name, c_value, &err): raise c_err_to_py(err) - return 0 - cdef inline int _column_bool( + cdef inline void_int _column_bool( self, line_sender_column_name c_name, bint value) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_column_bool(self._impl, c_name, value, &err): raise c_err_to_py(err) - return 0 - cdef inline int _column_i64( + cdef inline 
void_int _column_i64( self, line_sender_column_name c_name, int64_t value) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_column_i64(self._impl, c_name, value, &err): raise c_err_to_py(err) return 0 - cdef inline int _column_f64( + cdef inline void_int _column_f64( self, line_sender_column_name c_name, double value) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_column_f64(self._impl, c_name, value, &err): raise c_err_to_py(err) - return 0 - cdef inline int _column_str( + cdef inline void_int _column_str( self, line_sender_column_name c_name, str value) except -1: cdef line_sender_error* err = NULL cdef line_sender_utf8 c_value - cdef bytes owner_value = str_to_utf8(value, &c_value) + str_to_utf8(self._b, value, &c_value) if not line_sender_buffer_column_str(self._impl, c_name, c_value, &err): raise c_err_to_py(err) - return 0 - cdef inline int _column_ts( + cdef inline void_int _column_ts( self, line_sender_column_name c_name, TimestampMicros ts) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_column_ts(self._impl, c_name, ts._value, &err): raise c_err_to_py(err) - return 0 - cdef inline int _column_dt( + cdef inline void_int _column_dt( self, line_sender_column_name c_name, datetime dt) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_column_ts( self._impl, c_name, datetime_to_micros(dt), &err): raise c_err_to_py(err) - return 0 - cdef inline int _column(self, str name, object value) except -1: + cdef inline void_int _column(self, str name, object value) except -1: cdef line_sender_column_name c_name - cdef bytes owner_name = str_to_column_name(name, &c_name) - if PyBool_Check(value): - return self._column_bool(c_name, value) - elif PyInt_Check(value): - return self._column_i64(c_name, value) - elif PyFloat_Check(value): - return self._column_f64(c_name, value) - elif PyUnicode_Check(value): - return self._column_str(c_name, value) + str_to_column_name(self._cleared_b(), name, &c_name) + if PyBool_Check(value): + self._column_bool(c_name, value) + elif PyLong_CheckExact(value): + self._column_i64(c_name, value) + elif PyFloat_CheckExact(value): + self._column_f64(c_name, value) + elif PyUnicode_CheckExact(value): + self._column_str(c_name, value) elif isinstance(value, TimestampMicros): - return self._column_ts(c_name, value) + self._column_ts(c_name, value) elif isinstance(value, datetime): - return self._column_dt(c_name, value) + self._column_dt(c_name, value) else: valid = ', '.join(( 'bool', @@ -591,9 +787,9 @@ cdef class Buffer: 'TimestampMicros', 'datetime.datetime')) raise TypeError( - f'Unsupported type: {type(value)}. Must be one of: {valid}') + f'Unsupported type: {_fqn(type(value))}. 
Must be one of: {valid}') - cdef inline int _may_trigger_row_complete(self) except -1: + cdef inline void_int _may_trigger_row_complete(self) except -1: cdef line_sender_error* err = NULL cdef PyObject* sender = NULL if self._row_complete_sender != None: @@ -601,38 +797,35 @@ cdef class Buffer: if sender != NULL: may_flush_on_row_complete(self, sender) - cdef inline int _at_ts(self, TimestampNanos ts) except -1: + cdef inline void_int _at_ts(self, TimestampNanos ts) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_at(self._impl, ts._value, &err): raise c_err_to_py(err) - return 0 - cdef inline int _at_dt(self, datetime dt) except -1: + cdef inline void_int _at_dt(self, datetime dt) except -1: cdef int64_t value = datetime_to_nanos(dt) cdef line_sender_error* err = NULL if not line_sender_buffer_at(self._impl, value, &err): raise c_err_to_py(err) - return 0 - cdef inline int _at_now(self) except -1: + cdef inline void_int _at_now(self) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_at_now(self._impl, &err): raise c_err_to_py(err) - return 0 - cdef inline int _at(self, object ts) except -1: + cdef inline void_int _at(self, object ts) except -1: if ts is None: - return self._at_now() + self._at_now() elif isinstance(ts, TimestampNanos): - return self._at_ts(ts) + self._at_ts(ts) elif isinstance(ts, datetime): - return self._at_dt(ts) + self._at_dt(ts) else: raise TypeError( - f'Unsupported type: {type(ts)}. Must be one of: ' + + f'Unsupported type: {_fqn(type(ts))}. Must be one of: ' + 'TimestampNanos, datetime, None') - cdef int _row( + cdef void_int _row( self, str table_name, dict symbols=None, @@ -708,7 +901,7 @@ cdef class Buffer: columns={ 'temperature': 24.5, 'humidity': 0.5}, - at=datetime.datetime.utcnow()) + at=datetime.datetime.now(tz=datetime.timezone.utc)) Python strings passed as values to ``symbols`` are going to be encoded @@ -764,181 +957,284 @@ cdef class Buffer: self._row(table_name, symbols, columns, at) return self - # def tabular( - # self, - # table_name: str, - # data: Iterable[Iterable[Union[ - # bool, int, float, str, - # TimestampMicros, TimestampNanos, datetime]]], - # *, - # header: Optional[List[Optional[str]]]=None, - # symbols: Union[bool, List[int]]=False, - # at: Union[None, TimestampNanos, datetime]=None): - # """ - # Add multiple rows as an iterable of iterables (e.g. list of lists) to - # the buffer. - - # **Data and header** - - # The ``data`` argument specifies rows which must all be for the same - # table. Column names are provided as the ``header``. - - # .. code-block:: python - - # buffer.tabular( - # 'table_name', - # [[True, 123, 3.14, 'xyz'], - # [False, 456, 6.28, 'abc'], - # [True, 789, 9.87, 'def']], - # header=['col1', 'col2', 'col3', 'col4']) - - # **Designated Timestamp Column** - - # QuestDB supports a special `designated timestamp - # `_ column that it - # uses to sort the rows by timestamp. - - # If the data section contains the same number of columns as the header, - # then the designated is going to be - # assigned by the server, unless specified for all columns the `at` - # argument as either an integer wrapped in a ``TimestampNanos`` object - # representing nanoseconds since unix epoch (1970-01-01 00:00:00 UTC) or - # as a ``datetime.datetime`` object. - - # .. 
code-block:: python - - # buffer.tabular( - # 'table_name', - # [[True, None, 3.14, 'xyz'], - # [False, 123, 6.28, 'abc'], - # [True, 456, 9.87, 'def']], - # header=['col1', 'col2', 'col3', 'col4'], - # at=datetime.datetime.utcnow()) - - # # or ... - # # at=TimestampNanos(1657386397157631000)) - - # If the rows need different `designated timestamp - # `_ values across - # different rows, you can provide them as an additional unlabeled column. - # An unlabled column is one that has its name set to ``None``. - - # .. code-block:: python - - # ts1 = datetime.datetime.utcnow() - # ts2 = ( - # datetime.datetime.utcnow() + - # datetime.timedelta(microseconds=1)) - # buffer.tabular( - # 'table_name', - # [[True, 123, ts1], - # [False, 456, ts2]], - # header=['col1', 'col2', None]) - - # Like the ``at`` argument, the designated timestamp column may also be - # specified as ``TimestampNanos`` objects. - - # .. code-block:: python - - # buffer.tabular( - # 'table_name', - # [[True, 123, TimestampNanos(1657386397157630000)], - # [False, 456, TimestampNanos(1657386397157631000)]], - # header=['col1', 'col2', None]) - - # The designated timestamp column may appear anywhere positionally. - - # .. code-block:: python - - # ts1 = datetime.datetime.utcnow() - # ts2 = ( - # datetime.datetime.utcnow() + - # datetime.timedelta(microseconds=1)) - # buffer.tabular( - # 'table_name', - # [[1000, ts1, 123], - # [2000, ts2, 456]], - # header=['col1', None, 'col2']) - - # **Other timestamp columns** - - # Other columns may also contain timestamps. These columns can take - # ``datetime.datetime`` objects or ``TimestampMicros`` (*not nanos*) - # objects. - - # .. code-block:: python - - # ts1 = datetime.datetime.utcnow() - # ts2 = ( - # datetime.datetime.utcnow() + - # datetime.timedelta(microseconds=1)) - # buffer.tabular( - # 'table_name', - # [[1000, ts1, 123], - # [2000, ts2, 456]], - # header=['col1', 'col2', 'col3'], - # at=datetime.datetime.utcnow()) - - # **Symbol Columns** - - # QuestDB can represent strings via the ``STRING`` or ``SYMBOL`` types. - - # If all the columns of type ``str`` are to be treated as ``STRING``, then - # specify ``symbols=False`` (default - see exaples above). - - # If all need to be treated as ``SYMBOL`` specify ``symbols=True``. - - # .. code-block:: python - - # buffer.tabular( - # 'table_name', - # [['abc', 123, 3.14, 'xyz'], - # ['def', 456, None, 'abc'], - # ['ghi', 789, 9.87, 'def']], - # header=['col1', 'col2', 'col3', 'col4'], - # symbols=True) # `col1` and `col4` are SYMBOL columns. - - # Whilst if only a select few are to be treated as ``SYMBOL``, specify a - # list of column indices to the ``symbols`` arg. - - # .. code-block:: python - - # buffer.tabular( - # 'table_name', - # [['abc', 123, 3.14, 'xyz'], - # ['def', 456, 6.28, 'abc'], - # ['ghi', 789, 9.87, 'def']], - # header=['col1', 'col2', 'col3', 'col4'], - # symbols=[0]) # `col1` is SYMBOL; 'col4' is STRING. - - # Alternatively, you can specify a list of symbol column names. - - # .. code-block:: python - - # buffer.tabular( - # 'table_name', - # [['abc', 123, 3.14, 'xyz'], - # ['def', 456, 6.28, 'abc'], - # ['ghi', 789, 9.87, 'def']], - # header=['col1', 'col2', 'col3', 'col4'], - # symbols=['col1']) # `col1` is SYMBOL; 'col4' is STRING. - - # Note that column indices are 0-based and negative indices are counted - # from the end. 
-    #     """
-    #     raise ValueError('nyi')
-
-    # def pandas(
-    #         self,
-    #         table_name: str,
-    #         data: pd.DataFrame,
-    #         *,
-    #         symbols: Union[bool, List[int]]=False,
-    #         at: Union[None, TimestampNanos, datetime]=None):
-    #     """
-    #     Add a pandas DataFrame to the buffer.
-    #     """
-    #     raise ValueError('nyi')
+    def dataframe(
+            self,
+            df,  # : pd.DataFrame
+            *,
+            table_name: Optional[str] = None,
+            table_name_col: Union[None, int, str] = None,
+            symbols: Union[str, bool, List[int], List[str]] = 'auto',
+            at: Union[None, int, str, TimestampNanos, datetime] = None):
+        """
+        Add a pandas DataFrame to the buffer.
+
+        Also see the :func:`Sender.dataframe` method if you're
+        not using the buffer explicitly. It supports the same parameters
+        and also supports auto-flushing.
+
+        This feature requires the ``pandas``, ``numpy`` and ``pyarrow``
+        packages to be installed.
+
+        :param df: The pandas DataFrame to serialize to the buffer.
+        :type df: pandas.DataFrame
+
+        :param table_name: The name of the table to which the rows belong.
+
+            If ``None``, the table name is taken from the ``table_name_col``
+            parameter. If both ``table_name`` and ``table_name_col`` are
+            ``None``, the table name is taken from the DataFrame's index
+            name (``df.index.name`` attribute).
+        :type table_name: str or None
+
+        :param table_name_col: The name or index of the column in the DataFrame
+            that contains the table name.
+
+            If ``None``, the table name is taken
+            from the ``table_name`` parameter. If both ``table_name`` and
+            ``table_name_col`` are ``None``, the table name is taken from the
+            DataFrame's index name (``df.index.name`` attribute).
+
+            If ``table_name_col`` is an integer, it is interpreted as the index
+            of the column starting from ``0``. The index of the column can be
+            negative, in which case it is interpreted as an offset from the end
+            of the DataFrame. E.g. ``-1`` is the last column.
+        :type table_name_col: str or int or None
+
+        :param symbols: The columns to be serialized as symbols.
+
+            If ``'auto'`` (default), all columns of dtype ``'categorical'`` are
+            serialized as symbols. If ``True``, all ``str`` columns are
+            serialized as symbols. If ``False``, no columns are serialized as
+            symbols.
+
+            The list of symbols can also be specified explicitly as a ``list``
+            of column names (``str``) or indices (``int``). Integer indices
+            start at ``0`` and can be negative, offset from the end of the
+            DataFrame. E.g. ``-1`` is the last column.
+
+            Only columns containing strings can be serialized as symbols.
+
+        :type symbols: str or bool or list of str or list of int
+
+        :param at: The designated timestamp of the rows.
+
+            You can specify a single value for all rows, or a column name or
+            index.
+            If ``None``, the timestamp is assigned by the server for all rows.
+            To pass in a timestamp explicitly as an integer use the
+            ``TimestampNanos`` wrapper type. To get the current timestamp,
+            use ``TimestampNanos.now()``.
+            When passing a ``datetime.datetime`` object, the timestamp is
+            converted to nanoseconds.
+            A ``datetime`` object is assumed to be in the local timezone unless
+            one is specified explicitly (so call
+            ``datetime.datetime.now(tz=datetime.timezone.utc)`` instead
+            of ``datetime.datetime.utcnow()`` for the current timestamp to
+            avoid bugs).
+ + To specify a different timestamp for each row, pass in a column name + (``str``) or index (``int``, 0-based index, negative index + supported): In this case, the column needs to be of dtype + ``datetime64[ns]`` (assumed to be in the **UTC timezone** and not + local, due to differences in Pandas and Python datetime handling) or + ``datetime64[ns, tz]``. When a timezone is specified in the column, + it is converted to UTC automatically. + + A timestamp column can also contain ``None`` values. The server will + assign the current timestamp to those rows. + + **Note**: All timestamps are always converted to nanoseconds and in + the UTC timezone. Timezone information is dropped before sending and + QuestDB will not store any timezone information. + :type at: TimestampNanos, datetime.datetime, int or str or None + + **Note**: It is an error to specify both ``table_name`` and + ``table_name_col``. + + **Note**: The "index" column of the DataFrame is never serialized, + even if it is named. + + Example: + + .. code-block:: python + + import pandas as pd + import questdb.ingress as qi + + buf = qi.Buffer() + # ... + + df = pd.DataFrame({ + 'location': ['London', 'Managua', 'London'], + 'temperature': [24.5, 35.0, 25.5], + 'humidity': [0.5, 0.6, 0.45], + 'ts': pd.date_range('2021-07-01', periods=3)}) + buf.dataframe( + df, table_name='weather', at='ts', symbols=['location']) + + # ... + sender.flush(buf) + + **Pandas to ILP datatype mappings** + + .. seealso:: https://questdb.io/docs/reference/api/ilp/columnset-types/ + + .. list-table:: Pandas Mappings + :header-rows: 1 + + * - Pandas ``dtype`` + - Nulls + - ILP Datatype + * - ``'bool'`` + - N + - ``BOOLEAN`` + * - ``'boolean'`` + - N **α** + - ``BOOLEAN`` + * - ``'object'`` (``bool`` objects) + - N **α** + - ``BOOLEAN`` + * - ``'uint8'`` + - N + - ``INTEGER`` + * - ``'int8'`` + - N + - ``INTEGER`` + * - ``'uint16'`` + - N + - ``INTEGER`` + * - ``'int16'`` + - N + - ``INTEGER`` + * - ``'uint32'`` + - N + - ``INTEGER`` + * - ``'int32'`` + - N + - ``INTEGER`` + * - ``'uint64'`` + - N + - ``INTEGER`` **β** + * - ``'int64'`` + - N + - ``INTEGER`` + * - ``'UInt8'`` + - Y + - ``INTEGER`` + * - ``'Int8'`` + - Y + - ``INTEGER`` + * - ``'UInt16'`` + - Y + - ``INTEGER`` + * - ``'Int16'`` + - Y + - ``INTEGER`` + * - ``'UInt32'`` + - Y + - ``INTEGER`` + * - ``'Int32'`` + - Y + - ``INTEGER`` + * - ``'UInt64'`` + - Y + - ``INTEGER`` **β** + * - ``'Int64'`` + - Y + - ``INTEGER`` + * - ``'object'`` (``int`` objects) + - Y + - ``INTEGER`` **β** + * - ``'float32'`` **γ** + - Y (``NaN``) + - ``FLOAT`` + * - ``'float64'`` + - Y (``NaN``) + - ``FLOAT`` + * - ``'object'`` (``float`` objects) + - Y (``NaN``) + - ``FLOAT`` + * - ``'string'`` (``str`` objects) + - Y + - ``STRING`` (default), ``SYMBOL`` via ``symbols`` arg. **δ** + * - ``'string[pyarrow]'`` + - Y + - ``STRING`` (default), ``SYMBOL`` via ``symbols`` arg. **δ** + * - ``'category'`` (``str`` objects) **ε** + - Y + - ``SYMBOL`` (default), ``STRING`` via ``symbols`` arg. **δ** + * - ``'object'`` (``str`` objects) + - Y + - ``STRING`` (default), ``SYMBOL`` via ``symbols`` arg. **δ** + * - ``'datetime64[ns]'`` + - Y + - ``TIMESTAMP`` **ζ** + * - ``'datetime64[ns, tz]'`` + - Y + - ``TIMESTAMP`` **ζ** + + .. note:: + + * **α**: Note some pandas dtypes allow nulls (e.g. ``'boolean'``), + where the QuestDB database does not. + + * **β**: The valid range for integer values is -2^63 to 2^63-1. 
+              Any ``'uint64'``, ``'UInt64'`` or python ``int`` object values
+              outside this range will raise an error during serialization.
+
+            * **γ**: Upcast to 64-bit float during serialization.
+
+            * **δ**: Columns containing strings can also be used to specify the
+              table name. See ``table_name_col``.
+
+            * **ε**: We only support categories containing strings. If the
+              category contains non-string values, an error will be raised.
+
+            * **ζ**: The ``dataframe()`` method only supports datetimes with
+              nanosecond precision. The designated timestamp column (see ``at``
+              parameter) maintains the nanosecond precision, whilst values
+              stored as columns have their precision truncated to microseconds.
+              All dates are sent as UTC and any additional timezone information
+              is dropped. If no timezone is specified, we follow
+              the pandas convention of assuming the timezone is UTC.
+              Datetimes before 1970-01-01 00:00:00 UTC are not supported.
+              If a datetime value is specified as ``None`` (``NaT``), it is
+              interpreted as the current QuestDB server time set on receipt of
+              the message.
+
+        **Error Handling and Recovery**
+
+        In case an exception is raised during dataframe serialization, the
+        buffer is left in its previous state.
+        The buffer remains in a valid state and can be used for further calls
+        even after an error.
+
+        For clarification, as an example, if an invalid ``None``
+        value appears at the 3rd row for a ``bool`` column, neither the 3rd nor
+        the preceding rows are added to the buffer.
+
+        **Note**: This differs from the :func:`Sender.dataframe` method, which
+        modifies this guarantee due to its ``auto_flush`` logic.
+
+        **Performance Considerations**
+
+        The Python GIL is released during serialization if it is not needed.
+        If any column requires the GIL, the entire serialization is done whilst
+        holding the GIL.
+
+        Column types that require the GIL are:
+
+        * Columns of ``str``, ``int`` or ``float`` Python objects.
+        * The ``'string[python]'`` dtype.
+ """ + _dataframe( + auto_flush_blank(), + self._impl, + self._b, + df, + table_name, + table_name_col, + symbols, + at) _FLUSH_FMT = ('{} - See https://py-questdb-client.readthedocs.io/en/' @@ -1100,15 +1396,12 @@ cdef class Sender: cdef line_sender_error* err = NULL cdef line_sender_utf8 host_utf8 - cdef bytes host_owner cdef str port_str cdef line_sender_utf8 port_utf8 - cdef bytes port_owner cdef str interface_str cdef line_sender_utf8 interface_utf8 - cdef bytes interface_owner cdef str a_key_id cdef bytes a_key_id_owner @@ -1126,27 +1419,36 @@ cdef class Sender: cdef bytes a_pub_key_y_owner cdef line_sender_utf8 a_pub_key_y_utf8 - cdef bytes ca_owner cdef line_sender_utf8 ca_utf8 + cdef qdb_pystr_buf* b + self._opts = NULL self._impl = NULL - self._buffer = None - if PyInt_Check(port): + self._init_capacity = init_capacity + self._max_name_len = max_name_len + + self._buffer = Buffer( + init_capacity=init_capacity, + max_name_len=max_name_len) + + b = self._buffer._b + + if PyLong_CheckExact(port): port_str = str(port) - elif PyUnicode_Check(port): + elif PyUnicode_CheckExact(port): port_str = port else: raise TypeError( - f'port must be an integer or a string, not {type(port)}') + f'port must be an int or a str, not {_fqn(type(port))}') - host_owner = str_to_utf8(host, &host_utf8) - port_owner = str_to_utf8(port_str, &port_utf8) + str_to_utf8(b, host, &host_utf8) + str_to_utf8(b, port_str, &port_utf8) self._opts = line_sender_opts_new_service(host_utf8, port_utf8) if interface is not None: - interface_owner = str_to_utf8(interface, &interface_utf8) + str_to_utf8(b, interface, &interface_utf8) line_sender_opts_net_interface(self._opts, interface_utf8) if auth is not None: @@ -1154,10 +1456,10 @@ cdef class Sender: a_priv_key, a_pub_key_x, a_pub_key_y) = auth - a_key_id_owner = str_to_utf8(a_key_id, &a_key_id_utf8) - a_priv_key_owner = str_to_utf8(a_priv_key, &a_priv_key_utf8) - a_pub_key_x_owner = str_to_utf8(a_pub_key_x, &a_pub_key_x_utf8) - a_pub_key_y_owner = str_to_utf8(a_pub_key_y, &a_pub_key_y_utf8) + str_to_utf8(b, a_key_id, &a_key_id_utf8) + str_to_utf8(b, a_priv_key, &a_priv_key_utf8) + str_to_utf8(b, a_pub_key_x, &a_pub_key_x_utf8) + str_to_utf8(b, a_pub_key_y, &a_pub_key_y_utf8) line_sender_opts_auth( self._opts, a_key_id_utf8, @@ -1172,27 +1474,20 @@ cdef class Sender: if tls == 'insecure_skip_verify': line_sender_opts_tls_insecure_skip_verify(self._opts) else: - ca_owner = str_to_utf8(tls, &ca_utf8) + str_to_utf8(b, tls, &ca_utf8) line_sender_opts_tls_ca(self._opts, ca_utf8) elif isinstance(tls, pathlib.Path): tls = str(tls) - ca_owner = str_to_utf8(tls, &ca_utf8) + str_to_utf8(b, tls, &ca_utf8) line_sender_opts_tls_ca(self._opts, ca_utf8) else: raise TypeError( 'tls must be a bool, a path or string pointing to CA file ' - f'or "insecure_skip_verify", not {type(tls)}') + f'or "insecure_skip_verify", not {_fqn(type(tls))}') if read_timeout is not None: line_sender_opts_read_timeout(self._opts, read_timeout) - self._init_capacity = init_capacity - self._max_name_len = max_name_len - - self._buffer = Buffer( - init_capacity=init_capacity, - max_name_len=max_name_len) - self._auto_flush_enabled = not not auto_flush self._auto_flush_watermark = int(auto_flush) \ if self._auto_flush_enabled else 0 @@ -1200,6 +1495,8 @@ cdef class Sender: raise ValueError( 'auto_flush_watermark must be >= 0, ' f'not {self._auto_flush_watermark}') + + qdb_pystr_buf_clear(b) def new_buffer(self): """ @@ -1288,6 +1585,68 @@ cdef class Sender: """ self._buffer.row(table_name, symbols=symbols, 
                         columns=columns, at=at)
+    def dataframe(
+            self,
+            df,  # : pd.DataFrame
+            *,
+            table_name: Optional[str] = None,
+            table_name_col: Union[None, int, str] = None,
+            symbols: Union[str, bool, List[int], List[str]] = 'auto',
+            at: Union[None, int, str, TimestampNanos, datetime] = None):
+        """
+        Write a Pandas DataFrame to the internal buffer.
+
+        Example:
+
+        .. code-block:: python
+
+            import pandas as pd
+            import questdb.ingress as qi
+
+            df = pd.DataFrame({
+                'car': pd.Categorical(['Nic 42', 'Eddi', 'Nic 42', 'Eddi']),
+                'position': [1, 2, 1, 2],
+                'speed': [89.3, 98.2, 3, 4],
+                'lat_gforce': [0.1, -0.2, -0.6, 0.4],
+                'acceleration': [0.1, -0.2, 0.6, 4.4],
+                'tyre_pressure': [2.6, 2.5, 2.6, 2.5],
+                'ts': [
+                    pd.Timestamp('2022-08-09 13:56:00'),
+                    pd.Timestamp('2022-08-09 13:56:01'),
+                    pd.Timestamp('2022-08-09 13:56:02'),
+                    pd.Timestamp('2022-08-09 13:56:03')]})
+
+            with qi.Sender('localhost', 9009) as sender:
+                sender.dataframe(df, table_name='race_metrics', at='ts')
+
+        This method builds on top of the :func:`Buffer.dataframe` method.
+        See its documentation for details on arguments.
+
+        Additionally, this method also supports auto-flushing the buffer
+        as specified in the ``Sender``'s ``auto_flush`` constructor argument.
+        Auto-flushing is implemented incrementally, meaning that when
+        calling ``sender.dataframe(df)`` with a large ``df``, the sender may
+        already have sent some of the rows to the server, whilst the remaining
+        rows will be sent at the next auto-flush or the next explicit call
+        to :func:`Sender.flush`.
+
+        In case of data errors with auto-flushing enabled, some of the rows
+        may have been transmitted to the server already.
+        """
+        cdef auto_flush_t af = auto_flush_blank()
+        if self._auto_flush_enabled:
+            af.sender = self._impl
+            af.watermark = self._auto_flush_watermark
+        _dataframe(
+            af,
+            self._buffer._impl,
+            self._buffer._b,
+            df,
+            table_name,
+            table_name_col,
+            symbols,
+            at)
+
     cpdef flush(self, Buffer buffer=None, bint clear=True):
         """
         If called with no arguments, immediately flushes the internal buffer.
@@ -1307,13 +1666,19 @@ cdef class Sender:
             If ``False``, the flushed buffer is left in the internal buffer.
             Note that ``clear=False`` is only supported if ``buffer`` is also
             specified.
+
+        The Python GIL is released during the network IO operation.
         """
+        cdef line_sender* sender = self._impl
+        cdef line_sender_error* err = NULL
+        cdef line_sender_buffer* c_buf = NULL
+        cdef PyThreadState* gs = NULL  # GIL state. NULL means we have the GIL.
+        cdef bint ok = False
+
         if buffer is None and not clear:
             raise ValueError('The internal buffer must always be cleared.')
 
-        cdef line_sender_error* err = NULL
-        cdef line_sender_buffer* c_buf = NULL
-        if self._impl == NULL:
+        if sender == NULL:
             raise IngressError(
                 IngressErrorCode.InvalidApiCall,
                 'flush() can\'t be called: Not connected.')
@@ -1324,20 +1689,21 @@ cdef class Sender:
         if line_sender_buffer_size(c_buf) == 0:
             return
 
-        try:
-            if clear:
-                if not line_sender_flush(self._impl, c_buf, &err):
-                    raise c_err_to_py_fmt(err, _FLUSH_FMT)
-            else:
-                if not line_sender_flush_and_keep(self._impl, c_buf, &err):
-                    raise c_err_to_py_fmt(err, _FLUSH_FMT)
-        except:
-            # Prevent a follow-up call to `.close(flush=True)` (as is usually
-            # called from `__exit__`) to raise after the sender entered an error
-            # state following a failed call to `.flush()`.
+        # We might be blocking on IO, so temporarily release the GIL.
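+        # Note: the GIL has to be re-acquired (via `_ensure_has_gil` below)
+        # before a Python exception can be raised on the error path.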
+ _ensure_doesnt_have_gil(&gs) + if clear: + ok = line_sender_flush(sender, c_buf, &err) + else: + ok = line_sender_flush_and_keep(sender, c_buf, &err) + _ensure_has_gil(&gs) + if not ok: if c_buf == self._buffer._impl: + # Prevent a follow-up call to `.close(flush=True)` (as is + # usually called from `__exit__`) to raise after the sender + # entered an error state following a failed call to `.flush()`. + # Note: In this case `clear` is always `True`. line_sender_buffer_clear(c_buf) - raise + raise c_err_to_py_fmt(err, _FLUSH_FMT) cdef _close(self): self._buffer = None diff --git a/src/questdb/ingress_helper.inc b/src/questdb/ingress_helper.inc new file mode 100644 index 00000000..b726a913 --- /dev/null +++ b/src/questdb/ingress_helper.inc @@ -0,0 +1,14 @@ +#pragma once +// This file is included into `ingress.c`. + + +// Cython idiosyncrasy workaround. +// If we do this in Cython it treats `buf.obj` as +// a ref-counted `object` instead of a `PyObject*`, +// so we can't check it for NULL. +// Since `Py_buffer` is a Cython built-in we can't actually +// just re-define it in `extra_cpython.pxd`. +static int Py_buffer_obj_is_set(Py_buffer* buf) +{ + return buf->obj != NULL; +} diff --git a/src/questdb/ingress_helper.pxd b/src/questdb/ingress_helper.pxd new file mode 100644 index 00000000..9d3651d5 --- /dev/null +++ b/src/questdb/ingress_helper.pxd @@ -0,0 +1,2 @@ +cdef extern from "ingress_helper.inc": + bint Py_buffer_obj_is_set(Py_buffer* buf) \ No newline at end of file diff --git a/src/questdb/pystr_to_utf8.pxd b/src/questdb/pystr_to_utf8.pxd new file mode 100644 index 00000000..1822e26c --- /dev/null +++ b/src/questdb/pystr_to_utf8.pxd @@ -0,0 +1,60 @@ +from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, intptr_t +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t + +cdef extern from "pystr_to_utf8.h": + + cdef struct qdb_pystr_buf: + pass + + cdef struct qdb_pystr_pos: + size_t chain + size_t string + + # Prepare a new buffer. The buffer must be freed with `qdb_pystr_free`. + # The `qdb_ucsX_to_utf8` functions will write to this buffer. + qdb_pystr_buf *qdb_pystr_buf_new() + + # Get current position. Use in conjunction with `truncate`. + qdb_pystr_pos qdb_pystr_buf_tell(const qdb_pystr_buf *b) + + # Trim the buffer to the given position. Use in conjunction with `tell`. + void qdb_pystr_buf_truncate(qdb_pystr_buf *b, qdb_pystr_pos pos) + + # Reset the converter's buffer to zero length. + void qdb_pystr_buf_clear(qdb_pystr_buf *b) + + # Free the buffer. Must be called after `qdb_pystr_buf_new`. + void qdb_pystr_buf_free(qdb_pystr_buf *b) + + # Convert a Py_UCS1 string to UTF-8. + # Returns a `buf_out` borrowed ptr of `size_out` len. + # The buffer is borrowed from `b`. + void qdb_ucs1_to_utf8(qdb_pystr_buf *b, + size_t count, + const uint8_t *input, + size_t *size_out, + const char **buf_out) + + # Convert a Py_UCS2 string to UTF-8. + # Returns a `buf_out` borrowed ptr of `size_out` len. + # The buffer is borrowed from `b`. + # In case of errors, returns `false` and bad_codepoint_out is set to the + # offending codepoint. + bint qdb_ucs2_to_utf8(qdb_pystr_buf *b, + size_t count, + const uint16_t *input, + size_t *size_out, + const char **buf_out, + uint32_t *bad_codepoint_out) + + # Convert a Py_UCS4 string to UTF-8. + # Returns a `buf_out` borrowed ptr of `size_out` len. + # The buffer is borrowed from `b`. + # In case of errors, returns `false` and bad_codepoint_out is set to the + # offending codepoint. 
+ bint qdb_ucs4_to_utf8(qdb_pystr_buf *b, + size_t count, + const uint32_t *input, + size_t *size_out, + const char **buf_out, + uint32_t *bad_codepoint_out) diff --git a/test/benchmark.py b/test/benchmark.py new file mode 100644 index 00000000..fa62b514 --- /dev/null +++ b/test/benchmark.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 + +import sys +import os +sys.dont_write_bytecode = True +import unittest +import time +import numpy as np +import pandas as pd +from concurrent.futures import ThreadPoolExecutor + +import patch_path +import questdb.ingress as qi + + +def _tp(buf, t0, t1): + tp = len(buf) / (t1 - t0) / 1024 / 1024 + return f'{tp:.2f} MiB/s' + + +class TestBenchmarkPandas(unittest.TestCase): + def test_pystr_i64_10m(self): + # This is a benchmark, not a test. + # It is useful to run it manually to check performance. + slist = [f's{i:09}' for i in range(10_000_000)] + df = pd.DataFrame({ + 'a': slist, + 'b': list(range(len(slist)))}) + + buf = qi.Buffer() + + # Warm up and pre-size buffer + buf.dataframe(df, table_name='tbl1', symbols=True) + buf.clear() + + # Run + t0 = time.monotonic() + buf.dataframe(df, table_name='tbl1', symbols=True) + t1 = time.monotonic() + print(f'Time: {t1 - t0}, size: {len(buf)}, tp: {_tp(buf, t0, t1)}') + + def test_mixed_10m(self): + # This is a benchmark, not a test. + # It is useful to run it manually to check performance. + count = 10_000_000 + slist = [f's{i:09}' for i in range(count)] + df = pd.DataFrame({ + 'col1': pd.Series(slist, dtype='string[pyarrow]'), + 'col2': list(range(len(slist))), + 'col3': [float(i / 2) for i in range(len(slist))], + 'col4': [float(i / 2) + 1.0 for i in range(len(slist))], + 'col5': pd.Categorical( + ['a', 'b', 'c', 'a', None, 'c', 'a', float('nan')] * + (count // 8))}) + + buf = qi.Buffer() + + # Warm up and pre-size buffer + buf.dataframe(df, table_name='tbl1', symbols=True) + buf.clear() + + # Run + t0 = time.monotonic() + buf.dataframe(df, table_name='tbl1', symbols=True) + t1 = time.monotonic() + print(f'Time: {t1 - t0}, size: {len(buf)}, tp: {_tp(buf, t0, t1)}') + + def test_string_escaping_10m(self): + count = 10_000_000 + slist = [f's={i:09}==abc \\' for i in range(count)] + series = pd.Series(slist, dtype='string[pyarrow]') + df = pd.DataFrame({ + 'col1': series, + 'col2': series, + 'col3': series, + 'col4': series, + 'col5': series, + 'col6': series}) + + buf = qi.Buffer() + + # Warm up and pre-size buffer + buf.dataframe(df, table_name='tbl1', symbols=True) + buf.clear() + + # Run + t0 = time.monotonic() + buf.dataframe(df, table_name='tbl1', symbols=True) + t1 = time.monotonic() + print(f'Time: {t1 - t0}, size: {len(buf)}, tp: {_tp(buf, t0, t1)}') + + def test_string_encoding_10m(self): + count = 10_000_000 + strs = ['a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 12 , # UCS-2 + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '𐀀a𐀀b𐀀💩🦞c𐀀d𐀀ef'] # UCS-4, 4 bytes for UTF-8. 
+ slist = strs * (count // len(strs)) + self.assertEqual(len(slist), count) + + df = pd.DataFrame({ + 'col1': slist, + 'col2': slist, + 'col3': slist, + 'col4': slist, + 'col5': slist}) + + buf = qi.Buffer() + + # Warm up and pre-size buffer + buf.dataframe(df, table_name='tbl1', symbols=False) + buf.clear() + + # Run + t0 = time.monotonic() + buf.dataframe(df, table_name='tbl1', symbols=False) + t1 = time.monotonic() + print(f'Time: {t1 - t0}, size: {len(buf)}, tp: {_tp(buf, t0, t1)}') + + def _test_gil_release_10m(self, threads): + count = 10_000_000 + series = pd.Series(np.arange(count), dtype='int64') + df = pd.DataFrame({ + 'col1': series, + 'col2': series, + 'col3': series, + 'col4': series, + 'col5': series, + 'col6': series}) + + tpe = ThreadPoolExecutor(max_workers=threads) + bufs = [qi.Buffer() for _ in range(threads)] + + def benchmark_run(buf): + t0 = time.monotonic() + buf.dataframe(df, table_name='tbl1', symbols=True) + t1 = time.monotonic() + return buf, (t0, t1) + + # Warm up and pre-size buffer + futs = [ + tpe.submit(benchmark_run, buf) + for buf in bufs] + for fut in futs: + fut.result() # Wait for completion + for buf in bufs: + buf.clear() + + # Run + futs = [ + tpe.submit(benchmark_run, buf) + for buf in bufs] + results = [ + fut.result() + for fut in futs] + print(f'\nSize: {len(bufs[0])}') + total_time = 0 + min_time = 2 ** 64 -1 # Bigger than any `time.monotonic()` value + max_time = 0 + print('Per-thread times:') + for index, (_, (t0, t1)) in enumerate(results): + if t0 < min_time: + min_time = t0 + if t1 > max_time: + max_time = t1 + elapsed = t1 - t0 + print(f' [{index:02}]: Time: {elapsed}') + total_time += elapsed + avg_time = total_time / len(results) + print(f'Avg time: {avg_time}') + tp = (len(bufs[0]) * len(bufs)) / (max_time - min_time) / 1024 / 1024 + print(f'Wall time: {max_time - min_time}, tp: {tp:.2f} MiB/s') + + def test_gil_release_10m_1t(self): + self._test_gil_release_10m(1) + + def test_gil_release_10m_10t(self): + self._test_gil_release_10m(10) + + def test_gil_release_10m_16t(self): + self._test_gil_release_10m(16) + + def test_gil_release_10m_32t(self): + self._test_gil_release_10m(32) + + +if __name__ == '__main__': + if os.environ.get('TEST_QUESTDB_PROFILE') == '1': + import cProfile + cProfile.run('unittest.main()', sort='cumtime') + else: + unittest.main() diff --git a/test/patch_path.py b/test/patch_path.py index 8137bcd1..64868881 100644 --- a/test/patch_path.py +++ b/test/patch_path.py @@ -9,5 +9,8 @@ import pathlib PROJ_ROOT = pathlib.Path(__file__).parent.parent -if os.environ.get('TEST_QUESTDB_PATCH_PATH') == '1': +def patch(): sys.path.append(str(PROJ_ROOT / 'src')) + +if os.environ.get('TEST_QUESTDB_PATCH_PATH') == '1': + patch() \ No newline at end of file diff --git a/test/system_test.py b/test/system_test.py index 900fff03..e0eb7d5a 100755 --- a/test/system_test.py +++ b/test/system_test.py @@ -12,6 +12,14 @@ from fixture import QuestDbFixture, install_questdb, CA_PATH, AUTH +try: + import pandas as pd + import numpy + import pyarrow +except ImportError: + pd = None + + import questdb.ingress as qi @@ -123,6 +131,44 @@ def test_auth_tls_ca(self): def test_auth_tls_ca_str(self): self._test_scenario(self.qdb_auth, AUTH, str(CA_PATH)) + @unittest.skipIf(not pd, 'pandas not installed') + def test_basic_dataframe(self): + port = self.qdb_plain.line_tcp_port + pending = None + table_name = uuid.uuid4().hex + df = pd.DataFrame({ + 'col_a': [1, 2, 3], + 'col_b': ['a', 'b', 'c'], + 'col_c': [True, False, True], + 'col_d': [1.5, 2.5, 
3.5], + 'col_e': pd.Categorical(['A', 'B', 'C']), + 'col_f': [ + numpy.datetime64('2021-01-01'), + numpy.datetime64('2021-01-02'), + numpy.datetime64('2021-01-03')]}) + df.index.name = table_name + with qi.Sender('localhost', port) as sender: + sender.dataframe(df) + pending = str(sender) + + resp = self.qdb_plain.retry_check_table( + table_name, min_rows=3, log_ctx=pending) + exp_columns = [ + {'name': 'col_e', 'type': 'SYMBOL'}, + {'name': 'col_a', 'type': 'LONG'}, + {'name': 'col_b', 'type': 'STRING'}, + {'name': 'col_c', 'type': 'BOOLEAN'}, + {'name': 'col_d', 'type': 'DOUBLE'}, + {'name': 'col_f', 'type': 'TIMESTAMP'}, + {'name': 'timestamp', 'type': 'TIMESTAMP'}] + self.assertEqual(resp['columns'], exp_columns) + + exp_dataset = [ # Comparison excludes timestamp column. + ['A', 1, 'a', True, 1.5, '2021-01-01T00:00:00.000000Z'], + ['B', 2, 'b', False, 2.5, '2021-01-02T00:00:00.000000Z'], + ['C', 3, 'c', True, 3.5, '2021-01-03T00:00:00.000000Z']] + scrubbed_dataset = [row[:-1] for row in resp['dataset']] + self.assertEqual(scrubbed_dataset, exp_dataset) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/test/test.py b/test/test.py index 392839f0..136f91f1 100755 --- a/test/test.py +++ b/test/test.py @@ -10,11 +10,30 @@ import patch_path from mock_server import Server + import questdb.ingress as qi if os.environ.get('TEST_QUESTDB_INTEGRATION') == '1': from system_test import TestWithDatabase +try: + import pandas as pd + import numpy + import pyarrow +except ImportError: + pd = None + + +if pd is not None: + from test_dataframe import TestPandas +else: + class TestNoPandas(unittest.TestCase): + def test_no_pandas(self): + buf = qi.Buffer() + exp = 'Missing.*`pandas.*pyarrow`.*readthedocs.*installation.html.' + with self.assertRaisesRegex(ImportError, exp): + buf.dataframe(None) + class TestBuffer(unittest.TestCase): def test_new(self): @@ -106,8 +125,43 @@ def test_no_symbol_or_col_args(self): def test_unicode(self): buf = qi.Buffer() - buf.row('tbl1', symbols={'questdb1': '❤️'}, columns={'questdb2': '❤️'}) - self.assertEqual(str(buf), 'tbl1,questdb1=❤️ questdb2="❤️"\n') + buf.row( + 'tbl1', # ASCII + symbols={'questdb1': 'q❤️p'}, # Mixed ASCII and UCS-2 + columns={'questdb2': '❤️' * 1200}) # Over the 1024 buffer prealloc. + buf.row( + 'tbl1', + symbols={ + 'Questo è il nome di una colonna': # Non-ASCII UCS-1 + 'Це символьне значення'}, # UCS-2, 2 bytes for UTF-8. + columns={ + 'questdb1': '', # Empty string + 'questdb2': '嚜꓂', # UCS-2, 3 bytes for UTF-8. + 'questdb3': '💩🦞'}) # UCS-4, 4 bytes for UTF-8. + self.assertEqual(str(buf), + f'tbl1,questdb1=q❤️p questdb2="{"❤️" * 1200}"\n' + + 'tbl1,Questo\\ è\\ il\\ nome\\ di\\ una\\ colonna=' + + 'Це\\ символьне\\ значення ' + + 'questdb1="",questdb2="嚜꓂",questdb3="💩🦞"\n') + + buf.clear() + buf.row('tbl1', symbols={'questdb1': 'q❤️p'}) + self.assertEqual(str(buf), 'tbl1,questdb1=q❤️p\n') + + # A bad char in Python. + with self.assertRaisesRegex( + qi.IngressError, + '.*codepoint 0xd800 in string .*'): + buf.row('tbl1', symbols={'questdb1': 'a\ud800'}) + + # Strong exception safety: no partial writes. + # Ensure we can continue using the buffer after an error. + buf.row('tbl1', symbols={'questdb1': 'another line of input'}) + self.assertEqual( + str(buf), + 'tbl1,questdb1=q❤️p\n' + + # Note: No partially written failed line here. 
+ 'tbl1,questdb1=another\\ line\\ of\\ input\n') def test_float(self): buf = qi.Buffer() @@ -139,7 +193,6 @@ def test_int_range(self): buf.row('tbl1', columns={'num': -2**63-1}) - class TestSender(unittest.TestCase): def test_basic(self): with Server() as server, qi.Sender('localhost', server.port) as sender: @@ -362,6 +415,55 @@ def test_dont_flush_on_exception(self): msgs = server.recv() self.assertEqual(msgs, []) + @unittest.skipIf(not pd, 'pandas not installed') + def test_dataframe(self): + with Server() as server: + with qi.Sender('localhost', server.port) as sender: + server.accept() + df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) + sender.dataframe(df, table_name='tbl1') + msgs = server.recv() + self.assertEqual( + msgs, + [b'tbl1 a=1i,b=3.0', + b'tbl1 a=2i,b=4.0']) + + @unittest.skipIf(not pd, 'pandas not installed') + def test_dataframe_auto_flush(self): + with Server() as server: + # An auto-flush size of 20 bytes is enough to auto-flush the first + # row, but not the second. + with qi.Sender('localhost', server.port, auto_flush=20) as sender: + server.accept() + df = pd.DataFrame({'a': [100000, 2], 'b': [3.0, 4.0]}) + sender.dataframe(df, table_name='tbl1') + msgs = server.recv() + self.assertEqual( + msgs, + [b'tbl1 a=100000i,b=3.0']) + + # The second row is still pending send. + self.assertEqual(len(sender), 16) + + # So we give it some more data and we should see it flush. + sender.row('tbl1', columns={'a': 3, 'b': 5.0}) + msgs = server.recv() + self.assertEqual( + msgs, + [b'tbl1 a=2i,b=4.0', + b'tbl1 a=3i,b=5.0']) + + self.assertEqual(len(sender), 0) + + # We can now disconnect the server and see auto flush failing. + server.close() + + exp_err = 'Could not flush buffer.* - See https' + with self.assertRaisesRegex(qi.IngressError, exp_err): + for _ in range(1000): + time.sleep(0.01) + sender.dataframe(df.head(1), table_name='tbl1') + def test_new_buffer(self): sender = qi.Sender( host='localhost', @@ -393,5 +495,64 @@ def test_bad_init_args(self): qi.Sender(host='localhost', port=9009, max_name_len=-1) +class TestBases: + class Timestamp(unittest.TestCase): + def test_from_int(self): + ns = 1670857929778202000 + num = ns // self.ns_scale + ts = self.timestamp_cls(num) + self.assertEqual(ts.value, num) + + ts0 = self.timestamp_cls(0) + self.assertEqual(ts0.value, 0) + + with self.assertRaisesRegex(ValueError, 'value must be a positive'): + self.timestamp_cls(-1) + + def test_from_datetime(self): + utc = datetime.timezone.utc + + dt1 = datetime.datetime(2022, 1, 1, 12, 0, 0, 0, tzinfo=utc) + ts1 = self.timestamp_cls.from_datetime(dt1) + self.assertEqual(ts1.value, 1641038400000000000 // self.ns_scale) + self.assertEqual( + ts1.value, + int(dt1.timestamp() * 1000000000 // self.ns_scale)) + + dt2 = datetime.datetime(1970, 1, 1, tzinfo=utc) + ts2 = self.timestamp_cls.from_datetime(dt2) + self.assertEqual(ts2.value, 0) + + with self.assertRaisesRegex(ValueError, 'value must be a positive'): + self.timestamp_cls.from_datetime( + datetime.datetime(1969, 12, 31, tzinfo=utc)) + + dt_naive = datetime.datetime(2022, 1, 1, 12, 0, 0, 0, + tzinfo=utc).astimezone(None).replace(tzinfo=None) + ts3 = self.timestamp_cls.from_datetime(dt_naive) + self.assertEqual(ts3.value, 1641038400000000000 // self.ns_scale) + + def test_now(self): + expected = time.time_ns() // self.ns_scale + actual = self.timestamp_cls.now().value + delta = abs(expected - actual) + one_sec = 1000000000 // self.ns_scale + self.assertLess(delta, one_sec) + + +class TestTimestampMicros(TestBases.Timestamp): + 
timestamp_cls = qi.TimestampMicros + ns_scale = 1000 + + +class TestTimestampNanos(TestBases.Timestamp): + timestamp_cls = qi.TimestampNanos + ns_scale = 1 + + if __name__ == '__main__': - unittest.main() + if os.environ.get('TEST_QUESTDB_PROFILE') == '1': + import cProfile + cProfile.run('unittest.main()', sort='cumtime') + else: + unittest.main() diff --git a/test/test_dataframe.py b/test/test_dataframe.py new file mode 100644 index 00000000..42e310c7 --- /dev/null +++ b/test/test_dataframe.py @@ -0,0 +1,1575 @@ +#!/usr/bin/env python3 + +import sys +import os +sys.dont_write_bytecode = True +import unittest +import datetime as dt +import functools +import tempfile +import pathlib + +BROKEN_TIMEZONES = True + +try: + import zoneinfo + _TZ = zoneinfo.ZoneInfo('America/New_York') + BROKEN_TIMEZONES = os.name == 'nt' +except ImportError: + import pytz + _TZ = pytz.timezone('America/New_York') + +import patch_path + +import questdb.ingress as qi +import pandas as pd +import numpy as np +import pyarrow as pa + +try: + import fastparquet +except ImportError: + fastparquet = None + + +def _dataframe(*args, **kwargs): + buf = qi.Buffer() + buf.dataframe(*args, **kwargs) + return str(buf) + + +DF1 = pd.DataFrame({ + 'A': [1.0, 2.0, 3.0], + 'B': [1, 2, 3], + 'C': [ + pd.Timestamp('20180310'), + pd.Timestamp('20180311'), + pd.Timestamp('20180312')], + 'D': [True, 'foo', 'bar']}) + + +DF2 = pd.DataFrame({ + 'T': ['t1', 't2', 't1'], + 'A': ['a1', 'a2', 'a3'], + 'B': ['b1', None, 'b3'], + 'C': pd.Series(['b1', None, 'b3'], dtype='string'), + 'D': pd.Series(['a1', 'a2', 'a3'], dtype='string'), + 'E': [1.0, 2.0, 3.0], + 'F': [1, 2, 3], + 'G': [ + pd.Timestamp('20180310'), + pd.Timestamp('20180311'), + pd.Timestamp('20180312')]}) + + +def with_tmp_dir(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + with tempfile.TemporaryDirectory(prefix='py-questdb-client_') as tmpdir: + return func(self, *args, pathlib.Path(tmpdir), **kwargs) + return wrapper + + +class TestPandas(unittest.TestCase): + def test_bad_dataframe(self): + with self.assertRaisesRegex(qi.IngressError, + 'Expected pandas'): + _dataframe([]) + + def test_no_table_name(self): + with self.assertRaisesRegex(qi.IngressError, + 'Must specify at least one of'): + _dataframe(DF1) + + def test_bad_table_name_type(self): + with self.assertRaisesRegex(qi.IngressError, 'Must be str'): + _dataframe(DF1, table_name=1.5) + + def test_invalid_table_name(self): + with self.assertRaisesRegex(qi.IngressError, + '`table_name`: Bad string "."'): + _dataframe(DF1, table_name='.') + + def test_invalid_column_dtype(self): + with self.assertRaisesRegex(qi.IngressError, + '`table_name_col`: Bad dtype'): + _dataframe(DF1, table_name_col='B') + with self.assertRaisesRegex(qi.IngressError, + '`table_name_col`: Bad dtype'): + _dataframe(DF1, table_name_col=1) + with self.assertRaisesRegex(qi.IngressError, + '`table_name_col`: Bad dtype'): + _dataframe(DF1, table_name_col=-3) + with self.assertRaisesRegex(qi.IngressError, + '`table_name_col`: -5 index'): + _dataframe(DF1, table_name_col=-5) + + def test_bad_str_obj_col(self): + with self.assertRaisesRegex(qi.IngressError, + "`table_name_col`: Bad.*`object`.*bool.*'D'.*Must.*strings"): + _dataframe(DF1, table_name_col='D') + with self.assertRaisesRegex(qi.IngressError, + "`table_name_col`: Bad.*`object`.*bool.*'D'.*Must.*strings"): + _dataframe(DF1, table_name_col=3) + with self.assertRaisesRegex(qi.IngressError, + "`table_name_col`: Bad.*`object`.*bool.*'D'.*Must.*strings"): + _dataframe(DF1, 
table_name_col=-1) + + def test_bad_symbol(self): + with self.assertRaisesRegex(qi.IngressError, + '`symbols`.*bool.*tuple.*list'): + _dataframe(DF1, table_name='tbl1', symbols=0) + with self.assertRaisesRegex(qi.IngressError, + '`symbols`.*bool.*tuple.*list'): + _dataframe(DF1, table_name='tbl1', symbols={}) + with self.assertRaisesRegex(qi.IngressError, + '`symbols`.*bool.*tuple.*list'): + _dataframe(DF1, table_name='tbl1', symbols=None) + with self.assertRaisesRegex(qi.IngressError, + "`symbols`: Bad dtype `float64`.*'A'.*Must.*strings col"): + _dataframe(DF1, table_name='tbl1', symbols=(0,)) + with self.assertRaisesRegex(qi.IngressError, + "`symbols`: Bad dtype `int64`.*'B'.*Must be a strings column."): + _dataframe(DF1, table_name='tbl1', symbols=[1]) + + def test_bad_at(self): + with self.assertRaisesRegex(qi.IngressError, + '`at`.*2018.*not found in the'): + _dataframe(DF1, table_name='tbl1', at='2018-03-10T00:00:00Z') + with self.assertRaisesRegex(qi.IngressError, + '`at`.*float64.*be a datetime'): + _dataframe(DF1, table_name='tbl1', at='A') + with self.assertRaisesRegex(qi.IngressError, + '`at`.*int64.*be a datetime'): + _dataframe(DF1, table_name='tbl1', at=1) + with self.assertRaisesRegex(qi.IngressError, + '`at`.*object.*be a datetime'): + _dataframe(DF1, table_name='tbl1', at=-1) + + def test_empty_dataframe(self): + buf = _dataframe(pd.DataFrame(), table_name='tbl1') + self.assertEqual(buf, '') + + def test_zero_row_dataframe(self): + buf = _dataframe(pd.DataFrame(columns=['A', 'B']), table_name='tbl1') + self.assertEqual(buf, '') + + def test_zero_column_dataframe(self): + df = pd.DataFrame(index=[0, 1, 2]) + self.assertEqual(len(df), 3) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual(buf, '') + + def test_basic(self): + buf = _dataframe( + DF2, + table_name_col='T', + symbols=['A', 'B', 'C', 'D'], + at=-1) + self.assertEqual( + buf, + 't1,A=a1,B=b1,C=b1,D=a1 E=1.0,F=1i 1520640000000000000\n' + + 't2,A=a2,D=a2 E=2.0,F=2i 1520726400000000000\n' + + 't1,A=a3,B=b3,C=b3,D=a3 E=3.0,F=3i 1520812800000000000\n') + + def test_named_dataframe(self): + df = pd.DataFrame({ + 'a': [1, 2, 3], + 'b': ['a', 'b', 'c']}) + df.index.name = 'table_name' + buf = _dataframe(df) + self.assertEqual( + buf, + 'table_name a=1i,b="a"\n' + + 'table_name a=2i,b="b"\n' + + 'table_name a=3i,b="c"\n') + + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n') + + buf = _dataframe(df, table_name_col='b') + self.assertEqual( + buf, + 'a a=1i\n' + + 'b a=2i\n' + + 'c a=3i\n') + + df.index.name = 42 # bad type, not str + with self.assertRaisesRegex(qi.IngressError, + 'Bad dataframe index name as table.*: Expected str, not.*int.'): + _dataframe(df) + + @unittest.skipIf(BROKEN_TIMEZONES, 'requires accurate timezones') + def test_at_good(self): + df = pd.DataFrame({ + 'a': [1, 2, 3], + 'b': ['a', 'b', 'c']}) + df.index.name = 'test_at_good' + with self.assertRaisesRegex(qi.IngressError, + 'Bad argument `at`: Column .2018-03.* not found .* dataframe.'): + _dataframe(df, at='2018-03-10T00:00:00Z') + + # Same timestamp, specified in various ways. 
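+        # (t1..t7 below all resolve to 2018-03-10 00:00:00 UTC, i.e.
+        # 1520640000000000000 ns since the Unix epoch, as asserted further
+        # down.)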
+ t1_setup = dt.datetime(2018, 3, 10, 0, 0, 0, tzinfo=dt.timezone.utc) + t1 = t1_setup.astimezone(tz=None).replace(tzinfo=None) # naive, local + t2 = dt.datetime(2018, 3, 10, 0, 0, 0, tzinfo=dt.timezone.utc) + t3 = dt.datetime(2018, 3, 9, 19, 0, 0, tzinfo=_TZ) + t4 = qi.TimestampNanos(1520640000000000000) + t5 = qi.TimestampNanos.from_datetime(t1) + t6 = qi.TimestampNanos.from_datetime(t2) + t7 = qi.TimestampNanos.from_datetime(t3) + timestamps = [t1, t2, t3, t4, t5, t6, t7] + for ts in timestamps: + buf = _dataframe(df, table_name='tbl1', at=ts) + self.assertEqual( + buf, + 'tbl1 a=1i,b="a" 1520640000000000000\n' + + 'tbl1 a=2i,b="b" 1520640000000000000\n' + + 'tbl1 a=3i,b="c" 1520640000000000000\n') + + @unittest.skipIf(BROKEN_TIMEZONES, 'requires accurate timezones') + def test_at_neg(self): + n1 = dt.datetime(1965, 1, 1, 0, 0, 0, tzinfo=dt.timezone.utc) + n2 = dt.datetime(1965, 1, 1, 0, 0, 0, tzinfo=_TZ) + n3 = dt.datetime(1965, 1, 1, 0, 0, 0) + neg_timestamps = [n1, n2, n3] + for ts in neg_timestamps: + with self.assertRaisesRegex(qi.IngressError, + 'Bad.*`at`: Cannot .* before the Unix epoch .1970-01-01.*'): + _dataframe(DF2, at=ts, table_name='test_at_neg') + + @unittest.skipIf(BROKEN_TIMEZONES, 'requires accurate timezones') + def test_at_ts_0(self): + df = pd.DataFrame({ + 'a': [1, 2, 3], + 'b': ['a', 'b', 'c']}) + df.index.name = 'test_at_ts_0' + + # Epoch 0, specified in various ways. + e1_setup = dt.datetime(1970, 1, 1, 0, 0, 0, tzinfo=dt.timezone.utc) + e1 = e1_setup.astimezone(tz=None).replace(tzinfo=None) # naive, local + e2 = dt.datetime(1970, 1, 1, 0, 0, 0, tzinfo=dt.timezone.utc) + e3 = dt.datetime(1969, 12, 31, 19, 0, 0, tzinfo=_TZ) + e4 = qi.TimestampNanos(0) + e5 = qi.TimestampNanos.from_datetime(e1) + e6 = qi.TimestampNanos.from_datetime(e2) + e7 = qi.TimestampNanos.from_datetime(e3) + edge_timestamps = [e1, e2, e3, e4, e5, e6, e7] + + for ts in edge_timestamps: + buf = _dataframe(df, table_name='tbl1', at=ts) + self.assertEqual( + buf, + 'tbl1 a=1i,b="a" 0\n' + + 'tbl1 a=2i,b="b" 0\n' + + 'tbl1 a=3i,b="c" 0\n') + + def test_single_at_col(self): + df = pd.DataFrame({'timestamp': pd.to_datetime(['2023-01-01'])}) + with self.assertRaisesRegex(qi.IngressError, + 'Bad dataframe row at index 0: All values are nulls.'): + _dataframe(df, table_name='tbl1', at='timestamp') + + def test_row_of_nulls(self): + df = pd.DataFrame({'a': ['a1', None, 'a3']}) + with self.assertRaisesRegex( + qi.IngressError, 'Bad dataframe row.*1: All values are nulls.'): + _dataframe(df, table_name='tbl1', symbols=['a']) + + def test_u8_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + 0, + 255], # u8 max + dtype='uint8')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=0i\n' + + 'tbl1 a=255i\n') + + def test_i8_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + -128, # i8 min + 127, # i8 max + 0], dtype='int8')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=-128i\n' + + 'tbl1 a=127i\n' + + 'tbl1 a=0i\n') + + def test_u16_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + 0, + 65535], # u16 max + dtype='uint16')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=0i\n' + + 'tbl1 a=65535i\n') + + def test_i16_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + -32768, # i16 min + 
32767, # i16 max + 0], dtype='int16')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=-32768i\n' + + 'tbl1 a=32767i\n' + + 'tbl1 a=0i\n') + + def test_u32_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + 0, + 4294967295], # u32 max + dtype='uint32')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=0i\n' + + 'tbl1 a=4294967295i\n') + + def test_i32_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + -2147483648, # i32 min + 0, + 2147483647], # i32 max + dtype='int32')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=-2147483648i\n' + + 'tbl1 a=0i\n' + + 'tbl1 a=2147483647i\n') + + def test_u64_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + 0, + 9223372036854775807], # i64 max + dtype='uint64')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=0i\n' + + 'tbl1 a=9223372036854775807i\n') + + buf = qi.Buffer() + buf.dataframe(pd.DataFrame({'b': [.5, 1.0, 1.5]}), table_name='tbl2') + exp1 = ( + 'tbl2 b=0.5\n' + + 'tbl2 b=1.0\n' + + 'tbl2 b=1.5\n') + self.assertEqual( + str(buf), + exp1) + df2 = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + 0, + 9223372036854775808], # i64 max + 1 + dtype='uint64')}) + with self.assertRaisesRegex( + qi.IngressError, + 'serialize .* column .a. .* 4 .9223372036854775808.*int64'): + buf.dataframe(df2, table_name='tbl1') + + self.assertEqual( + str(buf), + exp1) # No partial write of `df2`. + + def test_i64_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + -9223372036854775808, # i64 min + 0, + 9223372036854775807], # i64 max + dtype='int64')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i\n' + + 'tbl1 a=2i\n' + + 'tbl1 a=3i\n' + + 'tbl1 a=-9223372036854775808i\n' + + 'tbl1 a=0i\n' + + 'tbl1 a=9223372036854775807i\n') + + def test_f32_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1.0, 2.0, 3.0, + 0.0, + float('inf'), + float('-inf'), + float('nan'), + 3.4028234663852886e38], # f32 max + dtype='float32')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1.0\n' + + 'tbl1 a=2.0\n' + + 'tbl1 a=3.0\n' + + 'tbl1 a=0.0\n' + + 'tbl1 a=Infinity\n' + + 'tbl1 a=-Infinity\n' + + 'tbl1 a=NaN\n' + + 'tbl1 a=3.4028234663852886e38\n') + + def test_f64_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 1.0, 2.0, 3.0, + 0.0, + float('inf'), + float('-inf'), + float('nan'), + 1.7976931348623157e308], # f64 max + dtype='float64')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1.0\n' + + 'tbl1 a=2.0\n' + + 'tbl1 a=3.0\n' + + 'tbl1 a=0.0\n' + + 'tbl1 a=Infinity\n' + + 'tbl1 a=-Infinity\n' + + 'tbl1 a=NaN\n' + + 'tbl1 a=1.7976931348623157e308\n') + + def test_u8_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + 0, + None, + 255], # u8 max + dtype=pd.UInt8Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=0i,b="d"\n' + + 'tbl1 b="e"\n' + + 'tbl1 a=255i,b="f"\n') + + def test_i8_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -128, # i8 min + 0, + None, + 127], # i8 max + 
dtype=pd.Int8Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=-128i,b="d"\n' + + 'tbl1 a=0i,b="e"\n' + + 'tbl1 b="f"\n' + + 'tbl1 a=127i,b="g"\n') + + def test_u16_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + 0, + None, + 65535], # u16 max + dtype=pd.UInt16Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=0i,b="d"\n' + + 'tbl1 b="e"\n' + + 'tbl1 a=65535i,b="f"\n') + + def test_i16_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -32768, # i16 min + 0, + None, + 32767], # i16 max + dtype=pd.Int16Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=-32768i,b="d"\n' + + 'tbl1 a=0i,b="e"\n' + + 'tbl1 b="f"\n' + + 'tbl1 a=32767i,b="g"\n') + + def test_u32_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + 0, + None, + 4294967295], # u32 max + dtype=pd.UInt32Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=0i,b="d"\n' + + 'tbl1 b="e"\n' + + 'tbl1 a=4294967295i,b="f"\n') + + def test_i32_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -2147483648, # i32 min + 0, + None, + 2147483647], # i32 max + dtype=pd.Int32Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=-2147483648i,b="d"\n' + + 'tbl1 a=0i,b="e"\n' + + 'tbl1 b="f"\n' + + 'tbl1 a=2147483647i,b="g"\n') + + def test_u64_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + 0, + None, + 9223372036854775807], # i64 max + dtype=pd.UInt64Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=0i,b="d"\n' + + 'tbl1 b="e"\n' + + 'tbl1 a=9223372036854775807i,b="f"\n') + + df2 = pd.DataFrame({'a': pd.Series([ + 1, 2, 3, + 0, + 9223372036854775808], # i64 max + 1 + dtype=pd.UInt64Dtype())}) + with self.assertRaisesRegex( + qi.IngressError, + 'serialize .* column .a. 
.* 4 .9223372036854775808.*int64'): + _dataframe(df2, table_name='tbl1') + + def test_i64_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, + -9223372036854775808, # i64 min + 0, + None, + 9223372036854775807], # i64 max + dtype=pd.Int64Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1i,b="a"\n' + + 'tbl1 a=2i,b="b"\n' + + 'tbl1 a=3i,b="c"\n' + + 'tbl1 a=-9223372036854775808i,b="d"\n' + + 'tbl1 a=0i,b="e"\n' + + 'tbl1 b="f"\n' + + 'tbl1 a=9223372036854775807i,b="g"\n') + + def test_f32_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1.0, 2.0, 3.0, + 0.0, + float('inf'), + float('-inf'), + float('nan'), + 3.4028234663852886e38, # f32 max + None], + dtype=pd.Float32Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1.0,b="a"\n' + + 'tbl1 a=2.0,b="b"\n' + + 'tbl1 a=3.0,b="c"\n' + + 'tbl1 a=0.0,b="d"\n' + + 'tbl1 a=Infinity,b="e"\n' + + 'tbl1 a=-Infinity,b="f"\n' + + 'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. + 'tbl1 a=3.4028234663852886e38,b="h"\n' + + 'tbl1 b="i"\n') + + def test_f64_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 1.0, 2.0, 3.0, + 0.0, + float('inf'), + float('-inf'), + float('nan'), + 1.7976931348623157e308, # f64 max + None], + dtype=pd.Float64Dtype()), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1.0,b="a"\n' + + 'tbl1 a=2.0,b="b"\n' + + 'tbl1 a=3.0,b="c"\n' + + 'tbl1 a=0.0,b="d"\n' + + 'tbl1 a=Infinity,b="e"\n' + + 'tbl1 a=-Infinity,b="f"\n' + + 'tbl1 b="g"\n' + # This one is wierd: `nan` gets 0 in the bitmask. + 'tbl1 a=1.7976931348623157e308,b="h"\n' + + 'tbl1 b="i"\n') + + def test_bool_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + True, False, False, + False, True, False], + dtype='bool')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=t\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n' + + 'tbl1 a=t\n' + + 'tbl1 a=f\n') + + def test_bool_arrow_col(self): + df = pd.DataFrame({'a': pd.Series([ + True, False, False, + False, True, False, + True, True, True, + False, False, False], + dtype='boolean')}) # Note `boolean` != `bool`. + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=t\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n' + + 'tbl1 a=t\n' + + 'tbl1 a=f\n' + + 'tbl1 a=t\n' + + 'tbl1 a=t\n' + + 'tbl1 a=t\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n') + + df2 = pd.DataFrame({'a': pd.Series([ + True, False, False, + None, True, False], + dtype='boolean')}) + with self.assertRaisesRegex( + qi.IngressError, + 'Failed.*at row index 3 .*.: .*insert null .*boolean col'): + _dataframe(df2, table_name='tbl1') + + def test_bool_obj_col(self): + df = pd.DataFrame({'a': pd.Series([ + True, False, False, + False, True, False], + dtype='object')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=t\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n' + + 'tbl1 a=f\n' + + 'tbl1 a=t\n' + + 'tbl1 a=f\n') + + df2 = pd.DataFrame({'a': pd.Series([ + True, False, 'false'], + dtype='object')}) + with self.assertRaisesRegex( + qi.IngressError, + 'serialize .* column .a. 
.* 2 .*false.*bool'): + _dataframe(df2, table_name='tbl1') + + df3 = pd.DataFrame({'a': pd.Series([ + None, True, False], + dtype='object')}) + with self.assertRaisesRegex( + qi.IngressError, + 'serialize.*\\(None\\): Cannot insert null.*boolean column'): + _dataframe(df3, table_name='tbl1') + + def test_datetime64_numpy_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + pd.Timestamp('2019-01-01 00:00:00'), + pd.Timestamp('2019-01-01 00:00:01'), + pd.Timestamp('2019-01-01 00:00:02'), + pd.Timestamp('2019-01-01 00:00:03'), + pd.Timestamp('2019-01-01 00:00:04'), + pd.Timestamp('2019-01-01 00:00:05'), + None, + float('nan'), + pd.NA], + dtype='datetime64[ns]'), + 'b': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=1546300800000000t,b="a"\n' + + 'tbl1 a=1546300801000000t,b="b"\n' + + 'tbl1 a=1546300802000000t,b="c"\n' + + 'tbl1 a=1546300803000000t,b="d"\n' + + 'tbl1 a=1546300804000000t,b="e"\n' + + 'tbl1 a=1546300805000000t,b="f"\n' + + 'tbl1 b="g"\n' + + 'tbl1 b="h"\n' + + 'tbl1 b="i"\n') + + df = pd.DataFrame({'a': pd.Series([ + pd.Timestamp('1970-01-01 00:00:00'), + pd.Timestamp('1970-01-01 00:00:01'), + pd.Timestamp('1970-01-01 00:00:02')])}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a=0t\n' + + 'tbl1 a=1000000t\n' + + 'tbl1 a=2000000t\n') + + def test_datetime64_tz_arrow_col(self): + df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=1, tz=_TZ), + None, + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=3, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3', 'sym4']}) + buf = _dataframe(df, table_name='tbl1', symbols=['b']) + self.assertEqual( + buf, + # Note how these are 5hr offset from `test_datetime64_numpy_col`. + 'tbl1,b=sym1 a=1546318800000000t\n' + + 'tbl1,b=sym2 a=1546318801000000t\n' + + 'tbl1,b=sym3\n' + + 'tbl1,b=sym4 a=1546318803000000t\n') + + # Not epoch 0. + df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1970, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=1970, month=1, day=1, + hour=0, minute=0, second=1, tz=_TZ), + pd.Timestamp( + year=1970, month=1, day=1, + hour=0, minute=0, second=2, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3']}) + buf = _dataframe(df, table_name='tbl1', symbols=['b']) + self.assertEqual( + buf, + # Note how these are 5hr offset from `test_datetime64_numpy_col`. + 'tbl1,b=sym1 a=18000000000t\n' + + 'tbl1,b=sym2 a=18001000000t\n' + + 'tbl1,b=sym3 a=18002000000t\n') + + # Actual epoch 0. 
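+        # (19:00:00 on 1969-12-31 in America/New_York is UTC-5, i.e. exactly
+        # 1970-01-01 00:00:00 UTC, the Unix epoch.)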
+ df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1969, month=12, day=31, + hour=19, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=1969, month=12, day=31, + hour=19, minute=0, second=1, tz=_TZ), + pd.Timestamp( + year=1969, month=12, day=31, + hour=19, minute=0, second=2, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3']}) + buf = _dataframe(df, table_name='tbl1', symbols=['b']) + self.assertEqual( + buf, + 'tbl1,b=sym1 a=0t\n' + + 'tbl1,b=sym2 a=1000000t\n' + + 'tbl1,b=sym3 a=2000000t\n') + + df2 = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1900, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ)], + 'b': ['sym1']}) + with self.assertRaisesRegex( + qi.IngressError, "Failed.*'a'.*-220897.* is negative."): + _dataframe(df2, table_name='tbl1', symbols=['b']) + return ############################################################### + + def test_datetime64_numpy_at(self): + df = pd.DataFrame({ + 'a': pd.Series([ + pd.Timestamp('2019-01-01 00:00:00'), + pd.Timestamp('2019-01-01 00:00:01'), + pd.Timestamp('2019-01-01 00:00:02'), + pd.Timestamp('2019-01-01 00:00:03'), + pd.Timestamp('2019-01-01 00:00:04'), + pd.Timestamp('2019-01-01 00:00:05'), + float('nan'), + None, + pd.NaT], + dtype='datetime64[ns]'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) + buf = _dataframe(df, table_name='tbl1', at='a') + self.assertEqual( + buf, + 'tbl1 b=1i 1546300800000000000\n' + + 'tbl1 b=2i 1546300801000000000\n' + + 'tbl1 b=3i 1546300802000000000\n' + + 'tbl1 b=4i 1546300803000000000\n' + + 'tbl1 b=5i 1546300804000000000\n' + + 'tbl1 b=6i 1546300805000000000\n' + + 'tbl1 b=7i\n' + + 'tbl1 b=8i\n' + + 'tbl1 b=9i\n') + + df = pd.DataFrame({ + 'a': pd.Series([ + pd.Timestamp('1970-01-01 00:00:00'), + pd.Timestamp('1970-01-01 00:00:01'), + pd.Timestamp('1970-01-01 00:00:02')], + dtype='datetime64[ns]'), + 'b': [1, 2, 3]}) + buf = _dataframe(df, table_name='tbl1', at='a') + self.assertEqual( + buf, + 'tbl1 b=1i 0\n' + + 'tbl1 b=2i 1000000000\n' + + 'tbl1 b=3i 2000000000\n') + + def test_datetime64_tz_arrow_at(self): + df = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ), + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=1, tz=_TZ), + None, + pd.Timestamp( + year=2019, month=1, day=1, + hour=0, minute=0, second=3, tz=_TZ)], + 'b': ['sym1', 'sym2', 'sym3', 'sym4']}) + buf = _dataframe(df, table_name='tbl1', symbols=['b'], at='a') + self.assertEqual( + buf, + # Note how these are 5hr offset from `test_datetime64_numpy_col`. + 'tbl1,b=sym1 1546318800000000000\n' + + 'tbl1,b=sym2 1546318801000000000\n' + + 'tbl1,b=sym3\n' + + 'tbl1,b=sym4 1546318803000000000\n') + + df2 = pd.DataFrame({ + 'a': [ + pd.Timestamp( + year=1900, month=1, day=1, + hour=0, minute=0, second=0, tz=_TZ)], + 'b': ['sym1']}) + with self.assertRaisesRegex( + qi.IngressError, "Failed.*'a'.*-220897.* is neg"): + _dataframe(df2, table_name='tbl1', symbols=['b'], at='a') + + def _test_pyobjstr_table(self, dtype): + df = pd.DataFrame({ + '../bad col name/../it does not matter...': + pd.Series([ + 'a', # ASCII + 'b' * 127, # Max table name length. + 'q❤️p', # Mixed ASCII and UCS-2 + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
+ dtype=dtype), + 'b': [1, 2, 3, 4, 5]}) + buf = _dataframe(df, table_name_col=0) + self.assertEqual( + buf, + 'a b=1i\n' + + ('b' * 127) + ' b=2i\n' + + 'q❤️p b=3i\n' + + '嚜꓂ b=4i\n' + + '💩🦞 b=5i\n') + + with self.assertRaisesRegex( + qi.IngressError, "Too long"): + _dataframe( + pd.DataFrame({'a': pd.Series(['b' * 128], dtype=dtype)}), + table_name_col='a') + + with self.assertRaisesRegex( + qi.IngressError, 'Failed.*Expected a table name, got a null.*'): + _dataframe( + pd.DataFrame({ + '.': pd.Series(['x', None], dtype=dtype), + 'b': [1, 2]}), + table_name_col='.') + + with self.assertRaisesRegex( + qi.IngressError, 'Failed.*Expected a table name, got a null.*'): + _dataframe( + pd.DataFrame({ + '.': pd.Series(['x', float('nan')], dtype=dtype), + 'b': [1, 2]}), + table_name_col='.') + + with self.assertRaisesRegex( + qi.IngressError, 'Failed.*Expected a table name, got a null.*'): + _dataframe( + pd.DataFrame({ + '.': pd.Series(['x', pd.NA], dtype=dtype), + 'b': [1, 2]}), + table_name_col='.') + + with self.assertRaisesRegex( + qi.IngressError, "''.*must have a non-zero length"): + _dataframe( + pd.DataFrame({ + '/': pd.Series([''], dtype=dtype), + 'b': [1]}), + table_name_col='/') + + with self.assertRaisesRegex( + qi.IngressError, "'tab..1'.*invalid dot `\\.` at position 4"): + _dataframe( + pd.DataFrame({ + '/': pd.Series(['tab..1'], dtype=dtype), + 'b': [1]}), + table_name_col='/') + + def test_obj_str_table(self): + self._test_pyobjstr_table('object') + + with self.assertRaisesRegex( + qi.IngressError, 'table name .*got an object of type int'): + _dataframe( + pd.DataFrame({ + '.': pd.Series(['x', 42], dtype='object'), + 'z': [1, 2]}), + table_name_col='.') + + def test_obj_string_table(self): + self._test_pyobjstr_table('string') + + self.assertEqual( + _dataframe( + pd.DataFrame({ + '.': pd.Series(['x', 42], dtype='string'), + 'z': [1, 2]}), + table_name_col='.'), + 'x z=1i\n' + + '42 z=2i\n') + + def _test_pyobjstr_numpy_symbol(self, dtype): + df = pd.DataFrame({'a': pd.Series([ + 'a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
+ dtype=dtype)}) + buf = _dataframe(df, table_name='tbl1', symbols=True) + self.assertEqual( + buf, + 'tbl1,a=a\n' + + 'tbl1,a=q❤️p\n' + + 'tbl1,a=' + ('❤️' * 1200) + '\n' + + 'tbl1,a=Questo\\ è\\ un\\ qualcosa\n' + + 'tbl1,a=щось\n' + + 'tbl1,a=\n' + + 'tbl1,a=嚜꓂\n' + + 'tbl1,a=💩🦞\n') + + for null_obj in (None, float('nan'), pd.NA): + self.assertEqual( + _dataframe( + pd.DataFrame({ + 'x': pd.Series(['a', null_obj], dtype=dtype), + 'y': [1, 2]}), + table_name='tbl1', symbols=[0]), + 'tbl1,x=a y=1i\n' + + 'tbl1 y=2i\n') + + def test_obj_str_numpy_symbol(self): + self._test_pyobjstr_numpy_symbol('object') + + with self.assertRaisesRegex( + qi.IngressError, 'Expected a string, got an .* type int'): + _dataframe( + pd.DataFrame({ + 'x': pd.Series(['x', 42], dtype='object'), + 'y': [1, 2]}), + table_name='tbl1', symbols=[0]) + + def test_obj_string_numpy_symbol(self): + self._test_pyobjstr_numpy_symbol('string') + + self.assertEqual( + _dataframe( + pd.DataFrame({ + 'x': pd.Series(['x', 42], dtype='string'), + 'y': [1, 2]}), + table_name='tbl1', symbols=[0]), + 'tbl1,x=x y=1i\n' + + 'tbl1,x=42 y=2i\n') + + def test_str_numpy_col(self): + df = pd.DataFrame({'a': pd.Series([ + 'a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. + dtype='str')}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 a="a"\n' + + 'tbl1 a="q❤️p"\n' + + 'tbl1 a="' + ('❤️' * 1200) + '"\n' + + 'tbl1 a="Questo è un qualcosa"\n' + + 'tbl1 a="щось"\n' + + 'tbl1 a=""\n' + + 'tbl1 a="嚜꓂"\n' + + 'tbl1 a="💩🦞"\n') + + def test_str_arrow_table(self): + df = pd.DataFrame({ + '../bad col name/../it does not matter...': pd.Series([ + 'a', # ASCII + 'b' * 127, # Max table name length. + 'q❤️p', # Mixed ASCII and UCS-2 + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. + dtype='string[pyarrow]'), + 'b': [1, 2, 3, 4, 5]}) + buf = _dataframe(df, table_name_col=0) + self.assertEqual( + buf, + 'a b=1i\n' + + ('b' * 127) + ' b=2i\n' + + 'q❤️p b=3i\n' + + '嚜꓂ b=4i\n' + + '💩🦞 b=5i\n') + + with self.assertRaisesRegex( + qi.IngressError, "Too long"): + _dataframe( + pd.DataFrame({ + 'a': pd.Series(['b' * 128], dtype='string[pyarrow]')}), + table_name_col='a') + + with self.assertRaisesRegex( + qi.IngressError, "Failed .*.*Table name cannot be null"): + _dataframe( + pd.DataFrame({ + '.': pd.Series(['x', None], dtype='string[pyarrow]'), + 'b': [1, 2]}), + table_name_col='.') + + with self.assertRaisesRegex( + qi.IngressError, "''.*must have a non-zero length"): + _dataframe( + pd.DataFrame({ + '/': pd.Series([''], dtype='string[pyarrow]')}), + table_name_col='/') + + with self.assertRaisesRegex( + qi.IngressError, "'tab..1'.*invalid dot `\\.` at position 4"): + _dataframe( + pd.DataFrame({ + '/': pd.Series(['tab..1'], dtype='string[pyarrow]')}), + table_name_col='/') + + def test_str_arrow_symbol(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 'a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + None, + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. 
+ dtype='string[pyarrow]'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) + buf = _dataframe(df, table_name='tbl1', symbols=True) + self.assertEqual( + buf, + 'tbl1,a=a b=1i\n' + + 'tbl1,a=q❤️p b=2i\n' + + 'tbl1,a=' + ('❤️' * 1200) + ' b=3i\n' + + 'tbl1,a=Questo\\ è\\ un\\ qualcosa b=4i\n' + + 'tbl1,a=щось b=5i\n' + + 'tbl1,a= b=6i\n' + + 'tbl1 b=7i\n' + + 'tbl1,a=嚜꓂ b=8i\n' + + 'tbl1,a=💩🦞 b=9i\n') + + def test_str_arrow_col(self): + df = pd.DataFrame({ + 'a': pd.Series([ + 'a', # ASCII + 'q❤️p', # Mixed ASCII and UCS-2 + '❤️' * 1200, # Over the 1024 buffer prealloc. + 'Questo è un qualcosa', # Non-ASCII UCS-1 + 'щось', # UCS-2, 2 bytes for UTF-8. + '', # Empty string + None, + '嚜꓂', # UCS-2, 3 bytes for UTF-8. + '💩🦞'], # UCS-4, 4 bytes for UTF-8. + dtype='string[pyarrow]'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9]}) + buf = _dataframe(df, table_name='tbl1', symbols=False) + self.assertEqual( + buf, + 'tbl1 a="a",b=1i\n' + + 'tbl1 a="q❤️p",b=2i\n' + + 'tbl1 a="' + ('❤️' * 1200) + '",b=3i\n' + + 'tbl1 a="Questo è un qualcosa",b=4i\n' + + 'tbl1 a="щось",b=5i\n' + + 'tbl1 a="",b=6i\n' + + 'tbl1 b=7i\n' + + 'tbl1 a="嚜꓂",b=8i\n' + + 'tbl1 a="💩🦞",b=9i\n') + + def test_pyobj_int_col(self): + int64_min = -2**63 + int64_max = 2**63 - 1 + self.assertEqual( + _dataframe( + pd.DataFrame({ + 'a': pd.Series([ + 1, 2, 3, None, float('nan'), pd.NA, 7, + 0, + int64_min, + int64_max], dtype='object'), + 'b': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}), + table_name='tbl1'), + 'tbl1 a=1i,b=1i\n' + + 'tbl1 a=2i,b=2i\n' + + 'tbl1 a=3i,b=3i\n' + + 'tbl1 b=4i\n' + + 'tbl1 b=5i\n' + + 'tbl1 b=6i\n' + + 'tbl1 a=7i,b=7i\n' + + 'tbl1 a=0i,b=8i\n' + + 'tbl1 a=' + str(int64_min) + 'i,b=9i\n' + + 'tbl1 a=' + str(int64_max) + 'i,b=10i\n') + + with self.assertRaisesRegex( + qi.IngressError, "1 \\('STRING'\\): .*type int, got.*str\\."): + _dataframe( + pd.DataFrame({ + 'a': pd.Series([1, 'STRING'], dtype='object'), + 'b': [1, 2]}), + table_name='tbl1') + + out_of_range = [int64_min - 1, int64_max + 1] + for num in out_of_range: + with self.assertRaisesRegex( + qi.IngressError, "index 1 .*922337203685477.*int too big"): + _dataframe( + pd.DataFrame({ + 'a': pd.Series([1, num], dtype='object'), + 'b': [1, 2]}), + table_name='tbl1') + + def test_pyobj_float_col(self): + self.assertEqual( + _dataframe( + pd.DataFrame({ + 'a': pd.Series( + [1.0, 2.0, 3.0, None, float('nan'), pd.NA, 7.0], + dtype='object'), + 'b': [1, 2, 3, 4, 5, 6, 7]}), + table_name='tbl1'), + 'tbl1 a=1.0,b=1i\n' + + 'tbl1 a=2.0,b=2i\n' + + 'tbl1 a=3.0,b=3i\n' + + 'tbl1 b=4i\n' + + 'tbl1 a=NaN,b=5i\n' + + 'tbl1 b=6i\n' + + 'tbl1 a=7.0,b=7i\n') + + with self.assertRaisesRegex( + qi.IngressError, "1 \\('STRING'\\): .*type float, got.*str\\."): + _dataframe( + pd.DataFrame({ + 'a': pd.Series([1.0, 'STRING'], dtype='object'), + 'b': [1, 2]}), + table_name='tbl1') + + def test_bad_category(self): + # We only support string categories + # (unless anyone asks for additional ones). + # We want to test others are rejected. 
+ with self.assertRaisesRegex( + qi.IngressError, "Bad column 'a'.*got a category of .*int64"): + _dataframe( + pd.DataFrame({'a': pd.Series([1, 2, 3, 2], dtype='category')}), + table_name='tbl1') + + def _test_cat_table(self, count): + slist = [f's{i}' for i in range(count)] + + df = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + buf = _dataframe(df, table_name_col=0) + exp = ''.join( + f'{s} b={i}i\n' + for i, s in enumerate(slist)) + self.assertEqual(buf, exp) + + slist[2] = None + df2 = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + with self.assertRaisesRegex( + qi.IngressError, 'Table name cannot be null'): + _dataframe(df2, table_name_col=0) + + def test_cat_i8_table(self): + self._test_cat_table(30) + self._test_cat_table(127) + + def test_cat_i16_table(self): + self._test_cat_table(128) + self._test_cat_table(4000) + self._test_cat_table(32767) + + def test_cat_i32_table(self): + self._test_cat_table(32768) + self._test_cat_table(40000) + + def _test_cat_symbol(self, count): + slist = [f's{i}' for i in range(count)] + + df = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + buf = _dataframe(df, table_name='tbl1', symbols=True) + exp = ''.join( + f'tbl1,a={s} b={i}i\n' + for i, s in enumerate(slist)) + self.assertEqual(buf, exp) + + slist[2] = None + df2 = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + exp2 = exp.replace('tbl1,a=s2 b=2i\n', 'tbl1 b=2i\n') + buf2 = _dataframe(df2, table_name='tbl1', symbols=True) + self.assertEqual(buf2, exp2) + + def test_cat_i8_symbol(self): + self._test_cat_symbol(30) + self._test_cat_symbol(127) + + def test_cat_i16_symbol(self): + self._test_cat_symbol(128) + self._test_cat_symbol(4000) + self._test_cat_symbol(32767) + + def test_cat_i32_symbol(self): + self._test_cat_symbol(32768) + self._test_cat_symbol(40000) + + def _test_cat_str(self, count): + slist = [f's{i}' for i in range(count)] + + df = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + buf = _dataframe(df, table_name='tbl1', symbols=False) + exp = ''.join( + f'tbl1 a="{s}",b={i}i\n' + for i, s in enumerate(slist)) + self.assertEqual(buf, exp) + + slist[2] = None + df2 = pd.DataFrame({ + 'a': pd.Series(slist, dtype='category'), + 'b': list(range(len(slist)))}) + + exp2 = exp.replace('tbl1 a="s2",b=2i\n', 'tbl1 b=2i\n') + buf2 = _dataframe(df2, table_name='tbl1', symbols=False) + self.assertEqual(buf2, exp2) + + def test_cat_i8_str(self): + self._test_cat_str(30) + self._test_cat_str(127) + + def test_cat_i16_str(self): + self._test_cat_str(128) + self._test_cat_str(4000) + self._test_cat_str(32767) + + def test_cat_i32_str(self): + self._test_cat_str(32768) + self._test_cat_str(40000) + + def test_all_nulls_pyobj_col(self): + df = pd.DataFrame({ + 'a': [None, pd.NA, float('nan')], + 'b': [1, 2, 3]}) + buf = _dataframe(df, table_name='tbl1') + self.assertEqual( + buf, + 'tbl1 b=1i\n' + + 'tbl1 b=2i\n' + + 'tbl1 b=3i\n') + + def test_strided_numpy_column(self): + two_d = np.array([ + [1, 10], + [2, 20], + [3, 30]], dtype='int64') + col2 = two_d[:, 1] + col2.flags['WRITEABLE'] = False + + # Checking our test case setup. 
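+        # (`col2` is a view over every second int64 of `two_d`, so its only
+        # stride is 16 bytes instead of the 8 a contiguous column would have.)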
+        mv = memoryview(col2)
+        self.assertEqual(mv.contiguous, False)
+        self.assertEqual(mv.strides, (16,))
+
+        df = pd.DataFrame(col2, copy=False)
+        df.columns = ['a']
+
+        with self.assertRaisesRegex(
+                qi.IngressError, "Bad column 'a': .*not.*contiguous"):
+            _dataframe(df, table_name='tbl1')
+
+    def test_serializing_in_chunks(self):
+        df = pd.DataFrame({
+            'a': pd.Series(np.arange(30), dtype='int64'),
+            'b': pd.Series(np.arange(30), dtype='Int64')})
+        parts = [
+            df.iloc[:10],
+            df.iloc[10:20],
+            df.iloc[20:]]
+        for index, part in enumerate(parts):
+            buf = _dataframe(part, table_name='tbl1')
+            exp = ''.join(
+                f'tbl1 a={i}i,b={i}i\n'
+                for i in range(index * 10, (index + 1) * 10))
+            self.assertEqual(buf, exp)
+
+    def test_arrow_chunked_array(self):
+        # We build a table with chunked arrow arrays as columns.
+        chunks_a = [
+            pa.array([1, 2, 3], type=pa.int16()),
+            pa.array([4, 5, 6], type=pa.int16()),
+            pa.array([], type=pa.int16()),
+            pa.array([7, 8, 9], type=pa.int16())]
+        chunked_a = pa.chunked_array(chunks_a)
+        chunks_b = [
+            pa.array([10, 20], type=pa.int32()),
+            pa.array([], type=pa.int32()),
+            pa.array([30, 40, 50, 60], type=pa.int32()),
+            pa.array([70, 80, 90], type=pa.int32())]
+        chunked_b = pa.chunked_array(chunks_b)
+        arr_tab = pa.Table.from_arrays([chunked_a, chunked_b], names=['a', 'b'])
+
+        # NOTE!
+        # This does *not* preserve the chunking of the arrow arrays.
+        df = arr_tab.to_pandas()
+        buf = _dataframe(df, table_name='tbl1')
+        exp = (
+            'tbl1 a=1i,b=10i\n' +
+            'tbl1 a=2i,b=20i\n' +
+            'tbl1 a=3i,b=30i\n' +
+            'tbl1 a=4i,b=40i\n' +
+            'tbl1 a=5i,b=50i\n' +
+            'tbl1 a=6i,b=60i\n' +
+            'tbl1 a=7i,b=70i\n' +
+            'tbl1 a=8i,b=80i\n' +
+            'tbl1 a=9i,b=90i\n')
+        self.assertEqual(buf, exp)
+
+        if not hasattr(pd, 'ArrowDtype'):
+            # We don't have pandas ArrowDtype, so we can't test the rest.
+            return
+
+        # To preserve the chunking we need to use a special pandas type:
+        pandarrow_a = pd.array(chunked_a, dtype='int16[pyarrow]')
+        pandarrow_b = pd.array(chunked_b, dtype='int32[pyarrow]')
+        df = pd.DataFrame({'a': pandarrow_a, 'b': pandarrow_b})
+
+        # Note that this dtype is currently experimental, so we don't support
+        # it yet. We have everything in place should we need to; for now we
+        # just test that we raise a nice error.
+ with self.assertRaisesRegex( + qi.IngressError, + "Unsupported dtype int16\[pyarrow\] for column 'a'.*github"): + _dataframe(df, table_name='tbl1') + + @unittest.skipIf(not fastparquet, 'fastparquet not installed') + @with_tmp_dir + def test_parquet_roundtrip(self, tmpdir): + pa_parquet_path = tmpdir / 'test_pa.parquet' + fp_parquet_path = tmpdir / 'test_fp.parquet' + df = pd.DataFrame({ + 's': pd.Categorical(['a', 'b', 'a', 'c', 'a']), + 'a': pd.Series([1, 2, 3, 4, 5], dtype='int16'), + 'b': pd.Series([10, 20, 30, None, 50], dtype='UInt8'), + 'c': [0.5, float('nan'), 2.5, 3.5, None]}) + df.to_parquet(pa_parquet_path, engine='pyarrow') + df.to_parquet(fp_parquet_path, engine='fastparquet') + pa2pa_df = pd.read_parquet(pa_parquet_path, engine='pyarrow') + pa2fp_df = pd.read_parquet(pa_parquet_path, engine='fastparquet') + fp2pa_df = pd.read_parquet(fp_parquet_path, engine='pyarrow') + fp2fp_df = pd.read_parquet(fp_parquet_path, engine='fastparquet') + + exp_dtypes = ['category', 'int16', 'UInt8', 'float64'] + self.assertEqual(list(df.dtypes), exp_dtypes) + + def df_eq(exp_df, deser_df, exp_dtypes): + self.assertEqual(list(deser_df.dtypes), exp_dtypes) + if not exp_df.equals(deser_df): + print('\nexp_df:') + print(exp_df) + print('\ndeser_df:') + print(deser_df) + self.assertTrue(exp_df.equals(deser_df)) + + # fastparquet doesn't roundtrip with pyarrow parquet properly. + # It decays categories to object and UInt8 to float64. + # We need to set up special case expected results for that. + fallback_exp_dtypes = [ + np.dtype('O'), + np.dtype('int16'), + np.dtype('float64'), + np.dtype('float64')] + fallback_df = df.astype({'s': 'object', 'b': 'float64'}) + + df_eq(df, pa2pa_df, exp_dtypes) + df_eq(df, pa2fp_df, exp_dtypes) + df_eq(fallback_df, fp2pa_df, fallback_exp_dtypes) + df_eq(df, fp2fp_df, exp_dtypes) + + exp = ( + 'tbl1,s=a a=1i,b=10i,c=0.5\n' + + 'tbl1,s=b a=2i,b=20i,c=NaN\n' + + 'tbl1,s=a a=3i,b=30i,c=2.5\n' + + 'tbl1,s=c a=4i,c=3.5\n' + + 'tbl1,s=a a=5i,b=50i,c=NaN\n') + + fallback_exp = ( + 'tbl1 s="a",a=1i,b=10.0,c=0.5\n' + + 'tbl1 s="b",a=2i,b=20.0,c=NaN\n' + + 'tbl1 s="a",a=3i,b=30.0,c=2.5\n' + + 'tbl1 s="c",a=4i,b=NaN,c=3.5\n' + + 'tbl1 s="a",a=5i,b=50.0,c=NaN\n') + + self.assertEqual(_dataframe(df, table_name='tbl1'), exp) + self.assertEqual(_dataframe(pa2pa_df, table_name='tbl1'), exp) + self.assertEqual(_dataframe(pa2fp_df, table_name='tbl1'), exp) + self.assertEqual(_dataframe(fp2pa_df, table_name='tbl1'), fallback_exp) + self.assertEqual(_dataframe(fp2fp_df, table_name='tbl1'), exp) + + +if __name__ == '__main__': + if os.environ.get('TEST_QUESTDB_PROFILE') == '1': + import cProfile + cProfile.run('unittest.main()', sort='cumtime') + else: + unittest.main() diff --git a/test/test_dataframe_fuzz.py b/test/test_dataframe_fuzz.py new file mode 100644 index 00000000..991c5d9e --- /dev/null +++ b/test/test_dataframe_fuzz.py @@ -0,0 +1,192 @@ +""" +# On Linux, ensure `clang` is installed. 
+pyenv shell 3.10
+./proj clean
+./proj build_fuzzing
+./proj test_fuzzing
+"""
+
+import sys
+import struct
+import patch_path
+patch_path.patch()
+import numpy as np
+from numpy.random import Generator, PCG64
+import pandas as pd
+import pyarrow as pa
+import re
+import atheris
+
+
+with atheris.instrument_imports():
+    import questdb.ingress as qi
+
+
+@atheris.instrument_func
+def get_test_alphabet():
+    include_ranges = [
+        (0x0021, 0x0021),
+        (0x0023, 0x0026),
+        (0x0028, 0x007E),
+        (0x00A1, 0x00AC),
+        (0x00AE, 0x00FF),
+        (0x0100, 0x017F),
+        (0x0180, 0x024F),
+        (0x2C60, 0x2C7F),
+        (0x16A0, 0x16F0),
+        (0x0370, 0x0377),
+        (0x037A, 0x037E),
+        (0x0384, 0x038A),
+        (0x038C, 0x038C)]
+    return [
+        chr(code_point)
+        for current_range in include_ranges
+        for code_point in range(current_range[0], current_range[1] + 1)]
+
+
+TEST_ALPHABET = get_test_alphabet()
+
+
+def get_random_unicode(rand, length, none_val_prob=0):
+    """
+    Adapted from https://stackoverflow.com/questions/1477294
+    """
+    if none_val_prob and (rand.random() < none_val_prob):
+        return None
+    return ''.join(rand.choice(TEST_ALPHABET) for _ in range(length))
+
+
+@atheris.instrument_func
+def gen_string_series(rand, n_rows, none_val_prob, length, dtype):
+    series_n_rows = n_rows
+    if dtype == 'category':
+        series_n_rows //= 4
+    data = [
+        get_random_unicode(rand, length, none_val_prob)
+        for _ in range(series_n_rows)]
+    if dtype == 'category':
+        data = data * 6
+        data = data[:n_rows]
+        rand.shuffle(data)
+    return pd.Series(data, dtype=dtype)
+
+
+def gen_numpy_series(rand, n_rows, dtype):
+    return pd.Series(
+        rand.integers(
+            np.iinfo(dtype).min,
+            np.iinfo(dtype).max,
+            size=n_rows,
+            dtype=dtype))
+
+
+@atheris.instrument_func
+def gen_series_i8_numpy(rand, n_rows, none_val_prob):
+    return gen_numpy_series(rand, n_rows, np.int8)
+
+
+@atheris.instrument_func
+def gen_series_pyobj_str(rand, n_rows, none_val_prob):
+    return gen_string_series(rand, n_rows, none_val_prob, 6, 'object')
+
+
+# TODO: Test all datatypes
+# TODO: Include None, NA and NaN.
+series_generators = [
+    gen_series_i8_numpy,
+    # gen_series_i16_numpy,
+    gen_series_pyobj_str]
+
+
+
+@atheris.instrument_func
+def parse_input_bytes(input_bytes):
+    fdp = atheris.FuzzedDataProvider(input_bytes)
+    rand_seed = fdp.ConsumeUInt(1)
+    none_val_prob = fdp.ConsumeProbability()
+    table_name_type = fdp.ConsumeIntInRange(0, 4)
+    table_name_len = fdp.ConsumeIntInRange(1, 32)
+    n_cols = fdp.ConsumeIntInRange(10, 40)
+    col_generators = [
+        series_generators[fdp.ConsumeIntInRange(0, len(series_generators) - 1)]
+        for _ in range(n_cols)]
+    n_rows = fdp.ConsumeIntInRange(10, 5000)
+    rand = Generator(PCG64(rand_seed))
+    series_list = []
+    col_name = lambda: f'{get_random_unicode(rand, 4)}_{len(series_list)}'
+    table_name = None
+    table_name_col = None
+    symbols = 'auto'
+    at = None
+    if table_name_type == 0:
+        table_name = get_random_unicode(rand, table_name_len)
+    else:
+        table_name_col = col_name()
+        dtype = {
+            1: 'object',
+            2: 'string',
+            3: 'string[pyarrow]',
+            4: 'category'}[table_name_type]
+        series = gen_string_series(rand, n_rows, 0, table_name_len, dtype)
+        series_list.append((table_name_col, series))
+
+    for index in range(n_cols):
+        name = col_name()
+        series = col_generators[index](rand, n_rows, none_val_prob)
+        series_list.append((name, series))
+    rand.shuffle(series_list)
+    series = dict([
+        (name, series)
+        for name, series in series_list])
+    df = pd.DataFrame(series)
+    return df, table_name, table_name_col, symbols, at
+
+
+@atheris.instrument_func
+def test_dataframe(input_bytes):
+    # print(f'input_bytes: {input_bytes}')
+    params = parse_input_bytes(input_bytes)
+    df, table_name, table_name_col, symbols, at = params
+
+    try:
+        BUF = qi.Buffer()
+        BUF.clear()
+        try:
+            BUF.dataframe(
+                df,
+                table_name=table_name,
+                table_name_col=table_name_col,
+                symbols=symbols,
+                at=at)
+        except Exception as e:
+            if isinstance(e, (qi.IngressError)):
+                msg = str(e)
+                if 'Bad argument `table_name`' in msg:
+                    return
+                if re.search(r'Failed .*Bad string.*', msg):
+                    return
+                if re.search(r'Bad string .*: Column names', msg):
+                    return
+            if 'Ensure at least one column is not null.' in msg:
+                return
+            raise e
+    except:
+        print('>>>>>>>>>')
+        print(f'input_bytes: {input_bytes!r}')
+        print(f'df: {df}')
+        print(f'table_name: {table_name}')
+        print(f'table_name_col: {table_name_col}')
+        print(f'symbols: {symbols}')
+        print(f'at: {at}')
+        print('<<<<<<<<<')
+        raise
+
+
+def main():
+    args = list(sys.argv)
+    atheris.Setup(args, test_dataframe)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/test_dataframe_leaks.py b/test/test_dataframe_leaks.py
new file mode 100644
index 00000000..2f33313c
--- /dev/null
+++ b/test/test_dataframe_leaks.py
@@ -0,0 +1,43 @@
+import patch_path
+patch_path.patch()
+
+import pandas as pd
+import questdb.ingress as qi
+
+import os, psutil
+process = psutil.Process(os.getpid())
+
+def get_rss():
+    return process.memory_info().rss
+
+
+def serialize_and_cleanup():
+    # qi.Buffer().row(
+    #     'table_name',
+    #     symbols={'x': 'a', 'y': 'b'},
+    #     columns={'a': 1, 'b': 2, 'c': 3})
+    df = pd.DataFrame({
+        'a': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+        'b': [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+        'c': [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]})
+    qi.Buffer().dataframe(df, table_name='test')
+
+
+def main():
+    warmup_count = 0
+    for n in range(1000000):
+        if n % 1000 == 0:
+            print(f'[iter: {n:09}, RSS: {get_rss():010}]')
+        if n > warmup_count:
+            before = get_rss()
+        serialize_and_cleanup()
+        if n > warmup_count:
+            after = get_rss()
+            if after != before:
+                msg = f'RSS changed from {before} to {after} after {n} iters'
+                print(msg)
+
+
+if __name__ == '__main__':
+    main()
+    
\ No newline at end of file