From 0bf9db4876a12289624586cd25594b0d6801da50 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Tue, 3 Jun 2025 10:28:59 +0100 Subject: [PATCH 01/30] pointing submodule to v5 branch --- c-questdb-client | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c-questdb-client b/c-questdb-client index fd24e025..a5243037 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit fd24e0258f6b86a457037013cc42459e5bb9475b +Subproject commit a5243037588592152916096dbf3a1242ee54dbc1 From a33997705e1533d43d2a4edef6acf0b96160495e Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Tue, 3 Jun 2025 10:35:32 +0100 Subject: [PATCH 02/30] Reverting unnecessary name change --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 04923f6f..3592fcb6 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ QuestDB Client Library for Python This is the official Python client library for `QuestDB `_. This client library implements QuestDB's variant of the -`Ingestion Line Protocol `_ +`InfluxDB Line Protocol `_ (ILP) over HTTP and TCP. ILP provides the fastest way to insert data into QuestDB. 
From bde6396b81ddeb1d36dd35f3e67e5c435bfae5d6 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Tue, 3 Jun 2025 12:39:58 +0100 Subject: [PATCH 03/30] consolidating array error codes --- c-questdb-client | 2 +- src/questdb/ingress.pyi | 2 +- src/questdb/ingress.pyx | 14 ++++---------- src/questdb/line_sender.pxd | 4 +--- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index a5243037..04177649 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit a5243037588592152916096dbf3a1242ee54dbc1 +Subproject commit 041776490cc65d2d92380be89e549d65e98eddda diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index cd6f2085..63fc7200 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -57,7 +57,7 @@ class IngressErrorCode(Enum): ConfigError = ... ArrayLargeDimError = ... ArrayInternalError = ... - ArrayWriteToBufferError = ... + ArrayError = ... ProtocolVersionError = ... BadDataFrame = ... diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 524ae2e3..61915698 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -131,9 +131,7 @@ class IngressErrorCode(Enum): HttpNotSupported = line_sender_error_http_not_supported ServerFlushError = line_sender_error_server_flush_error ConfigError = line_sender_error_config_error - ArrayLargeDimError = line_sender_error_array_large_dim - ArrayInternalError = line_sender_error_array_view_internal_error - ArrayWriteToBufferError = line_sender_error_array_view_write_to_buffer_error + ArrayError = line_sender_error_array_error ProtocolVersionError = line_sender_error_protocol_version_error BadDataFrame = line_sender_error_protocol_version_error + 1 @@ -177,12 +175,8 @@ cdef inline object c_err_code_to_py(line_sender_error_code code): return IngressErrorCode.ServerFlushError elif code == line_sender_error_config_error: return IngressErrorCode.ConfigError - elif code == line_sender_error_array_large_dim: - 
return IngressErrorCode.ArrayLargeDimError - elif code == line_sender_error_array_view_internal_error: - return IngressErrorCode.ArrayInternalError - elif code == line_sender_error_array_view_write_to_buffer_error: - return IngressErrorCode.ArrayWriteToBufferError + elif code == line_sender_error_array_error: + return IngressErrorCode.ArrayError elif code == line_sender_error_protocol_version_error: return IngressErrorCode.ProtocolVersionError else: @@ -964,7 +958,7 @@ cdef class Buffer: self, line_sender_column_name c_name, cnp.ndarray arr) except -1: if cnp.PyArray_TYPE(arr) != cnp.NPY_FLOAT64: raise IngressError( - IngressErrorCode.ArrayWriteToBufferError, + IngressErrorCode.ArrayError, f'Only float64 numpy arrays are supported, got dtype: {arr.dtype}') cdef: size_t rank = cnp.PyArray_NDIM(arr) diff --git a/src/questdb/line_sender.pxd b/src/questdb/line_sender.pxd index 8a28c0d3..aaecb112 100644 --- a/src/questdb/line_sender.pxd +++ b/src/questdb/line_sender.pxd @@ -40,9 +40,7 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error_http_not_supported, line_sender_error_server_flush_error, line_sender_error_config_error, - line_sender_error_array_large_dim - line_sender_error_array_view_internal_error - line_sender_error_array_view_write_to_buffer_error + line_sender_error_array_error line_sender_error_protocol_version_error cdef enum line_sender_protocol: From 47476db404a7aa32f80aa652863604786958d618 Mon Sep 17 00:00:00 2001 From: victor Date: Wed, 4 Jun 2025 16:24:09 +0800 Subject: [PATCH 04/30] add c-major array layout api. 
--- c-questdb-client | 2 +- src/questdb/dataframe.pxi | 34 +++++++++++++++++++++++----------- src/questdb/extra_numpy.pxd | 2 ++ src/questdb/ingress.pyx | 32 +++++++++++++++++++++----------- src/questdb/line_sender.pxd | 10 ++++++++++ 5 files changed, 57 insertions(+), 23 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index 04177649..d8843d4a 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 041776490cc65d2d92380be89e549d65e98eddda +Subproject commit d8843d4a28a9d0f8b063dd0d20c51f50b0a792d1 diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi index 2693250d..2ff76757 100644 --- a/src/questdb/dataframe.pxi +++ b/src/questdb/dataframe.pxi @@ -2065,17 +2065,29 @@ cdef void_int _dataframe_serialize_cell_column_arr_f64__arr_f64_numpyobj( cdef: size_t rank = PyArray_NDIM(arr) const uint8_t* data_ptr = PyArray_DATA(arr) - line_sender_error * err = NULL - if not line_sender_buffer_column_f64_arr_byte_strides( - ls_buf, - col.name, - rank, - PyArray_DIMS(arr), - PyArray_STRIDES(arr), # N.B.: Strides expressed as byte jumps - data_ptr, - PyArray_NBYTES(arr), - &err): - raise c_err_to_py(err) + line_sender_error * err = NULL\ + + if PyArray_FLAGS(arr) & NPY_ARRAY_C_CONTIGUOUS != 0: + if not line_sender_buffer_column_f64_arr_c_major( + ls_buf, + col.name, + rank, + PyArray_DIMS(arr), + data_ptr, + PyArray_NBYTES(arr), + &err): + raise c_err_to_py(err) + else: + if not line_sender_buffer_column_f64_arr_byte_strides( + ls_buf, + col.name, + rank, + PyArray_DIMS(arr), + PyArray_STRIDES(arr), # N.B.: Strides expressed as byte jumps + data_ptr, + PyArray_NBYTES(arr), + &err): + raise c_err_to_py(err) cdef void_int _dataframe_serialize_cell_column_ts__dt64ns_tz_arrow( line_sender_buffer* ls_buf, diff --git a/src/questdb/extra_numpy.pxd b/src/questdb/extra_numpy.pxd index 3aa5f71f..45810f31 100644 --- a/src/questdb/extra_numpy.pxd +++ b/src/questdb/extra_numpy.pxd @@ -8,6 +8,7 @@ from cpython.object cimport PyObject from 
numpy cimport ( # Constants NPY_DOUBLE, # N.B.: From `#include `: `#define NPY_FLOAT64 NPY_DOUBLE` + NPY_ARRAY_C_CONTIGUOUS, # Types PyArrayObject, @@ -31,3 +32,4 @@ cdef extern from "numpy/arrayobject.h": npy_intp PyArray_DIM(PyArrayObject*, size_t) nogil npy_intp PyArray_STRIDE(PyArrayObject*, size_t) nogil int PyArray_NDIM(PyArrayObject*) nogil + int PyArray_FLAGS(PyArrayObject*) diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 61915698..19672115 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -74,7 +74,6 @@ ctypedef int void_int import cython include "dataframe.pxi" - from enum import Enum from typing import List, Tuple, Dict, Union, Any, Optional, Callable, \ Iterable @@ -965,16 +964,27 @@ cdef class Buffer: const uint8_t * data_ptr = cnp.PyArray_DATA(arr) line_sender_error * err = NULL - if not line_sender_buffer_column_f64_arr_byte_strides( - self._impl, - c_name, - rank, - cnp.PyArray_DIMS(arr), - cnp.PyArray_STRIDES(arr), # N.B.: Strides expressed as byte jumps - data_ptr, - cnp.PyArray_NBYTES(arr), - &err): - raise c_err_to_py(err) + if cnp.PyArray_FLAGS(arr) & cnp.NPY_ARRAY_C_CONTIGUOUS != 0: + if not line_sender_buffer_column_f64_arr_c_major( + self._impl, + c_name, + rank, + cnp.PyArray_DIMS(arr), + data_ptr, + cnp.PyArray_NBYTES(arr), + &err): + raise c_err_to_py(err) + else: + if not line_sender_buffer_column_f64_arr_byte_strides( + self._impl, + c_name, + rank, + cnp.PyArray_DIMS(arr), + cnp.PyArray_STRIDES(arr), # N.B.: Strides expressed as byte jumps + data_ptr, + cnp.PyArray_NBYTES(arr), + &err): + raise c_err_to_py(err) cdef inline void_int _column_dt( self, line_sender_column_name c_name, datetime dt) except -1: diff --git a/src/questdb/line_sender.pxd b/src/questdb/line_sender.pxd index aaecb112..ead3d10e 100644 --- a/src/questdb/line_sender.pxd +++ b/src/questdb/line_sender.pxd @@ -228,6 +228,16 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_error** err_out ) noexcept nogil + bint 
line_sender_buffer_column_f64_arr_c_major( + line_sender_buffer* buffer, + line_sender_column_name name, + size_t rank, + const size_t* shapes, + const uint8_t* data_buffer, + size_t data_buffer_len, + line_sender_error** err_out + ) noexcept nogil + bint line_sender_buffer_column_f64_arr_byte_strides( line_sender_buffer* buffer, line_sender_column_name name, From ab6882bc8022275d92374ee0e025825202dca77c Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 9 Jun 2025 09:44:07 +0800 Subject: [PATCH 05/30] update numpy dep version. --- c-questdb-client | 2 +- pyproject.toml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index d8843d4a..87e237cd 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit d8843d4a28a9d0f8b063dd0d20c51f50b0a792d1 +Subproject commit 87e237cd18571210f010fe2afc5ac8016359daee diff --git a/pyproject.toml b/pyproject.toml index b7b94fe3..1ed3bfd8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "Topic :: Database :: Front-Ends", "Topic :: Scientific/Engineering" ] -dependencies = ["numpy>=1.26.0"] # Keep in sync with build-system.requires +dependencies = ["numpy>=1.21.0"] # Keep in sync with build-system.requires [project.license] text = "Apache License 2.0" @@ -46,7 +46,7 @@ requires = [ "setuptools>=45.2.0", "wheel>=0.34.2", "cython>=0.29.24", - "numpy>=1.26.0", # keep in sync with project.dependencies + "numpy>=1.21.0", # keep in sync with project.dependencies ] From cfa4104509fb2a36c910935e94ea0239081e3714 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 9 Jun 2025 13:05:03 +0800 Subject: [PATCH 06/30] add ci on various numpy version. 
--- ci/run_tests_pipeline.yaml | 30 ++++++++++++++++++++++++++++++ dev_requirements.txt | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 3a3d848d..6136d453 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -68,3 +68,33 @@ stages: JAVA_HOME: $(JAVA_HOME_11_X64) QDB_REPO_PATH: './questdb' condition: eq(variables.vsQuestDbMaster, true) + - job: TestsAgainstVariousNumpyVersion + pool: + name: "Azure Pipelines" + vmImage: "ubuntu-latest" + timeoutInMinutes: 45 + steps: + - checkout: self + fetchDepth: 1 + lfs: false + submodules: true + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.11' + - script: | + python3 --version + python3 -m pip install uv + sudo apt-get install -y libopenblas-dev pkg-config + displayName: "Install uv" + - script: uv run --with 'numpy==1.21.0' test/test.py -v TestBufferProtocolVersionV2 + displayName: "Test vs numpy 1.21" + - script: uv run --with 'numpy==1.24.0' test/test.py -v TestBufferProtocolVersionV2 + displayName: "Test vs numpy 1.24" + - script: uv run --with 'numpy==1.26.0' test/test.py -v TestBufferProtocolVersionV2 + displayName: "Test vs numpy 1.26" + - script: uv run --with 'numpy==2.0.0' test/test.py -v TestBufferProtocolVersionV2 + displayName: "Test vs numpy 2.0" + - script: uv run --with 'numpy==2.2.0' test/test.py -v TestBufferProtocolVersionV2 + displayName: "Test vs numpy 2.2" + - script: uv run --with 'numpy==2.3.0' test/test.py -v TestBufferProtocolVersionV2 + displayName: "Test vs numpy 2.3" diff --git a/dev_requirements.txt b/dev_requirements.txt index 9d2076dc..36930bfb 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -7,6 +7,6 @@ sphinx-rtd-theme>=1.0.0 twine>=4.0.1 bump2version>=1.0.1 pandas>=1.3.5 -numpy>=1.21.6 +numpy>=1.21.0 pyarrow>=10.0.1 fastparquet>=2023.10.1 From d4a4f26993be6918826414e346f890656d7655f2 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 9 Jun 2025 13:18:37 +0800 
Subject: [PATCH 07/30] add ci on various numpy version --- ci/run_tests_pipeline.yaml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 6136d453..28c7031a 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -68,7 +68,7 @@ stages: JAVA_HOME: $(JAVA_HOME_11_X64) QDB_REPO_PATH: './questdb' condition: eq(variables.vsQuestDbMaster, true) - - job: TestsAgainstVariousNumpyVersion + - job: TestsAgainstVariousNumpyVersion1.x pool: name: "Azure Pipelines" vmImage: "ubuntu-latest" @@ -80,7 +80,7 @@ stages: submodules: true - task: UsePythonVersion@0 inputs: - versionSpec: '3.11' + versionSpec: '3.9' - script: | python3 --version python3 -m pip install uv @@ -92,6 +92,24 @@ stages: displayName: "Test vs numpy 1.24" - script: uv run --with 'numpy==1.26.0' test/test.py -v TestBufferProtocolVersionV2 displayName: "Test vs numpy 1.26" + - job: TestsAgainstVariousNumpyVersion2.x + pool: + name: "Azure Pipelines" + vmImage: "ubuntu-latest" + timeoutInMinutes: 45 + steps: + - checkout: self + fetchDepth: 1 + lfs: false + submodules: true + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.11' + - script: | + python3 --version + python3 -m pip install uv + sudo apt-get install -y libopenblas-dev pkg-config + displayName: "Install uv" - script: uv run --with 'numpy==2.0.0' test/test.py -v TestBufferProtocolVersionV2 displayName: "Test vs numpy 2.0" - script: uv run --with 'numpy==2.2.0' test/test.py -v TestBufferProtocolVersionV2 From dd4515ab0435c0a7290955b4b4766a4a6568ef20 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 9 Jun 2025 13:22:13 +0800 Subject: [PATCH 08/30] add ci on various numpy version --- ci/run_tests_pipeline.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 28c7031a..9fc3f52a 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ 
-68,7 +68,7 @@ stages: JAVA_HOME: $(JAVA_HOME_11_X64) QDB_REPO_PATH: './questdb' condition: eq(variables.vsQuestDbMaster, true) - - job: TestsAgainstVariousNumpyVersion1.x + - job: TestsAgainstVariousNumpyVersion1x pool: name: "Azure Pipelines" vmImage: "ubuntu-latest" @@ -92,7 +92,7 @@ stages: displayName: "Test vs numpy 1.24" - script: uv run --with 'numpy==1.26.0' test/test.py -v TestBufferProtocolVersionV2 displayName: "Test vs numpy 1.26" - - job: TestsAgainstVariousNumpyVersion2.x + - job: TestsAgainstVariousNumpyVersion2x pool: name: "Azure Pipelines" vmImage: "ubuntu-latest" From 2be594f1e6b0630471b7941fe814b4d99535229c Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 9 Jun 2025 16:43:47 +0800 Subject: [PATCH 09/30] fix doc warnings. --- docs/conf.py | 5 +++++ src/questdb/ingress.pyi | 2 +- src/questdb/ingress.pyx | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1c68b50e..c50d9dc0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,6 +5,11 @@ from questdb.ingress import * +autodoc_mock_imports = ["_cython"] +autodoc_type_aliases = { + 'datetime': 'datetime.datetime', +} + extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index 63fc7200..b4fb495f 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -189,7 +189,7 @@ class SenderTransaction: To create a transaction: - .. code_block:: python + .. code-block:: python with sender.transaction('table_name') as txn: txn.row(..) diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 19672115..6efeade9 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -586,7 +586,7 @@ cdef class SenderTransaction: To create a transaction: - .. code_block:: python + .. code-block:: python with sender.transaction('table_name') as txn: txn.row(..) 
From 79bfff7c4d72ae3633ea1dcf1489f2cc70663ab6 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 9 Jun 2025 17:06:17 +0800 Subject: [PATCH 10/30] fix test warnings. --- test/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_dataframe.py b/test/test_dataframe.py index 7e6b909d..df1822e2 100644 --- a/test/test_dataframe.py +++ b/test/test_dataframe.py @@ -1581,7 +1581,7 @@ def test_arrow_chunked_array(self): # need to, so - as for now - we just test that we raise a nice error. with self.assertRaisesRegex( qi.IngressError, - "Unsupported dtype int16\[pyarrow\] for column 'a'.*github"): + r"Unsupported dtype int16\[pyarrow\] for column 'a'.*github"): _dataframe(self.version, df, table_name='tbl1', at = qi.ServerTimestamp) @unittest.skipIf(not fastparquet, 'fastparquet not installed') From 0e016016f4dfae145a1391475e98a55b75e39d16 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 9 Jun 2025 18:18:07 +0800 Subject: [PATCH 11/30] doc first version of QuestDB support arrays. --- CHANGELOG.rst | 4 ++++ README.rst | 2 ++ docs/conf.rst | 3 +++ docs/sender.rst | 3 +++ src/questdb/ingress.pyi | 24 +++++++++++++++++++++--- src/questdb/ingress.pyx | 9 +++++++++ 6 files changed, 42 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ca005f9e..3598c1f8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,10 @@ Features * Array Data Type Support. Adds native support for NumPy arrays (currently only for ``np.float64`` element type and up to 32 dimensions). +.. note:: + **Server Requirement**: This feature requires QuestDB server version 8.4.0 or higher. + Ensure your server is upgraded before ingesting array types, otherwise data ingestion will fail. + .. code-block:: python import numpy as np diff --git a/README.rst b/README.rst index 3592fcb6..3d4a0ac2 100644 --- a/README.rst +++ b/README.rst @@ -53,6 +53,7 @@ The most common way to insert data is from a Pandas dataframe. 
'amount': [0.00044, 0.001], # NumPy float64 arrays are supported from v3.0.0rc1 onwards. + # Note that requires QuestDB server >= 8.4.0 for array support 'ord_book_bids': [ np.array([2615.54, 2618.63]), np.array([39269.98, 39270.00]) @@ -82,6 +83,7 @@ You can also send individual rows. This only requires a more minimal installatio 'amount': 0.00044, # NumPy float64 arrays are supported from v3.0.0rc1 onwards. + # Note that requires QuestDB server >= 8.4.0 for array support 'ord_book_bids': np.array([2615.54, 2618.63]), }, at=TimestampNanos.now()) diff --git a/docs/conf.rst b/docs/conf.rst index 6b0853b2..71812a3d 100644 --- a/docs/conf.rst +++ b/docs/conf.rst @@ -240,6 +240,9 @@ Specifies the version of InfluxDB Line Protocol to use. Valid options are: TCP/TCPS: Defaults to version 1 for compatibility +.. note:: + Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. + .. _sender_conf_buffer: Buffer diff --git a/docs/sender.rst b/docs/sender.rst index 602f1268..840c1b1a 100644 --- a/docs/sender.rst +++ b/docs/sender.rst @@ -302,6 +302,9 @@ Here is a configuration string with ``protocol_version=2`` for ``TCP``: See the :ref:`sender_conf_protocol_version` section for more details. +.. note:: + Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. + Error Reporting =============== diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index b4fb495f..7d5c206f 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -55,8 +55,6 @@ class IngressErrorCode(Enum): HttpNotSupported = ... ServerFlushError = ... ConfigError = ... - ArrayLargeDimError = ... - ArrayInternalError = ... ArrayError = ... ProtocolVersionError = ... BadDataFrame = ... @@ -212,6 +210,8 @@ class SenderTransaction: Write a row for the table in the transaction. The table name is taken from the transaction. + + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. 
""" def dataframe( @@ -295,6 +295,8 @@ class Buffer: This should match the ``cairo.max.file.name.length`` setting of the QuestDB instance you're connecting to. + **Note**: Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. + .. code-block:: python # These two buffer constructions are equivalent. @@ -450,6 +452,8 @@ class Buffer: * - ``None`` - *Column is skipped and not serialized.* + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. + If the destination table was already created, then the columns types will be cast to the types of the existing columns whenever possible (Refer to the QuestDB documentation pages linked above). @@ -727,6 +731,9 @@ class Buffer: interpreted as the current QuestDB server time set on receipt of message. + * **η**: Support for NumPy arrays (``np.array``) requires QuestDB + server version 8.4.0 or higher. + **Error Handling and Recovery** In case an exception is raised during dataframe serialization, the @@ -834,6 +841,7 @@ class Sender: auto_flush_rows: Optional[int] = None, auto_flush_bytes: bool = False, auto_flush_interval: int = 1000, + protocol_version=None, init_buf_size: int = 65536, max_name_len: int = 127, ): ... @@ -859,6 +867,7 @@ class Sender: auto_flush_rows: Optional[int] = None, auto_flush_bytes: bool = False, auto_flush_interval: int = 1000, + protocol_version=None, init_buf_size: int = 65536, max_name_len: int = 127, ) -> Sender: @@ -894,6 +903,7 @@ class Sender: auto_flush_rows: Optional[int] = None, auto_flush_bytes: bool = False, auto_flush_interval: int = 1000, + protocol_version=None, init_buf_size: int = 65536, max_name_len: int = 127, ) -> Sender: @@ -956,7 +966,13 @@ class Sender: @property def protocol_version(self) -> int: """ - Returns the QuestDB server's recommended default line protocol version. + The protocol version used by the sender. + + Protocol version 1 is retained for backwards compatibility with + older QuestDB versions. 
+ + Protocol version 2 introduces binary floating point support and + the array datatype. """ @property @@ -1020,6 +1036,8 @@ class Sender: in the constructor. Refer to the :func:`Buffer.row` documentation for details on arguments. + + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. """ def dataframe( diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 6efeade9..32d28675 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -645,6 +645,8 @@ cdef class SenderTransaction: Write a row for the table in the transaction. The table name is taken from the transaction. + + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. """ if at is None: raise IngressError( @@ -770,6 +772,7 @@ cdef class Buffer: Buffer Constructor Arguments: + * protocol_version (``int``): The protocol version to use. * ``init_buf_size`` (``int``): Initial capacity of the buffer in bytes. Defaults to ``65536`` (64KiB). * ``max_name_len`` (``int``): Maximum length of a column name. @@ -777,6 +780,8 @@ cdef class Buffer: This should match the ``cairo.max.file.name.length`` setting of the QuestDB instance you're connecting to. + **Note**: Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. + .. code-block:: python # These two buffer constructions are equivalent. @@ -1170,6 +1175,8 @@ cdef class Buffer: * - ``None`` - *Column is skipped and not serialized.* + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. + If the destination table was already created, then the columns types will be cast to the types of the existing columns whenever possible (Refer to the QuestDB documentation pages linked above). @@ -2472,6 +2479,8 @@ cdef class Sender: in the constructor. Refer to the :func:`Buffer.row` documentation for details on arguments. 
+ + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. """ if self._in_txn: raise IngressError( From fcada80e1863ba9fdaea84b45d32d3293c1332c9 Mon Sep 17 00:00:00 2001 From: victor Date: Wed, 18 Jun 2025 12:50:44 +0800 Subject: [PATCH 12/30] adapt new c interface. --- c-questdb-client | 2 +- src/questdb/dataframe.pxi | 8 ++++---- src/questdb/extra_numpy.pxd | 2 +- src/questdb/ingress.pyx | 6 +++--- src/questdb/line_sender.pxd | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/c-questdb-client b/c-questdb-client index 87e237cd..aa918b44 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 87e237cd18571210f010fe2afc5ac8016359daee +Subproject commit aa918b44cd5d7da59fe1c3ca55eb823fb7b8d8ff diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi index ba9dd484..dcd6624b 100644 --- a/src/questdb/dataframe.pxi +++ b/src/questdb/dataframe.pxi @@ -2064,8 +2064,8 @@ cdef void_int _dataframe_serialize_cell_column_arr_f64__arr_f64_numpyobj( f'Only float64 numpy arrays are supported, got dtype: {arr_descr}') cdef: size_t rank = PyArray_NDIM(arr) - const uint8_t* data_ptr = PyArray_DATA(arr) - line_sender_error * err = NULL\ + const double* data_ptr = PyArray_DATA(arr) + line_sender_error * err = NULL if PyArray_FLAGS(arr) & NPY_ARRAY_C_CONTIGUOUS != 0: if not line_sender_buffer_column_f64_arr_c_major( @@ -2074,7 +2074,7 @@ cdef void_int _dataframe_serialize_cell_column_arr_f64__arr_f64_numpyobj( rank, PyArray_DIMS(arr), data_ptr, - PyArray_NBYTES(arr), + PyArray_SIZE(arr), &err): raise c_err_to_py(err) else: @@ -2085,7 +2085,7 @@ cdef void_int _dataframe_serialize_cell_column_arr_f64__arr_f64_numpyobj( PyArray_DIMS(arr), PyArray_STRIDES(arr), # N.B.: Strides expressed as byte jumps data_ptr, - PyArray_NBYTES(arr), + PyArray_SIZE(arr), &err): raise c_err_to_py(err) diff --git a/src/questdb/extra_numpy.pxd b/src/questdb/extra_numpy.pxd index 45810f31..ec7ce57d 
100644 --- a/src/questdb/extra_numpy.pxd +++ b/src/questdb/extra_numpy.pxd @@ -21,7 +21,7 @@ cdef extern from "numpy/arrayobject.h": bint PyArray_CheckExact(PyObject * o) # PyArrayObject - npy_intp PyArray_NBYTES(PyArrayObject*) nogil + npy_intp PyArray_SIZE(PyArrayObject*) nogil npy_intp* PyArray_STRIDES(PyArrayObject*) nogil npy_intp* PyArray_DIMS(PyArrayObject*) nogil npy_int PyArray_TYPE(PyArrayObject* arr) nogil diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 32d28675..19654883 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -966,7 +966,7 @@ cdef class Buffer: f'Only float64 numpy arrays are supported, got dtype: {arr.dtype}') cdef: size_t rank = cnp.PyArray_NDIM(arr) - const uint8_t * data_ptr = cnp.PyArray_DATA(arr) + const double * data_ptr = cnp.PyArray_DATA(arr) line_sender_error * err = NULL if cnp.PyArray_FLAGS(arr) & cnp.NPY_ARRAY_C_CONTIGUOUS != 0: @@ -976,7 +976,7 @@ cdef class Buffer: rank, cnp.PyArray_DIMS(arr), data_ptr, - cnp.PyArray_NBYTES(arr), + cnp.PyArray_SIZE(arr), &err): raise c_err_to_py(err) else: @@ -987,7 +987,7 @@ cdef class Buffer: cnp.PyArray_DIMS(arr), cnp.PyArray_STRIDES(arr), # N.B.: Strides expressed as byte jumps data_ptr, - cnp.PyArray_NBYTES(arr), + cnp.PyArray_SIZE(arr), &err): raise c_err_to_py(err) diff --git a/src/questdb/line_sender.pxd b/src/questdb/line_sender.pxd index ead3d10e..2b00404e 100644 --- a/src/questdb/line_sender.pxd +++ b/src/questdb/line_sender.pxd @@ -233,8 +233,8 @@ cdef extern from "questdb/ingress/line_sender.h": line_sender_column_name name, size_t rank, const size_t* shapes, - const uint8_t* data_buffer, - size_t data_buffer_len, + const double* data, + size_t data_len, line_sender_error** err_out ) noexcept nogil @@ -244,8 +244,8 @@ cdef extern from "questdb/ingress/line_sender.h": size_t rank, const size_t* shapes, const ssize_t* strides, - const uint8_t* data_buffer, - size_t data_buffer_len, + const double* data, + size_t data_len, line_sender_error** 
err_out ) noexcept nogil From 97941839e0a8a7069313ca609d20fc56e0b23c57 Mon Sep 17 00:00:00 2001 From: victor Date: Wed, 18 Jun 2025 19:20:09 +0800 Subject: [PATCH 13/30] update c module and fix ci --- c-questdb-client | 2 +- ci/cibuildwheel.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/c-questdb-client b/c-questdb-client index aa918b44..d2b14407 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit aa918b44cd5d7da59fe1c3ca55eb823fb7b8d8ff +Subproject commit d2b14407550e4e6b561c1edbaa6df9c0a0b7c0e3 diff --git a/ci/cibuildwheel.yaml b/ci/cibuildwheel.yaml index fdbb5850..18b471be 100644 --- a/ci/cibuildwheel.yaml +++ b/ci/cibuildwheel.yaml @@ -122,6 +122,8 @@ stages: displayName: Build wheels env: CIBW_BUILD: pp* + CIBW_ARCHS: x86_64 + CIBW_PLATFORM: linux - task: PublishBuildArtifacts@1 inputs: {pathtoPublish: 'wheelhouse'} From 1b42514071c20d85af7a5ad53ae3230baa4fd111 Mon Sep 17 00:00:00 2001 From: victor Date: Wed, 18 Jun 2025 20:51:23 +0800 Subject: [PATCH 14/30] fix ci --- ci/cibuildwheel.yaml | 3 +-- ci/run_tests_pipeline.yaml | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/cibuildwheel.yaml b/ci/cibuildwheel.yaml index 18b471be..2f684b5d 100644 --- a/ci/cibuildwheel.yaml +++ b/ci/cibuildwheel.yaml @@ -122,8 +122,7 @@ stages: displayName: Build wheels env: CIBW_BUILD: pp* - CIBW_ARCHS: x86_64 - CIBW_PLATFORM: linux + CIBW_ENABLE: pypy pypy-eol - task: PublishBuildArtifacts@1 inputs: {pathtoPublish: 'wheelhouse'} diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 9fc3f52a..4d88f61c 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -35,6 +35,8 @@ stages: lfs: false submodules: true - task: UsePythonVersion@0 + inputs: + versionSpec: '3.12' - script: | python3 --version python3 -m pip install cython From b5a7c4d8f236cf7d411c5fa8ed0dfe7a83d915c6 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Fri, 20 Jun 2025 11:06:53 +0100 Subject: 
[PATCH 15/30] updated c-questdb-client submodule --- c-questdb-client | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c-questdb-client b/c-questdb-client index d2b14407..5af7515a 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit d2b14407550e4e6b561c1edbaa6df9c0a0b7c0e3 +Subproject commit 5af7515a29bc5b612516474a83e1186c583a73b3 From 37741d36ab3208bfeed6229ea6a15c3e289e31fc Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Fri, 20 Jun 2025 15:58:59 +0100 Subject: [PATCH 16/30] fast reconnection warning --- .bumpversion.cfg | 28 ---------- .bumpversion.toml | 39 +++++++++++++ DEV_NOTES.rst | 1 - RELEASING.rst | 20 +++---- src/questdb/ingress.pyx | 119 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 166 insertions(+), 41 deletions(-) delete mode 100644 .bumpversion.cfg create mode 100644 .bumpversion.toml diff --git a/.bumpversion.cfg b/.bumpversion.cfg deleted file mode 100644 index 6e3ad036..00000000 --- a/.bumpversion.cfg +++ /dev/null @@ -1,28 +0,0 @@ -[bumpversion] -current_version = 3.0.0rc1 -commit = True -tag = False - -[bumpversion:file:pyproject.toml] -search = version = "{current_version}" -replace = version = "{new_version}" - -[bumpversion:file:setup.py] -search = version='{current_version}' -replace = version='{new_version}' - -[bumpversion:file:README.rst] -search = library is {current_version} -replace = library is {new_version} - -[bumpversion:file:docs/conf.py] -search = version = release = '{current_version}' -replace = version = release = '{new_version}' - -[bumpversion:file:src/questdb/__init__.py] -search = __version__ = '{current_version}' -replace = __version__ = '{new_version}' - -[bumpversion:file:src/questdb/ingress.pyx] -search = VERSION = '{current_version}' -replace = VERSION = '{new_version}' diff --git a/.bumpversion.toml b/.bumpversion.toml new file mode 100644 index 00000000..1417a603 --- /dev/null +++ b/.bumpversion.toml @@ -0,0 +1,39 @@ +[tool.bumpversion] 
+current_version = "3.0.0rc1" +commit = false +tag = false + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = "version = \"{current_version}\"" +replace = "version = \"{new_version}\"" + +[[tool.bumpversion.files]] +filename = "setup.py" +search = "version='{current_version}'" +replace = "version='{new_version}'" + +[[tool.bumpversion.files]] +filename = "README.rst" +search = "library is {current_version}" +replace = "library is {new_version}" + +[[tool.bumpversion.files]] +filename = "docs/conf.py" +search = "version = release = '{current_version}'" +replace = "version = release = '{new_version}'" + +[[tool.bumpversion.files]] +filename = "src/questdb/__init__.py" +search = "__version__ = '{current_version}'" +replace = "__version__ = '{new_version}'" + +[[tool.bumpversion.files]] +filename = "src/questdb/ingress.pyx" +search = "VERSION = '{current_version}'" +replace = "VERSION = '{new_version}'" + +[[tool.bumpversion.files]] +filename = ".bumpversion.toml" +search = "current_version = \"{current_version}\"" +replace = "current_version = \"{new_version}\"" diff --git a/DEV_NOTES.rst b/DEV_NOTES.rst index fd899f57..b4b40855 100644 --- a/DEV_NOTES.rst +++ b/DEV_NOTES.rst @@ -55,7 +55,6 @@ The development requirements are these if you prefer to install them one by one: python3 -m pip install wheel python3 -m pip install twine python3 -m pip install cibuildwheel - python3 -m pip install bump2version Building and packaging diff --git a/RELEASING.rst b/RELEASING.rst index 68c77e69..c5444cdd 100644 --- a/RELEASING.rst +++ b/RELEASING.rst @@ -20,21 +20,17 @@ Create a new PR with the new changes in ``CHANGELOG.rst``. Make a commit and push the changes to a new branch. -You also want to bump the version. +You also want to bump the version. This process is semi-automated. 
-This process is automated by the following command: +* Ensure you have `uv` and `bump-my-version` installed: + * `curl -LsSf https://astral.sh/uv/install.sh | sh` : see https://docs.astral.sh/uv/getting-started/installation/ + * `uv tool install bump-my-version`: see https://github.com/callowayproject/bump-my-version. -.. code-block:: bash - - bump2version --config-file .bumpversion.cfg --no-tag patch - -Here use: - -* ``patch`` to bump the version to the next patch version, e.g. 1.0.0 -> 1.0.1 - -* ``minor`` to bump the version to the next minor version, e.g. 1.0.0 -> 1.1.0 +```console +bump-my-version replace --new-version NEW_VERSION +``` -* ``major`` to bump the version to the next major version, e.g. 1.0.0 -> 2.0.0 +If you're unsure, append `--dry-run` to preview changes. Now merge the PR with the title "Bump version: V.V.V → W.W.W". diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 19654883..bfcd1b2d 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -82,6 +82,11 @@ from cpython.bytes cimport PyBytes_FromStringAndSize import sys import os +import threading +import collections +import time +import heapq +import warnings import numpy as np cimport numpy as cnp @@ -99,6 +104,112 @@ cnp.import_array() VERSION = '3.0.0rc1' +_SENDER_RECONNECT_WARN_THRESHOLD = 25 # reconnections +_SENDER_RECONNECT_WARN_WINDOW_NS = 5_000_000_000 # 5 seconds in nanoseconds + + +class _ActiveSenders: + def __init__(self): + self._lock = threading.Lock() + + # The slots fields manage a pool of unsigned integer slot IDs. These slot IDs are: + # * Always non-negative integers (starting from 0). + # * Reused when returned. + # * Allocated in the lowest-available order to keep them compact. + self._next_slot = 0 # Next available slot ID in the linear range. + self._returned_slots = [] # I.e. "holes" in the range `0..self._next_slot`. + + # Tracked established/closed connection events. + # Keys are slot IDs, which are always non-negative integers. 
+ # Values are `collections.deque(maxlen=100)` containing established `time.monotonic_ns()` timestamps. + self._series = {} + + # Timestamp of last warning (monotonic_ns) + self._last_warning_ns = None # Track last warning time (monotonic_ns) + + def _get_next_slot(self) -> int: + # Always called with a lock held. + if self._returned_slots: + return heapq.heappop(self._returned_slots) + else: + self._next_slot += 1 + return self._next_slot - 1 + + def _return_slot(self, slot_id): + # Always called with a lock held. + if slot_id == self._next_slot - 1: + # Not optimal since we're not dealing with "trailing" slots, + # but at least the code is simple :-) + self._next_slot -= 1 + else: + heapq.heappush(self._returned_slots, slot_id) + + def _count_recent_reconnections(self, window_ns) -> int: + """ + Return the number of sender connections established within the last `window_ns` window. + Each slot's most recent establishment is counted if it falls within the window. + """ + # Always called with a lock held. + now = time.monotonic_ns() + cutoff = now - window_ns + max_count = 0 + to_delete = [] + for slot_id, serie in self._series.items(): + while serie and serie[0] < cutoff: + serie.popleft() + count = len(serie) + if not serie: + to_delete.append(slot_id) + elif count > max_count: + max_count = count + for slot_id in to_delete: + del self._series[slot_id] + return max_count + + def track_established(self) -> int: + """ + Track a sender connection event (threadsafe). 
+ """ + with self._lock: + slot_id = self._get_next_slot() + serie = self._series.setdefault(slot_id, collections.deque(maxlen=100)) + serie.append(time.monotonic_ns()) + + max_recent_reconnections = self._count_recent_reconnections( + _SENDER_RECONNECT_WARN_WINDOW_NS) + + if max_recent_reconnections >= _SENDER_RECONNECT_WARN_THRESHOLD: + now = time.monotonic_ns() + # 10 minutes in nanoseconds + min_rewarn_interval_ns = 10 * 60 * 1_000_000_000 + no_recent_warnings = self._last_warning_ns is None or \ + (now - self._last_warning_ns > min_rewarn_interval_ns) + if no_recent_warnings: + warnings.warn( + "questdb.ingress.Sender: " + f"Detected {max_recent_reconnections} reconnections " + f"within the last {_SENDER_RECONNECT_WARN_WINDOW_NS / 1_000_000_000} seconds. " + "This may indicate an inefficient coding pattern where the sender is " + "frequently created and destroyed. " + "Consider reusing sender instance whenever possible.", + UserWarning, + stacklevel=2 + ) + self._last_warning_ns = now + return slot_id + + def track_closed(self, slot_id: int): + """ + Track a sender connection closed event (threadsafe). + """ + with self._lock: + self._return_slot(slot_id) + + +_ACTIVE_SENDERS = _ActiveSenders() + + + cdef bint _has_gil(PyThreadState** gs): return gs[0] == NULL @@ -1829,6 +1940,7 @@ cdef class Sender: cdef int64_t* _last_flush_ms cdef size_t _init_buf_size cdef bint _in_txn + cdef int64_t _slot_id cdef void_int _set_sender_fields( self, @@ -2035,6 +2147,7 @@ cdef class Sender: self._last_flush_ms = NULL self._init_buf_size = 0 self._in_txn = False + self._slot_id = -1 def __init__( self, @@ -2429,6 +2542,9 @@ cdef class Sender: self._buffer._row_complete_sender = PyWeakref_NewRef(self, None) self._last_flush_ms[0] = line_sender_now_micros() // 1000 + # Track and warn about overly quick reconnections to the server. 
+ self._slot_id = _ACTIVE_SENDERS.track_established() + def __enter__(self) -> Sender: """Call :func:`Sender.establish` at the start of a ``with`` block.""" self.establish() @@ -2671,6 +2787,9 @@ cdef class Sender: self._opts = NULL line_sender_close(self._impl) self._impl = NULL + if self._slot_id != -1: + _ACTIVE_SENDERS.track_closed(self._slot_id) + self._slot_id = -1 cpdef close(self, bint flush=True): """ From a50ebc15028d8f069b7d7f62ed65469317582b21 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Fri, 20 Jun 2025 17:41:52 +0100 Subject: [PATCH 17/30] fixed a few sphinx issues, 3 more remaining --- src/questdb/ingress.pyx | 56 +++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index bfcd1b2d..e8401404 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -51,7 +51,8 @@ from libc.string cimport strncmp, memset from libc.math cimport isnan from libc.errno cimport errno # from libc.stdio cimport stderr, fprintf -from cpython.datetime cimport datetime, timedelta +from cpython.datetime cimport datetime as cp_datetime +from cpython.datetime cimport timedelta as cp_timedelta from cpython.bool cimport bool from cpython.weakref cimport PyWeakref_NewRef, PyWeakref_GetObject from cpython.object cimport PyObject @@ -81,6 +82,7 @@ import pathlib from cpython.bytes cimport PyBytes_FromStringAndSize import sys +import datetime import os import threading import collections @@ -490,7 +492,7 @@ cdef void_int str_to_column_name_copy( raise c_err_to_py(err) -cdef int64_t datetime_to_micros(datetime dt): +cdef int64_t datetime_to_micros(cp_datetime dt): """ Convert a :class:`datetime.datetime` to microseconds since the epoch. 
""" @@ -500,7 +502,7 @@ cdef int64_t datetime_to_micros(datetime dt): (dt.microsecond)) -cdef int64_t datetime_to_nanos(datetime dt): +cdef int64_t datetime_to_nanos(cp_datetime dt): """ Convert a `datetime.datetime` to nanoseconds since the epoch. """ @@ -573,11 +575,11 @@ cdef class TimestampMicros: self._value = value @classmethod - def from_datetime(cls, dt: datetime): + def from_datetime(cls, dt: datetime.datetime): """ Construct a ``TimestampMicros`` from a :class:`datetime.datetime` object. """ - if not isinstance(dt, datetime): + if not isinstance(dt, cp_datetime): raise TypeError('dt must be a datetime object.') return cls(datetime_to_micros(dt)) @@ -638,11 +640,11 @@ cdef class TimestampNanos: self._value = value @classmethod - def from_datetime(cls, dt: datetime): + def from_datetime(cls, dt: datetime.datetime): """ Construct a ``TimestampNanos`` from a ``datetime.datetime`` object. """ - if not isinstance(dt, datetime): + if not isinstance(dt, cp_datetime): raise TypeError('dt must be a datetime object.') return cls(datetime_to_nanos(dt)) @@ -749,9 +751,9 @@ cdef class SenderTransaction: symbols: Optional[Dict[str, Optional[str]]]=None, columns: Optional[Dict[ str, - Union[None, bool, int, float, str, TimestampMicros, datetime, np.ndarray]] + Union[None, bool, int, float, str, TimestampMicros, datetime.datetime, np.ndarray]] ]=None, - at: Union[ServerTimestampType, TimestampNanos, datetime]): + at: Union[ServerTimestampType, TimestampNanos, datetime.datetime]): """ Write a row for the table in the transaction. @@ -784,7 +786,7 @@ cdef class SenderTransaction: df, # : pd.DataFrame *, symbols: Union[str, bool, List[int], List[str]] = 'auto', - at: Union[ServerTimestampType, int, str, TimestampNanos, datetime]): + at: Union[ServerTimestampType, int, str, TimestampNanos, datetime.datetime]): """ Write a dataframe for the table in the transaction. 
@@ -1103,7 +1105,7 @@ cdef class Buffer: raise c_err_to_py(err) cdef inline void_int _column_dt( - self, line_sender_column_name c_name, datetime dt) except -1: + self, line_sender_column_name c_name, cp_datetime dt) except -1: cdef line_sender_error* err = NULL if not line_sender_buffer_column_ts_micros( self._impl, c_name, datetime_to_micros(dt), &err): @@ -1124,7 +1126,7 @@ cdef class Buffer: self._column_ts(c_name, value) elif PyArray_CheckExact( value): self._column_numpy(c_name, value) - elif isinstance(value, datetime): + elif isinstance(value, cp_datetime): self._column_dt(c_name, value) else: valid = ', '.join(( @@ -1151,7 +1153,7 @@ cdef class Buffer: if not line_sender_buffer_at_nanos(self._impl, ts._value, &err): raise c_err_to_py(err) - cdef inline void_int _at_dt(self, datetime dt) except -1: + cdef inline void_int _at_dt(self, cp_datetime dt) except -1: cdef int64_t value = datetime_to_nanos(dt) cdef line_sender_error* err = NULL if not line_sender_buffer_at_nanos(self._impl, value, &err): @@ -1167,7 +1169,7 @@ cdef class Buffer: self._at_now() elif isinstance(ts, TimestampNanos): self._at_ts(ts) - elif isinstance(ts, datetime): + elif isinstance(ts, cp_datetime): self._at_dt(ts) else: raise TypeError( @@ -1216,9 +1218,9 @@ cdef class Buffer: symbols: Optional[Dict[str, Optional[str]]]=None, columns: Optional[Dict[ str, - Union[None, bool, int, float, str, TimestampMicros, datetime, np.ndarray]] + Union[None, bool, int, float, str, TimestampMicros, datetime.datetime, np.ndarray]] ]=None, - at: Union[ServerTimestampType, TimestampNanos, datetime]): + at: Union[ServerTimestampType, TimestampNanos, datetime.datetime]): """ Add a single row (line) to the buffer. 
@@ -1332,7 +1334,7 @@ cdef class Buffer: table_name: Optional[str] = None, table_name_col: Union[None, int, str] = None, symbols: Union[str, bool, List[int], List[str]] = 'auto', - at: Union[ServerTimestampType, int, str, TimestampNanos, datetime]): + at: Union[ServerTimestampType, int, str, TimestampNanos, datetime.datetime]): """ Add a pandas DataFrame to the buffer. @@ -1620,7 +1622,7 @@ _FLUSH_FMT = ('{} - See https://py-questdb-client.readthedocs.io/en/' '/troubleshooting.html#inspecting-and-debugging-errors#flush-failed') -cdef uint64_t _timedelta_to_millis(object timedelta): +cdef uint64_t _timedelta_to_millis(cp_timedelta timedelta): """ Convert a timedelta to milliseconds. """ @@ -1713,7 +1715,7 @@ cdef void_int _parse_auto_flush( auto_flush_interval = int(auto_flush_interval) elif auto_flush_interval is False or isinstance(auto_flush_interval, int): pass - elif isinstance(auto_flush_interval, timedelta): + elif isinstance(auto_flush_interval, cp_timedelta): auto_flush_interval = _timedelta_to_millis(auto_flush_interval) else: raise TypeError( @@ -2046,7 +2048,7 @@ cdef class Sender: if auth_timeout is not None: if isinstance(auth_timeout, int): c_auth_timeout = auth_timeout - elif isinstance(auth_timeout, timedelta): + elif isinstance(auth_timeout, cp_timedelta): c_auth_timeout = _timedelta_to_millis(auth_timeout) else: raise TypeError( @@ -2094,7 +2096,7 @@ cdef class Sender: c_retry_timeout = retry_timeout if not line_sender_opts_retry_timeout(self._opts, c_retry_timeout, &err): raise c_err_to_py(err) - elif isinstance(retry_timeout, timedelta): + elif isinstance(retry_timeout, cp_timedelta): c_retry_timeout = _timedelta_to_millis(retry_timeout) if not line_sender_opts_retry_timeout(self._opts, c_retry_timeout, &err): raise c_err_to_py(err) @@ -2118,7 +2120,7 @@ cdef class Sender: c_request_timeout = request_timeout if not line_sender_opts_request_timeout(self._opts, c_request_timeout, &err): raise c_err_to_py(err) - elif isinstance(request_timeout, 
timedelta): + elif isinstance(request_timeout, cp_timedelta): c_request_timeout = _timedelta_to_millis(request_timeout) if not line_sender_opts_request_timeout(self._opts, c_request_timeout, &err): raise c_err_to_py(err) @@ -2474,7 +2476,7 @@ cdef class Sender: return self._auto_flush_mode.byte_count @property - def auto_flush_interval(self) -> Optional[timedelta]: + def auto_flush_interval(self) -> Optional[datetime.timedelta]: """ Time interval threshold for the auto-flush logic, or None if disabled. """ @@ -2482,7 +2484,7 @@ cdef class Sender: return None if self._auto_flush_mode.interval == -1: return None - return timedelta(milliseconds=self._auto_flush_mode.interval) + return cp_timedelta(milliseconds=self._auto_flush_mode.interval) @property def protocol_version(self) -> int: @@ -2586,8 +2588,8 @@ cdef class Sender: symbols: Optional[Dict[str, str]]=None, columns: Optional[Dict[ str, - Union[bool, int, float, str, TimestampMicros, datetime, np.ndarray]]]=None, - at: Union[TimestampNanos, datetime, ServerTimestampType]): + Union[bool, int, float, str, TimestampMicros, datetime.datetime, np.ndarray]]]=None, + at: Union[TimestampNanos, datetime.datetime, ServerTimestampType]): """ Write a row to the internal buffer. @@ -2623,7 +2625,7 @@ cdef class Sender: table_name: Optional[str] = None, table_name_col: Union[None, int, str] = None, symbols: Union[str, bool, List[int], List[str]] = 'auto', - at: Union[ServerTimestampType, int, str, TimestampNanos, datetime]): + at: Union[ServerTimestampType, int, str, TimestampNanos, datetime.datetime]): """ Write a Pandas DataFrame to the internal buffer. 
From f4591024cea45341ed3614a0af185cd038dc0734 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Fri, 20 Jun 2025 17:45:32 +0100 Subject: [PATCH 18/30] fixed remaining sphinx issues --- src/questdb/ingress.pyx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index e8401404..881bd3b6 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -90,7 +90,7 @@ import time import heapq import warnings -import numpy as np +import numpy cimport numpy as cnp from numpy cimport NPY_DOUBLE, PyArrayObject @@ -751,7 +751,7 @@ cdef class SenderTransaction: symbols: Optional[Dict[str, Optional[str]]]=None, columns: Optional[Dict[ str, - Union[None, bool, int, float, str, TimestampMicros, datetime.datetime, np.ndarray]] + Union[None, bool, int, float, str, TimestampMicros, datetime.datetime, numpy.ndarray]] ]=None, at: Union[ServerTimestampType, TimestampNanos, datetime.datetime]): """ @@ -759,7 +759,7 @@ cdef class SenderTransaction: The table name is taken from the transaction. - **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 8.4.0 or higher. """ if at is None: raise IngressError( @@ -1136,7 +1136,7 @@ cdef class Buffer: 'str', 'TimestampMicros', 'datetime.datetime' - 'np.ndarray')) + 'numpy.ndarray')) raise TypeError( f'Unsupported type: {_fqn(type(value))}. 
Must be one of: {valid}') @@ -1218,7 +1218,7 @@ cdef class Buffer: symbols: Optional[Dict[str, Optional[str]]]=None, columns: Optional[Dict[ str, - Union[None, bool, int, float, str, TimestampMicros, datetime.datetime, np.ndarray]] + Union[None, bool, int, float, str, TimestampMicros, datetime.datetime, numpy.ndarray]] ]=None, at: Union[ServerTimestampType, TimestampNanos, datetime.datetime]): """ @@ -1237,7 +1237,7 @@ cdef class Buffer: 'col4': 'xyz', 'col5': TimestampMicros(123456789), 'col6': datetime(2019, 1, 1, 12, 0, 0), - 'col7': np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), + 'col7': numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), 'col8': None}, at=TimestampNanos(123456789)) @@ -1281,14 +1281,14 @@ cdef class Buffer: - `FLOAT `_ * - ``str`` - `STRING `_ - * - ``np.ndarray`` + * - ``numpy.ndarray`` - `ARRAY `_ * - ``datetime.datetime`` and ``TimestampMicros`` - `TIMESTAMP `_ * - ``None`` - *Column is skipped and not serialized.* - **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 8.4.0 or higher. If the destination table was already created, then the columns types will be cast to the types of the existing columns whenever possible @@ -2588,7 +2588,7 @@ cdef class Sender: symbols: Optional[Dict[str, str]]=None, columns: Optional[Dict[ str, - Union[bool, int, float, str, TimestampMicros, datetime.datetime, np.ndarray]]]=None, + Union[bool, int, float, str, TimestampMicros, datetime.datetime, numpy.ndarray]]]=None, at: Union[TimestampNanos, datetime.datetime, ServerTimestampType]): """ Write a row to the internal buffer. @@ -2598,7 +2598,7 @@ cdef class Sender: Refer to the :func:`Buffer.row` documentation for details on arguments. - **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. 
+ **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 8.4.0 or higher. """ if self._in_txn: raise IngressError( From f7fc76171430a23e24cc143de142801f329a65c1 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 23 Jun 2025 11:37:37 +0100 Subject: [PATCH 19/30] should fix CI --- src/questdb/dataframe.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/questdb/dataframe.pxi b/src/questdb/dataframe.pxi index dcd6624b..1dffc643 100644 --- a/src/questdb/dataframe.pxi +++ b/src/questdb/dataframe.pxi @@ -762,7 +762,7 @@ cdef ssize_t _dataframe_resolve_at( at_nanos = at at_value_out[0] = at_nanos._value return -1 - elif isinstance(at, datetime): + elif isinstance(at, cp_datetime): if at.timestamp() < 0: raise ValueError( 'Bad argument `at`: Cannot use a datetime before the ' + From da0226569e85f77933905ce14ee59ed6dce1fd5f Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 23 Jun 2025 19:22:56 +0100 Subject: [PATCH 20/30] Rewrote high reconnection detection logic in Rust to make writing tests for it easy, wrote tests. 
--- MANIFEST.in | 2 +- pystr-to-utf8/Cargo.lock | 307 --------- pystr-to-utf8/Cargo.toml | 12 - pystr-to-utf8/README.md | 11 - pystr-to-utf8/build.rs | 51 -- pystr-to-utf8/src/lib.rs | 286 -------- pystr-to-utf8/src/tests.rs | 315 --------- rpyutils/Cargo.lock | 157 +++++ rpyutils/Cargo.toml | 12 + rpyutils/README.md | 4 + {pystr-to-utf8 => rpyutils}/cbindgen.toml | 0 .../include/rpyutils.h | 4 + rpyutils/src/active_senders.rs | 476 ++++++++++++++ rpyutils/src/lib.rs | 26 + rpyutils/src/pystr_to_utf8.rs | 615 ++++++++++++++++++ setup.py | 16 +- src/questdb/dataframe.md | 2 +- src/questdb/ingress.pyx | 125 +--- .../{pystr_to_utf8.pxd => rpyutils.pxd} | 6 +- 19 files changed, 1326 insertions(+), 1101 deletions(-) delete mode 100644 pystr-to-utf8/Cargo.lock delete mode 100644 pystr-to-utf8/Cargo.toml delete mode 100644 pystr-to-utf8/README.md delete mode 100644 pystr-to-utf8/build.rs delete mode 100644 pystr-to-utf8/src/lib.rs delete mode 100644 pystr-to-utf8/src/tests.rs create mode 100644 rpyutils/Cargo.lock create mode 100644 rpyutils/Cargo.toml create mode 100644 rpyutils/README.md rename {pystr-to-utf8 => rpyutils}/cbindgen.toml (100%) rename pystr-to-utf8/include/pystr_to_utf8.h => rpyutils/include/rpyutils.h (96%) create mode 100644 rpyutils/src/active_senders.rs create mode 100644 rpyutils/src/lib.rs create mode 100644 rpyutils/src/pystr_to_utf8.rs rename src/questdb/{pystr_to_utf8.pxd => rpyutils.pxd} (93%) diff --git a/MANIFEST.in b/MANIFEST.in index b25d533b..ca7ac24d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -15,7 +15,7 @@ recursive-include src *.py recursive-include src *.md recursive-include src *.pxi recursive-include src *.c -graft pystr-to-utf8 +graft rpyutils graft c-questdb-client prune c-questdb-client/src/tests/json_tests.rs prune c-questdb-client/.git diff --git a/pystr-to-utf8/Cargo.lock b/pystr-to-utf8/Cargo.lock deleted file mode 100644 index 85005104..00000000 --- a/pystr-to-utf8/Cargo.lock +++ /dev/null @@ -1,307 +0,0 @@ -# This file is 
automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" - -[[package]] -name = "cbindgen" -version = "0.24.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b922faaf31122819ec80c4047cc684c6979a087366c069611e33649bf98e18d" -dependencies = [ - "heck", - "indexmap", - "log", - "proc-macro2", - "quote", - "serde", - "serde_json", - "syn 1.0.109", - "tempfile", - "toml", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "errno" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "fastrand" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "itoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" - -[[package]] -name = "libc" -version = "0.2.150" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" - -[[package]] -name = "linux-raw-sys" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" - -[[package]] -name = "log" -version = "0.4.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" - -[[package]] -name = "proc-macro2" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pystr-to-utf8" -version = "0.1.0" -dependencies = [ - "cbindgen", -] - -[[package]] -name = "quote" -version = "1.0.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "rustix" -version = "0.38.21" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" -dependencies = [ - "bitflags 2.4.1", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - -[[package]] -name = "ryu" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" - -[[package]] -name = "serde" -version = "1.0.190" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.190" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.39", -] - -[[package]] -name = "serde_json" -version = "1.0.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "tempfile" -version = "3.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" -dependencies = [ - "cfg-if", - "fastrand", - "redox_syscall", - "rustix", - "windows-sys", 
-] - -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/pystr-to-utf8/Cargo.toml b/pystr-to-utf8/Cargo.toml deleted file mode 100644 index 4eb8f445..00000000 --- a/pystr-to-utf8/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "pystr-to-utf8" -version = "0.1.0" -edition = "2021" -publish = false - -[lib] -name = "pystr_to_utf8" -crate-type = ["staticlib"] - -[build-dependencies] -cbindgen = { version = "0.24.3", optional = true, default-features = false } \ No newline at end of file diff --git a/pystr-to-utf8/README.md b/pystr-to-utf8/README.md deleted file mode 100644 index 607f98b1..00000000 --- a/pystr-to-utf8/README.md +++ /dev/null @@ -1,11 +0,0 @@ -By default, when compiling, we don't re-generate the `.h` and `.pxd` files. -This is to speed up compile time. - -If you've updated the API, regenerate them by running: - -``` -$ cargo clean -$ cargo build --features cbindgen -``` - -Then make sure to commit the updated generated files. 
diff --git a/pystr-to-utf8/build.rs b/pystr-to-utf8/build.rs deleted file mode 100644 index a93cb918..00000000 --- a/pystr-to-utf8/build.rs +++ /dev/null @@ -1,51 +0,0 @@ -#[cfg(feature = "cbindgen")] -extern crate cbindgen; - -#[cfg(feature = "cbindgen")] -const BAD_PXD: &str = " -cdef extern from *: - ctypedef bint bool - ctypedef struct va_list"; - -#[cfg(feature = "cbindgen")] -fn main() -> Result<(), Box> { - let crate_dir = std::env::var("CARGO_MANIFEST_DIR")?; - let bindings = cbindgen::generate(&crate_dir)?; - bindings.write_to_file("include/pystr_to_utf8.h"); - - let config = cbindgen::Config { - language: cbindgen::Language::Cython, - documentation: true, - cython: cbindgen::CythonConfig { - header: Some("\"pystr_to_utf8.h\"".to_owned()), - cimports: std::collections::BTreeMap::new()}, - usize_is_size_t: true, - ..Default::default() - }; - - let bindings = cbindgen::Builder::new() - .with_crate(&crate_dir) - .with_config(config) - .generate()?; - - // Instead of just writing out the file: - // bindings.write_to_file("include/pystr_to_utf8.pxd"); - // We need to do some post-processing to make it work our code. - // The default output is too opinionated and has unwanted typedefs. 
- let mut pxd = Vec::new(); - bindings.write(&mut pxd); - let pxd = String::from_utf8(pxd)?; - if !pxd.contains(BAD_PXD) { - panic!("cbindgen generated unexpected pxd: {}", pxd); - } - let pxd = pxd.replace(BAD_PXD, ""); - let pxd = pxd.replace("bool", "bint"); - let pxd = pxd.replace(";", ""); - // println!("{}", &pxd); - std::fs::write("../src/questdb/pystr_to_utf8.pxd", &pxd)?; - Ok(()) -} - -#[cfg(not(feature = "cbindgen"))] -fn main() {} - diff --git a/pystr-to-utf8/src/lib.rs b/pystr-to-utf8/src/lib.rs deleted file mode 100644 index eb3bc01d..00000000 --- a/pystr-to-utf8/src/lib.rs +++ /dev/null @@ -1,286 +0,0 @@ -/******************************************************************************* - * ___ _ ____ ____ - * / _ \ _ _ ___ ___| |_| _ \| __ ) - * | | | | | | |/ _ \/ __| __| | | | _ \ - * | |_| | |_| | __/\__ \ |_| |_| | |_) | - * \__\_\\__,_|\___||___/\__|____/|____/ - * - * Copyright (c) 2014-2019 Appsicle - * Copyright (c) 2019-2024 QuestDB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - ******************************************************************************/ - -use std::ffi::c_char; -use std::slice::from_raw_parts; - -#[allow(non_camel_case_types)] -pub struct qdb_pystr_buf(Vec); - -#[repr(C)] -#[allow(non_camel_case_types)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct qdb_pystr_pos { - pub chain: usize, - pub string: usize -} - -/// Prepare a new buffer. The buffer must be freed with `qdb_pystr_free`. 
-/// The `qdb_ucsX_to_utf8` functions will write to this buffer. -#[no_mangle] -pub unsafe extern "C" fn qdb_pystr_buf_new() -> *mut qdb_pystr_buf { - Box::into_raw(Box::new(qdb_pystr_buf(Vec::new()))) -} - -/// Get current position. Use in conjunction with `truncate`. -#[no_mangle] -pub unsafe extern "C" fn qdb_pystr_buf_tell( - b: *const qdb_pystr_buf) -> qdb_pystr_pos { - let b = &*b; - let chain_pos = b.0.len(); - let string_pos = if chain_pos > 0 { - b.0[chain_pos - 1].len() - } else { - 0 - }; - qdb_pystr_pos { chain: chain_pos, string: string_pos } -} - -/// Trim the buffer to the given position. Use in conjunction with `tell`. -#[no_mangle] -pub unsafe extern "C" fn qdb_pystr_buf_truncate( - b: *mut qdb_pystr_buf, pos: qdb_pystr_pos) { - let b = &mut *b; - b.0.truncate(pos.chain); - if !b.0.is_empty() { - b.0[pos.chain - 1].truncate(pos.string); - } -} - -/// Reset the converter's buffer to zero length. -#[no_mangle] -pub unsafe extern "C" fn qdb_pystr_buf_clear(b: *mut qdb_pystr_buf) { - let b = &mut *b; - if !b.0.is_empty() { - b.0.truncate(1); - b.0[0].clear(); - } -} - -/// Free the buffer. Must be called after `qdb_pystr_buf_new`. -#[no_mangle] -pub unsafe extern "C" fn qdb_pystr_buf_free(b: *mut qdb_pystr_buf) { - if !b.is_null() { - drop(Box::from_raw(b)); - } -} - -const MIN_BUF_LEN: usize = 1024; - -/// A carefully crafted buffer with spare capacity for `len` bytes. -/// This is necessary to return "stable" addresses and avoid segfaults. -/// Rust is unaware we are borrowing its memory and could try to free it as -/// part of a reallocation if we were to use a `String` directly. 
-fn get_dest(chain: &mut Vec, len: usize) -> &mut String { - if !chain.is_empty() { - let last = chain.last_mut().unwrap(); - if last.capacity() - last.len() >= len { - return chain.last_mut().unwrap(); - } - } - chain.push(String::with_capacity(std::cmp::max(len, MIN_BUF_LEN))); - chain.last_mut().unwrap() -} - -#[inline(always)] -fn encode_loop<'a, 'b, T, F>( - utf8_mult: usize, - chain: &'a mut Vec, - buf: &'b [T], - get_char: F) -> Result<&'a str, u32> - where - F: Fn(T) -> Option, - T: Copy + Into -{ - let dest = get_dest(chain, utf8_mult * buf.len()); - let last = dest.len(); - // for &b in buf.iter() { - // // Checking for validity is not optional: - // // >>> for n in range(2 ** 16): - // // >>> chr(n).encode('utf-8') - // // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' - // // in position 0: surrogates not allowed - // match get_char(b) { - // Some(c) => dest.push(c), - // None => { - // dest.truncate(last); - // return Err(b.into()); - // } - // } - // } - // Ok(&dest[last..]) - unsafe { - let v = dest.as_mut_vec(); - v.set_len(v.capacity()); - let mut index = last; - - for &b in buf.iter() { - let c = match get_char(b) { - Some(c) => c, - None => { - v.set_len(last); - return Err(b.into()) - } - }; - let utf_c_len = c.len_utf8(); - match utf_c_len { - 1 => { - v[index] = c as u8; - }, - 2 => { - let mut codepoint_buf = [0; 4]; - let bytes = c - .encode_utf8(&mut codepoint_buf).as_bytes(); - *v.get_unchecked_mut(index) = - *bytes.get_unchecked(0); - *v.get_unchecked_mut(index + 1) = - *bytes.get_unchecked(1); - }, - 3 => { - let mut codepoint_buf = [0; 4]; - let bytes = c - .encode_utf8(&mut codepoint_buf).as_bytes(); - *v.get_unchecked_mut(index) = - *bytes.get_unchecked(0); - *v.get_unchecked_mut(index + 1) = - *bytes.get_unchecked(1); - *v.get_unchecked_mut(index + 2) = - *bytes.get_unchecked(2); - }, - 4 => { - let mut codepoint_buf = [0; 4]; - let bytes = c - .encode_utf8(&mut codepoint_buf).as_bytes(); - 
*v.get_unchecked_mut(index) = - *bytes.get_unchecked(0); - *v.get_unchecked_mut(index + 1) = - *bytes.get_unchecked(1); - *v.get_unchecked_mut(index + 2) = - *bytes.get_unchecked(2); - *v.get_unchecked_mut(index + 3) = - *bytes.get_unchecked(3); - }, - _ => unreachable!() - } - index += utf_c_len; - } - v.set_len(index); - } - Ok(&dest[last..]) -} - -/// Convert a Py_UCS1 string to UTF-8. -/// Returns a `buf_out` borrowed ptr of `size_out` len. -/// The buffer is borrowed from `b`. -#[no_mangle] -pub unsafe extern "C" fn qdb_ucs1_to_utf8( - b: *mut qdb_pystr_buf, - count: usize, input: *const u8, - size_out: *mut usize, buf_out: *mut *const c_char) { - let b = &mut *b; - let i = from_raw_parts(input, count); - - // len(chr(2 ** 8 - 1).encode('utf-8')) == 2 - let utf8_mult = 2; - let res = encode_loop( - utf8_mult, - &mut b.0, - i, - |c| Some(c as char)).unwrap(); - *size_out = res.len(); - *buf_out = res.as_ptr() as *const c_char; -} - -/// Convert a Py_UCS2 string to UTF-8. -/// Returns a `buf_out` borrowed ptr of `size_out` len. -/// The buffer is borrowed from `b`. -/// In case of errors, returns `false` and bad_codepoint_out is set to the -/// offending codepoint. -#[no_mangle] -pub unsafe extern "C" fn qdb_ucs2_to_utf8(b: *mut qdb_pystr_buf, - count: usize, - input: *const u16, - size_out: *mut usize, - buf_out: *mut *const c_char, - bad_codepoint_out: *mut u32) -> bool { - let b = &mut *b; - let i = from_raw_parts(input, count); - - // len(chr(2 ** 16 - 1).encode('utf-8')) == 3 - let utf8_mult = 3; - let res = encode_loop( - utf8_mult, - &mut b.0, - i, - |c| char::from_u32(c as u32)); - match res { - Ok(s) => { - *size_out = s.len(); - *buf_out = s.as_ptr() as *const c_char; - true - } - Err(bad) => { - *bad_codepoint_out = bad; - false - } - } -} - -/// Convert a Py_UCS4 string to UTF-8. -/// Returns a `buf_out` borrowed ptr of `size_out` len. -/// The buffer is borrowed from `b`. 
-/// In case of errors, returns `false` and bad_codepoint_out is set to the -/// offending codepoint. -#[no_mangle] -pub unsafe extern "C" fn qdb_ucs4_to_utf8(b: *mut qdb_pystr_buf, - count: usize, - input: *const u32, - size_out: *mut usize, - buf_out: *mut *const c_char, - bad_codepoint_out: *mut u32) -> bool { - let b = &mut *b; - let i = from_raw_parts(input, count); - - // Max 4 bytes allowed by RFC: https://www.rfc-editor.org/rfc/rfc3629#page-4 - let utf8_mult = 4; - let res = encode_loop( - utf8_mult, - &mut b.0, - i, - |c| char::from_u32(c)); - match res { - Ok(s) => { - *size_out = s.len(); - *buf_out = s.as_ptr() as *const c_char; - true - } - Err(bad) => { - *bad_codepoint_out = bad; - false - } - } -} - -#[cfg(test)] -mod tests; diff --git a/pystr-to-utf8/src/tests.rs b/pystr-to-utf8/src/tests.rs deleted file mode 100644 index 68da613e..00000000 --- a/pystr-to-utf8/src/tests.rs +++ /dev/null @@ -1,315 +0,0 @@ -use super::*; - -struct Buf { - buf: *mut qdb_pystr_buf, -} - -impl Buf { - fn new() -> Self { - Self { - buf: unsafe { qdb_pystr_buf_new() }, - } - } - - fn chain(&self) -> &Vec { - unsafe { &(*self.buf).0 } - } - - fn chain_mut(&mut self) -> &mut Vec { - unsafe { &mut (*self.buf).0 } - } - - fn clear(&mut self) { - unsafe { qdb_pystr_buf_clear(self.buf) } - } - - fn tell(&self) -> qdb_pystr_pos { - unsafe { qdb_pystr_buf_tell(self.buf) } - } - - fn truncate(&mut self, pos: qdb_pystr_pos) { - unsafe { qdb_pystr_buf_truncate(self.buf, pos) } - } - - fn ucs1_to_utf8(&mut self, input: &[u8]) -> &'static str { - let mut size_out = 0; - let mut buf_out = std::ptr::null(); - unsafe { - qdb_ucs1_to_utf8( - self.buf, - input.len(), - input.as_ptr(), - &mut size_out, - &mut buf_out); - } - let slice = unsafe { - from_raw_parts(buf_out as *const u8, size_out) }; - std::str::from_utf8(slice).unwrap() - } - - fn ucs2_to_utf8(&mut self, input: &[u16]) -> Result<&'static str, u32> { - let mut size_out = 0; - let mut buf_out = std::ptr::null(); - let mut 
bad_codepoint = 0u32; - let ok = unsafe { - qdb_ucs2_to_utf8( - self.buf, - input.len(), - input.as_ptr(), - &mut size_out, - &mut buf_out, - &mut bad_codepoint) - }; - if ok { - let slice = unsafe { - from_raw_parts(buf_out as *const u8, size_out) }; - let msg = std::str::from_utf8(slice).unwrap(); - Ok(msg) - } else { - Err(bad_codepoint) - } - } - - fn ucs4_to_utf8(&mut self, input: &[u32]) -> Result<&'static str, u32> { - let mut size_out = 0; - let mut buf_out = std::ptr::null(); - let mut bad_codepoint = 0u32; - let ok = unsafe { - qdb_ucs4_to_utf8( - self.buf, - input.len(), - input.as_ptr(), - &mut size_out, - &mut buf_out, - &mut bad_codepoint) - }; - if ok { - let slice = unsafe { - from_raw_parts(buf_out as *const u8, size_out) }; - let msg = std::str::from_utf8(slice).unwrap(); - Ok(msg) - } else { - Err(bad_codepoint) - } - } -} - -impl Drop for Buf { - fn drop(&mut self) { - unsafe { - qdb_pystr_buf_free(self.buf); - } - } -} - -#[test] -fn test_empty() { - let b = Buf::new(); - assert_eq!(b.chain().len(), 0); - let pos = b.tell(); - assert_eq!(pos.chain, 0); - assert_eq!(pos.string, 0); -} - -#[test] -fn test_ucs1() { - let mut b = Buf::new(); - let s1 = b.ucs1_to_utf8(b"hello"); - assert_eq!(s1, "hello"); - assert_eq!(b.chain_mut().len(), 1); - assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); - assert_eq!(b.chain()[0], "hello"); - assert_eq!(b.tell().chain, 1); - assert_eq!(b.tell().string, 5); - b.clear(); - assert_eq!(b.chain().len(), 1); - assert_eq!(b.chain()[0], ""); - let s2 = b.ucs1_to_utf8(b""); - assert_eq!(s2, ""); - assert_eq!(b.tell(), qdb_pystr_pos { chain: 1, string: 0 }); - assert_eq!(s2.as_ptr(), b.chain()[0].as_str().as_ptr()); - let s3 = b.ucs1_to_utf8(b"10\xb5"); - assert_eq!(s3, "10µ"); - assert_eq!(s3.len(), 4); // 3 bytes in UCS-1, 4 bytes in UTF-8. 
- assert_eq!(b.chain().len(), 1); - assert_eq!(s3.as_ptr(), unsafe { - b.chain()[0].as_str().as_ptr().add(s2.len()) - }); - assert_eq!(b.tell(), qdb_pystr_pos { - chain: 1, string: s2.len() + s3.len() }); -} - -#[test] -fn test_resize_and_truncate() { - let mut b = Buf::new(); - let s1 = b.ucs1_to_utf8(b"abcdefghijklmnopqrstuvwxyz"); - assert_eq!(s1, "abcdefghijklmnopqrstuvwxyz"); - assert_eq!(b.chain_mut().len(), 1); - assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); - - let big_string = "hello world".repeat(1000); - assert!(big_string.len() > MIN_BUF_LEN); - let s2 = b.ucs1_to_utf8(big_string.as_bytes()); - assert_eq!(s2, big_string); - assert_eq!(b.chain_mut().len(), 2); - assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); - assert_eq!(b.chain_mut()[1].as_str().as_ptr(), s2.as_ptr()); - assert_eq!(b.tell(), qdb_pystr_pos { chain: 2, string: 11000 }); - b.truncate(b.tell()); - assert_eq!(b.tell(), qdb_pystr_pos { chain: 2, string: 11000 }); - - let spare = b.chain_mut()[1].capacity() - b.chain_mut()[1].len(); - assert!(spare > 4); - - let test_string = "ab"; - let s3 = b.ucs1_to_utf8(test_string.as_bytes()); - assert_eq!(s3, test_string); - assert_eq!(b.chain_mut().len(), 2); - assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); - assert_eq!(b.chain_mut()[1].as_str().as_ptr(), s2.as_ptr()); - assert_eq!(b.tell(), qdb_pystr_pos { - chain: 2, string: 11000 + test_string.len() }); -} - -#[test] -fn test_ucs2() { - let mut b = Buf::new(); - - // We first check code points within the ASCII range. - let s1 = b.ucs2_to_utf8( - &[0x61, 0x62, 0x63, 0x64, 0x65]).unwrap(); - assert_eq!(s1, "abcde"); - assert_eq!(s1.len(), 5); - - // Now chars outside ASCII range, but within UCS-1 range. - // These will yield two bytes each in UTF-8. 
- let s2 = b.ucs2_to_utf8( - &[0x00f0, 0x00e3, 0x00b5, 0x00b6]) - .unwrap(); - assert_eq!(s2, "ðãµ¶"); - assert_eq!(s2.len(), 8); - - // Now chars that actually require two bytes in UCS-2, but also fit in - // two bytes in UTF-8. - let s3 = b.ucs2_to_utf8( - &[0x0100, 0x069c]) - .unwrap(); - assert_eq!(s3, "Āڜ"); - assert_eq!(s3.len(), 4); - - // Now chars that require two bytes in UCS-2 and 3 bytes in UTF-8. - let s4 = b.ucs2_to_utf8( - &[0x569c, 0xa4c2]) - .unwrap(); - assert_eq!(s4, "嚜꓂"); - assert_eq!(s4.len(), 6); - - // Quick check that we're just writing to the same buffer. - assert_eq!(b.tell(), qdb_pystr_pos { - chain: 1, - string: [s1, s2, s3, s4].iter().map(|s| s.len()).sum() }); - - // Now we finally check that errors are captured. - // For this, we use a code point which is valid in a Python string - // (in UCS-2), but which is not valid when encoded as UTF-8. - // >>> chr(0xd800).encode('utf-8') - // Traceback (most recent call last): - // File "", line 1, in - // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' - // in position 0: surrogates not allowed - let before_pos = b.tell(); - let s5 = b.ucs2_to_utf8(&[0x061, 0xd800]); - assert!(s5.is_err()); - assert_eq!(s5.unwrap_err(), 0xd800 as u32); - - // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, - // we also want to be sure that the buffer was not modified and appended to. - assert_eq!(b.tell(), before_pos); - - // Now we check that the buffer is still in a valid state. - let s6 = b.ucs2_to_utf8(&[0x062, 0x063]).unwrap(); - assert_eq!(s6, "bc"); - assert_eq!(b.tell(), qdb_pystr_pos { - chain: 1, - string: [s1, s2, s3, s4, s6].iter().map(|s| s.len()).sum() }); -} - -#[test] -fn test_ucs4() { - let mut b = Buf::new(); - - // We first check code points within the ASCII range. - let s1 = b.ucs4_to_utf8( - &[0x61, 0x62, 0x63, 0x64, 0x65]).unwrap(); - assert_eq!(s1, "abcde"); - assert_eq!(s1.len(), 5); - - // Now chars outside ASCII range, but within UCS-1 range. 
- // These will yield two bytes each in UTF-8. - let s2 = b.ucs4_to_utf8( - &[0x00f0, 0x00e3, 0x00b5, 0x00b6]) - .unwrap(); - assert_eq!(s2, "ðãµ¶"); - assert_eq!(s2.len(), 8); - - // Now chars that actually require two bytes in UCS-2, but also fit in - // two bytes in UTF-8. - let s3 = b.ucs4_to_utf8( - &[0x0100, 0x069c]) - .unwrap(); - assert_eq!(s3, "Āڜ"); - assert_eq!(s3.len(), 4); - - // Now chars that require two bytes in UCS-2 and 3 bytes in UTF-8. - let s4 = b.ucs4_to_utf8( - &[0x569c, 0xa4c2]) - .unwrap(); - assert_eq!(s4, "嚜꓂"); - assert_eq!(s4.len(), 6); - - // Now chars that require four bytes in UCS-4 and 4 bytes in UTF-8. - let s5 = b.ucs4_to_utf8( - &[0x1f4a9, 0x1f99e]) - .unwrap(); - assert_eq!(s5, "💩🦞"); - assert_eq!(s5.len(), 8); - - // Quick check that we're just writing to the same buffer. - assert_eq!(b.tell(), qdb_pystr_pos { - chain: 1, - string: [s1, s2, s3, s4, s5].iter().map(|s| s.len()).sum() }); - - // Now we finally check that errors are captured. - // For this, we use a code point which is valid in a Python string - // (in UCS-4), but which is not valid when encoded as UTF-8. - // >>> chr(0xd800).encode('utf-8') - // Traceback (most recent call last): - // File "", line 1, in - // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' - // in position 0: surrogates not allowed - let before_pos = b.tell(); - let s6 = b.ucs4_to_utf8(&[0x061, 0xd800]); - assert!(s6.is_err()); - assert_eq!(s6.unwrap_err(), 0xd800 as u32); - - // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, - // we also want to be sure that the buffer was not modified and appended to. - assert_eq!(b.tell(), before_pos); - - // We repeat the same with chars with code points higher than the u16 type. 
- let before_pos = b.tell(); - let s7 = b.ucs4_to_utf8(&[0x061, 0x110000]); - assert!(s7.is_err()); - assert_eq!(s7.unwrap_err(), 0x110000); - - // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, - // we also want to be sure that the buffer was not modified and appended to. - assert_eq!(b.tell(), before_pos); - - // Now we check that the buffer is still in a valid state. - let s8 = b.ucs4_to_utf8(&[0x062, 0x063]).unwrap(); - assert_eq!(s8, "bc"); - assert_eq!(b.tell(), qdb_pystr_pos { - chain: 1, - string: [s1, s2, s3, s4, s5, s8].iter().map(|s| s.len()).sum() }); -} \ No newline at end of file diff --git a/rpyutils/Cargo.lock b/rpyutils/Cargo.lock new file mode 100644 index 00000000..ae7f8f7d --- /dev/null +++ b/rpyutils/Cargo.lock @@ -0,0 +1,157 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name 
= "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rpyutils" +version = "0.1.0" +dependencies = [ + "rand", +] + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/rpyutils/Cargo.toml b/rpyutils/Cargo.toml new file mode 100644 index 00000000..480a3dfe --- /dev/null +++ b/rpyutils/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "rpyutils" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +name = "rpyutils" +crate-type = ["staticlib"] + +[dev-dependencies] +rand = "0.9.1" diff --git a/rpyutils/README.md b/rpyutils/README.md new file mode 100644 index 00000000..af54c639 --- /dev/null +++ b/rpyutils/README.md @@ -0,0 +1,4 @@ +# Using from Cython + +* `rpyutils.h` +* `rpyutils.pxd` diff --git a/pystr-to-utf8/cbindgen.toml b/rpyutils/cbindgen.toml similarity index 100% rename from pystr-to-utf8/cbindgen.toml rename to rpyutils/cbindgen.toml diff --git a/pystr-to-utf8/include/pystr_to_utf8.h b/rpyutils/include/rpyutils.h similarity index 96% rename from pystr-to-utf8/include/pystr_to_utf8.h rename to rpyutils/include/rpyutils.h index 45240614..c793249e 100644 --- a/pystr-to-utf8/include/pystr_to_utf8.h +++ b/rpyutils/include/rpyutils.h @@ -108,6 +108,10 @@ bool qdb_ucs4_to_utf8(struct 
qdb_pystr_buf *b, const char **buf_out, uint32_t *bad_codepoint_out); +uint32_t qdb_active_senders_track_established(int* warn); // bint* warn + +void qdb_active_senders_track_closed(uint32_t slot); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/rpyutils/src/active_senders.rs b/rpyutils/src/active_senders.rs new file mode 100644 index 00000000..3fbee2c5 --- /dev/null +++ b/rpyutils/src/active_senders.rs @@ -0,0 +1,476 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2025 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +use std::{ + collections::VecDeque, ffi::c_int, fmt::Debug, ops::Sub, sync::{LazyLock, Mutex}, time::{Duration, Instant} +}; + +type Slot = u32; + +struct Slots { + /// Next available slot ID in the linear range. + next_slot: Slot, + + /// I.e. "holes" in the range `0..self.next_slot`. 
+ returned: VecDeque, +} + +impl Slots { + fn new() -> Self { + Self { + next_slot: 0, + returned: VecDeque::new(), + } + } + + fn next(&mut self) -> Slot { + if let Some(returned) = self.returned.pop_front() { + returned + } else { + let slot = self.next_slot; + self.next_slot += 1; + slot + } + } + + fn restore(&mut self, slot_id: Slot) { + if slot_id == self.next_slot - 1 { + self.next_slot -= 1; + while let Some(&last) = self.returned.back() { + if last == self.next_slot - 1 { + self.returned.pop_back(); + self.next_slot -= 1; + } else { + break; + } + } + } else { + self.returned.push_back(slot_id); + self.returned.make_contiguous().sort_unstable(); + } + } +} + +#[cfg(test)] +impl Debug for Slots { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Slots") + .field("next_slot", &self.next_slot) + .field("returned", &self.returned) + .finish() + } +} + +trait InstantLike: PartialEq + PartialOrd + Copy + Sub + Debug { + fn now() -> Self; + fn duration_since(&self, earlier: Self) -> Duration; +} + +impl InstantLike for Instant { + fn now() -> Self { + Instant::now() + } + + fn duration_since(&self, earlier: Self) -> Duration { + self.duration_since(earlier) + } +} + +struct ActiveSenders { + slots: Slots, + + /// Tracked established connection events. + /// Keys are slot IDs, which are always non-negative integers. + /// Values are `VecDeque` containing established connection `Instant` timestamps. + series: std::collections::HashMap>, + + /// Timestamp of last issued warning + last_warning: Option, + + /// Window for counting recent reconnections. + reconnect_warn_window: Duration, + + /// Threshold for issuing a warning when the number of recent reconnections exceeds this value. + reconnect_warn_threshold: usize, + + /// Window to suppress warnings after the last warning. 
+ quiet_window: Duration, +} + +#[cfg(test)] +impl Debug for ActiveSenders { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut series = self.series.iter().collect::>(); + series.sort_by(|(k1, _v1), (k2, _v2)| k1.cmp(k2)); + f.debug_struct("ActiveSenders") + .field("slots", &self.slots) + .field("series", &series) + .field("last_warning", &self.last_warning) + .finish() + } +} + +impl> ActiveSenders { + fn new( + reconnect_warn_window: Duration, + reconnect_warn_threshold: usize, + quiet_window: Duration, + ) -> Self { + Self { + slots: Slots::new(), + series: std::collections::HashMap::new(), + last_warning: None, + reconnect_warn_window, + reconnect_warn_threshold, + quiet_window, + } + } + + fn count_recent_reconnections(&mut self) -> usize { + let now = I::now(); + let cutoff: I = now - self.reconnect_warn_window; + let mut max_count = 0; + let mut to_delete = Vec::new(); + + for (&slot_id, serie) in &mut self.series { + while let Some(&established) = serie.front() { + if established < cutoff { + serie.pop_front(); + } else { + break; + } + } + let count = serie.len(); + if count == 0 { + to_delete.push(slot_id); + } else if count > max_count { + max_count = count; + } + } + + for slot_id in to_delete { + self.series.remove(&slot_id); + } + + max_count + } + + fn track_established(&mut self) -> (Slot, bool) { + let slot_id = self.slots.next(); + let serie = self + .series + .entry(slot_id) + .or_insert_with(|| VecDeque::with_capacity(2 * self.reconnect_warn_threshold)); + serie.push_back(I::now()); + + let max_recent_reconnections = self.count_recent_reconnections(); + + let mut warning = false; + + if max_recent_reconnections >= self.reconnect_warn_threshold { + let now = I::now(); + if self.last_warning.is_none() + || now.duration_since(self.last_warning.unwrap()) > self.quiet_window + { + warning = true; + self.last_warning = Some(now); + } + } + (slot_id, warning) + } + + fn track_closed(&mut self, slot_id: Slot) { + 
self.slots.restore(slot_id); + } +} + +static ACTIVE_SENDERS: LazyLock> = LazyLock::new(|| { + Mutex::new(ActiveSenders::new( + Duration::from_secs(5), + 25, // reconnections + Duration::from_secs(10 * 60), + )) +}); + +#[no_mangle] +pub extern "C" fn qdb_active_senders_track_established(warn: *mut c_int) -> Slot { + let mut active_senders = ACTIVE_SENDERS.lock().unwrap(); + let (slot_id, warning) = active_senders.track_established(); + unsafe { + *warn = warning as c_int; + } + slot_id +} + +#[no_mangle] +pub extern "C" fn qdb_active_senders_track_closed(slot_id: Slot) { + let mut active_senders = ACTIVE_SENDERS.lock().unwrap(); + active_senders.track_closed(slot_id); +} + +#[cfg(test)] +mod tests { + use std::cell::Cell; + + use super::*; + use rand::seq::SliceRandom; + + fn assert_slots_state(slots: &Slots, next_id: Slot, returned: &[Slot]) { + assert_eq!(slots.next_slot, next_id); + assert_eq!(slots.returned.len(), returned.len()); + for (i, &slot) in returned.iter().enumerate() { + assert_eq!(slots.returned[i], slot); + } + } + + /// Test the slots, last-out-first-in usage pattern. + #[test] + fn test_slots_lofi() { + let mut slots = Slots::new(); + assert_slots_state(&slots, 0, &[]); + assert_eq!(slots.next(), 0); + assert_slots_state(&slots, 1, &[]); + assert_eq!(slots.next(), 1); + assert_slots_state(&slots, 2, &[]); + assert_eq!(slots.next(), 2); + assert_slots_state(&slots, 3, &[]); + assert_eq!(slots.next(), 3); + assert_slots_state(&slots, 4, &[]); + slots.restore(3); + assert_slots_state(&slots, 3, &[]); + slots.restore(2); + assert_slots_state(&slots, 2, &[]); + slots.restore(1); + assert_slots_state(&slots, 1, &[]); + slots.restore(0); + assert_slots_state(&slots, 0, &[]); + } + + /// Test the slots, last-out-last-in usage pattern. 
+ #[test] + fn test_slots_loli() { + let mut slots = Slots::new(); + assert_eq!(slots.next(), 0); + assert_eq!(slots.next(), 1); + assert_eq!(slots.next(), 2); + assert_eq!(slots.next(), 3); + + slots.restore(0); + assert_slots_state(&slots, 4, &[0]); + slots.restore(1); + assert_slots_state(&slots, 4, &[0, 1]); + slots.restore(2); + assert_slots_state(&slots, 4, &[0, 1, 2]); + slots.restore(3); + assert_slots_state(&slots, 0, &[]); + } + + /// Tests the slots in twos. + #[test] + fn test_slot_gaps() { + let mut slots = Slots::new(); + + assert_eq!(slots.next(), 0); + assert_eq!(slots.next(), 1); + assert_slots_state(&slots, 2, &[]); + slots.restore(0); + assert_slots_state(&slots, 2, &[0]); + + assert_eq!(slots.next(), 0); + assert_eq!(slots.next(), 2); + assert_eq!(slots.next(), 3); + assert_eq!(slots.next(), 4); + assert_slots_state(&slots, 5, &[]); + slots.restore(1); + assert_slots_state(&slots, 5, &[1]); + slots.restore(3); + assert_slots_state(&slots, 5, &[1, 3]); // gap in the returned sequence + + slots.restore(4); + assert_slots_state(&slots, 3, &[1]); + + slots.restore(2); + assert_slots_state(&slots, 1, &[]); + + slots.restore(0); + } + + #[test] + fn test_slots_random() { + for _ in 0..100 { + let mut slots = Slots::new(); + + let mut acquired = (0..50).map(|_| slots.next()).collect::>(); + assert_slots_state(&slots, 50, &[]); + assert_eq!(acquired.len(), 50); + + let mut rng = rand::rng(); + acquired.shuffle(&mut rng); + + for &slot in &acquired { + slots.restore(slot); + } + assert_slots_state(&slots, 0, &[]); + } + } + + thread_local! { + // Storing time as milliseconds + static NEXT_MOCK_INSTANT_VALUE: Cell = const { Cell::new(0) }; + } + + fn reset_mock_instant() { + // We initialize with a large enough value to avoid subtraction underflow + // issues where `ActiveSenders` needs to calculate a duration in the past. 
+ NEXT_MOCK_INSTANT_VALUE.set(1000000000u64); + } + + fn advance_mock_instant(time: Duration) { + NEXT_MOCK_INSTANT_VALUE.with(|v| { + let new_value = v.get() + time.as_millis() as u64; + v.set(new_value); + }); + } + + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + struct MockInstant { + millis: u64, + } + + impl Debug for MockInstant { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}ms", self.millis) + } + } + + impl Sub for MockInstant { + type Output = Self; + + fn sub(self, rhs: Duration) -> Self::Output { + MockInstant { + millis: self.millis - rhs.as_millis() as u64, + } + } + } + + impl InstantLike for MockInstant { + fn now() -> Self { + MockInstant { + millis: NEXT_MOCK_INSTANT_VALUE.get(), + } + } + + fn duration_since(&self, earlier: Self) -> Duration { + Duration::from_millis(self.millis - earlier.millis) + } + } + + #[test] + fn test_active_senders_4_independent() { + // We connect 4 independent senders in a 300ms window. + // This will not trigger a warning. 
+ + reset_mock_instant(); + let mut active_senders = + ActiveSenders::::new(Duration::from_secs(5), 3, Duration::from_secs(60)); + assert_eq!(active_senders.track_established(), (0, false)); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (1, false)); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (2, false)); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (3, false)); + + active_senders.track_closed(1); + active_senders.track_closed(2); + + advance_mock_instant(Duration::from_millis(100)); // first reconnection, no trigger + assert_eq!(active_senders.track_established(), (1, false)); + + active_senders.track_closed(3); + active_senders.track_closed(4); + active_senders.track_closed(1); + active_senders.track_closed(2); + } + + #[test] + fn test_active_senders_fast_reconnect() { + reset_mock_instant(); + let mut active_senders = + ActiveSenders::::new(Duration::from_secs(5), 3, Duration::from_secs(60)); + + assert_eq!(active_senders.track_established(), (0, false)); + active_senders.track_closed(0); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (0, false)); + active_senders.track_closed(0); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (0, true)); // warn, 3rd reconnect within 5s + active_senders.track_closed(0); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (0, false)); // suppress warning + active_senders.track_closed(0); + + advance_mock_instant(active_senders.quiet_window); + + assert_eq!(active_senders.track_established(), (0, false)); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (1, false)); // new slot ID should not affect logic! 
+ + active_senders.track_closed(0); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (0, false)); + active_senders.track_closed(0); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (0, true)); // warn, 3rd reconnect within 5s + } + + #[test] + fn test_active_senders_slow_reconnect() { + reset_mock_instant(); + let mut active_senders = + ActiveSenders::::new(Duration::from_secs(5), 3, Duration::from_secs(60)); + + // Ten times: Two reconnects, then a big pause. + for _ in 0..10 { + assert_eq!(active_senders.track_established(), (0, false)); + active_senders.track_closed(0); + + advance_mock_instant(Duration::from_millis(100)); + assert_eq!(active_senders.track_established(), (0, false)); + active_senders.track_closed(0); + + advance_mock_instant(active_senders.reconnect_warn_window); + } + } +} diff --git a/rpyutils/src/lib.rs b/rpyutils/src/lib.rs new file mode 100644 index 00000000..32a8c910 --- /dev/null +++ b/rpyutils/src/lib.rs @@ -0,0 +1,26 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2025 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +mod active_senders; +mod pystr_to_utf8; diff --git a/rpyutils/src/pystr_to_utf8.rs b/rpyutils/src/pystr_to_utf8.rs new file mode 100644 index 00000000..259aefb7 --- /dev/null +++ b/rpyutils/src/pystr_to_utf8.rs @@ -0,0 +1,615 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2025 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +use std::ffi::c_char; +use std::slice::from_raw_parts; + +#[allow(non_camel_case_types)] +pub struct qdb_pystr_buf(Vec); + +#[repr(C)] +#[allow(non_camel_case_types)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct qdb_pystr_pos { + pub chain: usize, + pub string: usize, +} + +/// Prepare a new buffer. The buffer must be freed with `qdb_pystr_free`. +/// The `qdb_ucsX_to_utf8` functions will write to this buffer. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_new() -> *mut qdb_pystr_buf { + Box::into_raw(Box::new(qdb_pystr_buf(Vec::new()))) +} + +/// Get current position. Use in conjunction with `truncate`. 
+#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_tell(b: *const qdb_pystr_buf) -> qdb_pystr_pos { + let b = &*b; + let chain_pos = b.0.len(); + let string_pos = if chain_pos > 0 { + b.0[chain_pos - 1].len() + } else { + 0 + }; + qdb_pystr_pos { + chain: chain_pos, + string: string_pos, + } +} + +/// Trim the buffer to the given position. Use in conjunction with `tell`. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_truncate(b: *mut qdb_pystr_buf, pos: qdb_pystr_pos) { + let b = &mut *b; + b.0.truncate(pos.chain); + if !b.0.is_empty() { + b.0[pos.chain - 1].truncate(pos.string); + } +} + +/// Reset the converter's buffer to zero length. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_clear(b: *mut qdb_pystr_buf) { + let b = &mut *b; + if !b.0.is_empty() { + b.0.truncate(1); + b.0[0].clear(); + } +} + +/// Free the buffer. Must be called after `qdb_pystr_buf_new`. +#[no_mangle] +pub unsafe extern "C" fn qdb_pystr_buf_free(b: *mut qdb_pystr_buf) { + if !b.is_null() { + drop(Box::from_raw(b)); + } +} + +const MIN_BUF_LEN: usize = 1024; + +/// A carefully crafted buffer with spare capacity for `len` bytes. +/// This is necessary to return "stable" addresses and avoid segfaults. +/// Rust is unaware we are borrowing its memory and could try to free it as +/// part of a reallocation if we were to use a `String` directly. 
+fn get_dest(chain: &mut Vec, len: usize) -> &mut String { + if !chain.is_empty() { + let last = chain.last_mut().unwrap(); + if last.capacity() - last.len() >= len { + return chain.last_mut().unwrap(); + } + } + chain.push(String::with_capacity(std::cmp::max(len, MIN_BUF_LEN))); + chain.last_mut().unwrap() +} + +#[inline(always)] +fn encode_loop<'a, T, F>( + utf8_mult: usize, + chain: &'a mut Vec, + buf: &[T], + get_char: F, +) -> Result<&'a str, u32> +where + F: Fn(T) -> Option, + T: Copy + Into, +{ + let dest = get_dest(chain, utf8_mult * buf.len()); + let last = dest.len(); + // for &b in buf.iter() { + // // Checking for validity is not optional: + // // >>> for n in range(2 ** 16): + // // >>> chr(n).encode('utf-8') + // // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' + // // in position 0: surrogates not allowed + // match get_char(b) { + // Some(c) => dest.push(c), + // None => { + // dest.truncate(last); + // return Err(b.into()); + // } + // } + // } + // Ok(&dest[last..]) + unsafe { + let v = dest.as_mut_vec(); + v.set_len(v.capacity()); + let mut index = last; + + for &b in buf.iter() { + let c = match get_char(b) { + Some(c) => c, + None => { + v.set_len(last); + return Err(b.into()); + } + }; + let utf_c_len = c.len_utf8(); + match utf_c_len { + 1 => { + v[index] = c as u8; + } + 2 => { + let mut codepoint_buf = [0; 4]; + let bytes = c.encode_utf8(&mut codepoint_buf).as_bytes(); + *v.get_unchecked_mut(index) = *bytes.get_unchecked(0); + *v.get_unchecked_mut(index + 1) = *bytes.get_unchecked(1); + } + 3 => { + let mut codepoint_buf = [0; 4]; + let bytes = c.encode_utf8(&mut codepoint_buf).as_bytes(); + *v.get_unchecked_mut(index) = *bytes.get_unchecked(0); + *v.get_unchecked_mut(index + 1) = *bytes.get_unchecked(1); + *v.get_unchecked_mut(index + 2) = *bytes.get_unchecked(2); + } + 4 => { + let mut codepoint_buf = [0; 4]; + let bytes = c.encode_utf8(&mut codepoint_buf).as_bytes(); + *v.get_unchecked_mut(index) = 
*bytes.get_unchecked(0); + *v.get_unchecked_mut(index + 1) = *bytes.get_unchecked(1); + *v.get_unchecked_mut(index + 2) = *bytes.get_unchecked(2); + *v.get_unchecked_mut(index + 3) = *bytes.get_unchecked(3); + } + _ => unreachable!(), + } + index += utf_c_len; + } + v.set_len(index); + } + Ok(&dest[last..]) +} + +/// Convert a Py_UCS1 string to UTF-8. +/// Returns a `buf_out` borrowed ptr of `size_out` len. +/// The buffer is borrowed from `b`. +#[no_mangle] +pub unsafe extern "C" fn qdb_ucs1_to_utf8( + b: *mut qdb_pystr_buf, + count: usize, + input: *const u8, + size_out: *mut usize, + buf_out: *mut *const c_char, +) { + let b = &mut *b; + let i = from_raw_parts(input, count); + + // len(chr(2 ** 8 - 1).encode('utf-8')) == 2 + let utf8_mult = 2; + let res = encode_loop(utf8_mult, &mut b.0, i, |c| Some(c as char)).unwrap(); + *size_out = res.len(); + *buf_out = res.as_ptr() as *const c_char; +} + +/// Convert a Py_UCS2 string to UTF-8. +/// Returns a `buf_out` borrowed ptr of `size_out` len. +/// The buffer is borrowed from `b`. +/// In case of errors, returns `false` and bad_codepoint_out is set to the +/// offending codepoint. +#[no_mangle] +pub unsafe extern "C" fn qdb_ucs2_to_utf8( + b: *mut qdb_pystr_buf, + count: usize, + input: *const u16, + size_out: *mut usize, + buf_out: *mut *const c_char, + bad_codepoint_out: *mut u32, +) -> bool { + let b = &mut *b; + let i = from_raw_parts(input, count); + + // len(chr(2 ** 16 - 1).encode('utf-8')) == 3 + let utf8_mult = 3; + let res = encode_loop(utf8_mult, &mut b.0, i, |c| char::from_u32(c as u32)); + match res { + Ok(s) => { + *size_out = s.len(); + *buf_out = s.as_ptr() as *const c_char; + true + } + Err(bad) => { + *bad_codepoint_out = bad; + false + } + } +} + +/// Convert a Py_UCS4 string to UTF-8. +/// Returns a `buf_out` borrowed ptr of `size_out` len. +/// The buffer is borrowed from `b`. +/// In case of errors, returns `false` and bad_codepoint_out is set to the +/// offending codepoint. 
+#[no_mangle] +pub unsafe extern "C" fn qdb_ucs4_to_utf8( + b: *mut qdb_pystr_buf, + count: usize, + input: *const u32, + size_out: *mut usize, + buf_out: *mut *const c_char, + bad_codepoint_out: *mut u32, +) -> bool { + let b = &mut *b; + let i = from_raw_parts(input, count); + + // Max 4 bytes allowed by RFC: https://www.rfc-editor.org/rfc/rfc3629#page-4 + let utf8_mult = 4; + let res = encode_loop(utf8_mult, &mut b.0, i, char::from_u32); + match res { + Ok(s) => { + *size_out = s.len(); + *buf_out = s.as_ptr() as *const c_char; + true + } + Err(bad) => { + *bad_codepoint_out = bad; + false + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + struct Buf { + buf: *mut qdb_pystr_buf, + } + + impl Buf { + fn new() -> Self { + Self { + buf: unsafe { qdb_pystr_buf_new() }, + } + } + + fn chain(&self) -> &Vec { + unsafe { &(*self.buf).0 } + } + + fn chain_mut(&mut self) -> &mut Vec { + unsafe { &mut (*self.buf).0 } + } + + fn clear(&mut self) { + unsafe { qdb_pystr_buf_clear(self.buf) } + } + + fn tell(&self) -> qdb_pystr_pos { + unsafe { qdb_pystr_buf_tell(self.buf) } + } + + fn truncate(&mut self, pos: qdb_pystr_pos) { + unsafe { qdb_pystr_buf_truncate(self.buf, pos) } + } + + fn ucs1_to_utf8(&mut self, input: &[u8]) -> &'static str { + let mut size_out = 0; + let mut buf_out = std::ptr::null(); + unsafe { + qdb_ucs1_to_utf8( + self.buf, + input.len(), + input.as_ptr(), + &mut size_out, + &mut buf_out, + ); + } + let slice = unsafe { from_raw_parts(buf_out as *const u8, size_out) }; + std::str::from_utf8(slice).unwrap() + } + + fn ucs2_to_utf8(&mut self, input: &[u16]) -> Result<&'static str, u32> { + let mut size_out = 0; + let mut buf_out = std::ptr::null(); + let mut bad_codepoint = 0u32; + let ok = unsafe { + qdb_ucs2_to_utf8( + self.buf, + input.len(), + input.as_ptr(), + &mut size_out, + &mut buf_out, + &mut bad_codepoint, + ) + }; + if ok { + let slice = unsafe { from_raw_parts(buf_out as *const u8, size_out) }; + let msg = 
std::str::from_utf8(slice).unwrap(); + Ok(msg) + } else { + Err(bad_codepoint) + } + } + + fn ucs4_to_utf8(&mut self, input: &[u32]) -> Result<&'static str, u32> { + let mut size_out = 0; + let mut buf_out = std::ptr::null(); + let mut bad_codepoint = 0u32; + let ok = unsafe { + qdb_ucs4_to_utf8( + self.buf, + input.len(), + input.as_ptr(), + &mut size_out, + &mut buf_out, + &mut bad_codepoint, + ) + }; + if ok { + let slice = unsafe { from_raw_parts(buf_out as *const u8, size_out) }; + let msg = std::str::from_utf8(slice).unwrap(); + Ok(msg) + } else { + Err(bad_codepoint) + } + } + } + + impl Drop for Buf { + fn drop(&mut self) { + unsafe { + qdb_pystr_buf_free(self.buf); + } + } + } + + #[test] + fn test_empty() { + let b = Buf::new(); + assert_eq!(b.chain().len(), 0); + let pos = b.tell(); + assert_eq!(pos.chain, 0); + assert_eq!(pos.string, 0); + } + + #[test] + fn test_ucs1() { + let mut b = Buf::new(); + let s1 = b.ucs1_to_utf8(b"hello"); + assert_eq!(s1, "hello"); + assert_eq!(b.chain_mut().len(), 1); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + assert_eq!(b.chain()[0], "hello"); + assert_eq!(b.tell().chain, 1); + assert_eq!(b.tell().string, 5); + b.clear(); + assert_eq!(b.chain().len(), 1); + assert_eq!(b.chain()[0], ""); + let s2 = b.ucs1_to_utf8(b""); + assert_eq!(s2, ""); + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 1, + string: 0 + } + ); + assert_eq!(s2.as_ptr(), b.chain()[0].as_str().as_ptr()); + let s3 = b.ucs1_to_utf8(b"10\xb5"); + assert_eq!(s3, "10µ"); + assert_eq!(s3.len(), 4); // 3 bytes in UCS-1, 4 bytes in UTF-8. 
+ assert_eq!(b.chain().len(), 1); + assert_eq!(s3.as_ptr(), unsafe { + b.chain()[0].as_str().as_ptr().add(s2.len()) + }); + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 1, + string: s2.len() + s3.len() + } + ); + } + + #[test] + fn test_resize_and_truncate() { + let mut b = Buf::new(); + let s1 = b.ucs1_to_utf8(b"abcdefghijklmnopqrstuvwxyz"); + assert_eq!(s1, "abcdefghijklmnopqrstuvwxyz"); + assert_eq!(b.chain_mut().len(), 1); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + + let big_string = "hello world".repeat(1000); + assert!(big_string.len() > MIN_BUF_LEN); + let s2 = b.ucs1_to_utf8(big_string.as_bytes()); + assert_eq!(s2, big_string); + assert_eq!(b.chain_mut().len(), 2); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + assert_eq!(b.chain_mut()[1].as_str().as_ptr(), s2.as_ptr()); + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 2, + string: 11000 + } + ); + b.truncate(b.tell()); + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 2, + string: 11000 + } + ); + + let spare = b.chain_mut()[1].capacity() - b.chain_mut()[1].len(); + assert!(spare > 4); + + let test_string = "ab"; + let s3 = b.ucs1_to_utf8(test_string.as_bytes()); + assert_eq!(s3, test_string); + assert_eq!(b.chain_mut().len(), 2); + assert_eq!(b.chain_mut()[0].as_str().as_ptr(), s1.as_ptr()); + assert_eq!(b.chain_mut()[1].as_str().as_ptr(), s2.as_ptr()); + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 2, + string: 11000 + test_string.len() + } + ); + } + + #[test] + fn test_ucs2() { + let mut b = Buf::new(); + + // We first check code points within the ASCII range. + let s1 = b.ucs2_to_utf8(&[0x61, 0x62, 0x63, 0x64, 0x65]).unwrap(); + assert_eq!(s1, "abcde"); + assert_eq!(s1.len(), 5); + + // Now chars outside ASCII range, but within UCS-1 range. + // These will yield two bytes each in UTF-8. 
+ let s2 = b.ucs2_to_utf8(&[0x00f0, 0x00e3, 0x00b5, 0x00b6]).unwrap(); + assert_eq!(s2, "ðãµ¶"); + assert_eq!(s2.len(), 8); + + // Now chars that actually require two bytes in UCS-2, but also fit in + // two bytes in UTF-8. + let s3 = b.ucs2_to_utf8(&[0x0100, 0x069c]).unwrap(); + assert_eq!(s3, "Āڜ"); + assert_eq!(s3.len(), 4); + + // Now chars that require two bytes in UCS-2 and 3 bytes in UTF-8. + let s4 = b.ucs2_to_utf8(&[0x569c, 0xa4c2]).unwrap(); + assert_eq!(s4, "嚜꓂"); + assert_eq!(s4.len(), 6); + + // Quick check that we're just writing to the same buffer. + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4].iter().map(|s| s.len()).sum() + } + ); + + // Now we finally check that errors are captured. + // For this, we use a code point which is valid in a Python string + // (in UCS-2), but which is not valid when encoded as UTF-8. + // >>> chr(0xd800).encode('utf-8') + // Traceback (most recent call last): + // File "", line 1, in + // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' + // in position 0: surrogates not allowed + let before_pos = b.tell(); + let s5 = b.ucs2_to_utf8(&[0x061, 0xd800]); + assert!(s5.is_err()); + assert_eq!(s5.unwrap_err(), 0xd800_u32); + + // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, + // we also want to be sure that the buffer was not modified and appended to. + assert_eq!(b.tell(), before_pos); + + // Now we check that the buffer is still in a valid state. + let s6 = b.ucs2_to_utf8(&[0x062, 0x063]).unwrap(); + assert_eq!(s6, "bc"); + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4, s6].iter().map(|s| s.len()).sum() + } + ); + } + + #[test] + fn test_ucs4() { + let mut b = Buf::new(); + + // We first check code points within the ASCII range. + let s1 = b.ucs4_to_utf8(&[0x61, 0x62, 0x63, 0x64, 0x65]).unwrap(); + assert_eq!(s1, "abcde"); + assert_eq!(s1.len(), 5); + + // Now chars outside ASCII range, but within UCS-1 range. 
+ // These will yield two bytes each in UTF-8. + let s2 = b.ucs4_to_utf8(&[0x00f0, 0x00e3, 0x00b5, 0x00b6]).unwrap(); + assert_eq!(s2, "ðãµ¶"); + assert_eq!(s2.len(), 8); + + // Now chars that actually require two bytes in UCS-2, but also fit in + // two bytes in UTF-8. + let s3 = b.ucs4_to_utf8(&[0x0100, 0x069c]).unwrap(); + assert_eq!(s3, "Āڜ"); + assert_eq!(s3.len(), 4); + + // Now chars that require two bytes in UCS-2 and 3 bytes in UTF-8. + let s4 = b.ucs4_to_utf8(&[0x569c, 0xa4c2]).unwrap(); + assert_eq!(s4, "嚜꓂"); + assert_eq!(s4.len(), 6); + + // Now chars that require four bytes in UCS-4 and 4 bytes in UTF-8. + let s5 = b.ucs4_to_utf8(&[0x1f4a9, 0x1f99e]).unwrap(); + assert_eq!(s5, "💩🦞"); + assert_eq!(s5.len(), 8); + + // Quick check that we're just writing to the same buffer. + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4, s5].iter().map(|s| s.len()).sum() + } + ); + + // Now we finally check that errors are captured. + // For this, we use a code point which is valid in a Python string + // (in UCS-4), but which is not valid when encoded as UTF-8. + // >>> chr(0xd800).encode('utf-8') + // Traceback (most recent call last): + // File "", line 1, in + // UnicodeEncodeError: 'utf-8' codec can't encode character '\ud800' + // in position 0: surrogates not allowed + let before_pos = b.tell(); + let s6 = b.ucs4_to_utf8(&[0x061, 0xd800]); + assert!(s6.is_err()); + assert_eq!(s6.unwrap_err(), 0xd800_u32); + + // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, + // we also want to be sure that the buffer was not modified and appended to. + assert_eq!(b.tell(), before_pos); + + // We repeat the same with chars with code points higher than the u16 type. 
+ let before_pos = b.tell(); + let s7 = b.ucs4_to_utf8(&[0x061, 0x110000]); + assert!(s7.is_err()); + assert_eq!(s7.unwrap_err(), 0x110000); + + // Even though 0x061 (ASCII char 'a') was valid and successfully encoded, + // we also want to be sure that the buffer was not modified and appended to. + assert_eq!(b.tell(), before_pos); + + // Now we check that the buffer is still in a valid state. + let s8 = b.ucs4_to_utf8(&[0x062, 0x063]).unwrap(); + assert_eq!(s8, "bc"); + assert_eq!( + b.tell(), + qdb_pystr_pos { + chain: 1, + string: [s1, s2, s3, s4, s5, s8].iter().map(|s| s.len()).sum() + } + ); + } +} diff --git a/setup.py b/setup.py index 0f1e6efe..6ca8c32f 100755 --- a/setup.py +++ b/setup.py @@ -41,17 +41,17 @@ def ingress_extension(): extra_objects = [] questdb_rs_ffi_dir = PROJ_ROOT / 'c-questdb-client' / 'questdb-rs-ffi' - pystr_to_utf8_dir = PROJ_ROOT / 'pystr-to-utf8' + rpyutils_dir = PROJ_ROOT / 'rpyutils' questdb_client_lib_dir = None - pystr_to_utf8_lib_dir = None + rpyutils_lib_dir = None if PLATFORM == 'win32' and MODE == '32bit': questdb_client_lib_dir = \ questdb_rs_ffi_dir / 'target' / WIN_32BIT_CARGO_TARGET / 'release' - pystr_to_utf8_lib_dir = \ - pystr_to_utf8_dir / 'target' / WIN_32BIT_CARGO_TARGET / 'release' + rpyutils_lib_dir = \ + rpyutils_dir / 'target' / WIN_32BIT_CARGO_TARGET / 'release' else: questdb_client_lib_dir = questdb_rs_ffi_dir / 'target' / 'release' - pystr_to_utf8_lib_dir = pystr_to_utf8_dir / 'target' / 'release' + rpyutils_lib_dir = rpyutils_dir / 'target' / 'release' if INSTRUMENT_FUZZING: extra_compile_args.append('-fsanitize=fuzzer-no-link') extra_link_args.append('-fsanitize=fuzzer-no-link') @@ -77,14 +77,14 @@ def ingress_extension(): str(loc / f'{lib_prefix}{name}{lib_suffix}') for loc, name in ( (questdb_client_lib_dir, 'questdb_client'), - (pystr_to_utf8_lib_dir, 'pystr_to_utf8'))] + (rpyutils_lib_dir, 'rpyutils'))] return Extension( "questdb.ingress", ["src/questdb/ingress.pyx"], include_dirs=[ 
"c-questdb-client/include", - "pystr-to-utf8/include", + "rpyutils/include", np.get_include()], library_dirs=lib_paths, libraries=libraries, @@ -150,7 +150,7 @@ def cargo_build(): subprocess.check_call( cargo_args, - cwd=str(PROJ_ROOT / 'pystr-to-utf8'), + cwd=str(PROJ_ROOT / 'rpyutils'), env=env) diff --git a/src/questdb/dataframe.md b/src/questdb/dataframe.md index 0fb3e2df..5d42d389 100644 --- a/src/questdb/dataframe.md +++ b/src/questdb/dataframe.md @@ -344,7 +344,7 @@ but this would require invoking the Python interpreter and the creation of a gargantuan amount of little temporary objects. This is such a common use case that we do the encoding in a supporting Rust -library. See `pystr-to-utf8` in the source tree. +library. See `rpyutils/src/pystr_to_utf8.rs` in the source tree. It accumulates strings in a address-stable buffer (internally a `Vec`) and allows us to borrow its memory. diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 881bd3b6..0cb6041d 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -41,6 +41,7 @@ __all__ = [ 'TimestampMicros', 'TimestampNanos', 'TlsCa', + 'WARN_HIGH_RECONNECTS' ] # For prototypes: https://github.com/cython/cython/tree/master/Cython/Includes @@ -61,7 +62,7 @@ from cpython.buffer cimport Py_buffer, PyObject_CheckBuffer, \ from cpython.memoryview cimport PyMemoryView_FromMemory from .line_sender cimport * -from .pystr_to_utf8 cimport * +from .rpyutils cimport * from .conf_str cimport * from .arrow_c_data_interface cimport * from .extra_cpython cimport * @@ -105,111 +106,7 @@ cnp.import_array() # .bumpversion.cfg. VERSION = '3.0.0rc1' - -_SENDER_RECONNECT_WARN_THRESHOLD = 25 # reconnections -_SENDER_RECONNECT_WARN_WINDOW_NS = 5_000_000_000 # 5 seconds in nanoseconds - - -class _ActiveSenders: - def __init__(self): - self._lock = threading.Lock() - - # The slots fields manage a pool of unsigned integer slot IDs. These slot IDs are: - # * Always non-negative integers (starting from 0). 
- # * Reused when returned. - # * Allocated in the lowest-available order to keep them compact. - self._next_slot = 0 # Next available slot ID in the linear range. - self._returned_slots = [] # I.e. "holes" in the range `0..self._next_slot`. - - # Tracked established/closed connection events. - # Keys are slot IDs, which are always non-negative integers. - # Values are `collections.deque(maxlen=100)` containing established `time.monotonic_ns()` timestamps. - self._series = {} - - # Timestamp of last warning (monotonic_ns) - self._last_warning_ns = None # Track last warning time (monotonic_ns) - - def _get_next_slot(self) -> int: - # Always called with a lock held. - if self._returned_slots: - return heapq.heappop(self._returned_slots) - else: - self._next_slot += 1 - return self._next_slot - 1 - - def _return_slot(self, slot_id): - # Always called with a lock held. - if slot_id == self._next_slot - 1: - # Not optimal since we're not dealing with "trailing" slots, - # but at least the code is simple :-) - self._next_slot -= 1 - else: - heapq.heappush(self._returned_slots, slot_id) - - def _count_recent_reconnections(self, window_ns) -> int: - """ - Return the number of sender connections established within the last `window_ns` window. - Each slot's most recent establishment is counted if it falls within the window. - """ - # Always called with a lock held. - now = time.monotonic_ns() - cutoff = now - window_ns - max_count = 0 - to_delete = [] - for slot_id, serie in self._series.items(): - while serie and serie[0] < cutoff: - serie.popleft() - count = len(serie) - if not serie: - to_delete.append(slot_id) - elif count > max_count: - max_count = count - for slot_id in to_delete: - del self._series[slot_id] - return max_count - - def track_established(self) -> int: - """ - Track a sender connection event (threadsafe). 
- """ - with self._lock: - slot_id = self._get_next_slot() - serie = self._series.setdefault(slot_id, collections.deque(maxlen=100)) - serie.append(time.monotonic_ns()) - - max_recent_reconnections = self._count_recent_reconnections( - _SENDER_RECONNECT_WARN_WINDOW_NS) - - if max_recent_reconnections >= _SENDER_RECONNECT_WARN_THRESHOLD: - now = time.monotonic_ns() - # 10 minutes in nanoseconds - min_rewarn_interval_ns = 10 * 60 * 1_000_000_000 - no_recent_warnings = self._last_warning_ns is None or \ - (now - self._last_warning_ns > min_rewarn_interval_ns) - if no_recent_warnings: - warnings.warn( - "questdb.ingress.Sender: " - f"Detected {max_recent_reconnections} reconnections " - f"within the last {_SENDER_RECONNECT_WARN_WINDOW_NS / 1_000_000_000} seconds. " - "This may indicate an inefficient coding pattern where the sender is " - "frequently created and destroyed. " - "Consider reusing sender instance whenever possible.", - UserWarning, - stacklevel=2 - ) - self._last_warning_ns = now - return slot_id - - def track_closed(self, slot_id: int): - """ - Track a sender connection closed event (threadsafe). - """ - with self._lock: - self._return_slot(slot_id) - - -_ACTIVE_SENDERS = _ActiveSenders() - +WARN_HIGH_RECONNECTS = True cdef bint _has_gil(PyThreadState** gs): @@ -2545,7 +2442,19 @@ cdef class Sender: self._last_flush_ms[0] = line_sender_now_micros() // 1000 # Track and warn about overly quick reconnections to the server. - self._slot_id = _ACTIVE_SENDERS.track_established() + cdef bint warn = False + if WARN_HIGH_RECONNECTS: + self._slot_id = qdb_active_senders_track_established(&warn) + if warn: + warnings.warn( + "questdb.ingress.Sender: " + f"Detected a burst of reconnections. " + "This may indicate an inefficient coding pattern where the sender is " + "frequently created and destroyed. 
" + "Consider reusing sender instance whenever possible.", + UserWarning, + stacklevel=1 + ) def __enter__(self) -> Sender: """Call :func:`Sender.establish` at the start of a ``with`` block.""" @@ -2790,7 +2699,7 @@ cdef class Sender: line_sender_close(self._impl) self._impl = NULL if self._slot_id != -1: - _ACTIVE_SENDERS.track_closed(self._slot_id) + qdb_active_senders_track_closed(self._slot_id) self._slot_id = -1 cpdef close(self, bint flush=True): diff --git a/src/questdb/pystr_to_utf8.pxd b/src/questdb/rpyutils.pxd similarity index 93% rename from src/questdb/pystr_to_utf8.pxd rename to src/questdb/rpyutils.pxd index 1822e26c..eaec1b7f 100644 --- a/src/questdb/pystr_to_utf8.pxd +++ b/src/questdb/rpyutils.pxd @@ -1,7 +1,7 @@ from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, intptr_t from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, uintptr_t -cdef extern from "pystr_to_utf8.h": +cdef extern from "rpyutils.h": cdef struct qdb_pystr_buf: pass @@ -58,3 +58,7 @@ cdef extern from "pystr_to_utf8.h": size_t *size_out, const char **buf_out, uint32_t *bad_codepoint_out) + + uint32_t qdb_active_senders_track_established(bint *warn) + + void qdb_active_senders_track_closed(uint32_t slot) From 3eb128eb1d47e9c9f76c06ff0b3b7b77abb1fd36 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 23 Jun 2025 23:22:43 +0100 Subject: [PATCH 21/30] oops, I'd missed one.. 
--- proj.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proj.py b/proj.py index becb26a6..2f27c966 100755 --- a/proj.py +++ b/proj.py @@ -87,7 +87,7 @@ def build_fuzzing(): @command def test(all=False, patch_path='1', *args): - _run('cargo', 'test', cwd=PROJ_ROOT / 'pystr-to-utf8') + _run('cargo', 'test', cwd=PROJ_ROOT / 'rpyutils') env = {'TEST_QUESTDB_PATCH_PATH': patch_path} if _arg2bool(all): env['TEST_QUESTDB_INTEGRATION'] = '1' From 004038150029922fe67f87e8686ed6c042ff7aff Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Tue, 24 Jun 2025 10:49:24 +0100 Subject: [PATCH 22/30] Written changelog, bumped version --- .bumpversion.toml | 2 +- CHANGELOG.rst | 64 +++++++++++++++++++++++++++++++++++++++++ docs/conf.py | 2 +- pyproject.toml | 2 +- setup.py | 2 +- src/questdb/__init__.py | 2 +- src/questdb/ingress.pyx | 2 +- 7 files changed, 70 insertions(+), 6 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index 1417a603..afd41c23 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "3.0.0rc1" +current_version = "3.0.0" commit = false tag = false diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3598c1f8..aee39b0b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,70 @@ Changelog ========= +3.0.0 (2025-xx-xx) +------------------ + +Features +~~~~~~~~ + +This is the first major release of the QuestDB Python client library +which supports n-dimensional arrays of doubles for QuestDB servers 8.4.0 and up. + +.. code-block:: python + + import numpy as np + + # Create 2D numpy array + array_2d = np.array([ + [1.1, 2.2, 3.3], + [4.4, 5.5, 6.6]], dtype=np.float64) + + sender.row( + 'table', + columns={'array_2d': array_2d}, + at=timestamp) + +The array data is sent over a new protocol version (2) that is auto-negotiated +when using HTTP(s), or can be specified explicitly via the ``protocol_version=2`` +parameter when using TCP(s). 
+ +We recommend using HTTP(s), but here is a TCP example, should you need it:: + + tcp::addr=localhost:9009;protocol_version=2; + +When using ``protocol_version=2`` (with either TCP(s) or HTTP(s)), the sender +will now also serialize ``float`` (double-precision) columns as binary. +You might see a performance uplift if this is a dominant data type in your +ingestion workload. + +When compared to 2.0.4, this release includes all the changes from 3.0.0rc1 and +additionally: + +* Has optimised ingestion performance from C-style contiguous NumPy arrays. + +* Warns at most every 10 minutes when burst of reconnections are detected. + This is to warn about code patterns that may lead to performance issues, such as + + ..code-block:: python + + # Don't do this! Sender objects should be reused. + for row_fields in data: + with Sender.from_conf(conf) as sender: + sender.row(**row_fields) + + This feature can be disabled in code by setting: + + .. code-block:: python + + import questdb.ingress as qi + qi.WARN_HIGH_RECONNECTS = False + +Breaking Changes +~~~~~~~~~~~~~~~~ +Refer to the release notes for 3.0.0rc1 for the breaking changes introduced +in this release compared to 2.x.x. 
+ + 3.0.0rc1 (2025-06-02) --------------------- diff --git a/docs/conf.py b/docs/conf.py index c50d9dc0..3fce6413 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -28,7 +28,7 @@ year = '2024' author = 'QuestDB' copyright = '{0}, {1}'.format(year, author) -version = release = '3.0.0rc1' +version = release = '3.0.0' github_repo_url = 'https://github.com/questdb/py-questdb-client' diff --git a/pyproject.toml b/pyproject.toml index 1ed3bfd8..9971bad1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ # See: https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ name = "questdb" requires-python = ">=3.9" -version = "3.0.0rc1" +version = "3.0.0" description = "QuestDB client library for Python" readme = "README.rst" classifiers = [ diff --git a/setup.py b/setup.py index 6ca8c32f..da7b9ecb 100755 --- a/setup.py +++ b/setup.py @@ -171,7 +171,7 @@ def readme(): setup( name='questdb', - version='3.0.0rc1', + version='3.0.0', platforms=['any'], python_requires='>=3.8', install_requires=[], diff --git a/src/questdb/__init__.py b/src/questdb/__init__.py index 3885d125..4eb28e38 100644 --- a/src/questdb/__init__.py +++ b/src/questdb/__init__.py @@ -1 +1 @@ -__version__ = '3.0.0rc1' +__version__ = '3.0.0' diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 0cb6041d..19df89ac 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -104,7 +104,7 @@ cnp.import_array() # This value is automatically updated by the `bump2version` tool. # If you need to update it, also update the search definition in # .bumpversion.cfg. 
-VERSION = '3.0.0rc1' +VERSION = '3.0.0' WARN_HIGH_RECONNECTS = True From ffeeebe43e717599b9dabdfd36b4c7ee9289ea5b Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Tue, 24 Jun 2025 12:01:41 +0100 Subject: [PATCH 23/30] doc improvements --- CHANGELOG.rst | 2 +- docs/sender.rst | 92 +++++++++++++++++++++++++++++++++++++++++ src/questdb/ingress.pyx | 3 +- 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index aee39b0b..121c55c0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -49,7 +49,7 @@ additionally: * Warns at most every 10 minutes when burst of reconnections are detected. This is to warn about code patterns that may lead to performance issues, such as - ..code-block:: python + .. code-block:: python # Don't do this! Sender objects should be reused. for row_fields in data: diff --git a/docs/sender.rst b/docs/sender.rst index 840c1b1a..c03d29a7 100644 --- a/docs/sender.rst +++ b/docs/sender.rst @@ -404,6 +404,98 @@ Read more setup details in the `Enterprise quickstart `_ and the `role-based access control `_ guides. +.. _sender_good_practices: + +Good Practices +============== + +Create tables in advance +------------------------ + +If you're not happy with the default :ref:`table auto creation ` +logic, create the tables in advance. This will allow you to: + +* Specify the column types explicitly. + +* Configure de-duplication rules for the table. + +Specify your own timestamps +--------------------------- + +Always specify your own timestamps using the ``at`` parameter. + +If you use the ``ServerTimestamp`` option, QuestDB will not be able to +deduplicate rows, should you ever need to send them again. + +Instead, if you don't have a timestamp immediately available, use +``TimestampNanos.now()`` to set the timestamp to the current time. + +This is lighter-weight than using a fully-fledged ``datetime.datetime`` object. 
+ +Prefer ILP/HTTP +--------------- + +Use the ILP/HTTP protocol instead of ILP/TCP for better error reporting and +transaction control. + +.. _sender_tips_connection_reuse: + +Reuse Sender Objects +-------------------- + +Create longer-lived sender objects, as these are not automatically pooled. + +Instead of creating a new sender object for every request, create a single +sender object and reuse it across multiple requests. + +.. code-block:: python + + from questdb.ingress import Sender + + conf = 'http::addr=localhost:9000;' + with Sender.from_conf(conf) as sender: + # Use the sender object for multiple requests + sender.row(...) + sender.row(...) # remember auto-flush may trigger after any row + sender.row(...) + sender.flush() # you can flush explicitly at any point too + # ... + sender.row(...) + sender.dataframe(...) # auto-flush may trigger within a dataframe too + sender.flush() + +Use transactions +---------------- + +Use ref:`transactions ` if you want to ensure that a group +of rows is sent as a single transaction. + +This feature will guarantee that the rows are sent to the server as one, +even if you're using auto-flushing. + +Tune for Performance +-------------------- + +If you need better performance: + +* Tune for larger batches of rows. Tweak the auto-flush settings, or + call :func:`Sender.flush ` less frequently. + +* Use the :func:`Sender.dataframe ` method to + send dataframes instead of appending rows one by one. + +* Try multi-threading: The ``Sender`` logic is designed to release the Python + GIL whenever possible, so you should notice an uplift in performance if you + were bottlenecked by network I/O. + +* Avoid sending data which is very much out of order: The server will re-order + data by timestamp as it arrives. This is generally cheap for data that only + affects the recent past, but if you are sending data that is very much out of + order (for example, from different days), you may want to consider + re-ordering it before sending. 
For bulk data uploads of historical data, + consider using the `CSV import `_ + feature for best performance. + .. _sender_advanced: Advanced Usage diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 19df89ac..9516af01 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -2451,7 +2451,8 @@ cdef class Sender: f"Detected a burst of reconnections. " "This may indicate an inefficient coding pattern where the sender is " "frequently created and destroyed. " - "Consider reusing sender instance whenever possible.", + "Consider reusing sender instance whenever possible." + "See: https://py-questdb-client.readthedocs.io/en/latest/sender.html#reuse-sender-objects", UserWarning, stacklevel=1 ) From 34b2aa16c2ed2d35057d827b3bb7ba989ddf2abb Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Tue, 24 Jun 2025 12:09:36 +0100 Subject: [PATCH 24/30] fastparquet now ships for 3.13 --- ci/pip_install_deps.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/pip_install_deps.py b/ci/pip_install_deps.py index 91862c7e..807896a0 100644 --- a/ci/pip_install_deps.py +++ b/ci/pip_install_deps.py @@ -92,9 +92,7 @@ def main(args): import pandas import numpy import pyarrow - if (sys.version_info >= (3, 8) and sys.version_info < (3, 13)): - # As of this commit, fastparquet does not have a binary built for 3.13 - import fastparquet + import fastparquet if __name__ == "__main__": From d1a51160ae810d9466111eb9af57dc9996e5ce14 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 7 Jul 2025 09:59:15 +0100 Subject: [PATCH 25/30] updated c-questdb-client dependency --- c-questdb-client | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c-questdb-client b/c-questdb-client index 5af7515a..924bc390 160000 --- a/c-questdb-client +++ b/c-questdb-client @@ -1 +1 @@ -Subproject commit 5af7515a29bc5b612516474a83e1186c583a73b3 +Subproject commit 924bc3905388d24dbebb31dfe326fd64123cf52f From f517c91d01f6b816bec3fd98f20624dac1640513 Mon 
Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 7 Jul 2025 10:10:22 +0100 Subject: [PATCH 26/30] fixed license + tooling dependency issues --- ci/pip_install_deps.py | 1 + dev_requirements.txt | 16 ++++++++-------- pyproject.toml | 23 ++++++++++++++--------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/ci/pip_install_deps.py b/ci/pip_install_deps.py index 807896a0..2d35ddc9 100644 --- a/ci/pip_install_deps.py +++ b/ci/pip_install_deps.py @@ -73,6 +73,7 @@ def main(args): ensure_timezone() pip_install('pip') pip_install('setuptools') + pip_install('packaging') if args.pandas_version is not None and args.pandas_version != '': install_old_pandas_and_numpy(args) else: diff --git a/dev_requirements.txt b/dev_requirements.txt index 36930bfb..7dc6b7f2 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,11 +1,11 @@ -setuptools>=45.2.0 -Cython>=0.29.32 -wheel>=0.34.2 -cibuildwheel>=2.11.2 -Sphinx>=5.0.2 -sphinx-rtd-theme>=1.0.0 -twine>=4.0.1 -bump2version>=1.0.1 +setuptools>=80.9.0 +packaging>=25.0 +Cython>=3.1.2 +wheel>=0.45.1 +cibuildwheel>=3.0.1 +Sphinx>=8.2.3 +sphinx-rtd-theme>=3.0.2 +twine>=6.1.0 pandas>=1.3.5 numpy>=1.21.0 pyarrow>=10.0.1 diff --git a/pyproject.toml b/pyproject.toml index 9971bad1..72487a56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,19 +13,18 @@ classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Cython", "Programming Language :: Rust", - "License :: OSI Approved :: Apache Software License", "Topic :: Software Development :: Libraries", "Topic :: System :: Networking", "Topic :: Database :: Front-Ends", "Topic :: Scientific/Engineering" ] +license = "Apache-2.0" +license-files = ["LICENSE.txt"] dependencies = ["numpy>=1.21.0"] # Keep in sync with build-system.requires -[project.license] -text = "Apache License 2.0" [[project.authors]] name = "Adam Cimarosti" -email = "adam@questdb.io" +email = "adam@questdb.com" [project.optional-dependencies] publish = ["twine", "wheel"] 
@@ -42,11 +41,17 @@ Community = "http://community.questdb.io" [build-system] requires = [ - # Setuptools 18.0 and above properly handles Cython extensions. - "setuptools>=45.2.0", - "wheel>=0.34.2", - "cython>=0.29.24", - "numpy>=1.21.0", # keep in sync with project.dependencies + # Keep in sync with `dev_requirements.txt` + # as well as `ci/pip_install_deps.py`. + + # Tools should be up to date. + "setuptools>=80.9.0", + "packaging>=25.0", + "wheel>=0.45.1", + "cython>=3.1.2", + + # This is the oldest version we're happy to support. + "numpy>=1.21.0", ] From f259701e38da2ba3caf639404c84f38ead9161a7 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 7 Jul 2025 10:20:13 +0100 Subject: [PATCH 27/30] updated mentions of which version supports arrays to QuestDB 9.0.0 --- CHANGELOG.rst | 4 ++-- README.rst | 6 +++--- docs/conf.rst | 2 +- docs/sender.rst | 2 +- src/questdb/ingress.pyi | 10 +++++----- src/questdb/ingress.pyx | 8 ++++---- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 121c55c0..0d3a7446 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,7 +12,7 @@ Features ~~~~~~~~ This is the first major release of the QuestDB Python client library -which supports n-dimensional arrays of doubles for QuestDB servers 8.4.0 and up. +which supports n-dimensional arrays of doubles for QuestDB servers 9.0.0 and up. .. code-block:: python @@ -81,7 +81,7 @@ Features (currently only for ``np.float64`` element type and up to 32 dimensions). .. note:: - **Server Requirement**: This feature requires QuestDB server version 8.4.0 or higher. + **Server Requirement**: This feature requires QuestDB server version 9.0.0 or higher. Ensure your server is upgraded before ingesting array types, otherwise data ingestion will fail. .. 
code-block:: python diff --git a/README.rst b/README.rst index 3d4a0ac2..61aa1be0 100644 --- a/README.rst +++ b/README.rst @@ -27,7 +27,7 @@ The latest *stable* version of the library is **2.0.4** (`changelog `_). This release supports NumPy float64 arrays which are transmitted over a new -protocol version supported by QuestDB 8.4.0 or later. +protocol version supported by QuestDB 9.0.0 or later. :: @@ -53,7 +53,7 @@ The most common way to insert data is from a Pandas dataframe. 'amount': [0.00044, 0.001], # NumPy float64 arrays are supported from v3.0.0rc1 onwards. - # Note that requires QuestDB server >= 8.4.0 for array support + # Note that requires QuestDB server >= 9.0.0 for array support 'ord_book_bids': [ np.array([2615.54, 2618.63]), np.array([39269.98, 39270.00]) @@ -83,7 +83,7 @@ You can also send individual rows. This only requires a more minimal installatio 'amount': 0.00044, # NumPy float64 arrays are supported from v3.0.0rc1 onwards. - # Note that requires QuestDB server >= 8.4.0 for array support + # Note that requires QuestDB server >= 9.0.0 for array support 'ord_book_bids': np.array([2615.54, 2618.63]), }, at=TimestampNanos.now()) diff --git a/docs/conf.rst b/docs/conf.rst index 71812a3d..ac226d8d 100644 --- a/docs/conf.rst +++ b/docs/conf.rst @@ -241,7 +241,7 @@ Specifies the version of InfluxDB Line Protocol to use. Valid options are: TCP/TCPS: Defaults to version 1 for compatibility .. note:: - Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. + Protocol version ``2`` requires QuestDB server version 9.0.0 or higher. .. _sender_conf_buffer: diff --git a/docs/sender.rst b/docs/sender.rst index c03d29a7..f11753dc 100644 --- a/docs/sender.rst +++ b/docs/sender.rst @@ -303,7 +303,7 @@ Here is a configuration string with ``protocol_version=2`` for ``TCP``: See the :ref:`sender_conf_protocol_version` section for more details. .. note:: - Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. 
+ Protocol version ``2`` requires QuestDB server version 9.0.0 or higher. Error Reporting =============== diff --git a/src/questdb/ingress.pyi b/src/questdb/ingress.pyi index 7d5c206f..855e5f36 100644 --- a/src/questdb/ingress.pyi +++ b/src/questdb/ingress.pyi @@ -211,7 +211,7 @@ class SenderTransaction: The table name is taken from the transaction. - **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 9.0.0 or higher. """ def dataframe( @@ -295,7 +295,7 @@ class Buffer: This should match the ``cairo.max.file.name.length`` setting of the QuestDB instance you're connecting to. - **Note**: Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. + **Note**: Protocol version ``2`` requires QuestDB server version 9.0.0 or higher. .. code-block:: python @@ -452,7 +452,7 @@ class Buffer: * - ``None`` - *Column is skipped and not serialized.* - **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 9.0.0 or higher. If the destination table was already created, then the columns types will be cast to the types of the existing columns whenever possible @@ -732,7 +732,7 @@ class Buffer: message. * **η**: Support for NumPy arrays (``np.array``) requires QuestDB - server version 8.4.0 or higher. + server version 9.0.0 or higher. **Error Handling and Recovery** @@ -1037,7 +1037,7 @@ class Sender: Refer to the :func:`Buffer.row` documentation for details on arguments. - **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``np.array``) requires QuestDB server version 9.0.0 or higher. 
""" def dataframe( diff --git a/src/questdb/ingress.pyx b/src/questdb/ingress.pyx index 9516af01..0c620f17 100644 --- a/src/questdb/ingress.pyx +++ b/src/questdb/ingress.pyx @@ -656,7 +656,7 @@ cdef class SenderTransaction: The table name is taken from the transaction. - **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 9.0.0 or higher. """ if at is None: raise IngressError( @@ -790,7 +790,7 @@ cdef class Buffer: This should match the ``cairo.max.file.name.length`` setting of the QuestDB instance you're connecting to. - **Note**: Protocol version ``2`` requires QuestDB server version 8.4.0 or higher. + **Note**: Protocol version ``2`` requires QuestDB server version 9.0.0 or higher. .. code-block:: python @@ -1185,7 +1185,7 @@ cdef class Buffer: * - ``None`` - *Column is skipped and not serialized.* - **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 9.0.0 or higher. If the destination table was already created, then the columns types will be cast to the types of the existing columns whenever possible @@ -2508,7 +2508,7 @@ cdef class Sender: Refer to the :func:`Buffer.row` documentation for details on arguments. - **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 8.4.0 or higher. + **Note**: Support for NumPy arrays (``numpy.array``) requires QuestDB server version 9.0.0 or higher. 
""" if self._in_txn: raise IngressError( From 86c65679bc4a2c1d676541c1de0e0f02ddb7cdcd Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 7 Jul 2025 10:25:56 +0100 Subject: [PATCH 28/30] doc clean-up --- README.rst | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 61aa1be0..11a989d4 100644 --- a/README.rst +++ b/README.rst @@ -18,21 +18,12 @@ and full-connection encryption with Install ======= -The latest *stable* version of the library is **2.0.4** (`changelog `_). +The latest version of the library is **3.0.0** (`changelog `_). :: python3 -m pip install -U questdb[dataframe] - -The latest *pre-release* version of the library is **3.0.0r1** (`changelog `_). -This release supports NumPy float64 arrays which are transmitted over a new -protocol version supported by QuestDB 9.0.0 or later. - -:: - - python3 -m pip install --pre -U questdb[dataframe] - Quickstart ========== From 7cb771afe82e9b0d6333739fe80f5e4610a7fd80 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 7 Jul 2025 11:44:56 +0100 Subject: [PATCH 29/30] changelog and doc improvements --- CHANGELOG.rst | 8 +++- docs/conf.rst | 11 +++-- docs/installation.rst | 8 ++-- docs/sender.rst | 108 +++++++++++++++++++++++++----------------- 4 files changed, 83 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0d3a7446..656b4177 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,7 +5,7 @@ Changelog ========= -3.0.0 (2025-xx-xx) +3.0.0 (2025-07-07) ------------------ Features @@ -63,6 +63,12 @@ additionally: import questdb.ingress as qi qi.WARN_HIGH_RECONNECTS = False +* Fixed ILP/TCP connection shutdown on Windows where some rows could be + lost when closing the ``Sender``, even if explicitly flushed. + +* Added a "Good Practices" section to the "Sending Data over ILP" section of + the documentation. 
+ Breaking Changes ~~~~~~~~~~~~~~~~ Refer to the release notes for 3.0.0rc1 for the breaking changes introduced diff --git a/docs/conf.rst b/docs/conf.rst index ac226d8d..df492463 100644 --- a/docs/conf.rst +++ b/docs/conf.rst @@ -21,8 +21,7 @@ The format of the configuration string is:: .. note:: - * The keys are case-sensitive. - * The trailing semicolon is mandatory. + The keys are case-sensitive. The valid protocols are: @@ -228,7 +227,13 @@ The :ref:`sender_advanced` documentation should help you. Protocol Version ================ -Specifies the version of InfluxDB Line Protocol to use. Valid options are: +Specifies the version of InfluxDB Line Protocol to use. + +Here is a configuration string with ``protocol_version=2`` for ``TCP``:: + + tcp::addr=localhost:9000;protocol_version=2; + +Valid options are: * ``1`` - Text-based format compatible with InfluxDB database when used over HTTP. diff --git a/docs/installation.rst b/docs/installation.rst index a0ef6376..dc2f0405 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -6,7 +6,9 @@ Dependency ========== The Python QuestDB client does not have any additional run-time dependencies and -will run on any version of Python >= 3.8 on most platforms and architectures. +will run on any version of Python >= 3.9 on most platforms and architectures. + +From version 3.0.0, this library depends on ``numpy>=1.21.0``. Optional Dependencies --------------------- @@ -16,12 +18,10 @@ dependencies to be installed: * ``pandas`` * ``pyarrow`` -* ``numpy`` These are bundled as the ``dataframe`` extra. -Without this option, the ``questdb`` package has no dependencies other than -to the Python standard library. +Without this option, you may still ingest data row-by-row. PIP --- diff --git a/docs/sender.rst b/docs/sender.rst index f11753dc..de3b324f 100644 --- a/docs/sender.rst +++ b/docs/sender.rst @@ -264,47 +264,6 @@ completely disabled: See the :ref:`sender_conf_auto_flush` section for more details. 
and note that ``auto_flush_interval`` :ref:`does NOT start a timer `. -.. _sender_protocol_version: - -Protocol Version -================ - -Specifies the version of InfluxDB Line Protocol to use for sender. - -Valid options are: - -* ``1`` - Text-based format compatible with InfluxDB database when used over HTTP. -* ``2`` - Array support and binary format serialization for 64-bit floats (version specific to QuestDB). -* ``auto`` (default) - Automatic version selection based on connection type. - -Behavior details: - -^^^^^^^^^^^^^^^^^ - -+----------------+--------------------------------------------------------------+ -| Value | Behavior | -+================+==============================================================+ -| | - Plain text serialization | -| ``1`` | - Compatible with InfluxDB servers | -| | - No array type support | -+----------------+--------------------------------------------------------------+ -| ``2`` | - Binary encoding for f64 | -| | - Full support for array | -+----------------+--------------------------------------------------------------+ -| | - **HTTP/HTTPS**: Auto-detects server capability during | -| ``auto`` | handshake (supports version negotiation) | -| | - **TCP/TCPS**: Defaults to version 1 for compatibility | -+----------------+--------------------------------------------------------------+ - -Here is a configuration string with ``protocol_version=2`` for ``TCP``: - -``tcp::addr=localhost:9000;protocol_version=2;`` - -See the :ref:`sender_conf_protocol_version` section for more details. - -.. note:: - Protocol version ``2`` requires QuestDB server version 9.0.0 or higher. - Error Reporting =============== @@ -467,7 +426,7 @@ sender object and reuse it across multiple requests. Use transactions ---------------- -Use ref:`transactions ` if you want to ensure that a group +Use :ref:`transactions ` if you want to ensure that a group of rows is sent as a single transaction. 
This feature will guarantee that the rows are sent to the server as one, @@ -766,6 +725,46 @@ auto-flush interval:: ... +.. _sender_protocol_version: + +Protocol Version +================ + +Explicitly specifies the version of InfluxDB Line Protocol to use for sender. + +Valid options are: + +* ``protocol_version=1`` +* ``protocol_version=2`` +* ``protocol_version=auto`` (default, if unspecified) + +Behavior details: + ++----------------+--------------------------------------------------------------+ +| Value | Behavior | ++================+==============================================================+ +| | - Plain text serialization | +| ``1`` | - Compatible with InfluxDB servers | +| | - No array type support | ++----------------+--------------------------------------------------------------+ +| ``2`` | - Binary encoding for f64 | +| | - Full support for array | ++----------------+--------------------------------------------------------------+ +| | - **HTTP/HTTPS**: Auto-detects server capability during | +| ``auto`` | handshake (supports version negotiation) | +| | - **TCP/TCPS**: Defaults to version 1 for compatibility | ++----------------+--------------------------------------------------------------+ + +Here is a configuration string with ``protocol_version=2`` for ``TCP``:: + + tcp::addr=localhost:9000;protocol_version=2; + +See the :ref:`sender_conf_protocol_version` section for more details. + +.. note:: + Protocol version ``2`` requires QuestDB server version 9.0.0 or higher. + + .. _sender_which_protocol: ILP/TCP or ILP/HTTP @@ -773,14 +772,35 @@ ILP/TCP or ILP/HTTP The sender supports ``tcp``, ``tcps``, ``http``, and ``https`` protocols. -You should prefer to use the new ILP/HTTP protocol instead of ILP/TCP in most -cases as it provides better feedback on errors and transaction control. 
+**You should prefer to use the new ILP/HTTP protocol instead of ILP/TCP in most +cases as it provides better feedback on errors and transaction control.** ILP/HTTP is available from: * QuestDB 7.3.10 and later. * QuestDB Enterprise 1.2.7 and later. +ILP/HTTP also supports :ref:`protocol version ` +auto-detection. + ++----------------+--------------------------------------------------------------+ +| Protocol | Protocol version auto-detection | ++================+==============================================================+ +| ILP/HTTP | **Yes**: The client will communicate to the server using the | +| | latest version supported by both client and the server. | ++----------------+--------------------------------------------------------------+ +| ILP/TCP | **No**: You need to | +| | :ref:`configure ` | +| | ``protocol_version=N`` to match a version supported by | +| | the server. | ++----------------+--------------------------------------------------------------+ + +.. note:: + + The client will disable features that require newer + protocol versions than the one used to communicate with the server. + + Since TCP does not block for a response it is useful for high-throughput scenarios in higher latency networks or on older versions of QuestDB which do not support ILP/HTTP quite yet. 
From 5ca55554c562a203a530af0b4a50ade76c2cd3bb Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 7 Jul 2025 12:02:13 +0100 Subject: [PATCH 30/30] fixed CI by building questdb with Java 17 --- ci/run_tests_pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 4d88f61c..713816e2 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -57,7 +57,7 @@ stages: displayName: "Compile QuestDB" inputs: mavenPOMFile: 'questdb/pom.xml' - jdkVersionOption: '1.11' + jdkVersionOption: '1.17' options: "-DskipTests -Pbuild-web-console" condition: eq(variables.vsQuestDbMaster, true) - script: python3 proj.py test 1