diff --git a/stringdtype/pyproject.toml b/stringdtype/pyproject.toml index 3ad86297..1fcf0803 100644 --- a/stringdtype/pyproject.toml +++ b/stringdtype/pyproject.toml @@ -31,6 +31,6 @@ per-file-ignores = {"__init__.py" = ["F401"]} [tool.meson-python.args] dist = [] -setup = ["-Ddebug=true", "-Doptimization=0"] +setup = ["-Ddebug=true", "-Doptimization=2"] compile = [] install = [] diff --git a/stringdtype/stringdtype/scalar.py b/stringdtype/stringdtype/scalar.py index e95aac35..db82debd 100644 --- a/stringdtype/stringdtype/scalar.py +++ b/stringdtype/stringdtype/scalar.py @@ -2,10 +2,4 @@ class StringScalar(str): - def partition(self, sep): - ret = super().partition(sep) - return (str(ret[0]), str(ret[1]), str(ret[2])) - - def rpartition(self, sep): - ret = super().rpartition(sep) - return (str(ret[0]), str(ret[1]), str(ret[2])) + pass diff --git a/stringdtype/stringdtype/src/dtype.c b/stringdtype/stringdtype/src/dtype.c index 37915133..c188d774 100644 --- a/stringdtype/stringdtype/src/dtype.c +++ b/stringdtype/stringdtype/src/dtype.c @@ -146,26 +146,35 @@ static PyObject * stringdtype_getitem(StringDTypeObject *NPY_UNUSED(descr), char **dataptr) { char *data; + size_t len; if (*dataptr == NULL) { data = "\0"; + len = 0; } else { data = ((ss *)dataptr)->buf; + len = ((ss *)dataptr)->len; } - PyObject *val_obj = PyUnicode_FromString(data); + PyObject *val_obj = PyUnicode_FromStringAndSize(data, len); if (val_obj == NULL) { return NULL; } - PyObject *res = PyObject_CallFunctionObjArgs((PyObject *)StringScalar_Type, - val_obj, NULL); - - Py_DECREF(val_obj); - - return res; + /* + * In principle we should return a StringScalar instance here, but + * creating a StringScalar via PyObject_CallFunctionObjArgs has + * approximately 4 times as much overhead as just returning a str + * here. This is due to Python overhead as well as copying the string + * buffer from val_obj to the StringScalar we'd like to return. 
In + * principle we could avoid this by making a C function like + * PyUnicode_FromStringAndSize that fills a StringScalar instead of a + * str. For now (4-11-23) we are punting on that with the expectation that + * eventually the scalar type for this dtype will be str. + */ + return val_obj; } // PyArray_NonzeroFunc diff --git a/stringdtype/stringdtype/src/main.c b/stringdtype/stringdtype/src/main.c index 64797492..00f4d0e0 100644 --- a/stringdtype/stringdtype/src/main.c +++ b/stringdtype/stringdtype/src/main.c @@ -104,6 +104,7 @@ PyInit__main(void) if (mod == NULL) { goto error; } + StringScalar_Type = (PyTypeObject *)PyObject_GetAttrString(mod, "StringScalar"); Py_DECREF(mod); @@ -116,7 +117,9 @@ PyInit__main(void) goto error; } + Py_INCREF((PyObject *)&StringDType); if (PyModule_AddObject(m, "StringDType", (PyObject *)&StringDType) < 0) { + Py_DECREF((PyObject *)&StringDType); goto error; }