Skip to content

Commit d98210c

Browse files
authored
Merge pull request #55 from ngoldbaum/scalar-speedup
make getitem return str
2 parents 718fea9 + 15ccbb9 commit d98210c

File tree

4 files changed

+21
-15
lines changed

4 files changed

+21
-15
lines changed

stringdtype/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@ per-file-ignores = {"__init__.py" = ["F401"]}
3131

3232
[tool.meson-python.args]
3333
dist = []
34-
setup = ["-Ddebug=true", "-Doptimization=0"]
34+
setup = ["-Ddebug=true", "-Doptimization=2"]
3535
compile = []
3636
install = []

stringdtype/stringdtype/scalar.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,4 @@
22

33

44
class StringScalar(str):
5-
def partition(self, sep):
6-
ret = super().partition(sep)
7-
return (str(ret[0]), str(ret[1]), str(ret[2]))
8-
9-
def rpartition(self, sep):
10-
ret = super().rpartition(sep)
11-
return (str(ret[0]), str(ret[1]), str(ret[2]))
5+
pass

stringdtype/stringdtype/src/dtype.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -146,26 +146,35 @@ static PyObject *
146146
stringdtype_getitem(StringDTypeObject *NPY_UNUSED(descr), char **dataptr)
147147
{
148148
char *data;
149+
size_t len;
149150

150151
if (*dataptr == NULL) {
151152
data = "\0";
153+
len = 0;
152154
}
153155
else {
154156
data = ((ss *)dataptr)->buf;
157+
len = ((ss *)dataptr)->len;
155158
}
156159

157-
PyObject *val_obj = PyUnicode_FromString(data);
160+
PyObject *val_obj = PyUnicode_FromStringAndSize(data, len);
158161

159162
if (val_obj == NULL) {
160163
return NULL;
161164
}
162165

163-
PyObject *res = PyObject_CallFunctionObjArgs((PyObject *)StringScalar_Type,
164-
val_obj, NULL);
165-
166-
Py_DECREF(val_obj);
167-
168-
return res;
166+
/*
167+
* In principle we should return a StringScalar instance here, but
168+
* creating a StringScalar via PyObject_CallFunctionObjArgs has
169+
* approximately 4 times as much overhead as just returning a str
170+
* here. This is due to Python overhead as well as copying the string
171+
* buffer from val_obj to the StringScalar we'd like to return. In
172+
* principle we could avoid this by making a C function like
173+
* PyUnicode_FromStringAndSize that fills a StringScalar instead of a
174+
* str. For now (4-11-23) we are punting on that with the expectation that
175+
* eventually the scalar type for this dtype will be str.
176+
*/
177+
return val_obj;
169178
}
170179

171180
// PyArray_NonzeroFunc

stringdtype/stringdtype/src/main.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ PyInit__main(void)
104104
if (mod == NULL) {
105105
goto error;
106106
}
107+
107108
StringScalar_Type =
108109
(PyTypeObject *)PyObject_GetAttrString(mod, "StringScalar");
109110
Py_DECREF(mod);
@@ -116,7 +117,9 @@ PyInit__main(void)
116117
goto error;
117118
}
118119

120+
Py_INCREF((PyObject *)&StringDType);
119121
if (PyModule_AddObject(m, "StringDType", (PyObject *)&StringDType) < 0) {
122+
Py_DECREF((PyObject *)&StringDType);
120123
goto error;
121124
}
122125

0 commit comments

Comments
 (0)