Skip to content

ENH: adding support for Py3.6+ memory tracing for khash-maps #38048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@ cnp.import_array()


from pandas._libs cimport util
from pandas._libs.khash cimport kh_str_t, khiter_t
from pandas._libs.khash cimport KHASH_TRACE_DOMAIN, kh_str_t, khiter_t
from pandas._libs.missing cimport checknull


def get_hashtable_trace_domain():
    """
    Return the tracemalloc domain id used for khash allocations.

    The value is the ``KHASH_TRACE_DOMAIN`` constant cimported from
    ``pandas._libs.khash``; Python code can pass it to
    ``tracemalloc.DomainFilter`` to select only the khash traces.
    """
    return KHASH_TRACE_DOMAIN


cdef int64_t NPY_NAT = util.get_nat()
SIZE_HINT_LIMIT = (1 << 20) + 7

Expand Down
25 changes: 15 additions & 10 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,11 @@ cdef class {{name}}HashTable(HashTable):

def sizeof(self, deep=False):
""" return the size of my table in bytes """
return self.table.n_buckets * (sizeof({{dtype}}_t) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
for_pairs = self.table.n_buckets * (sizeof({{dtype}}_t) + # keys
sizeof(Py_ssize_t)) # vals
return overhead + for_flags + for_pairs

cpdef get_item(self, {{dtype}}_t val):
cdef:
Expand Down Expand Up @@ -669,10 +671,11 @@ cdef class StringHashTable(HashTable):
self.table = NULL

def sizeof(self, deep=False):
""" return the size of my table in bytes """
return self.table.n_buckets * (sizeof(char *) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
for_pairs = self.table.n_buckets * (sizeof(char *) + # keys
sizeof(Py_ssize_t)) # vals
return overhead + for_flags + for_pairs

cpdef get_item(self, str val):
cdef:
Expand Down Expand Up @@ -994,9 +997,11 @@ cdef class PyObjectHashTable(HashTable):

def sizeof(self, deep=False):
""" return the size of my table in bytes """
return self.table.n_buckets * (sizeof(PyObject *) + # keys
sizeof(Py_ssize_t) + # vals
sizeof(uint32_t)) # flags
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
for_pairs = self.table.n_buckets * (sizeof(PyObject *) + # keys
sizeof(Py_ssize_t)) # vals
return overhead + for_flags + for_pairs

cpdef get_item(self, object val):
cdef:
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/khash.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ from numpy cimport (


cdef extern from "khash_python.h":
const int KHASH_TRACE_DOMAIN

ctypedef uint32_t khint_t
ctypedef khint_t khiter_t

Expand Down
44 changes: 31 additions & 13 deletions pandas/_libs/src/klib/khash.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,24 @@ int main() {
#include "../inline_helper.h"


// hooks for memory allocator, C-runtime allocator used per default
//
// Every allocation made by the khash macros in this header goes through one
// of the four KHASH_* hooks below. A file that wants a different allocator
// defines the hook(s) *before* including this header; any hook that is still
// undefined at this point falls back to the matching C runtime function.
#ifndef KHASH_MALLOC
#define KHASH_MALLOC malloc
#endif

#ifndef KHASH_REALLOC
#define KHASH_REALLOC realloc
#endif

#ifndef KHASH_CALLOC
#define KHASH_CALLOC calloc
#endif

#ifndef KHASH_FREE
#define KHASH_FREE free
#endif


#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
Expand All @@ -138,7 +156,7 @@ typedef unsigned char khint8_t;
#endif

typedef double khfloat64_t;
typedef double khfloat32_t;
typedef float khfloat32_t;

typedef khint32_t khint_t;
typedef khint_t khiter_t;
Expand Down Expand Up @@ -265,14 +283,14 @@ static const double __ac_HASH_UPPER = 0.77;
khval_t *vals; \
} kh_##name##_t; \
SCOPE kh_##name##_t *kh_init_##name(void) { \
return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
return (kh_##name##_t*)KHASH_CALLOC(1, sizeof(kh_##name##_t)); \
} \
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
{ \
if (h) { \
free(h->keys); free(h->flags); \
free(h->vals); \
free(h); \
KHASH_FREE(h->keys); KHASH_FREE(h->flags); \
KHASH_FREE(h->vals); \
KHASH_FREE(h); \
} \
} \
SCOPE void kh_clear_##name(kh_##name##_t *h) \
Expand Down Expand Up @@ -305,11 +323,11 @@ static const double __ac_HASH_UPPER = 0.77;
if (new_n_buckets < 4) new_n_buckets = 4; \
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
else { /* hash table size to be changed (shrink or expand); rehash */ \
new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
new_flags = (khint32_t*)KHASH_MALLOC(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
if (h->n_buckets < new_n_buckets) { /* expand */ \
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \
if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \
} /* otherwise shrink */ \
} \
} \
Expand Down Expand Up @@ -342,10 +360,10 @@ static const double __ac_HASH_UPPER = 0.77;
} \
} \
if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \
if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \
} \
free(h->flags); /* free the working space */ \
KHASH_FREE(h->flags); /* free the working space */ \
h->flags = new_flags; \
h->n_buckets = new_n_buckets; \
h->n_occupied = h->size; \
Expand Down Expand Up @@ -691,8 +709,8 @@ KHASH_MAP_INIT_INT64(int64, size_t)
KHASH_MAP_INIT_UINT64(uint64, size_t)
KHASH_MAP_INIT_INT16(int16, size_t)
KHASH_MAP_INIT_UINT16(uint16, size_t)
KHASH_MAP_INIT_INT16(int8, size_t)
KHASH_MAP_INIT_UINT16(uint8, size_t)
KHASH_MAP_INIT_INT8(int8, size_t)
KHASH_MAP_INIT_UINT8(uint8, size_t)


#endif /* __AC_KHASH_H */
57 changes: 55 additions & 2 deletions pandas/_libs/src/klib/khash_python.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,59 @@
#include <string.h>
#include <Python.h>

// khash should report usage to tracemalloc
//
// The tracking calls are only wired up for Python 3.6 and newer.
#if PY_VERSION_HEX >= 0x03060000
#include <pymem.h>
// On 3.6 the functions are reached through their underscore-prefixed names;
// alias them so the code below can always use the unprefixed spelling.
#if PY_VERSION_HEX < 0x03070000
#define PyTraceMalloc_Track _PyTraceMalloc_Track
#define PyTraceMalloc_Untrack _PyTraceMalloc_Untrack
#endif
#else
// Older interpreters: both calls expand to nothing, so tracking is a no-op.
#define PyTraceMalloc_Track(...)
#define PyTraceMalloc_Untrack(...)
#endif


// Domain id handed to every PyTraceMalloc_Track/Untrack call in this header,
// which lets khash allocations be selected by domain in a tracemalloc
// snapshot.
static const int KHASH_TRACE_DOMAIN = 424242;
// malloc() wrapper that reports the new block to tracemalloc.
//
// Allocates `size` bytes with malloc() and, on success, registers the block
// (address and size) in the KHASH_TRACE_DOMAIN domain.
// Returns the block, or NULL when malloc() failed; nothing is registered in
// that case.
void *traced_malloc(size_t size){
    void *block = malloc(size);
    if(block == NULL){
        return NULL;
    }
    PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)block, size);
    return block;
}

// calloc() wrapper that reports the new block to tracemalloc.
//
// Allocates `num` elements of `size` bytes each with calloc() and, on
// success, registers the block in the KHASH_TRACE_DOMAIN domain with a
// recorded size of num*size bytes.
// Returns the block, or NULL when calloc() failed; nothing is registered in
// that case.
void *traced_calloc(size_t num, size_t size){
    void *block = calloc(num, size);
    if(block == NULL){
        return NULL;
    }
    PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)block, num*size);
    return block;
}

// realloc() wrapper that keeps the tracemalloc bookkeeping in sync.
//
// Resizes `old_ptr` to `size` bytes with realloc(). When realloc() returns a
// non-NULL block:
//   - if the block moved, the trace for the old address is removed;
//   - the returned address is (re-)registered with the new size.
// When realloc() returns NULL no tracing call is made at all and NULL is
// returned to the caller.
void *traced_realloc(void* old_ptr, size_t size){
    void *block = realloc(old_ptr, size);
    if(block == NULL){
        return NULL;
    }
    if(block != old_ptr){
        // the data now lives at a different address: drop the stale trace
        PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)old_ptr);
    }
    PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)block, size);
    return block;
}

// free() wrapper that removes the block's trace before releasing it.
//
// For a non-NULL `ptr` the trace registered under KHASH_TRACE_DOMAIN is
// removed first; `ptr` is then passed to free() in every case, including
// NULL.
void traced_free(void* ptr){
    if(ptr == NULL){
        // no untrack call for NULL; still forward to free()
        free(ptr);
        return;
    }
    PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)ptr);
    free(ptr);
}


// Route every khash allocation through the tracemalloc-aware wrappers.
// These must be defined before khash.h is included: khash.h only supplies
// its C-runtime defaults for hooks that are still undefined at that point.
#define KHASH_MALLOC traced_malloc
#define KHASH_REALLOC traced_realloc
#define KHASH_CALLOC traced_calloc
#define KHASH_FREE traced_free
#include "khash.h"

// Previously we were using the built in cpython hash function for doubles
Expand Down Expand Up @@ -128,7 +181,7 @@ typedef struct {
typedef kh_str_starts_t* p_kh_str_starts_t;

p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) {
kh_str_starts_t *result = (kh_str_starts_t*)calloc(1, sizeof(kh_str_starts_t));
kh_str_starts_t *result = (kh_str_starts_t*)KHASH_CALLOC(1, sizeof(kh_str_starts_t));
result->table = kh_init_str();
return result;
}
Expand All @@ -151,7 +204,7 @@ khint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const

void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) {
kh_destroy_str(table->table);
free(table);
KHASH_FREE(table);
}

void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khint_t val) {
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2724,7 +2724,7 @@ def memory_usage(self, index=True, deep=False) -> Series:
many repeated values.

>>> df['object'].astype('category').memory_usage(deep=True)
5216
5244
"""
result = self._constructor_sliced(
[c.memory_usage(index=False, deep=deep) for col, c in self.items()],
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_memory_usage(index_or_series_obj):
if isinstance(obj, Index):
expected = 0
else:
expected = 80 if IS64 else 48
expected = 108 if IS64 else 64
assert res_deep == res == expected
elif is_object or is_categorical:
# only deep will pick them up
Expand Down
85 changes: 78 additions & 7 deletions pandas/tests/libs/test_hashtable.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from contextlib import contextmanager
import tracemalloc

import numpy as np
import pytest

Expand All @@ -6,9 +9,27 @@
import pandas._testing as tm


@contextmanager
def activated_tracemalloc():
    """
    Run the ``with`` body with tracemalloc tracing switched on.

    Tracing is started on entry only when it is not already active, and it is
    stopped on exit only if this context manager started it. A trace that was
    enabled beforehand (for example through ``PYTHONTRACEMALLOC``) is therefore
    still running, with its collected data intact, after the block exits.
    """
    # Remember whether we are the ones turning tracing on, so that the cleanup
    # below never switches off (and thereby clears) somebody else's trace.
    started_here = not tracemalloc.is_tracing()
    if started_here:
        tracemalloc.start()
    try:
        yield
    finally:
        if started_here:
            tracemalloc.stop()


def get_allocated_khash_memory():
    """
    Return the summed size of all live traces in the khash tracemalloc domain.

    A snapshot of the current traces is taken, reduced to the traces whose
    domain equals ``ht.get_hashtable_trace_domain()``, and the ``size`` of
    each remaining trace is added up.
    """
    full_snapshot = tracemalloc.take_snapshot()
    khash_only = tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain())
    khash_traces = full_snapshot.filter_traces((khash_only,)).traces
    return sum(trace.size for trace in khash_traces)


@pytest.mark.parametrize(
"table_type, dtype",
[
(ht.PyObjectHashTable, np.object_),
(ht.Int64HashTable, np.int64),
(ht.UInt64HashTable, np.uint64),
(ht.Float64HashTable, np.float64),
Expand Down Expand Up @@ -53,13 +74,15 @@ def test_get_set_contains_len(self, table_type, dtype):
assert str(index + 2) in str(excinfo.value)

def test_map(self, table_type, dtype):
N = 77
table = table_type()
keys = np.arange(N).astype(dtype)
vals = np.arange(N).astype(np.int64) + N
table.map(keys, vals)
for i in range(N):
assert table.get_item(keys[i]) == i + N
# PyObjectHashTable has no map-method
if table_type != ht.PyObjectHashTable:
N = 77
table = table_type()
keys = np.arange(N).astype(dtype)
vals = np.arange(N).astype(np.int64) + N
table.map(keys, vals)
for i in range(N):
assert table.get_item(keys[i]) == i + N

def test_map_locations(self, table_type, dtype):
N = 8
Expand Down Expand Up @@ -101,6 +124,53 @@ def test_unique(self, table_type, dtype):
unique = table.unique(keys)
tm.assert_numpy_array_equal(unique, expected)

def test_tracemalloc_works(self, table_type, dtype):
    """
    A filled table's ``sizeof()`` must equal the memory tracemalloc recorded
    in the khash domain, and deleting the table must release all of it.
    """
    # presumably capped at 256 because 8-bit dtypes have no more distinct
    # values than that
    n_keys = 256 if dtype in (np.int8, np.uint8) else 30000
    keys = np.arange(n_keys).astype(dtype)
    with activated_tracemalloc():
        table = table_type()
        table.map_locations(keys)
        traced_bytes = get_allocated_khash_memory()
        reported_bytes = table.sizeof()
        assert traced_bytes == reported_bytes
        del table
        assert get_allocated_khash_memory() == 0

def test_tracemalloc_for_empty(self, table_type, dtype):
    """
    A freshly created, empty table's ``sizeof()`` must equal the memory
    tracemalloc recorded in the khash domain; deleting it must release it all.
    """
    with activated_tracemalloc():
        empty_table = table_type()
        traced_bytes = get_allocated_khash_memory()
        reported_bytes = empty_table.sizeof()
        assert traced_bytes == reported_bytes
        del empty_table
        assert get_allocated_khash_memory() == 0


def test_tracemalloc_works_for_StringHashTable():
    """
    A filled ``StringHashTable``'s ``sizeof()`` must equal the memory
    tracemalloc recorded in the khash domain, and deleting the table must
    release all of it.
    """
    N = 1000
    # ``np.compat.unicode`` was only an alias of the builtin ``str``; using
    # ``str`` directly yields the same keys and does not depend on the
    # ``np.compat`` namespace, which newer NumPy no longer exposes.
    keys = np.arange(N).astype(str).astype(np.object_)
    with activated_tracemalloc():
        table = ht.StringHashTable()
        table.map_locations(keys)
        used = get_allocated_khash_memory()
        my_size = table.sizeof()
        assert used == my_size
        del table
        assert get_allocated_khash_memory() == 0


def test_tracemalloc_for_empty_StringHashTable():
    """
    An empty ``StringHashTable``'s ``sizeof()`` must equal the memory
    tracemalloc recorded in the khash domain; deleting it must release it all.
    """
    with activated_tracemalloc():
        empty_table = ht.StringHashTable()
        traced_bytes = get_allocated_khash_memory()
        reported_bytes = empty_table.sizeof()
        assert traced_bytes == reported_bytes
        del empty_table
        assert get_allocated_khash_memory() == 0


@pytest.mark.parametrize(
"table_type, dtype",
Expand Down Expand Up @@ -157,6 +227,7 @@ def get_ht_function(fun_name, type_suffix):
@pytest.mark.parametrize(
"dtype, type_suffix",
[
(np.object_, "object"),
(np.int64, "int64"),
(np.uint64, "uint64"),
(np.float64, "float64"),
Expand Down