Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ def values_from_object(obj: object):
""" return my values or the object if we are say an ndarray """
func: object

func = getattr(obj, 'get_values', None)
if getattr(obj, '_typ', '') == 'dataframe':
return obj.values

func = getattr(obj, '_internal_get_values', None)
if func is not None:
obj = func()

Expand Down
14 changes: 12 additions & 2 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,19 @@ static PyObject *get_values(PyObject *obj) {
}
}

if (!values && PyObject_HasAttrString(obj, "get_values")) {
if (!values && PyObject_HasAttrString(obj, "_internal_get_values")) {
PRINTMARK();
values = PyObject_CallMethod(obj, "get_values", NULL);
values = PyObject_CallMethod(obj, "_internal_get_values", NULL);
if (values && !PyArray_CheckExact(values)) {
PRINTMARK();
Py_DECREF(values);
values = NULL;
}
}

if (!values && PyObject_HasAttrString(obj, "get_block_values")) {
PRINTMARK();
values = PyObject_CallMethod(obj, "get_block_values", NULL);
if (values && !PyArray_CheckExact(values)) {
PRINTMARK();
Py_DECREF(values);
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1582,7 +1582,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

if is_sparse(arr):
arr = arr.get_values()
arr = arr.to_dense()
elif isinstance(arr, (ABCIndexClass, ABCSeries)):
arr = arr.values

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,9 @@ def get_values(self):
A numpy array of the same dtype as categorical.categories.dtype or
Index if datetime / periods.
"""
raise Exception("USING GET_VALUES")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I be concerned that CI is passing with this here? :)

Perhaps we need a test ensuring that Categorical.get_values() raises a warning.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I be concerned that CI is passing with this here? :)

No, that's a good sign :)
That was my way of finding all the places where it was being called, so that I could fix them (although it is not a perfect approach, since in certain places the error could also be caught).

But I certainly still need to convert this into a warning and add tests that check that.


def _internal_get_values(self):
# if we are a datetime and period index, return Index to keep metadata
if is_datetimelike(self.categories):
return self.categories.take(self._codes, fill_value=np.nan)
Expand Down Expand Up @@ -1937,7 +1940,7 @@ def __iter__(self):
"""
Returns an Iterator over the values of this Categorical.
"""
return iter(self.get_values().tolist())
return iter(self._internal_get_values().tolist())

def __contains__(self, key):
"""
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def _sparse_array_op(

if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
with np.errstate(all='ignore'):
result = op(left.get_values(), right.get_values())
result = op(left.to_dense(), right.to_dense())
fill = op(_get_fill(left), _get_fill(right))

if left.sp_index.ngaps == 0:
Expand Down Expand Up @@ -1473,7 +1473,12 @@ def to_dense(self):
return np.asarray(self, dtype=self.sp_values.dtype)

# TODO: Look into deprecating this in favor of `to_dense`.
get_values = to_dense
# get_values = to_dense

def get_values(self):
raise Exception("USING GET_VALUES")

_internal_get_values = to_dense

# ------------------------------------------------------------------------
# IO
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def _concat_categorical(to_concat, axis=0):
return union_categoricals(categoricals)

# extract the categoricals & coerce to object if needed
to_concat = [x.get_values() if is_categorical_dtype(x.dtype)
to_concat = [x._internal_get_values() if is_categorical_dtype(x.dtype)
else np.asarray(x).ravel() if not is_datetime64tz_dtype(x)
else np.asarray(x.astype(object)) for x in to_concat]
result = _concat_compat(to_concat)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,7 @@ def to_records(self, index=True, convert_datetime64=None,
else:
ix_vals = [self.index.values]

arrays = ix_vals + [self[c].get_values() for c in self.columns]
arrays = ix_vals + [self[c]._internal_get_values() for c in self.columns]

count = 0
index_names = list(self.index.names)
Expand All @@ -1664,7 +1664,7 @@ def to_records(self, index=True, convert_datetime64=None,
names = [str(name) for name in itertools.chain(index_names,
self.columns)]
else:
arrays = [self[c].get_values() for c in self.columns]
arrays = [self[c]._internal_get_values() for c in self.columns]
names = [str(c) for c in self.columns]
index_names = []

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5368,6 +5368,9 @@ def get_values(self):
[nan, 2.],
[nan, 3.]])
"""
raise Exception("USING GET_VALUES")

def _internal_get_values(self):
return self.values

def get_dtype_counts(self):
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,7 @@ def nunique(self, dropna=True):
"""
ids, _, _ = self.grouper.group_info

val = self.obj.get_values()
val = self.obj._internal_get_values()

try:
sorter = np.lexsort((val, ids))
Expand Down Expand Up @@ -1185,7 +1185,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
bins=bins)

ids, _, _ = self.grouper.group_info
val = self.obj.get_values()
val = self.obj._internal_get_values()

# groupby removes null keys from groupings
mask = ids != -1
Expand Down Expand Up @@ -1299,7 +1299,7 @@ def count(self):
Count of values within each group.
"""
ids, _, ngroups = self.grouper.group_info
val = self.obj.get_values()
val = self.obj._internal_get_values()

mask = (ids != -1) & ~isna(val)
ids = ensure_platform_int(ids)
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3804,6 +3804,9 @@ def get_values(self):
>>> midx.get_values().ndim
1
"""
raise Exception("USING GET_VALUES")

def _internal_get_values(self):
return self.values

@Appender(IndexOpsMixin.memory_usage.__doc__)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,12 @@ def _wrap_setop_result(self, other, result):

def get_values(self):
""" return the underlying data as an ndarray """
raise Exception("USING GET_VALUES")
return self._data.get_values()

def _internal_get_values(self):
return self._data._internal_get_values()

def tolist(self):
return self._data.tolist()

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,7 +1216,7 @@ def values(self):
for i in range(self.nlevels):
vals = self._get_level_values(i)
if is_categorical_dtype(vals):
vals = vals.get_values()
vals = vals._internal_get_values()
if (isinstance(vals.dtype, ExtensionDtype)
or hasattr(vals, '_box_values')):
vals = vals.astype(object)
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ def get_values(self, dtype=None):
return self.values.astype(object)
return self.values

def get_block_values(self, dtype=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we really need this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this still needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the JSON C code does handle blocks (if I remove the get_block_values part in objToJSON.c that I introduced in this PR, a couple of tests fail).
I could give this the same name as for Series/Index (i.e. _internal_get_values), but I prefer a distinct name to make it clear that the JSON code is handling blocks and not Series/Index (and that also makes it clear that all other places where _internal_get_values is used are not handling blocks). That will also make it easier to isolate and try to remove the block handling in the JSON C code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened #27164 for follow-up on this JSON issue

return self.get_values(dtype=dtype)

def to_dense(self):
return self.values.view()

Expand Down Expand Up @@ -2915,7 +2918,7 @@ def to_dense(self):
# Categorical.get_values returns a DatetimeIndex for datetime
# categories, so we can't simply use `np.asarray(self.values)` like
# other types.
return self.values.get_values()
return self.values._internal_get_values()

def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
""" convert to our native types format, slicing if desired """
Expand Down Expand Up @@ -3216,6 +3219,7 @@ def _putmask_preserve(nv, n):
dtype, _ = maybe_promote(n.dtype)

if is_extension_type(v.dtype) and is_object_dtype(dtype):
# ?
v = v.get_values(dtype)
else:
v = v.astype(dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1893,7 +1893,7 @@ def wrapper(self, other, axis=None):
name=res_name, dtype='bool')

else:
values = self.get_values()
values = self.to_numpy()

with np.errstate(all='ignore'):
res = na_op(values, other)
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,11 +506,17 @@ def get_values(self):
"""
Same as values (but handles sparseness conversions); is a view.

.. deprecated:: 0.25.0

Returns
-------
numpy.ndarray
Data of the Series.
"""
raise Exception("USING GET_VALUES")
#warnings.warn("deprecated", FutureWarning, stacklevel=2)

def _internal_get_values(self):
return self._data.get_values()

@property
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def xs(self, key, axis=0, copy=False):
return data

i = self.index.get_loc(key)
data = self.take([i]).get_values()[0]
data = self.take([i])._internal_get_values()[0]
return Series(data, index=self.columns)

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -694,7 +694,7 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
if col not in self:
continue
if row_indexer is not None:
new_arrays[col] = algos.take_1d(self[col].get_values(),
new_arrays[col] = algos.take_1d(self[col]._internal_get_values(),
row_indexer,
fill_value=fill_value)
else:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def __repr__(self):
def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
filter_type=None, **kwds):
""" perform a reduction operation """
return op(self.get_values(), skipna=skipna, **kwds)
return op(self.array.to_dense(), skipna=skipna, **kwds)

def __getstate__(self):
# pickling
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def _get_footer(self):
return str(footer)

def _get_formatted_values(self):
return format_array(self.categorical.get_values(), None,
return format_array(self.categorical._internal_get_values(), None,
float_format=None, na_rep=self.na_rep)

def to_string(self):
Expand Down Expand Up @@ -1195,7 +1195,7 @@ def _format_strings(self):

if is_categorical_dtype(values.dtype):
# Categorical is special for now, so that we can preserve tzinfo
array = values.get_values()
array = values._internal_get_values()
else:
array = np.asarray(values)

Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/arrays/categorical/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_set_categories(self):
tm.assert_index_equal(c.categories, Index([1, 2, 3, 4]))

exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
tm.assert_numpy_array_equal(c.get_values(), exp)
tm.assert_numpy_array_equal(c.to_dense(), exp)

# all "pointers" to '4' must be changed from 3 to 0,...
c = c.set_categories([4, 3, 2, 1])
Expand All @@ -262,21 +262,21 @@ def test_set_categories(self):

# output is the same
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
tm.assert_numpy_array_equal(c.get_values(), exp)
tm.assert_numpy_array_equal(c.to_dense(), exp)
assert c.min() == 4
assert c.max() == 1

# set_categories should set the ordering if specified
c2 = c.set_categories([4, 3, 2, 1], ordered=False)
assert not c2.ordered

tm.assert_numpy_array_equal(c.get_values(), c2.get_values())
tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense())

# set_categories should pass thru the ordering
c2 = c.set_ordered(False).set_categories([4, 3, 2, 1])
assert not c2.ordered

tm.assert_numpy_array_equal(c.get_values(), c2.get_values())
tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense())

@pytest.mark.parametrize('values, categories, new_categories', [
# No NaNs, same cats, same order
Expand Down Expand Up @@ -383,15 +383,15 @@ def test_remove_unused_categories(self):
tm.assert_index_equal(out.categories, Index(['B', 'D', 'F']))
exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
tm.assert_numpy_array_equal(out.codes, exp_codes)
assert out.get_values().tolist() == val
assert out.to_dense().tolist() == val

alpha = list('abcdefghijklmnopqrstuvwxyz')
val = np.random.choice(alpha[::2], 10000).astype('object')
val[np.random.choice(len(val), 100)] = np.nan

cat = Categorical(values=val, categories=alpha)
out = cat.remove_unused_categories()
assert out.get_values().tolist() == val.tolist()
assert out.to_dense().tolist() == val.tolist()


class TestCategoricalAPIWithFactor(TestCategorical):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def test_shape(self, data, shape, dtype):
[1, np.nan, np.nan, 3, np.nan],
[1, np.nan, 0, 3, 0],
])
@pytest.mark.parametrize("method", ["to_dense", "get_values"])
@pytest.mark.parametrize("method", ["to_dense"]) # , "get_values"])
@pytest.mark.parametrize("fill_value", [None, 0])
def test_dense_repr(self, vals, fill_value, method):
vals = np.array(vals)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@ def _test_stack_with_multiindex(multiindex):
else:
assert_frame_equal(result, expected)

df.columns = MultiIndex.from_tuples(df.columns.get_values(),
df.columns = MultiIndex.from_tuples(df.columns.to_numpy(),
names=df.columns.names)
expected = df.stack(level=level, dropna=False)
if isinstance(expected, Series):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/multi/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_shift(idx):

def test_groupby(idx):
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
labels = idx.get_values().tolist()
labels = idx.to_numpy().tolist()
exp = {1: labels[:3], 2: labels[3:]}
tm.assert_dict_equal(groups, exp)

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,15 @@ def test_values(self):

exp = np.array([], dtype=np.object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
#tm.assert_numpy_array_equal(idx.get_values(), exp)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove and/or use filterwarnings on the test

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol, this is what's failing :_.>

exp = np.array([], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)

idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M')

exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
#tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([492, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)

Expand All @@ -179,7 +179,7 @@ def test_values(self):
exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT],
dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
#tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)

Expand Down
Loading