diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 38478be5a8e07..dd44bc6990d59 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -475,7 +475,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, else: if axis > 0: swapped = True - values = values.swapaxes(0, axis) + assert axis == 1, axis + values = values.T if arity > 1: raise NotImplementedError("arity of more than 1 is not " "supported for the 'how' argument") diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index b9530e15f71e2..bf46e5d1a74e4 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -1,3 +1,4 @@ + from .blocks import ( # noqa: F401 Block, BoolBlock, CategoricalBlock, ComplexBlock, DatetimeBlock, DatetimeTZBlock, ExtensionBlock, FloatBlock, IntBlock, ObjectBlock, @@ -9,7 +10,7 @@ from .blocks import _safe_reshape # noqa: F401; io.packers from .blocks import make_block # noqa: F401; io.pytables, io.packers from .managers import ( # noqa: F401; reshape.concat, reshape.merge + _transform_index, concatenate_block_managers) -from .managers import items_overlap_with_suffix # noqa: F401; reshape.merge from .blocks import _block_shape # noqa:F401; io.pytables diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 652f70746f618..a131509a4ed10 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -722,16 +722,28 @@ def replace(self, to_replace, value, inplace=False, filter=None, try: values, to_replace = self._try_coerce_args(self.values, to_replace) - mask = missing.mask_missing(values, to_replace) - if filter is not None: - filtered_out = ~self.mgr_locs.isin(filter) - mask[filtered_out.nonzero()[0]] = False + except (TypeError, ValueError): + # GH 22083, TypeError or ValueError occurred within error handling + # causes infinite loop. Cast and retry only if not objectblock. 
+ if is_object_dtype(self): + raise + + # try again with a compatible block + block = self.astype(object) + return block.replace(to_replace=original_to_replace, + value=value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert) + + mask = missing.mask_missing(values, to_replace) + if filter is not None: + filtered_out = ~self.mgr_locs.isin(filter) + mask[filtered_out.nonzero()[0]] = False + try: blocks = self.putmask(mask, value, inplace=inplace) - if convert: - blocks = [b.convert(by_item=True, numeric=False, - copy=not inplace) for b in blocks] - return blocks except (TypeError, ValueError): # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. @@ -746,6 +758,10 @@ def replace(self, to_replace, value, inplace=False, filter=None, filter=filter, regex=regex, convert=convert) + if convert: + blocks = [b.convert(by_item=True, numeric=False, + copy=not inplace) for b in blocks] + return blocks def _replace_single(self, *args, **kwargs): """ no-op on a non-ObjectBlock """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index aff39d765dc95..5494b75ff9e4e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1859,48 +1859,6 @@ def _compare_or_regex_search(a, b, regex=False): return result -# TODO: this is no longer used in this module, could be moved to concat -def items_overlap_with_suffix(left, lsuffix, right, rsuffix): - """ - If two indices overlap, add suffixes to overlapping entries. - - If corresponding suffix is empty, the entry is simply converted to string. - - """ - to_rename = left.intersection(right) - if len(to_rename) == 0: - return left, right - else: - if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: ' - '{rename}'.format(rename=to_rename)) - - def renamer(x, suffix): - """Rename the left and right indices. 
- - If there is overlap, and suffix is not None, add - suffix, otherwise, leave it as-is. - - Parameters - ---------- - x : original column name - suffix : str or None - - Returns - ------- - x : renamed column name - """ - if x in to_rename and suffix is not None: - return '{x}{suffix}'.format(x=x, suffix=suffix) - return x - - lrenamer = partial(renamer, suffix=lsuffix) - rrenamer = partial(renamer, suffix=rsuffix) - - return (_transform_index(left, lrenamer), - _transform_index(right, rrenamer)) - - def _transform_index(index, func, level=None): """ Apply function to all values found in index. diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d21ad58e752c2..549c69486ebfa 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -3,6 +3,7 @@ """ import copy +from functools import partial import string import warnings @@ -27,8 +28,7 @@ from pandas.core.arrays.categorical import _recode_for_categories import pandas.core.common as com from pandas.core.frame import _merge_doc -from pandas.core.internals import ( - concatenate_block_managers, items_overlap_with_suffix) +from pandas.core.internals import _transform_index, concatenate_block_managers import pandas.core.sorting as sorting from pandas.core.sorting import is_int64_overflow_possible @@ -555,8 +555,8 @@ def get_result(self): ldata, rdata = self.left._data, self.right._data lsuf, rsuf = self.suffixes - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) + llabels, rlabels = _items_overlap_with_suffix(ldata.items, lsuf, + rdata.items, rsuf) lindexers = {1: left_indexer} if left_indexer is not None else {} rindexers = {1: right_indexer} if right_indexer is not None else {} @@ -1303,8 +1303,8 @@ def get_result(self): ldata, rdata = self.left._data, self.right._data lsuf, rsuf = self.suffixes - llabels, rlabels = items_overlap_with_suffix(ldata.items, lsuf, - rdata.items, rsuf) + llabels, rlabels = 
def _items_overlap_with_suffix(left, lsuffix, right, rsuffix):
    """
    Disambiguate labels shared by two indices by appending suffixes.

    Every label found in both ``left`` and ``right`` is rewritten as
    ``'{label}{suffix}'`` using the suffix of the side it belongs to.
    A suffix of ``None`` leaves that side's labels unchanged; an empty
    string suffix only converts the overlapping labels to strings.

    Parameters
    ----------
    left, right : index-like objects supporting ``intersection``
    lsuffix, rsuffix : str or None

    Returns
    -------
    tuple
        ``(left, right)`` unchanged when nothing overlaps, otherwise the
        two indices produced by ``_transform_index``.

    Raises
    ------
    ValueError
        If labels overlap and both suffixes are falsy.
    """
    overlap = left.intersection(right)

    # Nothing shared: hand back the very same objects.
    if not len(overlap):
        return left, right

    if not (lsuffix or rsuffix):
        raise ValueError('columns overlap but no suffix specified: '
                         '{rename}'.format(rename=overlap))

    def _rename(label, suffix):
        """
        Return ``label`` with ``suffix`` appended when it is an
        overlapping label and ``suffix`` is not None; otherwise return
        ``label`` untouched.
        """
        if label not in overlap or suffix is None:
            return label
        return '{x}{suffix}'.format(x=label, suffix=suffix)

    new_left = _transform_index(left, partial(_rename, suffix=lsuffix))
    new_right = _transform_index(right, partial(_rename, suffix=rsuffix))
    return new_left, new_right