diff --git a/category_encoders/count.py b/category_encoders/count.py
index 69aa0af6..38fd1c0e 100644
--- a/category_encoders/count.py
+++ b/category_encoders/count.py
@@ -271,9 +271,11 @@ def _fit_count_encode(self, X_in, y):
     def _transform_count_encode(self, X_in, y):
         """Perform the transform count encoding."""
         X = X_in.copy(deep=True)
-        X.fillna(value=np.nan, inplace=True)
 
         for col in self.cols:
+            # Normalise None to NaN in the encoded column only, so that columns outside self.cols keep their values.
+            X[col] = X[col].fillna(value=np.nan)
+
             if self._min_group_size is not None:
                 if col in self._min_group_categories.keys():
                     X[col] = (
diff --git a/tests/test_encoders.py b/tests/test_encoders.py
index 0ec857c5..7791b226 100644
--- a/tests/test_encoders.py
+++ b/tests/test_encoders.py
@@ -692,3 +692,15 @@ def test_columns(self):
         self.assertTrue(result['float_edge'].min() < 1, 'should still be a number and untouched')
         self.assertTrue(result['unique_int'].min() < 1, 'should still be a number and untouched')
         self.assertTrue(result[321].min() < 1, 'should still be a number')
+
+    def test_ignored_columns_are_untouched(self):
+        # Make sure None values in ignored columns are preserved.
+        # See: https://github.com/scikit-learn-contrib/category_encoders/pull/261
+        X = pd.DataFrame({'col1': ['A', 'B', None], 'col2': ['C', 'D', None]})
+        y = [1, 0, 1]
+
+        for encoder_name in set(encoders.__all__):
+            with self.subTest(encoder_name=encoder_name):
+                enc = getattr(encoders, encoder_name)(cols=['col1'])
+                out = enc.fit_transform(X, y)
+                self.assertIsNone(out['col2'][2])