Skip to content

Commit 2b47439

Browse files
author
Roger Thomas
committed
Add comments
1 parent e96ec98 commit 2b47439

File tree

1 file changed

+13
-14
lines changed

1 file changed

+13
-14
lines changed

pandas/core/algorithms.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -949,34 +949,33 @@ def select_n_frame(frame, columns, n, method, keep):
949949
columns = [columns]
950950
columns = list(columns)
951951
ascending = method == 'nsmallest'
952-
index_is_unique = frame.index.is_unique
953-
if not index_is_unique:
954-
# If index not unique we must reset index to allow re-indexing below
955-
# We must save frame's index to tmp
956-
tmp = Series(np.arange(len(frame)), index=frame.index)
957-
frame = frame.reset_index(drop=True)
952+
original_frame, original_index = frame, frame.index
953+
frame.reset_index(drop=True, inplace=True)
958954
for i, column in enumerate(columns):
955+
# For each column in columns we perform ``method`` on this frame
956+
# To guard against the possibility ``method`` column has duplicate
957+
# values that must be considered for further columns (# GH15297) we
958+
# filter using isin on the values returned by ``method``. If there are
959+
# no duplicated values, we simply reindex like the values returned
960+
# by ``method``, otherwise we sort the frame and continue
959961
series = frame[column]
960962
values = getattr(series, method)(n, keep=keep)
961963
if i + 1 == len(columns):
964+
# This is the last column => duplicates here don't matter
962965
frame = frame.reindex(values.index)
963966
else:
964967
filtered_frame = frame[series.isin(values)]
965968
if len(filtered_frame) == len(values):
966969
# Values are unique in series => reindex and break
967970
frame = frame.reindex(values.index)
968971
break
972+
# Values are not unique in series => sort and continue
969973
frame = filtered_frame.sort_values(
970974
column, ascending=ascending
971975
)
972-
if not index_is_unique:
973-
# This below line of code is a little obfuscated. We are setting the
974-
# index of the frame back to its original index using saved original
975-
# index stored in tmp. Because we reset the index on frame (above)
976-
# frame's index is now purely a unique integer index (as is tmp) =>
977-
# to restore the index to frame we can index tmp's index with frame's
978-
# index...
979-
frame.index = tmp.index[frame.index]
976+
original_frame.index = original_index # Restore the index
977+
# Below we set the index of the returning frame to the original index
978+
frame.index = original_index[frame.index]
980979
return frame
981980

982981

0 commit comments

Comments
 (0)