-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
CLN: get_flattened_iterator #35515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLN: get_flattened_iterator #35515
Changes from 7 commits
2f5c450
071378d
b3af159
1c581e6
1cb2fed
dd8263c
a09af1b
52b938c
775ea23
f889efd
8e83f5a
0ce1136
4584c6d
68273ab
7793b9d
58ddb7e
eb2be1b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
""" miscellaneous sorting / groupby utilities """ | ||
from typing import Callable, Optional | ||
from typing import Callable, List, Optional, Tuple | ||
|
||
import numpy as np | ||
|
||
|
@@ -440,36 +440,20 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): | |
return result | ||
|
||
|
||
class _KeyMapper: | ||
""" | ||
Map compressed group id -> key tuple. | ||
""" | ||
|
||
def __init__(self, comp_ids, ngroups: int, levels, labels): | ||
self.levels = levels | ||
self.labels = labels | ||
self.comp_ids = comp_ids.astype(np.int64) | ||
|
||
self.k = len(labels) | ||
self.tables = [hashtable.Int64HashTable(ngroups) for _ in range(self.k)] | ||
|
||
self._populate_tables() | ||
|
||
def _populate_tables(self): | ||
for labs, table in zip(self.labels, self.tables): | ||
table.map(self.comp_ids, labs.astype(np.int64)) | ||
|
||
def get_key(self, comp_id): | ||
return tuple( | ||
level[table.get_item(comp_id)] | ||
for table, level in zip(self.tables, self.levels) | ||
) | ||
|
||
|
||
def get_flattened_iterator(comp_ids, ngroups, levels, labels): | ||
# provide "flattened" iterator for multi-group setting | ||
mapper = _KeyMapper(comp_ids, ngroups, levels, labels) | ||
return [mapper.get_key(i) for i in range(ngroups)] | ||
def get_flattened_list( | ||
comp_ids: np.ndarray, ngroups: int, levels, labels: List[np.ndarray] | ||
) -> List[Tuple]: | ||
"""Map compressed group id -> key tuple.""" | ||
comp_ids = comp_ids.astype(np.int64, copy=False) | ||
tables = [] | ||
for labs, level in zip(labels, levels): | ||
table = hashtable.Int64HashTable(ngroups) | ||
table.map(comp_ids, labs.astype(np.int64, copy=False)) | ||
tables.append(table) | ||
return [ | ||
tuple(level[table.get_item(i)] for table, level in zip(tables, levels)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there any performance difference in creating an intermediary list to store the result of […]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point. Eliminates a loop iteration and storing these table objects. |
||
for i in range(ngroups) | ||
] | ||
|
||
|
||
def get_indexer_dict(label_list, keys): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could make this a list comprehension, though it might be slightly less readable.