@@ -83,8 +83,11 @@ def _make_sorted_values_labels(self):
8383 to_sort = labs [:v ] + labs [v + 1 :] + [labs [v ]]
8484 sizes = [len (x ) for x in levs [:v ] + levs [v + 1 :] + [levs [v ]]]
8585
86- group_index = get_group_index (to_sort , sizes )
87- comp_index , obs_ids = _compress_group_index (group_index )
86+ comp_index , obs_ids = get_compressed_ids (to_sort , sizes )
87+
88+ # group_index = get_group_index(to_sort, sizes)
89+ # comp_index, obs_ids = _compress_group_index(group_index)
90+
8891 ngroups = len (obs_ids )
8992
9093 indexer = algos .groupsort_indexer (comp_index , ngroups )[0 ]
@@ -97,10 +100,10 @@ def _make_selectors(self):
97100 new_levels = self .new_index_levels
98101
99102 # make the mask
100- group_index = get_group_index ( self .sorted_labels [:- 1 ],
101- [len (x ) for x in new_levels ])
103+ remaining_labels = self .sorted_labels [:- 1 ]
104+ level_sizes = [len (x ) for x in new_levels ]
102105
103- comp_index , obs_ids = _compress_group_index ( group_index )
106+ comp_index , obs_ids = get_compressed_ids ( remaining_labels , level_sizes )
104107 ngroups = len (obs_ids )
105108
106109 comp_index = _ensure_platform_int (comp_index )
@@ -391,6 +394,36 @@ def _unstack_frame(obj, level):
391394 value_columns = obj .columns )
392395 return unstacker .get_result ()
393396
def get_compressed_ids(labels, sizes):
    """
    Compute compressed group ids for a set of label arrays, reducing the
    leading levels first whenever the product of ``sizes`` reaches 2 ** 63.

    Parameters
    ----------
    labels : list
        One label array per level. It is sliced and concatenated with
        lists, so it must be a list (or behave like one).
    sizes : list of int
        Size of each level, aligned with ``labels``.

    Returns
    -------
    comp_index, obs_ids
        The pair returned by ``_compress_group_index`` for the (possibly
        reduced) labels.
    """
    # 2 ** 63 is one past the largest signed 64-bit value; presumably
    # get_group_index builds int64 keys -- TODO confirm against its definition.
    overflow_bound = 2 ** 63

    while _long_prod(sizes) >= overflow_bound:
        # Find the longest leading run of levels whose product still fits.
        i = len(sizes)
        while _long_prod(sizes[:i]) >= overflow_bound:
            i -= 1

        # Collapse that run into a single level holding its compressed ids;
        # its size becomes the number of ids that compression reported.
        rem_index, rem_ids = get_compressed_ids(labels[:i], sizes[:i])
        sizes = [len(rem_ids)] + sizes[i:]
        labels = [rem_index] + labels[i:]

    # The product of sizes is now below the bound.
    group_index = get_group_index(labels, sizes)
    comp_index, obs_ids = _compress_group_index(group_index)
    return comp_index, obs_ids
421+
422+ def _long_prod (vals ):
423+ result = 1L
424+ for x in vals :
425+ result *= x
426+ return result
394427
395428def stack (frame , level = - 1 , dropna = True ):
396429 """
0 commit comments