|
8 | 8 |
|
9 | 9 | from pandas import DataFrame, Index, MultiIndex, Series, compat
|
10 | 10 | from pandas.core import common as com
|
11 |
| -from pandas.core.arrays.categorical import ( |
12 |
| - _factorize_from_iterable, _factorize_from_iterables) |
13 | 11 | from pandas.core.generic import NDFrame
|
14 |
| -from pandas.core.index import ( |
15 |
| - _all_indexes_same, _get_consensus_names, _get_objs_combined_axis, |
16 |
| - ensure_index) |
| 12 | +from pandas.core.index import _get_objs_combined_axis, ensure_index |
17 | 13 | import pandas.core.indexes.base as ibase
|
18 | 14 | from pandas.core.internals import concatenate_block_managers
|
19 | 15 |
|
@@ -533,103 +529,62 @@ def _concat_indexes(indexes):
|
533 | 529 |
|
534 | 530 |
|
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """
    Produce a MultiIndex which includes concatenated pieces in "indexes",
    prepended by one or more levels defined by "keys".

    Parameters
    ----------
    indexes : sequence of Index (or subclass) instances.
        Pieces of new Index.
    keys : sequence of labels, same length as "indexes".
        Labels used to index the pieces in "indexes".
    levels : list of sequences, default None
        Used to override the ".levels" in the resulting hierarchical index.
        A ``None`` entry leaves the corresponding level untouched.
    names : list, default None
        Names for the levels in the resulting hierarchical index. Either
        one name per keys level (the remaining names are taken from the
        concatenated pieces), or one name per level of the result.

    Returns
    -------
    concatenated : MultiIndex

    Raises
    ------
    ValueError
        If a passed level does not contain every value present in the
        level it is meant to replace.
    """

    orig = _concat_indexes(indexes)

    # Simplest way to create and prepend the keys level(s): repeat each key
    # once per element of the piece it labels.  Tuple keys are turned into
    # several levels by ``tupleize_cols``.
    repeated_keys = [key
                     for key, idx in zip(keys, indexes)
                     for _ in range(len(idx))]
    keys_levs = Index(repeated_keys, tupleize_cols=True)

    tot_df = concat([keys_levs.to_frame().reset_index(drop=True),
                     orig.to_frame().reset_index(drop=True)], axis=1)
    temp_names = [None] * keys_levs.nlevels + list(orig.names)
    result = MultiIndex.from_frame(tot_df, names=temp_names)

    if names is not None:
        if len(names) == keys_levs.nlevels:
            # Received only names for keys level(s)
            result.names = list(names) + list(result.names)[len(names):]
        else:
            # Received names for all levels
            result.names = names

    if levels is not None:
        for lev_num, level in enumerate(levels):
            if level is None:
                continue
            cur_lev = result.levels[lev_num]
            new_lev = Index(level)

            # Positions (in the *current* level) of the values which the
            # passed level does not provide.
            not_found = np.where(new_lev.get_indexer(cur_lev) == -1)[0]
            if len(not_found):
                missing = cur_lev[not_found].tolist()
                raise ValueError("Values not found in passed level: "
                                 "{missing!s}"
                                 .format(missing=missing))

            # Compute the new codes against the untouched index first, then
            # swap levels and codes together.
            cur_val = result.get_level_values(lev_num)
            new_codes = new_lev.get_indexer_for(cur_val)
            result = (result.set_levels(new_lev, level=lev_num)
                            .set_codes(new_codes, level=lev_num))

    return result
0 commit comments