@@ -1394,7 +1394,7 @@ def _join_level(self, other, level, how='left', return_indexers=False):
1394
1394
new_levels [level ] = new_level
1395
1395
1396
1396
join_index = MultiIndex (levels = new_levels , labels = new_labels ,
1397
- names = left .names )
1397
+ names = left .names , verify_integrity = False )
1398
1398
left_indexer = np .arange (len (left ))[new_lev_labels != - 1 ]
1399
1399
else :
1400
1400
join_index = left
@@ -1856,7 +1856,7 @@ class MultiIndex(Index):
1856
1856
rename = Index .set_names
1857
1857
1858
1858
def __new__ (cls , levels = None , labels = None , sortorder = None , names = None ,
1859
- copy = False ):
1859
+ copy = False , verify_integrity = True ):
1860
1860
if levels is None or labels is None :
1861
1861
raise TypeError ("Must pass both levels and labels" )
1862
1862
if len (levels ) != len (labels ):
@@ -1886,12 +1886,36 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
1886
1886
else :
1887
1887
subarr .sortorder = sortorder
1888
1888
1889
+ if verify_integrity :
1890
+ subarr ._verify_integrity ()
1891
+
1889
1892
return subarr
1890
1893
1894
def _verify_integrity(self):
    """
    Check that ``self.levels`` and ``self.labels`` are mutually consistent.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the number of levels differs from the number of label arrays,
        if the label arrays do not all have the same length, or if the
        largest label on any level is >= the length of that level.

    Notes
    -----
    Currently does not check, among other things, that the cached nlevels
    matches nor that sortorder matches the actual sort order.
    """
    labels, levels = self.labels, self.levels
    if len(levels) != len(labels):
        raise ValueError("Length of levels and labels must match. NOTE:"
                         " this index is in an inconsistent state.")
    if len(labels) == 0:
        # Zero levels: there is no first label array to measure against,
        # and nothing left to verify (avoids an IndexError on labels[0]).
        return
    label_length = len(labels[0])
    for i, (level, label) in enumerate(zip(levels, labels)):
        if len(label) != label_length:
            raise ValueError("Unequal label lengths: %s" % (
                             [len(lab) for lab in labels]))
        if not len(label):
            # max() of an empty array is undefined; empty labels are
            # trivially within bounds.
            continue
        # Reduce once and reuse for both the comparison and the message.
        label_max = label.max()
        if label_max >= len(level):
            raise ValueError("On level %d, label max (%d) >= length of"
                             " level (%d). NOTE: this index is in an"
                             " inconsistent state" % (i, label_max,
                                                      len(level)))
1913
+
1891
1914
def _get_levels(self):
    """Return the stored ``_levels``; used as the getter of ``levels``."""
    stored_levels = self._levels
    return stored_levels
1893
1916
1894
- def _set_levels (self , levels , copy = False , validate = True ):
1917
+ def _set_levels (self , levels , copy = False , validate = True ,
1918
+ verify_integrity = False ):
1895
1919
# This is NOT part of the levels property because it should be
1896
1920
# externally not allowed to set levels. User beware if you change
1897
1921
# _levels directly
@@ -1907,7 +1931,10 @@ def _set_levels(self, levels, copy=False, validate=True):
1907
1931
self ._set_names (names )
1908
1932
self ._tuples = None
1909
1933
1910
- def set_levels (self , levels , inplace = False ):
1934
+ if verify_integrity :
1935
+ self ._verify_integrity ()
1936
+
1937
+ def set_levels (self , levels , inplace = False , verify_integrity = True ):
1911
1938
"""
1912
1939
Set new levels on MultiIndex. Defaults to returning
1913
1940
new index.
@@ -1918,6 +1945,8 @@ def set_levels(self, levels, inplace=False):
1918
1945
new levels to apply
1919
1946
inplace : bool
1920
1947
if True, mutates in place
1948
+ verify_integrity : bool (default True)
1949
+ if True, checks that levels and labels are compatible
1921
1950
1922
1951
Returns
1923
1952
-------
@@ -1930,27 +1959,33 @@ def set_levels(self, levels, inplace=False):
1930
1959
else :
1931
1960
idx = self ._shallow_copy ()
1932
1961
idx ._reset_identity ()
1933
- idx ._set_levels (levels )
1962
+ idx ._set_levels (levels , validate = True ,
1963
+ verify_integrity = verify_integrity )
1934
1964
if not inplace :
1935
1965
return idx
1936
1966
1937
1967
# remove me in 0.14 and change to read only property
1938
1968
__set_levels = deprecate ("setting `levels` directly" ,
1939
- partial (set_levels , inplace = True ),
1969
+ partial (set_levels , inplace = True ,
1970
+ verify_integrity = True ),
1940
1971
alt_name = "set_levels" )
1941
1972
levels = property (fget = _get_levels , fset = __set_levels )
1942
1973
1943
1974
def _get_labels(self):
    """Return the stored ``_labels``; used as the getter of ``labels``."""
    stored_labels = self._labels
    return stored_labels
1945
1976
1946
- def _set_labels (self , labels , copy = False , validate = True ):
1977
+ def _set_labels (self , labels , copy = False , validate = True ,
1978
+ verify_integrity = False ):
1947
1979
if validate and len (labels ) != self .nlevels :
1948
1980
raise ValueError ("Length of labels must match length of levels" )
1949
1981
self ._labels = FrozenList (_ensure_frozen (labs , copy = copy )._shallow_copy ()
1950
1982
for labs in labels )
1951
1983
self ._tuples = None
1952
1984
1953
- def set_labels (self , labels , inplace = False ):
1985
+ if verify_integrity :
1986
+ self ._verify_integrity ()
1987
+
1988
+ def set_labels (self , labels , inplace = False , verify_integrity = True ):
1954
1989
"""
1955
1990
Set new labels on MultiIndex. Defaults to returning
1956
1991
new index.
@@ -1961,6 +1996,8 @@ def set_labels(self, labels, inplace=False):
1961
1996
new labels to apply
1962
1997
inplace : bool
1963
1998
if True, mutates in place
1999
+ verify_integrity : bool (default True)
2000
+ if True, checks that levels and labels are compatible
1964
2001
1965
2002
Returns
1966
2003
-------
@@ -1973,13 +2010,14 @@ def set_labels(self, labels, inplace=False):
1973
2010
else :
1974
2011
idx = self ._shallow_copy ()
1975
2012
idx ._reset_identity ()
1976
- idx ._set_labels (labels )
2013
+ idx ._set_labels (labels , verify_integrity = verify_integrity )
1977
2014
if not inplace :
1978
2015
return idx
1979
2016
1980
2017
# remove me in 0.14 and change to readonly property
1981
2018
__set_labels = deprecate ("setting labels directly" ,
1982
- partial (set_labels , inplace = True ),
2019
+ partial (set_labels , inplace = True ,
2020
+ verify_integrity = True ),
1983
2021
alt_name = "set_labels" )
1984
2022
labels = property (fget = _get_labels , fset = __set_labels )
1985
2023
@@ -2392,7 +2430,8 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
2392
2430
names = [c .name for c in cats ]
2393
2431
2394
2432
return MultiIndex (levels = levels , labels = labels ,
2395
- sortorder = sortorder , names = names )
2433
+ sortorder = sortorder , names = names ,
2434
+ verify_integrity = False )
2396
2435
2397
2436
@classmethod
2398
2437
def from_tuples (cls , tuples , sortorder = None , names = None ):
@@ -2463,6 +2502,7 @@ def __setstate__(self, state):
2463
2502
self ._set_labels (labels )
2464
2503
self ._set_names (names )
2465
2504
self .sortorder = sortorder
2505
+ self ._verify_integrity ()
2466
2506
2467
2507
def __getitem__ (self , key ):
2468
2508
if np .isscalar (key ):
@@ -2502,7 +2542,7 @@ def take(self, indexer, axis=None):
2502
2542
indexer = com ._ensure_platform_int (indexer )
2503
2543
new_labels = [lab .take (indexer ) for lab in self .labels ]
2504
2544
return MultiIndex (levels = self .levels , labels = new_labels ,
2505
- names = self .names )
2545
+ names = self .names , verify_integrity = False )
2506
2546
2507
2547
def append (self , other ):
2508
2548
"""
@@ -2618,7 +2658,7 @@ def droplevel(self, level=0):
2618
2658
return result
2619
2659
else :
2620
2660
return MultiIndex (levels = new_levels , labels = new_labels ,
2621
- names = new_names )
2661
+ names = new_names , verify_integrity = False )
2622
2662
2623
2663
def swaplevel (self , i , j ):
2624
2664
"""
@@ -2645,7 +2685,7 @@ def swaplevel(self, i, j):
2645
2685
new_names [i ], new_names [j ] = new_names [j ], new_names [i ]
2646
2686
2647
2687
return MultiIndex (levels = new_levels , labels = new_labels ,
2648
- names = new_names )
2688
+ names = new_names , verify_integrity = False )
2649
2689
2650
2690
def reorder_levels (self , order ):
2651
2691
"""
@@ -2664,7 +2704,7 @@ def reorder_levels(self, order):
2664
2704
new_names = [self .names [i ] for i in order ]
2665
2705
2666
2706
return MultiIndex (levels = new_levels , labels = new_labels ,
2667
- names = new_names )
2707
+ names = new_names , verify_integrity = False )
2668
2708
2669
2709
def __getslice__ (self , i , j ):
2670
2710
return self .__getitem__ (slice (i , j ))
@@ -2705,7 +2745,8 @@ def sortlevel(self, level=0, ascending=True):
2705
2745
new_labels = [lab .take (indexer ) for lab in self .labels ]
2706
2746
2707
2747
new_index = MultiIndex (labels = new_labels , levels = self .levels ,
2708
- names = self .names , sortorder = level )
2748
+ names = self .names , sortorder = level ,
2749
+ verify_integrity = False )
2709
2750
2710
2751
return new_index , indexer
2711
2752
@@ -3086,7 +3127,8 @@ def truncate(self, before=None, after=None):
3086
3127
new_labels = [lab [left :right ] for lab in self .labels ]
3087
3128
new_labels [0 ] = new_labels [0 ] - i
3088
3129
3089
- return MultiIndex (levels = new_levels , labels = new_labels )
3130
+ return MultiIndex (levels = new_levels , labels = new_labels ,
3131
+ verify_integrity = False )
3090
3132
3091
3133
def equals (self , other ):
3092
3134
"""
@@ -3180,7 +3222,7 @@ def intersection(self, other):
3180
3222
if len (uniq_tuples ) == 0 :
3181
3223
return MultiIndex (levels = [[]] * self .nlevels ,
3182
3224
labels = [[]] * self .nlevels ,
3183
- names = result_names )
3225
+ names = result_names , verify_integrity = False )
3184
3226
else :
3185
3227
return MultiIndex .from_arrays (lzip (* uniq_tuples ), sortorder = 0 ,
3186
3228
names = result_names )
@@ -3210,14 +3252,14 @@ def diff(self, other):
3210
3252
if self .equals (other ):
3211
3253
return MultiIndex (levels = [[]] * self .nlevels ,
3212
3254
labels = [[]] * self .nlevels ,
3213
- names = result_names )
3255
+ names = result_names , verify_integrity = False )
3214
3256
3215
3257
difference = sorted (set (self .values ) - set (other .values ))
3216
3258
3217
3259
if len (difference ) == 0 :
3218
3260
return MultiIndex (levels = [[]] * self .nlevels ,
3219
3261
labels = [[]] * self .nlevels ,
3220
- names = result_names )
3262
+ names = result_names , verify_integrity = False )
3221
3263
else :
3222
3264
return MultiIndex .from_tuples (difference , sortorder = 0 ,
3223
3265
names = result_names )
@@ -3269,7 +3311,7 @@ def insert(self, loc, item):
3269
3311
new_labels .append (np .insert (labels , loc , lev_loc ))
3270
3312
3271
3313
return MultiIndex (levels = new_levels , labels = new_labels ,
3272
- names = self .names )
3314
+ names = self .names , verify_integrity = False )
3273
3315
3274
3316
def delete (self , loc ):
3275
3317
"""
@@ -3281,7 +3323,7 @@ def delete(self, loc):
3281
3323
"""
3282
3324
new_labels = [np .delete (lab , loc ) for lab in self .labels ]
3283
3325
return MultiIndex (levels = self .levels , labels = new_labels ,
3284
- names = self .names )
3326
+ names = self .names , verify_integrity = False )
3285
3327
3286
3328
get_major_bounds = slice_locs
3287
3329
0 commit comments