From c14b5368ff966f638cc238bf11f5c2860fc81814 Mon Sep 17 00:00:00 2001 From: ian gainey Date: Sun, 28 Apr 2024 10:41:41 -0400 Subject: [PATCH 1/8] Initial commit for PR --- pandas/io/excel/_base.py | 435 ++++++++++++++------ pandas/io/excel/_openpyxl.py | 39 +- pandas/tests/io/data/excel/test_tables.xlsm | Bin 0 -> 11607 bytes pandas/tests/io/data/excel/test_tables.xlsx | Bin 0 -> 11590 bytes pandas/tests/io/excel/test_readers.py | 86 +++- 5 files changed, 423 insertions(+), 137 deletions(-) create mode 100644 pandas/tests/io/data/excel/test_tables.xlsm create mode 100644 pandas/tests/io/data/excel/test_tables.xlsx diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index cf9c3be97ee5c..fb7aafb63d26b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -78,6 +78,7 @@ FilePath, IntStrT, ReadBuffer, + Scalar, Self, SequenceNotStr, StorageOptions, @@ -116,12 +117,32 @@ Available cases: * Defaults to ``0``: 1st sheet as a `DataFrame` + If a table name is specified and a sheet name is not (so it defaults + to 0), no sheets will be loaded. * ``1``: 2nd sheet as a `DataFrame` * ``"Sheet1"``: Load sheet with name "Sheet1" * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5" as a dict of `DataFrame` * ``None``: All worksheets. +table_name: str, list of str, or None, default 0 + Strings are used for table_names that correspond to Excel Table names. + Lists of strings are used to request multiple tables. + Specify ``None`` to get all tables. + + Available cases: + + * Defaults to ``0``: No tables are read or returned + * ``Table1``: Load table with name "Table1", returned as a DataFrame + * ``["Table1", "Table2", "Table3"]``: Load the tables with names "Table1", + "Table2", and "Table3". Returned as a dictionary of DataFrames + * ``sheet_name="Sheet1", table_name="Table1":`` Load both the sheet with + name "Sheet1" and the table with name "Table1". 
Returned as a nested + dictionary, containing a "sheets" dictionary and a "tables" dictionary. + Each of these 2 dictionaries hold DataFrames of their respective data. + This is the same for if a list of either or both of these parameters + are specified. + header : int, list of int, default 0 Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row positions will @@ -295,9 +316,10 @@ Returns ------- -DataFrame or dict of DataFrames +DataFrame, dict of DataFrames, or nested dictionary containing 2 dicts of DataFrames DataFrame from the passed in Excel file. See notes in sheet_name - argument for more information on when a dict of DataFrames is returned. + argument for more information on when a dict of DataFrames is returned, + and table_name for when a nested dictionary is returned. See Also -------- @@ -375,6 +397,9 @@ def read_excel( # sheet name is str or int -> DataFrame sheet_name: str | int = ..., *, + # table name is str -> DataFrame + # If sheet name and table name are specified -> Nested Dictionary of DataFrames + table_name: str = ..., header: int | Sequence[int] | None = ..., names: SequenceNotStr[Hashable] | range | None = ..., index_col: int | str | Sequence[int] | None = ..., @@ -404,7 +429,7 @@ def read_excel( skipfooter: int = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> DataFrame: +) -> DataFrame | list[DataFrame]: ... 
@@ -414,6 +439,9 @@ def read_excel( # sheet name is list or None -> dict[IntStrT, DataFrame] sheet_name: list[IntStrT] | None, *, + # table name is list[str] -> DataFrame + # If sheet name and table name are specified -> Nested Dictionary of DataFrames + table_name: list[str] | None, header: int | Sequence[int] | None = ..., names: SequenceNotStr[Hashable] | range | None = ..., index_col: int | str | Sequence[int] | None = ..., @@ -453,6 +481,8 @@ def read_excel( io, sheet_name: str | int | list[IntStrT] | None = 0, *, + # If sheet name and table name are specified -> Nested Dictionary of DataFrames + table_name: str | int | list[str] | None = 0, header: int | Sequence[int] | None = 0, names: SequenceNotStr[Hashable] | range | None = None, index_col: int | str | Sequence[int] | None = None, @@ -487,8 +517,12 @@ def read_excel( check_dtype_backend(dtype_backend) should_close = False if engine_kwargs is None: - engine_kwargs = {} - + if table_name == 0: + # The only time table_name will have a value of 0 is when it's not specified + engine_kwargs = {} + else: + # To read in table data the file cannot be read only + engine_kwargs = {"read_only": False} if not isinstance(io, ExcelFile): should_close = True io = ExcelFile( @@ -506,6 +540,7 @@ def read_excel( try: data = io.parse( sheet_name=sheet_name, + table_name=table_name, header=header, names=names, index_col=index_col, @@ -550,7 +585,6 @@ def __init__( ) -> None: if engine_kwargs is None: engine_kwargs = {} - self.handles = IOHandles( handle=filepath_or_buffer, compression={"method": None} ) @@ -714,6 +748,7 @@ def f(skiprows: Sequence, x: int) -> bool: def parse( self, sheet_name: str | int | list[int] | list[str] | None = 0, + table_name: str | int | list[str] | None = 0, header: int | Sequence[int] | None = 0, names: SequenceNotStr[Hashable] | range | None = None, index_col: int | Sequence[int] | None = None, @@ -753,12 +788,28 @@ def parse( else: sheets = [sheet_name] + tables: list[str] | None + if 
isinstance(table_name, int): + tables = None + if table_name != 0: + raise NotImplementedError + elif isinstance(table_name, list): + tables = table_name + ret_dict = True + elif table_name is None: + tables = self.table_names + ret_dict = True + else: + tables = [table_name] + # handle same-type duplicates. sheets = cast(Union[list[int], list[str]], list(dict.fromkeys(sheets).keys())) - output = {} + output = {"sheets": {}, "tables": {}} + outputDict = None last_sheetname = None + outputDict = "sheets" for asheetname in sheets: last_sheetname = asheetname if verbose: @@ -777,146 +828,266 @@ def parse( usecols = maybe_convert_usecols(usecols) if not data: - output[asheetname] = DataFrame() + output[outputDict][asheetname] = DataFrame() continue - is_list_header = False - is_len_one_list_header = False - if is_list_like(header): - assert isinstance(header, Sequence) - is_list_header = True - if len(header) == 1: - is_len_one_list_header = True - - if is_len_one_list_header: - header = cast(Sequence[int], header)[0] - - # forward fill and pull out names for MultiIndex column - header_names = None - if header is not None and is_list_like(header): - assert isinstance(header, Sequence) - - header_names = [] - control_row = [True] * len(data[0]) - - for row in header: - if is_integer(skiprows): - assert isinstance(skiprows, int) - row += skiprows - - if row > len(data) - 1: - raise ValueError( - f"header index {row} exceeds maximum index " - f"{len(data) - 1} of data.", - ) + output = self.parse_multiindex( + data=data, + asheetname=asheetname, + header=header, + output=output, + outputDict=outputDict, + names=names, + index_col=index_col, + usecols=usecols, + dtype=dtype, + skiprows=skiprows, + nrows=nrows, + true_values=true_values, + false_values=false_values, + na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + date_format=date_format, + thousands=thousands, + decimal=decimal, + comment=comment, + skipfooter=skipfooter, + 
dtype_backend=dtype_backend, + **kwds, + ) + if last_sheetname is None: + raise ValueError("Sheet name is an empty list") - data[row], control_row = fill_mi_header(data[row], control_row) + last_tablename = None + outputDict = "tables" - if index_col is not None: - header_name, _ = pop_header_name(data[row], index_col) - header_names.append(header_name) + if tables is not None: + sheets_reqd = self.get_sheets_required(tables) + for req_sheet in sheets_reqd: + sheet_tables = self.get_sheet_tables(req_sheet) + for atablename in tables: + last_tablename = atablename + table_data = None - # If there is a MultiIndex header and an index then there is also - # a row containing just the index name(s) - has_index_names = False - if is_list_header and not is_len_one_list_header and index_col is not None: - index_col_list: Sequence[int] - if isinstance(index_col, int): - index_col_list = [index_col] - else: - assert isinstance(index_col, Sequence) - index_col_list = index_col - - # We have to handle mi without names. If any of the entries in the data - # columns are not empty, this is a regular row - assert isinstance(header, Sequence) - if len(header) < len(data): - potential_index_names = data[len(header)] - potential_data = [ - x - for i, x in enumerate(potential_index_names) - if not control_row[i] and i not in index_col_list - ] - has_index_names = all(x == "" or x is None for x in potential_data) - - if is_list_like(index_col): - # Forward fill values for MultiIndex index. 
- if header is None: - offset = 0 - elif isinstance(header, int): - offset = 1 + header - else: - offset = 1 + max(header) + if atablename in sheet_tables.keys(): + if verbose: + print(f"Reading Table: {atablename}") - # GH34673: if MultiIndex names present and not defined in the header, - # offset needs to be incremented so that forward filling starts - # from the first MI value instead of the name - if has_index_names: - offset += 1 + file_rows_needed = self._calc_rows( + header, index_col, skiprows, nrows + ) + table_data = self.get_table_data( + req_sheet, sheet_tables[atablename], file_rows_needed + ) + tables.remove(atablename) + + usecols = maybe_convert_usecols(usecols) + + if not table_data: + output[outputDict][atablename] = DataFrame() + continue + + output = self.parse_multiindex( + data=table_data, + asheetname=atablename, + header=header, + output=output, + outputDict=outputDict, + names=names, + index_col=index_col, + usecols=usecols, + dtype=dtype, + skiprows=skiprows, + nrows=nrows, + true_values=true_values, + false_values=false_values, + na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + date_format=date_format, + thousands=thousands, + decimal=decimal, + comment=comment, + skipfooter=skipfooter, + dtype_backend=dtype_backend, + **kwds, + ) - # Check if we have an empty dataset - # before trying to collect data. 
- if offset < len(data): - assert isinstance(index_col, Sequence) + if not bool(output["tables"]) and not bool(output["sheets"]): + return DataFrame() - for col in index_col: - last = data[offset][col] + if ret_dict: + if tables is None: + return output["sheets"] + elif sheet_name == 0: + return output["tables"] + else: + return output + elif tables is not None and sheet_name != 0: + return output + elif tables is not None and sheet_name == 0: + return output["tables"][last_tablename] + else: + return output["sheets"][last_sheetname] + + def parse_multiindex( + self, + data: list[list[Scalar]] | None = None, + asheetname: str | int | None = None, + header: int | Sequence[int] | None = 0, + output: dict | None = None, + outputDict: str | None = None, + names: SequenceNotStr[Hashable] | range | None = None, + index_col: int | Sequence[int] | None = None, + usecols=None, + dtype: DtypeArg | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + na_values=None, + parse_dates: list | dict | bool = False, + date_parser: Callable | lib.NoDefault = lib.no_default, + date_format: dict[Hashable, str] | str | None = None, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, + **kwds, + ): + is_list_header = False + is_len_one_list_header = False + if is_list_like(header): + assert isinstance(header, Sequence) + is_list_header = True + if len(header) == 1: + is_len_one_list_header = True + + if is_len_one_list_header: + header = cast(Sequence[int], header)[0] + + # forward fill and pull out names for MultiIndex column + header_names = None + if header is not None and is_list_like(header): + assert isinstance(header, Sequence) + + header_names = [] + control_row = [True] * len(data[0]) + + for row in 
header: + if is_integer(skiprows): + assert isinstance(skiprows, int) + row += skiprows + + if row > len(data) - 1: + raise ValueError( + f"header index {row} exceeds maximum index " + f"{len(data) - 1} of data.", + ) - for row in range(offset + 1, len(data)): - if data[row][col] == "" or data[row][col] is None: - data[row][col] = last - else: - last = data[row][col] + data[row], control_row = fill_mi_header(data[row], control_row) - # GH 12292 : error when read one empty column from excel file - try: - parser = TextParser( - data, - names=names, - header=header, - index_col=index_col, - has_index_names=has_index_names, - dtype=dtype, - true_values=true_values, - false_values=false_values, - skiprows=skiprows, - nrows=nrows, - na_values=na_values, - skip_blank_lines=False, # GH 39808 - parse_dates=parse_dates, - date_parser=date_parser, - date_format=date_format, - thousands=thousands, - decimal=decimal, - comment=comment, - skipfooter=skipfooter, - usecols=usecols, - dtype_backend=dtype_backend, - **kwds, - ) + if index_col is not None: + header_name, _ = pop_header_name(data[row], index_col) + header_names.append(header_name) - output[asheetname] = parser.read(nrows=nrows) + # If there is a MultiIndex header and an index then there is also + # a row containing just the index name(s) + has_index_names = False + if is_list_header and not is_len_one_list_header and index_col is not None: + index_col_list: Sequence[int] + if isinstance(index_col, int): + index_col_list = [index_col] + else: + assert isinstance(index_col, Sequence) + index_col_list = index_col + + # We have to handle mi without names. 
If any of the entries in the data + # columns are not empty, this is a regular row + assert isinstance(header, Sequence) + if len(header) < len(data): + potential_index_names = data[len(header)] + potential_data = [ + x + for i, x in enumerate(potential_index_names) + if not control_row[i] and i not in index_col_list + ] + has_index_names = all(x == "" or x is None for x in potential_data) + + if is_list_like(index_col): + # Forward fill values for MultiIndex index. + if header is None: + offset = 0 + elif isinstance(header, int): + offset = 1 + header + else: + offset = 1 + max(header) + + # GH34673: if MultiIndex names present and not defined in the header, + # offset needs to be incremented so that forward filling starts + # from the first MI value instead of the name + if has_index_names: + offset += 1 + + # Check if we have an empty dataset + # before trying to collect data. + if offset < len(data): + assert isinstance(index_col, Sequence) + + for col in index_col: + last = data[offset][col] + + for row in range(offset + 1, len(data)): + if data[row][col] == "" or data[row][col] is None: + data[row][col] = last + else: + last = data[row][col] + + # GH 12292 : error when read one empty column from excel file + try: + parser = TextParser( + data, + names=names, + header=header, + index_col=index_col, + has_index_names=has_index_names, + dtype=dtype, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + skip_blank_lines=False, # GH 39808 + parse_dates=parse_dates, + date_parser=date_parser, + date_format=date_format, + thousands=thousands, + decimal=decimal, + comment=comment, + skipfooter=skipfooter, + usecols=usecols, + dtype_backend=dtype_backend, + **kwds, + ) - if header_names: - output[asheetname].columns = output[asheetname].columns.set_names( - header_names - ) + output[outputDict][asheetname] = parser.read(nrows=nrows) - except EmptyDataError: - # No Data, return an empty DataFrame - 
output[asheetname] = DataFrame() + if header_names: + output[outputDict][asheetname].columns = output[outputDict][ + asheetname + ].columns.set_names(header_names) - except Exception as err: - err.args = (f"{err.args[0]} (sheet: {asheetname})", *err.args[1:]) - raise err + except EmptyDataError: + # No Data, return an empty DataFrame + output[outputDict][asheetname] = DataFrame() - if last_sheetname is None: - raise ValueError("Sheet name is an empty list") + except Exception as err: + err.args = (f"{err.args[0]} (sheet: {asheetname})", *err.args[1:]) + raise err - if ret_dict: - return output - else: - return output[last_sheetname] + return output @doc(storage_options=_shared_docs["storage_options"]) @@ -1564,6 +1735,7 @@ def __fspath__(self): def parse( self, sheet_name: str | int | list[int] | list[str] | None = 0, + table_name: str | int | list[int] | list[str] | None = 0, header: int | Sequence[int] | None = 0, names: SequenceNotStr[Hashable] | range | None = None, index_col: int | Sequence[int] | None = None, @@ -1603,6 +1775,7 @@ def parse( """ return self._reader.parse( sheet_name=sheet_name, + table_name=table_name, header=header, names=names, index_col=index_col, diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 218a592c22b4a..c6924538edc22 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -606,14 +606,48 @@ def _convert_cell(self, cell) -> Scalar: return cell.value def get_sheet_data( - self, sheet, file_rows_needed: int | None = None + self, sheet: Workbook.worksheets, file_rows_needed: int | None = None ) -> list[list[Scalar]]: if self.book.read_only: sheet.reset_dimensions() + return self.get_data(sheet.rows, file_rows_needed) + @property + def table_names(self) -> list[str]: + tables = None + tables = [] + for sheet in self.book.worksheets: + for table in sheet.tables.values(): + tables.append(table.name) + return tables + + def get_sheets_required(self, tables) -> list[Workbook.worksheets]: 
+ sheets_reqd = [] + for sheet in self.book.worksheets: + for table in sheet.tables.values(): + if table.name in tables: + sheets_reqd.append(sheet) + continue + return sheets_reqd + + def get_sheet_tables(self, sheet: Workbook.worksheets): + tables = {} + for table in sheet.tables.values(): + tables[table.name] = table + return tables + + def get_table_data( + self, sheet: Workbook.worksheets, tablename, file_rows_needed: int | None = None + ) -> list[list[Scalar]]: + table = sheet[tablename.ref] + return self.get_data(table, file_rows_needed) + + def get_data( + self, input, file_rows_needed: int | None = None + ) -> list[list[Scalar]]: data: list[list[Scalar]] = [] last_row_with_data = -1 - for row_number, row in enumerate(sheet.rows): + for row_number, row in enumerate(input): converted_row = [self._convert_cell(cell) for cell in row] while converted_row and converted_row[-1] == "": # trim trailing empty elements @@ -636,5 +670,4 @@ def get_sheet_data( data_row + (max_width - len(data_row)) * empty_cell for data_row in data ] - return data diff --git a/pandas/tests/io/data/excel/test_tables.xlsm b/pandas/tests/io/data/excel/test_tables.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..0eebb8d5e6da8bf515eb3e5927c8d3db0b1a2452 GIT binary patch literal 11607 zcmeHtWl&w&w)F|F!QC~u6Wk@ZyW7FtEfCz@odkkIaCZnEJP;tb26qVhPSSn9bkliX zy`S&Zy{C%3i-I}Mo_me4rY!|&a0qk&6aWSQ0FVH@-S9?2o&f-0PyhfL00vB3#Lm_k zXzQ%6;$aVT(q(YBu_k^80Y;Sz00aI1e?I@lZ=hF2R=$S`p^fZ>s6Ekc<6HD}wxKsrZz$9gEMQmmz_>xdpAJNPTI#r>*D>I3t(Q8yOSpj~e3|u6x`$h}m=G^Hp zP$!h@Sm=Y>@O$=WG7=&YUE)2jsCtz`OeV*VqEL}=HBT#>zBq2ma2C>1gHdnN=J{0S zYX?WNtP{}DhH#JvoCF})Y`FQ`EWy|Hsl&w=AFpaolLv65`)YK!H@L znQ4FpmSIjVgrOYaQc=>>D7yzzo07)7dPGWGUdWk$S42)lo5Ie{C$cS61f#ZBR!-iY zQ)GNB&ke?f;O?Eh91b(3$QfJ*)oNddH_ou7 zhzIF~iiDg#)7^5Bc}+q&w=P@iy18O9;^Vje_gdU+=U(FTZe|P5=c7sl30Afo0%G_< zDt&we2PphazjdliWal7K%7Ekv57Mu`BhcE3k>SVnKf3)l7VBSJFN^=F+u$RKyO4pa z&x^6h!qRR+5-lVuK5rzyBGpCZk>f3Pyue3R!3_X^<=f_SH?*+G8?`$?a=FG<7J-V+ 
zLssur9-MOH-~vrU>6k3;P`28O;`;gG^JUsADGw^wwiuey#-g{()_ggWHpzJt}C8>7FIqe53X$B$lXmC%kZ5|F8GEh5XvR9KbeLx;ACVr zSK&QiO>*%aS5?`Z%c9aK+mV~pL*K-@^Gr0O1MS|MUM_P$iIffXnq}-&KSl0^uU0MF z$#9lOF9&Q->A>-ze*{s{(r-mFErcc-2o3<8f?^&L=*_rWGrHM1S{d2dS^WrbWvVuI z8Qka(KJV|Ld)>lq^n?|rV8JG6>XegA53*D&9J^rEo;gKXD5>5a@lcZ3q?A(cVWM;p z5(;c*Zl_MgWR$QGrdc-?y`bb5D(Kpk*=W_19bt?}!$=h`HP4z-nzlCLubR1Yzg*3k zs*6Rs66VDQC^VqeD3rk3w0we7o46EZD+|tgAB&;NltkNNL!RfM%~Vh_NeT1aRm{W2 z7DdN0Ti%5&oY0nYowrO{Z_9b>woOffIJXdaRCi z^{Wb}Ml*_gx9Za?jR$yaao@;bA+0twVVEVJiB>)iqwFjOk3rk|Np|xo?}DUJ6$@Mn z8`X5DkEdZ={>2(K9PrgvYtbo{(&7v!GPl-@+q4!N){-qs^nwH6yx62`#QF3>*owue zWyMCyinQaaqaOr5>%{QULr{H9t$E~Tz(L2{f%gtdHYI7Z`%pv^DVL1qguC?GqU)YV zZV#i)$~mrHO<Ln51jceaioO~p4d za;V9iquonab%V+U}^3YS2bl7|VELAqYLB&-Hq{#=Q!SP#W%k4agjT3tiXZ zlr-}@qM(L&Yi82%ZdyNAu>2^h8SB2TU4{AUbo1;)P!+-j&M9yId?C11W5?wT(~@=q z|7;Hl4m~Rs?BYeGapZu8c4btvx$1HNE*B>_bC|gKz7~}W?=WItFPu(@hB609F^5zF z+oki%i*3?7ycAP?^wnd6Nji8M5HI$hwd*<6M_Rx)?=3d?9a&XZVf79?3qM2{qr-}c z!{~3f@)#UB5$8-RSlSONbM^f`N=sXL+km4=kXTc(;X}CVTCcsx$j|`aQr1pAVV}Lz zc(nA8w94+TUriQe#_G8XAO{*Sj}Wv}~UQE*;e#cj14>9CsP>*g8-y!~zKbV1p>} z=T`4z4g@+oG5)+U|Jd_06BO+-n2`IIUmWmg_(1nVOui8=skNwUmtCCUVP41&Jk?+Vf>$bh}bh5Y1PO7yP=C=DF#?9-8UTvcMIigqnea zCM?EUR3h?$-MT%MF6HE9<@@ZZ_WqpaVuFwDidnvS79NOl@C|z{5?YHSMtNEo(PW9J z^bv*SRdUu^Be$K(B!gY62`j1XgOO)gQd*K`ZRH}7Ca$O6X+EQllpATzX^*Pd*(}f> zEU8v_XufAsWM^p(@>t(Ue%VLBiyp>kE0Ma{rfl(O%rUu|SY(ra=bXGfvFM#H#;Yq} z!nIxM+Iq%yjisyS17{~+)B9Ksda+^xaI(IahM0?IKcXg@xJR`c<(ntD`1mfk^eCP7gY zSCNs~SAh^ezyIe&CQ=)&TM$940ks;iKuVfe|L6m=D7>4`Dndk^7P{bu4t3wMZQr;k zlzo)Xe+a9VUEY7=EH=I)GDq1ztA|XxwEY3< zku}KU4PR!9*S0w7ThqePyi)sfJ|Fw?KLoFwEixc9T7ZT>WLU>0lnanS6 z$G^(TK+~IA8na)yTQ!WhV7jyWeXx2;DT8{=&+mV?3yM!NN$<6Q@b3(P1b>S>No4Cpt`v$h|x_$>WY1*DX?zBzCTm$ z?rMt|H0Y0FZXr4G2MUvODiA{%BS#Mtw!cw=q&H67@`)KI5hrh9S?6hF)nZQX<)J-M2B2$Y$im4S&)PF%tyKI0iyQYcz z9;aPi^r&^B9vjNR#a^Ya6U3+3Z4v;SglvN zV7Us+vHGTF=6Rq?UJgR9VBX3WPMlFTN-sG$I9ts*%HjVyeqS2Y3N=Cl04StCT2nu_ za%Xd(4UqBY_2(|Wuk|SguK}Y2df-uTM0aLn$IFmwypZGa&~r6OcSAxlF<5WZUQnrp 
z7X2CA_O!Ggn8k#&-;yU)IF$JtsHTk=W&<+W$538J@$}X3@U{oRcq2DP*6Aa@hpo2$ zevTNYtAx_jxA3Ioo07Behu4IpgOOtv)%%|El z&Gp;8?^|P~OD)bkcrE(gm$90&-Mgk5OXu!SzHOO;vwYc}-%;PgwZnF6xHxGinD%gU$QsR)XV}w? zrx0wnzvAWdv=o>Ze4d=5Jzt4#D+K4qIKQEC;YY`I!+$vZk|sC%eY&| zV2~4)Q4D+^{a;w>mwmj(!7>Jz6ZeEuN|Y;Ccd~}A>6J8!iqI$r&k{;GbWjZLUnv1f5Ii(mSDCQ60)xYW~>($jiv1shir6A%70{d>=-`npiadk1%;@!cN z4v3vv)0+anO3)mi$1_EXJCVMZ-imv(P0w{t65PAxe9Nre|5Ay{^KjB5kc*KlT2vT6{5^myZ1v+!z2y=ySOT?{<)KM#PZ9wKpooB_A) zFKL$#4P=p=WpANA4V9ub85@W{T6Jn|Pj;W-46FK5_#L9vHt0~L)5uxDz1MUbxB?q= zinR98BJmZfbE3ajt^A%vV0oMLvb0-kMbKskho*5QL(ZbJu0KLk6$1GsNBt)VRx5u} z1IpBQWa`b3D8u(bp-bvfUxug~DzR)}Sjf>Dh@EGPbvGEFe;DJPg3#8_>TD5O;mH zD+(Va%@p>fV$vOXKcdP=A=MOEABXeuUd2(i!Ggbh&6s~|KC7T(!;CUf17qC>99Mph zv$jMf*tY&u=#!nuVx?}n+FtOReivtk<}x%}$p-8qWB1^IG?Psyqb% zOYIMxUL5`n7ZtNbW|6sM(Cjxlx71D-eJ%4`N1vx|BL|020OcQb<=xn3+hPiH7~tzT zj%&rbIVs?2GXhVNsE*Pph9l@oC z()=lQP-om>B1T16GhN#t&!bjZMDXK9ZUlvnfC?QrwvP5K*C47HT z5DwF8{`KPg%&y0ef-=lxgS*n%w|GOiZNUtrdj$GfbW$fK|iuCFv_ehdn+;TxLH#rJ?B8aroZS>Ke`n0mp4l5qz?E7+&3Q{8D zWO2@ArJ*_AfDA;SbpKZy8-?cK92|sZtL|?~N0mpd2kkx|W|CwMGe011s!TSawz8_f*MV*f4z!dgt+9V4fMqh(6zy5ZL zjAk8~k9Ls_bs}FX>opFM+_>dQsSkGw!49Cl+dkkwP zZ9jV^34Os2=KE5cvA(oT*vzz&D}W!}A5}St52!f2!cEfacv-}@kmtksT>RoK(u((O zlYawtmXniq=lfNcqgOvOnWsH$`Ae$FVo)Nh4w^wB{t@V%oIR|8PCv5Od{rB(1t#PN z{x81BH-|&6>vXX7(o@q4J43ky`=v(1VMTh8`T3)L4_@-FdidP+6Fy<`g;P#ri)6Z! 
zrD|}sz&^)v7JS`6Y-ZKn@KV*_4griV2(rDRlA`f&pnE@E`;dUIRQUqzWuxf`$s2{2 zJyzT4I3)Ettda*owyFMET6mHKbf3b%^f75^`bxl|hODMa3}GCim{#8rAxnNbD}a@v z$J@P*hd*>Bz!a~}Tr5v~=}i5`5--X;Y~I}3XgXaae)N_yX5;;Y+Uy~ieOQH+UUh?W zp^22ahcMX26^R%u?0U_-_L*Eh3bXZan>VMtRe7e^`z0!9BQV8f|GAn;#*$<0rMXv| z#rI+LDsNtNlgmrf*9tw)4;XbJ6r#$Pj+%TV{Q;y%H!q- zgqvGvdX7`Q(VkPHS95r^`mCOWnDbqR7%FP#tb?z;xy1Qq?71N}b{yba16DCc2u9DL z!&&hlh|N@bIqg4AyK;BCm+mwImQ|!7%hAn8z`^QW-BBHGXW|`s)7LQXf{epIccoWs zzVB2yv#8yucPxh2!+N*p`T%Thq+c^Y9evTY6Hn2_md5CB;>@Z~7jA(a!qEY(yaLXe zi>KuQ_rb~(QUs{gUqGB%J27grlstan6kOPi*m$}S~E)(xG2r<-; z5hvz88R&y=yiP|oQR1m~3!`7;jNqrp*t?AH=8liUlXjglip2(?Zk)%&c7T}s&koj2 zloU-Vh|6c7AM`&tZEolYG*NbTw6Hb%Np0(BA-HZPWU(uMH<+3Ri-=H(cm}v^K_uk) zDnzz~`Lyt*rSQ-UhUMGG9*)nf&v*UMLPRmc>7awA^Is~Mt$K1DMdxbp*7-;TV(JYj zG#&IYMQGLcVtU4%B4a+$Fi&C%oDhCalze^_!zF|wAoRZGIa>j<3F7%%De!yf}ElN)um*A zIQOG5_h*ykk51jMR?E*mU0_Vt4_`%({2!4oZ6`gE;|khB$4hsh%f8hk)=LhEPjqms zhzT9D?LWUYEwm&&BR@o+^DKOu2nplZ=Qe|gJ6hOd3Ga{A{QSj{L{9>Cx@S{SG@Iej zE8h7|Qe}KAY-Ko}_g^B}$fSt|fQ45MeTn^?MQ^sk!HZoKtyS(!1n)nh+Z2-2>%y*` zBXE5eSh#9quf5uOnIJ7ds?t3yiWBOPn=dqU6%2|bcXUO~O8T3-SRmK_>HHh^J&pg7 z^FNF4|6|TqCx6#eaCzT8^qKLu&W9rbz%_E7FR;WSJq}T!Adx!J?UPa{kb#c^=N=d+*Wj1`Z7= z1n!%CaOH5=qf8JwpPNxEwL5dhFouXLql<%Gv`)YX8-v`2Kj>pdw>jfqTL9r3ohovm z{mR{RSa_!R)E^{EqfADZ(`^jAdNhsQGI{18A=b%bWvDn&wyGb9LQS@2$Mdv?v$;-4 zxmTaR!i1>K7O+lwnO6#Si1-EX2C1Ec!=BnQuiSQVqz7!V{@oS_VGkA4ooeJ~9fk9R zvW=0`cG2~gOBq(8zTxG-aYqp?j-A{qO)aYkGa-jkr#u)uP2h396RNvu-psqs z;xLMRP}#F83#_sY=fL!wZm+_#pj*V5aC&sQR*g!S8rS@YMK;{=TNc10cnMU3Sitdn z7W~n5iXXNG#ci=G$QvYxD>g~RD#A+qA~{G1^A__-Bm?px%ApWJrLd@Xk9ty41XxS8 zi;^jstXI#?oV=#tc;UC-vt#7bg84C(ALP%eA1@dOIKp_Z6s!;7Dt09tAG_W@EK(l7 zpb#v3+3p{L9E@tbOc=Q31B5nQleEj)B6(=$y9_{UJ%c9kSK}lxiDFNIVhB`f@CBdv z`kDxOT8r504k_rcZ!P?7%|{iql!L?R>=eW!ymc%0!GSF~ z$k-Lejs4WdWrz7h=mM|5i#WggU?4lptq1ecezRjfB_!Z=*aztq*Pea@ykdX*mD61T z?yiN+dpEVHdH3=8iNHqws;*Y72>$+jWtq!#R_gRE+brcoRL1}%HMSzT1OiJ}7rhsA zuf(fwGM)SsurpTSOMDMbkmaPlW-RGN12F|6vumAlD>ojNwih@=c2nuc}paG1Dow0(WoxKyIv7ICEM_L9tT=1W{ 
z3rHN1@rv>ROz1(&vUf=NM+ydf#!HmW7;1Wfd;RLIK;Z-qqfDN$fRF%;*a(m)Je%zJjx_bJXGH5f{?krJI z>kwZ$6A~$8uy$55DY3O(0g^Tw1Va(GeGIQY8Oh^5{JuP%Zl zcr0h7;b8qtfgQbM83SBpD-*@^O>kGYehY*E2I`gnkmjt-JI}K?kRMV>9N))O9DQw9Y|cyH>kS>!f4~a#%EZI z-#ts@YQM(b@L7DlnQDRY+j{cQvbgivn0m!oO(aN9vhRf10trg${q=*~5+iKytnWV5zYjBOnSHxvr!I>9e8GnzgmUNuBam&Rv$dG z;tw2?!cejbU2u+lTap=+VUVMJOs?+XL%k}oB2C4(hECPj>+&iGkv`zy`>~I2I6~QH zm&mA^;A_SUA~WSq1;xuV^S6qQ3^WA`ey`=C<+#-ymrAnO5v@5j)C+_ZPHl2+yUwK- za8ndYqh|;-;HX8ex~;Us_9uwXIfHg&9Q)9WFY2psb}30I?vW?aW&nXw)rR=bH^1_H zSafT{MbOE_yM2Qa806I3mIrlgSH~X4PfCa_zJy{ndq%(QjrNT=iqq-b5D(#G*^J|D zR9}upYRU4bz^d-o5Im9N=5J?j=RZ>l4fdxoFe6UWGjg%N9w>PE0N5gpzg^?Mrz^kg0RSv=0N@|V%hLz=d&v2#c|7G`%>N8P3eu1u TCH*)_h6LCLiBXmM$8Y}yeRl9a literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_tables.xlsx b/pandas/tests/io/data/excel/test_tables.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..07b6eb7d8860f9671d1cafd7512558ba4f9032c4 GIT binary patch literal 11590 zcmeHtWl&t(x-CJ12Y1)t?hqunI|SFpgEbC;(70=G4Fqr8-GT=TL4yVhPH=yn?0s(b zVefnE{d}+P=_=ML3ck_nTXT&0*;J8-fyIV;3WWd#1w{dcC@B@-_5=zF|0xs{78C-s zp16a(3&7sRNW;?+05o9ncx^}i9u}G|2MQYU|Nr^?FMb2P8pAd{Z0N0c*Jx5Rri_ER ziI~qrgri^Qe21r-7Ql_8$<>@(KWxz=O#P>QjwRN|sUCKOm(c3kLA$`ZqPhh2?8^4K6BDIdCKv3Vu@5!p;_S_tbx3Ud^2<}8R?rG5fB$=suCWCPz5qSA zLNZ6YV_gZGu?kk05aqzrL=g&Wa@@Fb@RC5TYMR1xZgQ&)6OB;kN3F75qcd&B=Mr@P z#2-;{9j_)~#HHS`!!vFiMVqrN4UK2qDmoT<7fyT7P^2P~ElDVHTxy-L?rJ03YR`Gb z;=ZeA(tI+ByPGBun|ne|tp$}ktrMxkoXEc;+(VWS*Uz@6BLnO}@X3vy5c!V4<=*-2 z~r@` zu(&e^TC#H2L`mAt6bAZb&WgF#Yk5CYYP?pjy_{nm#Ok1QX`^f(7$~TR2N)=of0JvS zCL7f$go6qYH6lagYUB*C1G2LGy#7a~|BJQx7uU<;6_tC~P=XI-ZbJqxzAVO~zm#_u zlWC#Q@O>-$6|F8RkD6$)<2f<924Mht99-GJo3oQJmhLx`RPY^DB$QuaZ!#5U0BCACSMD=lM{#yV zsHtwnZ(V7cSljK0hEK4h`~VJnUHA9h_}V9UN?b#~{ahr+M2xk0`8wY4LAmpCOgjyX!-{fqe#>!Kh5OC~It?q^XO|PeQO{_f zYbSNwKB{DJQE73Qa;)_c1DI14T(FreY3f-p!Jq{wD`FJp&i1hN6}mYZ``(;{cHw$w z3?=$S>63nOU&B7nC(pt8gyvP8JtkWn4mkCu>IEt1q&ohs5+*xPtZlM_#wKo8a=-D^ 
z@CO-BjY5M##yYsTR%4{?h9;}|Ct!sbf5udwZ9+!y@Kf>wNs1|TG#f&|PIg6!ZuJfH z*CpfrSs4%gOEXwh0woYQAWtrCs2QBR1DmtY{2k_jy_($@3-rkZC0hJ_{d&yHEyppr z=0o$1gQ~B1Y?1f)r~$QJ;7jyY_xJqX_Ps)a2H~J(yAQUN1y>_-nZ?~)NYf>JSmBoH zK$Op3pO%>h$`J)rJ6Q+L!S#gE*L5DQdLBSj%)a_s099hP_L5N?NYp#Rs^HL)cQSz48gk zdEGk+joa@B??mTJxNn0cbV}~(yN;Qg_Aj@{p`^c~akQ#UzCHGIc~abH5fh6q<5yq0 z01R!t*!K))FBLv#04eWJI z(=4C};$LMB7>@c$wPb&|$xbslY&q93vgHL;Ea2&E7gsgmXlhlgLMe2m7n+0@Z>97# zFSp{P4vDeM!qx3f3p-T}`Lolqz=_9pSnzOX(>o*N($(t|$1uxDa}HBc*{sqj3C)C= zDs*SX-G!_9`9s&W^OhK%&RnZ~bX4Cpl*a3uoUt4@L9xw#RK-w=at52V_8v@%=ULiN{E;jfd+E)92gR_FgwgzEEtRYr|IzJ>~ z(NI}N=CQfO<5kb!Z3tE{jU6m+D!8hQHpkyIRVNOSkcK zr>iE0ZJtjbtwS=rP}aU653S>DzIcWcDN_k%VdWW{wN+9!ZxN*|=2J6H?FmTK&?Dvo z2f6^)oVCG6(yHpYtL6iFz8B2MzvZsTk!9CH!r{pec#M{k_n?4CDsfWH0LnWr4v^U z>^A7BbOlqFf%mvm96@|mlA;gos+oRy)}E+J$PK$KGP;Wtrg^$J(Nu|;%n^m)DkVGJ zk(*9+iovecgq4)`!N?OlIbB)HHn4c4x!Vt)RNql&+VxbI)CW!cEDrbz8@d%ihN}#k ztW2FjLAz_&**z4Z=wY0;61nRw+7{2oZ1anWMK1aGE=gMxi#}uSDJcf4xMAGz z)o-~=GZaZK2V1s0JhfSX4K%m%D)Le#nOUK!Co)~5CX$(~r;BlTkF!KHq8uSEtB$Ru zX~mY-KPdEV4&?%C?#B4+@PRQe0pkb~wI3cJtNHLyO+PB5$m}G`BwJfUKg6m{SFOpR zp+?6dGs5_^x=bMvn*l`;GXbo|V4xO}pnf7}_YUPWQsLd6UA&kL395e?hoCso9fu&% zoq16a);K~=w`pa@ladV1>I{sM4CLwzc6}wXZMMeUNEye>8VsJ9CQ%6tH}R1f=|I?D z-~a2P5UGdMEs7%9fLRT-KuMWc|KN+T_;M$oQ=E)GHFUup`{`ZFmSf|hSk_^_@ByM$ z7P$Y~MRI&we2%t%#t@yBJprs@>|91BKs7?Cl|v|yXT1?Og(vs9we0zi)j^9 zY~I4W9dkI=6^Xg!spkaHrQ1d%exvVuU(1Kz0z`fLT*k^kjOef>gZ(+-_*X>*cxDS5 zGwury+lCQWTn}#Fe4L0LHnX(hq%6dywh)sYU8Gk;6Sh`&1g2t6F?TIeZU-+vuaRbK zy3M%zvo(wNN-=y_{z>&8$**mhmg%P6L>B1B=6%HY_D+8&a?F9jn!e1b;2ev(1~}I6 zW6N%|u`%BMY&j^bV5Hmd8PR2MxX83xqM^<)5z`epQ%iU7GT_8e3-0bD@tth7+*X%3 zc0rfRHTKh!bqb^*om84RyS6Ng4Wnj#4NP|r6-lNkI0MNmh61}L{JRtN?yk0oLF4`? 
z_7;j`5a1;>p9=X?GxX?T()PD%aLi_jo4zsQ6jIbJ9LxQ9_a|1X4oMm+Cx%zrz)K|O z1rH%sdL*KZIPF4iKL4cN$pk)u2#${t{#Wfwm4+QPX5{Io5 z9ru=fY;R4b+)4a^HFbsE8R^H^Q!9OxVs_E;ke zORGG{B`+JLS2S;BlOWDC3!|4B28OHV6yxCkIewoT*NQd5LqTCs{%l44+R9z50Ivb8 zzplS_@jcy7F+>eG9q=Bir64{NshZ=LcS^?+n&uWD|o8M;%4gS{SjPAZ<;_ z`$Jn#$onsO(S<*?di%6#J%-(wO7S67$XP0FH9WlSUNqj+ot1Oy@WuURTYo=q4DceM zG$j|Al6pgS2KnHUlyWd~%({BdOPThY+=95;s(CblvdoMg*7rBghu>lj)?jerQpjVK z*Pk4C#i@N4t7EWai`*c|iih>pbHp%%YcSQxMD{`SGSfnR)1ec^t|*kaf<7-JheUYp$qurN$#4S|q# z--j|zE3P}Y6f^mpy~*5`&qy{r?fD(`J^b5THztdd4x*{|*9V-@f_WxA-9#$E4tp!! zzO#o@Os!7|oplofeTIIRp=y}FY#bj`4&Q9_(SJ$Qyx&Z(OQ<;DbEboRrIBfqmBtbUjG zWJUtD7qD4Lt5}3sVM+ruUL$56qGm|xBB|l&vI>+6%!tJP_8~iQ;Y=%ogq(Qg%63@7 zII#fz&YMz8+?g7gGHln8TefiK751&Nv7paTXlME~$jVxSlP{WHy>p6TZ+HCkqBCq` zWr88-RL+0_9;3=Ciz9=sl^Ri1=Amy4(Mf30GEvva)ZOBc^=_%|>dp}{--Wk*`lR@lpo|bHOnL_Lvr>iJ`k=4!xjGlf zYV`Rsj8RCi3v3L*iEN%DOJVjpe&yqXn*tMa84nVhgZ>81w^e?W1*~ zG^R@;$h%f~*E$f~CciB2-dY~C(ZQ=@R>_jR=wbjuX{th@KIg6f1j}g)qBN#Wc~7O? z42Lm%7Zkdr9W^^d-%yG7di5z?WSQ6Vg2CPJY%C_&j$2QgY&5O2Mw|}$wi?z7V#8AB z^HtxVx=-j8@#@eEgZ699%&+a`i4GsyYv&r09FWr%K%4bBPHmt!v7mw?lblw1-4_MN zScm&JOmt?JHlGIKxWHtH72GH6C}~-V)(ZAFZ>?dIuRV>SksGjQ2gtj=I247CQf9pL zqhm7|`7ol%N+Z`4SRY5gbEn~~*kCOTUNaLOo6juhShu82e1-GP7lu%Ij<2>vBiO$F zhu9|v@x@AmG_Bp>w@Gb?VUy-OVQB#pww#xQn9aa*-&ZcgOps!@PgAe}%0{oE)0-F6 za8^E3WEq)51uZ_kKm79fCUS5H0}A}nK-ryZrY)v0n+3U!_o!B~n~w&W zF+K439sQ9xtV55dOVZ7O2#zD~nQNhAv7~akX$>I~A^1Gxs7{G;#u-L_C>2DrjXCXs z5HTvwnc>z3cN(?Ip+w=4?|ad0hZw@jNmGsUz*9_vqlQ`)$q|uWmT>jFARM9B>g(C* zi9?S+4Q-hDxsLO?bI%*^T zma+F&G-=iEJR&K23BjV??n*SoWU#5FTiByL^{Hi{9kzn%S$AOZa!N9@Bq_dSwV^qo zfOJ%VeE-+iuT`3dvk6d|ZM(m#9abK;?zj6^OeZm(_bXLd>sm`Xq#xK_uex1fmYU+# zy#h4dc#ZDc#go960Mk7#+Bzh_DI@O-2o5(*GHXv%)91a*5ae2vfia8q>4Bdy9a_!; zzMd6*5$r9fuoKIq#S2D zkgt96pUEVMDgEm6O@(`jcUcs6_1=NJGftF|81AeR?-@gY$#~gRk4ep>{THuyVzb1- zFJ^1gzb$Q%HnXkdh!98jM^#P|LzN#~5WX|);3?u-$n)iUCUusJw&HWs1Zu#~1OoLs zKdibQO8-h{9yhU-&*>(MA&IOuWafnWN1z9~c-jGgKeN~TpWQh&^n2l1KlJN^A-8W# 
zi1qTHr&P9wa!B?{O^3sZ3?uXNNBi!*mE8=91?ne!!;}j@1IHGr3?@spkZJ*a&R`B= zgFt+C&7JU4&EO6ZoGw_Z-J+7B@o<1gKU4dVh@TvI0r9-iVua$Y3Qv#iRvH0Cy*{Vx zevo|%C{ve6mW1h3_-r4Wu8yA!5@yJ1ip&ts0ft5OEg8D(r;`FiIcB1r%Xs7iHxgW_ z>WoEjB99CGTN|P%tFU=1JJYE&@%Ygj+L-kZ6IwF|(2il{wuaRWE`{cDR-P}R*Doj} z5fQ)D%E`#Q3CRt04^p@tNH;V7V>NVaz@1|Ck zXRZ}{mLD+cN-9aSOI1!($n2HVw~a@pr#|?xd^w^mFTsb5i63Dlvo~SRkBNBgC2{e` zEI7U&*jz3nMbNlb4kG)6D>*C8XKV&vMc!)%AH3%yASO5H7Ba9=?>ZCa-dGt@@3MiH zKs5s2&3u3An>^3o2<@rs)I1rEWF&{?%$1W7k9fSD-5GO@(gKc~A9&f^!q9V+;)C^! z7Q33)yVZB~IK=A3d5DRsUiLSbwYTR4-_5+%CC82e{A!@f$4H@>dG-0q??v(1O3$Z2 z>y6mD7uQjfO{( zL_M6hyKeV@=0@fwQqyTt6lF4mlsuYUaSj8^{={O28lTlNOf5$Jnu61Nrjcf{1|a;<(rN@_}2T! zs18Ov-A-Zj^Xw7gWCcgp@tvIUab(J_AFPtG0hsHjF|i#G=Kj5dbsZ(gPzvGl3FHU+ zPflBzI0MYpU7W4$Eq_tlE?Nw!n+;v^0_2WR(_kGDDihCwlqHIWK3|2(l`x+gzO)n` zn$EI(^U%Zlh4a~tKURnYZa5Qs&{RH;ish;o|6z2_E1^1HnLu2_0hOlxKDG$m`d(bG zxZ`|o-WO71I)rga>q!OMfUe@tLQ30u2N*QZnnz9oqJ`0mXg^Wop66H~T@dV&3HEdm zIDzpTYWea+Ze3joH)TGC8v-{v3X?w{K=k-`=ZZ4UpjshL(Lm}_sz03j*_ivY$?`|1 z?su!@SD!90rt7D#5{IA%^mF@3FZ8&Aw$Sm?ZTPb9^{Dl-15y(myepDoM_hZ)ZY&CI zNKdE_u;;uAa}(hZocr9TQ3*#2du)(FSk2F#AIkJ3;HP;t6-9HI3`q;kcT%bo+v2Mu z34WN3;?G%OoN@U@pjl1?@ zlP5u5gi@n>Sb`wbDJNfS=pq;rNp6{nn$?UpcJLss{oVPu+`F&-ublr?eE)yue09>5 zj*9Dt_MtDV|LA-;+PkGG$f^fJ8nb`W+Qq~avQ@MGy8YQ&TeeSU!~b~rd>_(p5X4XK zP?eFKB-fnxs&YeH0m+t`5X}`Ra_ui#zD&v=`ZO=dcXa17dS&d?kWAvS(T4;^!XIUW z)&J6rVWZcXJ%%$xUKw2+?5cYVP1+dbK3rjh8{OtYd}$3pZUmNR!~28X^*IDTztFyy zEsZiCT~2##?A@bd=AI!q2M7C&I#z*>4`Z|Xo-EXSb7nkGcQ}jxm{MT%nKUkJb(V?`n;z$qlV*UF~UeX>qwp-1}FZwE{31#aeKiVa}wVW$( zlJyNQ2aY?7(@9w?%+L+}zyx>_zNbU0VHjLvWBDwCPDlHuaf>}K+dgn#(9Of=IpGqn zBgyC?BCL|&^c>mszIEa9tsA>O%j}8^jX*zYqwiUO5@@6Lq3+;mF;mf5j{cJ{k(EmW zg=-qx7aL60azB{lR5K<~B%Nx-2$sqw7?^nxX%p9kgtks&(c4A7%bX70Ix9;kFtk!~ zU1$Q>!{duymb#|oU1r5j?v|#Mm4}&AKh>t)AN-LZ=yEGocip^^cbUm+8hfv?Yg-mr zWgpIq>owJ0MP$viNH76>0A6ZFC43&&`G`j~-0_brKt}Nvsf4hA_n%qtN7pHS*d7wM zB`@Hv(O@sQWL2w3D~XGg;9#vszN}Y{Xn^kiFHYimrNaIFo~lRD_Q~T+^n&m-mL5( zpA1{%&6T*z`wC;lVF5z~9>D{ejg#ze&+$CV#DA_83t8+YznQS%<- 
z^AmxM!c|?ZcoD+=`RWSiX`J+FoA#ONiI~m-YFb=HN(m%3Zmx#U=cJ{ouQP!DD){NE z$R&RJ$LLCOU(=TiqXD=g5lYbl&pP|RM!5(x61cWXSGb_K7pU|Ok zkgEUZ6>eNDAQgwKFaQ$VFdzdMa|bgOX9q_htC@o{;AdI}IbHCdxeG)bk@2eXJ#5%P z%Wx0kgWl=&jKIKpn0GJu(KG1UgVxKDd8}pL8$7gGw|t4dS}{A|zj7X>xw@v?T_K9Z ziDFQ}{747&k&s10GiPRzRzJ5>po9-h1P8a%`QFvjJAqU2mD$Q17hxopF^sZW5}QFz z05HwiZte+nU7R}|>j4o}@rks$iQ?RIFjS#aYcD48#+2$;81WYv6NrOzfm22Wq)-_q zMPeXl+~=wwzqV(QwSI>9iK*&HGuF%BvE4^E?XaI;OoR{4*1w)&3C0)p11&cwvxpW~h-m2S46%WEd5jx5pc(q#A z%1lb?PNY%blosTKTX4*x6VF94x0dI3yXH&gOgx|-=!dCI-9$BKBP9egL~QgX@&}~1 z_xJF6Z#SeL1z{c&WN$(LEAvbo9sff-2;=_zWWH2zIrL$3)1NN_ueEq^v%D%WpM%4sgvYwGy zs&}fWo}XCdsyeeU6fF3^QHoX)&~{!b$>c`0<9nrDAg1!;b&h@4sr&+AvPx<6G|4L@ zdhv^HTfMNo3G!3Eplt=`J}k4d`YM7QT1uKb^hvB~s6e@D6XItZUj-``-P;IJ^fQQV z-eLp>0ejo>o*p^Wafb<0lHyA(VOY+bFmL%_eJ79N1D=`?p&Tz;^5#bMWxq-(SsoQx zHTW7rB!1NV{Umq(3$55-e<}+*>J&38Klht~0-k%Qjnb{Q@s5a_!e0QtdlipO9~XhYO=U>_FnwGSK1O(4 zY5hi^A$|M~{;}?Q4EVTu`3*=(_CEmru4NtrKCTFU1Co*d0{mNj@EGB7Hu)QY1~U19 ztn*(H{&9`}JzM#04+RBJ4F&bL+~x5D{CmjxtGPezU(EjuKq~TZ5GDOQNrncs2N9zL I{m Date: Sun, 28 Apr 2024 12:53:38 -0400 Subject: [PATCH 2/8] Added whats new enhancement --- doc/source/whatsnew/v3.0.0.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index aa378faac2a00..152e018223c93 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -11,9 +11,26 @@ including other versions of pandas. .. --------------------------------------------------------------------------- .. _whatsnew_300.enhancements: + + Enhancements ~~~~~~~~~~~~ +.. _whatsnew_300.enhancements.read_excel_table_parameter: + +``Addition of table name parameter in pandas read_excel`` +^^^^^^^^^^^^^^^^ +Previously, when using pandas :func:`read_excel`` method the data read from Excel could not be selected in more detail than on the sheet level. 
+Distinguishing data that is part of a particular table in the Excel file could be tedious without the use of third-party APIs. +This enhancement, a solution to `Issue #38937 `__, introduces the ability to specify a table_name parameter that corresponds to the name of a table in the specified Excel file. +The table_name argument accepts the names as a string, a list of strings, or the value of None which corresponds to reading in every table in that file. + +The return format has some changes, but only if a table_name parameter is specified. If it is not specified, the return remains a DataFrame or dictionary of DataFrames +holding the data from each sheet. + - If a table_name is specified and a sheet_name is not, the return will be a DataFrame or dictionary of DataFrames holding the table data desired + - If a table_name is specified and a sheet_name is also specified, the return will be a nested dictionary containing 2 dictionaries: + - The first dictionary is a dictionary of DataFrames corresponding to the data on each sheet + - The second dictionary is a dictionary of DataFrames corresponding to the data in each table .. 
_whatsnew_300.enhancements.enhancement1: enhancement1 From ff5b7226995f08d32aa3a482a3efd793b2c131bb Mon Sep 17 00:00:00 2001 From: ian gainey Date: Sun, 28 Apr 2024 13:47:06 -0400 Subject: [PATCH 3/8] Resolving commit checks --- pandas/io/excel/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index fb7aafb63d26b..0ceaaaaa0fcd0 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -798,6 +798,7 @@ def parse( ret_dict = True elif table_name is None: tables = self.table_names + print(self.table_names) ret_dict = True else: tables = [table_name] From 182627ea63e79d4b78bcf2436a0ac08531e05dc6 Mon Sep 17 00:00:00 2001 From: ian gainey Date: Sun, 28 Apr 2024 13:56:45 -0400 Subject: [PATCH 4/8] Removing local testing comments --- pandas/tests/io/excel/test_readers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 9d8817558a1f7..d696aefb94f46 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -6,11 +6,12 @@ ) from functools import partial from io import BytesIO +import os from pathlib import Path +import platform import re from urllib.error import URLError import uuid - from zipfile import BadZipFile import numpy as np @@ -987,7 +988,6 @@ def test_corrupt_bytes_raises(self, engine): with pytest.raises(error, match=msg): pd.read_excel(BytesIO(bad_stream)) - """ @pytest.mark.network @pytest.mark.single_cpu def test_read_from_http_url(self, httpserver, read_ext): @@ -1060,7 +1060,6 @@ def test_close_from_py_localpath(self, read_ext): del x # should not throw an exception because the passed file was closed f.read() - """ def test_reader_seconds(self, request, engine, read_ext): xfail_datetimes_with_pyxlsb(engine, request) From 6c171961426e903935239e66b41743ae23e5bf82 Mon Sep 17 00:00:00 2001 From: ian gainey Date: Sun, 28 Apr 2024 14:04:03 -0400 
Subject: [PATCH 5/8] Attempting to resolve ruff-format failure that is not occuring on my local pre-commit --- pandas/io/excel/_base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 3da6fe82779df..a9c906a4f953f 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -431,9 +431,7 @@ def read_excel( skipfooter: int = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> DataFrame | list[DataFrame]: - ... - +) -> DataFrame | list[DataFrame]: ... @overload From d036bb501968e2959846c7056b3cc89391548688 Mon Sep 17 00:00:00 2001 From: ian gainey Date: Sun, 28 Apr 2024 14:47:47 -0400 Subject: [PATCH 6/8] Resolving typing and docstring manual pre-commit errors --- pandas/io/excel/_base.py | 10 +++++----- pandas/io/excel/_openpyxl.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index a9c906a4f953f..2731cae2c5e55 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -127,7 +127,7 @@ as a dict of `DataFrame` * ``None``: All worksheets. -table_name: str, list of str, or None, default 0 +table_name : str, list of str, or None, default 0 Strings are used for table_names that correspond to Excel Table names. Lists of strings are used to request multiple tables. Specify ``None`` to get all tables. @@ -431,7 +431,7 @@ def read_excel( skipfooter: int = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> DataFrame | list[DataFrame]: ... +) -> DataFrame | list[DataFrame] | dict[str, DataFrame]: ... @overload @@ -472,7 +472,7 @@ def read_excel( skipfooter: int = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | lib.NoDefault = ..., -) -> dict[IntStrT, DataFrame]: ... +) -> DataFrame | dict[IntStrT, DataFrame] | dict[str, DataFrame]: ... 
@doc(storage_options=_shared_docs["storage_options"]) @@ -482,7 +482,7 @@ def read_excel( sheet_name: str | int | list[IntStrT] | None = 0, *, # If sheet name and table name are specified -> Nested Dictionary of DataFrames - table_name: str | int | list[str] | None = 0, + table_name: str | list[str] | None = 0, header: int | Sequence[int] | None = 0, names: SequenceNotStr[Hashable] | range | None = None, index_col: int | str | Sequence[int] | None = None, @@ -513,7 +513,7 @@ def read_excel( storage_options: StorageOptions | None = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, engine_kwargs: dict | None = None, -) -> DataFrame | dict[IntStrT, DataFrame]: +) -> DataFrame | dict[IntStrT, DataFrame] | dict[str, DataFrame]: check_dtype_backend(dtype_backend) should_close = False if engine_kwargs is None: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index c6924538edc22..1f7985eeeeed5 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -606,7 +606,7 @@ def _convert_cell(self, cell) -> Scalar: return cell.value def get_sheet_data( - self, sheet: Workbook.worksheets, file_rows_needed: int | None = None + self, sheet, file_rows_needed: int | None = None ) -> list[list[Scalar]]: if self.book.read_only: sheet.reset_dimensions() From ea15154defe0a6f864acde7cde39fefd530873b9 Mon Sep 17 00:00:00 2001 From: ian gainey Date: Sun, 28 Apr 2024 15:05:53 -0400 Subject: [PATCH 7/8] Removing type hints considering typing checks doesn't like it --- pandas/io/excel/_openpyxl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 1f7985eeeeed5..1954ad5c5f8d2 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -621,7 +621,7 @@ def table_names(self) -> list[str]: tables.append(table.name) return tables - def get_sheets_required(self, tables) -> list[Workbook.worksheets]: + def get_sheets_required(self, 
tables): sheets_reqd = [] for sheet in self.book.worksheets: for table in sheet.tables.values(): @@ -630,14 +630,14 @@ def get_sheets_required(self, tables) -> list[Workbook.worksheets]: continue return sheets_reqd - def get_sheet_tables(self, sheet: Workbook.worksheets): + def get_sheet_tables(self, sheet): tables = {} for table in sheet.tables.values(): tables[table.name] = table return tables def get_table_data( - self, sheet: Workbook.worksheets, tablename, file_rows_needed: int | None = None + self, sheet, tablename, file_rows_needed: int | None = None ) -> list[list[Scalar]]: table = sheet[tablename.ref] return self.get_data(table, file_rows_needed) From 748d90fc3c141f2e8d2d10671a07608fd988ef6a Mon Sep 17 00:00:00 2001 From: ian gainey Date: Sun, 28 Apr 2024 15:23:46 -0400 Subject: [PATCH 8/8] These errors make no sense and contradict what I am seeing --- pandas/io/excel/_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2731cae2c5e55..bd6223c0e082e 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -482,7 +482,7 @@ def read_excel( sheet_name: str | int | list[IntStrT] | None = 0, *, # If sheet name and table name are specified -> Nested Dictionary of DataFrames - table_name: str | list[str] | None = 0, + table_name: str | int | list[str] | None = 0, header: int | Sequence[int] | None = 0, names: SequenceNotStr[Hashable] | range | None = None, index_col: int | str | Sequence[int] | None = None, @@ -513,7 +513,7 @@ def read_excel( storage_options: StorageOptions | None = None, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, engine_kwargs: dict | None = None, -) -> DataFrame | dict[IntStrT, DataFrame] | dict[str, DataFrame]: +) -> DataFrame | list[DataFrame] | dict[str, DataFrame]: check_dtype_backend(dtype_backend) should_close = False if engine_kwargs is None: @@ -1998,6 +1998,10 @@ def sheet_names(self): """ return 
self._reader.sheet_names + @property + def table_names(self): + return self._reader.table_names + def close(self) -> None: """close io if necessary""" self._reader.close()