Skip to content

Commit 677a224

Browse files
authored
Merge pull request pandas-dev#3 from dimastbk/issue-50395
some improves
2 parents bebfec5 + 08a5616 commit 677a224

File tree

2 files changed

+191
-53
lines changed

2 files changed

+191
-53
lines changed

pandas/io/excel/_calaminereader.py

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,31 @@
11
from __future__ import annotations
22

3-
from io import (
4-
BufferedReader,
5-
BytesIO,
3+
from datetime import (
4+
date,
5+
datetime,
6+
time,
67
)
7-
from pathlib import PurePath
88
from tempfile import NamedTemporaryFile
9+
from typing import Union
910

1011
from pandas._typing import (
12+
FilePath,
13+
ReadBuffer,
1114
Scalar,
1215
StorageOptions,
1316
)
1417
from pandas.compat._optional import import_optional_dependency
1518

19+
import pandas as pd
20+
21+
from pandas.io.common import stringify_path
1622
from pandas.io.excel._base import (
1723
BaseExcelReader,
1824
inspect_excel_format,
1925
)
2026

27+
ValueT = Union[int, float, str, bool, time, date, datetime]
28+
2129

2230
class __calamine__:
2331
pass
@@ -28,7 +36,9 @@ class CalamineExcelReader(BaseExcelReader):
2836
_sheet_names: list[str] | None = None
2937

3038
def __init__(
31-
self, filepath_or_buffer, storage_options: StorageOptions = None
39+
self,
40+
filepath_or_buffer: FilePath | ReadBuffer[bytes],
41+
storage_options: StorageOptions = None,
3242
) -> None:
3343
import_optional_dependency("python_calamine")
3444
super().__init__(filepath_or_buffer, storage_options=storage_options)
@@ -37,20 +47,15 @@ def __init__(
3747
def _workbook_class(self) -> type[__calamine__]:
3848
return __calamine__
3949

40-
def load_workbook(
41-
self, filepath_or_buffer: str | PurePath | BufferedReader | BytesIO
42-
) -> str:
43-
if isinstance(filepath_or_buffer, BufferedReader):
44-
filepath_or_buffer = filepath_or_buffer.name
45-
46-
elif isinstance(filepath_or_buffer, BytesIO):
50+
def load_workbook(self, filepath_or_buffer) -> str:
51+
if hasattr(filepath_or_buffer, "read") and hasattr(filepath_or_buffer, "seek"):
4752
ext = inspect_excel_format(filepath_or_buffer)
4853
with NamedTemporaryFile(suffix=f".{ext}", delete=False) as tmp_file:
49-
tmp_file.write(filepath_or_buffer.getvalue())
54+
filepath_or_buffer.seek(0)
55+
tmp_file.write(filepath_or_buffer.read())
5056
filepath_or_buffer = tmp_file.name
51-
52-
elif isinstance(filepath_or_buffer, PurePath):
53-
filepath_or_buffer = filepath_or_buffer.as_posix()
57+
else:
58+
filepath_or_buffer = stringify_path(filepath_or_buffer)
5459

5560
assert isinstance(filepath_or_buffer, str)
5661

@@ -75,7 +80,31 @@ def get_sheet_by_index(self, index: int) -> int:
7580
self.raise_if_bad_sheet_by_index(index)
7681
return index
7782

78-
def get_sheet_data(self, sheet: int, convert_float: bool) -> list[list[Scalar]]:
83+
def get_sheet_data(
    self, sheet: int, file_rows_needed: int | None = None
) -> list[list[Scalar]]:
    """
    Read one worksheet and convert its cells to pandas-friendly scalars.

    Parameters
    ----------
    sheet : int
        Sheet index, passed straight through to ``python_calamine``.
    file_rows_needed : int, optional
        Stop converting once this many rows have been collected. The check
        runs after each row is appended. ``None`` (the default) converts
        every row.

    Returns
    -------
    list of list of Scalar
        One inner list per row that was read.
    """

    def _convert_cell(value: ValueT) -> Scalar:
        if isinstance(value, float):
            # float.is_integer() is False for NaN and +/-inf, whereas
            # int(value) raises ValueError / OverflowError for them, so
            # test first and only narrow genuinely integral floats.
            return int(value) if value.is_integer() else value
        if isinstance(value, date):
            # datetime is a subclass of date, so this branch handles both.
            return pd.Timestamp(value)
        if isinstance(value, time):
            return value.isoformat()
        return value

    # Imported lazily: python_calamine is an optional dependency. Aliased so
    # the library function is not confused with this method of the same name.
    from python_calamine import get_sheet_data as read_sheet_data

    rows = read_sheet_data(self.book, sheet)
    data: list[list[Scalar]] = []

    for row in rows:
        data.append([_convert_cell(cell) for cell in row])
        if file_rows_needed is not None and len(data) >= file_rows_needed:
            break

    return data

0 commit comments

Comments
 (0)