1
1
from __future__ import annotations
2
2
3
- from io import (
4
- BufferedReader ,
5
- BytesIO ,
3
+ from datetime import (
4
+ date ,
5
+ datetime ,
6
+ time ,
6
7
)
7
- from pathlib import PurePath
8
8
from tempfile import NamedTemporaryFile
9
+ from typing import Union
9
10
10
11
from pandas ._typing import (
12
+ FilePath ,
13
+ ReadBuffer ,
11
14
Scalar ,
12
15
StorageOptions ,
13
16
)
14
17
from pandas .compat ._optional import import_optional_dependency
15
18
19
+ import pandas as pd
20
+
21
+ from pandas .io .common import stringify_path
16
22
from pandas .io .excel ._base import (
17
23
BaseExcelReader ,
18
24
inspect_excel_format ,
19
25
)
20
26
27
+ ValueT = Union [int , float , str , bool , time , date , datetime ]
28
+
21
29
22
30
class __calamine__ :
23
31
pass
@@ -28,7 +36,9 @@ class CalamineExcelReader(BaseExcelReader):
28
36
_sheet_names : list [str ] | None = None
29
37
30
38
def __init__ (
31
- self , filepath_or_buffer , storage_options : StorageOptions = None
39
+ self ,
40
+ filepath_or_buffer : FilePath | ReadBuffer [bytes ],
41
+ storage_options : StorageOptions = None ,
32
42
) -> None :
33
43
import_optional_dependency ("python_calamine" )
34
44
super ().__init__ (filepath_or_buffer , storage_options = storage_options )
@@ -37,20 +47,15 @@ def __init__(
37
47
def _workbook_class (self ) -> type [__calamine__ ]:
38
48
return __calamine__
39
49
40
- def load_workbook (
41
- self , filepath_or_buffer : str | PurePath | BufferedReader | BytesIO
42
- ) -> str :
43
- if isinstance (filepath_or_buffer , BufferedReader ):
44
- filepath_or_buffer = filepath_or_buffer .name
45
-
46
- elif isinstance (filepath_or_buffer , BytesIO ):
50
+ def load_workbook (self , filepath_or_buffer ) -> str :
51
+ if hasattr (filepath_or_buffer , "read" ) and hasattr (filepath_or_buffer , "seek" ):
47
52
ext = inspect_excel_format (filepath_or_buffer )
48
53
with NamedTemporaryFile (suffix = f".{ ext } " , delete = False ) as tmp_file :
49
- tmp_file .write (filepath_or_buffer .getvalue ())
54
+ filepath_or_buffer .seek (0 )
55
+ tmp_file .write (filepath_or_buffer .read ())
50
56
filepath_or_buffer = tmp_file .name
51
-
52
- elif isinstance (filepath_or_buffer , PurePath ):
53
- filepath_or_buffer = filepath_or_buffer .as_posix ()
57
+ else :
58
+ filepath_or_buffer = stringify_path (filepath_or_buffer )
54
59
55
60
assert isinstance (filepath_or_buffer , str )
56
61
@@ -75,7 +80,31 @@ def get_sheet_by_index(self, index: int) -> int:
75
80
self .raise_if_bad_sheet_by_index (index )
76
81
return index
77
82
78
- def get_sheet_data (self , sheet : int , convert_float : bool ) -> list [list [Scalar ]]:
83
+ def get_sheet_data (
84
+ self , sheet : int , file_rows_needed : int | None = None
85
+ ) -> list [list [Scalar ]]:
86
+ def _convert_cell (value : ValueT ) -> Scalar :
87
+ if isinstance (value , float ):
88
+ val = int (value )
89
+ if val == value :
90
+ return val
91
+ else :
92
+ return value
93
+ elif isinstance (value , date ):
94
+ return pd .Timestamp (value )
95
+ elif isinstance (value , time ):
96
+ return value .isoformat ()
97
+
98
+ return value
99
+
79
100
from python_calamine import get_sheet_data
80
101
81
- return get_sheet_data (self .book , sheet )
102
+ rows = get_sheet_data (self .book , sheet )
103
+ data : list [list [Scalar ]] = []
104
+
105
+ for row in rows :
106
+ data .append ([_convert_cell (cell ) for cell in row ])
107
+ if file_rows_needed is not None and len (data ) >= file_rows_needed :
108
+ break
109
+
110
+ return data
0 commit comments