Skip to content

Commit e2cfe8e

Browse files
committed
REF: Restore _file in init
Restore _file in init. Use a helper method to write data.
1 parent 2baef30 commit e2cfe8e

File tree

1 file changed

+38
-29
lines changed

1 file changed

+38
-29
lines changed

pandas/io/stata.py

Lines changed: 38 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -223,9 +223,6 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
223223
half-years since 1960h1 yearly
224224
date - ty
225225
years since 0000
226-
227-
If you don't have pandas with datetime support, then you can't do
228-
milliseconds accurately.
229226
"""
230227
MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year
231228
MAX_DAY_DELTA = (Timestamp.max - datetime.datetime(1960, 1, 1)).days
@@ -2123,13 +2120,22 @@ def __init__(
21232120
self._fname = stringify_path(fname)
21242121
self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8}
21252122
self._converted_names: Dict[Label, str] = {}
2123+
self._file: Optional[BinaryIO] = None
21262124

21272125
def _write(self, to_write: str) -> None:
21282126
"""
21292127
Helper to call encode before writing to file for Python 3 compat.
21302128
"""
2129+
assert self._file is not None
21312130
self._file.write(to_write.encode(self._encoding))
21322131

2132+
def _write_bytes(self, value: bytes) -> None:
2133+
"""
2134+
Helper to assert file is open before writing.
2135+
"""
2136+
assert self._file is not None
2137+
self._file.write(value)
2138+
21332139
def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
21342140
"""Check for categorical columns, retain categorical information for
21352141
Stata file and convert categorical data to int"""
@@ -2438,6 +2444,7 @@ def _close(self) -> None:
24382444
(if supported)
24392445
"""
24402446
# Some file-like objects might not support flush
2447+
assert self._file is not None
24412448
try:
24422449
self._file.flush()
24432450
except AttributeError:
@@ -2467,7 +2474,7 @@ def _write_expansion_fields(self) -> None:
24672474

24682475
def _write_value_labels(self) -> None:
24692476
for vl in self._value_labels:
2470-
self._file.write(vl.generate_value_label(self._byteorder))
2477+
self._write_bytes(vl.generate_value_label(self._byteorder))
24712478

24722479
def _write_header(
24732480
self,
@@ -2476,22 +2483,22 @@ def _write_header(
24762483
) -> None:
24772484
byteorder = self._byteorder
24782485
# ds_format - just use 114
2479-
self._file.write(struct.pack("b", 114))
2486+
self._write_bytes(struct.pack("b", 114))
24802487
# byteorder
24812488
self._write(byteorder == ">" and "\x01" or "\x02")
24822489
# filetype
24832490
self._write("\x01")
24842491
# unused
24852492
self._write("\x00")
24862493
# number of vars, 2 bytes
2487-
self._file.write(struct.pack(byteorder + "h", self.nvar)[:2])
2494+
self._write_bytes(struct.pack(byteorder + "h", self.nvar)[:2])
24882495
# number of obs, 4 bytes
2489-
self._file.write(struct.pack(byteorder + "i", self.nobs)[:4])
2496+
self._write_bytes(struct.pack(byteorder + "i", self.nobs)[:4])
24902497
# data label 81 bytes, char, null terminated
24912498
if data_label is None:
2492-
self._file.write(self._null_terminate_bytes(_pad_bytes("", 80)))
2499+
self._write_bytes(self._null_terminate_bytes(_pad_bytes("", 80)))
24932500
else:
2494-
self._file.write(
2501+
self._write_bytes(
24952502
self._null_terminate_bytes(_pad_bytes(data_label[:80], 80))
24962503
)
24972504
# time stamp, 18 bytes, char, null terminated
@@ -2522,11 +2529,11 @@ def _write_header(
25222529
+ month_lookup[time_stamp.month]
25232530
+ time_stamp.strftime(" %Y %H:%M")
25242531
)
2525-
self._file.write(self._null_terminate_bytes(ts))
2532+
self._write_bytes(self._null_terminate_bytes(ts))
25262533

25272534
def _write_variable_types(self) -> None:
25282535
for typ in self.typlist:
2529-
self._file.write(struct.pack("B", typ))
2536+
self._write_bytes(struct.pack("B", typ))
25302537

25312538
def _write_varnames(self) -> None:
25322539
# varlist names are checked by _check_column_names
@@ -2619,7 +2626,7 @@ def _prepare_data(self) -> np.recarray:
26192626
return data.to_records(index=False, column_dtypes=dtypes)
26202627

26212628
def _write_data(self, records: np.recarray) -> None:
2622-
self._file.write(records.tobytes())
2629+
self._write_bytes(records.tobytes())
26232630

26242631
@staticmethod
26252632
def _null_terminate_str(s: str) -> str:
@@ -2979,6 +2986,7 @@ def _tag(val: Union[str, bytes], tag: str) -> bytes:
29792986

29802987
def _update_map(self, tag: str) -> None:
29812988
"""Update map location for tag with file position"""
2989+
assert self._file is not None
29822990
self._map[tag] = self._file.tell()
29832991

29842992
def _write_header(
@@ -2988,7 +2996,7 @@ def _write_header(
29882996
) -> None:
29892997
"""Write the file header"""
29902998
byteorder = self._byteorder
2991-
self._file.write(bytes("<stata_dta>", "utf-8"))
2999+
self._write_bytes(bytes("<stata_dta>", "utf-8"))
29923000
bio = BytesIO()
29933001
# ds_format - 117
29943002
bio.write(self._tag(bytes(str(self._dta_version), "utf-8"), "release"))
@@ -3038,12 +3046,13 @@ def _write_header(
30383046
stata_ts = b"\x11" + bytes(ts, "utf-8")
30393047
bio.write(self._tag(stata_ts, "timestamp"))
30403048
bio.seek(0)
3041-
self._file.write(self._tag(bio.read(), "header"))
3049+
self._write_bytes(self._tag(bio.read(), "header"))
30423050

30433051
def _write_map(self) -> None:
30443052
"""Called twice during file write. The first populates the values in
30453053
the map with 0s. The second call writes the final map locations when
30463054
all blocks have been written."""
3055+
assert self._file is not None
30473056
if not self._map:
30483057
self._map = dict(
30493058
(
@@ -3069,15 +3078,15 @@ def _write_map(self) -> None:
30693078
for val in self._map.values():
30703079
bio.write(struct.pack(self._byteorder + "Q", val))
30713080
bio.seek(0)
3072-
self._file.write(self._tag(bio.read(), "map"))
3081+
self._write_bytes(self._tag(bio.read(), "map"))
30733082

30743083
def _write_variable_types(self) -> None:
30753084
self._update_map("variable_types")
30763085
bio = BytesIO()
30773086
for typ in self.typlist:
30783087
bio.write(struct.pack(self._byteorder + "H", typ))
30793088
bio.seek(0)
3080-
self._file.write(self._tag(bio.read(), "variable_types"))
3089+
self._write_bytes(self._tag(bio.read(), "variable_types"))
30813090

30823091
def _write_varnames(self) -> None:
30833092
self._update_map("varnames")
@@ -3089,12 +3098,12 @@ def _write_varnames(self) -> None:
30893098
name = _pad_bytes_new(name[:32].encode(self._encoding), vn_len + 1)
30903099
bio.write(name)
30913100
bio.seek(0)
3092-
self._file.write(self._tag(bio.read(), "varnames"))
3101+
self._write_bytes(self._tag(bio.read(), "varnames"))
30933102

30943103
def _write_sortlist(self) -> None:
30953104
self._update_map("sortlist")
30963105
sort_size = 2 if self._dta_version < 119 else 4
3097-
self._file.write(self._tag(b"\x00" * sort_size * (self.nvar + 1), "sortlist"))
3106+
self._write_bytes(self._tag(b"\x00" * sort_size * (self.nvar + 1), "sortlist"))
30983107

30993108
def _write_formats(self) -> None:
31003109
self._update_map("formats")
@@ -3103,7 +3112,7 @@ def _write_formats(self) -> None:
31033112
for fmt in self.fmtlist:
31043113
bio.write(_pad_bytes_new(fmt.encode(self._encoding), fmt_len))
31053114
bio.seek(0)
3106-
self._file.write(self._tag(bio.read(), "formats"))
3115+
self._write_bytes(self._tag(bio.read(), "formats"))
31073116

31083117
def _write_value_label_names(self) -> None:
31093118
self._update_map("value_label_names")
@@ -3119,7 +3128,7 @@ def _write_value_label_names(self) -> None:
31193128
encoded_name = _pad_bytes_new(name[:32].encode(self._encoding), vl_len + 1)
31203129
bio.write(encoded_name)
31213130
bio.seek(0)
3122-
self._file.write(self._tag(bio.read(), "value_label_names"))
3131+
self._write_bytes(self._tag(bio.read(), "value_label_names"))
31233132

31243133
def _write_variable_labels(self) -> None:
31253134
# Missing labels are 80 blank characters plus null termination
@@ -3133,7 +3142,7 @@ def _write_variable_labels(self) -> None:
31333142
for _ in range(self.nvar):
31343143
bio.write(blank)
31353144
bio.seek(0)
3136-
self._file.write(self._tag(bio.read(), "variable_labels"))
3145+
self._write_bytes(self._tag(bio.read(), "variable_labels"))
31373146
return
31383147

31393148
for col in self.data:
@@ -3153,21 +3162,21 @@ def _write_variable_labels(self) -> None:
31533162
else:
31543163
bio.write(blank)
31553164
bio.seek(0)
3156-
self._file.write(self._tag(bio.read(), "variable_labels"))
3165+
self._write_bytes(self._tag(bio.read(), "variable_labels"))
31573166

31583167
def _write_characteristics(self) -> None:
31593168
self._update_map("characteristics")
3160-
self._file.write(self._tag(b"", "characteristics"))
3169+
self._write_bytes(self._tag(b"", "characteristics"))
31613170

31623171
def _write_data(self, records) -> None:
31633172
self._update_map("data")
3164-
self._file.write(b"<data>")
3165-
self._file.write(records.tobytes())
3166-
self._file.write(b"</data>")
3173+
self._write_bytes(b"<data>")
3174+
self._write_bytes(records.tobytes())
3175+
self._write_bytes(b"</data>")
31673176

31683177
def _write_strls(self) -> None:
31693178
self._update_map("strls")
3170-
self._file.write(self._tag(self._strl_blob, "strls"))
3179+
self._write_bytes(self._tag(self._strl_blob, "strls"))
31713180

31723181
def _write_expansion_fields(self) -> None:
31733182
"""No-op in dta 117+"""
@@ -3181,11 +3190,11 @@ def _write_value_labels(self) -> None:
31813190
lab = self._tag(lab, "lbl")
31823191
bio.write(lab)
31833192
bio.seek(0)
3184-
self._file.write(self._tag(bio.read(), "value_labels"))
3193+
self._write_bytes(self._tag(bio.read(), "value_labels"))
31853194

31863195
def _write_file_close_tag(self) -> None:
31873196
self._update_map("stata_data_close")
3188-
self._file.write(bytes("</stata_dta>", "utf-8"))
3197+
self._write_bytes(bytes("</stata_dta>", "utf-8"))
31893198
self._update_map("end-of-file")
31903199

31913200
def _update_strl_names(self) -> None:

0 commit comments

Comments
 (0)