@@ -223,9 +223,6 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
223
223
half-years since 1960h1 yearly
224
224
date - ty
225
225
years since 0000
226
-
227
- If you don't have pandas with datetime support, then you can't do
228
- milliseconds accurately.
229
226
"""
230
227
MIN_YEAR , MAX_YEAR = Timestamp .min .year , Timestamp .max .year
231
228
MAX_DAY_DELTA = (Timestamp .max - datetime .datetime (1960 , 1 , 1 )).days
@@ -2123,13 +2120,22 @@ def __init__(
2123
2120
self ._fname = stringify_path (fname )
2124
2121
self .type_converters = {253 : np .int32 , 252 : np .int16 , 251 : np .int8 }
2125
2122
self ._converted_names : Dict [Label , str ] = {}
2123
+ self ._file : Optional [BinaryIO ] = None
2126
2124
2127
2125
def _write(self, to_write: str) -> None:
    """
    Helper to call encode before writing to file for Python 3 compat.

    Parameters
    ----------
    to_write : str
        Text that is encoded with ``self._encoding`` and written to
        ``self._file``.
    """
    # ``_file`` is declared Optional[BinaryIO] and starts out as None in
    # ``__init__``; the assert narrows the type before the attribute is used.
    assert self._file is not None
    self._file.write(to_write.encode(self._encoding))
2132
2131
2132
def _write_bytes(self, value: bytes) -> None:
    """
    Helper to assert file is open before writing.

    Parameters
    ----------
    value : bytes
        Already-encoded data that is passed unchanged to ``self._file.write``.
    """
    # ``_file`` is declared Optional[BinaryIO]; the assert narrows the type
    # so the write below is only reached with a file object present.
    assert self._file is not None
    self._file.write(value)
2138
+
2133
2139
def _prepare_categoricals (self , data : DataFrame ) -> DataFrame :
2134
2140
"""Check for categorical columns, retain categorical information for
2135
2141
Stata file and convert categorical data to int"""
@@ -2438,6 +2444,7 @@ def _close(self) -> None:
2438
2444
(if supported)
2439
2445
"""
2440
2446
# Some file-like objects might not support flush
2447
+ assert self ._file is not None
2441
2448
try :
2442
2449
self ._file .flush ()
2443
2450
except AttributeError :
@@ -2467,7 +2474,7 @@ def _write_expansion_fields(self) -> None:
2467
2474
2468
2475
def _write_value_labels(self) -> None:
    """
    Write every entry of ``self._value_labels`` to the output file.

    Each entry is serialised with its ``generate_value_label`` method using
    ``self._byteorder`` and written, in order, through ``_write_bytes``.
    """
    for vl in self._value_labels:
        self._write_bytes(vl.generate_value_label(self._byteorder))
2471
2478
2472
2479
def _write_header (
2473
2480
self ,
@@ -2476,22 +2483,22 @@ def _write_header(
2476
2483
) -> None :
2477
2484
byteorder = self ._byteorder
2478
2485
# ds_format - just use 114
2479
- self ._file . write (struct .pack ("b" , 114 ))
2486
+ self ._write_bytes (struct .pack ("b" , 114 ))
2480
2487
# byteorder
2481
2488
self ._write (byteorder == ">" and "\x01 " or "\x02 " )
2482
2489
# filetype
2483
2490
self ._write ("\x01 " )
2484
2491
# unused
2485
2492
self ._write ("\x00 " )
2486
2493
# number of vars, 2 bytes
2487
- self ._file . write (struct .pack (byteorder + "h" , self .nvar )[:2 ])
2494
+ self ._write_bytes (struct .pack (byteorder + "h" , self .nvar )[:2 ])
2488
2495
# number of obs, 4 bytes
2489
- self ._file . write (struct .pack (byteorder + "i" , self .nobs )[:4 ])
2496
+ self ._write_bytes (struct .pack (byteorder + "i" , self .nobs )[:4 ])
2490
2497
# data label 81 bytes, char, null terminated
2491
2498
if data_label is None :
2492
- self ._file . write (self ._null_terminate_bytes (_pad_bytes ("" , 80 )))
2499
+ self ._write_bytes (self ._null_terminate_bytes (_pad_bytes ("" , 80 )))
2493
2500
else :
2494
- self ._file . write (
2501
+ self ._write_bytes (
2495
2502
self ._null_terminate_bytes (_pad_bytes (data_label [:80 ], 80 ))
2496
2503
)
2497
2504
# time stamp, 18 bytes, char, null terminated
@@ -2522,11 +2529,11 @@ def _write_header(
2522
2529
+ month_lookup [time_stamp .month ]
2523
2530
+ time_stamp .strftime (" %Y %H:%M" )
2524
2531
)
2525
- self ._file . write (self ._null_terminate_bytes (ts ))
2532
+ self ._write_bytes (self ._null_terminate_bytes (ts ))
2526
2533
2527
2534
def _write_variable_types(self) -> None:
    """
    Write one unsigned byte per entry of ``self.typlist``.

    Each type code is packed with ``struct`` format ``"B"`` and written
    through ``_write_bytes`` in list order.
    """
    for typ in self.typlist:
        self._write_bytes(struct.pack("B", typ))
2530
2537
2531
2538
def _write_varnames (self ) -> None :
2532
2539
# varlist names are checked by _check_column_names
@@ -2619,7 +2626,7 @@ def _prepare_data(self) -> np.recarray:
2619
2626
return data .to_records (index = False , column_dtypes = dtypes )
2620
2627
2621
2628
def _write_data(self, records: np.recarray) -> None:
    """
    Write the raw bytes of ``records`` to the output file.

    Parameters
    ----------
    records : np.recarray
        Record array whose ``tobytes()`` output is written as-is.
    """
    self._write_bytes(records.tobytes())
2623
2630
2624
2631
@staticmethod
2625
2632
def _null_terminate_str (s : str ) -> str :
@@ -2979,6 +2986,7 @@ def _tag(val: Union[str, bytes], tag: str) -> bytes:
2979
2986
2980
2987
def _update_map(self, tag: str) -> None:
    """
    Update map location for tag with file position

    Parameters
    ----------
    tag : str
        Key in ``self._map`` that receives the value of
        ``self._file.tell()``.
    """
    # ``_file`` is declared Optional[BinaryIO]; narrow it before calling tell().
    assert self._file is not None
    self._map[tag] = self._file.tell()
2983
2991
2984
2992
def _write_header (
@@ -2988,7 +2996,7 @@ def _write_header(
2988
2996
) -> None :
2989
2997
"""Write the file header"""
2990
2998
byteorder = self ._byteorder
2991
- self ._file . write (bytes ("<stata_dta>" , "utf-8" ))
2999
+ self ._write_bytes (bytes ("<stata_dta>" , "utf-8" ))
2992
3000
bio = BytesIO ()
2993
3001
# ds_format - 117
2994
3002
bio .write (self ._tag (bytes (str (self ._dta_version ), "utf-8" ), "release" ))
@@ -3038,12 +3046,13 @@ def _write_header(
3038
3046
stata_ts = b"\x11 " + bytes (ts , "utf-8" )
3039
3047
bio .write (self ._tag (stata_ts , "timestamp" ))
3040
3048
bio .seek (0 )
3041
- self ._file . write (self ._tag (bio .read (), "header" ))
3049
+ self ._write_bytes (self ._tag (bio .read (), "header" ))
3042
3050
3043
3051
def _write_map (self ) -> None :
3044
3052
"""Called twice during file write. The first populates the values in
3045
3053
the map with 0s. The second call writes the final map locations when
3046
3054
all blocks have been written."""
3055
+ assert self ._file is not None
3047
3056
if not self ._map :
3048
3057
self ._map = dict (
3049
3058
(
@@ -3069,15 +3078,15 @@ def _write_map(self) -> None:
3069
3078
for val in self ._map .values ():
3070
3079
bio .write (struct .pack (self ._byteorder + "Q" , val ))
3071
3080
bio .seek (0 )
3072
- self ._file . write (self ._tag (bio .read (), "map" ))
3081
+ self ._write_bytes (self ._tag (bio .read (), "map" ))
3073
3082
3074
3083
def _write_variable_types(self) -> None:
    """
    Record the block position and write the tagged variable-types block.

    Each entry of ``self.typlist`` is packed as an unsigned 16-bit integer
    (``struct`` format ``"H"``) in ``self._byteorder``; the concatenated
    result is wrapped by ``self._tag`` under ``"variable_types"``.
    """
    self._update_map("variable_types")
    fmt = self._byteorder + "H"
    payload = b"".join(struct.pack(fmt, typ) for typ in self.typlist)
    self._write_bytes(self._tag(payload, "variable_types"))
3081
3090
3082
3091
def _write_varnames (self ) -> None :
3083
3092
self ._update_map ("varnames" )
@@ -3089,12 +3098,12 @@ def _write_varnames(self) -> None:
3089
3098
name = _pad_bytes_new (name [:32 ].encode (self ._encoding ), vn_len + 1 )
3090
3099
bio .write (name )
3091
3100
bio .seek (0 )
3092
- self ._file . write (self ._tag (bio .read (), "varnames" ))
3101
+ self ._write_bytes (self ._tag (bio .read (), "varnames" ))
3093
3102
3094
3103
def _write_sortlist(self) -> None:
    """
    Record the block position and write an all-zero sortlist block.

    The block holds ``self.nvar + 1`` entries, each ``sort_size`` bytes wide:
    2 bytes when ``self._dta_version`` is below 119, otherwise 4 bytes.
    """
    self._update_map("sortlist")
    sort_size = 2 if self._dta_version < 119 else 4
    # The filler must be a single NUL byte so the payload is exactly
    # sort_size * (nvar + 1) bytes long.
    self._write_bytes(self._tag(b"\x00" * sort_size * (self.nvar + 1), "sortlist"))
3098
3107
3099
3108
def _write_formats (self ) -> None :
3100
3109
self ._update_map ("formats" )
@@ -3103,7 +3112,7 @@ def _write_formats(self) -> None:
3103
3112
for fmt in self .fmtlist :
3104
3113
bio .write (_pad_bytes_new (fmt .encode (self ._encoding ), fmt_len ))
3105
3114
bio .seek (0 )
3106
- self ._file . write (self ._tag (bio .read (), "formats" ))
3115
+ self ._write_bytes (self ._tag (bio .read (), "formats" ))
3107
3116
3108
3117
def _write_value_label_names (self ) -> None :
3109
3118
self ._update_map ("value_label_names" )
@@ -3119,7 +3128,7 @@ def _write_value_label_names(self) -> None:
3119
3128
encoded_name = _pad_bytes_new (name [:32 ].encode (self ._encoding ), vl_len + 1 )
3120
3129
bio .write (encoded_name )
3121
3130
bio .seek (0 )
3122
- self ._file . write (self ._tag (bio .read (), "value_label_names" ))
3131
+ self ._write_bytes (self ._tag (bio .read (), "value_label_names" ))
3123
3132
3124
3133
def _write_variable_labels (self ) -> None :
3125
3134
# Missing labels are 80 blank characters plus null termination
@@ -3133,7 +3142,7 @@ def _write_variable_labels(self) -> None:
3133
3142
for _ in range (self .nvar ):
3134
3143
bio .write (blank )
3135
3144
bio .seek (0 )
3136
- self ._file . write (self ._tag (bio .read (), "variable_labels" ))
3145
+ self ._write_bytes (self ._tag (bio .read (), "variable_labels" ))
3137
3146
return
3138
3147
3139
3148
for col in self .data :
@@ -3153,21 +3162,21 @@ def _write_variable_labels(self) -> None:
3153
3162
else :
3154
3163
bio .write (blank )
3155
3164
bio .seek (0 )
3156
- self ._file . write (self ._tag (bio .read (), "variable_labels" ))
3165
+ self ._write_bytes (self ._tag (bio .read (), "variable_labels" ))
3157
3166
3158
3167
def _write_characteristics(self) -> None:
    """
    Record the block position and write an empty characteristics block.

    The payload passed to ``self._tag`` is always ``b""``.
    """
    self._update_map("characteristics")
    self._write_bytes(self._tag(b"", "characteristics"))
3161
3170
3162
3171
def _write_data(self, records: np.recarray) -> None:
    """
    Record the block position and write the data block.

    Parameters
    ----------
    records : np.recarray
        Record array whose ``tobytes()`` output is written between the
        literal ``<data>`` and ``</data>`` markers.
    """
    self._update_map("data")
    self._write_bytes(b"<data>")
    self._write_bytes(records.tobytes())
    self._write_bytes(b"</data>")
3167
3176
3168
3177
def _write_strls(self) -> None:
    """
    Record the block position and write the tagged strls block.

    The payload is ``self._strl_blob`` wrapped by ``self._tag`` under
    ``"strls"``.
    """
    self._update_map("strls")
    self._write_bytes(self._tag(self._strl_blob, "strls"))
3171
3180
3172
3181
def _write_expansion_fields (self ) -> None :
3173
3182
"""No-op in dta 117+"""
@@ -3181,11 +3190,11 @@ def _write_value_labels(self) -> None:
3181
3190
lab = self ._tag (lab , "lbl" )
3182
3191
bio .write (lab )
3183
3192
bio .seek (0 )
3184
- self ._file . write (self ._tag (bio .read (), "value_labels" ))
3193
+ self ._write_bytes (self ._tag (bio .read (), "value_labels" ))
3185
3194
3186
3195
def _write_file_close_tag(self) -> None:
    """
    Write the closing ``</stata_dta>`` tag and record both map positions.

    ``"stata_data_close"`` is recorded before the tag is written and
    ``"end-of-file"`` after it.
    """
    self._update_map("stata_data_close")
    self._write_bytes(b"</stata_dta>")
    self._update_map("end-of-file")
3190
3199
3191
3200
def _update_strl_names (self ) -> None :
0 commit comments