@@ -1070,47 +1070,41 @@ def test_encoding(self):
10701070 result = store .select ('df' , Term ('columns=A' , encoding = 'ascii' ))
10711071 tm .assert_frame_equal (result , expected )
10721072
1073- def test_latin_encoding (self ):  # removed side of the hunk: loop-based round-trip test for latin-1 strings
1074-
1075- values = [[b'E\xc9 , 17' , b'' , b'a' , b'b' , b'c' ],  # raw byte fixtures, decoded to text below
1076- [b'E\xc9 , 17' , b'a' , b'b' , b'c' ],
1077- [b'EE, 17' , b'' , b'a' , b'b' , b'c' ],
1078- [b'E\xc9 , 17' , b'\xf8 \xfc ' , b'a' , b'b' , b'c' ],
1079- [b'' , b'a' , b'b' , b'c' ],
1080- [b'\xf8 \xfc ' , b'a' , b'b' , b'c' ],
1081- [b'A\xf8 \xfc ' , b'' , b'a' , b'b' , b'c' ],
1082- [np .nan , b'' , b'b' , b'c' ],
1083- [b'A\xf8 \xfc ' , np .nan , b'' , b'b' , b'c' ]]
1084-
1085- def _try_decode (x , encoding = 'latin-1' ):  # decode one element; anything without .decode is returned as-is
1086- try :
1087- return x .decode (encoding )
1088- except AttributeError :  # raised for the np.nan entries, which have no .decode
1089- return x
1090- # not sure how to remove latin-1 from code in python 2 and 3
1091- values = [[_try_decode (x ) for x in y ] for y in values ]
1092-
1093- examples = []
1094- for dtype in ['category' , object ]:  # one Series per (dtype, value list) combination
1095- for val in values :
1096- examples .append (pd .Series (val , dtype = dtype ))
1097-
1098- def roundtrip (s , key = 'data' , encoding = 'latin-1' , nan_rep = '' ):  # write s with to_hdf, read it back, compare
1099- with ensure_clean_path (self .path ) as store :
1100- s .to_hdf (store , key , format = 'table' , encoding = encoding ,
1101- nan_rep = nan_rep )
1102- retr = read_hdf (store , key )
1103- s_nan = s .replace (nan_rep , np .nan )  # expected result: entries equal to nan_rep become NaN
1104- if is_categorical_dtype (s_nan ):
1105- assert is_categorical_dtype (retr )
1106- assert_series_equal (s_nan , retr , check_dtype = False ,  # dtype/categorical checks relaxed for this branch
1107- check_categorical = False )
1108- else :
1109- assert_series_equal (s_nan , retr )
1110-
1111- for s in examples :  # every example runs inside this single test function
1112- roundtrip (s )
1073+ @pytest .mark .parametrize ('val' , [  # each case is one list of raw byte strings (some with NaN) that becomes a Series
1074+ [b'E\xc9 , 17' , b'' , b'a' , b'b' , b'c' ],
1075+ [b'E\xc9 , 17' , b'a' , b'b' , b'c' ],
1076+ [b'EE, 17' , b'' , b'a' , b'b' , b'c' ],
1077+ [b'E\xc9 , 17' , b'\xf8 \xfc ' , b'a' , b'b' , b'c' ],
1078+ [b'' , b'a' , b'b' , b'c' ],
1079+ [b'\xf8 \xfc ' , b'a' , b'b' , b'c' ],
1080+ [b'A\xf8 \xfc ' , b'' , b'a' , b'b' , b'c' ],
1081+ [np .nan , b'' , b'b' , b'c' ],  # NaN entries are not bytes and are left undecoded in the test body
1082+ [b'A\xf8 \xfc ' , np .nan , b'' , b'b' , b'c' ]
1083+ ])
1084+ @pytest .mark .parametrize ('dtype' , ['category' , object ])  # stacked with 'val': every value list runs with both dtypes
1085+ def test_latin_encoding (self , dtype , val ):  # round-trip a latin-1 decoded Series through to_hdf/read_hdf (table format)
1086+ enc = 'latin-1'  # used both to decode the fixtures and as the encoding passed to to_hdf
1087+ nan_rep = ''  # passed to to_hdf as nan_rep and used below to build the expected Series
1088+ key = 'data'  # key the Series is written under and read back from
1089+
1090+ val = [x .decode (enc ) if isinstance (x , bytes ) else x for x in val ]  # bytes -> text; non-bytes (np.nan) pass through
1091+ ser = pd .Series (val , dtype = dtype )
1092+
1093+ with ensure_clean_path (self .path ) as store :  # store is whatever ensure_clean_path yields (presumably a temp file path - confirm)
1094+ ser .to_hdf (store , key , format = 'table' , encoding = enc ,
1095+ nan_rep = nan_rep )
1096+ retr = read_hdf (store , key )  # read back without passing an encoding argument
1097+
1098+ s_nan = ser .replace (nan_rep , np .nan )  # expected result: entries equal to nan_rep become NaN
1099+
1100+ if is_categorical_dtype (s_nan ):
1101+ assert is_categorical_dtype (retr )  # the categorical dtype itself must be preserved
1102+ assert_series_equal (s_nan , retr , check_dtype = False ,  # dtype/categorical checks relaxed for this branch
1103+ check_categorical = False )
1104+ else :
1105+ assert_series_equal (s_nan , retr )  # object dtype: comparison with default checks
11131106
1107+ # FIXME: don't leave commented-out code; the snippet below still refers to the removed examples/roundtrip helpers
11141108 # fails:
11151109 # for x in examples:
11161110 # roundtrip(s, nan_rep=b'\xf8\xfc')
0 commit comments