@@ -1181,10 +1181,13 @@ def __init__(
11811181 comment : str | None ,
11821182 skiprows : set [int ] | None = None ,
11831183 infer_nrows : int = 100 ,
1184+ keep_whitespace : bool | None = True ,
1185+ whitespace_chars : str | None = " \t " ,
11841186 ) -> None :
11851187 self .f = f
11861188 self .buffer : Iterator | None = None
1187- self .delimiter = "\r \n " + delimiter if delimiter else "\n \r \t "
1189+ self .keep_whitespace = keep_whitespace
1190+ self .whitespace_chars = whitespace_chars
11881191 self .comment = comment
11891192 if colspecs == "infer" :
11901193 self .colspecs = self .detect_colspecs (
@@ -1211,6 +1214,36 @@ def __init__(
12111214 "2 element tuple or list of integers"
12121215 )
12131216
1217+ if not isinstance (self .keep_whitespace , bool ):
1218+ raise TypeError (
1219+ "keep_whitespace must be type bool (True or False), "
1220+ f"input was type { type (self .keep_whitespace ).__name__ } : "
1221+ f'"{ self .keep_whitespace } "'
1222+ )
1223+ if delimiter :
1224+ ## Delimiters in fixed-width files removed:
1225+ ## use colspecs, widths, or read_table()
1226+ import warnings
1227+
1228+ ## See link regarding fixing anti-patterns & unexpected default behaviour:
1229+ ## https://github.com/pandas-dev/pandas/pull/49832#discussion_r1030615937
1230+ ##
1231+ ## Deprecation warnings ignored by default, show them:
1232+ warnings .simplefilter ("always" )
1233+ warnings .formatwarning = (
1234+ lambda msg , cat , file , line , args1 : f"NOTICE:\n { msg } \n \n "
1235+ f'{ cat } \n File "{ file } ", line { line } '
1236+ "in FixedWidthReader.__init__\n "
1237+ )
1238+ warnings .warn (
1239+ (
1240+ "Delimiters are deprecated in fixed-width files "
1241+ + "- use colspecs or widths\n "
1242+ + "See keep_whitespace in read_fwf(), also see read_table()."
1243+ ),
1244+ DeprecationWarning ,
1245+ )
1246+
12141247 def get_rows (self , infer_nrows : int , skiprows : set [int ] | None = None ) -> list [str ]:
12151248 """
12161249 Read rows from self.f, skipping as specified.
@@ -1283,7 +1316,14 @@ def __next__(self) -> list[str]:
12831316 else :
12841317 line = next (self .f ) # type: ignore[arg-type]
12851318 # Note: 'colspecs' is a sequence of half-open intervals.
1286- return [line [from_ :to ].strip (self .delimiter ) for (from_ , to ) in self .colspecs ]
1319+ line = line .rstrip ("\r \n " )
1320+ if self .keep_whitespace :
1321+ return [line [from_ :to ] for (from_ , to ) in self .colspecs ]
1322+ else :
1323+ return [
1324+ line [from_ :to ].strip (self .whitespace_chars )
1325+ for (from_ , to ) in self .colspecs
1326+ ]
12871327
12881328
12891329class FixedWidthFieldParser (PythonParser ):
@@ -1296,6 +1336,8 @@ def __init__(self, f: ReadCsvBuffer[str], **kwds) -> None:
12961336 # Support iterators, convert to a list.
12971337 self .colspecs = kwds .pop ("colspecs" )
12981338 self .infer_nrows = kwds .pop ("infer_nrows" )
1339+ self .keep_whitespace = kwds .pop ("keep_whitespace" , True )
1340+ self .whitespace_chars = kwds .pop ("whitespace_chars" , " \t " )
12991341 PythonParser .__init__ (self , f , ** kwds )
13001342
13011343 def _make_reader (self , f : IO [str ] | ReadCsvBuffer [str ]) -> None :
@@ -1306,6 +1348,8 @@ def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None:
13061348 self .comment ,
13071349 self .skiprows ,
13081350 self .infer_nrows ,
1351+ self .keep_whitespace ,
1352+ self .whitespace_chars ,
13091353 )
13101354
13111355 def _remove_empty_lines (self , lines : list [list [Scalar ]]) -> list [list [Scalar ]]:
0 commit comments