124 changes: 41 additions & 83 deletions pandas/io/parsers.py
@@ -381,9 +381,7 @@ def _validate_integer(name, val, min_val=0):
     min_val : int
         Minimum allowed value (val < min_val will result in a ValueError)
     """
-    msg = "'{name:s}' must be an integer >={min_val:d}".format(
-        name=name, min_val=min_val
-    )
+    msg = f"'{name:s}' must be an integer >={min_val:d}"
 
     if val is not None:
         if is_float(val):
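
Reviewer note: format specs such as `:s` and `:d` carry over unchanged from `str.format()` to f-strings, so the message is byte-for-byte identical. A quick standalone check (the argument values are made up):

```python
# Format specs work the same inside f-strings as in str.format().
name, min_val = "nrows", 0  # hypothetical arguments
old = "'{name:s}' must be an integer >={min_val:d}".format(name=name, min_val=min_val)
new = f"'{name:s}' must be an integer >={min_val:d}"
assert old == new  # both: 'nrows' must be an integer >=0
```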
@@ -822,11 +820,7 @@ def __init__(self, f, engine=None, **kwds):
                 try:
                     dialect_val = getattr(dialect, param)
                 except AttributeError:
-                    raise ValueError(
-                        "Invalid dialect '{dialect}' provided".format(
-                            dialect=kwds["dialect"]
-                        )
-                    )
+                    raise ValueError(f"Invalid dialect {kwds['dialect']} provided")
                 parser_default = _parser_defaults[param]
                 provided = kwds.get(param, parser_default)
 
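One subtlety worth flagging: the new message drops the quotes around the dialect name, and the dict key switches to single quotes because, before Python 3.12, an f-string expression cannot reuse the outer quote character. A minimal illustration with an invented `kwds`:

```python
# Pre-3.12 f-strings cannot nest the delimiting quote, hence kwds['dialect'].
kwds = {"dialect": "excel-tab"}  # hypothetical
print(f"Invalid dialect {kwds['dialect']} provided")
# -> Invalid dialect excel-tab provided
```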
@@ -838,11 +832,9 @@ def __init__(self, f, engine=None, **kwds):
                 # even if it conflicts with the dialect (gh-23761).
                 if provided != parser_default and provided != dialect_val:
                     msg = (
-                        "Conflicting values for '{param}': '{val}' was "
-                        "provided, but the dialect specifies '{diaval}'. "
-                        "Using the dialect-specified value.".format(
-                            param=param, val=provided, diaval=dialect_val
-                        )
+                        f"Conflicting values for '{param}': '{provided}' was "
+                        f"provided, but the dialect specifies '{dialect_val}'. "
+                        "Using the dialect-specified value."
                     )
 
                     # Annoying corner case for not warning about
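
The multi-line message relies on implicit concatenation of adjacent string literals, so only the pieces that contain placeholders need the `f` prefix. Sketch with invented values:

```python
# Adjacent literals concatenate at compile time; f-strings mix freely
# with plain strings, so the final fragment needs no f prefix.
param, provided, dialect_val = "delimiter", ",", "\t"  # hypothetical
msg = (
    f"Conflicting values for '{param}': '{provided}' was "
    f"provided, but the dialect specifies '{dialect_val}'. "
    "Using the dialect-specified value."
)
print(msg)
```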
@@ -993,9 +985,9 @@ def _clean_options(self, options, engine):
                 encodeable = False
             if not encodeable and engine not in ("python", "python-fwf"):
                 fallback_reason = (
-                    "the separator encoded in {encoding} "
+                    f"the separator encoded in {encoding} "
                     "is > 1 char long, and the 'c' engine "
-                    "does not support such separators".format(encoding=encoding)
+                    "does not support such separators"
                 )
                 engine = "python"
 
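For context, this fallback fires when a one-character separator encodes to more than one byte; passing engine='python' up front avoids the ParserWarning. A hedged usage sketch (the sample data is invented, and the behavior assumes a UTF-8 filesystem encoding):

```python
import io

import pandas as pd

# '§'.encode('utf-8') is two bytes, so the 'c' engine cannot use it;
# specifying engine='python' explicitly avoids the fallback warning.
csv = "a§b\n1§2\n"
df = pd.read_csv(io.StringIO(csv), sep="§", engine="python")
print(df)  # columns a and b, one row: 1, 2
```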
@@ -1025,19 +1017,19 @@ def _clean_options(self, options, engine):
         for arg in _python_unsupported:
             if fallback_reason and result[arg] != _c_parser_defaults[arg]:
                 raise ValueError(
-                    f"Falling back to the 'python' engine because "
+                    "Falling back to the 'python' engine because "
                     f"{fallback_reason}, but this causes {repr(arg)} to be "
-                    f"ignored as it is not supported by the 'python' engine."
+                    "ignored as it is not supported by the 'python' engine."
                 )
             del result[arg]
 
         if fallback_reason:
             warnings.warn(
                 (
                     "Falling back to the 'python' engine because "
-                    "{0}; you can avoid this warning by specifying "
+                    f"{fallback_reason}; you can avoid this warning by specifying "
                     "engine='python'."
-                ).format(fallback_reason),
+                ),
                 ParserWarning,
                 stacklevel=5,
             )
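
Also note this hunk strips the `f` prefix from literals that have no placeholders; they are legal but misleading, and linters flag them (F541). Separately, `stacklevel=5` is what makes the warning point at user code rather than pandas internals. A small sketch of the mechanism, with hypothetical helper names:

```python
import warnings

def _inner():
    # stacklevel=3: skip _inner (level 1) and public_api (level 2) so the
    # warning is reported at public_api's call site.
    warnings.warn("falling back", UserWarning, stacklevel=3)

def public_api():
    _inner()

public_api()  # the reported warning location is this line
```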
@@ -1058,7 +1050,7 @@ def _clean_options(self, options, engine):
 
             msg = (
                 f"The {repr(arg)} argument has been deprecated and will be "
-                f"removed in a future version."
+                "removed in a future version."
             )
 
             if result.get(arg, depr_default) != depr_default:
Expand Down Expand Up @@ -1128,9 +1120,9 @@ def _make_engine(self, engine="c"):
klass = FixedWidthFieldParser
else:
raise ValueError(
"Unknown engine: {engine} (valid options are"
f"Unknown engine: {engine} (valid options are"
' "c", "python", or'
' "python-fwf")'.format(engine=engine)
' "python-fwf")'
)
self._engine = klass(self.f, **self.options)

@@ -1240,7 +1232,7 @@ def _validate_usecols_names(usecols, names):
     if len(missing) > 0:
         raise ValueError(
             "Usecols do not match columns, "
-            "columns expected but not found: {missing}".format(missing=missing)
+            f"columns expected but not found: {missing}"
         )
 
     return usecols
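
End-to-end, this is the error a caller sees when `usecols` names a column absent from the file. A hedged usage sketch:

```python
import io

import pandas as pd

csv = io.StringIO("a,b\n1,2\n")
try:
    pd.read_csv(csv, usecols=["a", "z"])  # 'z' is not in the header
except ValueError as err:
    print(err)
# Usecols do not match columns, columns expected but not found: ['z']
```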
@@ -1541,11 +1533,9 @@ def _maybe_dedup_names(self, names):
                     counts[col] = cur_count + 1
 
                     if is_potential_mi:
-                        col = col[:-1] + (
-                            "{column}.{count}".format(column=col[-1], count=cur_count),
-                        )
+                        col = col[:-1] + (f"{col[-1]}.{cur_count}",)
                     else:
-                        col = "{column}.{count}".format(column=col, count=cur_count)
+                        col = f"{col}.{cur_count}"
                     cur_count = counts[col]
 
                 names[i] = col
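
The suffix scheme this builds is the mangle_dupe_cols behavior: repeated header names come back as 'a', 'a.1', 'a.2'. Quick demonstration:

```python
import io

import pandas as pd

csv = io.StringIO("a,a,a\n1,2,3\n")
print(list(pd.read_csv(csv).columns))  # ['a', 'a.1', 'a.2']
```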
@@ -1591,7 +1581,7 @@ def _get_simple_index(self, data, columns):
         def ix(col):
             if not isinstance(col, str):
                 return col
-            raise ValueError("Index {col} invalid".format(col=col))
+            raise ValueError(f"Index {col} invalid")
 
         to_remove = []
         index = []
@@ -1615,11 +1605,7 @@ def _get_name(icol):
             return icol
 
         if col_names is None:
-            raise ValueError(
-                ("Must supply column order to use {icol!s} as index").format(
-                    icol=icol
-                )
-            )
+            raise ValueError(f"Must supply column order to use {icol!s} as index")
 
         for i, c in enumerate(col_names):
             if i == icol:
@@ -1695,9 +1681,9 @@ def _convert_to_ndarrays(
                     warnings.warn(
                         (
                             "Both a converter and dtype were specified "
-                            "for column {0} - only the converter will "
+                            f"for column {c} - only the converter will "
                             "be used"
-                        ).format(c),
+                        ),
                         ParserWarning,
                         stacklevel=7,
                     )
@@ -1735,22 +1721,15 @@ def _convert_to_ndarrays(
                             and not is_categorical_dtype(cast_type)
                             and na_count > 0
                         ):
-                            raise ValueError(
-                                "Bool column has NA values in "
-                                "column {column}".format(column=c)
-                            )
+                            raise ValueError(f"Bool column has NA values in column {c}")
                     except (AttributeError, TypeError):
                         # invalid input to is_bool_dtype
                         pass
                     cvals = self._cast_types(cvals, cast_type, c)
 
             result[c] = cvals
             if verbose and na_count:
-                print(
-                    "Filled {count} NA values in column {c!s}".format(
-                        count=na_count, c=c
-                    )
-                )
+                print(f"Filled {na_count} NA values in column {c!s}")
         return result
 
     def _infer_types(self, values, na_values, try_num_bool=True):
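
The bool-NA branch above is reachable from user code whenever a requested plain bool dtype meets a missing value, since numpy bools cannot hold NA. Hedged sketch:

```python
import io

import pandas as pd

# The empty 'flag' field in the second data row becomes NaN, which a
# plain bool dtype cannot represent.
csv = io.StringIO("flag,x\nTrue,1\n,2\nFalse,3\n")
try:
    pd.read_csv(csv, dtype={"flag": bool})
except ValueError as err:
    print(err)  # Bool column has NA values in column flag
```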
@@ -1847,18 +1826,17 @@ def _cast_types(self, values, cast_type, column):
                 return array_type._from_sequence_of_strings(values, dtype=cast_type)
             except NotImplementedError:
                 raise NotImplementedError(
-                    "Extension Array: {ea} must implement "
+                    f"Extension Array: {array_type} must implement "
                     "_from_sequence_of_strings in order "
-                    "to be used in parser methods".format(ea=array_type)
+                    "to be used in parser methods"
                 )
 
         else:
             try:
                 values = astype_nansafe(values, cast_type, copy=True, skipna=True)
             except ValueError:
                 raise ValueError(
-                    "Unable to convert column {column} to type "
-                    "{cast_type}".format(column=column, cast_type=cast_type)
+                    f"Unable to convert column {column} to type {cast_type}"
                 )
         return values
 
@@ -1929,8 +1907,7 @@ def __init__(self, src, **kwds):
         if self.names is None:
             if self.prefix:
                 self.names = [
-                    "{prefix}{i}".format(prefix=self.prefix, i=i)
-                    for i in range(self._reader.table_width)
+                    f"{self.prefix}{i}" for i in range(self._reader.table_width)
                 ]
             else:
                 self.names = list(range(self._reader.table_width))
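
The comprehension generates the default headerless column labels; with prefix='X' the reader produces X0, X1, ... instead of bare integers. Usage sketch:

```python
import io

import pandas as pd

csv = io.StringIO("1,2,3\n4,5,6\n")
print(list(pd.read_csv(csv, header=None, prefix="X").columns))
# ['X0', 'X1', 'X2']
```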
@@ -2345,15 +2322,9 @@ def __init__(self, f, **kwds):
             raise ValueError("Only length-1 decimal markers supported")
 
         if self.thousands is None:
-            self.nonnum = re.compile(
-                r"[^-^0-9^{decimal}]+".format(decimal=self.decimal)
-            )
+            self.nonnum = re.compile(fr"[^-^0-9^{self.decimal}]+")
         else:
-            self.nonnum = re.compile(
-                r"[^-^0-9^{thousands}^{decimal}]+".format(
-                    thousands=self.thousands, decimal=self.decimal
-                )
-            )
+            self.nonnum = re.compile(fr"[^-^0-9^{self.thousands}^{self.decimal}]+")
 
     def _set_no_thousands_columns(self):
         # Create a set of column ids that are not to be stripped of thousands
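
On the `nonnum` patterns above: `fr"..."` is simply a raw f-string, which suits regex patterns that interpolate runtime values. One quirk worth noting is that inside a character class only the leading `^` negates; the later `^` characters are literal, so the class matches any run of characters other than digits, '-', '^', the thousands separator, and the decimal marker. Sketch:

```python
import re

decimal, thousands = ".", ","  # hypothetical PythonParser settings
nonnum = re.compile(fr"[^-^0-9^{thousands}^{decimal}]+")
print(nonnum.sub("", "$1,234.56"))  # 1,234.56 -- the '$' run is stripped
```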
Expand Down Expand Up @@ -2589,8 +2560,8 @@ def _infer_columns(self):
except StopIteration:
if self.line_pos < hr:
raise ValueError(
"Passed header={hr} but only {pos} lines in "
"file".format(hr=hr, pos=(self.line_pos + 1))
f"Passed header={hr} but only {self.line_pos + 1} lines in "
"file"
)

# We have an empty file, so check
@@ -2613,11 +2584,9 @@ def _infer_columns(self):
                 for i, c in enumerate(line):
                     if c == "":
                         if have_mi_columns:
-                            col_name = "Unnamed: {i}_level_{level}".format(
-                                i=i, level=level
-                            )
+                            col_name = f"Unnamed: {i}_level_{level}"
                         else:
-                            col_name = "Unnamed: {i}".format(i=i)
+                            col_name = f"Unnamed: {i}"
 
                         this_unnamed_cols.append(i)
                         this_columns.append(col_name)
@@ -2632,7 +2601,7 @@ def _infer_columns(self):
 
                         while cur_count > 0:
                             counts[col] = cur_count + 1
-                            col = "{column}.{count}".format(column=col, count=cur_count)
+                            col = f"{col}.{cur_count}"
                             cur_count = counts[col]
 
                         this_columns[i] = col
@@ -2697,12 +2666,7 @@ def _infer_columns(self):
 
             if not names:
                 if self.prefix:
-                    columns = [
-                        [
-                            "{prefix}{idx}".format(prefix=self.prefix, idx=i)
-                            for i in range(ncols)
-                        ]
-                    ]
+                    columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
                 else:
                     columns = [list(range(ncols))]
                 columns = self._handle_usecols(columns, columns[0])
Expand Down Expand Up @@ -2904,7 +2868,7 @@ def _alert_malformed(self, msg, row_num):
if self.error_bad_lines:
raise ParserError(msg)
elif self.warn_bad_lines:
base = "Skipping line {row_num}: ".format(row_num=row_num)
base = f"Skipping line {row_num}: "
sys.stderr.write(base + msg + "\n")

def _next_iter_line(self, row_num):
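
For reference, this is the path exercised by error_bad_lines=False with warn_bad_lines=True: over-wide rows are skipped, and the 'Skipping line N: ...' message built above goes to stderr instead of raising ParserError. Hedged sketch against the python engine:

```python
import io

import pandas as pd

csv = io.StringIO("a,b\n1,2\n3,4,5\n")  # third row has an extra field
df = pd.read_csv(csv, engine="python", error_bad_lines=False, warn_bad_lines=True)
print(df)  # the bad row is dropped; stderr gets 'Skipping line 3: ...'
```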
@@ -3128,10 +3092,8 @@ def _rows_to_cols(self, content):
 
             for row_num, actual_len in bad_lines:
                 msg = (
-                    "Expected {col_len} fields in line {line}, saw "
-                    "{length}".format(
-                        col_len=col_len, line=(row_num + 1), length=actual_len
-                    )
+                    f"Expected {col_len} fields in line {row_num + 1}, saw "
+                    f"{actual_len}"
                 )
                 if (
                     self.delimiter
@@ -3329,9 +3291,7 @@ def _isindex(colspec):
                     converter, colspec, data_dict, orig_names
                 )
                 if new_name in data_dict:
-                    raise ValueError(
-                        "New date column already in dict {name}".format(name=new_name)
-                    )
+                    raise ValueError(f"New date column already in dict {new_name}")
                 new_data[new_name] = col
                 new_cols.append(new_name)
                 date_cols.update(old_names)
@@ -3340,9 +3300,7 @@ def _isindex(colspec):
         # dict of new name to column list
         for new_name, colspec in parse_spec.items():
             if new_name in data_dict:
-                raise ValueError(
-                    "Date column {name} already in dict".format(name=new_name)
-                )
+                raise ValueError(f"Date column {new_name} already in dict")
 
             _, col, old_names = _try_convert_dates(
                 converter, colspec, data_dict, orig_names
@@ -3521,7 +3479,7 @@ def _stringify_na_values(na_values):
             # we are like 999 here
             if v == int(v):
                 v = int(v)
-                result.append("{value}.0".format(value=v))
+                result.append(f"{v}.0")
                 result.append(str(v))
 
             result.append(v)
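
To make the helper's intent concrete: each numeric NA sentinel is expanded into its string, trailing-'.0', and numeric forms, so '999', '999.0', and 999 all match during parsing. A trimmed, hypothetical re-creation for illustration (the real helper also appends an int() form and returns a set):

```python
def stringify_na_values(na_values):
    # Simplified re-creation of pandas' private helper, for illustration only.
    result = []
    for x in na_values:
        result.append(str(x))
        result.append(x)
        try:
            v = float(x)
            if v == int(v):  # e.g. 999 or 999.0
                v = int(v)
                result.append(f"{v}.0")
                result.append(str(v))
            result.append(v)
        except (TypeError, ValueError, OverflowError):
            pass
    return result

print(stringify_na_values([999]))  # ['999', 999, '999.0', '999', 999]
```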