diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 8b9842ba087a5..aed1ee1c6f6e5 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -638,18 +638,19 @@ cdef class TextReader: elif len(zip_names) == 0: raise ValueError('Zero files found in compressed ' - 'zip file %s', source) + 'zip file {source}'.format(source=source)) else: raise ValueError('Multiple files found in compressed ' - 'zip file %s', str(zip_names)) + 'zip file {zip_names}' + .format(zip_names=str(zip_names))) elif self.compression == 'xz': if isinstance(source, str): source = _get_lzma_file(lzma)(source, 'rb') else: source = _get_lzma_file(lzma)(filename=source) else: - raise ValueError('Unrecognized compression type: %s' % - self.compression) + raise ValueError('Unrecognized compression type: {compression_type}' + .format(compression_type=self.compression)) if b'utf-16' in (self.encoding or b''): # we need to read utf-16 through UTF8Recoder. @@ -703,8 +704,10 @@ cdef class TextReader: self.parser.cb_io = &buffer_rd_bytes self.parser.cb_cleanup = &del_rd_source else: - raise IOError('Expected file path name or file-like object,' - ' got %s type' % type(source)) + raise IOError('Expected file path name or file-like object, ' + 'got {source_type} type' + .format(source_type=type(source)) + ) cdef _get_header(self): # header is now a list of lists, so field_count should use header[0] @@ -741,11 +744,12 @@ cdef class TextReader: self.parser.lines < hr): msg = self.orig_header if isinstance(msg, list): - msg = "[%s], len of %d," % ( - ','.join(str(m) for m in msg), len(msg)) + msg = "[{msg_lst}], len of {orig_header_len},".format( + msg_lst=(','.join(str(m) for m in msg)), + orig_header_len=len(msg)) raise ParserError( - 'Passed header=%s but only %d lines in file' - % (msg, self.parser.lines)) + 'Passed header={msg} but only {line_count} lines in file' + .format(msg=msg, line_count=self.parser.lines)) else: field_count = self.parser.line_fields[hr] @@ -779,7 +783,10 @@ 
cdef class TextReader: if not self.has_mi_columns and self.mangle_dupe_cols: while count > 0: counts[name] = count + 1 - name = '%s.%d' % (name, count) + name = '{name}.{count}'.format( + name=name, + count=count + ) count = counts.get(name, 0) if old_name == '': @@ -990,7 +997,9 @@ cdef class TextReader: cdef _end_clock(self, what): if self.verbose: elapsed = time.time() - self.clocks.pop(-1) - print('%s took: %.2f ms' % (what, elapsed * 1000)) + print('{what} took: {elapsed:.2f} ms' + .format(what=what, elapsed=elapsed * 1000) + ) def set_noconvert(self, i): self.noconvert.add(i) @@ -1662,7 +1671,8 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, char *data ndarray result - result = np.empty(line_end - line_start, dtype='|S%d' % width) + result = np.empty(line_end - line_start, + dtype='|S{width}'.format(width=width)) data = result.data with nogil: @@ -2176,8 +2186,9 @@ def _concatenate_chunks(list chunks): if warning_columns: warning_names = ','.join(warning_columns) warning_message = " ".join([ - "Columns (%s) have mixed types." % warning_names, + "Columns ({col_name}) have mixed types." + .format(col_name=warning_names), "Specify dtype option on import or set low_memory=False." ]) warnings.warn(warning_message, DtypeWarning, stacklevel=8) return result