From 37927b21eb62e019c3bec326bdc9abd02245d499 Mon Sep 17 00:00:00 2001 From: foxfluff Date: Tue, 21 Feb 2017 15:40:20 -0500 Subject: [PATCH 1/4] #29614: Rename and reimplement DictReader. Added TableReader that offers non-destructive behavior for csv documents with duplicate fieldname values. --- Lib/csv.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/csv.py b/Lib/csv.py index 6a8587674fe02b..bce7ef77226993 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -18,8 +18,8 @@ "Error", "Dialect", "__doc__", "excel", "excel_tab", "field_size_limit", "reader", "writer", "register_dialect", "get_dialect", "list_dialects", "Sniffer", - "unregister_dialect", "__version__", "DictReader", "DictWriter", - "unix_dialect"] + "unregister_dialect", "__version__", "TableReader", "DictReader", + "DictWriter", "unix_dialect"] class Dialect: """Describe a CSV dialect. @@ -78,7 +78,7 @@ class unix_dialect(Dialect): register_dialect("unix", unix_dialect) -class DictReader: +class TableReader: def __init__(self, f, fieldnames=None, restkey=None, restval=None, dialect="excel", *args, **kwds): self._fieldnames = fieldnames # list of keys for the dict @@ -117,7 +117,7 @@ def __next__(self): # values while row == []: row = next(self.reader) - d = OrderedDict(zip(self.fieldnames, row)) + d = tuple(zip(self.fieldnames, row)) lf = len(self.fieldnames) lr = len(row) if lf < lr: @@ -128,6 +128,11 @@ def __next__(self): return d +class DictReader(TableReader): + def __next__(self): + return OrderedDict(super().__next__()) + + class DictWriter: def __init__(self, f, fieldnames, restval="", extrasaction="raise", dialect="excel", *args, **kwds): From 9aa3d4119fd887d1c3fb997a99dde0fc2c45b779 Mon Sep 17 00:00:00 2001 From: Luma Date: Tue, 21 Feb 2017 16:36:10 -0500 Subject: [PATCH 2/4] bpo-29614: Fix bugs based on test failures. 
--- Lib/csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/csv.py b/Lib/csv.py index bce7ef77226993..36f302a83d6818 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -117,7 +117,7 @@ def __next__(self): # values while row == []: row = next(self.reader) - d = tuple(zip(self.fieldnames, row)) + d = list(zip(self.fieldnames, row)) lf = len(self.fieldnames) lr = len(row) if lf < lr: From 55930fcf4c8e2af329def2ef374242220c12eaf6 Mon Sep 17 00:00:00 2001 From: foxfluff Date: Tue, 21 Feb 2017 17:01:46 -0500 Subject: [PATCH 3/4] Additional fixes due to attempted use of non-integers for index assignment. --- Lib/csv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/csv.py b/Lib/csv.py index 36f302a83d6818..8d2d2400c02b29 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -121,10 +121,10 @@ def __next__(self): lf = len(self.fieldnames) lr = len(row) if lf < lr: - d[self.restkey] = row[lf:] + d.extend((self.restkey, entry) for entry in row[lf:]) elif lf > lr: for key in self.fieldnames[lr:]: - d[key] = self.restval + d.append((key, self.restval)) return d From 48d3ce12a6b765d0a48472a38efe9aac0fccef4b Mon Sep 17 00:00:00 2001 From: foxfluff Date: Tue, 21 Feb 2017 17:27:28 -0500 Subject: [PATCH 4/4] Additional fixes based on test failures. The previous change implemented the wrong behavior when the data and headers did not match in length. --- Lib/csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/csv.py b/Lib/csv.py index 8d2d2400c02b29..1fd15fad86cbe5 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -121,7 +121,7 @@ def __next__(self): lf = len(self.fieldnames) lr = len(row) if lf < lr: - d.extend((self.restkey, entry) for entry in row[lf:]) + d.append((self.restkey, row[lf:])) elif lf > lr: for key in self.fieldnames[lr:]: d.append((key, self.restval))