Skip to content

Commit 616347c

Browse files
author
y-p
committed
CLN: preallocate data array only once
1 parent 0e42e46 commit 616347c

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

pandas/core/format.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -798,6 +798,11 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
798798
cols=list(cols)
799799
self.cols = cols
800800

801+
# preallocate data 2d list
802+
self.blocks = self.obj._data.blocks
803+
ncols = sum(len(b.items) for b in self.blocks)
804+
self.data =[None] * ncols
805+
801806
# fail early if we have duplicate columns
802807
if len(set(self.cols)) != len(self.cols):
803808
raise Exception("duplicate columns are not permitted in to_csv")
@@ -1001,18 +1006,17 @@ def _save_chunk(self, start_i, end_i):
10011006
data_index = self.data_index
10021007

10031008
# create the data for a chunk
1004-
blocks = self.obj._data.blocks
1005-
data =[None] * sum(len(b.items) for b in blocks)
10061009
slicer = slice(start_i,end_i)
1007-
for i in range(len(blocks)):
1008-
b = blocks[i]
1010+
for i in range(len(self.blocks)):
1011+
b = self.blocks[i]
10091012
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
10101013
for j, k in enumerate(b.items):
1011-
data[colname_map[k]] = d[j]
1014+
# self.data is a preallocated list
1015+
self.data[colname_map[k]] = d[j]
10121016

10131017
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
10141018

1015-
lib.write_csv_rows(data, ix, self.nlevels, self.cols, self.writer)
1019+
lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer)
10161020

10171021
# from collections import namedtuple
10181022
# ExcelCell = namedtuple("ExcelCell",

0 commit comments

Comments
 (0)