Skip to content

Commit 0775902

Browse files
authored
Merge pull request #59 from queryverse/savestreaming2
Add support for savestreaming method, take 2
2 parents 03dc201 + 76afde8 commit 0775902

File tree

3 files changed

+71
-2
lines changed

3 files changed

+71
-2
lines changed

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ TextParse = "e0df1984-e451-5cb5-8b61-797a481e67e3"
1616

1717
[extras]
1818
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
19+
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
1920

2021
[targets]
21-
test = ["Test"]
22+
test = ["Test", "DataFrames"]
2223

2324
[compat]
2425
CodecZlib = "≥ 0.5.2"

src/csv_writer.jl

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,49 @@ end
9595
# Save `data` as tab-separated text to the IO object wrapped by the FileIO stream `s`.
#
# All formatting keywords are passed through unchanged to `_save`; the default
# delimiter is a tab character.
function fileio_save(
    s::FileIO.Stream{FileIO.format"TSV"}, data;
    delim='\t', quotechar='"', escapechar='"', nastring="NA", header=true
)
    # Collect the formatting options once and splat them into the writer.
    options = (
        delim=delim,
        quotechar=quotechar,
        escapechar=escapechar,
        nastring=nastring,
        header=header,
    )
    return _save(s.io, data; options...)
end
98+
99+
#
100+
# Streaming version writes header (if any) on first call, then appends on subsequent calls.
101+
#
102+
# Formats handled by the streaming writer.
const CSV_or_TSV = Union{FileIO.format"CSV", FileIO.format"TSV"}

# Default field delimiter for format type `T`: a comma for CSV, a tab otherwise.
function _delim(T)
    if T <: FileIO.format"CSV"
        return ','
    else
        return '\t'
    end
end
105+
106+
"""
    CSVFileSaveStream{T}

State carried between successive writes of a streaming CSV/TSV save.

# Fields
- `io`: the object that rows are written to.
- `first_data_written`: `true` once a table has been written through this stream.
- `delim`, `quotechar`, `escapechar`, `nastring`: formatting options forwarded to
  the writer on every call.
- `header`: whether a header row was requested when the stream was created.
"""
mutable struct CSVFileSaveStream{T}
    io::T
    first_data_written::Bool
    delim::Char
    quotechar::Char
    escapechar::Char
    nastring::AbstractString
    header::Bool
end
115+
116+
# Open the file named by `f` for writing and return a `CSVFileSaveStream` that
# further tables can be appended to with `write`, and that must be released with
# `close`.
#
# If `data` is given it is written immediately (with a header row when `header`
# is true). The delimiter defaults to the one matching the file format `T`.
#
# The file is opened with mode "w", so any existing content is truncated. If the
# initial write fails, the file handle is closed before the error is rethrown so
# that no open handle is leaked.
function fileio_savestreaming(
    f::FileIO.File{T}, data=nothing;
    delim=_delim(T), quotechar='"', escapechar='"', nastring="NA", header=true
) where T <: CSV_or_TSV
    io = open(f.filename, "w")
    has_initial_data = data !== nothing

    if has_initial_data
        try
            _save(io, data; delim=delim, quotechar=quotechar, escapechar=escapechar,
                nastring=nastring, header=header)
        catch
            # The caller never receives the stream on failure, so nobody else
            # could close this handle.
            close(io)
            rethrow()
        end
    end

    return CSVFileSaveStream(
        io, has_initial_data, delim, quotechar, escapechar, nastring, header
    )
end
126+
127+
# Wrap the already-open FileIO stream `s` in a `CSVFileSaveStream` that further
# tables can be appended to with `write`.
#
# If `data` is given it is written to `s.io` immediately. The delimiter defaults
# to the one matching the stream format `T`. Note that `header` defaults to
# `false` in this method.
function fileio_savestreaming(
    s::FileIO.Stream{T}, data=nothing;
    delim=_delim(T), quotechar='"', escapechar='"', nastring="NA", header=false
) where T <: CSV_or_TSV
    target = s.io
    has_initial_data = data !== nothing

    if has_initial_data
        _save(target, data; delim=delim, quotechar=quotechar, escapechar=escapechar,
            nastring=nastring, header=header)
    end

    return CSVFileSaveStream(
        target, has_initial_data, delim, quotechar, escapechar, nastring, header
    )
end
136+
137+
# Append `data` to the streaming writer `s`, using the formatting options stored
# in `s`.
#
# A header row is emitted only when the stream was created with `header=true`
# and nothing has been written through it yet. Once the write succeeds the
# stream is marked as having data, so later calls never repeat the header.
# Returns whatever `_save` returns.
function Base.write(s::CSVFileSaveStream, data)
    # Read the stored flag from `s`; a bare `header` is not defined in this scope.
    write_header = s.header && !s.first_data_written
    result = _save(s.io, data; delim=s.delim, quotechar=s.quotechar,
        escapechar=s.escapechar, nastring=s.nastring, header=write_header)
    s.first_data_written = true
    return result
end
140+
141+
# Close the IO object held by the streaming writer `s`.
Base.close(s::CSVFileSaveStream) = close(s.io)

test/runtests.jl

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ end
9292
output_filename4 = tempname() * ".csv"
9393

9494
try
95-
@show output_filename4
9695
array |> save(output_filename4, quotechar=nothing)
9796

9897
finally
@@ -212,8 +211,31 @@ end
212211
@test showable("text/html", x2) == true
213212
@test showable("application/vnd.dataresource+json", x2) == true
214213
end
214+
215+
end
216+
217+
@testset "savestreaming" begin
    using DataFrames

    df = DataFrame(A = 1:2:1000, B = repeat(1:10, inner=50), C = 1:500)
    df1 = df[1:5, :]
    df2 = df[6:10, :]

    # Test both csv and tsv formats
    for ext in ("csv", "tsv")
        # Write to a temporary path so the working directory is never touched;
        # the extension is kept so the format can be detected from the file name.
        fname = tempname() * "." * ext

        try
            s = savestreaming(fname, df1)
            try
                write(s, df2)
                write(s, df2)  # add this slice twice
            finally
                # Release the file handle even if a write fails.
                close(s)
            end

            new_df = DataFrame(load(fname))
            @test new_df[1:5, :] == df1
            @test new_df[6:10, :] == df2
            @test new_df[11:15, :] == df2
        finally
            # Remove the output even when an assertion or a write errors.
            rm(fname, force=true)
        end
    end
end
218240

219241
end # Outer-most testset

0 commit comments

Comments
 (0)