-
Notifications
You must be signed in to change notification settings - Fork 13
Closed
Description
Hello,
I am trying to load a CSV file from https://raw.githubusercontent.com/beoutbreakprepared/nCoV2019/master/latest_data/latestdata.tar.gz (https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data).
Opening the file locally works without any problem, but trying to load it via CSVFiles leads to the following error:
ERROR: LoadError: CSV parsing error in tar/latestdata.csv at line 14455 char 310:
.../world-asia-51345855; San Lazaro Hospital,True,"""thought to have had other pre-existing conditions""...
____________________________________________________^
column 20 is expected to be: TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}}(""<string>"", true, true, false)
Stacktrace:
[1] parsefill!(::TextParse.VectorBackedUTF8String, ::TextParse.LocalOpts{UInt8,UInt8,UInt8}, ::TextParse.Record{Tuple{TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{Union{Missing, Float64},TextParse.NAToken{Union{Missing, Float64},TextParse.Numeric{Float64}}},TextParse.Field{Union{Missing, Float64},TextParse.NAToken{Union{Missing, Float64},TextParse.Numeric{Float64}}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextP
arse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{Union{Missing, Float64},TextParse.NAToken{Union{Missing, Float64},TextParse.Numeric{Float64}}},TextParse.Field{Missing,TextParse.NAToken{Missing,TextParse.Unknown}},TextParse.Field{Missing,TextParse.NAToken{Missing,TextParse.Unknown}}},Tuple{String,String,String,String,String,String,Union{Missing, Float64},Union{Missing, Float64},String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,Union{Missing, Float64},Missing,Missing}}, ::Int64, ::Tuple{Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{Union{Missing, Float64},1},Array{Union{Missing, 
Float64},1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{Union{Missing, Float64},1},Array{Missing,1},Array{Missing,1}}, ::OrderedCollections.OrderedDict{Union{Int64, String},Union{Nothing, AbstractArray{T,1} where T}}, ::Int64, ::Int64, ::Int64, ::Int64, ::Nothing) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:604
[2] _csvread_internal(::TextParse.VectorBackedUTF8String, ::Char; spacedelim::Bool, quotechar::Char, escapechar::Char, commentchar::Nothing, stringtype::Type{T} where T, stringarraytype::Type{T} where T, noresize::Bool, rowno::Int64, prevheaders::Nothing, pooledstrings::Nothing, skiplines_begin::Int64, samecols::Nothing, header_exists::Bool, nastrings::Array{String,1}, colnames::Array{String,1}, colspool::OrderedCollections.OrderedDict{Union{Int64, String},Union{Nothing, AbstractArray{T,1} where T}}, row_estimate::Int64, prev_parsers::Nothing, colparsers::Array{Any,1}, filename::String, type_detect_rows::Int64) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:338
[3] (::TextParse.var"#34#36"{Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:stringarraytype, :quotechar, :escapechar),Tuple{UnionAll,Char,Char}}},String,Char})(::IOStream) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:117
[4] open(::TextParse.var"#34#36"{Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:stringarraytype, :quotechar, :escapechar),Tuple{UnionAll,Char,Char}}},String,Char}, ::String, ::Vararg{String,N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at ./io.jl:298
[5] open at ./io.jl:296 [inlined]
[6] #_csvread_f#32 at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:114 [inlined]
[7] csvread(::String, ::Char; kwargs::Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:stringarraytype, :quotechar, :escapechar),Tuple{UnionAll,Char,Char}}}) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:80
[8] _loaddata(::CSVFiles.CSVFile) at /Users/yoh/.julia/packages/CSVFiles/C68zw/src/CSVFiles.jl:103
[9] get_columns_copy_using_missing(::CSVFiles.CSVFile) at /Users/yoh/.julia/packages/CSVFiles/C68zw/src/CSVFiles.jl:116
[10] columns at /Users/yoh/.julia/packages/Tables/okt7x/src/fallbacks.jl:231 [inlined]
[11] DataFrames.DataFrame(::CSVFiles.CSVFile; copycols::Bool) at /Users/yoh/.julia/packages/DataFrames/S3ZFo/src/other/tables.jl:40
Below is the code that I am using:
import CodecZlib
import CSVFiles
import DataFrames
import HTTP
import Tar
# Paths used by this script. `local_csv_filename` is the CSV path that is loaded
# after extraction; it lives under `local_tar_dir`.
local_csv_filename::String = "tar/latestdata.csv"
local_downloaded_filename::String = "latestdata.tar.gz"
local_tar_dir = "./tar"

# Fetch the gzip-compressed tarball into the current working directory.
HTTP.download(
    "https://raw.githubusercontent.com/beoutbreakprepared/nCoV2019/master/latest_data/latestdata.tar.gz",
    "./$local_downloaded_filename",
)

# Open the archive through a gzip-decompressing stream and unpack it.
# The do-block form closes the stream (and the file underneath it) when the
# block exits, including when extraction throws, instead of leaving the handle
# open for the rest of the session.
# NOTE(review): Tar.extract presumably requires `local_tar_dir` to be absent or
# empty, so re-running the script may need the directory removed first — confirm.
open(CodecZlib.GzipDecompressorStream, local_downloaded_filename) do archive_stream
    Tar.extract(archive_stream, local_tar_dir)
end
df::DataFrames.DataFrame = DataFrames.DataFrame(
CSVFiles.load(
local_csv_filename,
delim=',',
quotechar='"',
escapechar='"'
)
)
When opening the file manually, I did see some "" in string columns of the data.
I am not sure whether this is causing the problem.
Thank you!
Metadata
Metadata
Assignees
Labels
No labels