Skip to content

CSV file load error - no method matching UInt8(::String) #83

@yoh-meyers

Description

@yoh-meyers

Hello,

I am trying to load a CSV file from https://raw.githubusercontent.com/beoutbreakprepared/nCoV2019/master/latest_data/latestdata.tar.gz (https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data).

Opening the file locally works without any problem, but loading it via CSVFiles leads to the following error:

ERROR: LoadError: CSV parsing error in tar/latestdata.csv at line 14455 char 310:
.../world-asia-51345855; San Lazaro Hospital,True,"""thought to have had other pre-existing conditions""...
____________________________________________________^
column 20 is expected to be: TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}}(""<string>"", true, true, false)
Stacktrace:
 [1] parsefill!(::TextParse.VectorBackedUTF8String, ::TextParse.LocalOpts{UInt8,UInt8,UInt8}, ::TextParse.Record{Tuple{TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{Union{Missing, Float64},TextParse.NAToken{Union{Missing, Float64},TextParse.Numeric{Float64}}},TextParse.Field{Union{Missing, Float64},TextParse.NAToken{Union{Missing, Float64},TextParse.Numeric{Float64}}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,Text
Parse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{String,TextParse.Quoted{String,TextParse.StringToken{String},UInt8,UInt8}},TextParse.Field{Union{Missing, Float64},TextParse.NAToken{Union{Missing, Float64},TextParse.Numeric{Float64}}},TextParse.Field{Missing,TextParse.NAToken{Missing,TextParse.Unknown}},TextParse.Field{Missing,TextParse.NAToken{Missing,TextParse.Unknown}}},Tuple{String,String,String,String,String,String,Union{Missing, Float64},Union{Missing, Float64},String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,Union{Missing, Float64},Missing,Missing}}, ::Int64, ::Tuple{Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{Union{Missing, Float64},1},Array{Union{Missing, 
Float64},1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{String,1},Array{Union{Missing, Float64},1},Array{Missing,1},Array{Missing,1}}, ::OrderedCollections.OrderedDict{Union{Int64, String},Union{Nothing, AbstractArray{T,1} where T}}, ::Int64, ::Int64, ::Int64, ::Int64, ::Nothing) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:604
 [2] _csvread_internal(::TextParse.VectorBackedUTF8String, ::Char; spacedelim::Bool, quotechar::Char, escapechar::Char, commentchar::Nothing, stringtype::Type{T} where T, stringarraytype::Type{T} where T, noresize::Bool, rowno::Int64, prevheaders::Nothing, pooledstrings::Nothing, skiplines_begin::Int64, samecols::Nothing, header_exists::Bool, nastrings::Array{String,1}, colnames::Array{String,1}, colspool::OrderedCollections.OrderedDict{Union{Int64, String},Union{Nothing, AbstractArray{T,1} where T}}, row_estimate::Int64, prev_parsers::Nothing, colparsers::Array{Any,1}, filename::String, type_detect_rows::Int64) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:338
 [3] (::TextParse.var"#34#36"{Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:stringarraytype, :quotechar, :escapechar),Tuple{UnionAll,Char,Char}}},String,Char})(::IOStream) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:117
 [4] open(::TextParse.var"#34#36"{Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:stringarraytype, :quotechar, :escapechar),Tuple{UnionAll,Char,Char}}},String,Char}, ::String, ::Vararg{String,N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at ./io.jl:298
 [5] open at ./io.jl:296 [inlined]
 [6] #_csvread_f#32 at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:114 [inlined]
 [7] csvread(::String, ::Char; kwargs::Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:stringarraytype, :quotechar, :escapechar),Tuple{UnionAll,Char,Char}}}) at /Users/yoh/.julia/packages/TextParse/EETm2/src/csv.jl:80
 [8] _loaddata(::CSVFiles.CSVFile) at /Users/yoh/.julia/packages/CSVFiles/C68zw/src/CSVFiles.jl:103
 [9] get_columns_copy_using_missing(::CSVFiles.CSVFile) at /Users/yoh/.julia/packages/CSVFiles/C68zw/src/CSVFiles.jl:116
 [10] columns at /Users/yoh/.julia/packages/Tables/okt7x/src/fallbacks.jl:231 [inlined]
 [11] DataFrames.DataFrame(::CSVFiles.CSVFile; copycols::Bool) at /Users/yoh/.julia/packages/DataFrames/S3ZFo/src/other/tables.jl:40

Below is the code that I am using:

import CodecZlib
import CSVFiles
import DataFrames
import HTTP
import Tar

local_csv_filename::String = "tar/latestdata.csv"
local_downloaded_filename::String = "latestdata.tar.gz"
local_tar_dir = "./tar"

HTTP.download("https://raw.githubusercontent.com/beoutbreakprepared/nCoV2019/master/latest_data/latestdata.tar.gz", "./$local_downloaded_filename")

Tar.extract(
    CodecZlib.GzipDecompressorStream(
        open(local_downloaded_filename, "r")
    ),
    local_tar_dir
)
 
df::DataFrames.DataFrame = DataFrames.DataFrame(
    CSVFiles.load(
        local_csv_filename,
        delim=',',
        quotechar='"',
        escapechar='"'
    )
)

When opening the file manually, I did see some "" sequences in the string columns of the data.
I am not sure whether this is what is causing the problem.

Thank you!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions