Skip to content

Commit 71a7578

Browse files
author
Shashi Gowda
committed
Carry around all previously known column types in a dictionary.
Useful when some files lack certain columns but later files have them.
1 parent bdcecc4 commit 71a7578

File tree

1 file changed

+15
-12
lines changed

1 file changed

+15
-12
lines changed

src/csv.jl

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ function csvread{T<:AbstractString}(files::AbstractVector{T},
100100
delim=','; kwargs...)
101101
@assert !isempty(files)
102102
colspool = ColsPool()
103-
cols, headers, rec, nrows = try
103+
cols, headers, parsers, nrows = try
104104
_csvread_f(files[1], delim;
105105
noresize=true,
106106
colspool=colspool,
@@ -117,9 +117,9 @@ function csvread{T<:AbstractString}(files::AbstractVector{T},
117117
n = ceil(Int, nrows * sqrt(2))
118118
resizecols(colspool, n)
119119
end
120-
cols, headers, rec, nrows = try
120+
cols, headers, parsers, nrows = try
121121
_csvread_f(f, delim; rowno=nrows+1, colspool=colspool,
122-
prevheaders=headers, noresize=true, rec=rec, kwargs...)
122+
prevheaders=headers, noresize=true, prev_parsers=parsers, kwargs...)
123123
catch err
124124
println(STDERR, "Error parsing $(f)")
125125
rethrow(err)
@@ -149,7 +149,7 @@ function _csvread_internal(str::AbstractString, delim=',';
149149
colspool = ColsPool(),
150150
nrows = !isempty(colspool) ?
151151
length(first(colspool)[2]) : 0,
152-
rec = nothing,
152+
prev_parsers = nothing,
153153
colparsers=[],
154154
filename=nothing,
155155
type_detect_rows=20)
@@ -191,12 +191,9 @@ function _csvread_internal(str::AbstractString, delim=',';
191191
end
192192

193193
# seed guesses using those from previous file
194-
prevs = rec !== nothing ?
195-
Dict(zip(prevheaders, map(x->x.inner, rec.fields))) : nothing
196-
197194
guess, pos1 = guesscolparsers(str, canonnames, opts,
198195
pos, type_detect_rows, colparsers,
199-
nastrings, prevs)
196+
nastrings, prev_parsers)
200197

201198
if isempty(canonnames)
202199
canonnames = Any[1:length(guess);]
@@ -206,10 +203,11 @@ function _csvread_internal(str::AbstractString, delim=',';
206203
c = get(canonnames, i, i)
207204
# Make column nullable if it's showing up for the
208205
# first time, but not in the first file
209-
if !(fieldtype(v) <: StringLike) && rec !== nothing && !haskey(colspool, c)
206+
if !(fieldtype(v) <: StringLike) && prev_parsers !== nothing && !haskey(colspool, c)
210207
v = isa(v, NAToken) ? v : NAToken(v)
211208
end
212-
guess[i] = tofield(v, opts)
209+
p = tofield(v, opts)
210+
guess[i] = p
213211
end
214212

215213
# the last field is delimited by line end
@@ -246,7 +244,7 @@ function _csvread_internal(str::AbstractString, delim=',';
246244
rowno-1,
247245
fieldtype(f))
248246
catch err
249-
error("Could not convert column $c of type $(eltype(colspool[c])) to type $(fieldtype(f))")
247+
error("Could not convert column $c of eltype $(eltype(colspool[c])) to eltype $(fieldtype(f))")
250248
end
251249
end
252250
else
@@ -398,7 +396,12 @@ function _csvread_internal(str::AbstractString, delim=',';
398396

399397
end
400398

401-
cols, canonnames, rec, finalrows
399+
parsers = prev_parsers === nothing ? Dict() : copy(prev_parsers)
400+
for i in 1:length(rec.fields)
401+
name = get(canonnames, i, i)
402+
parsers[name] = rec.fields[i].inner
403+
end
404+
cols, canonnames, parsers, finalrows
402405
end
403406

404407
function promote_field(failed_str, field, col, err, nastrings)

0 commit comments

Comments
 (0)