From 38edc534ca5b3404327efe497736f3bf8066e850 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Mon, 2 Dec 2024 22:49:24 -0700 Subject: [PATCH] Fix columntable materialization on stored schema Fixes https://github.com/JuliaData/Tables.jl/issues/357. The issue here is for stored schema, the type of the schema is `Schema{nothing, nothing}` which usually indicates tables with many columns. Some tables implementations, however, like ARFFFiles.jl, may choose to explicitly store _all_ schemas, even for very narrow tables. We already have a generated branch which checks for a specialization threshold for the known-schema case, so the fix here is fairly straightforward in just actually checking if the stored schema # of columns is actually too many or not. In the end, users should be aware that `Tables.columntable` isn't a perfect, 100% kind of table implementation that is always expected to work. It was originally meant as just a test implementation that then turned out to be fairly convenient for REPL use. Users should note that generating a named tuple of columns from stored schema doesn't have a way to be particularly efficient, since it necessarily has to generate the NamedTuple type at runtime. --- src/namedtuples.jl | 11 ++++++++--- test/runtests.jl | 11 +++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/namedtuples.jl b/src/namedtuples.jl index 8bdc793..9befe76 100644 --- a/src/namedtuples.jl +++ b/src/namedtuples.jl @@ -176,9 +176,14 @@ function columntable(sch::Schema{names, types}, cols) where {names, types} end end -# extremely large tables -columntable(sch::Schema{nothing, nothing}, cols) = - throw(ArgumentError("input table too wide ($(length(sch.names)) columns) to convert to `NamedTuple` of `AbstractVector`s")) +# extremely large tables or schema explicitly stored +function columntable(sch::Schema{nothing, nothing}, cols) + nms = sch.names + if nms !== nothing && length(nms) > SPECIALIZATION_THRESHOLD + throw(ArgumentError("input table too wide ($(length(nms)) columns) to convert to `NamedTuple` of `AbstractVector`s")) + end + return NamedTuple{Tuple(map(Symbol, nms))}(Tuple(getarray(getcolumn(cols, nms[i])) for i = 1:length(nms))) +end # unknown schema case columntable(::Nothing, cols) = diff --git a/test/runtests.jl b/test/runtests.jl index 832dc80..7c16863 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1040,3 +1040,14 @@ end @test DataAPI.nrow(Tables.dictrowtable([(a=1, b=2), (a=3, b=4), (a=5, b=6)])) == 3 @test DataAPI.ncol(Tables.dictrowtable([(a=1, b=2), (a=3, b=4), (a=5, b=6)])) == 2 end + +@testset "#357" begin + dct = Tables.dictcolumntable((a=1:3, b=4.0:6.0, c=["7", "8", "9"])) + sch = Tables.schema(dct) + sch = Tables.Schema(sch.names, sch.types, stored=true) + dct = Tables.DictColumnTable(sch, getfield(dct, :values)) + nt = Tables.columntable(dct) + @test nt.a == [1, 2, 3] + @test nt.b == [4.0, 5.0, 6.0] + @test nt.c == ["7", "8", "9"] +end