From 328f359135172f077dec24636e6e6884aa8068f8 Mon Sep 17 00:00:00 2001
From: William Ayd
Date: Sun, 21 Jan 2024 13:43:58 -0500
Subject: [PATCH] JSON read support (#236)

---
NOTE(review): line breaks and leading whitespace were reconstructed from a
whitespace-collapsed copy of this patch. Hunk line counts match the hunk
headers and the diffstat, but the indentation of the pantab.cpp hunk is a
guess -- confirm it against the target file before applying.

 pantab/src/pantab.cpp       |  5 +++--
 pantab/tests/test_reader.py | 44 +++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/pantab/src/pantab.cpp b/pantab/src/pantab.cpp
index 3c6a8c07..68ee83fd 100644
--- a/pantab/src/pantab.cpp
+++ b/pantab/src/pantab.cpp
@@ -666,8 +666,9 @@ static auto arrowTypeFromHyper(const hyperapi::SqlType &sqltype)
         case hyperapi::TypeTag::Oid : return NANOARROW_TYPE_UINT32;
         case hyperapi::TypeTag::Double : return NANOARROW_TYPE_DOUBLE;
         case hyperapi::TypeTag::Bytes : return NANOARROW_TYPE_LARGE_BINARY;
-        case hyperapi::TypeTag::Varchar : case hyperapi::TypeTag::Char :
-        case hyperapi::TypeTag::Text : return NANOARROW_TYPE_LARGE_STRING;
+        case hyperapi::TypeTag::Varchar : case hyperapi::TypeTag::
+            Char : case hyperapi::TypeTag::Text : case hyperapi::TypeTag::
+            Json : return NANOARROW_TYPE_LARGE_STRING;
         case hyperapi::TypeTag::Bool : return NANOARROW_TYPE_BOOL;
         case hyperapi::TypeTag::Date : return NANOARROW_TYPE_DATE32;
         case hyperapi::TypeTag::Timestamp : case hyperapi::TypeTag::
diff --git a/pantab/tests/test_reader.py b/pantab/tests/test_reader.py
index 542395bf..03007e2b 100644
--- a/pantab/tests/test_reader.py
+++ b/pantab/tests/test_reader.py
@@ -1,3 +1,5 @@
+import json
+
 import pandas as pd
 import pandas.testing as tm
 import pytest
@@ -158,3 +160,45 @@ def test_read_oid(tmp_hyper):
 
     result = pantab.frame_from_hyper(tmp_hyper, table=table_name)
     tm.assert_frame_equal(result, expected)
+
+
+def test_read_json(tmp_hyper):
+    # Hyper just uses strings to serialize/de-serialize JSON, but we don't have
+    # an API yet for users to control writing JSON, so only reading is tested here.
+    column_name = "JSON Column"
+    table_name = tab_api.TableName("public", "table")
+    table = tab_api.TableDefinition(
+        table_name=table_name,
+        columns=[
+            tab_api.TableDefinition.Column(
+                name=column_name,
+                type=tab_api.SqlType.json(),
+                nullability=tab_api.NOT_NULLABLE,
+            )
+        ],
+    )
+
+    with tab_api.HyperProcess(
+        telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
+    ) as hyper:
+        with tab_api.Connection(
+            endpoint=hyper.endpoint,
+            database=tmp_hyper,
+            create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
+        ) as connection:
+            connection.catalog.create_table(table_definition=table)
+
+            with tab_api.Inserter(connection, table) as inserter:
+                inserter.add_rows(
+                    [[json.dumps({"foo": 42})], [json.dumps({"bar": -42})]]
+                )
+                inserter.execute()
+
+    expected = pd.DataFrame(
+        [[json.dumps({"foo": 42})], [json.dumps({"bar": -42})]],
+        columns=[column_name],
+        dtype="large_string[pyarrow]",
+    )
+
+    result = pantab.frame_from_hyper(tmp_hyper, table=table_name)
+    tm.assert_frame_equal(result, expected)