Skip to content

Commit

Permalink
JSON read support (#236)
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd authored Jan 21, 2024
1 parent cc9fe23 commit 328f359
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 2 deletions.
5 changes: 3 additions & 2 deletions pantab/src/pantab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,8 +666,9 @@ static auto arrowTypeFromHyper(const hyperapi::SqlType &sqltype)
case hyperapi::TypeTag::Oid : return NANOARROW_TYPE_UINT32;
case hyperapi::TypeTag::Double : return NANOARROW_TYPE_DOUBLE;
case hyperapi::TypeTag::Bytes : return NANOARROW_TYPE_LARGE_BINARY;
case hyperapi::TypeTag::Varchar : case hyperapi::TypeTag::Char :
case hyperapi::TypeTag::Text : return NANOARROW_TYPE_LARGE_STRING;
case hyperapi::TypeTag::Varchar : case hyperapi::TypeTag::
Char : case hyperapi::TypeTag::Text : case hyperapi::TypeTag::
Json : return NANOARROW_TYPE_LARGE_STRING;
case hyperapi::TypeTag::Bool : return NANOARROW_TYPE_BOOL;
case hyperapi::TypeTag::Date : return NANOARROW_TYPE_DATE32;
case hyperapi::TypeTag::Timestamp : case hyperapi::TypeTag::
Expand Down
44 changes: 44 additions & 0 deletions pantab/tests/test_reader.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import pandas as pd
import pandas.testing as tm
import pytest
Expand Down Expand Up @@ -158,3 +160,45 @@ def test_read_oid(tmp_hyper):

result = pantab.frame_from_hyper(tmp_hyper, table=table_name)
tm.assert_frame_equal(result, expected)


def test_read_json(tmp_hyper):
    """Check that a Hyper JSON column is read back as pyarrow large strings.

    The table is created and populated directly through ``tab_api`` and
    then loaded with ``pantab.frame_from_hyper``; the result must equal a
    one-column frame of the serialized JSON documents.
    """
    # Hyper just uses strings to serialize/de-serialize JSON, and there is
    # no API yet for users to control writing JSON, so only the read path
    # is exercised here.
    column_name = "JSON Column"
    documents = ({"foo": 42}, {"bar": -42})
    rows = [[json.dumps(document)] for document in documents]

    table_name = tab_api.TableName("public", "table")
    json_column = tab_api.TableDefinition.Column(
        name=column_name,
        type=tab_api.SqlType.json(),
        nullability=tab_api.NOT_NULLABLE,
    )
    table = tab_api.TableDefinition(
        table_name=table_name,
        columns=[json_column],
    )

    process = tab_api.HyperProcess(
        telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    )
    with process as hyper:
        session = tab_api.Connection(
            endpoint=hyper.endpoint,
            database=tmp_hyper,
            create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
        )
        with session as connection:
            connection.catalog.create_table(table_definition=table)

            with tab_api.Inserter(connection, table) as inserter:
                inserter.add_rows(rows)
                inserter.execute()

    # Same serialized documents, typed the way the reader is expected to
    # surface a JSON column.
    expected = pd.DataFrame(
        rows,
        columns=[column_name],
        dtype="large_string[pyarrow]",
    )

    result = pantab.frame_from_hyper(tmp_hyper, table=table_name)
    tm.assert_frame_equal(result, expected)

0 comments on commit 328f359

Please sign in to comment.