Add support for reading geography (#239)
WillAyd authored Jan 22, 2024
1 parent 462166f commit 63ba24e
Showing 3 changed files with 69 additions and 13 deletions.
3 changes: 2 additions & 1 deletion pantab/src/pantab.cpp
@@ -709,7 +709,8 @@ static auto arrowTypeFromHyper(const hyperapi::SqlType &sqltype)
   case hyperapi::TypeTag::BigInt : return NANOARROW_TYPE_INT64;
   case hyperapi::TypeTag::Oid : return NANOARROW_TYPE_UINT32;
   case hyperapi::TypeTag::Double : return NANOARROW_TYPE_DOUBLE;
-  case hyperapi::TypeTag::Bytes : return NANOARROW_TYPE_LARGE_BINARY;
+  case hyperapi::TypeTag::Geography : case hyperapi::TypeTag::Bytes :
+    return NANOARROW_TYPE_LARGE_BINARY;
   case hyperapi::TypeTag::Varchar : case hyperapi::TypeTag::Char :
   case hyperapi::TypeTag::Text : case hyperapi::TypeTag::Json :
     return NANOARROW_TYPE_LARGE_STRING;
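With this mapping, GEOGRAPHY columns flow through the same code path as BYTES and surface in Arrow as large binary data. A minimal usage sketch of the resulting behavior (the file name "spatial.hyper" and table name "Extract" are illustrative, not part of this commit):

import pantab

# Hypothetical Hyper file containing a geography column. Before this commit,
# reading such a column raised a TypeError naming the unsupported GEOGRAPHY
# type (see the test removed below).
df = pantab.frame_from_hyper("spatial.hyper", table="Extract")

# The geography column now arrives as raw, Hyper-encoded bytes with the
# pandas dtype "large_binary[pyarrow]"; pantab does not decode the format.
print(df.dtypes)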
Binary file removed pantab/tests/data/geography.hyper
79 changes: 67 additions & 12 deletions pantab/tests/test_reader.py
@@ -2,7 +2,6 @@

 import pandas as pd
 import pandas.testing as tm
-import pytest
 import tableauhyperapi as tab_api

 import pantab
@@ -20,17 +19,6 @@ def test_read_doesnt_modify_existing_file(df, tmp_hyper):
     assert last_modified == tmp_hyper.stat().st_mtime


-def test_reports_unsupported_type(datapath):
-    """
-    Test that we report an error if we encounter an unsupported column type.
-    Previously, we did not do so but instead assumed that all unsupported columns
-    would be string columns. This led to very fascinating failures.
-    """
-    db_path = datapath / "geography.hyper"
-    with pytest.raises(TypeError, match=r"GEOGRAPHY"):
-        pantab.frame_from_hyper(db_path, table="test")
-
-
 def test_read_non_roundtrippable(datapath):
     result = pantab.frame_from_hyper(
         datapath / "dates.hyper", table=tab_api.TableName("Extract", "Extract")
@@ -202,3 +190,70 @@ def test_read_json(tmp_hyper):

     result = pantab.frame_from_hyper(tmp_hyper, table=table_name)
     tm.assert_frame_equal(result, expected)
+
+
+def test_read_geography(tmp_hyper):
+    # Hyper uses a proprietary format for geography; best we can do is read out bytes
+    column_name = "Geography Column"
+    table_name = tab_api.TableName("public", "table")
+    table = tab_api.TableDefinition(
+        table_name=table_name,
+        columns=[
+            tab_api.TableDefinition.Column(
+                name=column_name,
+                type=tab_api.SqlType.geography(),
+                nullability=tab_api.NOT_NULLABLE,
+            )
+        ],
+    )
+
+    with tab_api.HyperProcess(
+        telemetry=tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
+    ) as hyper:
+        with tab_api.Connection(
+            endpoint=hyper.endpoint,
+            database=tmp_hyper,
+            create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
+        ) as connection:
+            connection.catalog.create_table(table_definition=table)
+
+            inserter_definition = [
+                tab_api.TableDefinition.Column(
+                    name="geo_as_text",
+                    type=tab_api.SqlType.text(),
+                    nullability=tab_api.NOT_NULLABLE,
+                )
+            ]
+            column_mappings = [
+                tab_api.Inserter.ColumnMapping(
+                    column_name, "CAST(geo_as_text AS GEOGRAPHY)"
+                )
+            ]
+            with tab_api.Inserter(
+                connection,
+                table,
+                column_mappings,
+                inserter_definition=inserter_definition,
+            ) as inserter:
+                # WKT examples for Seattle / Munich taken from Hyper documentation
+                # https://tableau.github.io/hyper-db/docs/guides/hyper_file/geodata
+                inserter.add_rows(
+                    [["point(-122.338083 47.647528)"], ["point(11.584329 48.139257)"]]
+                )
+                inserter.execute()
+
+    expected = pd.DataFrame(
+        [
+            [
+                b"\x07\xaa\x02\xe0%n\xd9\x01\x01\n\x00\xce\xab\xe8\xfa=\xff\x96\xf0\x8a\x9f\x01"
+            ],
+            [
+                b"\x07\xaa\x02\x0c&n\x82\x01\x01\n\x00\xb0\xe2\xd4\xcc>\xd4\xbc\x97\x88\x0f"
+            ],
+        ],
+        columns=[column_name],
+        dtype="large_binary[pyarrow]",
+    )
+
+    result = pantab.frame_from_hyper(tmp_hyper, table=table_name)
+    tm.assert_frame_equal(result, expected)
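pantab itself still has no write path for geography; the test stages WKT text through the Hyper API and has Hyper CAST it to GEOGRAPHY during the insert. Pulled out as a standalone helper, that pattern looks roughly like this (the function name and parameters are our own illustration, not part of the commit):

import tableauhyperapi as tab_api


def insert_wkt_as_geography(connection, table, column_name, wkt_values):
    # Stage each row as TEXT in a scratch column, then let Hyper cast it to
    # GEOGRAPHY on insert, mirroring the Inserter/ColumnMapping pattern in
    # the test above.
    inserter_definition = [
        tab_api.TableDefinition.Column(
            name="geo_as_text",
            type=tab_api.SqlType.text(),
            nullability=tab_api.NOT_NULLABLE,
        )
    ]
    column_mappings = [
        tab_api.Inserter.ColumnMapping(column_name, "CAST(geo_as_text AS GEOGRAPHY)")
    ]
    with tab_api.Inserter(
        connection, table, column_mappings, inserter_definition=inserter_definition
    ) as inserter:
        inserter.add_rows([[wkt] for wkt in wkt_values])
        inserter.execute()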
