Skip to content

Commit

Permalink
VARCHAR support (#227)
Browse files: browse the repository at this point in the history
  • Loading branch information
WillAyd authored Jan 18, 2024
1 parent 48f6541 commit 00f0c54
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 33 deletions.
46 changes: 16 additions & 30 deletions pantab/src/pantab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,36 +615,22 @@ static auto makeReadHelper(const ArrowSchemaView *schema_view,
}
}

static auto
arrowTypeFromHyper(const hyperapi::SqlType &sqltype) -> enum ArrowType {
if (sqltype == hyperapi::SqlType::smallInt()){return NANOARROW_TYPE_INT16;}
else if (sqltype == hyperapi::SqlType::integer()) {
return NANOARROW_TYPE_INT32;
}
else if (sqltype == hyperapi::SqlType::bigInt()) {
return NANOARROW_TYPE_INT64;
}
else if (sqltype == hyperapi::SqlType::doublePrecision()) {
return NANOARROW_TYPE_DOUBLE;
}
else if (sqltype == hyperapi::SqlType::text()) {
return NANOARROW_TYPE_LARGE_STRING;
}
else if (sqltype == hyperapi::SqlType::boolean()) {
return NANOARROW_TYPE_BOOL;
}
else if (sqltype == hyperapi::SqlType::timestamp()) {
return NANOARROW_TYPE_TIMESTAMP;
}
else if (sqltype == hyperapi::SqlType::timestampTZ()) {
return NANOARROW_TYPE_TIMESTAMP; // todo: how to encode tz info?
}
else if (sqltype == hyperapi::SqlType::date()) {
return NANOARROW_TYPE_DATE32;
}

throw nb::type_error(
("unimplemented pandas dtype for type: " + sqltype.toString()).c_str());
/// Translate the SQL type of a Hyper column into the nanoarrow type the
/// reader uses for it.
///
/// The decision is made on the tag returned by `sqltype.getTag()`:
/// Varchar, Char and Text all map to the large string type, and both
/// Timestamp and TimestampTZ map to the same timestamp type.
///
/// @param sqltype Hyper SQL type to translate.
/// @return The matching nanoarrow type enumerator.
/// @throws nb::type_error if the tag has no mapping; the message includes
///         `sqltype.toString()`.
static auto arrowTypeFromHyper(const hyperapi::SqlType &sqltype)
    -> enum ArrowType {
  const auto tag = sqltype.getTag();

  switch (tag) {
  case hyperapi::TypeTag::SmallInt:
    return NANOARROW_TYPE_INT16;
  case hyperapi::TypeTag::Int:
    return NANOARROW_TYPE_INT32;
  case hyperapi::TypeTag::BigInt:
    return NANOARROW_TYPE_INT64;
  case hyperapi::TypeTag::Double:
    return NANOARROW_TYPE_DOUBLE;
  // Every textual Hyper type is surfaced as a large string.
  case hyperapi::TypeTag::Varchar:
  case hyperapi::TypeTag::Char:
  case hyperapi::TypeTag::Text:
    return NANOARROW_TYPE_LARGE_STRING;
  case hyperapi::TypeTag::Bool:
    return NANOARROW_TYPE_BOOL;
  case hyperapi::TypeTag::Date:
    return NANOARROW_TYPE_DATE32;
  // Both timestamp flavours share one Arrow type here.
  case hyperapi::TypeTag::Timestamp:
  case hyperapi::TypeTag::TimestampTZ:
    return NANOARROW_TYPE_TIMESTAMP;
  default:
    break;
  }

  // No mapping for this tag: report the offending type to the caller.
  const auto message = "Reader not implemented for type: " + sqltype.toString();
  throw nb::type_error(message.c_str());
}

static auto releaseArrowStream(void *ptr) noexcept -> void {
Expand Down
43 changes: 40 additions & 3 deletions pantab/tests/test_reader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd
import pandas.testing as tm
import pytest
from tableauhyperapi import TableName
import tableauhyperapi as tab_api

import pantab

Expand Down Expand Up @@ -31,7 +31,7 @@ def test_reports_unsupported_type(datapath):

def test_read_non_roundtrippable(datapath):
result = pantab.frame_from_hyper(
datapath / "dates.hyper", table=TableName("Extract", "Extract")
datapath / "dates.hyper", table=tab_api.TableName("Extract", "Extract")
)
expected = pd.DataFrame(
[["1900-01-01", "2000-01-01"], [pd.NaT, "2050-01-01"]],
Expand All @@ -43,7 +43,8 @@ def test_read_non_roundtrippable(datapath):

def test_reads_non_writeable(datapath):
result = pantab.frame_from_hyper(
datapath / "non_pantab_writeable.hyper", table=TableName("public", "table")
datapath / "non_pantab_writeable.hyper",
table=tab_api.TableName("public", "table"),
)

expected = pd.DataFrame(
Expand Down Expand Up @@ -85,3 +86,39 @@ def test_empty_read_query(df: pd.DataFrame, roundtripped, tmp_hyper):

result = pantab.frame_from_hyper_query(tmp_hyper, query)
tm.assert_frame_equal(result, expected)


def test_read_varchar(tmp_hyper):
    """Read back a VARCHAR column that was written through the Hyper API.

    A one-column ``varchar(42)`` table is created and filled with
    tableauhyperapi directly; pantab is then expected to return that column
    with the ``large_string[pyarrow]`` dtype.
    """
    varchar_name = "VARCHAR Column"
    target = tab_api.TableName("public", "table")

    varchar_column = tab_api.TableDefinition.Column(
        name=varchar_name,
        type=tab_api.SqlType.varchar(42),
        nullability=tab_api.NOT_NULLABLE,
    )
    definition = tab_api.TableDefinition(
        table_name=target,
        columns=[varchar_column],
    )

    no_telemetry = tab_api.Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU
    with tab_api.HyperProcess(telemetry=no_telemetry) as hyper:
        with tab_api.Connection(
            endpoint=hyper.endpoint,
            database=tmp_hyper,
            create_mode=tab_api.CreateMode.CREATE_AND_REPLACE,
        ) as connection:
            connection.catalog.create_table(table_definition=definition)

            # Populate the table with two rows of one string value each.
            with tab_api.Inserter(connection, definition) as inserter:
                inserter.add_rows([["foo"], ["bar"]])
                inserter.execute()

    expected = pd.DataFrame(
        [["foo"], ["bar"]],
        columns=[varchar_name],
        dtype="large_string[pyarrow]",
    )

    actual = pantab.frame_from_hyper(tmp_hyper, table=target)
    tm.assert_frame_equal(actual, expected)

0 comments on commit 00f0c54

Please sign in to comment.