Skip to content

Commit

Permalink
Improved error message on append schema mismatch (#241)
Browse files Browse the repository at this point in the history
  • Loading branch information
WillAyd authored Jan 25, 2024
1 parent 63ba24e commit f509305
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 9 deletions.
43 changes: 40 additions & 3 deletions pantab/src/pantab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,41 @@ static auto makeInsertHelper(std::shared_ptr<hyperapi::Inserter> inserter,
}
}

///
/// If a table already exists, ensure the structure is the same as what we
/// append
///
void assertColumnsEqual(
const std::vector<hyperapi::TableDefinition::Column> &new_columns,
const std::vector<hyperapi::TableDefinition::Column> &old_columns) {
const size_t new_size = new_columns.size();
const size_t old_size = old_columns.size();
if (new_size != old_size) {
throw std::invalid_argument(
"Number of columns in new table definition does not match existing");
}

for (size_t i = 0; i < new_size; i++) {
const auto new_col = new_columns[i];
const auto old_col = old_columns[i];
const auto new_name = new_col.getName();
const auto old_name = old_col.getName();
if (new_name != old_name) {
throw std::invalid_argument(
"Column name mismatch at index " + std::to_string(i) +
"; new: " + new_name.toString() + " old: " + old_name.toString());
}

const auto new_type = new_col.getType();
const auto old_type = old_col.getType();
if (new_type != old_type) {
throw std::invalid_argument(
"Column type mismatch at index " + std::to_string(i) +
"; new: " + new_type.toString() + " old: " + old_type.toString());
}
}
};

using SchemaAndTableName = std::tuple<std::string, std::string>;

void write_to_hyper(
Expand Down Expand Up @@ -433,10 +468,12 @@ void write_to_hyper(
const hyperapi::TableName table_name{hyper_schema, hyper_table};
const hyperapi::TableDefinition tableDef{table_name, hyper_columns};
catalog.createSchemaIfNotExists(*table_name.getSchemaName());
if (table_mode == "w") {

if ((table_mode == "a") && (catalog.hasTable(table_name))) {
const auto existing_def = catalog.getTableDefinition(table_name);
assertColumnsEqual(hyper_columns, std::move(existing_def.getColumns()));
} else {
catalog.createTable(tableDef);
} else if (table_mode == "a") {
catalog.createTableIfNotExists(tableDef);
}
const auto inserter =
std::make_shared<hyperapi::Inserter>(connection, tableDef);
Expand Down
24 changes: 18 additions & 6 deletions pantab/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,27 @@ def test_bad_table_mode_raises(df, tmp_hyper):
pantab.frames_to_hyper({"a": df}, tmp_hyper, table_mode="x")


@pytest.mark.parametrize("new_dtype", ["int64", float])
def test_append_mode_raises_column_dtype_mismatch(new_dtype, df, tmp_hyper, table_name):
@pytest.mark.parametrize(
"new_dtype,hyper_type_name", [("int64", "BIGINT"), (float, "DOUBLE PRECISION")]
)
def test_append_mode_raises_column_dtype_mismatch(
new_dtype, hyper_type_name, df, tmp_hyper, table_name
):
pantab.frame_to_hyper(df, tmp_hyper, table=table_name)

df["int16"] = df["int16"].astype(new_dtype)
# TODO: a better error message from hyper would be nice here
# seems like a limitation of hyper api
msg = ""
with pytest.raises(RuntimeError, match=msg):

msg = f"Column type mismatch at index 0; new: {hyper_type_name} old: SMALLINT"
with pytest.raises(ValueError, match=msg):
pantab.frame_to_hyper(df, tmp_hyper, table=table_name, table_mode="a")


def test_append_mode_raises_ncolumns_mismatch(df, tmp_hyper, table_name):
pantab.frame_to_hyper(df, tmp_hyper, table=table_name)

df = df.drop(columns=["int16"])
msg = "Number of columns"
with pytest.raises(ValueError, match=msg):
pantab.frame_to_hyper(df, tmp_hyper, table=table_name, table_mode="a")


Expand Down

0 comments on commit f509305

Please sign in to comment.