Skip to content

Commit

Permalink
Add tests to verify buildSelectedType() does not invalidate input
Browse files Browse the repository at this point in the history
buildSelectedType() takes a constant typeWithId type tree and
applies the selector on it to return a selected new type tree.
However, it used to modify the input typeWithId unexpectedly
because it duplicates the internal type nodes, but not the
leaves(primitive types). This was fixed in
facebookincubator#9244. This PR
adds tests to verify that fix.
  • Loading branch information
nmahadevuni committed Apr 4, 2024
1 parent efb7e77 commit f8d7e55
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 7 deletions.
12 changes: 6 additions & 6 deletions velox/dwio/common/TypeUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ std::unique_ptr<TypeWithId> visit(
}
if (typeWithId->type()->isRow()) {
std::vector<std::string> names;
std::vector<std::unique_ptr<TypeWithId>> children;
std::vector<std::unique_ptr<TypeWithId>> selectedChildren;
std::vector<std::shared_ptr<const Type>> types;
auto& row = typeWithId->type()->asRow();
for (auto i = 0; i < typeWithId->size(); ++i) {
Expand All @@ -61,31 +61,31 @@ std::unique_ptr<TypeWithId> visit(
names.push_back(row.nameOf(i));
auto newChild = visit(child, selector);
types.push_back(newChild->type());
children.push_back(std::move(newChild));
selectedChildren.push_back(std::move(newChild));
}
}
VELOX_USER_CHECK(
!types.empty(), "selected nothing from row: " + row.toString());
return std::make_unique<TypeWithId>(
ROW(std::move(names), std::move(types)),
std::move(children),
std::move(selectedChildren),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
} else {
checkChildrenSelected(typeWithId, selector);
std::vector<std::unique_ptr<TypeWithId>> children;
std::vector<std::unique_ptr<TypeWithId>> selectedChildren;
std::vector<std::shared_ptr<const Type>> types;
for (auto i = 0; i < typeWithId->size(); ++i) {
auto& child = typeWithId->childAt(i);
auto newChild = visit(child, selector);
types.push_back(newChild->type());
children.push_back(std::move(newChild));
selectedChildren.push_back(std::move(newChild));
}
auto type = createType(typeWithId->type()->kind(), std::move(types));
return std::make_unique<TypeWithId>(
type,
std::move(children),
std::move(selectedChildren),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
Expand Down
51 changes: 50 additions & 1 deletion velox/dwio/common/tests/TypeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,28 @@ using namespace facebook::velox::dwio::common::typeutils;
using facebook::velox::type::fbhive::HiveTypeParser;
using facebook::velox::type::fbhive::HiveTypeSerializer;

void assertEqualTypeWithId(
std::shared_ptr<const TypeWithId>& actual,
std::shared_ptr<const TypeWithId>& expected) {
EXPECT_EQ(actual->size(), expected->size());
for (auto idx = 0; idx < actual->size(); idx++) {
auto actualTypeChild = actual->childAt(idx);
auto expectedTypeChild = expected->childAt(idx);
EXPECT_TRUE(actualTypeChild->type()->kindEquals(expectedTypeChild->type()));
EXPECT_EQ(actualTypeChild->id(), expectedTypeChild->id());
EXPECT_EQ(actualTypeChild->column(), expectedTypeChild->column());
assertEqualTypeWithId(actualTypeChild, expectedTypeChild);
}
}

void assertValidTypeWithId(
const std::shared_ptr<const TypeWithId>& typeWithId) {
for (auto idx = 0; idx < typeWithId->size(); idx++) {
EXPECT_EQ(typeWithId->childAt(idx)->parent(), typeWithId.get());
assertValidTypeWithId(typeWithId->childAt(idx));
}
}

TEST(TestType, selectedType) {
auto type = HiveTypeParser().parse(
"struct<col0:tinyint,col1:smallint,col2:array<string>,"
Expand All @@ -43,11 +65,27 @@ TEST(TestType, selectedType) {
EXPECT_EQ(0, typeWithId->id());
EXPECT_EQ(11, typeWithId->maxId());

auto copySelector = [](size_t index) { return true; };

// The following two lines verify that the original type tree's children are
// not re-parented by the buildSelectedType method when copying. If it is
// re-parented, then this test would crash with SIGSEGV. The return type is
// deliberately ignored so the copied type will be deallocated upon return.
buildSelectedType(typeWithId, copySelector);
EXPECT_EQ(typeWithId->childAt(1)->parent()->type()->kind(), TypeKind::ROW);

auto cutType = buildSelectedType(typeWithId, copySelector);
assertEqualTypeWithId(cutType, typeWithId);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(cutType);

std::vector<bool> selected(12);
selected[0] = true;
selected[2] = true;
auto selector = [&selected](size_t index) { return selected[index]; };
auto cutType = buildSelectedType(typeWithId, selector);
cutType = buildSelectedType(typeWithId, selector);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(cutType);
EXPECT_STREQ(
"struct<col1:smallint>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand All @@ -57,6 +95,8 @@ TEST(TestType, selectedType) {

selected.assign(12, true);
cutType = buildSelectedType(typeWithId, selector);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(cutType);
EXPECT_STREQ(
"struct<col0:tinyint,col1:smallint,col2:array<string>,"
"col3:map<float,double>,col4:float,"
Expand All @@ -69,6 +109,8 @@ TEST(TestType, selectedType) {
selected[0] = true;
selected[8] = true;
cutType = buildSelectedType(typeWithId, selector);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(cutType);
EXPECT_STREQ(
"struct<col4:float>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand All @@ -91,6 +133,8 @@ TEST(TestType, selectedType) {
selected[3] = true;
selected[4] = true;
cutType = buildSelectedType(typeWithId, selector);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(cutType);
EXPECT_STREQ(
"struct<col2:array<string>>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand All @@ -106,6 +150,8 @@ TEST(TestType, selectedType) {
selected[6] = true;
selected[7] = true;
cutType = buildSelectedType(typeWithId, selector);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(cutType);
EXPECT_STREQ(
"struct<col3:map<float,double>>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand Down Expand Up @@ -135,6 +181,9 @@ TEST(TestType, selectedType) {
selected[1] = true;
selected[11] = true;
cutType = buildSelectedType(typeWithId, selector);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(cutType);

EXPECT_STREQ(
"struct<col0:tinyint,col7:string>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand Down

0 comments on commit f8d7e55

Please sign in to comment.