Skip to content

Commit

Permalink
Fix the problem where buildSelectedType() makes the input type invalid
Browse files Browse the repository at this point in the history
buildSelectedType() takes a constant typeWithId type tree and
applies the selector on it to return a selected new type tree.
However, it modifies the input typeWithId unexpectedly because
it duplicates the internal type nodes, but not the
leaves(primitive types). The leaf nodes were directly moved to
the result instead of copied, thus making in the original
input typeWithId incomplete. This PR fixes the problem by
making a copy of the leaf nodes as well. After this change,
the original input typeWithId would still be valid.
  • Loading branch information
nmahadevuni committed Mar 26, 2024
1 parent 3aa020d commit 23a0dbb
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 7 deletions.
19 changes: 12 additions & 7 deletions velox/dwio/common/TypeUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,16 @@ std::shared_ptr<const TypeWithId> visit(
const std::shared_ptr<const TypeWithId>& typeWithId,
const std::function<bool(size_t)>& selector) {
if (typeWithId->type()->isPrimitiveType()) {
return typeWithId;
return std::make_shared<TypeWithId>(
typeWithId->type(),
std::vector<std::shared_ptr<const TypeWithId>>(),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
}
if (typeWithId->type()->isRow()) {
std::vector<std::string> names;
std::vector<std::shared_ptr<const TypeWithId>> typesWithId;
std::vector<std::shared_ptr<const TypeWithId>> selectedChildren;
std::vector<std::shared_ptr<const Type>> types;
auto& row = typeWithId->type()->asRow();
for (auto i = 0; i < typeWithId->size(); ++i) {
Expand All @@ -56,32 +61,32 @@ std::shared_ptr<const TypeWithId> visit(
names.push_back(row.nameOf(i));
std::shared_ptr<const TypeWithId> twid;
twid = visit(child, selector);
typesWithId.push_back(twid);
selectedChildren.push_back(twid);
types.push_back(twid->type());
}
}
VELOX_USER_CHECK(
!types.empty(), "selected nothing from row: " + row.toString());
return std::make_shared<TypeWithId>(
ROW(std::move(names), std::move(types)),
std::move(typesWithId),
std::move(selectedChildren),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
} else {
checkChildrenSelected(typeWithId, selector);
std::vector<std::shared_ptr<const TypeWithId>> typesWithId;
std::vector<std::shared_ptr<const TypeWithId>> selectedChildren;
std::vector<std::shared_ptr<const Type>> types;
for (auto i = 0; i < typeWithId->size(); ++i) {
auto& child = typeWithId->childAt(i);
std::shared_ptr<const TypeWithId> twid = visit(child, selector);
typesWithId.push_back(twid);
selectedChildren.push_back(twid);
types.push_back(twid->type());
}
auto type = createType(typeWithId->type()->kind(), std::move(types));
return std::make_shared<TypeWithId>(
type,
std::move(typesWithId),
std::move(selectedChildren),
typeWithId->id(),
typeWithId->maxId(),
typeWithId->column());
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/common/TypeWithId.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace facebook::velox::dwio::common {

class TypeWithId : public velox::Tree<std::shared_ptr<const TypeWithId>> {
public:
// This constructor will re-parent the children.
TypeWithId(
std::shared_ptr<const velox::Type> type,
std::vector<std::shared_ptr<const TypeWithId>>&& children,
Expand Down
54 changes: 54 additions & 0 deletions velox/dwio/common/tests/TypeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,28 @@ using namespace facebook::velox::dwio::common::typeutils;
using facebook::velox::type::fbhive::HiveTypeParser;
using facebook::velox::type::fbhive::HiveTypeSerializer;

void assertEqualTypeWithId(
std::shared_ptr<const TypeWithId>& actual,
std::shared_ptr<const TypeWithId>& expected) {
EXPECT_EQ(actual->size(), expected->size());
for (auto idx = 0; idx < actual->size(); idx++) {
auto actualTypeChild = actual->childAt(idx);
auto expectedTypeChild = expected->childAt(idx);
EXPECT_TRUE(actualTypeChild->type()->kindEquals(expectedTypeChild->type()));
EXPECT_EQ(actualTypeChild->id(), expectedTypeChild->id());
EXPECT_EQ(actualTypeChild->column(), expectedTypeChild->column());
assertEqualTypeWithId(actualTypeChild, expectedTypeChild);
}
}

void assertValidTypeWithId(
const std::shared_ptr<const TypeWithId>& typeWithId) {
for (auto idx = 0; idx < typeWithId->size(); idx++) {
EXPECT_EQ(typeWithId->childAt(idx)->parent(), typeWithId.get());
assertValidTypeWithId(typeWithId->childAt(idx));
}
}

TEST(TestType, selectedType) {
auto type = HiveTypeParser().parse(
"struct<col0:tinyint,col1:smallint,col2:array<string>,"
Expand All @@ -43,11 +65,27 @@ TEST(TestType, selectedType) {
EXPECT_EQ(0, typeWithId->id());
EXPECT_EQ(11, typeWithId->maxId());

auto copySelector = [](size_t index) { return true; };

// The following two lines verify that the original type tree's children are
// not re-parented by the buildSelectedType method when copying. If it is
// re-parented, then this test would crash with SIGSEGV. The return type is
// deliberately ignored so the copied type will be deallocated upon return.
buildSelectedType(typeWithId, copySelector);
EXPECT_EQ(typeWithId->childAt(1)->parent()->type()->kind(), TypeKind::ROW);

auto typeWithIdCopy = buildSelectedType(typeWithId, copySelector);
assertEqualTypeWithId(typeWithId, typeWithIdCopy);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(typeWithIdCopy);

std::vector<bool> selected(12);
selected[0] = true;
selected[2] = true;
auto selector = [&selected](size_t index) { return selected[index]; };

auto cutType = buildSelectedType(typeWithId, selector);
assertEqualTypeWithId(typeWithIdCopy, typeWithId);
EXPECT_STREQ(
"struct<col1:smallint>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand All @@ -57,6 +95,9 @@ TEST(TestType, selectedType) {

selected.assign(12, true);
cutType = buildSelectedType(typeWithId, selector);
assertEqualTypeWithId(typeWithIdCopy, typeWithId);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(typeWithIdCopy);
EXPECT_STREQ(
"struct<col0:tinyint,col1:smallint,col2:array<string>,"
"col3:map<float,double>,col4:float,"
Expand All @@ -69,6 +110,9 @@ TEST(TestType, selectedType) {
selected[0] = true;
selected[8] = true;
cutType = buildSelectedType(typeWithId, selector);
assertEqualTypeWithId(typeWithIdCopy, typeWithId);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(typeWithIdCopy);
EXPECT_STREQ(
"struct<col4:float>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand All @@ -91,6 +135,9 @@ TEST(TestType, selectedType) {
selected[3] = true;
selected[4] = true;
cutType = buildSelectedType(typeWithId, selector);
assertEqualTypeWithId(typeWithIdCopy, typeWithId);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(typeWithIdCopy);
EXPECT_STREQ(
"struct<col2:array<string>>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand All @@ -106,6 +153,9 @@ TEST(TestType, selectedType) {
selected[6] = true;
selected[7] = true;
cutType = buildSelectedType(typeWithId, selector);
assertEqualTypeWithId(typeWithIdCopy, typeWithId);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(typeWithIdCopy);
EXPECT_STREQ(
"struct<col3:map<float,double>>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand Down Expand Up @@ -135,6 +185,10 @@ TEST(TestType, selectedType) {
selected[1] = true;
selected[11] = true;
cutType = buildSelectedType(typeWithId, selector);
assertEqualTypeWithId(typeWithIdCopy, typeWithId);
assertValidTypeWithId(typeWithId);
assertValidTypeWithId(typeWithIdCopy);

EXPECT_STREQ(
"struct<col0:tinyint,col7:string>",
HiveTypeSerializer::serialize(cutType->type()).c_str());
Expand Down

0 comments on commit 23a0dbb

Please sign in to comment.