|
14 | 14 | * limitations under the License.
|
15 | 15 | */
|
16 | 16 |
|
| 17 | +#include "velox/common/base/tests/GTestUtils.h" |
17 | 18 | #include "velox/common/file/FileSystems.h"
|
18 | 19 | #include "velox/connectors/hive/HiveConnectorSplit.h"
|
19 | 20 | #include "velox/connectors/hive/iceberg/IcebergDeleteFile.h"
|
@@ -225,6 +226,35 @@ class HiveIcebergTest : public HiveConnectorTestBase {
|
225 | 226 | ASSERT_TRUE(it->second.peakMemoryBytes > 0);
|
226 | 227 | }
|
227 | 228 |
|
| 229 | + void assertQuery( |
| 230 | + RowTypePtr rowType, |
| 231 | + const std::vector<RowVectorPtr>& dataVectors, |
| 232 | + const std::string duckDbSql, |
| 233 | + const std::unordered_map<std::string, std::optional<std::string>> |
| 234 | + partitionKeys = {}, |
| 235 | + const std::vector<std::string> filters = {}) { |
| 236 | + VELOX_CHECK(!duckDbSql.empty(), "DuckDb sql is empty"); |
| 237 | + auto dataFilePath = TempFilePath::create(); |
| 238 | + |
| 239 | + writeToFile( |
| 240 | + dataFilePath->getPath(), dataVectors, config_, flushPolicyFactory_); |
| 241 | + std::vector<std::shared_ptr<ConnectorSplit>> splits; |
| 242 | + splits.emplace_back( |
| 243 | + makeIcebergSplit(dataFilePath->getPath(), {}, partitionKeys)); |
| 244 | + |
| 245 | + std::unordered_set<std::string> partitionColumns; |
| 246 | + |
| 247 | + for (auto partitionKey : partitionKeys) { |
| 248 | + partitionColumns.insert(partitionKey.first); |
| 249 | + } |
| 250 | + |
| 251 | + auto plan = |
| 252 | + PlanBuilder(pool_.get()) |
| 253 | + .tableScan(rowType, filters, "", nullptr, {}, partitionColumns) |
| 254 | + .planNode(); |
| 255 | + HiveConnectorTestBase::assertQuery(plan, splits, duckDbSql); |
| 256 | + } |
| 257 | + |
228 | 258 | const static int rowCount = 20000;
|
229 | 259 |
|
230 | 260 | private:
|
@@ -337,8 +367,9 @@ class HiveIcebergTest : public HiveConnectorTestBase {
|
337 | 367 |
|
338 | 368 | std::shared_ptr<ConnectorSplit> makeIcebergSplit(
|
339 | 369 | const std::string& dataFilePath,
|
340 |
| - const std::vector<IcebergDeleteFile>& deleteFiles = {}) { |
341 |
| - std::unordered_map<std::string, std::optional<std::string>> partitionKeys; |
| 370 | + const std::vector<IcebergDeleteFile>& deleteFiles = {}, |
| 371 | + const std::unordered_map<std::string, std::optional<std::string>> |
| 372 | + partitionKeys = {}) { |
342 | 373 | std::unordered_map<std::string, std::string> customSplitInfo;
|
343 | 374 | customSplitInfo["table_format"] = "hive-iceberg";
|
344 | 375 |
|
@@ -660,4 +691,25 @@ TEST_F(HiveIcebergTest, positionalDeletesMultipleSplits) {
|
660 | 691 | assertMultipleSplits({}, 10, 3);
|
661 | 692 | }
|
662 | 693 |
|
| 694 | +TEST_F(HiveIcebergTest, testPartitionedRead) { |
| 695 | + RowTypePtr rowType{ROW({"c0", "ds"}, {BIGINT(), DateType::get()})}; |
| 696 | + std::unordered_map<std::string, std::optional<std::string>> partitionKeys; |
| 697 | + // Iceberg API sets partition values for dates to daysSinceEpoch, so |
| 698 | + // in velox, we do not need to convert it to days. |
| 699 | + // Date = 2018-04-06, daysSinceEpoch = 17627 |
| 700 | + partitionKeys["ds"] = "17627"; |
| 701 | + |
| 702 | + std::vector<RowVectorPtr> dataVectors; |
| 703 | + VectorPtr c0 = makeFlatVector<int64_t>((std::vector<int64_t>){1}); |
| 704 | + VectorPtr ds = makeFlatVector<int32_t>((std::vector<int32_t>){17627}); |
| 705 | + dataVectors.push_back(makeRowVector({"c0", "ds"}, {c0, ds})); |
| 706 | + |
| 707 | + assertQuery( |
| 708 | + rowType, dataVectors, "SELECT 1, '2018-04-06'", partitionKeys, {}); |
| 709 | + |
| 710 | + std::vector<std::string> filters = {"ds = date'2018-04-06'"}; |
| 711 | + assertQuery( |
| 712 | + rowType, dataVectors, "SELECT 1, '2018-04-06'", partitionKeys, filters); |
| 713 | +} |
| 714 | + |
663 | 715 | } // namespace facebook::velox::connector::hive::iceberg
|
0 commit comments