
Commit 80f2724 (1 parent: ba81cac)
Commit message: Initial

File tree: 9 files changed (+94, -6 lines)


cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc (+10, -2)
@@ -263,8 +263,16 @@ bool SubstraitToVeloxPlanValidator::isAllowedCast(const TypePtr& fromType, const
 
   // Limited support for X to Timestamp.
   if (toType->isTimestamp() && !fromType->isDate()) {
-    LOG_VALIDATION_MSG("Casting from " + fromType->toString() + " to TIMESTAMP is not supported.");
-    return false;
+    switch (fromType->kind()) {
+      case TypeKind::TINYINT:
+      case TypeKind::SMALLINT:
+      case TypeKind::INTEGER:
+      case TypeKind::BIGINT:
+        break;
+      default:
+        LOG_VALIDATION_MSG("Casting from " + fromType->toString() + " to TIMESTAMP is not supported.");
+        return false;
+    }
   }
 
   // Limited support for Complex types.
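With this change the validator accepts casts from the integral types TINYINT, SMALLINT, INTEGER, and BIGINT to TIMESTAMP instead of rejecting every non-DATE source. In Spark SQL (with ANSI mode disabled), an integral value cast to TIMESTAMP is interpreted as seconds since the Unix epoch, which is why the rewritten tests below switch their constants from milliseconds (1618161925000) to seconds (1618161925). A minimal sketch, assuming only a running SparkSession named spark (not part of this commit):

  val df = spark
    .range(0, 5)
    .selectExpr(
      // An integral value cast to TIMESTAMP is treated as seconds since the epoch.
      "cast(1618161925 + id as timestamp) as ts_from_bigint",
      // The pattern used in the rewritten tests: seconds-based timestamp, then cast to date.
      "cast(cast(1618161925 + id * 60 * 60 * 24 as timestamp) as date) as date_from_ts")
  df.show(false)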

gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala (+2)
@@ -870,6 +870,8 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenParquetColumnIndexSuite]
     // Rewrite by just removing test timestamp.
     .exclude("test reading unaligned pages - test all types")
+    // Rewrite by converting smaller integral value to timestamp.
+    .exclude("test reading unaligned pages - test all types (dict encode)")
   enableSuite[GlutenParquetCompressionCodecPrecedenceSuite]
   enableSuite[GlutenParquetEncodingSuite]
   enableSuite[GlutenParquetFileFormatV1Suite]

gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetColumnIndexSuite.scala (+19, -1)
@@ -38,7 +38,25 @@ class GlutenParquetColumnIndexSuite extends ParquetColumnIndexSuite with GlutenS
         "cast(id as float) as _5",
         "cast(id as double) as _6",
         "cast(id as decimal(20,0)) as _7",
-        "cast(cast(1618161925000 + id * 1000 * 60 * 60 * 24 as timestamp) as date) as _9"
+        "cast(cast(1618161925 + id * 60 * 60 * 24 as timestamp) as date) as _9"
+      )
+    checkUnalignedPages(df)(actions: _*)
+  }
+
+  testGluten("test reading unaligned pages - test all types (dict encode)") {
+    val df = spark
+      .range(0, 2000)
+      .selectExpr(
+        "id as _1",
+        "cast(id % 10 as byte) as _2",
+        "cast(id % 10 as short) as _3",
+        "cast(id % 10 as int) as _4",
+        "cast(id % 10 as float) as _5",
+        "cast(id % 10 as double) as _6",
+        "cast(id % 10 as decimal(20,0)) as _7",
+        "cast(id % 2 as boolean) as _8",
+        "cast(cast(1618161925 + (id % 10) * 60 * 60 * 24 as timestamp) as date) as _9",
+        "cast(1618161925 + (id % 10) as timestamp) as _10"
       )
     checkUnalignedPages(df)(actions: _*)
   }
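The new dict-encode variant keeps the same schema but derives most columns from id % 10, so each column holds only a handful of distinct values and the Parquet writer can dictionary-encode the pages the column-index test reads back; column _10 additionally exercises the now-allowed integral-to-timestamp cast directly. A standalone sketch of that idea (hypothetical output path and reduced column subset, not part of the suite), again assuming a running SparkSession named spark:

  // Low-cardinality columns (values repeat every 10 rows) are what let the
  // Parquet writer dictionary-encode these pages.
  val dictDf = spark
    .range(0, 2000)
    .selectExpr(
      "cast(id % 10 as int) as _4",
      "cast(1618161925 + (id % 10) as timestamp) as _10")
  dictDf.write.mode("overwrite").parquet("/tmp/dict_encode_sample")
  spark.read.parquet("/tmp/dict_encode_sample").show(5, false)

The same rewrite is applied verbatim to the spark33, spark34, and spark35 test modules below.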

gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala (+2)
@@ -664,6 +664,8 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenParquetColumnIndexSuite]
     // Rewrite by just removing test timestamp.
     .exclude("test reading unaligned pages - test all types")
+    // Rewrite by converting smaller integral value to timestamp.
+    .exclude("test reading unaligned pages - test all types (dict encode)")
   enableSuite[GlutenParquetCompressionCodecPrecedenceSuite]
   enableSuite[GlutenParquetDeltaByteArrayEncodingSuite]
   enableSuite[GlutenParquetDeltaEncodingInteger]

gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetColumnIndexSuite.scala (+19, -1)
@@ -38,7 +38,25 @@ class GlutenParquetColumnIndexSuite extends ParquetColumnIndexSuite with GlutenS
         "cast(id as float) as _5",
         "cast(id as double) as _6",
         "cast(id as decimal(20,0)) as _7",
-        "cast(cast(1618161925000 + id * 1000 * 60 * 60 * 24 as timestamp) as date) as _9"
+        "cast(cast(1618161925 + id * 60 * 60 * 24 as timestamp) as date) as _9"
+      )
+    checkUnalignedPages(df)(actions: _*)
+  }
+
+  testGluten("test reading unaligned pages - test all types (dict encode)") {
+    val df = spark
+      .range(0, 2000)
+      .selectExpr(
+        "id as _1",
+        "cast(id % 10 as byte) as _2",
+        "cast(id % 10 as short) as _3",
+        "cast(id % 10 as int) as _4",
+        "cast(id % 10 as float) as _5",
+        "cast(id % 10 as double) as _6",
+        "cast(id % 10 as decimal(20,0)) as _7",
+        "cast(id % 2 as boolean) as _8",
+        "cast(cast(1618161925 + (id % 10) * 60 * 60 * 24 as timestamp) as date) as _9",
+        "cast(1618161925 + (id % 10) as timestamp) as _10"
       )
     checkUnalignedPages(df)(actions: _*)
   }

gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala (+2)
@@ -660,6 +660,8 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenParquetColumnIndexSuite]
     // Rewrite by just removing test timestamp.
     .exclude("test reading unaligned pages - test all types")
+    // Rewrite by converting smaller integral value to timestamp.
+    .exclude("test reading unaligned pages - test all types (dict encode)")
   enableSuite[GlutenParquetCompressionCodecPrecedenceSuite]
   enableSuite[GlutenParquetDeltaByteArrayEncodingSuite]
   enableSuite[GlutenParquetDeltaEncodingInteger]

gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetColumnIndexSuite.scala (+19, -1)
@@ -38,7 +38,25 @@ class GlutenParquetColumnIndexSuite extends ParquetColumnIndexSuite with GlutenS
         "cast(id as float) as _5",
         "cast(id as double) as _6",
         "cast(id as decimal(20,0)) as _7",
-        "cast(cast(1618161925000 + id * 1000 * 60 * 60 * 24 as timestamp) as date) as _9"
+        "cast(cast(1618161925 + id * 60 * 60 * 24 as timestamp) as date) as _9"
+      )
+    checkUnalignedPages(df)(actions: _*)
+  }
+
+  testGluten("test reading unaligned pages - test all types (dict encode)") {
+    val df = spark
+      .range(0, 2000)
+      .selectExpr(
+        "id as _1",
+        "cast(id % 10 as byte) as _2",
+        "cast(id % 10 as short) as _3",
+        "cast(id % 10 as int) as _4",
+        "cast(id % 10 as float) as _5",
+        "cast(id % 10 as double) as _6",
+        "cast(id % 10 as decimal(20,0)) as _7",
+        "cast(id % 2 as boolean) as _8",
+        "cast(cast(1618161925 + (id % 10) * 60 * 60 * 24 as timestamp) as date) as _9",
+        "cast(1618161925 + (id % 10) as timestamp) as _10"
       )
     checkUnalignedPages(df)(actions: _*)
   }

gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala (+2)
@@ -671,6 +671,8 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenParquetColumnIndexSuite]
     // Rewrite by just removing test timestamp.
     .exclude("test reading unaligned pages - test all types")
+    // Rewrite by converting smaller integral value to timestamp.
+    .exclude("test reading unaligned pages - test all types (dict encode)")
   enableSuite[GlutenParquetCompressionCodecPrecedenceSuite]
   enableSuite[GlutenParquetDeltaByteArrayEncodingSuite]
   enableSuite[GlutenParquetDeltaEncodingInteger]

gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetColumnIndexSuite.scala (+19, -1)
@@ -38,7 +38,25 @@ class GlutenParquetColumnIndexSuite extends ParquetColumnIndexSuite with GlutenS
         "cast(id as float) as _5",
         "cast(id as double) as _6",
         "cast(id as decimal(20,0)) as _7",
-        "cast(cast(1618161925000 + id * 1000 * 60 * 60 * 24 as timestamp) as date) as _9"
+        "cast(cast(1618161925 + id * 60 * 60 * 24 as timestamp) as date) as _9"
+      )
+    checkUnalignedPages(df)(actions: _*)
+  }
+
+  testGluten("test reading unaligned pages - test all types (dict encode)") {
+    val df = spark
+      .range(0, 2000)
+      .selectExpr(
+        "id as _1",
+        "cast(id % 10 as byte) as _2",
+        "cast(id % 10 as short) as _3",
+        "cast(id % 10 as int) as _4",
+        "cast(id % 10 as float) as _5",
+        "cast(id % 10 as double) as _6",
+        "cast(id % 10 as decimal(20,0)) as _7",
+        "cast(id % 2 as boolean) as _8",
+        "cast(cast(1618161925 + (id % 10) * 60 * 60 * 24 as timestamp) as date) as _9",
+        "cast(1618161925 + (id % 10) as timestamp) as _10"
      )
     checkUnalignedPages(df)(actions: _*)
   }
