This repository was archived by the owner on Jun 14, 2024. It is now read-only.

Commit 6aa3e13 (parent: 93a6efd)

Support filter with indexes on nested fields

File tree: 5 files changed, +379 −50 lines

src/main/scala/com/microsoft/hyperspace/index/rules/FilterIndexRule.scala (+44 −20)
@@ -17,18 +17,19 @@
 package com.microsoft.hyperspace.index.rules

 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.analysis.CleanupAliases
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
+import org.apache.spark.sql.catalyst.analysis.{CleanupAliases, Resolver, UnresolvedAttribute}
+import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.{Filter, LeafNode, LogicalPlan, Project}
 import org.apache.spark.sql.catalyst.rules.Rule

 import com.microsoft.hyperspace.{ActiveSparkSession, Hyperspace}
 import com.microsoft.hyperspace.actions.Constants
 import com.microsoft.hyperspace.index.IndexLogEntry
 import com.microsoft.hyperspace.index.rankers.FilterIndexRanker
+import com.microsoft.hyperspace.index.rules.PlanUtils._
 import com.microsoft.hyperspace.index.sources.FileBasedRelation
 import com.microsoft.hyperspace.telemetry.{AppInfo, HyperspaceEventLogging, HyperspaceIndexUsageEvent}
-import com.microsoft.hyperspace.util.{HyperspaceConf, ResolverUtils}
+import com.microsoft.hyperspace.util.{HyperspaceConf, ResolverUtils, SchemaUtils}

 /**
  * FilterIndex rule looks for opportunities in a logical plan to replace
@@ -53,7 +54,7 @@ object FilterIndexRule
       case ExtractFilterNode(originalPlan, filter, outputColumns, filterColumns) =>
         try {
           val candidateIndexes =
-            findCoveringIndexes(filter, outputColumns, filterColumns)
+            findCoveringIndexes(filter, outputColumns, filterColumns, plan)
           FilterIndexRanker.rank(spark, filter, candidateIndexes) match {
             case Some(index) =>
               // As FilterIndexRule is not intended to support bucketed scan, we set
@@ -99,7 +100,8 @@ object FilterIndexRule
   private def findCoveringIndexes(
       filter: Filter,
       outputColumns: Seq[String],
-      filterColumns: Seq[String]): Seq[IndexLogEntry] = {
+      filterColumns: Seq[String],
+      plan: LogicalPlan): Seq[IndexLogEntry] = {
     RuleUtils.getRelation(spark, filter) match {
       case Some(r) =>
         val indexManager = Hyperspace
@@ -111,20 +113,35 @@
         // See https://github.com/microsoft/hyperspace/issues/65
         val allIndexes = indexManager.getIndexes(Seq(Constants.States.ACTIVE))

-        val candidateIndexes = allIndexes.filter { index =>
-          indexCoversPlan(
-            outputColumns,
-            filterColumns,
-            index.indexedColumns,
-            index.includedColumns)
+        def resolveWithChildren(fieldName: String, plan: LogicalPlan, resolver: Resolver) = {
+          plan.resolveChildren(UnresolvedAttribute.parseAttributeName(fieldName), resolver)
         }

-        // Get candidate via file-level metadata validation. This is performed after pruning
-        // by column schema, as this might be expensive when there are numerous files in the
-        // relation or many indexes to be checked.
-        RuleUtils.getCandidateIndexes(spark, candidateIndexes, r)
-
-      case None => Nil // There is zero or more than one supported relations in Filter's sub-plan.
+        // Resolve output columns with default resolver method.
+        val resolvedOutputColumnsOpt =
+          ResolverUtils.resolve(spark, outputColumns, plan, resolveWithChildren, force = false)
+        // Resolve filter columns the same way.
+        val resolvedFilterColumnsOpt =
+          ResolverUtils.resolve(spark, filterColumns, plan, resolveWithChildren, force = false)
+
+        (resolvedOutputColumnsOpt, resolvedFilterColumnsOpt) match {
+          case (Some(resolvedOutputColumns), Some(resolvedFilterColumns)) =>
+            val candidateIndexes = allIndexes.filter { index =>
+              indexCoversPlan(
+                SchemaUtils.prefixNestedFieldNames(resolvedOutputColumns),
+                SchemaUtils.prefixNestedFieldNames(resolvedFilterColumns),
+                index.indexedColumns,
+                index.includedColumns)
+            }
+
+            // Get candidates via file-level metadata validation. This is performed after pruning
+            // by column schema, as this might be expensive when there are numerous files in the
+            // relation or many indexes to be checked.
+            RuleUtils.getCandidateIndexes(spark, candidateIndexes, r)
+
+          case _ => Nil
+        }
+      case _ => Nil // There is zero or more than one supported relation in Filter's sub-plan.
     }
   }
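The resolution step above normalizes nested field names from the query into the flattened, prefixed form stored in index metadata, so coverage can be checked with plain string comparison against index.indexedColumns and index.includedColumns. A minimal sketch of the idea, assuming SchemaUtils.prefixNestedFieldNames marks dotted names with a reserved prefix such as "__hs_nested." (the actual constant, SchemaUtils.NESTED_FIELD_PREFIX, is defined outside this diff):

// Hypothetical stand-in for SchemaUtils.prefixNestedFieldNames, for
// illustration only: dotted (nested) names get a reserved prefix,
// top-level names pass through unchanged.
def prefixNestedFieldNames(names: Seq[String]): Seq[String] =
  names.map(n => if (n.contains(".")) s"__hs_nested.$n" else n)

prefixNestedFieldNames(Seq("date", "nested.leaf.cnt"))
// => Seq("date", "__hs_nested.nested.leaf.cnt"), directly comparable to the
//    flattened column names an index records for nested fields.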
@@ -136,7 +153,6 @@ object FilterIndexRule
    * @param filterColumns List of columns in filter predicate.
    * @param indexedColumns List of indexed columns (e.g. from an index being checked)
    * @param includedColumns List of included columns (e.g. from an index being checked)
-   * @param fileFormat FileFormat for input relation in original logical plan.
    * @return 'true' if
    *         1. Index fully covers output and filter columns, and
    *         2. Filter predicate contains first column in index's 'indexed' columns.
@@ -168,9 +184,17 @@ object ExtractFilterNode
       val projectColumnNames = CleanupAliases(project)
         .asInstanceOf[Project]
         .projectList
-        .map(_.references.map(_.asInstanceOf[AttributeReference].name))
+        .map(extractNamesFromExpression)
         .flatMap(_.toSeq)
-      val filterColumnNames = condition.references.map(_.name).toSeq
+      val filterColumnNames = extractNamesFromExpression(condition).toSeq
+        .sortBy(-_.length)
+        .foldLeft(Seq.empty[String]) { (acc, e) =>
+          if (!acc.exists(i => i.startsWith(e))) {
+            acc :+ e
+          } else {
+            acc
+          }
+        }

       Some(project, filter, projectColumnNames, filterColumnNames)
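The fold above de-duplicates the extracted filter names: names are visited longest first, and a name is dropped when an already-kept longer name starts with it, so a bare struct root does not shadow its nested fields. A quick self-contained check (hypothetical names):

val names = Set("nested.leaf.cnt", "nested", "date").toSeq
  .sortBy(-_.length)
  .foldLeft(Seq.empty[String]) { (acc, e) =>
    if (!acc.exists(i => i.startsWith(e))) acc :+ e else acc
  }
// names == Seq("nested.leaf.cnt", "date"); the parent "nested" is subsumed
// by the longer nested field name.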
src/main/scala/com/microsoft/hyperspace/index/rules/PlanUtils.scala (new file, +165)

@@ -0,0 +1,165 @@
+/*
+ * Copyright (2020) The Hyperspace Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.microsoft.hyperspace.index.rules
+
+import scala.util.Try
+
+import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression, GetStructField}
+import org.apache.spark.sql.types.{DataType, StructType}
+
+object PlanUtils {
+
+  /**
+   * Extracts field names from a Spark Catalyst [[Expression]].
+   *
+   * @param exp The Spark Catalyst expression from which to extract names.
+   * @return A set of distinct field names.
+   */
+  def extractNamesFromExpression(exp: Expression): Set[String] = {
+    exp match {
+      case AttributeReference(name, _, _, _) =>
+        Set(name)
+      case Alias(child, _) =>
+        extractNamesFromExpression(child)
+      case otherExp =>
+        otherExp.containsChild.map {
+          case g: GetStructField =>
+            getChildNameFromStruct(g)
+          case e: Expression =>
+            extractNamesFromExpression(e).filter(_.nonEmpty).mkString(".")
+          case _ => ""
+        }
+    }
+  }
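For a bare column reference the method returns the column's name, and aliases are unwrapped to their child; a hand-built check (hypothetical column name):

import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
import org.apache.spark.sql.types.IntegerType

val cnt = AttributeReference("cnt", IntegerType)()
PlanUtils.extractNamesFromExpression(cnt)               // Set("cnt")
PlanUtils.extractNamesFromExpression(Alias(cnt, "c")()) // Set("cnt"), alias unwrapped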
+
+  /**
+   * Given a [[GetStructField]] expression for a nested field (i.e. a struct),
+   * extracts the full `.` (dot) separated field name.
+   *
+   * @param field The [[GetStructField]] field from which we want to extract
+   *              the name.
+   * @return A field name, `.` (dot) separated if nested.
+   */
+  def getChildNameFromStruct(field: GetStructField): String = {
+    field.child match {
+      case f: GetStructField =>
+        s"${getChildNameFromStruct(f)}.${field.name.get}"
+      case a: AttributeReference =>
+        s"${a.name}.${field.name.get}"
+      case _ =>
+        field.name.get
+    }
+  }
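Worked example: for a field nested two structs deep, the recursion rebuilds the dotted path from the innermost [[GetStructField]] back to the root attribute (hand-built, hypothetical schema nested: STRUCT<leaf: STRUCT<cnt: INT>>):

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GetStructField}
import org.apache.spark.sql.types._

val leafType = StructType(Seq(StructField("cnt", IntegerType)))
val nested = AttributeReference("nested", StructType(Seq(StructField("leaf", leafType))))()
val cnt = GetStructField(GetStructField(nested, 0, Some("leaf")), 0, Some("cnt"))

PlanUtils.getChildNameFromStruct(cnt) // "nested.leaf.cnt"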
+
+  /**
+   * Given a Spark Catalyst [[Expression]] and a field name, extracts the
+   * parent expression and the expression that contains the field name.
+   *
+   * @param exp The Spark Catalyst [[Expression]] to extract from.
+   * @param name The field name to search for.
+   * @return A tuple with the parent expression and the leaf expression that
+   *         contains the given name.
+   */
+  def extractSearchQuery(exp: Expression, name: String): (Expression, Expression) = {
+    // Escape the dot: String.split takes a regex, and an unescaped "." would
+    // match every character and yield an empty array.
+    val splits = name.split("\\.")
+    val expFound = exp.find {
+      case a: AttributeReference if splits.forall(s => a.name.contains(s)) => true
+      case f: GetStructField if splits.forall(s => f.toString().contains(s)) => true
+      case _ => false
+    }.get
+    val parent = exp.find {
+      case e: Expression if e.containsChild.contains(expFound) => true
+      case _ => false
+    }.get
+    (parent, expFound)
+  }
+
+  /**
+   * Given a parent Spark Catalyst [[Expression]], a needle [[Expression]] and
+   * a replacement [[Expression]], replaces the needle with the replacement
+   * inside the parent expression.
+   *
+   * @param parent The parent Spark Catalyst [[Expression]] in which to replace.
+   * @param needle The Spark Catalyst [[Expression]] needle to search for.
+   * @param repl The replacement Spark Catalyst [[Expression]].
+   * @return A new Spark Catalyst [[Expression]].
+   */
+  def replaceInSearchQuery(
+      parent: Expression,
+      needle: Expression,
+      repl: Expression): Expression = {
+    parent.mapChildren { c =>
+      if (c == needle) {
+        repl
+      } else {
+        c
+      }
+    }
+  }
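A hand-built check of the two helpers above, again assuming a schema nested: STRUCT<leaf: STRUCT<cnt: INT>> and a hypothetical flattened index column name:

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

val leafType = StructType(Seq(StructField("cnt", IntegerType)))
val nested = AttributeReference("nested", StructType(Seq(StructField("leaf", leafType))))()
val cnt = GetStructField(GetStructField(nested, 0, Some("leaf")), 0, Some("cnt"))
val pred = EqualTo(cnt, Literal(1)) // nested.leaf.cnt = 1

val (parent, found) = PlanUtils.extractSearchQuery(pred, "nested.leaf.cnt")
// parent == pred, found == cnt

val flat = AttributeReference("__hs_nested.nested.leaf.cnt", IntegerType)()
PlanUtils.replaceInSearchQuery(parent, found, flat)
// => EqualTo(flat, Literal(1)): the predicate now targets the flattened
//    index column instead of the struct access.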
+
+  /**
+   * Given a Spark Catalyst [[Expression]] and a field name, extracts the
+   * [[AttributeReference]] for that field name.
+   *
+   * @param exp The Spark Catalyst [[Expression]] to extract from.
+   * @param name The field name for which to extract the attribute reference.
+   * @return A Spark Catalyst [[AttributeReference]] pointing to the field name.
+   */
+  def extractAttributeRef(exp: Expression, name: String): AttributeReference = {
+    val splits = name.split("\\.") // escaped: split on a literal dot
+    val elem = exp.find {
+      case a: AttributeReference if splits.contains(a.name) => true
+      case _ => false
+    }
+    elem.get.asInstanceOf[AttributeReference]
+  }
+
+  /**
+   * Given a Spark Catalyst [[Expression]] and a field name, extracts the
+   * type of the field as a Spark SQL [[DataType]].
+   *
+   * @param exp The Spark Catalyst [[Expression]] from which to extract the type.
+   * @param name The field name for which we need to get the type.
+   * @return The Spark SQL [[DataType]] of the given field name.
+   */
+  def extractTypeFromExpression(exp: Expression, name: String): DataType = {
+    val splits = name.split("\\.") // escaped: split on a literal dot
+    val elem = exp.flatMap {
+      case a: AttributeReference =>
+        if (splits.forall(s => a.name == s)) {
+          Some((name, a.dataType))
+        } else {
+          Try({
+            val h :: t = splits.toList
+            if (a.name == h && a.dataType.isInstanceOf[StructType]) {
+              val currentDataType = a.dataType.asInstanceOf[StructType]
+              val foldedFields = t.foldLeft(Seq.empty[(String, DataType)]) { (acc, i) =>
+                val idx = currentDataType.indexWhere(_.name.equalsIgnoreCase(i))
+                acc :+ (i, currentDataType(idx).dataType)
+              }
+              Some(foldedFields.last)
+            } else {
+              None
+            }
+          }).getOrElse(None)
+        }
+      case f: GetStructField if splits.forall(s => f.toString().contains(s)) =>
+        Some((name, f.dataType))
+      case _ => None
+    }
+    elem.find(e => e._1 == name || e._1 == splits.last).get._2
+  }
+}
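Worked example for the type extraction, reusing a hand-built nested.leaf.cnt expression (hypothetical schema); the [[GetStructField]] branch resolves the leaf type directly:

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GetStructField}
import org.apache.spark.sql.types._

val leafType = StructType(Seq(StructField("cnt", IntegerType)))
val nested = AttributeReference("nested", StructType(Seq(StructField("leaf", leafType))))()
val cnt = GetStructField(GetStructField(nested, 0, Some("leaf")), 0, Some("cnt"))

PlanUtils.extractTypeFromExpression(cnt, "nested.leaf.cnt") // IntegerType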

src/main/scala/com/microsoft/hyperspace/index/rules/RuleUtils.scala (+78 −5)
@@ -21,7 +21,7 @@ import scala.collection.mutable
 import org.apache.hadoop.fs.Path
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, In, Literal, Not}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, ExprId, GetStructField, In, Literal, Not}
 import org.apache.spark.sql.catalyst.optimizer.OptimizeIn
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.execution.datasources._
@@ -32,8 +32,9 @@ import com.microsoft.hyperspace.Hyperspace
 import com.microsoft.hyperspace.index._
 import com.microsoft.hyperspace.index.IndexLogEntryTags.{HYBRIDSCAN_RELATED_CONFIGS, IS_HYBRIDSCAN_CANDIDATE}
 import com.microsoft.hyperspace.index.plans.logical.{BucketUnion, IndexHadoopFsRelation}
+import com.microsoft.hyperspace.index.rules.PlanUtils._
 import com.microsoft.hyperspace.index.sources.FileBasedRelation
-import com.microsoft.hyperspace.util.HyperspaceConf
+import com.microsoft.hyperspace.util.{HyperspaceConf, ResolverUtils, SchemaUtils}

 object RuleUtils {
@@ -278,10 +279,30 @@
           new ParquetFileFormat,
           Map(IndexConstants.INDEX_RELATION_IDENTIFIER))(spark, index)

-        val updatedOutput = relation.plan.output
-          .filter(attr => indexFsRelation.schema.fieldNames.contains(attr.name))
-          .map(_.asInstanceOf[AttributeReference])
+        val resolvedFields =
+          ResolverUtils.resolve(spark, index.indexedColumns ++ index.includedColumns, relation.plan)
+        val updatedOutput =
+          if (resolvedFields.isDefined && resolvedFields.get.exists(_._2)) {
+            indexFsRelation.schema.flatMap { s =>
+              val exprId = getFieldPosition(index, s.name)
+              relation.plan.output.find(a => s.name.contains(a.name)).map { a =>
+                AttributeReference(s.name, s.dataType, a.nullable, a.metadata)(
+                  ExprId(exprId),
+                  a.qualifier)
+              }
+            }
+          } else {
+            relation.plan.output
+              .filter(attr => indexFsRelation.schema.fieldNames.contains(attr.name))
+              .map(_.asInstanceOf[AttributeReference])
+          }
         relation.createLogicalRelation(indexFsRelation, updatedOutput)
+
+      case p: Project if provider.isSupportedProject(p) =>
+        transformProject(p, index)
+
+      case f: Filter if provider.isSupportedFilter(f) =>
+        transformFilter(f, index)
     }
   }
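The nested-field branch above rebuilds the relation output from the index schema: each flattened index field is matched to the source attribute whose name it contains, and the field's position in the index schema is reused as a deterministic ExprId. The matching itself is plain string containment, sketched here with hypothetical names:

val indexSchemaFields = Seq("date", "__hs_nested.nested.leaf.cnt")
val relationOutput = Seq("date", "nested")
val matched = indexSchemaFields.zipWithIndex.flatMap { case (s, pos) =>
  relationOutput.find(a => s.contains(a)).map(a => (s, a, pos))
}
// matched == Seq(("date", "date", 0), ("__hs_nested.nested.leaf.cnt", "nested", 1))
// pos becomes the ExprId of the rebuilt AttributeReference.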
@@ -568,4 +589,56 @@ object RuleUtils {
     assert(shuffleInjected)
     shuffled
   }
+
+  private def transformProject(project: Project, index: IndexLogEntry): Project = {
+    val projectedFields = project.projectList.map { exp =>
+      val fieldName = extractNamesFromExpression(exp).head
+      val escapedFieldName = SchemaUtils.prefixNestedFieldName(fieldName)
+      val attr = extractAttributeRef(exp, fieldName)
+      val fieldType = extractTypeFromExpression(exp, fieldName)
+      val exprId = getFieldPosition(index, escapedFieldName)
+      attr.copy(escapedFieldName, fieldType, attr.nullable, attr.metadata)(
+        ExprId(exprId),
+        attr.qualifier)
+    }
+    project.copy(projectList = projectedFields)
+  }
+
+  private def transformFilter(filter: Filter, index: IndexLogEntry): Filter = {
+    val fieldNames = extractNamesFromExpression(filter.condition)
+    var mutableFilter = filter
+    fieldNames.foreach { fieldName =>
+      val escapedFieldName = SchemaUtils.prefixNestedFieldName(fieldName)
+      val nestedFields = getNestedFields(index)
+      if (nestedFields.nonEmpty &&
+          nestedFields.exists(i => i.equalsIgnoreCase(escapedFieldName))) {
+        val (parentExpression, exp) =
+          extractSearchQuery(filter.condition, fieldName)
+        val fieldType = extractTypeFromExpression(exp, fieldName)
+        val attr = extractAttributeRef(exp, fieldName)
+        val exprId = getFieldPosition(index, escapedFieldName)
+        val newAttr = attr.copy(escapedFieldName, fieldType, attr.nullable, attr.metadata)(
+          ExprId(exprId),
+          attr.qualifier)
+        val newExp = exp match {
+          case _: GetStructField => newAttr
+          case other: Expression => other
+        }
+        val newParentExpression =
+          replaceInSearchQuery(parentExpression, exp, newExp)
+        mutableFilter = filter.copy(condition = newParentExpression)
+      } else {
+        filter
+      }
+    }
+    mutableFilter
+  }
+
+  private def getNestedFields(index: IndexLogEntry): Seq[String] = {
+    index.schema.fieldNames.filter(_.startsWith(SchemaUtils.NESTED_FIELD_PREFIX))
+  }
+
+  private def getFieldPosition(index: IndexLogEntry, fieldName: String): Int = {
+    index.schema.fieldNames.indexWhere(_.equalsIgnoreCase(fieldName))
+  }
 }

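Taken together, the changes let a filter over a nested field be answered from an index. An end-to-end sketch using the public Hyperspace API (path and schema are hypothetical; the nested-column support in the rules is what this commit adds):

import com.microsoft.hyperspace._
import com.microsoft.hyperspace.index.IndexConfig

val hs = new Hyperspace(spark)
// Schema: date STRING, nested STRUCT<leaf: STRUCT<cnt: INT>>
val df = spark.read.parquet("/data/table")

// Index a nested field; it is stored flattened under the nested-field prefix.
hs.createIndex(df, IndexConfig("filterIdx", Seq("nested.leaf.cnt"), Seq("date")))

spark.enableHyperspace()
val q = df.filter(df("nested.leaf.cnt") === 1).select("date")
// FilterIndexRule can now rewrite q's scan to read "filterIdx" instead of
// the original files.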