[Kernel] Refactor expressions #1997

Merged
4 commits merged on Sep 12, 2023
Changes from all commits
23 changes: 9 additions & 14 deletions kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java
@@ -26,18 +26,14 @@
import io.delta.kernel.client.FileReadContext;
import io.delta.kernel.client.ParquetHandler;
import io.delta.kernel.client.TableClient;
import io.delta.kernel.data.ColumnVector;
import io.delta.kernel.data.ColumnarBatch;
import io.delta.kernel.data.DataReadResult;
import io.delta.kernel.data.FileDataReadResult;
import io.delta.kernel.data.Row;
import io.delta.kernel.expressions.Expression;
import io.delta.kernel.expressions.Literal;
import io.delta.kernel.data.*;
import io.delta.kernel.expressions.Predicate;
import io.delta.kernel.types.StructField;
import io.delta.kernel.types.StructType;
import io.delta.kernel.utils.CloseableIterator;
import io.delta.kernel.utils.Tuple2;
import io.delta.kernel.utils.Utils;
import static io.delta.kernel.expressions.AlwaysTrue.ALWAYS_TRUE;

import io.delta.kernel.internal.actions.DeletionVectorDescriptor;
import io.delta.kernel.internal.data.AddFileColumnarBatch;
@@ -67,9 +63,9 @@ public interface Scan {
* Get the remaining filter that is not guaranteed to be satisfied for the data Delta Kernel
* returns. This filter is used by Delta Kernel to do data skipping when possible.
*
* @return the remaining filter as an {@link Expression}.
* @return the remaining filter as a {@link Predicate}.
*/
Optional<Expression> getRemainingFilter();
Optional<Predicate> getRemainingFilter();

/**
* Get the scan state associated with the current scan. This state is common across all
@@ -88,9 +84,8 @@ public interface Scan {
* @param scanFileRowIter an iterator of {@link Row}s. Each {@link Row} represents one scan file
* from the {@link ColumnarBatch} returned by
* {@link Scan#getScanFiles(TableClient)}
* @param filter An optional filter that can be used for data skipping while reading
* the
* scan files.
* @param predicate An optional predicate that can be used for data skipping while reading
* the scan files.
* @return Data read from the input scan files as an iterator of {@link DataReadResult}s. Each
* {@link DataReadResult} instance contains the data read and an optional selection
* vector that indicates data rows as valid or invalid. It is the responsibility of the
@@ -101,7 +96,7 @@ static CloseableIterator<DataReadResult> readData(
TableClient tableClient,
Row scanState,
CloseableIterator<Row> scanFileRowIter,
Optional<Expression> filter) throws IOException {
Optional<Predicate> predicate) throws IOException {
StructType physicalSchema = Utils.getPhysicalSchema(tableClient, scanState);
StructType logicalSchema = Utils.getLogicalSchema(tableClient, scanState);
List<String> partitionColumns = Utils.getPartitionColumns(scanState);
@@ -122,7 +117,7 @@ static CloseableIterator<DataReadResult> readData(
CloseableIterator<FileReadContext> filesReadContextsIter =
parquetHandler.contextualizeFileReads(
scanFileRowIter,
filter.orElse(Literal.TRUE));
predicate.orElse(ALWAYS_TRUE));

CloseableIterator<FileDataReadResult> data = parquetHandler.readParquetFiles(
filesReadContextsIter,
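For orientation (not part of this PR's changes), a caller of the refactored entry point might look like the sketch below. It uses only the signatures visible in this file: the filter argument is now an Optional<Predicate>, and Scan.readData itself falls back to ALWAYS_TRUE when it is empty. The class and method names in the sketch are illustrative.

import java.io.IOException;
import java.util.Optional;

import io.delta.kernel.Scan;
import io.delta.kernel.client.TableClient;
import io.delta.kernel.data.DataReadResult;
import io.delta.kernel.data.Row;
import io.delta.kernel.expressions.Predicate;
import io.delta.kernel.utils.CloseableIterator;

class ReadWithPredicateSketch {
    // Read the given scan files, optionally pushing down a data-skipping predicate.
    static CloseableIterator<DataReadResult> read(
            TableClient tableClient,
            Row scanState,
            CloseableIterator<Row> scanFileRows,
            Optional<Predicate> dataSkippingPredicate) throws IOException {
        // An empty Optional is valid: readData substitutes AlwaysTrue.ALWAYS_TRUE internally,
        // as shown in the hunk above.
        return Scan.readData(tableClient, scanState, scanFileRows, dataSkippingPredicate);
    }
}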
kernel/kernel-api/src/main/java/io/delta/kernel/ScanBuilder.java
@@ -18,7 +18,7 @@

import io.delta.kernel.annotation.Evolving;
import io.delta.kernel.client.TableClient;
import io.delta.kernel.expressions.Expression;
import io.delta.kernel.expressions.Predicate;
import io.delta.kernel.types.StructType;

/**
@@ -34,10 +34,10 @@ public interface ScanBuilder {
* the given filter.
*
* @param tableClient {@link TableClient} instance to use in Delta Kernel.
* @param filter an {@link Expression} which evaluates to boolean.
* @param predicate a {@link Predicate} to prune the metadata or data.
* @return A {@link ScanBuilder} with filter applied.
*/
ScanBuilder withFilter(TableClient tableClient, Expression filter);
ScanBuilder withFilter(TableClient tableClient, Predicate predicate);

/**
* Apply the given <i>readSchema</i>. If the builder already has a projection applied, calling
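As a hedged sketch of the new withFilter contract (not taken from this PR): the predicate is built with the Predicate(String, List<Expression>) constructor that AlwaysTrue and AlwaysFalse also call further down in this diff, assuming Predicate is directly instantiable as in the published kernel API. The "=" name, the build() call on ScanBuilder, and the way the operands are obtained are assumptions about the surrounding API rather than part of this change.

import java.util.Arrays;
import java.util.Optional;

import io.delta.kernel.Scan;
import io.delta.kernel.ScanBuilder;
import io.delta.kernel.client.TableClient;
import io.delta.kernel.expressions.Expression;
import io.delta.kernel.expressions.Predicate;

class WithFilterSketch {
    // Build an equality predicate over two operands (e.g. a column reference and a literal)
    // and hand it to the scan builder for metadata/data pruning.
    static Scan buildFilteredScan(
            TableClient tableClient,
            ScanBuilder builder,
            Expression column,   // e.g. a Column (construction not shown in this diff)
            Expression value) {  // e.g. a Literal
        // "=" is an illustrative predicate name; supported names depend on the expression handler.
        Predicate equals = new Predicate("=", Arrays.asList(column, value));
        Scan scan = builder.withFilter(tableClient, equals).build();
        // Whatever the kernel could not guarantee is reported back as a Predicate as well.
        Optional<Predicate> remaining = scan.getRemainingFilter();
        remaining.ifPresent(p -> System.out.println("Remaining filter: " + p));
        return scan;
    }
}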
kernel/kernel-api/src/main/java/io/delta/kernel/client/ExpressionHandler.java
@@ -13,12 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.delta.kernel.client;

import io.delta.kernel.annotation.Evolving;
import io.delta.kernel.data.ColumnarBatch;
import io.delta.kernel.expressions.Expression;
import io.delta.kernel.expressions.ExpressionEvaluator;
import io.delta.kernel.types.DataType;
import io.delta.kernel.types.StructType;

/**
@@ -32,12 +33,16 @@
public interface ExpressionHandler {
/**
* Create an {@link ExpressionEvaluator} that can evaluate the given <i>expression</i> on
* {@link io.delta.kernel.data.ColumnarBatch}s with the given <i>batchSchema</i>.
* {@link ColumnarBatch}s with the given <i>batchSchema</i>. The <i>expression</i> is
* expected to be a scalar expression that produces one output value for each input row.
*
* @param batchSchema Schema of the input data.
* @param inputSchema Input data schema
* @param expression Expression to evaluate.
* @return An {@link ExpressionEvaluator} instance bound to the given expression and
* batchSchema.
* @param outputType Expected result data type.
*/
ExpressionEvaluator getEvaluator(StructType batchSchema, Expression expression);
ExpressionEvaluator getEvaluator(
StructType inputSchema,
Expression expression,
Collaborator:
Should this be ScalarExpression if we're adding that as an expression type?

Collaborator Author:
I am OK with updating this, but then we need to start adding more methods to the interface if we end up adding aggregation expressions, or introduce another interface between Expression and ScalarExpression.

@tdas let me know what you think.

Contributor:
Yeah, it's likely we have to for aggregate expressions... because for aggregates, would ExpressionEvaluator produce a ColumnVector?

Contributor:
So it makes sense to make this a ScalarExpression.

Collaborator Author:
Currently Column and Literal are not derived from ScalarExpression. Should we make them extend ScalarExpression before making this change? Otherwise we cannot populate partition values.

Collaborator:
This makes sense to me; it seems like Literal and Column should be scalar expressions.

Collaborator Author:
Literal and Column are leaf expressions which don't take any input expressions. There isn't a one-input, one-output concept here. Also, Spark DS v2 doesn't extend Literal and Column from ScalarExpression.

I am inclined to keep it this way. If there is a strong reason, I will follow up with the change in a separate PR.

Collaborator:
Okay, good to merge. Let's follow up on it with @tdas next week?

DataType outputType);
}
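To illustrate the new three-argument getEvaluator (a sketch under assumptions, not part of this PR): the evaluator is bound once to an input schema, expression, and expected output type, then applied per batch. It assumes ExpressionEvaluator exposes an eval(ColumnarBatch) method returning a ColumnVector with one value per input row, which is not shown in this hunk.

import io.delta.kernel.client.ExpressionHandler;
import io.delta.kernel.data.ColumnVector;
import io.delta.kernel.data.ColumnarBatch;
import io.delta.kernel.expressions.Expression;
import io.delta.kernel.expressions.ExpressionEvaluator;
import io.delta.kernel.types.DataType;
import io.delta.kernel.types.StructType;

class EvaluatorSketch {
    // Bind a scalar expression to the batch schema once, then evaluate it against a batch.
    static ColumnVector evaluate(
            ExpressionHandler handler,
            StructType inputSchema,
            Expression expression,
            DataType outputType,   // for a Predicate this would be the boolean data type
            ColumnarBatch batch) {
        ExpressionEvaluator evaluator =
            handler.getEvaluator(inputSchema, expression, outputType);
        // Assumption: eval consumes a ColumnarBatch and produces one output value per input row.
        return evaluator.eval(batch);
    }
}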
kernel/kernel-api/src/main/java/io/delta/kernel/client/FileHandler.java
@@ -18,7 +18,7 @@

import io.delta.kernel.annotation.Evolving;
import io.delta.kernel.data.Row;
import io.delta.kernel.expressions.Expression;
import io.delta.kernel.expressions.Predicate;
import io.delta.kernel.fs.FileStatus;
import io.delta.kernel.utils.CloseableIterator;

@@ -49,5 +49,5 @@ public interface FileHandler {
*/
CloseableIterator<FileReadContext> contextualizeFileReads(
CloseableIterator<Row> fileIter,
Expression predicate);
Predicate predicate);
}
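Since contextualizeFileReads now requires a Predicate rather than a general Expression, a caller holding an Optional<Predicate> can default it exactly the way Scan.readData does above. A small sketch, assuming FileHandler lives in io.delta.kernel.client alongside the other handler interfaces:

import java.util.Optional;

import static io.delta.kernel.expressions.AlwaysTrue.ALWAYS_TRUE;

import io.delta.kernel.client.FileHandler;
import io.delta.kernel.client.FileReadContext;
import io.delta.kernel.data.Row;
import io.delta.kernel.expressions.Predicate;
import io.delta.kernel.utils.CloseableIterator;

class ContextualizeSketch {
    // Absent filters become ALWAYS_TRUE, the neutral predicate that skips nothing.
    static CloseableIterator<FileReadContext> contextualize(
            FileHandler handler,
            CloseableIterator<Row> fileRows,
            Optional<Predicate> predicate) {
        return handler.contextualizeFileReads(fileRows, predicate.orElse(ALWAYS_TRUE));
    }
}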
kernel/kernel-api/src/main/java/io/delta/kernel/expressions/AlwaysFalse.java (previously EqualTo.java)
@@ -13,21 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.delta.kernel.expressions;

import java.util.Collections;

import io.delta.kernel.annotation.Evolving;

/**
* Evaluates {@code expr1} = {@code expr2} for {@code new EqualTo(expr1, expr2)}.
* Predicate which always evaluates to {@code false}.
*
* @since 3.0.0
*/
public final class EqualTo extends BinaryComparison implements Predicate {
@Evolving
public final class AlwaysFalse extends Predicate {
public static final AlwaysFalse ALWAYS_FALSE = new AlwaysFalse();

public EqualTo(Expression left, Expression right) {
super(left, right, "=");
}

@Override
protected Object nullSafeEval(Object leftResult, Object rightResult) {
return compare(leftResult, rightResult) == 0;
private AlwaysFalse() {
super("ALWAYS_FALSE", Collections.emptyList());
}
}

kernel/kernel-api/src/main/java/io/delta/kernel/expressions/AlwaysTrue.java (previously LeafExpression.java)
@@ -13,31 +13,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.delta.kernel.expressions;

import java.util.Collections;
import java.util.List;
import java.util.Set;

import io.delta.kernel.annotation.Evolving;

/**
* An {@link Expression} with no children.
* Predicate which always evaluates to {@code true}.
*
* @since 3.0.0
*/
public abstract class LeafExpression implements Expression {

protected LeafExpression() {}
@Evolving
public final class AlwaysTrue extends Predicate {
public static final AlwaysTrue ALWAYS_TRUE = new AlwaysTrue();

@Override
public List<Expression> children() {
return Collections.emptyList();
private AlwaysTrue() {
super("ALWAYS_TRUE", Collections.emptyList());
}

@Override
public Set<String> references() {
return Collections.emptySet();
}

public abstract boolean equals(Object o);

public abstract int hashCode();
}
kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java
@@ -13,50 +13,47 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.delta.kernel.expressions;

import java.util.Collection;
import java.util.Arrays;

import io.delta.kernel.types.BooleanType;
import io.delta.kernel.annotation.Evolving;

/**
* Evaluates logical {@code expr1} AND {@code expr2} for {@code new And(expr1, expr2)}.
* {@code AND} expression
* <p>
* Definition:
* <p>
* Requires both left and right input expressions evaluate to booleans.
* <ul>
* <li>Logical {@code expr1} AND {@code expr2} on two inputs.</li>
* <li>Requires both left and right input expressions of type {@link Predicate}.</li>
* <li>Result is null if at least one of the inputs is null.</li>
* </ul>
*
* @since 3.0.0
*/
public final class And extends BinaryOperator implements Predicate {

public static And apply(Collection<Expression> conjunctions) {
if (conjunctions.size() == 0) {
throw new IllegalArgumentException("And.apply must be called with at least 1 element");
}

return (And) conjunctions
.stream()
// we start off with And(true, true)
// then we get the 1st expression: And(And(true, true), expr1)
// then we get the 2nd expression: And(And(true, true), expr1), expr2) etc.
.reduce(new And(Literal.TRUE, Literal.TRUE), And::new);
@Evolving
public final class And extends Predicate {
public And(Predicate left, Predicate right) {
super("AND", Arrays.asList(left, right));
}

public And(Expression left, Expression right) {
super(left, right, "&&");
if (!(left.dataType() instanceof BooleanType) ||
!(right.dataType() instanceof BooleanType)) {
/**
* @return Left side operand.
*/
public Predicate getLeft() {
Collaborator:
Wouldn't this fit best inside of a BinaryPredicate class?

Collaborator Author:
We are trying to minimize the number of interfaces. The goal is to have just one Predicate class that captures all the predicates. And and Or are just two special expressions.

return (Predicate) getChildren().get(0);
}

throw new IllegalArgumentException(
String.format(
"'And' requires expressions of type boolean. Got %s and %s.",
left.dataType(),
right.dataType()
)
);
}
/**
* @return Right side operand.
*/
public Predicate getRight() {
return (Predicate) getChildren().get(1);
}

@Override
public Object nullSafeEval(Object leftResult, Object rightResult) {
return (boolean) leftResult && (boolean) rightResult;
public String toString() {
return "(" + getLeft() + " AND " + getRight() + ")";
}
}
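This refactor drops the And.apply(Collection<Expression>) helper shown as removed above. Callers that used it to fold a list of conjuncts can do the same with the new two-argument constructor, seeded with ALWAYS_TRUE just as the old helper seeded with Literal.TRUE. A sketch using only classes from this PR; the helper name is illustrative:

import java.util.List;

import static io.delta.kernel.expressions.AlwaysTrue.ALWAYS_TRUE;

import io.delta.kernel.expressions.And;
import io.delta.kernel.expressions.Predicate;

class CombinePredicatesSketch {
    // Fold a list of predicates into a left-deep AND tree. Like the removed And.apply,
    // the result keeps a harmless ALWAYS_TRUE leaf; an empty list degenerates to ALWAYS_TRUE.
    static Predicate and(List<Predicate> conjuncts) {
        Predicate combined = ALWAYS_TRUE;
        for (Predicate conjunct : conjuncts) {
            combined = new And(combined, conjunct);
        }
        return combined;
    }
}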

This file was deleted.
