Skip to content

Commit

Permalink
expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
vkorukanti committed Sep 6, 2023
1 parent 6dacf13 commit f190318
Show file tree
Hide file tree
Showing 30 changed files with 1,422 additions and 573 deletions.
2 changes: 1 addition & 1 deletion icebergShaded/generate_iceberg_jars.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def generate_iceberg_jars():
# Search for all glob results
results = glob.glob(compiled_jar_abs_pattern)
# Compiled jars will include tests, sources, javadocs; exclude them
results = list(filter(lambda result: all(x not in result for x in ["test", "source", "javadoc"]), results))
results = list(filter(lambda result: all(x not in result for x in ["test", "sources", "javadoc"]), results))

if len(results) == 0:
raise Exception("Could not find the jar: " + compled_jar_rel_glob_pattern)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.delta.kernel.client;

import io.delta.kernel.data.ColumnarBatch;
import io.delta.kernel.expressions.Expression;
import io.delta.kernel.expressions.ExpressionEvaluator;
import io.delta.kernel.types.DataType;
import io.delta.kernel.types.StructType;

/**
Expand All @@ -28,12 +29,18 @@
public interface ExpressionHandler {
/**
* Create an {@link ExpressionEvaluator} that can evaluate the given <i>expression</i> on
* {@link io.delta.kernel.data.ColumnarBatch}s with the given <i>batchSchema</i>.
* {@link ColumnarBatch}s with the given <i>batchSchema</i>. The <i>expression</i> is
* expected to be a scalar expression where for each one input row there
* is a one output value.
*
* @param batchSchema Schema of the input data.
* @param inputSchema Input data schema
* @param expression Expression to evaluate.
* @param outputType Expected result data type.
* @return An {@link ExpressionEvaluator} instance bound to the given expression and
* batchSchema.
* <i>inputSchem</i>.
*/
ExpressionEvaluator getEvaluator(StructType batchSchema, Expression expression);
ExpressionEvaluator getEvaluator(
StructType inputSchema,
Expression expression,
DataType outputType);
}

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -13,101 +13,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.delta.kernel.expressions;

import java.util.Collections;
import java.util.Objects;
import java.util.Set;

import io.delta.kernel.data.Row;
import io.delta.kernel.types.BooleanType;
import io.delta.kernel.types.DataType;
import io.delta.kernel.types.IntegerType;
import io.delta.kernel.types.LongType;
import io.delta.kernel.types.StringType;
import io.delta.kernel.types.StructType;
import java.util.List;

/**
* A column whose row-value will be computed based on the data in a {@link Row}.
* <p>
* It is recommended that you instantiate using an existing table schema
* {@link StructType} with {@link StructType#column(int)}.
* <p>
* Only supports primitive data types, see
* <a href="https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types">Delta Transaction Log Protocol: Primitive Types</a>.
* An expression type that refers to a column by name (case-sensitive) in the input.
*/
public final class Column extends LeafExpression {
private final int ordinal;
public final class Column implements Expression {
private final String name;
private final DataType dataType;
private final RowEvaluator evaluator;

public Column(int ordinal, String name, DataType dataType) {
this.ordinal = ordinal;
public Column(String name) {
this.name = name;
this.dataType = dataType;

if (dataType instanceof IntegerType) {
evaluator = (row -> row.getInt(ordinal));
} else if (dataType instanceof BooleanType) {
evaluator = (row -> row.getBoolean(ordinal));
} else if (dataType instanceof LongType) {
evaluator = (row -> row.getLong(ordinal));
} else if (dataType instanceof StringType) {
evaluator = (row -> row.getString(ordinal));
} else {
throw new UnsupportedOperationException(
String.format(
"The data type %s of column %s at ordinal %s is not supported",
dataType,
name,
ordinal)
);
}
}

public String name() {
/**
* @return the column name.
*/
public String getName() {
return name;
}

@Override
public Object eval(Row row) {
return row.isNullAt(ordinal) ? null : evaluator.nullSafeEval(row);
}

@Override
public DataType dataType() {
return dataType;
public List<Expression> getChildren() {
return Collections.emptyList();
}

@Override
public String toString() {
return "Column(" + name + ")";
}

@Override
public Set<String> references() {
return Collections.singleton(name);
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Column column = (Column) o;
return Objects.equals(ordinal, column.ordinal) &&
Objects.equals(name, column.name) &&
Objects.equals(dataType, column.dataType);
}

@Override
public int hashCode() {
return Objects.hash(name, dataType);
}

@FunctionalInterface
private interface RowEvaluator {
Object nullSafeEval(Row row);
}
}
Loading

0 comments on commit f190318

Please sign in to comment.