Skip to content

Commit

Permalink
adjustment to make rowCount calculation more robust and hopefully fixing null error for polyAlg tests
Browse files Browse the repository at this point in the history
  • Loading branch information
datomo committed Dec 9, 2024
1 parent 81b4b4b commit e6154a9
Show file tree
Hide file tree
Showing 33 changed files with 199 additions and 153 deletions.
16 changes: 10 additions & 6 deletions core/src/main/java/org/polypheny/db/algebra/AbstractAlgNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import lombok.Getter;
Expand Down Expand Up @@ -321,8 +322,12 @@ public AlgNode accept( RexShuttle shuttle ) {
/**
 * Default self-cost estimate: queries the tuple count of this node and builds a cost from it.
 * NOTE(review): this is a diff rendering without +/- markers; the removed lines are directly
 * followed by the lines that replace them.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// by default, assume cost is proportional to number of rows
// Removed by this commit: the tuple count was assigned straight to a primitive double.
double tupleCount = mq.getTupleCount( this );
return planner.getCostFactory().makeCost( tupleCount, tupleCount, 0 );
// Added by this commit: the tuple count is handled as an Optional; when it is empty the
// result of makeInfiniteCost() is returned instead of unboxing a missing value.
Optional<Double> tupleCount = mq.getTupleCount( this );
if ( tupleCount.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}

return planner.getCostFactory().makeCost( tupleCount.get(), tupleCount.get(), 0 );
}


Expand Down Expand Up @@ -456,10 +461,9 @@ private ObjectNode serializeMetadata( ObjectMapper mapper, GlobalStats gs ) {
}
PolyAlgMetadata meta = new PolyAlgMetadata( mapper, gs );
AlgMetadataQuery mq = this.getCluster().getMetadataQuery();
try {
meta.addCosts( mq.getNonCumulativeCost( this ), mq.getCumulativeCost( this ), mq.getTupleCount( this ) );
} catch ( Exception ignored ) {
}

mq.getTupleCount( this ).ifPresent( aDouble -> meta.addCosts( mq.getNonCumulativeCost( this ), mq.getCumulativeCost( this ), aDouble ) );

return meta.serialize();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.polypheny.db.plan.AlgPlanner;
import org.polypheny.db.plan.AlgTraitDef;
import org.polypheny.db.plan.AlgTraitSet;
import java.util.Optional;


/**
Expand Down Expand Up @@ -70,12 +71,13 @@ protected ConverterImpl( AlgCluster cluster, AlgTraitDef<?> traitDef, AlgTraitSe

/**
 * Self-cost of a converter, derived from the tuple count of its input.
 * NOTE(review): diff rendering without +/- markers; removed and added lines are interleaved below.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Removed by this commit: nullable Double, with null replaced by Double.MAX_VALUE.
Double dRows = mq.getTupleCount( getInput() );
if ( dRows == null ) {
dRows = Double.MAX_VALUE;
}
// Added by this commit: the count is now held as an Optional<Double>.
Optional<Double> dRows = mq.getTupleCount( getInput() );
double dIo = 0;
// Removed by this commit: passed the Double directly as the first two cost arguments.
return planner.getCostFactory().makeCost( dRows, dRows, dIo );
// Added by this commit: an empty Optional keeps the earlier Double.MAX_VALUE fallback
// (unlike other hunks in this commit, it does not call makeInfiniteCost()).
if ( dRows.isEmpty() ) {
return planner.getCostFactory().makeCost( Double.MAX_VALUE, Double.MAX_VALUE, dIo );
}

return planner.getCostFactory().makeCost( dRows.get(), dRows.get(), dIo );
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.Getter;
Expand Down Expand Up @@ -281,7 +282,10 @@ public double estimateTupleCount( AlgMetadataQuery mq ) {
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// REVIEW jvs: This is bogus, but no more bogus than what's currently in Join.
double rowCount = mq.getTupleCount( this );
Optional<Double> rowCount = mq.getTupleCount( this );
if ( rowCount.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}
// Aggregates with more aggregate functions cost a bit more
float multiplier = 1f + (float) aggCalls.size() * 0.125f;
for ( AggregateCall aggCall : aggCalls ) {
Expand All @@ -290,7 +294,7 @@ public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
multiplier += 0.0125f;
}
}
return planner.getCostFactory().makeCost( rowCount * multiplier, 0, 0 );
return planner.getCostFactory().makeCost( rowCount.get() * multiplier, 0, 0 );
}


Expand Down
4 changes: 2 additions & 2 deletions core/src/main/java/org/polypheny/db/algebra/core/Calc.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ public double estimateTupleCount( AlgMetadataQuery mq ) {

/**
 * Self-cost of a Calc: the first cost argument is this node's tuple count, the second is the
 * input's tuple count multiplied by the program's expression count, the third is zero.
 * NOTE(review): diff rendering without +/- markers; each pair of declarations below is old then new.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Removed by this commit: tuple counts assigned straight to primitives.
double dRows = mq.getTupleCount( this );
double dCpu = mq.getTupleCount( getInput() ) * program.getExprCount();
// Added by this commit: an empty Optional falls back to Double.MAX_VALUE before the arithmetic.
double dRows = mq.getTupleCount( this ).orElse( Double.MAX_VALUE );
double dCpu = mq.getTupleCount( getInput() ).orElse( Double.MAX_VALUE ) * program.getExprCount();
double dIo = 0;
return planner.getCostFactory().makeCost( dRows, dCpu, dIo );
}
Expand Down
15 changes: 11 additions & 4 deletions core/src/main/java/org/polypheny/db/algebra/core/Correlate.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import com.google.common.collect.ImmutableSet;
import java.util.List;
import java.util.Optional;
import lombok.Getter;
import org.polypheny.db.algebra.AlgNode;
import org.polypheny.db.algebra.AlgWriter;
Expand Down Expand Up @@ -158,21 +159,27 @@ public ImmutableSet<CorrelationId> getVariablesSet() {

/**
 * Self-cost of a Correlate: a cost built from this node's tuple count plus the left input's
 * estimated tuple count, plus the right input's planner cost multiplied by a restart factor.
 * Returns makeInfiniteCost() when either estimated input count is infinite and, after this
 * commit, also when a queried tuple count is absent.
 * NOTE(review): diff rendering without +/- markers; removed and added lines are interleaved below.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Removed by this commit.
double rowCount = mq.getTupleCount( this );
// Added by this commit: bail out with an infinite cost when the count is absent.
Optional<Double> rowCount = mq.getTupleCount( this );
if ( rowCount.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}

final double rightRowCount = right.estimateTupleCount( mq );
final double leftRowCount = left.estimateTupleCount( mq );
if ( Double.isInfinite( leftRowCount ) || Double.isInfinite( rightRowCount ) ) {
return planner.getCostFactory().makeInfiniteCost();
}

// Removed by this commit: nullable Double that was later unboxed in "restartCount - 1".
Double restartCount = mq.getTupleCount( getLeft() );
// Added by this commit: same query, guarded against an absent value.
Optional<Double> restartCount = mq.getTupleCount( getLeft() );
if ( restartCount.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}
// RelMetadataQuery.getCumulativeCost(getRight()); does not work for
// RelSubset, so we ask planner to cost-estimate right relation
AlgOptCost rightCost = planner.getCost( getRight(), mq );
// The multiplier is clamped to at least 1.0. First line is the removed form, second the added form.
AlgOptCost rescanCost = rightCost.multiplyBy( Math.max( 1.0, restartCount - 1 ) );
AlgOptCost rescanCost = rightCost.multiplyBy( Math.max( 1.0, restartCount.get() - 1 ) );

// First return is the removed form, second the added form (only rowCount access differs).
return planner.getCostFactory().makeCost( rowCount /* generate results */ + leftRowCount /* relScan left results */, 0, 0 ).plus( rescanCost );
return planner.getCostFactory().makeCost( rowCount.get() /* generate results */ + leftRowCount /* relScan left results */, 0, 0 ).plus( rescanCost );
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.polypheny.db.algebra.AlgDistribution;
import org.polypheny.db.algebra.AlgDistributions;
Expand Down Expand Up @@ -97,9 +98,12 @@ public AlgDistribution getDistribution() {
/**
 * Self-cost based on this node's tuple count and a per-row width of four bytes per field:
 * the first cost argument is nLogN(count) * bytesPerRow, the second is the count itself.
 * NOTE(review): diff rendering without +/- markers; removed and added lines are interleaved below.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Higher cost if rows are wider discourages pushing a project through an exchange.
// Removed by this commit.
double rowCount = mq.getTupleCount( this );
// Added by this commit: an absent count short-circuits to makeInfiniteCost().
Optional<Double> rowCount = mq.getTupleCount( this );
if ( rowCount.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}
double bytesPerRow = getTupleType().getFieldCount() * 4;
// First return is the removed form, second the added form (only rowCount access differs).
return planner.getCostFactory().makeCost( Util.nLogN( rowCount ) * bytesPerRow, rowCount, 0 );
return planner.getCostFactory().makeCost( Util.nLogN( rowCount.get() ) * bytesPerRow, rowCount.get(), 0 );
}


Expand Down
10 changes: 7 additions & 3 deletions core/src/main/java/org/polypheny/db/algebra/core/Filter.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.Optional;
import lombok.Getter;
import org.polypheny.db.algebra.AlgNode;
import org.polypheny.db.algebra.AlgWriter;
Expand Down Expand Up @@ -130,10 +131,13 @@ public boolean isValid( Litmus litmus, Context context ) {

/**
 * Self-cost of a Filter: first cost argument is this node's tuple count, second is the
 * input's tuple count, third is zero.
 * NOTE(review): diff rendering without +/- markers; removed and added lines are interleaved below.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Removed by this commit: both counts assigned straight to primitives.
double dRows = mq.getTupleCount( this );
double dCpu = mq.getTupleCount( getInput() );
// Added by this commit: both counts are kept as Optionals.
Optional<Double> dRows = mq.getTupleCount( this );
Optional<Double> dCpu = mq.getTupleCount( getInput() );
double dIo = 0;
// Removed by this commit.
return planner.getCostFactory().makeCost( dRows, dCpu, dIo );
// Added by this commit: if either count is absent the cost is makeInfiniteCost().
if(dRows.isEmpty() || dCpu.isEmpty()) {
return planner.getCostFactory().makeInfiniteCost();
}
return planner.getCostFactory().makeCost( dRows.get(), dCpu.get(), dIo );
}


Expand Down
8 changes: 6 additions & 2 deletions core/src/main/java/org/polypheny/db/algebra/core/Join.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import lombok.Getter;
import org.polypheny.db.algebra.AlgNode;
Expand Down Expand Up @@ -162,8 +163,11 @@ public boolean isValid( Litmus litmus, Context context ) {
/**
 * Self-cost of a Join: only the first cost argument is set, to this node's tuple count.
 * NOTE(review): diff rendering without +/- markers; the removed lines precede their replacement.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// REVIEW jvs: Just for now...
// Removed by this commit.
double rowCount = mq.getTupleCount( this );
return planner.getCostFactory().makeCost( rowCount, 0, 0 );
// Added by this commit: an absent count yields makeInfiniteCost().
Optional<Double> rowCount = mq.getTupleCount( this );
if ( rowCount.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}
return planner.getCostFactory().makeCost( rowCount.get(), 0, 0 );
}


Expand Down
10 changes: 7 additions & 3 deletions core/src/main/java/org/polypheny/db/algebra/core/Project.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.calcite.linq4j.Ord;
Expand Down Expand Up @@ -180,10 +181,13 @@ public boolean isValid( Litmus litmus, Context context ) {

/**
 * Self-cost of a Project: first cost argument is the input's tuple count, second is that
 * count multiplied by the number of projected expressions, third is zero.
 * NOTE(review): diff rendering without +/- markers; removed and added lines are interleaved below.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Removed by this commit.
double dRows = mq.getTupleCount( getInput() );
double dCpu = dRows * exps.size();
// Added by this commit: an absent input count yields makeInfiniteCost().
Optional<Double> dRows = mq.getTupleCount( getInput() );
if ( dRows.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}
double dCpu = dRows.get() * exps.size();
double dIo = 0;
// First return is the removed form, second the added form (only dRows access differs).
return planner.getCostFactory().makeCost( dRows, dCpu, dIo );
return planner.getCostFactory().makeCost( dRows.get(), dCpu, dIo );
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.calcite.linq4j.Ord;
Expand All @@ -58,7 +59,7 @@

/**
* Relational expression that calls a table-valued function.
*
* <p>
* The function returns a result set.
* It can appear as a leaf in a query tree, or can be applied to relational inputs.
*
Expand Down Expand Up @@ -151,23 +152,16 @@ public double estimateTupleCount( AlgMetadataQuery mq ) {
// Calculate result as the sum of the input row count estimates, assuming there are any, otherwise use the superclass default. So for a no-input UDX, behave like an AbstractAlgNode;
// for a one-input UDX, behave like a SingleRel; for a multi-input UDX, behave like UNION ALL.
// TODO jvs 10-Sep-2007: UDX-supplied costing metadata.
if ( inputs.size() == 0 ) {
if ( inputs.isEmpty() ) {
return super.estimateTupleCount( mq );
}
double nRows = 0.0;
for ( AlgNode input : inputs ) {
Double d = mq.getTupleCount( input );
if ( d != null ) {
nRows += d;
}
}
return nRows;
return inputs.stream().map( mq::getTupleCount ).filter( Optional::isPresent ).mapToDouble( Optional::get ).sum(); // todo maybe only use the sum if all are not infinite
}


/**
* Returns function invocation expression.
*
* <p>
* Within this rexCall, instances of {@link RexIndexRef} refer to entire input {@link AlgNode}s rather than their fields.
*
* @return function invocation expression
Expand Down
10 changes: 7 additions & 3 deletions core/src/main/java/org/polypheny/db/algebra/core/Sort.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import lombok.Getter;
import org.apache.calcite.linq4j.Ord;
Expand Down Expand Up @@ -144,10 +145,13 @@ public final Sort copy( AlgTraitSet traitSet, List<AlgNode> inputs ) {
/**
 * Self-cost of a Sort: first cost argument is this node's tuple count, second is
 * nLogN(count) multiplied by a row width of four bytes per field, third is zero.
 * NOTE(review): diff rendering without +/- markers; removed and added lines are interleaved below.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Higher cost if rows are wider discourages pushing a project through a sort.
// Removed by this commit.
final double rowCount = mq.getTupleCount( this );
// Added by this commit: an absent count yields makeInfiniteCost().
Optional<Double> rowCount = mq.getTupleCount( this );
if ( rowCount.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}
final double bytesPerRow = getTupleType().getFieldCount() * 4;
// Removed by this commit.
final double cpu = Util.nLogN( rowCount ) * bytesPerRow;
return planner.getCostFactory().makeCost( rowCount, cpu, 0 );
// Added by this commit (same formula, reading the value out of the Optional).
final double cpu = Util.nLogN( rowCount.get() ) * bytesPerRow;
return planner.getCostFactory().makeCost( rowCount.get(), cpu, 0 );
}


Expand Down
8 changes: 6 additions & 2 deletions core/src/main/java/org/polypheny/db/algebra/core/Values.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import lombok.Getter;
Expand Down Expand Up @@ -144,12 +145,15 @@ protected AlgDataType deriveRowType() {

/**
 * Self-cost of a Values node: first cost argument is this node's tuple count, second is the
 * constant 1, third is zero.
 * NOTE(review): diff rendering without +/- markers; removed and added lines are interleaved below.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// Removed by this commit.
double dRows = mq.getTupleCount( this );
// Added by this commit: an absent count yields makeInfiniteCost().
Optional<Double> dRows = mq.getTupleCount( this );
if ( dRows.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}

// Assume CPU is negligible since values are precomputed.
double dCpu = 1;
double dIo = 0;
// First return is the removed form, second the added form (only dRows access differs).
return planner.getCostFactory().makeCost( dRows, dCpu, dIo );
return planner.getCostFactory().makeCost( dRows.get(), dCpu, dIo );
}


Expand Down
8 changes: 6 additions & 2 deletions core/src/main/java/org/polypheny/db/algebra/core/Window.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import java.util.AbstractList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.calcite.linq4j.Ord;
import org.polypheny.db.algebra.AlgCollation;
Expand Down Expand Up @@ -207,12 +208,15 @@ public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
//
// TODO #1. Add memory cost.
// TODO #2. MIN and MAX have higher CPU cost than SUM and COUNT.
final double rowsIn = mq.getTupleCount( getInput() );
Optional<Double> rowsIn = mq.getTupleCount( getInput() );
if ( rowsIn.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}
int count = groups.size();
for ( Group group : groups ) {
count += group.aggCalls.size();
}
return planner.getCostFactory().makeCost( rowsIn, rowsIn * count, 0 );
return planner.getCostFactory().makeCost( rowsIn.get(), rowsIn.get() * count, 0 );
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,7 @@ public AlgWriter explainTerms( AlgWriter pw ) {
/**
 * Self-cost where only the first cost argument is set, to this node's tuple count.
 * NOTE(review): diff rendering without +/- markers; the two removed statements are followed
 * by the single statement that replaces them.
 */
@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
// REVIEW jvs: Just for now...
// Removed by this commit.
double rowCount = mq.getTupleCount( this );
return planner.getCostFactory().makeCost( rowCount, 0, 0 );
// Added by this commit: maps a present count to a cost, otherwise falls back to makeInfiniteCost().
// Note that the orElse argument is evaluated even when a count is present.
return mq.getTupleCount( this ).map( count -> planner.getCostFactory().makeCost( count, 0, 0 ) ).orElse( planner.getCostFactory().makeInfiniteCost() );
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@


import com.google.common.collect.ImmutableList;
import java.util.Optional;
import java.util.Set;
import org.apache.calcite.linq4j.tree.BlockBuilder;
import org.apache.calcite.linq4j.tree.Expression;
Expand Down Expand Up @@ -96,7 +97,12 @@ public static EnumerableThetaJoin create( AlgNode left, AlgNode right, RexNode c

@Override
public AlgOptCost computeSelfCost( AlgPlanner planner, AlgMetadataQuery mq ) {
double rowCount = mq.getTupleCount( this );
Optional<Double> count = mq.getTupleCount( this );
if ( count.isEmpty() ) {
return planner.getCostFactory().makeInfiniteCost();
}

double rowCount = count.get();

// Joins can be flipped, and for many algorithms, both versions are viable and have the same cost. To make the results stable between versions of the planner,
// make one of the versions slightly more expensive.
Expand Down
Loading

0 comments on commit e6154a9

Please sign in to comment.