diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 7ebad19812..2419531888 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -99,26 +99,25 @@ ProtoResult Bind::computeResult(bool requestLaziness) { auto applyBind = [this, subRes](IdTable idTable, LocalVocab* localVocab) { return computeExpressionBind(localVocab, std::move(idTable), - subRes->localVocab(), _bind._expression.getPimpl()); }; if (subRes->isFullyMaterialized()) { if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) { - auto localVocab = - std::make_shared(subRes->getCopyOfLocalVocab()); - auto generator = [](std::shared_ptr vocab, auto applyBind, - std::shared_ptr result) - -> cppcoro::generator { - size_t size = result->idTable().size(); - for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) { - co_yield applyBind( - cloneSubView(result->idTable(), - {offset, std::min(size, offset + CHUNK_SIZE)}), - vocab.get()); - } - }(localVocab, std::move(applyBind), std::move(subRes)); - return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + return { + [](auto applyBind, + std::shared_ptr result) -> Result::Generator { + size_t size = result->idTable().size(); + for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) { + LocalVocab outVocab = result->getCopyOfLocalVocab(); + IdTable idTable = applyBind( + cloneSubView(result->idTable(), + {offset, std::min(size, offset + CHUNK_SIZE)}), + &outVocab); + co_yield {std::move(idTable), std::move(outVocab)}; + } + }(std::move(applyBind), std::move(subRes)), + resultSortedOn()}; } // Make a deep copy of the local vocab from `subRes` and then add to it (in // case BIND adds a new word or words). @@ -132,28 +131,25 @@ ProtoResult Bind::computeResult(bool requestLaziness) { LOG(DEBUG) << "BIND result computation done." << std::endl; return {std::move(result), resultSortedOn(), std::move(localVocab)}; } - auto localVocab = std::make_shared(); auto generator = - [](std::shared_ptr vocab, auto applyBind, - std::shared_ptr result) -> cppcoro::generator { - for (IdTable& idTable : result->idTables()) { - co_yield applyBind(std::move(idTable), vocab.get()); + [](auto applyBind, + std::shared_ptr result) -> Result::Generator { + for (auto& [idTable, localVocab] : result->idTables()) { + IdTable resultTable = applyBind(std::move(idTable), &localVocab); + co_yield {std::move(resultTable), std::move(localVocab)}; } - std::array vocabs{vocab.get(), &result->localVocab()}; - *vocab = LocalVocab::merge(std::span{vocabs}); - }(localVocab, std::move(applyBind), std::move(subRes)); - return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + }(std::move(applyBind), std::move(subRes)); + return {std::move(generator), resultSortedOn()}; } // _____________________________________________________________________________ IdTable Bind::computeExpressionBind( - LocalVocab* outputLocalVocab, IdTable idTable, - const LocalVocab& inputLocalVocab, + LocalVocab* localVocab, IdTable idTable, const sparqlExpression::SparqlExpression* expression) const { sparqlExpression::EvaluationContext evaluationContext( *getExecutionContext(), _subtree->getVariableColumns(), idTable, - getExecutionContext()->getAllocator(), inputLocalVocab, - cancellationHandle_, deadline_); + getExecutionContext()->getAllocator(), *localVocab, cancellationHandle_, + deadline_); sparqlExpression::ExpressionResult expressionResult = expression->evaluate(&evaluationContext); @@ -188,7 +184,7 @@ IdTable Bind::computeExpressionBind( if (it != resultGenerator.end()) { Id constantId = sparqlExpression::detail::constantExpressionResultToId( - std::move(*it), *outputLocalVocab); + std::move(*it), *localVocab); checkCancellation(); ad_utility::chunkedFill(outputColumn, constantId, CHUNK_SIZE, [this]() { checkCancellation(); }); @@ -199,7 +195,7 @@ IdTable Bind::computeExpressionBind( for (auto& resultValue : resultGenerator) { outputColumn[i] = sparqlExpression::detail::constantExpressionResultToId( - std::move(resultValue), *outputLocalVocab); + std::move(resultValue), *localVocab); i++; checkCancellation(); } diff --git a/src/engine/Bind.h b/src/engine/Bind.h index eeaafaf3ed..34c515fb54 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -49,8 +49,7 @@ class Bind : public Operation { // Implementation for the binding of arbitrary expressions. IdTable computeExpressionBind( - LocalVocab* outputLocalVocab, IdTable idTable, - const LocalVocab& inputLocalVocab, + LocalVocab* localVocab, IdTable idTable, const sparqlExpression::SparqlExpression* expression) const; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 873bd21ae7..32a874a51f 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -13,13 +13,15 @@ #include "util/http/MediaTypes.h" // __________________________________________________________________________ -cppcoro::generator ExportQueryExecutionTrees::getIdTables( - const Result& result) { +cppcoro::generator +ExportQueryExecutionTrees::getIdTables(const Result& result) { if (result.isFullyMaterialized()) { - co_yield result.idTable(); + TableConstRefWithVocab pair{result.idTable(), result.localVocab()}; + co_yield pair; } else { - for (const IdTable& idTable : result.idTables()) { - co_yield idTable; + for (const Result::IdTableVocabPair& pair : result.idTables()) { + TableConstRefWithVocab tableWithVocab{pair.idTable_, pair.localVocab_}; + co_yield tableWithVocab; } } } @@ -33,11 +35,14 @@ ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, if (limitOffset._limit.value_or(1) == 0) { co_return; } - for (const IdTable& idTable : getIdTables(result)) { - uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); - uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); + for (TableConstRefWithVocab& tableWithVocab : getIdTables(result)) { + uint64_t currentOffset = + limitOffset.actualOffset(tableWithVocab.idTable_.numRows()); + uint64_t upperBound = + limitOffset.upperBound(tableWithVocab.idTable_.numRows()); if (currentOffset != upperBound) { - co_yield {idTable, std::views::iota(currentOffset, upperBound)}; + co_yield {std::move(tableWithVocab), + std::views::iota(currentOffset, upperBound)}; } limitOffset._offset -= currentOffset; if (limitOffset._limit.has_value()) { @@ -57,9 +62,10 @@ ExportQueryExecutionTrees::constructQueryResultToTriples( const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { + auto& idTable = pair.idTable_; for (uint64_t i : range) { - ConstructQueryExportContext context{i, idTable, result->localVocab(), + ConstructQueryExportContext context{i, idTable, pair.localVocab_, qet.getVariableColumns(), qet.getQec()->getIndex()}; using enum PositionInTriple; @@ -172,10 +178,10 @@ ExportQueryExecutionTrees::idTableToQLeverJSONBindings( std::shared_ptr result, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(result != nullptr); - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t rowIndex : range) { - co_yield idTableToQLeverJSONRow(qet, columns, result->localVocab(), - rowIndex, idTable) + co_yield idTableToQLeverJSONRow(qet, columns, pair.localVocab_, rowIndex, + pair.idTable_) .dump(); cancellationHandle->throwIfCancelled(); } @@ -405,14 +411,14 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (const auto& [idTable, range] : - getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { - co_yield std::string_view{reinterpret_cast(&idTable( - i, columnIndex.value().columnIndex_)), - sizeof(Id)}; + co_yield std::string_view{ + reinterpret_cast( + &pair.idTable_(i, columnIndex.value().columnIndex_)), + sizeof(Id)}; } } cancellationHandle->throwIfCancelled(); @@ -436,15 +442,15 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); - Id id = idTable(i, val.columnIndex_); + Id id = pair.idTable_(i, val.columnIndex_); auto optionalStringAndType = idToStringAndType( - qet.getQec()->getIndex(), id, result->localVocab(), + qet.getQec()->getIndex(), id, pair.localVocab_, escapeFunction); if (optionalStringAndType.has_value()) [[likely]] { co_yield optionalStringAndType.value().first; @@ -561,15 +567,15 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); - Id id = idTable(i, val.columnIndex_); + Id id = pair.idTable_(i, val.columnIndex_); co_yield idToXMLBinding(val.variable_, id, qet.getQec()->getIndex(), - result->localVocab()); + pair.localVocab_); } } co_yield "\n "; @@ -611,12 +617,13 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: co_return; } - auto getBinding = [&](const IdTable& idTable, const uint64_t& i) { + auto getBinding = [&](const IdTable& idTable, const uint64_t& i, + const LocalVocab& localVocab) { nlohmann::ordered_json binding = {}; for (const auto& column : columns) { - auto optionalStringAndType = idToStringAndType( - qet.getQec()->getIndex(), idTable(i, column->columnIndex_), - result->localVocab()); + auto optionalStringAndType = + idToStringAndType(qet.getQec()->getIndex(), + idTable(i, column->columnIndex_), localVocab); if (optionalStringAndType.has_value()) [[likely]] { const auto& [stringValue, xsdType] = optionalStringAndType.value(); binding[column->variable_] = @@ -627,12 +634,12 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: }; bool isFirstRow = true; - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { if (!isFirstRow) [[likely]] { co_yield ","; } - co_yield getBinding(idTable, i); + co_yield getBinding(pair.idTable_, i, pair.localVocab_); cancellationHandle->throwIfCancelled(); isFirstRow = false; } diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 339e7b2cf5..d8a42b4d48 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -171,15 +171,23 @@ class ExportQueryExecutionTrees { const parsedQuery::SelectClause& selectClause, LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); + // Public for testing. + public: + struct TableConstRefWithVocab { + const IdTable& idTable_; + const LocalVocab& localVocab_; + }; // Helper type that contains an `IdTable` and a view with related indices to // access the `IdTable` with. struct TableWithRange { - const IdTable& idTable_; + TableConstRefWithVocab tableWithVocab_; std::ranges::iota_view view_; }; + private: // Yield all `IdTables` provided by the given `result`. - static cppcoro::generator getIdTables(const Result& result); + static cppcoro::generator + getIdTables(const Result& result); // Return a range that contains the indices of the rows that have to be // exported from the `idTable` given the `LimitOffsetClause`. It takes into diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 90690c9a70..79ef7572ad 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -48,47 +48,55 @@ ProtoResult Filter::computeResult(bool requestLaziness) { LOG(DEBUG) << "Filter result computation..." << endl; checkCancellation(); - auto localVocab = subRes->getSharedLocalVocab(); if (subRes->isFullyMaterialized()) { - IdTable result = filterIdTable(subRes, subRes->idTable()); + IdTable result = filterIdTable(subRes->sortedBy(), subRes->idTable(), + subRes->localVocab()); LOG(DEBUG) << "Filter result computation done." << endl; - return {std::move(result), resultSortedOn(), std::move(localVocab)}; + return {std::move(result), resultSortedOn(), subRes->getSharedLocalVocab()}; } if (requestLaziness) { - return {[](auto subRes, auto* self) -> cppcoro::generator { - for (IdTable& idTable : subRes->idTables()) { - IdTable result = self->filterIdTable(subRes, idTable); - co_yield result; + return {[](auto subRes, auto* self) -> Result::Generator { + for (auto& [idTable, localVocab] : subRes->idTables()) { + IdTable result = self->filterIdTable(subRes->sortedBy(), + idTable, localVocab); + co_yield {std::move(result), std::move(localVocab)}; } }(std::move(subRes), this), - resultSortedOn(), std::move(localVocab)}; + resultSortedOn()}; } // If we receive a generator of IdTables, we need to materialize it into a // single IdTable. size_t width = getSubtree().get()->getResultWidth(); IdTable result{width, getExecutionContext()->getAllocator()}; - ad_utility::callFixedSize(width, [this, &subRes, &result]() { - for (IdTable& idTable : subRes->idTables()) { - computeFilterImpl(result, idTable, subRes->localVocab(), - subRes->sortedBy()); - } - }); + std::vector localVocabs; + ad_utility::callFixedSize( + width, [this, &subRes, &result, &localVocabs]() { + for (Result::IdTableVocabPair& pair : subRes->idTables()) { + computeFilterImpl(result, pair.idTable_, pair.localVocab_, + subRes->sortedBy()); + localVocabs.emplace_back(std::move(pair.localVocab_)); + } + }); + + LocalVocab resultLocalVocab{}; + resultLocalVocab.mergeWith(localVocabs); LOG(DEBUG) << "Filter result computation done." << endl; - return {std::move(result), resultSortedOn(), std::move(localVocab)}; + return {std::move(result), resultSortedOn(), std::move(resultLocalVocab)}; } // _____________________________________________________________________________ -IdTable Filter::filterIdTable(const std::shared_ptr& subRes, - const IdTable& idTable) const { +IdTable Filter::filterIdTable(std::vector sortedBy, + const IdTable& idTable, + const LocalVocab& localVocab) const { size_t width = idTable.numColumns(); IdTable result{width, getExecutionContext()->getAllocator()}; CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, result, idTable, - subRes->localVocab(), subRes->sortedBy()); + localVocab, std::move(sortedBy)); return result; } diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 8bf10cb0db..eaa9303bb7 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -67,6 +67,7 @@ class Filter : public Operation { std::vector sortedBy) const; // Run `computeFilterImpl` on the provided IdTable - IdTable filterIdTable(const std::shared_ptr& subRes, - const IdTable& idTable) const; + IdTable filterIdTable(std::vector sortedBy, + const IdTable& idTable, + const LocalVocab& localVocab) const; }; diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 2088a1220d..3c5399efc2 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -339,16 +339,6 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) { LOG(DEBUG) << "GroupBy subresult computation done" << std::endl; - // Make a deep copy of the local vocab from `subresult` and then add to it (in - // case GROUP_CONCAT adds a new word or words). - // - // TODO: In most GROUP BY operations, nothing is added to the local - // vocabulary, so it would be more efficient to first share the pointer here - // (like with `shareLocalVocabFrom`) and only copy it when a new word is about - // to be added. Same for BIND. - - auto localVocab = subresult->getCopyOfLocalVocab(); - std::vector groupByColumns; // parse the group by columns @@ -369,6 +359,7 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) { } if (useHashMapOptimization) { + auto localVocab = subresult->getCopyOfLocalVocab(); IdTable idTable = CALL_FIXED_SIZE( groupByCols.size(), &GroupBy::computeGroupByForHashMapOptimization, this, metadataForUnsequentialData->aggregateAliases_, @@ -383,23 +374,25 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) { if (!subresult->isFullyMaterialized()) { AD_CORRECTNESS_CHECK(metadataForUnsequentialData.has_value()); - auto localVocabPointer = - std::make_shared(std::move(localVocab)); - cppcoro::generator generator = CALL_FIXED_SIZE( + Result::Generator generator = CALL_FIXED_SIZE( (std::array{inWidth, outWidth}), &GroupBy::computeResultLazily, this, std::move(subresult), std::move(aggregates), std::move(metadataForUnsequentialData).value().aggregateAliases_, - std::move(groupByCols), localVocabPointer, !requestLaziness); + std::move(groupByCols), !requestLaziness); return requestLaziness - ? ProtoResult{std::move(generator), resultSortedOn(), - std::move(localVocabPointer)} + ? ProtoResult{std::move(generator), resultSortedOn()} : ProtoResult{cppcoro::getSingleElement(std::move(generator)), - resultSortedOn(), std::move(*localVocabPointer)}; + resultSortedOn()}; } AD_CORRECTNESS_CHECK(subresult->idTable().numColumns() == inWidth); + // Make a copy of the local vocab. Note: the LocalVocab has reference + // semantics via `shared_ptr`, so no actual strings are copied here. + + auto localVocab = subresult->getCopyOfLocalVocab(); + IdTable idTable = CALL_FIXED_SIZE( (std::array{inWidth, outWidth}), &GroupBy::doGroupBy, this, subresult->idTable(), groupByCols, aggregates, &localVocab); @@ -474,14 +467,15 @@ void GroupBy::processEmptyImplicitGroup( // _____________________________________________________________________________ template -cppcoro::generator GroupBy::computeResultLazily( +Result::Generator GroupBy::computeResultLazily( std::shared_ptr subresult, std::vector aggregates, std::vector aggregateAliases, - std::vector groupByCols, std::shared_ptr localVocab, - bool singleIdTable) const { + std::vector groupByCols, bool singleIdTable) const { size_t inWidth = _subtree->getResultWidth(); AD_CONTRACT_CHECK(inWidth == IN_WIDTH || IN_WIDTH == 0); - LazyGroupBy lazyGroupBy{*localVocab, std::move(aggregateAliases), + LocalVocab currentLocalVocab{}; + std::vector storedLocalVocabs; + LazyGroupBy lazyGroupBy{currentLocalVocab, std::move(aggregateAliases), getExecutionContext()->getAllocator(), groupByCols.size()}; @@ -491,12 +485,14 @@ cppcoro::generator GroupBy::computeResultLazily( GroupBlock currentGroupBlock; - for (IdTable& idTable : subresult->idTables()) { + for (Result::IdTableVocabPair& pair : subresult->idTables()) { + auto& idTable = pair.idTable_; if (idTable.empty()) { continue; } AD_CORRECTNESS_CHECK(idTable.numColumns() == inWidth); checkCancellation(); + storedLocalVocabs.emplace_back(std::move(pair.localVocab_)); if (currentGroupBlock.empty()) { for (size_t col : groupByCols) { @@ -505,11 +501,11 @@ cppcoro::generator GroupBy::computeResultLazily( } sparqlExpression::EvaluationContext evaluationContext = - createEvaluationContext(*localVocab, idTable); + createEvaluationContext(currentLocalVocab, idTable); size_t lastBlockStart = searchBlockBoundaries( [this, &groupSplitAcrossTables, &lazyGroupBy, &evaluationContext, - &resultTable, ¤tGroupBlock, &aggregates, &localVocab, + &resultTable, ¤tGroupBlock, &aggregates, ¤tLocalVocab, &groupByCols](size_t blockStart, size_t blockEnd) { if (groupSplitAcrossTables) { lazyGroupBy.processBlock(evaluationContext, blockStart, blockEnd); @@ -521,7 +517,7 @@ cppcoro::generator GroupBy::computeResultLazily( IdTableStatic table = std::move(resultTable).toStatic(); processBlock(table, aggregates, evaluationContext, - blockStart, blockEnd, localVocab.get(), + blockStart, blockEnd, ¤tLocalVocab, groupByCols); resultTable = std::move(table).toDynamic(); } @@ -530,8 +526,16 @@ cppcoro::generator GroupBy::computeResultLazily( groupSplitAcrossTables = true; lazyGroupBy.processBlock(evaluationContext, lastBlockStart, idTable.size()); if (!singleIdTable && !resultTable.empty()) { - co_yield resultTable; + currentLocalVocab.mergeWith(storedLocalVocabs); + Result::IdTableVocabPair outputPair{std::move(resultTable), + std::move(currentLocalVocab)}; + co_yield outputPair; + // Reuse buffer if not moved out + resultTable = std::move(outputPair.idTable_); resultTable.clear(); + // Keep last local vocab for next commit. + currentLocalVocab = std::move(storedLocalVocabs.back()); + storedLocalVocabs.clear(); } } // No need for final commit when loop was never entered. @@ -539,11 +543,11 @@ cppcoro::generator GroupBy::computeResultLazily( // If we have an implicit group by we need to produce one result row if (groupByCols.empty()) { processEmptyImplicitGroup(resultTable, aggregates, - localVocab.get()); - co_yield resultTable; + ¤tLocalVocab); + co_yield {std::move(resultTable), std::move(currentLocalVocab)}; } else if (singleIdTable) { // Yield at least a single empty table if requested. - co_yield resultTable; + co_yield {std::move(resultTable), std::move(currentLocalVocab)}; } co_return; } @@ -560,9 +564,10 @@ cppcoro::generator GroupBy::computeResultLazily( } sparqlExpression::EvaluationContext evaluationContext = - createEvaluationContext(*localVocab, idTable); + createEvaluationContext(currentLocalVocab, idTable); lazyGroupBy.commitRow(resultTable, evaluationContext, currentGroupBlock); - co_yield resultTable; + currentLocalVocab.mergeWith(storedLocalVocabs); + co_yield {std::move(resultTable), std::move(currentLocalVocab)}; } // _____________________________________________________________________________ diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index d18781f1ca..acfc6feac2 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -140,12 +140,11 @@ class GroupBy : public Operation { // skipping empty tables unless `singleIdTable` is set which causes the // function to yield a single id table with the complete result. template - cppcoro::generator computeResultLazily( + Result::Generator computeResultLazily( std::shared_ptr subresult, std::vector aggregates, std::vector aggregateAliases, - std::vector groupByCols, std::shared_ptr localVocab, - bool singleIdTable) const; + std::vector groupByCols, bool singleIdTable) const; template void processGroup(const Aggregate& expression, diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 8702b4c1f3..471206f80d 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -135,7 +135,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { } // _____________________________________________________________________________ -cppcoro::generator IndexScan::scanInChunks() const { +Result::Generator IndexScan::scanInChunks() const { auto metadata = getMetadataForScan(); if (!metadata.has_value()) { co_return; @@ -145,7 +145,7 @@ cppcoro::generator IndexScan::scanInChunks() const { std::vector blocks{blocksSpan.begin(), blocksSpan.end()}; for (IdTable& idTable : getLazyScan(std::move(blocks))) { - co_yield std::move(idTable); + co_yield {std::move(idTable), LocalVocab{}}; } } @@ -153,7 +153,7 @@ cppcoro::generator IndexScan::scanInChunks() const { ProtoResult IndexScan::computeResult(bool requestLaziness) { LOG(DEBUG) << "IndexScan result computation...\n"; if (requestLaziness) { - return {scanInChunks(), resultSortedOn(), LocalVocab{}}; + return {scanInChunks(), resultSortedOn()}; } IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index bf73b22628..9e75c89d35 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -130,7 +130,7 @@ class IndexScan final : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; - cppcoro::generator scanInChunks() const; + Result::Generator scanInChunks() const; // Helper functions for the public `getLazyScanFor...` functions (see above). Permutation::IdTableGenerator getLazyScan( diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index ee0285c532..afa603fba3 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -20,11 +20,11 @@ LocalVocab LocalVocab::clone() const { // _____________________________________________________________________________ LocalVocab LocalVocab::merge(std::span vocabs) { LocalVocab res; - auto inserter = std::back_inserter(res.otherWordSets_); - for (const auto* vocab : vocabs) { - std::ranges::copy(vocab->otherWordSets_, inserter); - *inserter = vocab->primaryWordSet_; - } + res.mergeWith(vocabs | + std::views::transform( + [](const LocalVocab* localVocab) -> const LocalVocab& { + return *localVocab; + })); return res; } diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index be9a7c4499..5e6c98296a 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -91,6 +91,17 @@ class LocalVocab { // of the `vocabs`. The primary word set of the newly created vocab is empty. static LocalVocab merge(std::span vocabs); + // Merge all passed local vocabs to keep alive all the words from each of the + // `vocabs`. + template + void mergeWith(const R& vocabs) { + auto inserter = std::back_inserter(otherWordSets_); + for (const auto& vocab : vocabs) { + std::ranges::copy(vocab.otherWordSets_, inserter); + *inserter = vocab.primaryWordSet_; + } + } + // Return all the words from all the word sets as a vector. std::vector getAllWordsForTesting() const; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index c822943aac..64d2af2342 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -180,12 +180,14 @@ CacheValue Operation::runComputationAndPrepareForCache( AD_CONTRACT_CHECK(!pinned); result.cacheDuringConsumption( [maxSize = cache.getMaxSizeSingleEntry()]( - const std::optional& currentIdTable, - const IdTable& newIdTable) { - auto currentSize = currentIdTable.has_value() - ? CacheValue::getSize(currentIdTable.value()) - : 0_B; - return maxSize >= currentSize + CacheValue::getSize(newIdTable); + const std::optional& currentIdTablePair, + const Result::IdTableVocabPair& newIdTable) { + auto currentSize = + currentIdTablePair.has_value() + ? CacheValue::getSize(currentIdTablePair.value().idTable_) + : 0_B; + return maxSize >= + currentSize + CacheValue::getSize(newIdTable.idTable_); }, [runtimeInfo = getRuntimeInfoPointer(), &cache, cacheKey](Result aggregatedResult) { diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index d2aab66373..292a62069a 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -51,10 +51,11 @@ auto compareRowsBySortColumns(const std::vector& sortedBy) { // _____________________________________________________________________________ Result::Result(IdTable idTable, std::vector sortedBy, SharedLocalVocabWrapper localVocab) - : data_{std::move(idTable)}, - sortedBy_{std::move(sortedBy)}, - localVocab_{std::move(localVocab.localVocab_)} { - AD_CONTRACT_CHECK(localVocab_ != nullptr); + : data_{IdTableSharedLocalVocabPair{std::move(idTable), + std::move(localVocab.localVocab_)}}, + sortedBy_{std::move(sortedBy)} { + AD_CONTRACT_CHECK(std::get(data_).localVocab_ != + nullptr); assertSortOrderIsRespected(this->idTable(), sortedBy_); } @@ -65,40 +66,28 @@ Result::Result(IdTable idTable, std::vector sortedBy, SharedLocalVocabWrapper{std::move(localVocab)}} {} // _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, - SharedLocalVocabWrapper localVocab) - : data_{GenContainer{ - [](auto idTables, auto sortedBy) -> cppcoro::generator { - std::optional previousId = std::nullopt; - for (IdTable& idTable : idTables) { - if (!idTable.empty()) { - if (previousId.has_value()) { - AD_EXPENSIVE_CHECK(!compareRowsBySortColumns(sortedBy)( - idTable.at(0), previousId.value())); - } - previousId = idTable.at(idTable.size() - 1); - } - assertSortOrderIsRespected(idTable, sortedBy); - co_yield std::move(idTable); - } - }(std::move(idTables), sortedBy)}}, - sortedBy_{std::move(sortedBy)}, - localVocab_{std::move(localVocab.localVocab_)} { - AD_CONTRACT_CHECK(localVocab_ != nullptr); -} - -// _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocab&& localVocab) - : Result{std::move(idTables), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} +Result::Result(IdTableVocabPair pair, std::vector sortedBy) + : Result{std::move(pair.idTable_), std::move(sortedBy), + std::move(pair.localVocab_)} {} // _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab) - : Result{std::move(idTables), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} +Result::Result(Generator idTables, std::vector sortedBy) + : data_{GenContainer{[](auto idTables, auto sortedBy) -> Generator { + std::optional previousId = std::nullopt; + for (IdTableVocabPair& pair : idTables) { + auto& idTable = pair.idTable_; + if (!idTable.empty()) { + if (previousId.has_value()) { + AD_EXPENSIVE_CHECK(!compareRowsBySortColumns(sortedBy)( + idTable.at(0), previousId.value())); + } + previousId = idTable.at(idTable.size() - 1); + } + assertSortOrderIsRespected(idTable, sortedBy); + co_yield pair; + } + }(std::move(idTables), sortedBy)}}, + sortedBy_{std::move(sortedBy)} {} // _____________________________________________________________________________ // Apply `LimitOffsetClause` to given `IdTable`. @@ -131,16 +120,17 @@ void Result::applyLimitOffset( } if (isFullyMaterialized()) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; - resizeIdTable(std::get(data_), limitOffset); + resizeIdTable(std::get(data_).idTable_, + limitOffset); limitTimeCallback(limitTimer.msecs(), idTable()); } else { - auto generator = [](cppcoro::generator original, - LimitOffsetClause limitOffset, - auto limitTimeCallback) -> cppcoro::generator { + auto generator = [](Generator original, LimitOffsetClause limitOffset, + auto limitTimeCallback) -> Generator { if (limitOffset._limit.value_or(1) == 0) { co_return; } - for (IdTable& idTable : original) { + for (IdTableVocabPair& pair : original) { + auto& idTable = pair.idTable_; ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; size_t originalSize = idTable.numRows(); resizeIdTable(idTable, limitOffset); @@ -152,7 +142,7 @@ void Result::applyLimitOffset( } limitTimeCallback(limitTimer.value(), idTable); if (limitOffset._offset == 0) { - co_yield idTable; + co_yield pair; } if (limitOffset._limit.value_or(1) == 0) { break; @@ -170,15 +160,14 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { auto limit = limitOffset._limit; AD_CONTRACT_CHECK(!limit.has_value() || numRows <= limit.value()); } else { - auto generator = - [](cppcoro::generator original, - LimitOffsetClause limitOffset) -> cppcoro::generator { + auto generator = [](Generator original, + LimitOffsetClause limitOffset) -> Generator { auto limit = limitOffset._limit; uint64_t elementCount = 0; - for (IdTable& idTable : original) { - elementCount += idTable.numRows(); + for (IdTableVocabPair& pair : original) { + elementCount += pair.idTable_.numRows(); AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); - co_yield idTable; + co_yield pair; } AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); }(std::move(idTables()), limitOffset); @@ -200,17 +189,17 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { }); }; if (isFullyMaterialized()) { - AD_EXPENSIVE_CHECK(performCheck(varColMap, std::get(data_))); + AD_EXPENSIVE_CHECK(performCheck( + varColMap, std::get(data_).idTable_)); } else { - auto generator = - [](cppcoro::generator original, - [[maybe_unused]] VariableToColumnMap varColMap, - [[maybe_unused]] auto performCheck) -> cppcoro::generator { - for (IdTable& idTable : original) { + auto generator = [](Generator original, + [[maybe_unused]] VariableToColumnMap varColMap, + [[maybe_unused]] auto performCheck) -> Generator { + for (IdTableVocabPair& pair : original) { // No need to check subsequent idTables assuming the datatypes // don't change mid result. - AD_EXPENSIVE_CHECK(performCheck(varColMap, idTable)); - co_yield idTable; + AD_EXPENSIVE_CHECK(performCheck(varColMap, pair.idTable_)); + co_yield pair; } }(std::move(idTables()), varColMap, std::move(performCheck)); data_.emplace(std::move(generator)); @@ -222,17 +211,17 @@ void Result::runOnNewChunkComputed( std::function onNewChunk, std::function onGeneratorFinished) { AD_CONTRACT_CHECK(!isFullyMaterialized()); - auto generator = [](cppcoro::generator original, auto onNewChunk, - auto onGeneratorFinished) -> cppcoro::generator { + auto generator = [](Generator original, auto onNewChunk, + auto onGeneratorFinished) -> Generator { // Call this within destructor to make sure it is also called when an // operation stops iterating before reaching the end. absl::Cleanup cleanup{ [&onGeneratorFinished]() { onGeneratorFinished(false); }}; try { ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; - for (IdTable& idTable : original) { - onNewChunk(idTable, timer.value()); - co_yield idTable; + for (IdTableVocabPair& pair : original) { + onNewChunk(pair.idTable_, timer.value()); + co_yield pair; timer.start(); } } catch (...) { @@ -260,11 +249,11 @@ void Result::assertSortOrderIsRespected( // _____________________________________________________________________________ const IdTable& Result::idTable() const { AD_CONTRACT_CHECK(isFullyMaterialized()); - return std::get(data_); + return std::get(data_).idTable_; } // _____________________________________________________________________________ -cppcoro::generator& Result::idTables() const { +Result::Generator& Result::idTables() const { AD_CONTRACT_CHECK(!isFullyMaterialized()); const auto& container = std::get(data_); AD_CONTRACT_CHECK(!container.consumed_->exchange(true)); @@ -273,33 +262,40 @@ cppcoro::generator& Result::idTables() const { // _____________________________________________________________________________ bool Result::isFullyMaterialized() const noexcept { - return std::holds_alternative(data_); + return std::holds_alternative(data_); } // _____________________________________________________________________________ void Result::cacheDuringConsumption( - std::function&, const IdTable&)> + std::function&, + const IdTableVocabPair&)> fitInCache, std::function storeInCache) { AD_CONTRACT_CHECK(!isFullyMaterialized()); data_.emplace(ad_utility::wrapGeneratorWithCache( std::move(idTables()), - [fitInCache = std::move(fitInCache)](std::optional& aggregate, - const IdTable& newTable) { - bool doBothFitInCache = fitInCache(aggregate, newTable); + [fitInCache = std::move(fitInCache)]( + std::optional& aggregate, + const IdTableVocabPair& newTablePair) { + bool doBothFitInCache = fitInCache(aggregate, newTablePair); if (doBothFitInCache) { if (aggregate.has_value()) { - aggregate.value().insertAtEnd(newTable); + auto& value = aggregate.value(); + value.idTable_.insertAtEnd(newTablePair.idTable_); + value.localVocab_.mergeWith( + std::span{&newTablePair.localVocab_, 1}); } else { - aggregate.emplace(newTable.clone()); + aggregate.emplace(newTablePair.idTable_.clone(), + newTablePair.localVocab_.clone()); } } return doBothFitInCache; }, - [storeInCache = std::move(storeInCache), sortedBy = sortedBy_, - localVocab = localVocab_](IdTable idTable) mutable { - storeInCache(Result{std::move(idTable), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}}); + [storeInCache = std::move(storeInCache), + sortedBy = sortedBy_](IdTableVocabPair pair) mutable { + storeInCache( + Result{std::move(pair.idTable_), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(pair.localVocab_)}}); })); } diff --git a/src/engine/Result.h b/src/engine/Result.h index a119e05613..d3716fb268 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -22,16 +22,37 @@ // through a generator via `idTables()` when it is supposed to be lazily // evaluated. class Result { + public: + struct IdTableVocabPair { + IdTable idTable_; + LocalVocab localVocab_; + + // Explicit constructor to avoid problems with coroutines and temporaries. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103909 for details. + IdTableVocabPair(IdTable idTable, LocalVocab localVocab) + : idTable_{std::move(idTable)}, localVocab_{std::move(localVocab)} {} + }; + + using Generator = cppcoro::generator; + private: // Needs to be mutable in order to be consumable from a const result. struct GenContainer { - mutable cppcoro::generator generator_; + mutable Generator generator_; mutable std::unique_ptr consumed_ = std::make_unique(false); - explicit GenContainer(cppcoro::generator generator) + explicit GenContainer(Generator generator) : generator_{std::move(generator)} {} }; - using Data = std::variant; + + using LocalVocabPtr = std::shared_ptr; + + struct IdTableSharedLocalVocabPair { + IdTable idTable_; + // The local vocabulary of the result. + LocalVocabPtr localVocab_; + }; + using Data = std::variant; // The actual entries. Data data_; @@ -39,11 +60,6 @@ class Result { // Empty if the result is not sorted on any column. std::vector sortedBy_; - using LocalVocabPtr = std::shared_ptr; - - // The local vocabulary of the result. - LocalVocabPtr localVocab_ = std::make_shared(); - // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those remain valid after calling non-const function like @@ -92,12 +108,8 @@ class Result { SharedLocalVocabWrapper localVocab); Result(IdTable idTable, std::vector sortedBy, LocalVocab&& localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, SharedLocalVocabWrapper localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocab&& localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab); + Result(IdTableVocabPair pair, std::vector sortedBy); + Result(Generator idTables, std::vector sortedBy); // Prevent accidental copying of a result table. Result(const Result& other) = delete; Result& operator=(const Result& other) = delete; @@ -131,7 +143,8 @@ class Result { // Throw an `ad_utility::Exception` if the underlying `data_` member holds the // wrong variant. void cacheDuringConsumption( - std::function&, const IdTable&)> + std::function&, + const IdTableVocabPair&)> fitInCache, std::function storeInCache); @@ -141,7 +154,7 @@ class Result { // Access to the underlying `IdTable`s. Throw an `ad_utility::Exception` // if the underlying `data_` member holds the wrong variant. - cppcoro::generator& idTables() const; + Generator& idTables() const; // Const access to the columns by which the `idTable()` is sorted. const std::vector& sortedBy() const { return sortedBy_; } @@ -157,12 +170,17 @@ class Result { // Filter::computeFilterImpl (evaluationContext) // Variable::evaluate (idToStringAndType) // - const LocalVocab& localVocab() const { return *localVocab_; } + const LocalVocab& localVocab() const { + AD_CONTRACT_CHECK(isFullyMaterialized()); + return *std::get(data_).localVocab_; + } // Get the local vocab as a shared pointer to const. This can be used if one // result has the same local vocab as one of its child results. SharedLocalVocabWrapper getSharedLocalVocab() const { - return SharedLocalVocabWrapper{localVocab_}; + AD_CONTRACT_CHECK(isFullyMaterialized()); + return SharedLocalVocabWrapper{ + std::get(data_).localVocab_}; } // Like `getSharedLocalVocabFrom`, but takes more than one result and merges @@ -176,7 +194,7 @@ class Result { static SharedLocalVocabWrapper getMergedLocalVocab(R&& subResults) { std::vector vocabs; for (const Result& table : subResults) { - vocabs.push_back(std::to_address(table.localVocab_)); + vocabs.push_back(&table.localVocab()); } return SharedLocalVocabWrapper{LocalVocab::merge(vocabs)}; } diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 9533a69e4f..4587066ba1 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -186,15 +186,12 @@ ProtoResult Service::computeResultImpl([[maybe_unused]] bool requestLaziness) { // Note: The `body`-generator also keeps the complete response connection // alive, so we have no lifetime issue here(see `HttpRequest::send` for // details). - auto localVocabPtr = std::make_shared(); - auto generator = computeResultLazily(expVariableKeys, std::move(body), - localVocabPtr, !requestLaziness); - + auto generator = + computeResultLazily(expVariableKeys, std::move(body), !requestLaziness); return requestLaziness - ? ProtoResult{std::move(generator), resultSortedOn(), - std::move(localVocabPtr)} + ? ProtoResult{std::move(generator), resultSortedOn()} : ProtoResult{cppcoro::getSingleElement(std::move(generator)), - resultSortedOn(), std::move(*localVocabPtr)}; + resultSortedOn()}; } template @@ -242,10 +239,10 @@ void Service::writeJsonResult(const std::vector& vars, } // ____________________________________________________________________________ -cppcoro::generator Service::computeResultLazily( +Result::Generator Service::computeResultLazily( const std::vector vars, - ad_utility::LazyJsonParser::Generator body, - std::shared_ptr localVocab, bool singleIdTable) { + ad_utility::LazyJsonParser::Generator body, bool singleIdTable) { + LocalVocab localVocab{}; IdTable idTable{getResultWidth(), getExecutionContext()->getAllocator()}; size_t rowIdx = 0; @@ -260,10 +257,15 @@ cppcoro::generator Service::computeResultLazily( } CALL_FIXED_SIZE(getResultWidth(), &Service::writeJsonResult, this, vars, - partJson, &idTable, localVocab.get(), rowIdx); + partJson, &idTable, &localVocab, rowIdx); if (!singleIdTable) { - co_yield idTable; + Result::IdTableVocabPair pair{std::move(idTable), + std::move(localVocab)}; + co_yield pair; + // Move back to reuse buffer if not moved out. + idTable = std::move(pair.idTable_); idTable.clear(); + localVocab = LocalVocab{}; rowIdx = 0; } resultExists = true; @@ -294,7 +296,7 @@ cppcoro::generator Service::computeResultLazily( } if (singleIdTable) { - co_yield idTable; + co_yield {std::move(idTable), std::move(localVocab)}; } } diff --git a/src/engine/Service.h b/src/engine/Service.h index d11b0191ba..41aa9e5e56 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -148,8 +148,7 @@ class Service : public Operation { // Compute the result lazy as IdTable generator. // If the `singleIdTable` flag is set, the result is yielded as one idTable. - cppcoro::generator computeResultLazily( + Result::Generator computeResultLazily( const std::vector vars, - ad_utility::LazyJsonParser::Generator body, - std::shared_ptr localVocab, bool singleIdTable); + ad_utility::LazyJsonParser::Generator body, bool singleIdTable); }; diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 07ef2a401e..0fac7b31f3 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -167,10 +167,8 @@ ProtoResult Union::computeResult(bool requestLaziness) { _subtrees[1]->getResult(requestLaziness); if (requestLaziness) { - auto localVocab = std::make_shared(); - auto generator = - computeResultLazily(std::move(subRes1), std::move(subRes2), localVocab); - return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + return {computeResultLazily(std::move(subRes1), std::move(subRes2)), + resultSortedOn()}; } LOG(DEBUG) << "Union subresult computation done." << std::endl; @@ -257,29 +255,29 @@ IdTable Union::transformToCorrectColumnFormat( } // _____________________________________________________________________________ -cppcoro::generator Union::computeResultLazily( +Result::Generator Union::computeResultLazily( std::shared_ptr result1, - std::shared_ptr result2, - std::shared_ptr localVocab) const { + std::shared_ptr result2) const { std::vector permutation = computePermutation(); if (result1->isFullyMaterialized()) { - co_yield transformToCorrectColumnFormat(result1->idTable().clone(), - permutation); + co_yield { + transformToCorrectColumnFormat(result1->idTable().clone(), permutation), + result1->getCopyOfLocalVocab()}; } else { - for (IdTable& idTable : result1->idTables()) { - co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); + for (auto& [idTable, localVocab] : result1->idTables()) { + co_yield {transformToCorrectColumnFormat(std::move(idTable), permutation), + std::move(localVocab)}; } } permutation = computePermutation(); if (result2->isFullyMaterialized()) { - co_yield transformToCorrectColumnFormat(result2->idTable().clone(), - permutation); + co_yield { + transformToCorrectColumnFormat(result2->idTable().clone(), permutation), + result2->getCopyOfLocalVocab()}; } else { - for (IdTable& idTable : result2->idTables()) { - co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); + for (auto& [idTable, localVocab] : result2->idTables()) { + co_yield {transformToCorrectColumnFormat(std::move(idTable), permutation), + std::move(localVocab)}; } } - std::array vocabs{&result1->localVocab(), - &result2->localVocab()}; - *localVocab = LocalVocab::merge(vocabs); } diff --git a/src/engine/Union.h b/src/engine/Union.h index 782c94e469..e71702315f 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -81,8 +81,7 @@ class Union : public Operation { // Create a generator that yields the `IdTable` for the left or right child // one after another and apply a potential differing permutation to it. Write // the merged LocalVocab to the given `LocalVocab` object at the end. - cppcoro::generator computeResultLazily( + Result::Generator computeResultLazily( std::shared_ptr result1, - std::shared_ptr result2, - std::shared_ptr localVocab) const; + std::shared_ptr result2) const; }; diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index ed8482d66c..83f08b2722 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -226,10 +226,12 @@ static const std::string xmlTrailer = "\n\n"; // Helper function for easier testing of the `IdTable` generator. std::vector convertToVector( - cppcoro::generator generator) { + cppcoro::generator + generator) { std::vector result; - for (const IdTable& idTable : generator) { - result.push_back(idTable.clone()); + for (const ExportQueryExecutionTrees::TableConstRefWithVocab& pair : + generator) { + result.push_back(pair.idTable_.clone()); } return result; } @@ -239,11 +241,11 @@ auto matchesIdTables(const auto&... tables) { return ElementsAre(matchesIdTable(tables)...); } -// Template is only required because inner class is not visible -template -std::vector convertToVector(cppcoro::generator generator) { +std::vector convertToVector( + cppcoro::generator generator) { std::vector result; - for (const auto& [idTable, range] : generator) { + for (const auto& [pair, range] : generator) { + const auto& idTable = pair.idTable_; result.emplace_back(idTable.numColumns(), idTable.getAllocator()); result.back().insertAtEnd(idTable.begin() + *range.begin(), idTable.begin() + *(range.end() - 1) + 1); @@ -1227,13 +1229,13 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); IdTable idTable2 = makeIdTableFromVector({{42}, {1337}}); auto tableGenerator = [](IdTable idTableA, - IdTable idTableB) -> cppcoro::generator { - co_yield idTableA; + IdTable idTableB) -> Result::Generator { + co_yield {std::move(idTableA), LocalVocab{}}; - co_yield idTableB; + co_yield {std::move(idTableB), LocalVocab{}}; }(idTable1.clone(), idTable2.clone()); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getIdTables(result); EXPECT_THAT(convertToVector(std::move(generator)), @@ -1242,12 +1244,13 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); @@ -1259,15 +1262,17 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); @@ -1280,15 +1285,17 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenLastIsSkipped) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); @@ -1301,15 +1308,17 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstAndSecondArePartial) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); @@ -1323,18 +1332,21 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstAndLastArePartial) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; - - IdTable idTable3 = makeIdTableFromVector({{6}, {7}, {8}, {9}}); - co_yield idTable3; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; + + Result::IdTableVocabPair pair3{makeIdTableFromVector({{6}, {7}, {8}, {9}}), + LocalVocab{}}; + co_yield pair3; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); @@ -1350,28 +1362,29 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureGeneratorIsNotConsumedWhenNotRequired) { { - auto throwingGenerator = []() -> cppcoro::generator { + auto throwingGenerator = []() -> Result::Generator { ADD_FAILURE() << "Generator was started" << std::endl; throw std::runtime_error("Generator was started"); co_return; }(); - Result result{std::move(throwingGenerator), {}, LocalVocab{}}; + Result result{std::move(throwingGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 0, ._offset = 0}, result); EXPECT_NO_THROW(convertToVector(std::move(generator))); } { - auto throwAfterYieldGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}}); - co_yield idTable1; + auto throwAfterYieldGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}}), + LocalVocab{}}; + co_yield pair1; ADD_FAILURE() << "Generator was resumed" << std::endl; throw std::runtime_error("Generator was resumed"); }(); - Result result{std::move(throwAfterYieldGenerator), {}, LocalVocab{}}; + Result result{std::move(throwAfterYieldGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 0}, result); IdTable referenceTable1 = makeIdTableFromVector({{1}}); diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index 36915261b7..e683503548 100644 --- a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -18,10 +18,14 @@ namespace { ValueId asBool(bool value) { return Id::makeFromBool(value); } // Convert a generator to a vector for easier comparison in assertions -std::vector toVector(cppcoro::generator generator) { +std::vector toVector(Result::Generator generator) { std::vector result; - for (auto& table : generator) { - result.push_back(std::move(table)); + for (auto& pair : generator) { + // IMPORTANT: The `LocalVocab` contained in the pair will be destroyed at + // the end of the iteration. The underlying assumption is that the + // `LocalVocab` will be empty and the `IdTable` won't contain any dangling + // references. + result.push_back(std::move(pair.idTable_)); } return result; } diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index bbfc0ae6e7..bb307fecd8 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -1952,10 +1952,10 @@ class GroupByLazyFixture : public ::testing::TestWithParam { ASSERT_NE(result.isFullyMaterialized(), lazyResult); if (lazyResult) { size_t counter = 0; - for (const IdTable& idTable : result.idTables()) { + for (const Result::IdTableVocabPair& pair : result.idTables()) { ASSERT_LT(counter, idTables.size()) << "Too many idTables yielded. Expected: " << idTables.size(); - EXPECT_EQ(idTables.at(counter), idTable); + EXPECT_EQ(idTables.at(counter), pair.idTable_); ++counter; } EXPECT_EQ(counter, idTables.size()) @@ -2130,10 +2130,9 @@ TEST_P(GroupByLazyFixture, nestedAggregateFunctionsWork) { auto result = groupBy.computeResultOnlyForTesting(GetParam()); // Acquire the local vocab index for a given string representation if present. - auto makeEntry = [&result](std::string string) { - return result.localVocab().getIndexOrNullopt( - sparqlExpression::detail::LiteralOrIri{ - L::fromStringRepresentation(std::move(string))}); + auto makeEntry = [](std::string string, const LocalVocab& localVocab) { + return localVocab.getIndexOrNullopt(sparqlExpression::detail::LiteralOrIri{ + L::fromStringRepresentation(std::move(string))}); }; auto entryToId = [](std::optional entry) { @@ -2144,30 +2143,30 @@ TEST_P(GroupByLazyFixture, nestedAggregateFunctionsWork) { auto i = IntId; if (GetParam()) { - EXPECT_EQ(result.localVocab().size(), 0); - auto& generator = result.idTables(); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(result.localVocab().size(), 2); - auto entry1 = makeEntry("\"1---\""); - auto entry2 = makeEntry("\"6---\""); - EXPECT_EQ(*iterator, makeIdTableFromVector({{i(0), entryToId(entry1)}, - {i(1), entryToId(entry2)}})); + EXPECT_EQ(iterator->localVocab_.size(), 2); + auto entry1 = makeEntry("\"1---\"", iterator->localVocab_); + auto entry2 = makeEntry("\"6---\"", iterator->localVocab_); + EXPECT_EQ(iterator->idTable_, + makeIdTableFromVector( + {{i(0), entryToId(entry1)}, {i(1), entryToId(entry2)}})); ++iterator; ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(result.localVocab().size(), 3); - auto entry3 = makeEntry("\"8---\""); - EXPECT_EQ(*iterator, makeIdTableFromVector({{i(2), entryToId(entry3)}})); + EXPECT_EQ(iterator->localVocab_.size(), 1); + auto entry3 = makeEntry("\"8---\"", iterator->localVocab_); + EXPECT_EQ(iterator->idTable_, + makeIdTableFromVector({{i(2), entryToId(entry3)}})); EXPECT_EQ(++iterator, generator.end()); } else { EXPECT_EQ(result.localVocab().size(), 3); - auto entry1 = makeEntry("\"1---\""); - auto entry2 = makeEntry("\"6---\""); - auto entry3 = makeEntry("\"8---\""); + auto entry1 = makeEntry("\"1---\"", result.localVocab()); + auto entry2 = makeEntry("\"6---\"", result.localVocab()); + auto entry3 = makeEntry("\"8---\"", result.localVocab()); ASSERT_TRUE(entry1.has_value()); ASSERT_TRUE(entry2.has_value()); diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index e0f7797597..5b3591dea6 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -406,18 +406,18 @@ TEST(Operation, ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd) { index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }}; CustomGeneratorOperation operation{ - &context, [](const IdTable& idTable) -> cppcoro::generator { + &context, [](const IdTable& idTable) -> Result::Generator { std::this_thread::sleep_for(50ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; // This one should not trigger because it's below the 50ms threshold std::this_thread::sleep_for(30ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; std::this_thread::sleep_for(30ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; // This one should not trigger directly, but trigger because it's the // last one std::this_thread::sleep_for(30ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; }(idTable)}; ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; @@ -449,9 +449,9 @@ TEST(Operation, ensureSignalUpdateIsCalledAtTheEndOfPartialConsumption) { index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }}; CustomGeneratorOperation operation{ - &context, [](const IdTable& idTable) -> cppcoro::generator { - co_yield idTable.clone(); - co_yield idTable.clone(); + &context, [](const IdTable& idTable) -> Result::Generator { + co_yield {idTable.clone(), LocalVocab{}}; + co_yield {idTable.clone(), LocalVocab{}}; }(idTable)}; { @@ -523,7 +523,8 @@ TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough) { timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); - for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : + cacheValue.resultTable().idTables()) { } auto aggregatedValue = qec->getQueryTreeCache().getIfContained("test"); @@ -575,7 +576,8 @@ TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge) { EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); qec->getQueryTreeCache().setMaxSizeSingleEntry(originalSize); - for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : + cacheValue.resultTable().idTables()) { } EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); @@ -597,7 +599,8 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) { timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); - for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : + cacheValue.resultTable().idTables()) { } EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index a03b3034db..0dfc6e8bc1 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -17,9 +17,8 @@ using ::testing::Values; namespace { // Helper function to generate all possible splits of an IdTable in order to // exhaustively test generator variants. -std::vector> getAllSubSplits( - const IdTable& idTable) { - std::vector> result; +std::vector getAllSubSplits(const IdTable& idTable) { + std::vector result; for (size_t i = 0; i < std::pow(idTable.size() - 1, 2); ++i) { std::vector reverseIndex{}; size_t copy = i; @@ -29,54 +28,48 @@ std::vector> getAllSubSplits( } copy /= 2; } - result.push_back( - [](auto split, IdTable clone) -> cppcoro::generator { - IdTable subSplit{clone.numColumns(), - ad_utility::makeUnlimitedAllocator()}; - size_t splitIndex = 0; - for (size_t i = 0; i < clone.size(); ++i) { - subSplit.push_back(clone[i]); - if (splitIndex < split.size() && split[splitIndex] == i) { - co_yield subSplit; - subSplit.clear(); - ++splitIndex; - } - } - if (subSplit.size() > 0) { - co_yield subSplit; - } - }(std::move(reverseIndex), idTable.clone())); + result.push_back([](auto split, IdTable clone) -> Result::Generator { + IdTable subSplit{clone.numColumns(), + ad_utility::makeUnlimitedAllocator()}; + size_t splitIndex = 0; + for (size_t i = 0; i < clone.size(); ++i) { + subSplit.push_back(clone[i]); + if (splitIndex < split.size() && split[splitIndex] == i) { + Result::IdTableVocabPair pair{std::move(subSplit), LocalVocab{}}; + co_yield pair; + // Move back if not moved out to reuse buffer. + subSplit = std::move(pair.idTable_); + subSplit.clear(); + ++splitIndex; + } + } + if (subSplit.size() > 0) { + co_yield {std::move(subSplit), LocalVocab{}}; + } + }(std::move(reverseIndex), idTable.clone())); } return result; } // _____________________________________________________________________________ -void consumeGenerator(cppcoro::generator& generator) { - for ([[maybe_unused]] IdTable& _ : generator) { +void consumeGenerator(Result::Generator& generator) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : generator) { } } } // namespace TEST(Result, verifyIdTableThrowsWhenActuallyLazy) { - Result result1{ - []() -> cppcoro::generator { co_return; }(), {}, LocalVocab{}}; - EXPECT_FALSE(result1.isFullyMaterialized()); - EXPECT_THROW(result1.idTable(), ad_utility::Exception); - - Result result2{[]() -> cppcoro::generator { co_return; }(), - {}, - result1.getSharedLocalVocab()}; - EXPECT_FALSE(result2.isFullyMaterialized()); - EXPECT_THROW(result2.idTable(), ad_utility::Exception); + Result result{[]() -> Result::Generator { co_return; }(), {}}; + EXPECT_FALSE(result.isFullyMaterialized()); + EXPECT_THROW(result.idTable(), ad_utility::Exception); } // _____________________________________________________________________________ TEST(Result, verifyIdTableThrowsOnSecondAccess) { - const Result result{ - []() -> cppcoro::generator { co_return; }(), {}, LocalVocab{}}; + const Result result{[]() -> Result::Generator { co_return; }(), {}}; // First access should work - for ([[maybe_unused]] IdTable& _ : result.idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : result.idTables()) { ADD_FAILURE() << "Generator is empty"; } // Now it should throw @@ -108,7 +101,7 @@ TEST_P(ResultSortTest, verifyAssertSortOrderIsRespectedSucceedsWhenSorted) { auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), std::get<1>(GetParam()), LocalVocab{}}; + Result result{std::move(generator), std::get<1>(GetParam())}; if (std::get<0>(GetParam())) { EXPECT_NO_THROW(consumeGenerator(result.idTables())); } else { @@ -147,7 +140,7 @@ TEST(Result, (Result{idTable.clone(), {3}, LocalVocab{}}), matcher, Exception); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {3}, LocalVocab{}}; + Result result{std::move(generator), {3}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(consumeGenerator(result.idTables()), matcher, Exception); } @@ -156,7 +149,7 @@ TEST(Result, (Result{idTable.clone(), {2, 1337}, LocalVocab{}}), matcher, Exception); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {2, 1337}, LocalVocab{}}; + Result result{std::move(generator), {2, 1337}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(consumeGenerator(result.idTables()), matcher, Exception); } @@ -178,17 +171,15 @@ TEST(Result, verifyRunOnNewChunkComputedFiresCorrectly) { auto idTable2 = makeIdTableFromVector({{3, 4, 0}}); auto idTable3 = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); - Result result{ - [](auto& t1, auto& t2, auto& t3) -> cppcoro::generator { - std::this_thread::sleep_for(1ms); - co_yield t1; - std::this_thread::sleep_for(3ms); - co_yield t2; - std::this_thread::sleep_for(5ms); - co_yield t3; - }(idTable1, idTable2, idTable3), - {}, - LocalVocab{}}; + Result result{[](auto& t1, auto& t2, auto& t3) -> Result::Generator { + std::this_thread::sleep_for(1ms); + co_yield {t1.clone(), LocalVocab{}}; + std::this_thread::sleep_for(3ms); + co_yield {t2.clone(), LocalVocab{}}; + std::this_thread::sleep_for(5ms); + co_yield {t3.clone(), LocalVocab{}}; + }(idTable1, idTable2, idTable3), + {}}; uint32_t callCounter = 0; bool finishedConsuming = false; @@ -196,13 +187,13 @@ TEST(Result, verifyRunOnNewChunkComputedFiresCorrectly) { [&](const IdTable& idTable, std::chrono::microseconds duration) { ++callCounter; if (callCounter == 1) { - EXPECT_EQ(&idTable1, &idTable); + EXPECT_EQ(idTable1, idTable); EXPECT_GE(duration, 1ms); } else if (callCounter == 2) { - EXPECT_EQ(&idTable2, &idTable); + EXPECT_EQ(idTable2, idTable); EXPECT_GE(duration, 3ms); } else if (callCounter == 3) { - EXPECT_EQ(&idTable3, &idTable); + EXPECT_EQ(idTable3, idTable); EXPECT_GE(duration, 5ms); } }, @@ -220,12 +211,11 @@ TEST(Result, verifyRunOnNewChunkComputedFiresCorrectly) { // _____________________________________________________________________________ TEST(Result, verifyRunOnNewChunkCallsFinishOnError) { Result result{ - []() -> cppcoro::generator { + []() -> Result::Generator { throw std::runtime_error{"verifyRunOnNewChunkCallsFinishOnError"}; co_return; }(), - {}, - LocalVocab{}}; + {}}; uint32_t callCounterGenerator = 0; uint32_t callCounterFinished = 0; @@ -252,11 +242,10 @@ TEST(Result, verifyRunOnNewChunkCallsFinishOnPartialConsumption) { uint32_t callCounterFinished = 0; { - Result result{[](IdTable idTable) -> cppcoro::generator { - co_yield idTable; + Result result{[](IdTable idTable) -> Result::Generator { + co_yield {std::move(idTable), LocalVocab{}}; }(makeIdTableFromVector({{}})), - {}, - LocalVocab{}}; + {}}; result.runOnNewChunkComputed( [&](const IdTable&, std::chrono::microseconds) { @@ -277,11 +266,11 @@ TEST(Result, verifyRunOnNewChunkCallsFinishOnPartialConsumption) { // _____________________________________________________________________________ TEST(Result, verifyCacheDuringConsumptionThrowsWhenFullyMaterialized) { Result result{makeIdTableFromVector({{}}), {}, LocalVocab{}}; - EXPECT_THROW( - result.cacheDuringConsumption( - [](const std::optional&, const IdTable&) { return true; }, - [](Result) {}), - ad_utility::Exception); + EXPECT_THROW(result.cacheDuringConsumption( + [](const std::optional&, + const Result::IdTableVocabPair&) { return true; }, + [](Result) {}), + ad_utility::Exception); } // _____________________________________________________________________________ @@ -290,16 +279,17 @@ TEST(Result, verifyCacheDuringConsumptionRespectsPassedParameters) { // Test positive case for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {0}, LocalVocab{}}; + Result result{std::move(generator), {0}}; result.cacheDuringConsumption( - [predictedSize = 0](const std::optional& aggregator, - const IdTable& newTable) mutable { + [predictedSize = 0]( + const std::optional& aggregator, + const Result::IdTableVocabPair& newTable) mutable { if (aggregator.has_value()) { - EXPECT_EQ(aggregator.value().numColumns(), predictedSize); + EXPECT_EQ(aggregator.value().idTable_.numColumns(), predictedSize); } else { EXPECT_EQ(predictedSize, 0); } - predictedSize += newTable.numColumns(); + predictedSize += newTable.idTable_.numColumns(); return true; }, [&](Result aggregatedResult) { @@ -312,9 +302,10 @@ TEST(Result, verifyCacheDuringConsumptionRespectsPassedParameters) { // Test negative case for (auto& generator : getAllSubSplits(idTable)) { uint32_t callCounter = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.cacheDuringConsumption( - [&](const std::optional& aggregator, const IdTable&) { + [&](const std::optional& aggregator, + const Result::IdTableVocabPair&) { EXPECT_FALSE(aggregator.has_value()); ++callCounter; return false; @@ -346,7 +337,7 @@ TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { for (auto& generator : getAllSubSplits(idTable)) { std::vector colSizes{}; uint32_t totalRows = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { // NOTE: duration can't be tested here, processors are too fast @@ -365,7 +356,7 @@ TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { EXPECT_TRUE(colSizes.empty()); for (const auto& innerTable : result.idTables()) { - for (const auto& row : innerTable) { + for (const auto& row : innerTable.idTable_) { ASSERT_EQ(row.size(), 2); // Make sure we never get values that were supposed to be filtered // out. @@ -396,7 +387,7 @@ TEST(Result, verifyApplyLimitOffsetHandlesZeroLimitCorrectly) { for (auto& generator : getAllSubSplits(idTable)) { uint32_t callCounter = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable&) { ++callCounter; }); @@ -424,7 +415,7 @@ TEST(Result, verifyApplyLimitOffsetHandlesNonZeroOffsetWithoutLimitCorrectly) { for (auto& generator : getAllSubSplits(idTable)) { uint32_t callCounter = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { for (const auto& row : innerTable) { @@ -457,7 +448,7 @@ TEST(Result, verifyApplyLimitOffsetIsNoOpWhenLimitClauseIsRedundant) { } for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable&) { ++callCounter; }); @@ -487,7 +478,7 @@ TEST_P(ResultLimitTest, } for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.assertThatLimitWasRespected(std::get<1>(GetParam())); if (std::get<0>(GetParam())) { @@ -538,7 +529,7 @@ TEST_P(ResultDefinednessTest, } } for (auto& generator : getAllSubSplits(*std::get<1>(GetParam()))) { - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.checkDefinedness(map); if (std::get<0>(GetParam())) { EXPECT_NO_THROW(consumeGenerator(result.idTables())); diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index ff66ca9938..8ee80703e7 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -213,30 +213,42 @@ TEST_F(ServiceTest, computeResult) { // compute resulting idTable IdTable idTable{2, ad_utility::testing::makeAllocator()}; - for (auto& row : result.idTables()) { - idTable.insertAtEnd(row); + std::vector localVocabs{}; + for (auto& pair : result.idTables()) { + idTable.insertAtEnd(pair.idTable_); + localVocabs.emplace_back(std::move(pair.localVocab_)); } // create expected idTable - const auto& localVocab = result.localVocab(); - auto get = [&localVocab](const std::string& s) { - return localVocab.getIndexOrNullopt( - ad_utility::triple_component::LiteralOrIri::iriref(s)); + auto get = + [&localVocabs]( + const std::string& s) -> std::optional { + for (const LocalVocab& localVocab : localVocabs) { + auto index = localVocab.getIndexOrNullopt( + ad_utility::triple_component::LiteralOrIri::iriref(s)); + if (index.has_value()) { + return index; + } + } + return std::nullopt; }; std::vector> idVector; std::map ids; + size_t indexCounter = 0; for (auto& row : expIdTableVector) { auto& idVecRow = idVector.emplace_back(); for (auto& e : row) { if (!ids.contains(e)) { - auto idx = get(absl::StrCat("<", e, ">")); - ASSERT_TRUE(idx); + auto str = absl::StrCat("<", e, ">"); + auto idx = get(str); + ASSERT_TRUE(idx) << '\'' << str << "' not in local vocab"; ids.insert({e, Id::makeFromLocalVocabIndex(idx.value())}); + ++indexCounter; } idVecRow.emplace_back(ids.at(e)); } } - EXPECT_EQ(localVocab.size(), ids.size()); + EXPECT_EQ(indexCounter, ids.size()); EXPECT_EQ(idTable, makeIdTableFromVector(idVector)); }; diff --git a/test/UnionTest.cpp b/test/UnionTest.cpp index 7d563e60c7..b0f52f0420 100644 --- a/test/UnionTest.cpp +++ b/test/UnionTest.cpp @@ -103,11 +103,11 @@ TEST(Union, computeUnionLazy) { auto iterator = result.begin(); ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected1); + ASSERT_EQ(iterator->idTable_, expected1); ++iterator; ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected2); + ASSERT_EQ(iterator->idTable_, expected2); ASSERT_EQ(++iterator, result.end()); }; @@ -142,11 +142,11 @@ TEST(Union, ensurePermutationIsAppliedCorrectly) { auto iterator = result.begin(); ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected1); + ASSERT_EQ(iterator->idTable_, expected1); ++iterator; ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected2); + ASSERT_EQ(iterator->idTable_, expected2); ASSERT_EQ(++iterator, result.end()); } diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp index d0f4309f56..cecec3c4c7 100644 --- a/test/engine/BindTest.cpp +++ b/test/engine/BindTest.cpp @@ -44,7 +44,7 @@ void expectBindYieldsIdTable( auto& idTables = result->idTables(); auto iterator = idTables.begin(); ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(*iterator, expected); + EXPECT_EQ(iterator->idTable_, expected); EXPECT_EQ(++iterator, idTables.end()); } } @@ -125,9 +125,9 @@ TEST( auto& idTables = result->idTables(); auto iterator = idTables.begin(); ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(*iterator, table); + EXPECT_EQ(iterator->idTable_, table); ASSERT_NE(++iterator, idTables.end()); - EXPECT_EQ(*iterator, makeIdTableFromVector({{val, val}})); + EXPECT_EQ(iterator->idTable_, makeIdTableFromVector({{val, val}})); EXPECT_EQ(++iterator, idTables.end()); } } diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 255973b38c..58091931cd 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -485,8 +485,8 @@ TEST(IndexScan, computeResultCanBeConsumedLazily) { IdTable resultTable{3, ad_utility::makeUnlimitedAllocator()}; - for (IdTable& idTable : result.idTables()) { - resultTable.insertAtEnd(idTable); + for (Result::IdTableVocabPair& pair : result.idTables()) { + resultTable.insertAtEnd(pair.idTable_); } EXPECT_EQ(resultTable, @@ -505,7 +505,7 @@ TEST(IndexScan, computeResultReturnsEmptyGeneratorIfScanIsEmpty) { ASSERT_FALSE(result.isFullyMaterialized()); - for ([[maybe_unused]] IdTable& idTable : result.idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& pair : result.idTables()) { ADD_FAILURE() << "Generator should be empty" << std::endl; } } diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 70ee183540..6ebec0fc9e 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -88,12 +88,13 @@ class ValuesForTesting : public Operation { for (const IdTable& idTable : tables_) { clones.push_back(idTable.clone()); } - auto generator = [](auto idTables) -> cppcoro::generator { + auto generator = [](auto idTables, + LocalVocab localVocab) -> Result::Generator { for (IdTable& idTable : idTables) { - co_yield std::move(idTable); + co_yield {std::move(idTable), localVocab.clone()}; } - }(std::move(clones)); - return {std::move(generator), resultSortedOn(), localVocab_.clone()}; + }(std::move(clones), localVocab_.clone()); + return {std::move(generator), resultSortedOn()}; } std::optional optionalTable; if (tables_.size() > 1) { diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index 480b76536a..a2d0fdc816 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -106,19 +106,19 @@ class AlwaysFailOperation : public Operation { if (!requestLaziness) { throw std::runtime_error{"AlwaysFailOperation"}; } - return {[]() -> cppcoro::generator { + return {[]() -> Result::Generator { throw std::runtime_error{"AlwaysFailOperation"}; // Required so that the exception only occurs within the generator co_return; }(), - resultSortedOn(), LocalVocab{}}; + resultSortedOn()}; } }; // Lazy operation that will yield a result with a custom generator you can // provide via the constructor. class CustomGeneratorOperation : public Operation { - cppcoro::generator generator_; + Result::Generator generator_; std::vector getChildren() override { return {}; } string getCacheKeyImpl() const override { AD_FAIL(); } string getDescriptor() const override { @@ -134,11 +134,11 @@ class CustomGeneratorOperation : public Operation { public: CustomGeneratorOperation(QueryExecutionContext* context, - cppcoro::generator generator) + Result::Generator generator) : Operation{context}, generator_{std::move(generator)} {} ProtoResult computeResult(bool requestLaziness) override { AD_CONTRACT_CHECK(requestLaziness); - return {std::move(generator_), resultSortedOn(), LocalVocab{}}; + return {std::move(generator_), resultSortedOn()}; } };