Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implements websearch_to_tsquery #1936

Merged
merged 28 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b180c7b
fix: adds migration to store correct data type
as1729 Sep 14, 2023
bb5ff1e
Merge branch '_staging' into as/1829-fix-opportunity-status
TylerHendrickson Sep 15, 2023
ada7712
Merge branch '_staging' into as/1829-fix-opportunity-status
as1729 Sep 15, 2023
fc6e06c
Merge branch '_staging' into as/1829-fix-opportunity-status
as1729 Sep 15, 2023
3fbf24d
fix: removes migration as it was added separately
as1729 Sep 15, 2023
d2ad2c2
fix: opportunity_status filteration
as1729 Sep 15, 2023
d99b698
Merge branch '_staging' into as/1829-fix-opportunity-status
as1729 Sep 15, 2023
ccab740
feat: implements websearch_to_tsquery
as1729 Sep 15, 2023
fe8300e
chore: merge latest _staging
as1729 Sep 18, 2023
40323e3
Merge branch '_staging' into as/1829-fix-phrase-query
as1729 Sep 18, 2023
edf8dff
fix: parameters passed into tsq
as1729 Sep 18, 2023
4b0bee0
chore: add log to debug test failure
as1729 Sep 18, 2023
3800f5b
chore: try something different to
as1729 Sep 18, 2023
5ae540e
chore: test again
as1729 Sep 18, 2023
8d6ccc3
chore: words work now try with phrase
as1729 Sep 18, 2023
a8c991d
chore: add colon to the query
as1729 Sep 18, 2023
ff3e645
chore: add spaces
as1729 Sep 18, 2023
8b340e4
fix: try a few ways
as1729 Sep 18, 2023
2892ce1
chore: prints the response
as1729 Sep 18, 2023
2f43344
chore: try removing the colon
as1729 Sep 18, 2023
af82016
fix: removes colon for good
as1729 Sep 18, 2023
d72f379
fix: ensure FE does not send special chars
as1729 Sep 18, 2023
c8ce338
fix: check for existence before processing keyword
as1729 Sep 18, 2023
1d4a5bf
fix: check existence
as1729 Sep 18, 2023
7b6b302
fix: check existence correctly
as1729 Sep 18, 2023
2a216e4
fix: array checking
as1729 Sep 18, 2023
ed787a7
fix: duplicate group-by columns
as1729 Sep 19, 2023
7c1c2a1
Merge branch '_staging' into as/1829-fix-phrase-query
as1729 Sep 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions packages/client/src/store/modules/grants.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ function buildGrantsNextQuery({ filters, ordering, pagination }) {
*/
const criteria = { ...filters };
// Validate and fix the inputs into appropriate types.
criteria.includeKeywords = criteria.includeKeywords && criteria.includeKeywords.length > 0 ? criteria.includeKeywords.split(',').map((k) => k.trim()) : null;
criteria.excludeKeywords = criteria.excludeKeywords && criteria.excludeKeywords.length > 0 ? criteria.excludeKeywords.split(',').map((k) => k.trim()) : null;
criteria.includeKeywords = criteria.includeKeywords && criteria.includeKeywords.length > 0 ? criteria.includeKeywords.split(',').map((k) => k.trim().replace(/[^\w\s]/gi, '')) : null;
criteria.excludeKeywords = criteria.excludeKeywords && criteria.excludeKeywords.length > 0 ? criteria.excludeKeywords.split(',').map((k) => k.trim().replace(/[^\w\s]/gi, '')) : null;
criteria.eligibility = criteria.eligibility?.map((e) => e.code);
criteria.fundingTypes = criteria.fundingTypes?.map((f) => f.code);
criteria.bill = criteria.bill === 'All Bills' ? null : criteria.bill;
Expand Down
3 changes: 2 additions & 1 deletion packages/server/__tests__/api/grants.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ describe('`/api/grants` endpoint', () => {
context('GET /api/grants/exportCSVNew', () => {
it('produces correct column format', async () => {
// We constrain the result to a single grant that's listed in seeds/dev/ref/grants.js
const query = '?criteria[includeKeywords]=Community%20Health%20Aide Program:%20%20Tribal%20Planning';
const query = '?criteria[includeKeywords]=Community Health Aide Program Tribal';
const response = await fetchApi(`/grants/exportCSVNew${query}`, agencies.own, fetchOptions.staff);

expect(response.statusText).to.equal('OK');
Expand All @@ -490,6 +490,7 @@ describe('`/api/grants` endpoint', () => {
'URL',
];
const txt = await response.text();

expect(txt.split('\n')[0]).to.equal(expectedCsvHeaders.join(','));
expect(txt.split('\n')[1]).to.contain('HHS-2021-IHS-TPI-0001,Community Health Aide Program: Tribal Planning &');
});
Expand Down
150 changes: 38 additions & 112 deletions packages/server/src/db/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -381,90 +381,56 @@ async function buildPaginationParams(args) {
return { currentPage, perPage, isLengthAware };
}

function orIncludeMap(keywords) {
return keywords.map((kw) => `(${kw})`).join(' | ');
function isValidArray(value) {
return Array.isArray(value) && value.length > 0;
}

function andExcludeMap(keywords) {
return keywords.map((kw) => `!(${kw})`).join(' & ');
}
function buildTsqExpression(includeKeywords, excludeKeywords) {
if (!isValidArray(includeKeywords) && !isValidArray(excludeKeywords)) {
return null;
}

function splitKeywords(keywords) {
const result = { phrases: [], words: [] };
const signedKeywords = { include: [], exclude: [] };

if (!keywords) {
return result;
// wrap phrases in double quotes and ensure keywords have the correct operator
if (isValidArray(includeKeywords)) {
includeKeywords.forEach((ik) => { if (ik.indexOf(' ') > 0) { signedKeywords.include.push(`"${ik}"`); } else { signedKeywords.include.push(ik); } });
}

for (const keyword of keywords) {
if (keyword.indexOf(' ') > 0) {
result.phrases.push(keyword);
} else {
result.words.push(keyword);
}
if (isValidArray(excludeKeywords)) {
excludeKeywords.forEach((ek) => { if (ek.indexOf(' ') > 0) { signedKeywords.exclude.push(`-"${ek}"`); } else { signedKeywords.exclude.push(`-${ek}`); } });
}

return result;
}

function buildTsqExpression(includeKeywords, excludeKeywords) {
const keywords = {
include: { phrases: [], words: [] },
exclude: { phrases: [], words: [] },
expressions: { phrase: '', word: '' },
};
const validExpressions = [];

// filter out the keywords as words or phrases
keywords.include = splitKeywords(includeKeywords, keywords.include);
keywords.exclude = splitKeywords(excludeKeywords, keywords.exclude);

// For each type of keyword, build the relevant tsquery expression
if (keywords.include.phrases.length > 0) {
keywords.expressions.phrase = orIncludeMap(keywords.include.phrases);
}
if (keywords.include.words.length > 0) {
keywords.expressions.word = orIncludeMap(keywords.include.words);
}
if (keywords.exclude.phrases.length > 0) {
keywords.expressions.phrase = `${keywords.expressions.phrase}${keywords.expressions.phrase ? ' & ' : ''}${andExcludeMap(keywords.exclude.phrases)}`;
const includeExpression = signedKeywords.include.join(' or ');
if (includeExpression.length > 0) {
validExpressions.push(includeExpression);
}
if (keywords.exclude.words.length > 0) {
keywords.expressions.word = `${keywords.expressions.word}${keywords.expressions.word ? ' & ' : ''}${andExcludeMap(keywords.exclude.words)}`;
const excludeExpression = signedKeywords.exclude.join(' ');
if (excludeExpression.length > 0) {
validExpressions.push(excludeExpression);
}

return keywords.expressions;
const phrase = validExpressions.join(' ');

return phrase;
}

function buildKeywordQuery(queryBuilder, includeKeywords, excludeKeywords, orderingParams) {
const tsqExpression = buildTsqExpression(includeKeywords, excludeKeywords);
if (tsqExpression.phrase) {
queryBuilder.joinRaw(`cross join phraseto_tsquery('english', ?) as tsqp`, tsqExpression.phrase);
if (tsqExpression) {
queryBuilder.joinRaw(`cross join websearch_to_tsquery('english', ?) as tsqp`, tsqExpression);
queryBuilder.andWhere((q) => {
q.where('tsqp', '@@', knex.raw('title_ts'))
.orWhere('tsqp', '@@', knex.raw('description_ts'));
return q;
});
if (orderingParams.orderBy !== undefined) {
queryBuilder.select(
knex.raw(`ts_rank(title_ts, tsqp) as rank_title_phrase`),
knex.raw(`ts_rank(grants.description_ts, tsqp) as rank_description_phrase`),
);
queryBuilder.groupBy('rank_title_phrase', 'rank_description_phrase');
}
}
if (tsqExpression.word) {
queryBuilder.joinRaw(`cross join to_tsquery('english', ?) as tsq`, tsqExpression.word);
queryBuilder.andWhere((q) => {
q.where('tsq', '@@', knex.raw('title_ts'))
.orWhere('tsq', '@@', knex.raw('description_ts'));
return q;
});
if (orderingParams.orderBy !== undefined) {
queryBuilder.select(
knex.raw(`ts_rank(title_ts, tsq) as rank_title_word`),
knex.raw(`ts_rank(grants.description_ts, tsq) as rank_description_word`),
knex.raw(`ts_rank(title_ts, tsqp) as rank_title`),
knex.raw(`ts_rank(grants.description_ts, tsqp) as rank_description`),
);
queryBuilder.groupBy('rank_title_word', 'rank_description_word');
queryBuilder.groupBy('rank_title', 'rank_description');
}
}
}
Expand Down Expand Up @@ -546,25 +512,10 @@ function grantsQuery(queryBuilder, filters, agencyId, orderingParams, pagination
queryBuilder.orderBy(`${TABLES.grants_viewed}.grant_id`, orderArgs[1]);
queryBuilder.orderBy(`${TABLES.grants}.grant_id`, orderArgs[1]);
} else if (orderingParams.orderBy.includes('rank')) {
const rankColumns = new Set();
for (const statement of queryBuilder._statements) { // eslint-disable-line no-underscore-dangle
if (statement.grouping === 'columns') {
for (const val of statement.value) {
if (val && val.sql) {
if (val.sql.includes('rank_title_word')) {
rankColumns.add({ column: 'rank_title_word', order: 'desc' });
} else if (val.sql.includes('rank_description_word')) {
rankColumns.add({ column: 'rank_description_word', order: 'desc' });
} else if (val.sql.includes('rank_title_phrase')) {
rankColumns.add({ column: 'rank_title_phrase', order: 'desc' });
} else if (val.sql.includes('rank_description_phrase')) {
rankColumns.add({ column: 'rank_description_phrase', order: 'desc' });
}
}
}
}
}
queryBuilder.orderBy([...rankColumns]);
queryBuilder.orderBy([
{ column: 'rank_title', order: 'desc' },
{ column: 'rank_description', order: 'desc' },
]);
} else {
const orderArgs = orderingParams.orderBy.split('|');
const orderDirection = ((orderingParams.orderDesc === 'true') ? 'desc' : 'asc');
Expand Down Expand Up @@ -676,42 +627,17 @@ async function getGrantsNew(filters, paginationParams, orderingParams, tenantId,
'grants.description_ts',
'grants.funding_instrument_codes',
'grants.bill',
'grants.grant_number',
'grants.title',
'grants.status',
'grants.agency_code',
'grants.award_ceiling',
'grants.cost_sharing',
'grants.cfda_list',
'grants.open_date',
'grants.close_date',
'grants.reviewer_name',
'grants.opportunity_category',
'grants.search_terms',
'grants.notes',
'grants.created_at',
'grants.updated_at',
'grants.description',
'grants.eligibility_codes',
'grants.raw_body',
'grants.award_floor',
'grants.revision_id',
'grants.title_ts',
'grants.description_ts',
'grants.funding_instrument_codes',
'grants.bill',
);

const counts = await knex.with('filtered_grants', (qb) => {
qb.modify((q) => grantsQuery(q, filters, agencyId, { orderBy: undefined }, null))
.select([
'grants.grant_id',
'grants.open_date',
'grants.close_date',
'grants.archive_date',
])
.from('grants')
qb.select([
'grants.grant_id',
'grants.open_date',
'grants.close_date',
'grants.archive_date',
]).from('grants')
.groupBy('grants.grant_id', 'grants.open_date', 'grants.close_date', 'grants.archive_date');
qb.modify((q) => grantsQuery(q, filters, agencyId, { orderBy: undefined }, null));
}).countDistinct('filtered_grants.grant_id as total_grants').from('filtered_grants');

const pagination = {
Expand Down
Loading