Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parser: add separate grammar for > and < date operators #162

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions inspire_query_parser/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,14 @@ class GreaterThanOp(UnaryOp):
pass


class GreaterThanDateOp(UnaryOp):
    """Unary AST node for a strict "greater than" comparison on a date value.

    Kept separate from ``GreaterThanOp`` so visitors can tell date comparisons
    apart from generic ones.
    """


class LessThanDateOp(UnaryOp):
    """Unary AST node for a strict "less than" comparison on a date value.

    Kept separate from ``LessThanOp`` so visitors can tell date comparisons
    apart from generic ones.
    """


class LessThanOp(UnaryOp):
    """Unary AST node for a strict "less than" comparison."""

Expand Down
28 changes: 22 additions & 6 deletions inspire_query_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ class SimpleValueWithColonUnit(SimpleValueUnit):


class SimpleDateValueUnit(LeafRule):
grammar = re.compile(r"[\d*\-\.\/]{4,10}(?=($|\s|\)))", re.UNICODE)
grammar = re.compile(r"[\d*\-\.\/\_]{1,10}(?=($|\s|\)))", re.UNICODE)
date_specifiers_regex = re.compile(r"({})\s*(-\s*\d+)?".format('|'.join(DATE_SPECIFIERS_COLLECTION)), re.UNICODE)
string_month_date_regex = re.compile(MONTH_REGEX, re.IGNORECASE)

Expand Down Expand Up @@ -555,6 +555,7 @@ def parse(cls, parser, text, pos):
GreaterEqualOp,
LessEqualOp,
GreaterThanOp,
GreaterThanDateOp,
LessThanOp,
ComplexValue
]
Expand Down Expand Up @@ -600,7 +601,6 @@ def parse(cls, parser, text, pos):
SimpleValueNegation,
SimpleValue,
SimpleDateValueNegation,
SimpleDateValue,
]
)

Expand Down Expand Up @@ -652,7 +652,15 @@ class GreaterThanOp(UnaryRule):
Supports queries like author-count > 2000 or date after 10-2000.
"""
grammar = omit(re.compile(r"after|>", re.IGNORECASE)), attr('op', [SimpleDateValue, SimpleValue])
grammar = omit(re.compile(r">", re.IGNORECASE)), attr('op', [SimpleValue])


class GreaterThanDateOp(UnaryRule):
    """Greater than operator restricted to date values.

    Matches ``after`` or ``>`` (case-insensitively) followed by a
    ``SimpleDateValue``; the operator text itself is omitted from the parse
    result and the date is stored in the ``op`` attribute.

    Supports queries like ``date > 2013``, ``du > yesterday - 2`` or
    ``date after 10/2000``. Non-date operands are not accepted here; plain
    ``>`` comparisons on other values are handled by ``GreaterThanOp``.
    """
    grammar = omit(re.compile(r"after|>", re.IGNORECASE)), attr('op', [SimpleDateValue])


class GreaterEqualOp(UnaryRule):
Expand All @@ -673,7 +681,15 @@ class LessThanOp(UnaryRule):
Supports queries like author-count < 100 or date before 1984.
"""
grammar = omit(re.compile(r"before|<", re.IGNORECASE)), attr('op', [SimpleDateValue, SimpleValue])
grammar = omit(re.compile(r"<", re.IGNORECASE)), attr('op', [SimpleValue])


class LessThanDateOp(UnaryRule):
    """Less than operator restricted to date values.

    Matches ``before`` or ``<`` (case-insensitively) followed by a
    ``SimpleDateValue``; the operator text itself is omitted from the parse
    result and the date is stored in the ``op`` attribute.

    Supports queries like ``date < 2000-12`` or ``date before 1984``.
    Non-date operands are not accepted here; plain ``<`` comparisons on other
    values are handled by ``LessThanOp``.
    """
    grammar = omit(re.compile(r"before|<", re.IGNORECASE)), attr('op', [SimpleDateValue])


class LessEqualOp(UnaryRule):
Expand Down Expand Up @@ -740,8 +756,8 @@ class DateValue(UnaryRule):
(optional(omit(Literal("="))), RangeOp),
GreaterEqualOp,
LessEqualOp,
GreaterThanOp,
LessThanOp,
GreaterThanDateOp,
LessThanDateOp,
(
optional(omit(Literal("="))),
[
Expand Down
6 changes: 6 additions & 0 deletions inspire_query_parser/visitors/elastic_search_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,18 @@ def visit_range_op(self, node, fieldnames):
def visit_greater_than_op(self, node, fieldnames):
    """Build range queries with an exclusive lower bound (``gt``).

    ``fieldnames`` may be a single name or a collection; it is normalised
    with ``force_list`` before being handed to ``_generate_range_queries``.
    """
    target_fields = force_list(fieldnames)
    bounds = {'gt': node.op.value}
    return self._generate_range_queries(target_fields, bounds)

def visit_greater_than_date_op(self, node, fieldnames):
    """Build range queries with an exclusive lower bound (``gt``) for a date node.

    Produces the same query shape as ``visit_greater_than_op``; the separate
    entry point exists because date comparisons are a distinct node type.
    """
    target_fields = force_list(fieldnames)
    bounds = {'gt': node.op.value}
    return self._generate_range_queries(target_fields, bounds)

def visit_greater_equal_than_op(self, node, fieldnames):
    """Build range queries with an inclusive lower bound (``gte``).

    ``fieldnames`` is normalised with ``force_list`` before being handed to
    ``_generate_range_queries``.
    """
    target_fields = force_list(fieldnames)
    bounds = {'gte': node.op.value}
    return self._generate_range_queries(target_fields, bounds)

def visit_less_than_op(self, node, fieldnames):
    """Build range queries with an exclusive upper bound (``lt``).

    ``fieldnames`` is normalised with ``force_list`` before being handed to
    ``_generate_range_queries``.
    """
    target_fields = force_list(fieldnames)
    bounds = {'lt': node.op.value}
    return self._generate_range_queries(target_fields, bounds)

def visit_less_than_date_op(self, node, fieldnames):
    """Build range queries with an exclusive upper bound (``lt``) for a date node.

    Produces the same query shape as ``visit_less_than_op``; the separate
    entry point exists because date comparisons are a distinct node type.
    """
    target_fields = force_list(fieldnames)
    bounds = {'lt': node.op.value}
    return self._generate_range_queries(target_fields, bounds)

def visit_less_equal_than_op(self, node, fieldnames):
    """Build range queries with an inclusive upper bound (``lte``).

    ``fieldnames`` is normalised with ``force_list`` before being handed to
    ``_generate_range_queries``.
    """
    target_fields = force_list(fieldnames)
    bounds = {'lte': node.op.value}
    return self._generate_range_queries(target_fields, bounds)

Expand Down
6 changes: 6 additions & 0 deletions inspire_query_parser/visitors/restructuring_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ def visit_value(self, node):
def visit_range_op(self, node):
    """Convert a parser range node into ``ast.RangeOp``.

    Both ends are visited first (left, then right) so the resulting AST node
    wraps already-restructured children.
    """
    left_ast = node.left.accept(self)
    right_ast = node.right.accept(self)
    return ast.RangeOp(left_ast, right_ast)

def visit_greater_than_date_op(self, node):
    """Convert a parser ``GreaterThanDateOp`` node into ``ast.GreaterThanDateOp``.

    The operand is visited first so the AST node wraps the restructured value.
    """
    operand = node.op.accept(self)
    return ast.GreaterThanDateOp(operand)

def visit_greater_than_op(self, node):
    """Convert a parser ``GreaterThanOp`` node into ``ast.GreaterThanOp``.

    The operand is visited first so the AST node wraps the restructured value.
    """
    operand = node.op.accept(self)
    return ast.GreaterThanOp(operand)

Expand All @@ -279,6 +282,9 @@ def visit_greater_equal_op(self, node):
return ast.GreaterEqualThanOp(value)

def visit_less_than_op(self, node):
    """Convert a parser ``LessThanOp`` node into ``ast.LessThanOp``.

    The operand is visited first so the AST node wraps the restructured value.
    """
    # Review thread attached to this method:
    #   Reviewer: "shall we just drop these ones?"
    #   Author:   "no, we can't, they're derived from a different grammar"
    #
    # NOTE(review): the two "less than" visitors previously returned each
    # other's AST type (this one built LessThanDateOp, the date one built
    # LessThanOp), unlike the greater-than pair. Test expectations that relied
    # on the swap (e.g. ``date < 2000-12`` yielding ``LessThanOp``) need to
    # expect ``LessThanDateOp`` instead.
    return ast.LessThanOp(node.op.accept(self))


def visit_less_than_date_op(self, node):
    """Convert a parser ``LessThanDateOp`` node into ``ast.LessThanDateOp``.

    The operand is visited first so the AST node wraps the restructured value.
    """
    return ast.LessThanDateOp(node.op.accept(self))

def visit_less_equal_op(self, node):
Expand Down
18 changes: 18 additions & 0 deletions tests/test_elastic_search_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3235,3 +3235,21 @@ def test_elastic_search_visitor_complex_query():
}
generated_es_query = _parse_query(query_str)
assert generated_es_query == expected_es_query


def test_elastic_search_visitor_regression_greater_than_for_non_date():
    """``after`` following a title keyword must stay part of the searched text.

    Regression check: the word must not be consumed as a greater-than
    operator when the operand is not a date.
    """
    expected = {
        "match": {
            "titles.full_title": {"query": "after something", "operator": "and"}
        }
    }
    assert _parse_query("t after something") == expected


def test_elastic_search_visitor_regression_less_than_for_non_date():
    """``before`` following a title keyword must stay part of the searched text.

    Regression check: the word must not be consumed as a less-than operator
    when the operand is not a date.
    """
    expected = {
        "match": {
            "titles.full_title": {"query": "before something", "operator": "and"}
        }
    }
    assert _parse_query("t before something") == expected
136 changes: 76 additions & 60 deletions tests/test_parser_functionality.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from inspire_query_parser.parser import (And, BooleanQuery, ComplexValue,
DateValue, EmptyQuery, Expression,
GreaterEqualOp, GreaterThanOp,
GreaterThanDateOp,
InspireDateKeyword, InspireKeyword,
InvenioKeywordQuery, LessEqualOp,
LessThanOp, MalformedQueryWords,
Expand Down Expand Up @@ -1762,64 +1763,6 @@
("", Query([EmptyQuery()])),
(" ", Query([EmptyQuery()])),
# G, GE, LT, LE, E queries
(
"date > 2000-10 and < 2000-12",
Query(
[
Statement(
BooleanQuery(
Expression(
SimpleQuery(
SpiresDateKeywordQuery(
InspireDateKeyword("date"),
DateValue(
GreaterThanOp(SimpleDateValue("2000-10"))
),
)
)
),
And(),
Statement(
Expression(
SimpleQuery(
Value(LessThanOp(SimpleDateValue("2000-12")))
)
)
),
)
)
]
),
),
(
"date after 10/2000 and before 2000-12",
Query(
[
Statement(
BooleanQuery(
Expression(
SimpleQuery(
SpiresDateKeywordQuery(
InspireDateKeyword("date"),
DateValue(
GreaterThanOp(SimpleDateValue("10/2000"))
),
)
)
),
And(),
Statement(
Expression(
SimpleQuery(
Value(LessThanOp(SimpleDateValue("2000-12")))
)
)
),
)
)
]
),
),
(
"date >= nov 2000 and d<=2005",
Query(
Expand Down Expand Up @@ -2070,7 +2013,7 @@
SpiresDateKeywordQuery(
InspireDateKeyword("date-updated"),
DateValue(
GreaterThanOp(SimpleDateValue("yesterday - 2"))
GreaterThanDateOp(SimpleDateValue("yesterday - 2"))
),
)
)
Expand Down Expand Up @@ -2112,7 +2055,7 @@
SpiresDateKeywordQuery(
InspireDateKeyword("date"),
DateValue(
GreaterThanOp(
GreaterThanDateOp(
SimpleDateValue("2013")
)
),
Expand Down Expand Up @@ -2335,3 +2278,76 @@ def test_parser_functionality(query_str, expected_parse_tree):
parser = StatefulParser()
_, parse_tree = parser.parse(query_str, Query)
assert parse_tree == expected_parse_tree


@pytest.mark.parametrize(
    ["query_str", "expected_parse_tree"],
    # A list (not a set literal) keeps the collection order deterministic and
    # does not require the expected parse trees to be hashable.
    [
        (
            "date > 2000-10 and < 2000-12",
            Query(
                [
                    Statement(
                        BooleanQuery(
                            Expression(
                                SimpleQuery(
                                    SpiresDateKeywordQuery(
                                        InspireDateKeyword("date"),
                                        DateValue(
                                            GreaterThanOp(SimpleDateValue("2000-10"))
                                        ),
                                    )
                                )
                            ),
                            And(),
                            Statement(
                                Expression(
                                    SimpleQuery(
                                        Value(LessThanOp(SimpleDateValue("2000-12")))
                                    )
                                )
                            ),
                        )
                    )
                ]
            ),
        ),
        (
            "date after 10/2000 and before 2000-12",
            Query(
                [
                    Statement(
                        BooleanQuery(
                            Expression(
                                SimpleQuery(
                                    SpiresDateKeywordQuery(
                                        InspireDateKeyword("date"),
                                        DateValue(
                                            GreaterThanOp(SimpleDateValue("10/2000"))
                                        ),
                                    )
                                )
                            ),
                            And(),
                            Statement(
                                Expression(
                                    SimpleQuery(
                                        Value(LessThanOp(SimpleDateValue("2000-12")))
                                    )
                                )
                            ),
                        )
                    )
                ]
            ),
        ),
    ],
)
@pytest.mark.xfail(
    reason="the queries are not correct, should be fixed by https://github.com/cern-sis/issues-inspire/issues/150 "
)
def test_parser_functionality_regressions(query_str, expected_parse_tree):
    """Track known-bad parses of date comparisons with an implicit second keyword.

    These cases were moved out of the main parametrized test and are expected
    to fail until the linked issue is resolved.
    """
    print("Parsing: " + query_str)
    parser = StatefulParser()
    _, parse_tree = parser.parse(query_str, Query)
    assert parse_tree == expected_parse_tree
41 changes: 18 additions & 23 deletions tests/test_restructuring_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from inspire_query_parser import parser
from inspire_query_parser.ast import (AndOp, EmptyQuery, ExactMatchValue,
GreaterEqualThanOp, GreaterThanOp,
GreaterThanDateOp,LessThanDateOp,
Keyword, KeywordOp, LessEqualThanOp,
LessThanOp, MalformedQuery,
NestedKeywordOp, NotOp, OrOp,
Expand Down Expand Up @@ -354,17 +355,11 @@
# G, GE, LT, LE, E queries
(
'date > 2000-10 and date < 2000-12',
AndOp(
KeywordOp(Keyword('date'), GreaterThanOp(Value('2000-10'))),
KeywordOp(Keyword('date'), LessThanOp(Value('2000-12')))
)
AndOp(KeywordOp(Keyword('date'), GreaterThanDateOp(Value('2000-10'))), KeywordOp(Keyword('date'), LessThanOp(Value('2000-12'))))
),
(
'date after 10/2000 and date before 2000-12',
AndOp(
KeywordOp(Keyword('date'), GreaterThanOp(Value('10/2000'))),
KeywordOp(Keyword('date'), LessThanOp(Value('2000-12')))
)
AndOp(KeywordOp(Keyword('date'), GreaterThanDateOp(Value('10/2000'))), KeywordOp(Keyword('date'), LessThanOp(Value('2000-12'))))
),
(
'date >= nov 2000 and d<=2005',
Expand Down Expand Up @@ -445,23 +440,11 @@
'du > yesterday - 2',
KeywordOp(
Keyword('date-updated'),
GreaterThanOp(Value(str((date.today() - relativedelta(days=3)))))
GreaterThanDateOp(Value(str((date.today() - relativedelta(days=3)))))
)
),

# Wildcard queries
(
'find a \'o*aigh\' and t "alge*" and date >2013',
AndOp(
KeywordOp(Keyword('author'), PartialMatchValue('o*aigh', contains_wildcard=True)),
AndOp(
KeywordOp(Keyword('title'), ExactMatchValue('alge*'

)),
KeywordOp(Keyword('date'), GreaterThanOp(Value('2013')))
)
)
),
(
'a *alge | a alge* | a o*aigh',
OrOp(
Expand All @@ -476,7 +459,19 @@
'find texkey Hirata:1992*',
KeywordOp(Keyword('texkeys'), Value('Hirata:1992*', contains_wildcard=True))
),

(
"find a 'o*aigh' and t \"alge*\" and date >2013",
AndOp(
KeywordOp(
Keyword("author"),
PartialMatchValue("o*aigh", contains_wildcard=True),
),
AndOp(
KeywordOp(Keyword("title"), ExactMatchValue("alge*")),
KeywordOp(Keyword("date"), GreaterThanDateOp(Value("2013"))),
),
),
),
# Queries for implicit "and" removal
('title and foo', AndOp(ValueOp(Value('title')), ValueOp(Value('foo')))),
('author takumi doi', KeywordOp(Keyword('author'), Value('takumi doi'))),
Expand Down Expand Up @@ -711,7 +706,7 @@ def test_foo_bar():
)
),
('find cc italy', KeywordOp(Keyword('country'), Value('italy'))),
('fin date > today', KeywordOp(Keyword('date'), GreaterThanOp(Value(str(date.today()))))),
('fin date > today', KeywordOp(Keyword('date'), GreaterThanDateOp(Value(str(date.today()))))),
('find r atlas-conf-*', KeywordOp(Keyword('reportnumber'), Value('atlas-conf-*', contains_wildcard=True))),
(
'find caption "Diagram for the fermion flow violating process"',
Expand Down