diff --git a/README.md b/README.md index 523f32d..229c412 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Parse XPATH 3.1 using Pyparsing XPath (XML Path Language) is a query language for selecting nodes from an XML document. -In addition, XPath may be used to compute values (e.g., strings, numbers, or Boolean values) from the content of an XML document. -XPath is supported by the World Wide Web Consortium (W3C). +In addition, XPath is used to compute values (e.g., strings, numbers, or Boolean values) from the content of an XML document. +XPath is maintained by the World Wide Web Consortium (W3C). [Pyparsing](https://github.com/pyparsing/pyparsing) is a parsing module used to construct grammar in Python. -XPyth uses Pyparsing to parse XPath strings, and offers an additional abstraction layer. +XPyth-parser uses Pyparsing to parse XPath strings, and offers an additional abstraction layer. ## Status This library is an attempt to create a parser which can be used both to query XML documents, @@ -13,12 +13,16 @@ The original plan was to support both options. However, XPath 3.1 is not widely Parsing XPath 3.1 on a grammar level should still be supported, but not all information may be available when using the abstraction layer. Most importantly, there will be [XPath functions](https://www.w3.org/2005/xpath-functions/) missing. -Dealing with dynamic contexts (i.e., parsing XML as Parser.xml will be done using LXML for now). +Dealing with dynamic contexts (i.e., parsing XML as Parser.xml will be done using LXML for now). +In a way, XPyth-parser is at the present moment a fancy wrapper around LXML, in order to support some XPath 2.0+ functionality. ### Alternatives For most use cases, there will be (better) alternatives to this project. [LXML](https://lxml.de/) is Pythonic binding for the C libraries libxml2 and libxslt. If only XPath 1.0 is needed, LXML will be a better solution. +### Requirements +xpyth-parser depends on LXML, PyParsing. For parsing dates we use Isodate. + ## Goals This project started out with a specific goal: to parse [XBRL formula](https://specifications.xbrl.org/work-product-index-formula-formula-1.0.html) tests. @@ -27,15 +31,19 @@ Because of this, the author of this library is focussing on correctly interpreti # Examples -from xpyth_parser.parse import Parser -count = Parser("count(1,2,3)") + + from xpyth_parser.parse import Parser + count = Parser("count(1,2,3)").run() + print(count) -> 3 + This will give a wrapper class which contains the resolved syntax tree in count.XPath and the answer in count.resolved_answer # Parsing only It is also possible to only parse the string, but not try to resolve the static and dynamic context -count = Parser("count(1,2,3), no_resolve=True") -count.xpath will be the full syntax tree, instead of having functions processed and contexts applied. -count.run() will resolve the expression as if no_resolve=False. contexts might need to be passed to the object beforehand. + count = Parser("count(1,2,3), no_resolve=True") + +`count.xpath` will be the full syntax tree, instead of having functions processed and contexts applied. +`count.run()` will resolve the expression as if no_resolve=False. contexts might need to be passed to the object beforehand. diff --git a/setup.cfg b/setup.cfg index a09e2e3..d8390b2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] -name = xpyth_31_parser -version = 0.0.7 +name = xpyth_parser +version = 0.0.9 author = Erwin Kaats author_email = egit@tuta.io description = An XPath 3.1 Parser @@ -19,6 +19,7 @@ package_dir = packages = find: python_requires = >=3.6 install_requires = + lxml pyparsing isodate diff --git a/src/xpyth_parser/conversion/function.py b/src/xpyth_parser/conversion/function.py index f5c13df..8a2fa16 100644 --- a/src/xpyth_parser/conversion/function.py +++ b/src/xpyth_parser/conversion/function.py @@ -1,10 +1,13 @@ +import functools + import lxml.etree from isodate import parse_date, parse_duration from functools import partial -from .functions.generic import FunctionRegistry +from .functions.generic import FunctionRegistry, QuerySingleton from .qname import QName, Parameter + reg = FunctionRegistry() def cast_lxml_elements(args): @@ -14,15 +17,39 @@ def cast_lxml_elements(args): :return: """ - # If it is one element we found, we can cast it and return it - if isinstance(args, lxml.etree._Element): + if isinstance(args, functools.partial): + #todo: this kind of recursion we now have to build into every function? + args = args() + + if hasattr(args, "expr"): + args = args.expr + + # If it is already a primary, return it + if isinstance(args, str) or isinstance(args, int) or isinstance(args, float): + return args + + elif isinstance(args, bytes): + # Could be an unparsed (L)XML element + etree = lxml.etree.fromstring(args) + try: + arg = int(etree.text) + except: + arg = etree.text + + return arg + + elif isinstance(args, lxml.etree._Element): try: arg = int(args.text) except: arg = args.text - # But we still want to return a list, because that is expected in functions - return [arg] + return arg + + elif args == None: + # If none is passed though (LXML has not found any elements, return the empty list) + return [] + # Else, we need to go through the list casted_args = [] @@ -35,8 +62,10 @@ def cast_lxml_elements(args): casted_args.append(arg) return casted_args -def fn_count(args): + +def fn_count(*args, **kwargs): + args = args[0] if isinstance(args, list): return len(args) @@ -44,63 +73,83 @@ def fn_count(args): return 1 -def fn_avg(args): - casted_args = cast_lxml_elements(args=args) +def fn_avg(*args, **kwargs): + casted_args = cast_lxml_elements(args=args[0]) + + if isinstance(casted_args, int): + # If there is only one value, the sum would be the same as the value + return casted_args + return sum(casted_args) / len(casted_args) -def fn_max(args): - casted_args = cast_lxml_elements(args=args) +def fn_max(*args, **kwargs): + casted_args = cast_lxml_elements(args=args[0]) + if isinstance(casted_args, int): + # If there is only one value, the sum would be the same as the value + return casted_args + return max(casted_args) -def fn_min(args): - casted_args = cast_lxml_elements(args=args) +def fn_min(*args, **kwargs): + casted_args = cast_lxml_elements(args=args[0]) + if isinstance(casted_args, int): + # If there is only one value, the sum would be the same as the value + return casted_args + return min(casted_args) -def fn_sum(args): - casted_args = cast_lxml_elements(args=args) +def fn_sum(*args, **kwargs): + casted_args = cast_lxml_elements(args=args[0]) + + if isinstance(casted_args, int): + # If there is only one value, the sum would be the same as the value + return casted_args + return sum(casted_args) -def fn_not(args): +def fn_not(*args, **kwargs): for arg in args: if arg is True: return False # found an argument that is true # Did not find a True value return True -def fn_empty(args): +def fn_empty(*args, **kwargs): for arg in args: if arg is None or arg == "": return True return False -def xs_date(args): - if len(args) == 0: +def xs_date(*args, **kwargs): + casted_args = cast_lxml_elements(args=args[0]) + if len(casted_args) == 0: return False else: - date = parse_date(args) + date = parse_date(casted_args) return date -def xs_yearMonthDuration(args): - - if len(args) == 0: +def xs_yearMonthDuration(*args, **kwargs): + casted_args = cast_lxml_elements(args=args[0]) + if len(casted_args) == 0: return False else: - duration = parse_duration(args) + duration = parse_duration(casted_args) return duration -def xs_dayTimeDuration(args): - if len(args) == 0: +def xs_dayTimeDuration(*args, **kwargs): + casted_args = cast_lxml_elements(args=args[0]) + if len(casted_args) == 0: return False else: - duration = parse_duration(args) + duration = parse_duration(casted_args) return duration -def xs_qname(args): +def xs_qname(*args, **kwargs): # Returns an xs:QName value formed using a supplied namespace URI and lexical QName. - + args = args[0] if isinstance(args, str): prefix, localname = str(args).split(":", 1) return QName(prefix=prefix, localname=localname) @@ -113,11 +162,17 @@ def xs_qname(args): prefix, localname = str(args[1]).split(":", 1) return QName(prefix=prefix, localname=localname, namespace=args[0]) -def fn_number(args): - # Returns an xs:QName value formed using a supplied namespace URI and lexical QName. +def fn_number(*args, **kwargs): + """ + Returns an xs:QName value formed using a supplied namespace URI and lexical QName. + + :param args: + :return: + """ + casted_args = cast_lxml_elements(args=args[0]) # Otherwise try to cast the argument to float. - return float(args) + return float(casted_args) functions = { "fn:count":fn_count, @@ -134,13 +189,25 @@ def fn_number(args): "xs:QName": xs_qname, } + +# Add XBRL functions +from .functions.xbrl import function_list +functions.update(function_list) + # Add the initial set of functions to the registry reg.add_functions(functions=functions, overwrite_functions=True) -def get_function(v): - qname = v[0] - args = list(v[1:]) +query = QuerySingleton() + +def get_function(toks): + qname = toks[0] + + if not isinstance(qname, QName): + # The first token should really be a qname. This is the name of the function. + return toks + + args = list(toks[1:]) # If no prefix is defined, FN will be assumed for function calls if qname.prefix is None: @@ -157,7 +224,7 @@ def get_function(v): if len(args) == 1: args = args[0] - return partial(function, args) + return partial(function, args, query=query.lxml_tree) else: print("Cannot find function in registry") diff --git a/src/xpyth_parser/conversion/functions/generic.py b/src/xpyth_parser/conversion/functions/generic.py index 1535af0..0ba53ca 100644 --- a/src/xpyth_parser/conversion/functions/generic.py +++ b/src/xpyth_parser/conversion/functions/generic.py @@ -1,7 +1,6 @@ -import functools -import logging + from typing import Union, Optional -from ..qname import Parameter, QName +from ..qname import QName class FunctionRegistry: @@ -54,6 +53,22 @@ def add_functions(self, functions: dict = None, overwrite_functions: Optional[bo # Only overwrite functions if this is explicitly set self.functions[function_name] = function +class QuerySingleton: + _instance = None + lxml_tree = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__( + self, + lxml_tree = None + ): + if lxml_tree is not None: + self.lxml_tree = lxml_tree + class OrExpr: def __init__(self, a, b): diff --git a/src/xpyth_parser/conversion/functions/xbrl.py b/src/xpyth_parser/conversion/functions/xbrl.py new file mode 100644 index 0000000..525ac86 --- /dev/null +++ b/src/xpyth_parser/conversion/functions/xbrl.py @@ -0,0 +1,14 @@ +def identifier(*args, **kwargs): + """ + Gets the identifier of one or more XBRL facts + :param self: + :return: + """ + # https://specifications.xbrl.org/registries/functions-registry-1.0/80132%20xfi.identifier/80132%20xfi.identifier%20function.html + for arg in args[0]: + context_ref = arg.get("contextRef") + q = kwargs['query'] + context = q.xpath(f"/xbrli:xbrl/xbrli:context[@id='{context_ref}']/xbrli:entity/xbrli:identifier", namespaces=q.nsmap) + return context[0].text + +function_list = {"xfi:identifier": identifier} diff --git a/src/xpyth_parser/grammar/expressions.py b/src/xpyth_parser/grammar/expressions.py index 2b7cc11..d931f0b 100644 --- a/src/xpyth_parser/grammar/expressions.py +++ b/src/xpyth_parser/grammar/expressions.py @@ -1,5 +1,6 @@ import functools import operator +import types import pyparsing from pyparsing import ( @@ -11,17 +12,21 @@ ZeroOrMore, Forward, Keyword, - Suppress, + Suppress, Regex, ) +from .qualified_names import VariableRegistry +from ..conversion.functions.generic import QuerySingleton +from ..conversion.tests import processingInstructionTest, anyKindTest, textTest, commentTest, schemaAttributeTest, \ + elementTest, schemaElementTest, documentTest -from .literals import l_par_l, l_par_r, l_dot, t_NCName, t_IntegerLiteral, t_Literal +var_reg = VariableRegistry() -from .qualified_names import t_VarName, t_SingleType, t_AtomicType, t_EQName, t_VarRef -from .tests import t_KindTest, t_NodeTest + +from .literals import l_par_l, l_par_r, l_dot, t_NCName, t_IntegerLiteral, t_Literal, t_StringLiteral from ..conversion.function import get_function, resolve_paths, cast_parameters -from ..conversion.qname import Parameter +from ..conversion.qname import Parameter, qname_from_parse_results xpath_version = "3.1" @@ -36,6 +41,77 @@ Forward() ) # Declare an empty token, as it is used in a recursive declaration later on +""" +4. Qualified Names +https://www.w3.org/TR/REC-xml-names/#ns-qualnames + +""" + +t_Prefix = t_NCName +t_Prefix.setName("Prefix") + +t_LocalPart = t_NCName +t_LocalPart.setName("LocalPart") + +t_PrefixedName = t_Prefix + Suppress(Literal(":")) + t_LocalPart +t_PrefixedName.setName("PrefixedName") + +t_UnprefixedName = t_LocalPart +t_UnprefixedName.setName("UnprefixedName") + +t_QName = t_PrefixedName | t_UnprefixedName +t_QName.setName("Qname") +t_QName.setParseAction(qname_from_parse_results) + +# All these elements refer to QName +t_VarName = t_AtomicType = t_ElementName = t_TypeName = t_AttributeName = t_QName +t_VarName.setName("Varname") + +def get_variable(toks): + + var_name = toks[0] + + if len(toks) > 1: + return Parameter(qname=toks[0], type_declaration=toks[1]) + else: + + for var in var_reg.get_variable(toks[0]): + # return Parameter(qname=v[0]) + if isinstance(var, str): + + parsed_var = t_XPath.parseString(var, parseAll=True) + parsed_var = parsed_var[0].expr + + + # try: + # # Parse the individual value as if it is an XPath expression + # parsed_var = t_XPath.parseString(var, parseAll=True) + # + # # Unpack the outcome + # parsed_var = parsed_var[0].expr + # + # except: + # logging.warning(f"Could not parse parameter '{var_name}', value '{var}'") + # parsed_var = var + + yield parsed_var + + elif isinstance(var, int) or isinstance(var, float): + yield var + + else: + print("Really expected a string as variable") + +t_VarRef = Suppress(Literal("$")) + t_VarName +t_VarRef.setParseAction(get_variable) +t_VarRef.setName("VarRef") + +t_AtomicType.setName("AtomicType") +t_ElementName.setName("ElementName") +t_TypeName.setName("TypeName") +t_AttributeName.setName("AttributeName") + + t_ExprSingle.setName("ExprSingle") # SimpleForClause seems to make a lot more sense in the 3.1 spec than in 2.0. @@ -185,7 +261,21 @@ def resolve_fn(fn): if isinstance(rootexpr, functools.partial): # Main node is a Function. Resolve this and add the answer to the Syntax Tree. - return rootexpr() + # return rootexpr() + function_outcome = rootexpr() + if isinstance(function_outcome, types.GeneratorType): + answers = [] + for ans in function_outcome: + # todo: try to figure out if Functions should be yielding (generator) or returning. + # I'd say yield, because "fn:number(1 to 100)[. mod 5 eq 0]" should be a legal expression + # where 1 to 100 is cast as a number, and 'fed' through the predidicate filtering. + answers.append(ans) + return answers + + + else: + return function_outcome + # return resolve_fn(rootexpr) elif isinstance(rootexpr, Parameter): @@ -252,6 +342,108 @@ def resolve_fn(fn): # Give back the now resolved expression return rootexpr +""" +TESTS +https://www.w3.org/TR/xpath20/#prod-xpath-KindTest +""" + +t_ElementNameOrWildcard = t_ElementName | Literal("*") +t_ElementNameOrWildcard.setName("ElementNameOrWildcard") + +t_ElementTest = ( + Keyword("element") + + l_par_l + + Optional(t_ElementNameOrWildcard + Optional("," + t_TypeName + Optional("?"))) + + l_par_r +) +t_ElementTest.setName("ElementTest") +t_ElementTest.setParseAction(elementTest) + +t_ElementDeclaration = t_ElementName +t_ElementDeclaration.setName("ElementDeclaration") + +t_SchemaElementTest = ( + Keyword("schema-element") + l_par_l + t_ElementDeclaration + l_par_r +) +t_SchemaElementTest.setParseAction(schemaElementTest) +t_SchemaElementTest.setName("schema-element") + +t_DocumentTest = ( + Keyword("document-node") + + l_par_l + + Optional(t_ElementTest | t_SchemaElementTest) + + l_par_r +) +t_DocumentTest.setName("DocumentTest") +t_DocumentTest.setParseAction(documentTest) + +t_AttribNameOrWildcard = t_AttributeName | "*" +t_AttribNameOrWildcard.setName("AttribNameOrWildcard") + +t_AttributeTest = ( + Keyword("attribute") + + l_par_l + + Optional(t_AttribNameOrWildcard + Optional("," + t_TypeName)) + + l_par_r +) +t_AttributeTest.setName("AttributeTest") + +t_AttributeDeclaration = t_AttributeName +t_AttributeDeclaration.setName("AttributeDeclaration") + +t_SchemaAttributeTest = ( + Keyword("schema-attribute") + l_par_l + t_AttributeDeclaration + l_par_r +) +t_SchemaAttributeTest.setParseAction(schemaAttributeTest) +t_SchemaAttributeTest.setName("SchemaAttributeTest") + +t_CommentTest = Keyword("comment") + l_par_l + l_par_r +t_CommentTest.setParseAction(commentTest) +t_CommentTest.setName("comment") + +t_TextTest = Keyword("text") + l_par_l + l_par_r +t_TextTest.setParseAction(textTest) +t_TextTest.setName("TextTest") + +t_AnyKindTest = Keyword("node") + l_par_l + l_par_r +t_AnyKindTest.setParseAction(anyKindTest) +t_AnyKindTest.setName("AnyKindTest") + +t_PITest = ( + Keyword("processing-instruction") + + l_par_l + + Optional(t_NCName | t_StringLiteral) + + l_par_r +) +t_PITest.setParseAction(processingInstructionTest) +t_PITest.setName("Processing-InstructionTest") + +t_KindTest = ( + t_ElementTest + | t_AttributeTest + | t_SchemaElementTest + | t_SchemaAttributeTest + | t_PITest + | t_CommentTest + | t_TextTest + | t_AnyKindTest + | t_DocumentTest +) +t_KindTest.setName("KindTest") +# Just as with t_NumericLiteral, the t_Wildcard order needed to be modified slightly +t_Wildcard = ( + (t_NCName + Literal(":") + Literal("*")) + | (Literal("*") + Literal(":") + t_NCName) + | Literal("*") +) +t_Wildcard.setName("Wildcard") + +t_NameTest = t_QName | t_Wildcard +t_NameTest.setName("NameTest") + +t_NodeTest = t_KindTest | t_NameTest +t_NodeTest.setName("NodeTest") + class XPath: def __init__(self, expr, variable_map=None, xml_etree=None): @@ -281,7 +473,6 @@ def parse_expr(toks): # t_Expr.setParseAction(lambda x: XPath(expr=x[0])) t_Expr.setParseAction(parse_expr) -# https://www.w3.org/TR/xpath20/#doc-xpath-ParenthesizedExpr # https://www.w3.org/TR/xpath20/#doc-xpath-ContextItemExpr @@ -336,21 +527,6 @@ def __init__(self): t_Predicate = Suppress("[") + t_Expr + Suppress("]") t_Predicate.setName("Predicate") -class QuerySingleton: - _instance = None - lxml_tree = None - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __init__( - self, - lxml_tree = None - ): - if lxml_tree is not None: - self.lxml_tree = lxml_tree class Predicate: def __init__(self, val): @@ -396,6 +572,17 @@ def get_predicate_list(toks): ) t_ArgumentList.setName("ArgumentList") +t_BracedURILiteral = ( + Literal("Q") + Literal("{") + ZeroOrMore(Regex("[^{}]")) + Literal("}") +) +t_BracedURILiteral.setName("BracedURILiteral") + +t_URIQualifiedName = t_BracedURILiteral + t_NCName +t_URIQualifiedName.setName("URIQualifiedName") + +t_EQName = t_QName | t_URIQualifiedName +t_EQName.setName("EQName") + tx_FunctionName = t_EQName t_FunctionCall = tx_FunctionName + t_ArgumentList @@ -540,6 +727,8 @@ def get_path_expr(toks): :param toks: :return: """ + # todo: Can be ["//", QNAME, Predicate], but also [list of houndreds of paths from a variable] + steps = [] if toks[0] == "/" and len(toks) == 1: # https://www.w3.org/TR/xpath-3/#parse-note-leading-lone-slash @@ -581,11 +770,12 @@ def get_path_expr(toks): # todo: Path expression also needs to be handled while parsing if we want partial functions to work. # this probably means that we should let the whole 'parse first, intepret later' part go :/ - query_singlton = QuerySingleton() + query_singleton = QuerySingleton() query = path_expression.to_str() - result = query_singlton.lxml_tree.xpath(query) - + result = query_singleton.lxml_tree.xpath(query, namespaces=query_singleton.lxml_tree.nsmap) + if len(result) < 1: + return [None] return result @@ -708,6 +898,10 @@ def get_unary_expr(v): t_UnaryExpr.setParseAction(get_unary_expr) + +t_SingleType = t_AtomicType + Optional("?") +t_SingleType.setName("SingleType") + if xpath_version == "2.0": t_CastExpr = t_UnaryExpr + Optional(Keyword("cast") + Keyword("as") + t_SingleType) elif xpath_version == "3.1": @@ -717,6 +911,8 @@ def get_unary_expr(v): t_CastExpr = t_ArrowExpr + Optional(Keyword("cast") + Keyword("as") + t_SingleType) + + t_CastableExpr = t_CastExpr + Optional( Keyword("castable") + Keyword("as") + t_SingleType ) @@ -1343,6 +1539,7 @@ def get_or(v): t_XPath.setName("XPath") + # todo: Generating a railroad map requires Pyparsing 3.0. Uncomment when PP3.0 is released from beta # def create_railroad(): # from pyparsing.diagram import to_railroad, railroad_to_html diff --git a/src/xpyth_parser/grammar/qualified_names.py b/src/xpyth_parser/grammar/qualified_names.py index 903b313..3ab0a28 100644 --- a/src/xpyth_parser/grammar/qualified_names.py +++ b/src/xpyth_parser/grammar/qualified_names.py @@ -1,15 +1,6 @@ -from typing import Union as typing_Union from typing import Optional as typing_Optional -from .literals import t_NCName -from ..conversion.qname import qname_from_parse_results, Parameter, QName -from pyparsing import ( - Literal, - Regex, - Optional, - ZeroOrMore, - Suppress, -) +from ..conversion.qname import QName class VariableRegistry: _instance = None @@ -37,79 +28,32 @@ def __init__( self.variables[variable_name] = variable def get_variable(self, variable_name): + if isinstance(variable_name, QName): variable_name = variable_name.__repr__() + else: + variable_name = variable_name + if variable_name in self.variables.keys(): - return self.variables[variable_name] + values = self.variables[variable_name] + if isinstance(values, list): + return values + else: + return [values] else: # return [None] raise Exception(f"Variable not in registry: '{variable_name}'") -var_reg = VariableRegistry() - -""" -4. Qualified Names -https://www.w3.org/TR/REC-xml-names/#ns-qualnames - -""" - -t_Prefix = t_NCName -t_Prefix.setName("Prefix") - -t_LocalPart = t_NCName -t_LocalPart.setName("LocalPart") - -t_PrefixedName = t_Prefix + Suppress(Literal(":")) + t_LocalPart -t_PrefixedName.setName("PrefixedName") -t_UnprefixedName = t_LocalPart -t_UnprefixedName.setName("UnprefixedName") -t_QName = t_PrefixedName | t_UnprefixedName -t_QName.setName("Qname") -t_QName.setParseAction(qname_from_parse_results) -t_BracedURILiteral = ( - Literal("Q") + Literal("{") + ZeroOrMore(Regex("[^{}]")) + Literal("}") -) -t_BracedURILiteral.setName("BracedURILiteral") -t_URIQualifiedName = t_BracedURILiteral + t_NCName -t_URIQualifiedName.setName("URIQualifiedName") -t_EQName = t_QName | t_URIQualifiedName -t_EQName.setName("EQName") -# All these elements refer to QName -t_VarName = t_AtomicType = t_ElementName = t_TypeName = t_AttributeName = t_QName -t_VarName.setName("Varname") -def get_variable(v): - if len(v) > 1: - return Parameter(qname=v[0], type_declaration=v[1]) - else: - var = var_reg.get_variable(v[0]) - # return Parameter(qname=v[0]) - return var -t_VarRef = Suppress(Literal("$")) + t_VarName -t_VarRef.setParseAction(get_variable) -t_VarRef.setName("VarRef") -t_AtomicType.setName("AtomicType") -t_ElementName.setName("ElementName") -t_TypeName.setName("TypeName") -t_AttributeName.setName("AttributeName") -t_SingleType = t_AtomicType + Optional("?") -t_SingleType.setName("SingleType") -# Just as with t_NumericLiteral, the t_Wildcard order needed to be modified slightly -t_Wildcard = ( - (t_NCName + Literal(":") + Literal("*")) - | (Literal("*") + Literal(":") + t_NCName) - | Literal("*") -) -t_Wildcard.setName("Wildcard") diff --git a/src/xpyth_parser/grammar/tests.py b/src/xpyth_parser/grammar/tests.py deleted file mode 100644 index 191feea..0000000 --- a/src/xpyth_parser/grammar/tests.py +++ /dev/null @@ -1,119 +0,0 @@ -from pyparsing import ( - Optional, - Keyword, - Literal, -) - -from .literals import l_par_l, l_par_r, t_NCName, t_StringLiteral -from .qualified_names import ( - t_TypeName, - t_ElementName, - t_AttributeName, - t_QName, - t_Wildcard, -) -from ..conversion.tests import ( - elementTest, - schemaElementTest, - documentTest, - schemaAttributeTest, - commentTest, - textTest, - anyKindTest, - processingInstructionTest, -) - -""" -TESTS -https://www.w3.org/TR/xpath20/#prod-xpath-KindTest -""" - -t_ElementNameOrWildcard = t_ElementName | Literal("*") -t_ElementNameOrWildcard.setName("ElementNameOrWildcard") - -t_ElementTest = ( - Keyword("element") - + l_par_l - + Optional(t_ElementNameOrWildcard + Optional("," + t_TypeName + Optional("?"))) - + l_par_r -) -t_ElementTest.setName("ElementTest") -t_ElementTest.setParseAction(elementTest) - -t_ElementDeclaration = t_ElementName -t_ElementDeclaration.setName("ElementDeclaration") - -t_SchemaElementTest = ( - Keyword("schema-element") + l_par_l + t_ElementDeclaration + l_par_r -) -t_SchemaElementTest.setParseAction(schemaElementTest) -t_SchemaElementTest.setName("schema-element") - -t_DocumentTest = ( - Keyword("document-node") - + l_par_l - + Optional(t_ElementTest | t_SchemaElementTest) - + l_par_r -) -t_DocumentTest.setName("DocumentTest") -t_DocumentTest.setParseAction(documentTest) - -t_AttribNameOrWildcard = t_AttributeName | "*" -t_AttribNameOrWildcard.setName("AttribNameOrWildcard") - -t_AttributeTest = ( - Keyword("attribute") - + l_par_l - + Optional(t_AttribNameOrWildcard + Optional("," + t_TypeName)) - + l_par_r -) -t_AttributeTest.setName("AttributeTest") - -t_AttributeDeclaration = t_AttributeName -t_AttributeDeclaration.setName("AttributeDeclaration") - -t_SchemaAttributeTest = ( - Keyword("schema-attribute") + l_par_l + t_AttributeDeclaration + l_par_r -) -t_SchemaAttributeTest.setParseAction(schemaAttributeTest) -t_SchemaAttributeTest.setName("SchemaAttributeTest") - -t_CommentTest = Keyword("comment") + l_par_l + l_par_r -t_CommentTest.setParseAction(commentTest) -t_CommentTest.setName("comment") - -t_TextTest = Keyword("text") + l_par_l + l_par_r -t_TextTest.setParseAction(textTest) -t_TextTest.setName("TextTest") - -t_AnyKindTest = Keyword("node") + l_par_l + l_par_r -t_AnyKindTest.setParseAction(anyKindTest) -t_AnyKindTest.setName("AnyKindTest") - -t_PITest = ( - Keyword("processing-instruction") - + l_par_l - + Optional(t_NCName | t_StringLiteral) - + l_par_r -) -t_PITest.setParseAction(processingInstructionTest) -t_PITest.setName("Processing-InstructionTest") - -t_KindTest = ( - t_ElementTest - | t_AttributeTest - | t_SchemaElementTest - | t_SchemaAttributeTest - | t_PITest - | t_CommentTest - | t_TextTest - | t_AnyKindTest - | t_DocumentTest -) -t_KindTest.setName("KindTest") - -t_NameTest = t_QName | t_Wildcard -t_NameTest.setName("NameTest") - -t_NodeTest = t_KindTest | t_NameTest -t_NodeTest.setName("NodeTest") diff --git a/src/xpyth_parser/parse.py b/src/xpyth_parser/parse.py index 8a4bf83..f7c7186 100644 --- a/src/xpyth_parser/parse.py +++ b/src/xpyth_parser/parse.py @@ -1,8 +1,8 @@ from lxml import etree from lxml.etree import Element from typing import Union, Optional -from .grammar.expressions import t_XPath, resolve_expression, QuerySingleton -from .conversion.functions.generic import FunctionRegistry +from .grammar.expressions import t_XPath, resolve_expression +from .conversion.functions.generic import FunctionRegistry, QuerySingleton from .grammar.qualified_names import VariableRegistry @@ -24,7 +24,7 @@ def __init__( :param parseAll: Boolean passed to PyParsing. If set to true, Parsing will fail if any part of the string is not understood. :param variable_map: Dict of variables which Parameters can be mapped to. :param xml: Byte string of an XML object to be parsed - :param no_resolve: If set to True, only grammar is parsed but the expression is not resolved. + :param no_resolve: If set to True, only grammar is parsed but the expression is not resolved. This can be used for debugging. For example: parsed_expr = Parser("(1 + 2) = (2 + 1)") diff --git a/tests/input/empty_instance.xml b/tests/input/empty_instance.xml new file mode 100644 index 0000000..9080bd1 --- /dev/null +++ b/tests/input/empty_instance.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/test_parsing_path_traversal.py b/tests/test_parsing_path_traversal.py index e22ea70..55457ae 100644 --- a/tests/test_parsing_path_traversal.py +++ b/tests/test_parsing_path_traversal.py @@ -239,3 +239,15 @@ def test_predicate_paths(self): ) # todo: need to figure out while some queries are in lists, others are not. # I think I am unpacking a bit too much somewhere + + def test_empty_paths(self): + TESTDATA_FILENAME = os.path.join( + os.path.dirname(__file__), "input/empty_instance.xml" + ) + + with open(TESTDATA_FILENAME) as xml_file: + xml_bytes = bytes(xml_file.read(), encoding="utf-8") + self.assertEqual(Parser("sum(//singleOccuringElement)", xml=xml_bytes).run(), 0) + self.assertRaises(ValueError, Parser, "min(//singleOccuringElement)", xml=xml_bytes) + self.assertRaises(ValueError, Parser, "max(//singleOccuringElement)", xml=xml_bytes) + self.assertRaises(ZeroDivisionError, Parser, "avg(//singleOccuringElement)", xml=xml_bytes) diff --git a/tests/test_parsing_qualified_names.py b/tests/test_parsing_qualified_names.py index 19d809a..7214799 100644 --- a/tests/test_parsing_qualified_names.py +++ b/tests/test_parsing_qualified_names.py @@ -3,7 +3,7 @@ from pyparsing import ParseException from src.xpyth_parser.conversion.qname import QName -from src.xpyth_parser.grammar.qualified_names import t_QName, t_VarName, t_Wildcard +from src.xpyth_parser.grammar.expressions import t_QName, t_VarName, t_Wildcard class QualifiedNameTests(unittest.TestCase): diff --git a/tests/test_parsing_test_expressions.py b/tests/test_parsing_test_expressions.py index 1b191d5..fd34d7b 100644 --- a/tests/test_parsing_test_expressions.py +++ b/tests/test_parsing_test_expressions.py @@ -2,7 +2,7 @@ from src.xpyth_parser.conversion.qname import QName from src.xpyth_parser.conversion.tests import Test -from src.xpyth_parser.grammar.tests import t_KindTest +from src.xpyth_parser.grammar.expressions import t_KindTest class TestKindTests(unittest.TestCase):