From ed2f6ee6ff2a4066b9ea6b35db2868d4d2154cb5 Mon Sep 17 00:00:00 2001 From: zhengchun Date: Thu, 28 Mar 2024 14:01:44 +0800 Subject: [PATCH] - fix #93, new `mergeQuery` and `descendantOverDescendantQuery` - minor fix some issue --- build.go | 383 +++++++++++++++++++++++++++---------- operator.go | 56 ++---- query.go | 534 ++++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 792 insertions(+), 181 deletions(-) diff --git a/build.go b/build.go index 2977bbc..0c0be78 100644 --- a/build.go +++ b/build.go @@ -7,15 +7,39 @@ import ( type flag int -const ( - noneFlag flag = iota - filterFlag -) +var flagsEnum = struct { + None flag + SmartDesc flag + PosFilter flag + Filter flag + Condition flag +}{ + None: 0, + SmartDesc: 1, + PosFilter: 2, + Filter: 4, + Condition: 8, +} + +type builderProp int + +var builderProps = struct { + None builderProp + PosFilter builderProp + HasPosition builderProp + HasLast builderProp + NonFlat builderProp +}{ + None: 0, + PosFilter: 1, + HasPosition: 2, + HasLast: 4, + NonFlat: 8, +} // builder provides building an XPath expressions. type builder struct { - depth int - flag flag + parseDepth int firstInput query } @@ -63,23 +87,26 @@ func axisPredicate(root *axisNode) func(NodeNavigator) bool { return predicate } -// processAxisNode processes a query for the XPath axis node. -func (b *builder) processAxisNode(root *axisNode) (query, error) { +// processAxis processes a query for the XPath axis node. +func (b *builder) processAxis(root *axisNode, flags flag, props *builderProp) (query, error) { var ( - err error - qyInput query - qyOutput query - predicate = axisPredicate(root) + err error + qyInput query + qyOutput query ) + b.firstInput = nil + predicate := axisPredicate(root) if root.Input == nil { qyInput = &contextQuery{} + *props = builderProps.None } else { + inputFlags := flagsEnum.None if root.AxeType == "child" && (root.Input.Type() == nodeAxis) { if input := root.Input.(*axisNode); input.AxeType == "descendant-or-self" { var qyGrandInput query if input.Input != nil { - qyGrandInput, _ = b.processNode(input.Input) + qyGrandInput, _ = b.processNode(input.Input, flagsEnum.SmartDesc, props) } else { qyGrandInput = &contextQuery{} } @@ -97,11 +124,13 @@ func (b *builder) processAxisNode(root *axisNode) (query, error) { // fix `//*[contains(@id,"food")]//*[contains(@id,"food")]`, see https://github.com/antchfx/htmlquery/issues/52 // Skip the current node(Self:false) for the next descendants nodes. _, ok := qyGrandInput.(*contextQuery) - qyOutput = &descendantQuery{Input: qyGrandInput, Predicate: filter, Self: ok} + qyOutput = &descendantQuery{name: root.LocalName, Input: qyGrandInput, Predicate: filter, Self: ok} return qyOutput, nil } + } else if ((flags & flagsEnum.Filter) == 0) && (root.AxeType == "descendant" || root.AxeType == "descendant-or-self") { + inputFlags |= flagsEnum.SmartDesc } - qyInput, err = b.processNode(root.Input) + qyInput, err = b.processNode(root.Input, inputFlags, props) if err != nil { return nil, err } @@ -109,11 +138,13 @@ func (b *builder) processAxisNode(root *axisNode) (query, error) { switch root.AxeType { case "ancestor": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate} + qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "ancestor-or-self": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate, Self: true} + qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true} + *props |= builderProps.NonFlat case "attribute": - qyOutput = &attributeQuery{Input: qyInput, Predicate: predicate} + qyOutput = &attributeQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} case "child": filter := func(n NodeNavigator) bool { v := predicate(n) @@ -127,19 +158,35 @@ func (b *builder) processAxisNode(root *axisNode) (query, error) { } return v } - qyOutput = &childQuery{Input: qyInput, Predicate: filter} + if (*props & builderProps.NonFlat) != builderProps.None { + qyOutput = &childQuery{name: root.LocalName, Input: qyInput, Predicate: filter} + } else { + qyOutput = &cachedChildQuery{name: root.LocalName, Input: qyInput, Predicate: filter} + } case "descendant": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate} + if (flags & flagsEnum.SmartDesc) != flagsEnum.None { + qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: false, Predicate: predicate} + } else { + qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + } + *props |= builderProps.NonFlat case "descendant-or-self": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate, Self: true} + if (flags & flagsEnum.SmartDesc) != flagsEnum.None { + qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: true, Predicate: predicate} + } else { + qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true} + } + *props |= builderProps.NonFlat case "following": qyOutput = &followingQuery{Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "following-sibling": qyOutput = &followingQuery{Input: qyInput, Predicate: predicate, Sibling: true} case "parent": qyOutput = &parentQuery{Input: qyInput, Predicate: predicate} case "preceding": qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "preceding-sibling": qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate, Sibling: true} case "self": @@ -153,56 +200,176 @@ func (b *builder) processAxisNode(root *axisNode) (query, error) { return qyOutput, nil } +func canBeNumber(q query) bool { + if q.ValueType() != xpathResultType.Any { + return q.ValueType() == xpathResultType.Number + } + return true +} + // processFilterNode builds query for the XPath filter predicate. -func (b *builder) processFilterNode(root *filterNode) (query, error) { - b.flag |= filterFlag +func (b *builder) processFilter(root *filterNode, flags flag, props *builderProp) (query, error) { + first := (flags & flagsEnum.Filter) == 0 - qyInput, err := b.processNode(root.Input) + qyInput, err := b.processNode(root.Input, (flags | flagsEnum.Filter), props) if err != nil { return nil, err } - qyCond, err := b.processNode(root.Condition) + firstInput := b.firstInput + + var propsCond builderProp + cond, err := b.processNode(root.Condition, flags, &propsCond) if err != nil { return nil, err } - qyOutput := &filterQuery{Input: qyInput, Predicate: qyCond} - return qyOutput, nil + + // Checking whether is number + if canBeNumber(cond) || ((propsCond & (builderProps.HasPosition | builderProps.HasLast)) != 0) { + propsCond |= builderProps.HasPosition + flags |= flagsEnum.PosFilter + } + + if root.Input.Type() != nodeFilter { + *props &= ^builderProps.PosFilter + } + + if (propsCond & builderProps.HasPosition) != 0 { + *props |= builderProps.PosFilter + } + + merge := (qyInput.Properties() & queryProps.Merge) != 0 + if (propsCond & builderProps.HasPosition) != builderProps.None { + if (propsCond & builderProps.HasLast) != 0 { + // https://github.com/antchfx/xpath/issues/76 + // https://github.com/antchfx/xpath/issues/78 + if qyFunc, ok := cond.(*functionQuery); ok { + switch qyFunc.Input.(type) { + case *filterQuery: + cond = &lastQuery{Input: qyFunc.Input} + } + } + } + } + + if first && firstInput != nil { + if merge && ((*props & builderProps.PosFilter) != 0) { + qyInput = &filterQuery{Input: qyInput, Predicate: cond, NoPosition: false} + + var ( + rootQuery = &contextQuery{} + parent query + ) + switch axisQuery := firstInput.(type) { + case *ancestorQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *attributeQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *childQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *cachedChildQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *descendantQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *followingQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *precedingQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *parentQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *selfQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *groupQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *descendantOverDescendantQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + } + b.firstInput = nil + if parent != nil { + return &mergeQuery{Input: parent, Child: qyInput}, nil + } + return qyInput, nil + } + b.firstInput = nil + } + + resultQuery := &filterQuery{ + Input: qyInput, + Predicate: cond, + NoPosition: (propsCond & builderProps.HasPosition) == 0, + } + return resultQuery, nil } // processFunctionNode processes query for the XPath function node. -func (b *builder) processFunctionNode(root *functionNode) (query, error) { +func (b *builder) processFunction(root *functionNode, props *builderProp) (query, error) { + // Reset builder props + *props = builderProps.None + var qyOutput query switch root.FuncName { case "starts-with": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: startwithFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: startwithFunc(arg1, arg2)} case "ends-with": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: endwithFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: endwithFunc(arg1, arg2)} case "contains": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: containsFunc(arg1, arg2)} case "matches": //matches(string , pattern) if len(root.Args) != 2 { @@ -212,10 +379,10 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } // Issue #92, testing the regular expression before. @@ -224,7 +391,7 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { return nil, fmt.Errorf("matches() got error. %v", err) } } - qyOutput = &functionQuery{Input: b.firstInput, Func: matchesFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: matchesFunc(arg1, arg2)} case "substring": //substring( string , start [, length] ) if len(root.Args) < 2 { @@ -234,18 +401,18 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } if len(root.Args) == 3 { - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } } - qyOutput = &functionQuery{Input: b.firstInput, Func: substringFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: substringFunc(arg1, arg2, arg3)} case "substring-before", "substring-after": //substring-xxxx( haystack, needle ) if len(root.Args) != 2 { @@ -255,31 +422,30 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } qyOutput = &functionQuery{ - Input: b.firstInput, - Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"), + Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"), } case "string-length": // string-length( [string] ) if len(root.Args) < 1 { return nil, errors.New("xpath: string-length function must have at least one parameter") } - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: stringLengthFunc(arg1)} + qyOutput = &functionQuery{Func: stringLengthFunc(arg1)} case "normalize-space": if len(root.Args) == 0 { return nil, errors.New("xpath: normalize-space function must have at least one parameter") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } @@ -293,16 +459,16 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: replaceFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: replaceFunc(arg1, arg2, arg3)} case "translate": //translate( string , string, string ) if len(root.Args) != 3 { @@ -312,21 +478,21 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: translateFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: translateFunc(arg1, arg2, arg3)} case "not": if len(root.Args) == 0 { return nil, errors.New("xpath: not function must have at least one parameter") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } @@ -340,46 +506,46 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { err error ) if len(root.Args) == 1 { - arg, err = b.processNode(root.Args[0]) + arg, err = b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } } switch root.FuncName { case "name": - qyOutput = &functionQuery{Input: b.firstInput, Func: nameFunc(arg)} + qyOutput = &functionQuery{Func: nameFunc(arg)} case "local-name": - qyOutput = &functionQuery{Input: b.firstInput, Func: localNameFunc(arg)} + qyOutput = &functionQuery{Func: localNameFunc(arg)} case "namespace-uri": - qyOutput = &functionQuery{Input: b.firstInput, Func: namespaceFunc(arg)} + qyOutput = &functionQuery{Func: namespaceFunc(arg)} } case "true", "false": val := root.FuncName == "true" qyOutput = &functionQuery{ - Input: b.firstInput, Func: func(_ query, _ iterator) interface{} { return val }, } case "last": - switch typ := b.firstInput.(type) { - case *groupQuery, *filterQuery: - // https://github.com/antchfx/xpath/issues/76 - // https://github.com/antchfx/xpath/issues/78 - qyOutput = &lastQuery{Input: typ} - default: - qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc} - } - + //switch typ := b.firstInput.(type) { + //case *groupQuery, *filterQuery: + // https://github.com/antchfx/xpath/issues/76 + // https://github.com/antchfx/xpath/issues/78 + //qyOutput = &lastQuery{Input: typ} + //default: + qyOutput = &functionQuery{Func: lastFunc} + //} + *props |= builderProps.HasLast case "position": - qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc} + qyOutput = &functionQuery{Func: positionFunc} + *props |= builderProps.HasPosition case "boolean", "number", "string": - inp := b.firstInput + var inp query if len(root.Args) > 1 { return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) } if len(root.Args) == 1 { - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } @@ -396,13 +562,10 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { } qyOutput = f case "count": - //if b.firstInput == nil { - // return nil, errors.New("xpath: expression must evaluate to node-set") - //} if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: count(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } @@ -411,7 +574,7 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: sum(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } @@ -420,7 +583,7 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: ceiling(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } @@ -440,18 +603,18 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { } var args []query for _, v := range root.Args { - q, err := b.processNode(v) + q, err := b.processNode(v, flagsEnum.None, props) if err != nil { return nil, err } args = append(args, q) } - qyOutput = &functionQuery{Input: b.firstInput, Func: concatFunc(args...)} + qyOutput = &functionQuery{Func: concatFunc(args...)} case "reverse": if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: reverse(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } @@ -460,11 +623,11 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { if len(root.Args) != 2 { return nil, fmt.Errorf("xpath: string-join(node-sets, separator) function requires node-set and argument") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg1, err := b.processNode(root.Args[1]) + arg1, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } @@ -472,18 +635,29 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { default: return nil, fmt.Errorf("not yet support this function %s()", root.FuncName) } + + if funcQuery, ok := qyOutput.(*functionQuery); ok && funcQuery.Input == nil { + funcQuery.Input = b.firstInput + } return qyOutput, nil } -func (b *builder) processOperatorNode(root *operatorNode) (query, error) { - left, err := b.processNode(root.Left) +func (b *builder) processOperator(root *operatorNode, props *builderProp) (query, error) { + var ( + leftProp builderProp + rightProp builderProp + ) + + left, err := b.processNode(root.Left, flagsEnum.None, &leftProp) if err != nil { return nil, err } - right, err := b.processNode(root.Right) + right, err := b.processNode(root.Right, flagsEnum.None, &rightProp) if err != nil { return nil, err } + *props = leftProp | rightProp + var qyOutput query switch root.Op { case "+", "-", "*", "div", "mod": // Numeric operator @@ -525,41 +699,45 @@ func (b *builder) processOperatorNode(root *operatorNode) (query, error) { } qyOutput = &booleanQuery{Left: left, Right: right, IsOr: isOr} case "|": + *props |= builderProps.NonFlat qyOutput = &unionQuery{Left: left, Right: right} } return qyOutput, nil } -func (b *builder) processNode(root node) (q query, err error) { - if b.depth = b.depth + 1; b.depth > 1024 { +func (b *builder) processNode(root node, flags flag, props *builderProp) (q query, err error) { + if b.parseDepth = b.parseDepth + 1; b.parseDepth > 1024 { err = errors.New("the xpath expressions is too complex") return } - + *props = builderProps.None switch root.Type() { case nodeConstantOperand: n := root.(*operandNode) q = &constantQuery{Val: n.Val} case nodeRoot: - q = &contextQuery{Root: true} + q = &absoluteQuery{} case nodeAxis: - q, err = b.processAxisNode(root.(*axisNode)) + q, err = b.processAxis(root.(*axisNode), flags, props) b.firstInput = q case nodeFilter: - q, err = b.processFilterNode(root.(*filterNode)) + q, err = b.processFilter(root.(*filterNode), flags, props) b.firstInput = q case nodeFunction: - q, err = b.processFunctionNode(root.(*functionNode)) + q, err = b.processFunction(root.(*functionNode), props) case nodeOperator: - q, err = b.processOperatorNode(root.(*operatorNode)) + q, err = b.processOperator(root.(*operatorNode), props) case nodeGroup: - q, err = b.processNode(root.(*groupNode).Input) + q, err = b.processNode(root.(*groupNode).Input, flagsEnum.None, props) if err != nil { return } q = &groupQuery{Input: q} - b.firstInput = q + if b.firstInput == nil { + b.firstInput = q + } } + b.parseDepth-- return } @@ -579,5 +757,6 @@ func build(expr string, namespaces map[string]string) (q query, err error) { }() root := parse(expr, namespaces) b := &builder{} - return b.processNode(root) + props := builderProps.None + return b.processNode(root, flagsEnum.None, &props) } diff --git a/operator.go b/operator.go index eb38ac6..12aadc1 100644 --- a/operator.go +++ b/operator.go @@ -1,40 +1,12 @@ package xpath import ( - "fmt" "reflect" "strconv" ) // The XPath number operator function list. -// valueType is a return value type. -type valueType int - -const ( - booleanType valueType = iota - numberType - stringType - nodeSetType -) - -func getValueType(i interface{}) valueType { - v := reflect.ValueOf(i) - switch v.Kind() { - case reflect.Float64: - return numberType - case reflect.String: - return stringType - case reflect.Bool: - return booleanType - default: - if _, ok := i.(query); ok { - return nodeSetType - } - } - panic(fmt.Errorf("xpath unknown value type: %v", v.Kind())) -} - type logical func(iterator, string, interface{}, interface{}) bool var logicalFuncs = [][]logical{ @@ -228,50 +200,50 @@ func cmpBooleanBoolean(t iterator, op string, m, n interface{}) bool { // eqFunc is an `=` operator. func eqFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "=", m, n) } // gtFunc is an `>` operator. func gtFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, ">", m, n) } // geFunc is an `>=` operator. func geFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, ">=", m, n) } // ltFunc is an `<` operator. func ltFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "<", m, n) } // leFunc is an `<=` operator. func leFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "<=", m, n) } // neFunc is an `!=` operator. func neFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "!=", m, n) } // orFunc is an `or` operator. var orFunc = func(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "or", m, n) } diff --git a/query.go b/query.go index 4e6c634..d5a58ca 100644 --- a/query.go +++ b/query.go @@ -7,6 +7,44 @@ import ( "reflect" ) +// The return type of the XPath expression. +type resultType int + +var xpathResultType = struct { + Boolean resultType + // A numeric value + Number resultType + String resultType + // A node collection. + NodeSet resultType + // Any of the XPath node types. + Any resultType +}{ + Boolean: 0, + Number: 1, + String: 2, + NodeSet: 3, + Any: 4, +} + +type queryProp int + +var queryProps = struct { + None queryProp + Position queryProp + Count queryProp + Cached queryProp + Reverse queryProp + Merge queryProp +}{ + None: 0, + Position: 1, + Count: 2, + Cached: 4, + Reverse: 8, + Merge: 16, +} + type iterator interface { Current() NodeNavigator } @@ -20,12 +58,15 @@ type query interface { Evaluate(iterator) interface{} Clone() query + + // ValueType returns the value type of the current query. + ValueType() resultType + + Properties() queryProp } // nopQuery is an empty query that always return nil for any query. -type nopQuery struct { - query -} +type nopQuery struct{} func (nopQuery) Select(iterator) NodeNavigator { return nil } @@ -33,21 +74,23 @@ func (nopQuery) Evaluate(iterator) interface{} { return nil } func (nopQuery) Clone() query { return nopQuery{} } +func (nopQuery) ValueType() resultType { return xpathResultType.NodeSet } + +func (nopQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached +} + // contextQuery is returns current node on the iterator object query. type contextQuery struct { count int - Root bool // Moving to root-level node in the current context iterator. } -func (c *contextQuery) Select(t iterator) (n NodeNavigator) { - if c.count == 0 { - c.count++ - n = t.Current().Copy() - if c.Root { - n.MoveToRoot() - } +func (c *contextQuery) Select(t iterator) NodeNavigator { + if c.count > 0 { + return nil } - return n + c.count++ + return t.Current().Copy() } func (c *contextQuery) Evaluate(iterator) interface{} { @@ -56,12 +99,53 @@ func (c *contextQuery) Evaluate(iterator) interface{} { } func (c *contextQuery) Clone() query { - return &contextQuery{Root: c.Root} + return &contextQuery{} +} + +func (c *contextQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *contextQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached +} + +type absoluteQuery struct { + count int +} + +func (a *absoluteQuery) Select(t iterator) (n NodeNavigator) { + if a.count > 0 { + return + } + a.count++ + n = t.Current().Copy() + n.MoveToRoot() + return +} + +func (a *absoluteQuery) Evaluate(t iterator) interface{} { + a.count = 0 + return a +} + +func (a *absoluteQuery) Clone() query { + return &absoluteQuery{} +} + +func (a *absoluteQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *absoluteQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached } // ancestorQuery is an XPath ancestor node query.(ancestor::*|ancestor-self::*) type ancestorQuery struct { + name string iterator func() NodeNavigator + table map[uint64]bool Self bool Input query @@ -69,6 +153,10 @@ type ancestorQuery struct { } func (a *ancestorQuery) Select(t iterator) NodeNavigator { + if a.table == nil { + a.table = make(map[uint64]bool) + } + for { if a.iterator == nil { node := a.Input.Select(t) @@ -78,24 +166,27 @@ func (a *ancestorQuery) Select(t iterator) NodeNavigator { first := true node = node.Copy() a.iterator = func() NodeNavigator { - if first && a.Self { + if first { first = false - if a.Predicate(node) { + if a.Self && a.Predicate(node) { return node } } for node.MoveToParent() { - if !a.Predicate(node) { - continue + if a.Predicate(node) { + return node } - return node } return nil } } - if node := a.iterator(); node != nil { - return node + for node := a.iterator(); node != nil; node = a.iterator() { + node_id := getHashCode(node.Copy()) + if _, ok := a.table[node_id]; !ok { + a.table[node_id] = true + return node + } } a.iterator = nil } @@ -112,11 +203,20 @@ func (a *ancestorQuery) Test(n NodeNavigator) bool { } func (a *ancestorQuery) Clone() query { - return &ancestorQuery{Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} + return &ancestorQuery{name: a.name, Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} +} + +func (a *ancestorQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *ancestorQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge | queryProps.Reverse } // attributeQuery is an XPath attribute node query.(@*) type attributeQuery struct { + name string iterator func() NodeNavigator Input query @@ -162,11 +262,20 @@ func (a *attributeQuery) Test(n NodeNavigator) bool { } func (a *attributeQuery) Clone() query { - return &attributeQuery{Input: a.Input.Clone(), Predicate: a.Predicate} + return &attributeQuery{name: a.name, Input: a.Input.Clone(), Predicate: a.Predicate} +} + +func (a *attributeQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *attributeQuery) Properties() queryProp { + return queryProps.Merge } // childQuery is an XPath child node query.(child::*) type childQuery struct { + name string posit int iterator func() NodeNavigator @@ -216,7 +325,15 @@ func (c *childQuery) Test(n NodeNavigator) bool { } func (c *childQuery) Clone() query { - return &childQuery{Input: c.Input.Clone(), Predicate: c.Predicate} + return &childQuery{name: c.name, Input: c.Input.Clone(), Predicate: c.Predicate} +} + +func (c *childQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *childQuery) Properties() queryProp { + return queryProps.Merge } // position returns a position of current NodeNavigator. @@ -224,8 +341,75 @@ func (c *childQuery) position() int { return c.posit } +type cachedChildQuery struct { + name string + posit int + iterator func() NodeNavigator + + Input query + Predicate func(NodeNavigator) bool +} + +func (c *cachedChildQuery) Select(t iterator) NodeNavigator { + for { + if c.iterator == nil { + c.posit = 0 + node := c.Input.Select(t) + if node == nil { + return nil + } + node = node.Copy() + first := true + c.iterator = func() NodeNavigator { + for { + if (first && !node.MoveToChild()) || (!first && !node.MoveToNext()) { + return nil + } + first = false + if c.Predicate(node) { + return node + } + } + } + } + + if node := c.iterator(); node != nil { + c.posit++ + return node + } + c.iterator = nil + } +} + +func (c *cachedChildQuery) Evaluate(t iterator) interface{} { + c.Input.Evaluate(t) + c.iterator = nil + return c +} + +func (c *cachedChildQuery) position() int { + return c.posit +} + +func (c *cachedChildQuery) Test(n NodeNavigator) bool { + return c.Predicate(n) +} + +func (c *cachedChildQuery) Clone() query { + return &childQuery{name: c.name, Input: c.Input.Clone(), Predicate: c.Predicate} +} + +func (c *cachedChildQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *cachedChildQuery) Properties() queryProp { + return queryProps.Merge +} + // descendantQuery is an XPath descendant node query.(descendant::* | descendant-or-self::*) type descendantQuery struct { + name string iterator func() NodeNavigator posit int level int @@ -245,14 +429,11 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { } node = node.Copy() d.level = 0 - positmap := make(map[int]int) first := true d.iterator = func() NodeNavigator { - if first && d.Self { + if first { first = false - if d.Predicate(node) { - d.posit = 1 - positmap[d.level] = 1 + if d.Self && d.Predicate(node) { return node } } @@ -260,7 +441,6 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { for { if node.MoveToChild() { d.level = d.level + 1 - positmap[d.level] = 0 } else { for { if d.level == 0 { @@ -274,8 +454,6 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { } } if d.Predicate(node) { - positmap[d.level]++ - d.posit = positmap[d.level] return node } } @@ -283,6 +461,7 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { } if node := d.iterator(); node != nil { + d.posit++ return node } d.iterator = nil @@ -309,7 +488,15 @@ func (d *descendantQuery) depth() int { } func (d *descendantQuery) Clone() query { - return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} + return &descendantQuery{name: d.name, Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} +} + +func (d *descendantQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (d *descendantQuery) Properties() queryProp { + return queryProps.Merge } // followingQuery is an XPath following node query.(following::*|following-sibling::*) @@ -390,6 +577,14 @@ func (f *followingQuery) Clone() query { return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate} } +func (f *followingQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (f *followingQuery) Properties() queryProp { + return queryProps.Merge +} + func (f *followingQuery) position() int { return f.posit } @@ -471,17 +666,30 @@ func (p *precedingQuery) Clone() query { return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate} } +func (p *precedingQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (p *precedingQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Reverse +} + func (p *precedingQuery) position() int { return p.posit } // parentQuery is an XPath parent node query.(parent::*) type parentQuery struct { + table map[uint64]bool Input query Predicate func(NodeNavigator) bool } func (p *parentQuery) Select(t iterator) NodeNavigator { + if p.table == nil { + p.table = make(map[uint64]bool) + } + for { node := p.Input.Select(t) if node == nil { @@ -489,7 +697,11 @@ func (p *parentQuery) Select(t iterator) NodeNavigator { } node = node.Copy() if node.MoveToParent() && p.Predicate(node) { - return node + id := getHashCode(node.Copy()) + if _, ok := p.table[id]; !ok { + p.table[id] = true + return node + } } } } @@ -503,6 +715,14 @@ func (p *parentQuery) Clone() query { return &parentQuery{Input: p.Input.Clone(), Predicate: p.Predicate} } +func (p *parentQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (p *parentQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + func (p *parentQuery) Test(n NodeNavigator) bool { return p.Predicate(n) } @@ -539,12 +759,22 @@ func (s *selfQuery) Clone() query { return &selfQuery{Input: s.Input.Clone(), Predicate: s.Predicate} } +func (s *selfQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (s *selfQuery) Properties() queryProp { + return queryProps.Merge +} + // filterQuery is an XPath query for predicate filter. type filterQuery struct { - Input query - Predicate query - posit int - positmap map[int]int + Input query + Predicate query + NoPosition bool + + posit int + positmap map[int]int } func (f *filterQuery) do(t iterator) bool { @@ -602,6 +832,14 @@ func (f *filterQuery) Clone() query { return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()} } +func (f *filterQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (f *filterQuery) Properties() queryProp { + return (queryProps.Position | f.Input.Properties()) & (queryProps.Reverse | queryProps.Merge) +} + // functionQuery is an XPath function that returns a computed value for // the Evaluate call of the current NodeNavigator node. Select call isn't // applicable for functionQuery. @@ -624,6 +862,14 @@ func (f *functionQuery) Clone() query { return &functionQuery{Input: f.Input.Clone(), Func: f.Func} } +func (f *functionQuery) ValueType() resultType { + return xpathResultType.Any +} + +func (f *functionQuery) Properties() queryProp { + return queryProps.Merge +} + // transformFunctionQuery diffs from functionQuery where the latter computes a scalar // value (number,string,boolean) for the current NodeNavigator node while the former // (transformFunctionQuery) performs a mapping or transform of the current NodeNavigator @@ -652,6 +898,14 @@ func (f *transformFunctionQuery) Clone() query { return &transformFunctionQuery{Input: f.Input.Clone(), Func: f.Func} } +func (f *transformFunctionQuery) ValueType() resultType { + return xpathResultType.Any +} + +func (f *transformFunctionQuery) Properties() queryProp { + return queryProps.Merge +} + // constantQuery is an XPath constant operand. type constantQuery struct { Val interface{} @@ -669,6 +923,14 @@ func (c *constantQuery) Clone() query { return c } +func (c *constantQuery) ValueType() resultType { + return getXPathType(c.Val) +} + +func (c *constantQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + type groupQuery struct { posit int @@ -692,6 +954,14 @@ func (g *groupQuery) Clone() query { return &groupQuery{Input: g.Input.Clone()} } +func (g *groupQuery) ValueType() resultType { + return g.Input.ValueType() +} + +func (g *groupQuery) Properties() queryProp { + return queryProps.Position +} + func (g *groupQuery) position() int { return g.posit } @@ -726,6 +996,14 @@ func (l *logicalQuery) Clone() query { return &logicalQuery{Left: l.Left.Clone(), Right: l.Right.Clone(), Do: l.Do} } +func (l *logicalQuery) ValueType() resultType { + return xpathResultType.Boolean +} + +func (l *logicalQuery) Properties() queryProp { + return queryProps.Merge +} + // numericQuery is an XPath numeric operator expression. type numericQuery struct { Left, Right query @@ -747,6 +1025,14 @@ func (n *numericQuery) Clone() query { return &numericQuery{Left: n.Left.Clone(), Right: n.Right.Clone(), Do: n.Do} } +func (n *numericQuery) ValueType() resultType { + return xpathResultType.Number +} + +func (n *numericQuery) Properties() queryProp { + return queryProps.Merge +} + type booleanQuery struct { IsOr bool Left, Right query @@ -837,6 +1123,14 @@ func (b *booleanQuery) Clone() query { return &booleanQuery{IsOr: b.IsOr, Left: b.Left.Clone(), Right: b.Right.Clone()} } +func (b *booleanQuery) ValueType() resultType { + return xpathResultType.Boolean +} + +func (b *booleanQuery) Properties() queryProp { + return queryProps.Merge +} + type unionQuery struct { Left, Right query iterator func() NodeNavigator @@ -894,6 +1188,14 @@ func (u *unionQuery) Clone() query { return &unionQuery{Left: u.Left.Clone(), Right: u.Right.Clone()} } +func (u *unionQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (u *unionQuery) Properties() queryProp { + return queryProps.Merge +} + type lastQuery struct { buffer []NodeNavigator counted bool @@ -923,6 +1225,147 @@ func (q *lastQuery) Clone() query { return &lastQuery{Input: q.Input.Clone()} } +func (q *lastQuery) ValueType() resultType { + return xpathResultType.Number +} + +func (q *lastQuery) Properties() queryProp { + return queryProps.Merge +} + +type descendantOverDescendantQuery struct { + name string + level int + posit int + currentNode NodeNavigator + + Input query + MatchSelf bool + Predicate func(NodeNavigator) bool +} + +func (d *descendantOverDescendantQuery) moveToFirstChild() bool { + if d.currentNode.MoveToChild() { + d.level++ + return true + } + return false +} + +func (d *descendantOverDescendantQuery) moveUpUntilNext() bool { + for !d.currentNode.MoveToNext() { + d.level-- + if d.level == 0 { + return false + } + d.currentNode.MoveToParent() + } + return true +} + +func (d *descendantOverDescendantQuery) Select(t iterator) NodeNavigator { + for { + if d.level == 0 { + node := d.Input.Select(t) + if node == nil { + return nil + } + d.currentNode = node.Copy() + d.posit = 0 + if d.MatchSelf && d.Predicate(d.currentNode) { + d.posit = 1 + return d.currentNode + } + d.moveToFirstChild() + } else if !d.moveUpUntilNext() { + continue + } + for ok := true; ok; ok = d.moveToFirstChild() { + if d.Predicate(d.currentNode) { + d.posit++ + return d.currentNode + } + } + } +} + +func (d *descendantOverDescendantQuery) Evaluate(t iterator) interface{} { + d.Input.Evaluate(t) + return d +} + +func (d *descendantOverDescendantQuery) Clone() query { + return &descendantOverDescendantQuery{Input: d.Input.Clone(), Predicate: d.Predicate, MatchSelf: d.MatchSelf} +} + +func (d *descendantOverDescendantQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (d *descendantOverDescendantQuery) Properties() queryProp { + return queryProps.Merge +} + +func (d *descendantOverDescendantQuery) position() int { + return d.posit +} + +type mergeQuery struct { + Input query + Child query + + iterator func() NodeNavigator +} + +func (m *mergeQuery) Select(t iterator) NodeNavigator { + for { + if m.iterator == nil { + root := m.Input.Select(t) + if root == nil { + return nil + } + m.Child.Evaluate(t) + root = root.Copy() + t.Current().MoveTo(root) + var list []NodeNavigator + for node := m.Child.Select(t); node != nil; node = m.Child.Select(t) { + list = append(list, node.Copy()) + } + i := 0 + m.iterator = func() NodeNavigator { + if i >= len(list) { + return nil + } + result := list[i] + i++ + return result + } + } + + if node := m.iterator(); node != nil { + return node + } + m.iterator = nil + } +} + +func (m *mergeQuery) Evaluate(t iterator) interface{} { + m.Input.Evaluate(t) + return m +} + +func (m *mergeQuery) Clone() query { + return &mergeQuery{Input: m.Input.Clone(), Child: m.Child.Clone()} +} + +func (m *mergeQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (m *mergeQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + func getHashCode(n NodeNavigator) uint64 { var sb bytes.Buffer switch n.NodeType() { @@ -981,3 +1424,20 @@ func getNodeDepth(q query) int { } return 0 } + +func getXPathType(i interface{}) resultType { + v := reflect.ValueOf(i) + switch v.Kind() { + case reflect.Float64: + return xpathResultType.Number + case reflect.String: + return xpathResultType.String + case reflect.Bool: + return xpathResultType.Boolean + default: + if _, ok := i.(query); ok { + return xpathResultType.NodeSet + } + } + panic(fmt.Errorf("xpath unknown value type: %v", v.Kind())) +}