Skip to content

Commit

Permalink
Attributes deduplicated more efficiently
Browse files Browse the repository at this point in the history
When reading an XML document, this package uses a more
time-efficient technique to detect and remove attributes
with duplicated names (within each element).
  • Loading branch information
beevik committed Jul 7, 2024
1 parent 97f4565 commit 52ce29e
Showing 1 changed file with 26 additions and 13 deletions.
39 changes: 26 additions & 13 deletions etree.go
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
r = newXmlSimpleReader(ri)
}

attrCheck := make(map[string]int)
dec := newDecoder(r, settings)

var stack stack[*Element]
Expand Down Expand Up @@ -921,8 +922,21 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
switch t := t.(type) {
case xml.StartElement:
e := newElement(t.Name.Space, t.Name.Local, top)
for _, a := range t.Attr {
e.createAttr(a.Name.Space, a.Name.Local, a.Value, e, settings.PreserveDuplicateAttrs)
if settings.PreserveDuplicateAttrs {
for _, a := range t.Attr {
e.addAttr(a.Name.Space, a.Name.Local, a.Value)
}
} else {
// Collapse attributes that share a qualified name: the first occurrence
// keeps its position in e.Attr and the value of the last occurrence wins.
for _, a := range t.Attr {
	key := a.Name.Space + ":" + a.Name.Local
	if j, contains := attrCheck[key]; contains {
		e.Attr[j].Value = a.Value
	} else {
		e.addAttr(a.Name.Space, a.Name.Local, a.Value)
		// Record the attribute's index in e.Attr, not its index in
		// t.Attr. Once a duplicate has been collapsed the two diverge,
		// and a t.Attr index would update the wrong attribute or run
		// past the end of e.Attr.
		attrCheck[key] = len(e.Attr) - 1
	}
}
clear(attrCheck)
}
stack.push(e)
case xml.EndElement:
Expand Down Expand Up @@ -1365,25 +1379,24 @@ func (e *Element) addChild(t Token) {
// prefix followed by a colon.
func (e *Element) CreateAttr(key, value string) *Attr {
space, skey := spaceDecompose(key)
return e.createAttr(space, skey, value, e, false)
}

// createAttr is a helper function that creates attributes.
func (e *Element) createAttr(space, key, value string, parent *Element, preserveDups bool) *Attr {
if !preserveDups {
for i, a := range e.Attr {
if space == a.Space && key == a.Key {
e.Attr[i].Value = value
return &e.Attr[i]
}
for i, a := range e.Attr {
if space == a.Space && skey == a.Key {
e.Attr[i].Value = value
return &e.Attr[i]
}
}

return e.addAttr(space, skey, value)
}

// addAttr is a helper function that adds an attribute to an element.
func (e *Element) addAttr(space, key, value string) *Attr {
a := Attr{
Space: space,
Key: key,
Value: value,
element: parent,
element: e,
}
e.Attr = append(e.Attr, a)
return &e.Attr[len(e.Attr)-1]
Expand Down

0 comments on commit 52ce29e

Please sign in to comment.