From 52ce29ed5c7b49db4a2e9f92e0eceeb0c577b527 Mon Sep 17 00:00:00 2001
From: Brett Vickers
Date: Sun, 7 Jul 2024 09:38:56 -0700
Subject: [PATCH] Attributes deduplicated more efficiently

When reading an XML document, this package uses a more time-efficient
technique to detect and remove attributes with duplicated names (within
each element).
---
 etree.go | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/etree.go b/etree.go
index 815b6c4..758eb4c 100644
--- a/etree.go
+++ b/etree.go
@@ -889,6 +889,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
 		r = newXmlSimpleReader(ri)
 	}
 
+	attrCheck := make(map[string]int)
 	dec := newDecoder(r, settings)
 
 	var stack stack[*Element]
@@ -921,8 +922,22 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
 		switch t := t.(type) {
 		case xml.StartElement:
 			e := newElement(t.Name.Space, t.Name.Local, top)
-			for _, a := range t.Attr {
-				e.createAttr(a.Name.Space, a.Name.Local, a.Value, e, settings.PreserveDuplicateAttrs)
+			if settings.PreserveDuplicateAttrs {
+				for _, a := range t.Attr {
+					e.addAttr(a.Name.Space, a.Name.Local, a.Value)
+				}
+			} else {
+				for _, a := range t.Attr {
+					key := a.Name.Space + ":" + a.Name.Local
+					if j, contains := attrCheck[key]; contains {
+						e.Attr[j].Value = a.Value
+					} else {
+						e.addAttr(a.Name.Space, a.Name.Local, a.Value)
+						// Index into e.Attr, which lags t.Attr once a duplicate is merged.
+						attrCheck[key] = len(e.Attr) - 1
+					}
+				}
+				clear(attrCheck)
 			}
 			stack.push(e)
 		case xml.EndElement:
@@ -1365,25 +1380,24 @@ func (e *Element) addChild(t Token) {
 // prefix followed by a colon.
 func (e *Element) CreateAttr(key, value string) *Attr {
 	space, skey := spaceDecompose(key)
-	return e.createAttr(space, skey, value, e, false)
-}
 
-// createAttr is a helper function that creates attributes.
-func (e *Element) createAttr(space, key, value string, parent *Element, preserveDups bool) *Attr {
-	if !preserveDups {
-		for i, a := range e.Attr {
-			if space == a.Space && key == a.Key {
-				e.Attr[i].Value = value
-				return &e.Attr[i]
-			}
+	for i, a := range e.Attr {
+		if space == a.Space && skey == a.Key {
+			e.Attr[i].Value = value
+			return &e.Attr[i]
 		}
 	}
+	return e.addAttr(space, skey, value)
+}
+
+// addAttr is a helper function that adds an attribute to an element.
+func (e *Element) addAttr(space, key, value string) *Attr {
 	a := Attr{
 		Space:   space,
 		Key:     key,
 		Value:   value,
-		element: parent,
+		element: e,
 	}
 	e.Attr = append(e.Attr, a)
 	return &e.Attr[len(e.Attr)-1]