From bfb0bd8b6d8f77ef94cc807f96164aebcc67e09f Mon Sep 17 00:00:00 2001 From: Tom Wright Date: Tue, 22 Oct 2024 19:20:41 +0100 Subject: [PATCH] Rework JSON parser --- dencoding/json.go | 20 -- dencoding/json_decoder.go | 174 ----------------- dencoding/json_decoder_test.go | 54 ----- dencoding/json_encoder.go | 92 --------- dencoding/json_encoder_test.go | 36 ---- dencoding/map.go | 4 +- model/value.go | 3 + model/value_literal.go | 2 +- model/value_literal_test.go | 14 ++ model/value_set_test.go | 15 ++ parsing/json.go | 348 ++++++++++++++++++++++++++++++++- parsing/json_test.go | 49 +++++ 12 files changed, 427 insertions(+), 384 deletions(-) delete mode 100644 dencoding/json.go delete mode 100644 dencoding/json_decoder.go delete mode 100644 dencoding/json_decoder_test.go delete mode 100644 dencoding/json_encoder.go delete mode 100644 dencoding/json_encoder_test.go create mode 100644 model/value_literal_test.go create mode 100644 parsing/json_test.go diff --git a/dencoding/json.go b/dencoding/json.go deleted file mode 100644 index 5a82c3d1..00000000 --- a/dencoding/json.go +++ /dev/null @@ -1,20 +0,0 @@ -package dencoding - -import "encoding/json" - -const ( - jsonOpenObject = json.Delim('{') - jsonCloseObject = json.Delim('}') - jsonOpenArray = json.Delim('[') - jsonCloseArray = json.Delim(']') -) - -// JSONEncoderOption is identifies an option that can be applied to a JSON encoder. -type JSONEncoderOption interface { - ApplyEncoder(encoder *JSONEncoder) -} - -// JSONDecoderOption is identifies an option that can be applied to a JSON decoder. 
-type JSONDecoderOption interface { - ApplyDecoder(decoder *JSONDecoder) -} diff --git a/dencoding/json_decoder.go b/dencoding/json_decoder.go deleted file mode 100644 index 26b5f788..00000000 --- a/dencoding/json_decoder.go +++ /dev/null @@ -1,174 +0,0 @@ -package dencoding - -import ( - "encoding/json" - "fmt" - "io" - "reflect" - "strings" -) - -// JSONDecoder wraps a standard json encoder to implement custom ordering logic. -type JSONDecoder struct { - decoder *json.Decoder -} - -// NewJSONDecoder returns a new dencoding JSONDecoder. -func NewJSONDecoder(r io.Reader, options ...JSONDecoderOption) *JSONDecoder { - jsonDecoder := json.NewDecoder(r) - jsonDecoder.UseNumber() - decoder := &JSONDecoder{ - decoder: jsonDecoder, - } - for _, o := range options { - o.ApplyDecoder(decoder) - } - return decoder -} - -// Decode decodes the next item found in the decoder and writes it to v. -func (decoder *JSONDecoder) Decode(v any) error { - rv := reflect.ValueOf(v) - if rv.Kind() != reflect.Pointer || rv.IsNil() { - return fmt.Errorf("invalid decode target: %s", reflect.TypeOf(v)) - } - - rve := rv.Elem() - - t, err := decoder.decoder.Token() - if err != nil { - return err - } - - switch t { - case jsonOpenObject: - object, err := decoder.decodeObject() - if err != nil { - return fmt.Errorf("could not decode object: %w", err) - } - rve.Set(reflect.ValueOf(object)) - case jsonOpenArray: - arr, err := decoder.decodeArray() - if err != nil { - return fmt.Errorf("could not decode array: %w", err) - } - rve.Set(reflect.ValueOf(arr)) - default: - value, err := decoder.decodeValue(t) - if err != nil { - return err - } - rve.Set(reflect.ValueOf(value)) - } - - return nil -} - -func (decoder *JSONDecoder) decodeObject() (*Map, error) { - res := NewMap() - - var key any = nil - - for { - t, err := decoder.decoder.Token() - if err != nil { - // We don't expect an EOF here since we're in the middle of processing an object. 
- return res, err - } - - switch t { - case jsonOpenArray: - if key == nil { - return res, fmt.Errorf("unexpected token: %v", t) - } - value, err := decoder.decodeArray() - if err != nil { - return res, err - } - res.Set(key.(string), value) - key = nil - case jsonCloseArray: - return res, fmt.Errorf("unexpected token: %v", t) - case jsonCloseObject: - return res, nil - case jsonOpenObject: - if key == nil { - return res, fmt.Errorf("unexpected token: %v", t) - } - value, err := decoder.decodeObject() - if err != nil { - return res, err - } - res.Set(key.(string), value) - key = nil - default: - if key == nil { - key = t - } else { - value, err := decoder.decodeValue(t) - if err != nil { - return nil, err - } - res.Set(key.(string), value) - key = nil - } - } - } -} - -func (decoder *JSONDecoder) decodeValue(t json.Token) (any, error) { - switch tv := t.(type) { - case json.Number: - strNum := tv.String() - if strings.Contains(strNum, ".") { - floatNum, err := tv.Float64() - if err == nil { - return floatNum, nil - } - return nil, err - } - intNum, err := tv.Int64() - if err == nil { - return intNum, nil - } - - return nil, err - } - return t, nil -} - -func (decoder *JSONDecoder) decodeArray() ([]any, error) { - res := make([]any, 0) - for { - t, err := decoder.decoder.Token() - if err != nil { - // We don't expect an EOF here since we're in the middle of processing an object. 
- return res, err - } - - switch t { - case jsonOpenArray: - value, err := decoder.decodeArray() - if err != nil { - return res, err - } - res = append(res, value) - case jsonCloseArray: - return res, nil - case jsonCloseObject: - return res, fmt.Errorf("unexpected token: %t", t) - case jsonOpenObject: - value, err := decoder.decodeObject() - if err != nil { - return res, err - } - res = append(res, value) - default: - value, err := decoder.decodeValue(t) - if err != nil { - return nil, err - } - res = append(res, value) - } - } -} diff --git a/dencoding/json_decoder_test.go b/dencoding/json_decoder_test.go deleted file mode 100644 index 2d95e146..00000000 --- a/dencoding/json_decoder_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package dencoding_test - -import ( - "bytes" - "io" - "reflect" - "testing" - - "github.com/tomwright/dasel/v3/dencoding" -) - -func TestJSONDecoder_Decode(t *testing.T) { - b := []byte(`{"x":1,"a":"hello"}{"x":2,"a":"there"}{"a":"Tom","x":3}`) - dec := dencoding.NewJSONDecoder(bytes.NewReader(b)) - - maps := make([]any, 0) - for { - var v any - if err := dec.Decode(&v); err != nil { - if err == io.EOF { - break - } - t.Errorf("unexpected error: %v", err) - return - } - maps = append(maps, v) - } - - exp := [][]dencoding.KeyValue{ - { - {Key: "x", Value: int64(1)}, - {Key: "a", Value: "hello"}, - }, - { - {Key: "x", Value: int64(2)}, - {Key: "a", Value: "there"}, - }, - { - {Key: "a", Value: "Tom"}, - {Key: "x", Value: int64(3)}, - }, - } - - got := make([][]dencoding.KeyValue, 0) - for _, v := range maps { - if m, ok := v.(*dencoding.Map); ok { - got = append(got, m.KeyValues()) - } - } - - if !reflect.DeepEqual(exp, got) { - t.Errorf("expected %v, got %v", exp, got) - } -} diff --git a/dencoding/json_encoder.go b/dencoding/json_encoder.go deleted file mode 100644 index da060767..00000000 --- a/dencoding/json_encoder.go +++ /dev/null @@ -1,92 +0,0 @@ -package dencoding - -import ( - "bytes" - "encoding/json" - "io" -) - -// lastOptions contains 
the options that the last JSONEncoder was created with. -// Find a better way of passing this information into nested MarshalJSON calls. -var lastOptions []JSONEncoderOption - -// JSONEncoder wraps a standard json encoder to implement custom ordering logic. -type JSONEncoder struct { - encoder *json.Encoder -} - -// NewJSONEncoder returns a new dencoding JSONEncoder. -func NewJSONEncoder(w io.Writer, options ...JSONEncoderOption) *JSONEncoder { - jsonEncoder := json.NewEncoder(w) - encoder := &JSONEncoder{ - encoder: jsonEncoder, - } - for _, o := range options { - o.ApplyEncoder(encoder) - } - lastOptions = options - return encoder -} - -// Encode encodes the given value and writes the encodes bytes to the stream. -func (encoder *JSONEncoder) Encode(v any) error { - // We rely on Map.MarshalJSON to ensure ordering. - return encoder.encoder.Encode(v) -} - -// Close cleans up the encoder. -func (encoder *JSONEncoder) Close() error { - return nil -} - -// JSONEscapeHTML enables or disables html escaping when encoding JSON. -func JSONEscapeHTML(escape bool) JSONEncoderOption { - return jsonEncodeHTMLOption{escapeHTML: escape} -} - -type jsonEncodeHTMLOption struct { - escapeHTML bool -} - -func (option jsonEncodeHTMLOption) ApplyEncoder(encoder *JSONEncoder) { - encoder.encoder.SetEscapeHTML(option.escapeHTML) -} - -// JSONEncodeIndent sets the indentation when encoding JSON. -func JSONEncodeIndent(prefix string, indent string) JSONEncoderOption { - return jsonEncodeIndent{prefix: prefix, indent: indent} -} - -type jsonEncodeIndent struct { - prefix string - indent string -} - -func (option jsonEncodeIndent) ApplyEncoder(encoder *JSONEncoder) { - encoder.encoder.SetIndent(option.prefix, option.indent) -} - -// MarshalJSON JSON encodes the map and returns the bytes. -// This maintains ordering. -func (m *Map) MarshalJSON() ([]byte, error) { - - buf := new(bytes.Buffer) - buf.Write([]byte(`{`)) - encoder := NewJSONEncoder(buf, lastOptions...) 
- for i, key := range m.keys { - last := i == len(m.keys)-1 - - if err := encoder.Encode(key); err != nil { - return nil, err - } - buf.Write([]byte(`:`)) - if err := encoder.Encode(m.data[key]); err != nil { - return nil, err - } - if !last { - buf.Write([]byte(`,`)) - } - } - buf.Write([]byte(`}`)) - return buf.Bytes(), nil -} diff --git a/dencoding/json_encoder_test.go b/dencoding/json_encoder_test.go deleted file mode 100644 index 7bb557f8..00000000 --- a/dencoding/json_encoder_test.go +++ /dev/null @@ -1,36 +0,0 @@ -package dencoding_test - -import ( - "bytes" - "testing" - - "github.com/tomwright/dasel/v3/dencoding" -) - -func TestJSONEncoder_Encode(t *testing.T) { - orig := dencoding.NewMap(). - Set("c", "x"). - Set("b", "y"). - Set("a", "z") - - exp := `{ - "c": "x", - "b": "y", - "a": "z" -} -` - - gotBuffer := new(bytes.Buffer) - - encoder := dencoding.NewJSONEncoder(gotBuffer, dencoding.JSONEncodeIndent("", " ")) - if err := encoder.Encode(orig); err != nil { - t.Errorf("unexpected error: %v", err) - return - } - - got := gotBuffer.String() - - if exp != got { - t.Errorf("expected %s, got %s", exp, got) - } -} diff --git a/dencoding/map.go b/dencoding/map.go index 22fb1996..10ed13d1 100644 --- a/dencoding/map.go +++ b/dencoding/map.go @@ -1,6 +1,8 @@ package dencoding -import "reflect" +import ( + "reflect" +) // NewMap returns a new *Map that has its values initialised. 
func NewMap() *Map { diff --git a/model/value.go b/model/value.go index 58a31690..969bab4d 100644 --- a/model/value.go +++ b/model/value.go @@ -20,6 +20,7 @@ const ( TypeMap Type = "map" TypeSlice Type = "array" TypeUnknown Type = "unknown" + TypeNull Type = "null" ) type KeyValue struct { @@ -129,6 +130,8 @@ func (v *Value) Type() Type { return TypeMap case v.IsSlice(): return TypeSlice + case v.IsNull(): + return TypeNull default: return TypeUnknown } diff --git a/model/value_literal.go b/model/value_literal.go index 51ac280a..096a658b 100644 --- a/model/value_literal.go +++ b/model/value_literal.go @@ -17,7 +17,7 @@ func NewNullValue() *Value { // IsNull returns true if the value is null. func (v *Value) IsNull() bool { - return v.isNull() + return v.UnpackKinds(reflect.Ptr, reflect.Interface).isNull() } func (v *Value) isNull() bool { diff --git a/model/value_literal_test.go b/model/value_literal_test.go new file mode 100644 index 00000000..c34e8fff --- /dev/null +++ b/model/value_literal_test.go @@ -0,0 +1,14 @@ +package model_test + +import ( + "testing" + + "github.com/tomwright/dasel/v3/model" +) + +func TestValue_IsNull(t *testing.T) { + v := model.NewNullValue() + if !v.IsNull() { + t.Fatalf("expected value to be null") + } +} diff --git a/model/value_set_test.go b/model/value_set_test.go index d388aaff..11f0f03d 100644 --- a/model/value_set_test.go +++ b/model/value_set_test.go @@ -46,6 +46,7 @@ func TestValue_Set(t *testing.T) { boolValue func() *model.Value mapValue func() *model.Value sliceValue func() *model.Value + nullValue func() *model.Value }{ { name: "model constructor", @@ -75,6 +76,9 @@ func TestValue_Set(t *testing.T) { } return res }, + nullValue: func() *model.Value { + return model.NewNullValue() + }, }, { name: "go types non ptr", @@ -106,6 +110,9 @@ func TestValue_Set(t *testing.T) { } return model.NewValue(v) }, + nullValue: func() *model.Value { + return model.NewValue(nil) + }, }, { name: "go types ptr", @@ -137,6 +144,10 @@ func 
TestValue_Set(t *testing.T) { } return model.NewValue(&v) }, + nullValue: func() *model.Value { + var x any + return model.NewValue(&x) + }, }, } @@ -219,6 +230,10 @@ func TestValue_Set(t *testing.T) { return res }, }.run) + t.Run("string over null", setTestCase{ + valueFn: tc.nullValue, + newValue: model.NewStringValue("world"), + }.run) }) } } diff --git a/parsing/json.go b/parsing/json.go index b662f44a..589e9cde 100644 --- a/parsing/json.go +++ b/parsing/json.go @@ -1,11 +1,22 @@ package parsing import ( + "bytes" "encoding/json" + "fmt" + "io" + "strings" "github.com/tomwright/dasel/v3/model" ) +const ( + jsonOpenObject = json.Delim('{') + jsonCloseObject = json.Delim('}') + jsonOpenArray = json.Delim('[') + jsonCloseArray = json.Delim(']') +) + // NewJSONReader creates a new JSON reader. func NewJSONReader() (Reader, error) { return &jsonReader{}, nil @@ -20,20 +31,345 @@ type jsonReader struct{} // Read reads a value from a byte slice. func (j *jsonReader) Read(data []byte) (*model.Value, error) { - var unmarshalled any - if err := json.Unmarshal(data, &unmarshalled); err != nil { + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.UseNumber() + + t, err := decoder.Token() + if err != nil { + return nil, err + } + + var res *model.Value + + switch t { + case jsonOpenObject: + res, err = j.decodeObject(decoder) + if err != nil { + return nil, fmt.Errorf("could not decode object: %w", err) + } + case jsonOpenArray: + res, err = j.decodeArray(decoder) + if err != nil { + return nil, fmt.Errorf("could not decode array: %w", err) + } + default: + res, err = j.decodeToken(decoder, t) + if err != nil { + return nil, err + } + } + + return res, nil +} + +func (j *jsonReader) decodeObject(decoder *json.Decoder) (*model.Value, error) { + res := model.NewMapValue() + + var key any = nil + + for { + t, err := decoder.Token() + if err != nil { + // We don't expect an EOF here since we're in the middle of processing an object. 
+ return res, err + } + + switch t { + case jsonOpenArray: + if key == nil { + return res, fmt.Errorf("unexpected token: %v", t) + } + value, err := j.decodeArray(decoder) + if err != nil { + return res, err + } + if err := res.SetMapKey(key.(string), value); err != nil { + return res, err + } + key = nil + case jsonCloseArray: + return res, fmt.Errorf("unexpected token: %v", t) + case jsonCloseObject: + return res, nil + case jsonOpenObject: + if key == nil { + return res, fmt.Errorf("unexpected token: %v", t) + } + value, err := j.decodeObject(decoder) + if err != nil { + return res, err + } + if err := res.SetMapKey(key.(string), value); err != nil { + return res, err + } + key = nil + default: + if key == nil { + if tStr, ok := t.(string); ok { + key = tStr + } else { + return nil, fmt.Errorf("unexpected token: %v", t) + } + } else { + value, err := j.decodeToken(decoder, t) + if err != nil { + return nil, err + } + if err := res.SetMapKey(key.(string), value); err != nil { + return res, err + } + key = nil + } + } + } +} + +func (j *jsonReader) decodeArray(decoder *json.Decoder) (*model.Value, error) { + res := model.NewSliceValue() + for { + t, err := decoder.Token() + if err != nil { + // We don't expect an EOF here since we're in the middle of processing an object. 
+ return res, err + } + + switch t { + case jsonOpenArray: + value, err := j.decodeArray(decoder) + if err != nil { + return res, err + } + if err := res.Append(value); err != nil { + return res, err + } + case jsonCloseArray: + return res, nil + case jsonCloseObject: + return res, fmt.Errorf("unexpected token: %v", t) + case jsonOpenObject: + value, err := j.decodeObject(decoder) + if err != nil { + return res, err + } + if err := res.Append(value); err != nil { + return res, err + } + default: + value, err := j.decodeToken(decoder, t) + if err != nil { + return nil, err + } + if err := res.Append(value); err != nil { + return res, err + } + } + } +} + +func (j *jsonReader) decodeToken(decoder *json.Decoder, t json.Token) (*model.Value, error) { + switch tv := t.(type) { + case json.Number: + strNum := tv.String() + if strings.ContainsAny(strNum, ".eE") { // a fraction or an exponent (e.g. 1e5) means the number is a float + floatNum, err := tv.Float64() + if err == nil { + return model.NewFloatValue(floatNum), nil + } + return nil, err + } + intNum, err := tv.Int64() + if err == nil { + return model.NewIntValue(intNum), nil + } + return nil, err + default: + return model.NewValue(tv), nil } - return model.NewValue(&unmarshalled), nil } type jsonWriter struct{} // Write writes a value to a byte slice.
func (j *jsonWriter) Write(value *model.Value) ([]byte, error) { - res, err := json.Marshal(value.Interface()) - if err != nil { + buf := new(bytes.Buffer) + + es := encoderState{indentStr: " "} + + encoderFn := func(v any) error { + res, err := json.Marshal(v) + if err != nil { + return err + } + _, err = buf.Write(res) + return err + } + + if err := j.write(buf, encoderFn, es, value); err != nil { return nil, err } - return append(res, []byte("\n")...), nil + + if _, err := buf.Write([]byte("\n")); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +type encoderState struct { + indent int + indentStr string +} + +func (es encoderState) inc() encoderState { + es.indent++ + return es +} + +func (es encoderState) writeIndent(w io.Writer) error { + if es.indent == 0 || es.indentStr == "" { + return nil + } + i := strings.Repeat(es.indentStr, es.indent) + if _, err := w.Write([]byte(i)); err != nil { + return err + } + return nil +} + +type encoderFn func(v any) error + +func (j *jsonWriter) write(w io.Writer, encoder encoderFn, es encoderState, value *model.Value) error { + switch value.Type() { + case model.TypeMap: + return j.writeMap(w, encoder, es, value) + case model.TypeSlice: + return j.writeSlice(w, encoder, es, value) + case model.TypeString: + val, err := value.StringValue() + if err != nil { + return err + } + return encoder(val) + case model.TypeInt: + val, err := value.IntValue() + if err != nil { + return err + } + return encoder(val) + case model.TypeFloat: + val, err := value.FloatValue() + if err != nil { + return err + } + return encoder(val) + case model.TypeBool: + val, err := value.BoolValue() + if err != nil { + return err + } + return encoder(val) + case model.TypeNull: + return encoder(nil) + default: + return fmt.Errorf("unsupported type: %s", value.Type()) + } +} + +func (j *jsonWriter) writeMap(w io.Writer, encoder encoderFn, es encoderState, value *model.Value) error { + kvs, err := value.MapKeyValues() + if err != nil { + 
return err + } + + if _, err := w.Write([]byte(`{`)); err != nil { + return err + } + + if len(kvs) > 0 { + if _, err := w.Write([]byte("\n")); err != nil { + return err + } + + incEs := es.inc() + for i, kv := range kvs { + if err := incEs.writeIndent(w); err != nil { + return err + } + + if _, err := w.Write([]byte(fmt.Sprintf(`"%s": `, kv.Key))); err != nil { + return err + } + + if err := j.write(w, encoder, incEs, kv.Value); err != nil { + return err + } + + if i < len(kvs)-1 { + if _, err := w.Write([]byte(`,`)); err != nil { + return err + } + } + + if _, err := w.Write([]byte("\n")); err != nil { + return err + } + } + if err := es.writeIndent(w); err != nil { + return err + } + } + + if _, err := w.Write([]byte(`}`)); err != nil { + return err + } + + return nil +} + +func (j *jsonWriter) writeSlice(w io.Writer, encoder encoderFn, es encoderState, value *model.Value) error { + if _, err := w.Write([]byte(`[`)); err != nil { + return err + } + + length, err := value.SliceLen() + if err != nil { + return fmt.Errorf("error getting slice length: %w", err) + } + + if length > 0 { + if _, err := w.Write([]byte("\n")); err != nil { + return err + } + incEs := es.inc() + for i := 0; i < length; i++ { + if err := incEs.writeIndent(w); err != nil { + return err + } + va, err := value.GetSliceIndex(i) + if err != nil { + return fmt.Errorf("error getting slice index %d: %w", i, err) + } + if err := j.write(w, encoder, incEs, va); err != nil { + return err + } + if i < length-1 { + if _, err := w.Write([]byte(`,`)); err != nil { + return err + } + } + if _, err := w.Write([]byte("\n")); err != nil { + return err + } + } + if err := es.writeIndent(w); err != nil { + return err + } + } + + if _, err := w.Write([]byte(`]`)); err != nil { + return err + } + + return nil } diff --git a/parsing/json_test.go b/parsing/json_test.go new file mode 100644 index 00000000..0a55e1c4 --- /dev/null +++ b/parsing/json_test.go @@ -0,0 +1,49 @@ +package parsing_test + +import ( + 
"testing" + + "github.com/google/go-cmp/cmp" + "github.com/tomwright/dasel/v3/parsing" +) + +func TestJson(t *testing.T) { + doc := []byte(`{ + "string": "foo", + "int": 1, + "float": 1.1, + "bool": true, + "null": null, + "array": [ + 1, + 2, + 3 + ], + "object": { + "key": "value" + } +} +`) + reader, err := parsing.NewJSONReader() + if err != nil { + t.Fatal(err) + } + writer, err := parsing.NewJSONWriter() + if err != nil { + t.Fatal(err) + } + + value, err := reader.Read(doc) + if err != nil { + t.Fatal(err) + } + + newDoc, err := writer.Write(value) + if err != nil { + t.Fatal(err) + } + + if string(doc) != string(newDoc) { + t.Fatalf("expected %s, got %s...\n%s", string(doc), string(newDoc), cmp.Diff(string(doc), string(newDoc))) + } +}