From 6c2be5f9b7bdeb8a4617ebe28ac7c5dd8fd29efe Mon Sep 17 00:00:00 2001 From: Huy Doan Date: Sun, 12 May 2019 02:41:49 +0700 Subject: [PATCH] added unicode supports, some performance improvements --- sam.nim | 107 ++++++++++++++++++++---------------------------- sam.nimble | 2 +- sam/utils.nim | 59 ++++++++++++++++++++++++++ tests/test2.nim | 14 +++++-- tests/test3.nim | 18 +++++++- tests/test4.nim | 35 ++++++++++++++++ tests/test5.nim | 7 ++++ 7 files changed, 175 insertions(+), 67 deletions(-) create mode 100644 sam/utils.nim create mode 100644 tests/test4.nim create mode 100644 tests/test5.nim diff --git a/sam.nim b/sam.nim index 0d3bae2..9490b87 100644 --- a/sam.nim +++ b/sam.nim @@ -24,14 +24,12 @@ ## import jsmn, strutils, macros -from json import escapeJson +import sam/utils type Mapper = object tokens: seq[JsmnToken] json: string - numTokens: int - stack: seq[int] JsonNode* = ref object mapper: Mapper @@ -39,14 +37,15 @@ type JsonRaw* {.borrow: `.`.} = distinct string + NamingConverter = proc(input: string): string + {.push boundChecks: off, overflowChecks: off.} -template getValue*(t: JsmnToken, json: string): untyped = +template getValue(t: JsmnToken, json: string): untyped = ## Returns a string present of token ``t`` json[t.start.. 
0: match = false - assert i <= m.numTokens + assert i <= m.tokens.len tok = m.tokens[i] assert tok.kind != JSMN_UNDEFINED when defined(verbose): @@ -125,19 +124,19 @@ proc loads(target: var any, m: Mapper, pos = 0) = loads(target[x], m, i) inc(i) inc(x) - elif target.type is int: + elif target.type is SomeInteger: assert m.tokens[pos].kind == JSMN_PRIMITIVE let value = m.tokens[pos].getValue(m.json) target = parseInt(value) elif target.type is string: assert m.tokens[pos].kind == JSMN_STRING or m.tokens[pos].getValue(m.json) == "null" if m.tokens[pos].kind == JSMN_STRING: - target = m.tokens[pos].getValue(m.json) + target = unescape(m.tokens[pos].getValue(m.json), "", "") elif target.type is bool: assert m.tokens[pos].kind == JSMN_PRIMITIVE let value = m.tokens[pos].getValue(m.json) target = value[0] == 't' - elif target.type is float: + elif target.type is SomeFloat: assert m.tokens[pos].kind == JSMN_PRIMITIVE target = parseFloat(m.tokens[pos].getValue(m.json)) elif target.type is char: @@ -160,8 +159,8 @@ proc loads(target: var any, m: Mapper, pos = 0) = proc loads*(target: var any, json: string, bufferSize = 256) = var mapper: Mapper mapper.tokens = jsmn.parseJson(json, bufferSize, autoResize=true) - mapper.numTokens = mapper.tokens.len mapper.json = json + shallow(mapper.json) loads(target, mapper) @@ -169,34 +168,35 @@ proc parse*(json: string, bufferSize = 256): JsonNode = # Parse JSON string and returns a `JsonNode` var mapper: Mapper mapper.tokens = jsmn.parseJson(json, bufferSize, autoResize=true) - mapper.numTokens = mapper.tokens.len mapper.json = json + shallow(mapper.json) new(result) result.mapper = mapper -proc parse*(json: string, tokens: seq[JsmnToken], numTokens: int): JsonNode = +proc parse*(json: string, tokens: seq[JsmnToken]): JsonNode = ## Load a parsed JSON tokens and returns a `JsonNode` var mapper: Mapper mapper.tokens = tokens - mapper.numTokens = numTokens mapper.json = json + shallow(mapper.json) new(result) result.mapper = mapper -proc 
`[]`*(n: JsonNode, key: string): JsonNode {.noSideEffect.} = +func `[]`*(n: JsonNode, key: string): JsonNode {.noSideEffect.} = ## Get a field from a json object, raises `FieldError` if field does not exists assert n.mapper.tokens[n.pos].kind == JSMN_OBJECT - n.mapper.stack.add(n.pos) - result = n + new(result) + result.mapper = n.mapper result.pos = n.mapper.findValue(key, n.pos) -proc `[]`*(n: JsonNode, idx: int): JsonNode {.noSideEffect.} = +func `[]`*(n: JsonNode, idx: int): JsonNode {.noSideEffect.} = ## Get a field from json array, raises `IndexError` if array is empty or index out of bounds assert n.mapper.tokens[n.pos].kind == JSMN_ARRAY - n.mapper.stack.add(n.pos) - result = n + new(result) + result.mapper = n.mapper + if n.mapper.tokens[n.pos].size <= 0: raise newException(IndexError, "index out of bounds") @@ -209,33 +209,12 @@ proc `[]`*(n: JsonNode, idx: int): JsonNode {.noSideEffect.} = result.pos = child.pos inc(i) - #if child.kind == JSMN_ARRAY or child.kind == JSMN_OBJECT: - # result.pos = n.pos + 1 + (1 + child.size) * idx - #else: - # result.pos = n.pos + idx - -proc `{}`*(n: JsonNode, i: int): JsonNode {.inline.} = - ## Traveral back the selection stack - var - i = i - pos: int - while i > 0: - pos = n.mapper.stack.pop() - dec(i) - result = n - result.pos = pos - -proc `{}`*(n: JsonNode): JsonNode {.inline.} = - ## Return the root node - result = n - result.pos = 0 - -proc len*(n: JsonNode): int = +func len*(n: JsonNode): int = ## Returns the number of elements in a json array assert n.mapper.tokens[n.pos].kind == JSMN_ARRAY n.mapper.tokens[n.pos].size -proc hasKey*(n: JsonNode, key: string): bool = +func hasKey*(n: JsonNode, key: string): bool = ## Checks if field exists in object assert n.mapper.tokens[n.pos].kind == JSMN_OBJECT var pos = -1 @@ -243,50 +222,51 @@ proc hasKey*(n: JsonNode, key: string): bool = pos = n.mapper.findValue(key, n.pos) except FieldError: discard - result = pos >= n.pos + result = pos > n.pos -proc toStr*(node: 
JsonNode): string {.inline.} = +func toStr*(node: JsonNode): string {.inline.} = ## Retrieves the string value of a JSMN_STRING node assert node.mapper.tokens[node.pos].kind == JSMN_STRING - loads(result, node.mapper, node.pos) + var tmp = "" + loads(tmp, node.mapper, node.pos) + result = escapeString(tmp) -proc toInt*(node: JsonNode): int {.inline.} = +func toInt*(node: JsonNode): int {.inline.} = ## Retrieves the int value of a JSMN_PRIMITIVE node assert node.mapper.tokens[node.pos].kind == JSMN_PRIMITIVE loads(result, node.mapper, node.pos) -proc toFloat*(node: JsonNode): float {.inline.} = +func toFloat*(node: JsonNode): float {.inline.} = ## Retrieves the float value of a JSMN_PRIMITIVE node assert node.mapper.tokens[node.pos].kind == JSMN_PRIMITIVE loads(result, node.mapper, node.pos) -proc toBool*(node: JsonNode): bool {.inline.} = +func toBool*(node: JsonNode): bool {.inline.} = ## Retrieves the bool value of a JSMN_PRIMITIVE node assert node.mapper.tokens[node.pos].kind == JSMN_PRIMITIVE loads(result, node.mapper, node.pos) -proc toObj*[T](n: JsonNode): T = +func to*[T](node: JsonNode): T = ## Map a JSMN_OBJECT node into a Nim object when result is ref: new(result) - loads(result, n.mapper, n.pos) + loads(result, node.mapper, node.pos) iterator items*(n: JsonNode): JsonNode = ## Iterator for the items of an array node assert n.mapper.tokens[n.pos].kind == JSMN_ARRAY var - i = n.pos + 1 - node = new(JsonNode) + i = n.pos count = n.mapper.tokens[n.pos].size - node.mapper = n.mapper - while count > 0: + inc(i) if n.mapper.tokens[i].parent == n.pos: dec(count) + var node = new JsonNode + node.mapper = n.mapper node.pos = i yield node - inc(i) iterator pairs*(n: JsonNode): tuple[key: string, val: JsonNode] = ## Iterator for the child elements of an object node @@ -309,7 +289,7 @@ iterator pairs*(n: JsonNode): tuple[key: string, val: JsonNode] = else: inc(i) -proc dumps*(t: auto, x: var string) = +proc dumps*(t: auto, x: var string, namingConverter: 
NamingConverter = nil) = ## Serialize `t` into `x` when t is object or t is tuple: var first = true @@ -319,7 +299,10 @@ proc dumps*(t: auto, x: var string) = first = false else: x.add "," - x.add "\"" & n & "\"" + if namingConverter != nil: + x.add "\"" & namingConverter(n) & "\"" + else: + x.add "\"" & n & "\"" x.add ":" dumps(v, x) x.add "}" @@ -327,7 +310,7 @@ proc dumps*(t: auto, x: var string) = if t.len == 0: x.add "null" return - x.add escapeJson(t) + x.add escapeString(t) elif t is char: x.add "\"" & $t & "\"" elif t is bool: @@ -359,10 +342,10 @@ proc dumps*(t: auto, x: var string) = else: x.add $t -proc dumps*(t: auto): string = +proc dumps*(t: auto, namingConverter: NamingConverter = nil): string = ## Serialize `t` to a JSON formatted result = newStringOfCap(sizeof(t) shl 1) - dumps(t, result) + dumps(t, result, namingConverter) proc `%`*(x: auto): JsonRaw {.inline.} = ## Convert `x` to a raw json string (JsonRaw is not wrapped when added to json string) @@ -395,7 +378,7 @@ proc stringify(x: NimNode, top = false): NimNode {.compileTime.} = result = newCall(newIdentNode("dumps"), result) macro `$$`*(x: untyped): untyped = - ## Convert anything to a json stirng + ## Convert anything to a json string stringify(x, true) {.pop.} diff --git a/sam.nimble b/sam.nimble index 5549d08..7310c22 100644 --- a/sam.nimble +++ b/sam.nimble @@ -1,6 +1,6 @@ # Package -version = "0.1.6" +version = "0.1.8" author = "Huy Doan" description = "Fast and just works JSON-Binding for Nim" license = "MIT" diff --git a/sam/utils.nim b/sam/utils.nim new file mode 100644 index 0000000..5228018 --- /dev/null +++ b/sam/utils.nim @@ -0,0 +1,59 @@ +from parsejson import parseEscapedUTF16 +from unicode import Rune, toUTF8 + +proc escapeString*(s: string): string = + var buf = s.cstring + var pos = 0 + while true: + case buf[pos] + of '\0': + break + of '"': + add(result, '"') + inc(pos) + of '\\': + case buf[pos+1] + of '\\', '"', '\'', '/': + add(result, buf[pos+1]) + inc(pos, 2) + of 
'b': + add(result, '\b') + inc(pos, 2) + of 'f': + add(result, '\f') + inc(pos, 2) + of 'n': + add(result, '\L') + inc(pos, 2) + of 'r': + add(result, '\C') + inc(pos, 2) + of 't': + add(result, '\t') + inc(pos, 2) + of 'v': + add(result, '\v') + inc(pos, 2) + of 'u': + inc(pos, 2) + var r = parseEscapedUTF16(buf, pos) + if r < 0: + break + # Deal with surrogates + if (r and 0xfc00) == 0xd800: + if buf[pos] != '\\' or buf[pos+1] != 'u': + break + inc(pos, 2) + var s = parseEscapedUTF16(buf, pos) + if (s and 0xfc00) == 0xdc00 and s > 0: + r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00)) + else: + break + add(result, toUTF8(Rune(r))) + else: + # don't bother with the error + add(result, buf[pos]) + inc(pos) + else: + add(result, buf[pos]) + inc(pos) \ No newline at end of file diff --git a/tests/test2.nim b/tests/test2.nim index 19547cc..936bbff 100644 --- a/tests/test2.nim +++ b/tests/test2.nim @@ -7,7 +7,7 @@ type points: seq[int] friends: seq[Student] x: array[2, int] -let js = """{"name": "John", "age": 30, "points": [], "friends": [{"name": "Bob"}, {"name": "Peter", "age": 8}]}""" +let js = """{"name": "John ", "age": 30, "points": [], "friends": [{"name": "Bob πŸ˜‚πŸ˜πŸ˜±β€οΈπŸ˜©πŸ€žπŸΏπŸ€žπŸΏπŸ™"}, {"name": "Peter", "age": 8}]}""" var j = parse(js) let n = j["friends"][1] @@ -23,13 +23,21 @@ s.name = "John Doe" s.age = 20 s.points = @[1,2,3,4,5] -var f1: Student +var f1, f2: Student echo dumps(s) var k: Student k.loads(js) -f1 = toObj[Student](j{}["friends"][0]) +f1 = to[Student](j["friends"][0]) +echo "f1 ", j echo "f1: ", dumps(f1) +var friends = j["friends"] + +f2 = to[Student](friends[0]) + +assert f1 == f2 + +echo "f2: ", dumps(f2) echo getTotalMem(), ", ", getOccupiedMem(), ", ", getFreeMem() diff --git a/tests/test3.nim b/tests/test3.nim index 43e9b67..d03948e 100644 --- a/tests/test3.nim +++ b/tests/test3.nim @@ -1,4 +1,4 @@ -import ../sam +import ../sam, strutils var sub = { "method": "POST", @@ -15,3 +15,19 @@ var echo $$(a, b) echo 
$$[1,2,3,4] + +echo "=====================================" + + +let json = "{ \"a\": [1, 2, 3, 4], \"b\": \"asd \\\"\", \"c\": \"\\ud83c\\udf83\", \"d\": \"\\u00E6\"}" +echo json + +let testJson = parse(json) + + + +# make sure UTF-16 decoding works. +echo testJson["b"].toStr +echo testJson["c"].toStr +doAssert(testJson["c"].toStr == "🎃") +doAssert(testJson["d"].toStr == "æ") \ No newline at end of file diff --git a/tests/test4.nim b/tests/test4.nim new file mode 100644 index 0000000..f2a4441 --- /dev/null +++ b/tests/test4.nim @@ -0,0 +1,35 @@ +import ../sam, strutils + +type + Person = object + id: int + firstName: string + lastName: string + address: string + phoneNumber*: string + userId*: int + vcard*: string + +var p: Person +p.id = 1 +p.firstName = "Tony" +p.lastName = "Stark" +p.address = "Hollywood" +p.phoneNumber = "do I care about this?" +p.userId = 1337 + +proc snakeCaseConverter(s: string): string = + if s == "kind": + return "type" + if s == "fromUser": + return "from" + + result = newStringOfCap(s.len + 5) + for c in s: + if c in {'A'..'Z'}: + result.add("_") + result.add(c.toLowerAscii) + else: + result.add(c) + +assert dumps(p, snakeCaseConverter) == """{"id":1,"first_name":"Tony","last_name":"Stark","address":"Hollywood","phone_number":"do I care about this?","user_id":1337,"vcard":null}""" \ No newline at end of file diff --git a/tests/test5.nim b/tests/test5.nim new file mode 100644 index 0000000..6eaa1e3 --- /dev/null +++ b/tests/test5.nim @@ -0,0 +1,7 @@ +import ../sam/utils + +var fish = "This is a \\ud83d\\udc1f, yes a fish" +assert escapeString(fish) == "This is a 🐟, yes a fish" + + +assert escapeString("Test\"") == r"Test"""