From 6b7ce583b7d8fa39965f477535579554f29a7dbe Mon Sep 17 00:00:00 2001 From: SheetJS Date: Tue, 7 Sep 2021 17:04:43 -0400 Subject: [PATCH 1/2] Buffer#toString throw on unsupported encodings --- AUTHORS.rst | 1 + src-input/duk_bi_buffer.c | 14 +++++++++++++- tests/ecmascript/test-bi-nodejs-buffer-tostring.js | 10 +++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 09ba6d264b..2c15633bcc 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -69,6 +69,7 @@ and agreed to irrevocably license their contributions under the Duktape * Nancy Li (https://github.com/NancyLi1013) * William Parks (https://github.com/WilliamParks) * Sam Hellawell (https://github.com/samhellawell) +* SheetJS (https://github.com/SheetJS) Other contributions =================== diff --git a/src-input/duk_bi_buffer.c b/src-input/duk_bi_buffer.c index 53b59be9ae..d50e4f6632 100644 --- a/src-input/duk_bi_buffer.c +++ b/src-input/duk_bi_buffer.c @@ -1183,6 +1183,7 @@ DUK_INTERNAL duk_ret_t duk_bi_uint8array_plainof(duk_hthread *thr) { #if defined(DUK_USE_BUFFEROBJECT_SUPPORT) DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) { + const char* encoding; duk_hbufobj *h_this; duk_int_t start_offset, end_offset; duk_uint8_t *buf_slice; @@ -1196,7 +1197,18 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) { } DUK_HBUFOBJ_ASSERT_VALID(h_this); - /* Ignore encoding for now. */ + /* TODO: support other encodings. currently only 'utf8' is supported. */ + if (duk_is_undefined(thr, 0)) { + encoding = "utf8"; + } else if (duk_is_string(thr, 0)) { + encoding = duk_to_string(thr, 0); + DUK_ASSERT(duk_is_string(thr, 0)); + if(DUK_STRCMP(encoding, "utf8") != 0) { + DUK_DCERROR_TYPE_INVALID_ARGS(thr); + } + } else { + DUK_DCERROR_TYPE_INVALID_ARGS(thr); + } duk__clamp_startend_nonegidx_noshift(thr, (duk_int_t) h_this->length, diff --git a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js index d25ac23719..424dc147da 100644 --- a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js +++ b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js @@ -23,7 +23,6 @@ false true "ABC" "ABC" -"ABC" "DEFG" "EFG" "E" @@ -241,20 +240,21 @@ function nodejsBufferToStringTest() { // buf.toString([encoding], [start], [end]) // Without arguments encoding defaults to UTF-8 and the entire - // buffer is converted to string. At least undefined and null + // buffer is converted to string. At least undefined // are accepted as "not defined" for encoding. b = new Buffer('ABC'); safePrintString(b.toString()); safePrintString(b.toString(undefined)); - safePrintString(b.toString(null)); + // null is not a valid encoding + try { safePrintString(b.toString(null)); } catch(e) { } // If the buffer is a slice of an underlying buffer, only that slice // is string converted. Offsets are relative to the slice. b = new Buffer('ABCDEFGH'); b = b.slice(3, 7); // DEFG safePrintString(b.toString()); - safePrintString(b.toString(null, 1)); - safePrintString(b.toString(null, 1, 2)); + safePrintString(b.toString(undefined, 1)); + safePrintString(b.toString(undefined, 1, 2)); // When the buffer data is legal UTF-8 and the chosen encoding // is UTF-8 (default), Duktape internal representation is correct From 6f30a811b71896dfc9be43f94ac406126f6aea96 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Wed, 8 Sep 2021 17:36:24 -0400 Subject: [PATCH 2/2] Buffer encoding 'utf-8' and case insensitive match --- src-input/duk_bi_buffer.c | 58 ++++++++++++++----- .../test-bi-nodejs-buffer-isencoding.js | 17 +++--- .../test-bi-nodejs-buffer-tostring.js | 20 ++++++- 3 files changed, 71 insertions(+), 24 deletions(-) diff --git a/src-input/duk_bi_buffer.c b/src-input/duk_bi_buffer.c index d50e4f6632..af3c05c23a 100644 --- a/src-input/duk_bi_buffer.c +++ b/src-input/duk_bi_buffer.c @@ -114,6 +114,42 @@ static duk_uint16_t duk__buffer_elemtype_copy_compatible[9] = { }; #endif /* !DUK_USE_PREFER_SIZE */ +#if defined(DUK_USE_BUFFEROBJECT_SUPPORT) +/* Buffer supported encodings */ + +#define DUK_BUF_ENC_UNKNOWN 0 +#define DUK_BUF_ENC_UTF8 1 + +/* longest encoding string + 1 -- should be updated when longer strings are added */ +#define DUK_BUFFER_ENCODING_MAX_LEN 7 + +#define DUK_BUFFER_ENCODING_COUNT 2 +DUK_LOCAL const char * const duk__buffer_encoding_names[DUK_BUFFER_ENCODING_COUNT] = { + "utf8", + "utf-8" +}; + +DUK_LOCAL const duk_int_t duk__buffer_encoding_type_from_name[DUK_BUFFER_ENCODING_COUNT] = { + DUK_BUF_ENC_UTF8, + DUK_BUF_ENC_UTF8 +}; + +DUK_LOCAL duk_int_t duk__parse_string_encoding(const char *encoding) { + duk_uint8_t i; + char buf[DUK_BUFFER_ENCODING_MAX_LEN]; + /* the valid nodejs buffer encodings only contain letters numbers and hyphens */ + for (i = 0; i < DUK_BUFFER_ENCODING_MAX_LEN; ++i) { + if (encoding[i] == 0) { buf[i] = 0; break; } + buf[i] = (char) (encoding[i] | 0x20); + } + for (i = 0; i < DUK_BUFFER_ENCODING_COUNT; ++i) { + if(DUK_STRCMP((const char *)buf, duk__buffer_encoding_names[i]) == 0) return duk__buffer_encoding_type_from_name[i]; + } + return DUK_BUF_ENC_UNKNOWN; +} +#undef DUK_BUFFER_ENCODING_COUNT +#endif /* DUK_USE_BUFFEROBJECT_SUPPORT */ + DUK_LOCAL duk_hbufobj *duk__hbufobj_promote_this(duk_hthread *thr) { duk_tval *tv_dst; duk_hbufobj *res; @@ -1183,12 +1219,14 @@ DUK_INTERNAL duk_ret_t duk_bi_uint8array_plainof(duk_hthread *thr) { #if defined(DUK_USE_BUFFEROBJECT_SUPPORT) DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) { - const char* encoding; + const char *encoding; + duk_int_t encoding_type; duk_hbufobj *h_this; duk_int_t start_offset, end_offset; duk_uint8_t *buf_slice; duk_size_t slice_length; + h_this = duk__get_bufobj_this(thr); if (h_this == NULL) { /* XXX: happens e.g. when evaluating: String(Buffer.prototype). */ @@ -1197,17 +1235,10 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) { } DUK_HBUFOBJ_ASSERT_VALID(h_this); - /* TODO: support other encodings. currently only 'utf8' is supported. */ - if (duk_is_undefined(thr, 0)) { - encoding = "utf8"; - } else if (duk_is_string(thr, 0)) { - encoding = duk_to_string(thr, 0); - DUK_ASSERT(duk_is_string(thr, 0)); - if(DUK_STRCMP(encoding, "utf8") != 0) { + encoding = duk_opt_string(thr, 0, "utf8"); + encoding_type = duk__parse_string_encoding(encoding); + if(encoding_type == DUK_BUF_ENC_UNKNOWN) { DUK_DCERROR_TYPE_INVALID_ARGS(thr); - } - } else { - DUK_DCERROR_TYPE_INVALID_ARGS(thr); } duk__clamp_startend_nonegidx_noshift(thr, @@ -1244,6 +1275,7 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) { */ duk_replace(thr, 0); duk_set_top(thr, 1); + /* TODO: support other encodings. currently only 'utf8' is supported. */ return duk_textdecoder_decode_utf8_nodejs(thr); } #endif /* DUK_USE_BUFFEROBJECT_SUPPORT */ @@ -2072,11 +2104,9 @@ DUK_INTERNAL duk_ret_t duk_bi_buffer_slice_shared(duk_hthread *thr) { DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_is_encoding(duk_hthread *thr) { const char *encoding; - /* only accept lowercase 'utf8' now. */ - encoding = duk_to_string(thr, 0); DUK_ASSERT(duk_is_string(thr, 0)); /* guaranteed by duk_to_string() */ - duk_push_boolean(thr, DUK_STRCMP(encoding, "utf8") == 0); + duk_push_boolean(thr, duk__parse_string_encoding(encoding) != DUK_BUF_ENC_UNKNOWN); return 1; } #endif /* DUK_USE_BUFFEROBJECT_SUPPORT */ diff --git a/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js b/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js index 52eaf05fe4..e5eb5ca090 100644 --- a/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js +++ b/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js @@ -15,16 +15,17 @@ isEncoding test empty: false undefined: false utf8: true -utf-8: false -UTF8: false -UTF-8: false -Utf8: false -Utf-8: false -uTf8: false -uTf-8: false +utf-8: true +UTF8: true +UTF-8: true +Utf8: true +Utf-8: true +uTf8: true +uTf-8: true ascii: false ASCII: false AsCiI: false +binary: false dummy: false undefined: false null: false @@ -41,7 +42,6 @@ function isEncodingTest() { [ // Any capitalization (and dash / no dash) is accepted by Node.js. - // Duktape accepts 'utf8' only for now. 'utf8', 'utf-8', 'UTF8', 'UTF-8', 'Utf8', 'Utf-8', 'uTf8', 'uTf-8', @@ -49,6 +49,7 @@ function isEncodingTest() { 'ascii', 'ASCII', 'AsCiI', + 'binary', 'dummy', // Non-string values diff --git a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js index 424dc147da..38a6f0508b 100644 --- a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js +++ b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js @@ -23,6 +23,12 @@ false true "ABC" "ABC" +"ABC" +"ABC" +"ABC" +"ABC" +"TypeError" +"TypeError" "DEFG" "EFG" "E" @@ -245,8 +251,18 @@ function nodejsBufferToStringTest() { b = new Buffer('ABC'); safePrintString(b.toString()); safePrintString(b.toString(undefined)); - // null is not a valid encoding - try { safePrintString(b.toString(null)); } catch(e) { } + + // supported encodings + safePrintString(b.toString("utf8")); + safePrintString(b.toString("utf-8")); + + // encodings are case insensitive + safePrintString(b.toString("UtF8")); + safePrintString(b.toString("uTf-8")); + + // invalid encodings should throw a TypeError + try { safePrintString(b.toString(null)); } catch(e) { safePrintString(e.name); } + try { safePrintString(b.toString("wtf")); } catch(e) { safePrintString(e.name); } // If the buffer is a slice of an underlying buffer, only that slice // is string converted. Offsets are relative to the slice.