From 6b7ce583b7d8fa39965f477535579554f29a7dbe Mon Sep 17 00:00:00 2001
From: SheetJS <dev@sheetjs.com>
Date: Tue, 7 Sep 2021 17:04:43 -0400
Subject: [PATCH 1/2] Buffer#toString throw on unsupported encodings

---
 AUTHORS.rst                                        |  1 +
 src-input/duk_bi_buffer.c                          | 14 +++++++++++++-
 tests/ecmascript/test-bi-nodejs-buffer-tostring.js | 10 +++++-----
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/AUTHORS.rst b/AUTHORS.rst
index 09ba6d264b..2c15633bcc 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -69,6 +69,7 @@ and agreed to irrevocably license their contributions under the Duktape
 * Nancy Li (https://github.com/NancyLi1013)
 * William Parks (https://github.com/WilliamParks)
 * Sam Hellawell (https://github.com/samhellawell)
+* SheetJS (https://github.com/SheetJS)
 
 Other contributions
 ===================
diff --git a/src-input/duk_bi_buffer.c b/src-input/duk_bi_buffer.c
index 53b59be9ae..d50e4f6632 100644
--- a/src-input/duk_bi_buffer.c
+++ b/src-input/duk_bi_buffer.c
@@ -1183,6 +1183,7 @@ DUK_INTERNAL duk_ret_t duk_bi_uint8array_plainof(duk_hthread *thr) {
 
 #if defined(DUK_USE_BUFFEROBJECT_SUPPORT)
 DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
+	const char* encoding;
 	duk_hbufobj *h_this;
 	duk_int_t start_offset, end_offset;
 	duk_uint8_t *buf_slice;
@@ -1196,7 +1197,18 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
 	}
 	DUK_HBUFOBJ_ASSERT_VALID(h_this);
 
-	/* Ignore encoding for now. */
+	/* TODO: support other encodings.  currently only 'utf8' is supported. */
+	if (duk_is_undefined(thr, 0)) {
+		encoding = "utf8";
+	} else if (duk_is_string(thr, 0)) {
+		encoding = duk_to_string(thr, 0);
+		DUK_ASSERT(duk_is_string(thr, 0));
+		if(DUK_STRCMP(encoding, "utf8") != 0) {
+			DUK_DCERROR_TYPE_INVALID_ARGS(thr);
+		}
+	} else {
+		DUK_DCERROR_TYPE_INVALID_ARGS(thr);
+	}
 
 	duk__clamp_startend_nonegidx_noshift(thr,
 	                                     (duk_int_t) h_this->length,
diff --git a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js
index d25ac23719..424dc147da 100644
--- a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js
+++ b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js
@@ -23,7 +23,6 @@ false
 true
 "ABC"
 "ABC"
-"ABC"
 "DEFG"
 "EFG"
 "E"
@@ -241,20 +240,21 @@ function nodejsBufferToStringTest() {
     // buf.toString([encoding], [start], [end])
 
     // Without arguments encoding defaults to UTF-8 and the entire
-    // buffer is converted to string.  At least undefined and null
+    // buffer is converted to string.  At least undefined
     // are accepted as "not defined" for encoding.
     b = new Buffer('ABC');
     safePrintString(b.toString());
     safePrintString(b.toString(undefined));
-    safePrintString(b.toString(null));
+    // null is not a valid encoding
+    try { safePrintString(b.toString(null)); } catch(e) { }
 
     // If the buffer is a slice of an underlying buffer, only that slice
     // is string converted.  Offsets are relative to the slice.
     b = new Buffer('ABCDEFGH');
     b = b.slice(3, 7);  // DEFG
     safePrintString(b.toString());
-    safePrintString(b.toString(null, 1));
-    safePrintString(b.toString(null, 1, 2));
+    safePrintString(b.toString(undefined, 1));
+    safePrintString(b.toString(undefined, 1, 2));
 
     // When the buffer data is legal UTF-8 and the chosen encoding
     // is UTF-8 (default), Duktape internal representation is correct

From 6f30a811b71896dfc9be43f94ac406126f6aea96 Mon Sep 17 00:00:00 2001
From: SheetJS <dev@sheetjs.com>
Date: Wed, 8 Sep 2021 17:36:24 -0400
Subject: [PATCH 2/2] Buffer encoding 'utf-8' and case insensitive match

---
 src-input/duk_bi_buffer.c                     | 58 ++++++++++++++-----
 .../test-bi-nodejs-buffer-isencoding.js       | 17 +++---
 .../test-bi-nodejs-buffer-tostring.js         | 20 ++++++-
 3 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/src-input/duk_bi_buffer.c b/src-input/duk_bi_buffer.c
index d50e4f6632..af3c05c23a 100644
--- a/src-input/duk_bi_buffer.c
+++ b/src-input/duk_bi_buffer.c
@@ -114,6 +114,42 @@ static duk_uint16_t duk__buffer_elemtype_copy_compatible[9] = {
 };
 #endif  /* !DUK_USE_PREFER_SIZE */
 
+#if defined(DUK_USE_BUFFEROBJECT_SUPPORT)
+/* Buffer supported encodings */
+
+#define DUK_BUF_ENC_UNKNOWN              0
+#define DUK_BUF_ENC_UTF8                 1
+
+/* Encoding names in canonical lowercase form; duk__buffer_encoding_names[i] maps to
+ * duk__buffer_encoding_type_from_name[i], so keep the two tables in the same order. */
+
+#define DUK_BUFFER_ENCODING_COUNT        2
+DUK_LOCAL const char * const duk__buffer_encoding_names[DUK_BUFFER_ENCODING_COUNT] = {
+	"utf8",
+	"utf-8"
+};
+
+DUK_LOCAL const duk_int_t duk__buffer_encoding_type_from_name[DUK_BUFFER_ENCODING_COUNT] = {
+	DUK_BUF_ENC_UTF8,
+	DUK_BUF_ENC_UTF8
+};
+
+DUK_LOCAL duk_int_t duk__parse_string_encoding(const char *encoding) {
+	duk_small_uint_t i, j;
+	/* Return the DUK_BUF_ENC_xxx type for NUL-terminated 'encoding' (ASCII case insensitive) or DUK_BUF_ENC_UNKNOWN.  Only 'A'-'Z' are folded: a blanket "| 0x20" would also accept e.g. "utf\r8" as "utf-8". */
+	for (i = 0; i < DUK_BUFFER_ENCODING_COUNT; ++i) {
+		const char *name = duk__buffer_encoding_names[i];
+		for (j = 0; name[j] != 0; ++j) {  /* stops at first mismatch, so 'encoding' is never read past its NUL */
+			char c = encoding[j];
+			if (((c >= 'A' && c <= 'Z') ? (c | 0x20) : c) != name[j]) { break; }
+		}
+		if (name[j] == 0 && encoding[j] == 0) { return duk__buffer_encoding_type_from_name[i]; }  /* both ended: exact match */
+	}
+	return DUK_BUF_ENC_UNKNOWN;
+}
+#undef DUK_BUFFER_ENCODING_COUNT
+#endif  /* DUK_USE_BUFFEROBJECT_SUPPORT */
+
 DUK_LOCAL duk_hbufobj *duk__hbufobj_promote_this(duk_hthread *thr) {
 	duk_tval *tv_dst;
 	duk_hbufobj *res;
@@ -1183,12 +1219,14 @@ DUK_INTERNAL duk_ret_t duk_bi_uint8array_plainof(duk_hthread *thr) {
 
 #if defined(DUK_USE_BUFFEROBJECT_SUPPORT)
 DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
-	const char* encoding;
+	const char *encoding;
+	duk_int_t encoding_type;
 	duk_hbufobj *h_this;
 	duk_int_t start_offset, end_offset;
 	duk_uint8_t *buf_slice;
 	duk_size_t slice_length;
 
+
 	h_this = duk__get_bufobj_this(thr);
 	if (h_this == NULL) {
 		/* XXX: happens e.g. when evaluating: String(Buffer.prototype). */
@@ -1197,17 +1235,10 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
 	}
 	DUK_HBUFOBJ_ASSERT_VALID(h_this);
 
-	/* TODO: support other encodings.  currently only 'utf8' is supported. */
-	if (duk_is_undefined(thr, 0)) {
-		encoding = "utf8";
-	} else if (duk_is_string(thr, 0)) {
-		encoding = duk_to_string(thr, 0);
-		DUK_ASSERT(duk_is_string(thr, 0));
-		if(DUK_STRCMP(encoding, "utf8") != 0) {
+	encoding = duk_opt_string(thr, 0, "utf8");
+	encoding_type = duk__parse_string_encoding(encoding);
+	if(encoding_type == DUK_BUF_ENC_UNKNOWN) {
 			DUK_DCERROR_TYPE_INVALID_ARGS(thr);
-		}
-	} else {
-		DUK_DCERROR_TYPE_INVALID_ARGS(thr);
 	}
 
 	duk__clamp_startend_nonegidx_noshift(thr,
@@ -1244,6 +1275,7 @@ DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_tostring(duk_hthread *thr) {
 	 */
 	duk_replace(thr, 0);
 	duk_set_top(thr, 1);
+	/* TODO: support other encodings.  currently only 'utf8' is supported. */
 	return duk_textdecoder_decode_utf8_nodejs(thr);
 }
 #endif  /* DUK_USE_BUFFEROBJECT_SUPPORT */
@@ -2072,11 +2104,9 @@ DUK_INTERNAL duk_ret_t duk_bi_buffer_slice_shared(duk_hthread *thr) {
 DUK_INTERNAL duk_ret_t duk_bi_nodejs_buffer_is_encoding(duk_hthread *thr) {
 	const char *encoding;
 
-	/* only accept lowercase 'utf8' now. */
-
 	encoding = duk_to_string(thr, 0);
 	DUK_ASSERT(duk_is_string(thr, 0));  /* guaranteed by duk_to_string() */
-	duk_push_boolean(thr, DUK_STRCMP(encoding, "utf8") == 0);
+	duk_push_boolean(thr, duk__parse_string_encoding(encoding) != DUK_BUF_ENC_UNKNOWN);
 	return 1;
 }
 #endif  /* DUK_USE_BUFFEROBJECT_SUPPORT */
diff --git a/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js b/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js
index 52eaf05fe4..e5eb5ca090 100644
--- a/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js
+++ b/tests/ecmascript/test-bi-nodejs-buffer-isencoding.js
@@ -15,16 +15,17 @@ isEncoding test
 empty: false
 undefined: false
 utf8: true
-utf-8: false
-UTF8: false
-UTF-8: false
-Utf8: false
-Utf-8: false
-uTf8: false
-uTf-8: false
+utf-8: true
+UTF8: true
+UTF-8: true
+Utf8: true
+Utf-8: true
+uTf8: true
+uTf-8: true
 ascii: false
 ASCII: false
 AsCiI: false
+binary: false
 dummy: false
 undefined: false
 null: false
@@ -41,7 +42,6 @@ function isEncodingTest() {
 
     [
         // Any capitalization (and dash / no dash) is accepted by Node.js.
-        // Duktape accepts 'utf8' only for now.
         'utf8', 'utf-8', 'UTF8', 'UTF-8',
         'Utf8', 'Utf-8', 'uTf8', 'uTf-8',
 
@@ -49,6 +49,7 @@ function isEncodingTest() {
         'ascii',
         'ASCII',
         'AsCiI',
+        'binary',
         'dummy',
 
         // Non-string values
diff --git a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js
index 424dc147da..38a6f0508b 100644
--- a/tests/ecmascript/test-bi-nodejs-buffer-tostring.js
+++ b/tests/ecmascript/test-bi-nodejs-buffer-tostring.js
@@ -23,6 +23,12 @@ false
 true
 "ABC"
 "ABC"
+"ABC"
+"ABC"
+"ABC"
+"ABC"
+"TypeError"
+"TypeError"
 "DEFG"
 "EFG"
 "E"
@@ -245,8 +251,18 @@ function nodejsBufferToStringTest() {
     b = new Buffer('ABC');
     safePrintString(b.toString());
     safePrintString(b.toString(undefined));
-    // null is not a valid encoding
-    try { safePrintString(b.toString(null)); } catch(e) { }
+
+    // supported encodings
+    safePrintString(b.toString("utf8"));
+    safePrintString(b.toString("utf-8"));
+
+    // encodings are case insensitive
+    safePrintString(b.toString("UtF8"));
+    safePrintString(b.toString("uTf-8"));
+
+    // invalid encodings should throw a TypeError
+    try { safePrintString(b.toString(null)); } catch(e) { safePrintString(e.name); }
+    try { safePrintString(b.toString("wtf")); } catch(e) { safePrintString(e.name); }
 
     // If the buffer is a slice of an underlying buffer, only that slice
     // is string converted.  Offsets are relative to the slice.