summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Evan Welsh <contact@evanwelsh.com> 2021-04-22 00:26:51 -0700
committer: Philip Chimento <philip.chimento@gmail.com> 2021-04-25 21:57:40 -0700
commit: 48d325a0fad60467b5e2cf9e49b9ffd19cd356a8 (patch)
tree: 8b64c2ab02c05f195a658bb4542eae49fc43a34b
parent: 260c74786a39f9194b0c6c72d6cbf0571d0d0f86 (diff)
download: gjs-ewlsh/text-encoding.tar.gz
modules: Implement WHATWG Encoding specification (branch: ewlsh/text-encoding)
-rw-r--r-- .eslintignore | 2
-rw-r--r-- .eslintrc.yml | 2
-rw-r--r-- gjs/byteArray.cpp | 8
-rw-r--r-- gjs/jsapi-util-string.cpp | 35
-rw-r--r-- gjs/jsapi-util.h | 7
-rw-r--r-- gjs/text-encoding.cpp | 352
-rw-r--r-- gjs/text-encoding.h | 11
-rw-r--r-- installed-tests/js/meson.build | 1
-rw-r--r-- installed-tests/js/testEncoding.js | 1051
-rw-r--r-- js.gresource.xml | 2
-rw-r--r-- modules/core/_encodings.js | 280
-rw-r--r-- modules/core/_text.js | 127
-rw-r--r-- modules/core/overrides/GLib.js | 9
-rw-r--r-- modules/script/_bootstrap/default.js | 13
-rw-r--r-- modules/script/byteArray.js | 4
15 files changed, 1826 insertions, 78 deletions
diff --git a/.eslintignore b/.eslintignore
index 9ee950d3..8f8f93ff 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -3,4 +3,6 @@
installed-tests/js/jasmine.js
installed-tests/js/modules/badOverrides/WarnLib.js
+# Until ESLint merges class fields.
+modules/core/_text.js
modules/script/jsUnit.js
diff --git a/.eslintrc.yml b/.eslintrc.yml
index 733db371..6887f1cb 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -253,5 +253,7 @@ globals:
print: readonly
printerr: readonly
window: readonly
+ TextEncoder: readonly
+ TextDecoder: readonly
parserOptions:
ecmaVersion: 2020
diff --git a/gjs/byteArray.cpp b/gjs/byteArray.cpp
index ecf97776..25a1fc74 100644
--- a/gjs/byteArray.cpp
+++ b/gjs/byteArray.cpp
@@ -53,7 +53,13 @@ static bool instance_to_string_func(JSContext* cx, unsigned argc,
if (!gjs_parse_call_args(cx, "toString", args, "|s", "encoding", &encoding))
return false;
- return to_string_impl(cx, this_obj, encoding.get(), args.rval());
+ if (!JS_IsUint8Array(this_obj)) {
+ gjs_throw(cx, "Argument to ByteArray.toString() must be a Uint8Array");
+ return false;
+ }
+
+ return gjs_decode_from_uint8array(cx, this_obj, encoding.get(), true,
+ args.rval());
}
GJS_JSAPI_RETURN_CONVENTION
diff --git a/gjs/jsapi-util-string.cpp b/gjs/jsapi-util-string.cpp
index 5fc1164a..9d4f7cfb 100644
--- a/gjs/jsapi-util-string.cpp
+++ b/gjs/jsapi-util-string.cpp
@@ -98,6 +98,41 @@ JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value value) {
return JS_EncodeStringToUTF8(cx, str);
}
+// Converts the NUL-terminated UTF-8 string `utf8_string` into a JS string and
+// stores it in `value_p`. The bytes go through SpiderMonkey's "lossy" UTF-8
+// decoder (presumably malformed sequences are substituted rather than
+// rejected -- confirm against the SpiderMonkey documentation).
+// Returns false if the conversion or the string allocation fails; `value_p`
+// is left untouched in that case.
+bool gjs_lossy_string_from_utf8(JSContext* cx, const char* utf8_string,
+                                JS::MutableHandleValue value_p) {
+    size_t n_code_units;
+    JS::ConstUTF8CharsZ source(utf8_string, strlen(utf8_string));
+    JS::UniqueTwoByteChars utf16(
+        JS::LossyUTF8CharsToNewTwoByteCharsZ(cx, source, &n_code_units,
+                                             js::MallocArena)
+            .get());
+    if (!utf16)
+        return false;
+
+    // The JS string gets its own copy; `utf16` is released on scope exit.
+    JS::RootedString result(
+        cx, JS_NewUCStringCopyN(cx, utf16.get(), n_code_units));
+    if (!result)
+        return false;
+
+    value_p.setString(result);
+    return true;
+}
+// Length-delimited variant: converts exactly `len` bytes of UTF-8 starting at
+// `utf8_string` into a JS string stored in `value_p`, using SpiderMonkey's
+// "lossy" UTF-8 decoder (presumably substituting malformed sequences instead
+// of failing -- confirm against the SpiderMonkey documentation).
+// Returns false if the conversion or the string allocation fails; `value_p`
+// is left untouched in that case.
+bool gjs_lossy_string_from_utf8_n(JSContext* cx, const char* utf8_string,
+                                  size_t len, JS::MutableHandleValue value_p) {
+    size_t n_code_units;
+    JS::UTF8Chars source(utf8_string, len);
+    JS::UniqueTwoByteChars utf16(
+        JS::LossyUTF8CharsToNewTwoByteCharsZ(cx, source, &n_code_units,
+                                             js::MallocArena)
+            .get());
+    if (!utf16)
+        return false;
+
+    // The JS string gets its own copy; `utf16` is released on scope exit.
+    JS::RootedString result(
+        cx, JS_NewUCStringCopyN(cx, utf16.get(), n_code_units));
+    if (!result)
+        return false;
+
+    value_p.setString(result);
+    return true;
+}
+
bool
gjs_string_from_utf8(JSContext *context,
const char *utf8_string,
diff --git a/gjs/jsapi-util.h b/gjs/jsapi-util.h
index 4e399f25..697fc76b 100644
--- a/gjs/jsapi-util.h
+++ b/gjs/jsapi-util.h
@@ -428,6 +428,13 @@ void gjs_warning_reporter(JSContext*, JSErrorReport* report);
GJS_JSAPI_RETURN_CONVENTION
JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value string_val);
+[[nodiscard]] bool gjs_lossy_string_from_utf8(JSContext* context,
+ const char* utf8_string,
+ JS::MutableHandleValue value_p);
+[[nodiscard]] bool gjs_lossy_string_from_utf8_n(JSContext* context,
+ const char* utf8_string,
+ size_t len,
+ JS::MutableHandleValue value_p);
GJS_JSAPI_RETURN_CONVENTION
bool gjs_string_from_utf8(JSContext *context,
const char *utf8_string,
diff --git a/gjs/text-encoding.cpp b/gjs/text-encoding.cpp
index 395f0812..7fa74c84 100644
--- a/gjs/text-encoding.cpp
+++ b/gjs/text-encoding.cpp
@@ -40,27 +40,66 @@ static void gfree_arraybuffer_contents(void* contents, void*) {
g_free(contents);
}
+// UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, spelled as explicit bytes
+// so the value does not depend on the compiler's execution character set
+// (a "\ufffd" escape in a narrow literal does).
+static constexpr char FALLBACK[] = "\xEF\xBF\xBD";
+// Length of FALLBACK in bytes, not counting the terminating NUL.
+static constexpr size_t FALLBACK_LEN = sizeof(FALLBACK) - 1;
+
+[[nodiscard]] static bool gjs_convert_invalid_input(JSContext* cx,
+ uint8_t* data, size_t len,
+ const char* to_codeset,
+ const char* from_codeset,
+ char** converted);
+
GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
- const char* encoding, JS::MutableHandleValue rval) {
- size_t bytes_written;
+bool gjs_decode_from_uint8array_slow(JSContext* cx, uint8_t* data, uint32_t len,
+ const char* encoding, bool fatal,
+ JS::MutableHandleValue rval) {
+ size_t bytes_written, bytes_read;
GError* error = nullptr;
- GjsAutoChar u16_str =
- g_convert(reinterpret_cast<char*>(data), len,
- // Make sure the bytes of the UTF-16 string are laid out in memory
- // such that we can simply reinterpret_cast<char16_t> them.
+ GjsAutoChar u16_str;
+
+// Make sure the bytes of the UTF-16 string are laid out in memory
+// such that we can simply reinterpret_cast<char16_t> them.
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
- "UTF-16LE",
+ const char* to_codeset = "UTF-16LE";
#else
- "UTF-16BE",
+ const char* to_codeset = "UTF-16BE";
#endif
- encoding, /* bytes read = */ nullptr, &bytes_written, &error);
- if (!u16_str)
- return gjs_throw_gerror_message(cx, error); // frees GError
- // bytes_written should be bytes in a UTF-16 string so should be a multiple
- // of 2
- g_assert((bytes_written % 2) == 0);
+ if (fatal) {
+ u16_str =
+ g_convert(reinterpret_cast<char*>(data), len, to_codeset, encoding,
+ /* bytes read = */ nullptr, &bytes_written, &error);
+
+ // bytes_written should be bytes in a UTF-16 string so should be a
+ // multiple of 2
+ g_assert((bytes_written % 2) == 0);
+ } else {
+ // This will fail if the input contains invalid codepoints in the
+ // from_codeset. It inserts a replacement character if the input is
+ // valid but can't be represented in the output.
+ u16_str = g_convert_with_fallback(reinterpret_cast<char*>(data), len,
+ to_codeset, encoding, FALLBACK,
+ &bytes_read, &bytes_written, &error);
+ if (u16_str)
+ g_assert((bytes_written % 2) == 0);
+
+ // If the input is invalid we need to do the conversion ourselves.
+ if (error && g_error_matches(error, G_CONVERT_ERROR,
+ G_CONVERT_ERROR_ILLEGAL_SEQUENCE)) {
+ // Clear the illegal sequence error.
+ g_clear_error(&error);
+
+ char* str;
+ if (!gjs_convert_invalid_input(cx, data, len, to_codeset, encoding,
+ &str))
+ return false;
+
+ u16_str = str;
+ }
+ }
+
+ if (error)
+ return gjs_throw_gerror_message(cx, error);
// g_convert 0-terminates the string, although the 0 isn't included in
// bytes_written
@@ -73,26 +112,34 @@ bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
return true;
}
-// implement toString() with an optional encoding arg
+// Returns true if `encoding` selects UTF-8: either it is null (no label was
+// given) or it equals "utf-8" ignoring ASCII case and surrounding whitespace.
+// Other UTF-8 synonyms are not recognized. The result is only used to pick
+// an optimized code path, so failing to recognize a synonym is not an error.
+inline bool is_utf8_label(const char* encoding) {
+    if (!encoding)
+        return true;
+
+    // Common case first: an exact label needs no copy. g_ascii_strcasecmp()
+    // replaces strcasecmp() because it is locale-independent and available
+    // on every platform GLib supports.
+    if (g_ascii_strcasecmp(encoding, "utf-8") == 0)
+        return true;
+
+    // g_strstrip() edits the string in place, so work on a private copy.
+    GjsAutoChar stripped(g_strdup(encoding));
+    return g_ascii_strcasecmp(g_strstrip(stripped), "utf-8") == 0;
+}
+
GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl(JSContext* cx, JS::HandleObject byte_array,
- const char* encoding, JS::MutableHandleValue rval) {
+bool gjs_decode_from_uint8array(JSContext* cx, JS::HandleObject byte_array,
+ const char* encoding, bool fatal,
+ JS::MutableHandleValue rval) {
if (!JS_IsUint8Array(byte_array)) {
- gjs_throw(cx, "Argument to ByteArray.toString() must be a Uint8Array");
+ gjs_throw(
+ cx, "Argument to gjs_decode_from_uint8array must be a Uint8Array");
return false;
}
- bool encoding_is_utf8;
+ bool encoding_is_utf8 = is_utf8_label(encoding);
uint8_t* data;
- if (encoding) {
- // Maybe we should be smarter about utf8 synonyms here. Doesn't matter
- // much though. encoding_is_utf8 is just an optimization anyway.
- encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
- }
-
uint32_t len;
bool is_shared_memory;
js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);
@@ -103,7 +150,8 @@ bool to_string_impl(JSContext* cx, JS::HandleObject byte_array,
}
if (!encoding_is_utf8)
- return to_string_impl_slow(cx, data, len, encoding, rval);
+ return gjs_decode_from_uint8array_slow(cx, data, len, encoding, fatal,
+ rval);
// optimization, avoids iconv overhead and runs libmozjs hardwired
// utf8-to-utf16
@@ -111,12 +159,24 @@ bool to_string_impl(JSContext* cx, JS::HandleObject byte_array,
// If there are any 0 bytes, including the terminating byte, stop at the
// first one
if (data[len - 1] == 0 || memchr(data, 0, len)) {
- if (!gjs_string_from_utf8(cx, reinterpret_cast<char*>(data), rval))
- return false;
+ if (fatal) {
+ if (!gjs_string_from_utf8(cx, reinterpret_cast<char*>(data), rval))
+ return false;
+ } else {
+ if (!gjs_lossy_string_from_utf8(cx, reinterpret_cast<char*>(data),
+ rval))
+ return false;
+ }
} else {
- if (!gjs_string_from_utf8_n(cx, reinterpret_cast<char*>(data), len,
- rval))
- return false;
+ if (fatal) {
+ if (!gjs_string_from_utf8_n(cx, reinterpret_cast<char*>(data), len,
+ rval))
+ return false;
+ } else {
+ if (!gjs_lossy_string_from_utf8_n(cx, reinterpret_cast<char*>(data),
+ len, rval))
+ return false;
+ }
}
uint8_t* current_data;
@@ -139,50 +199,40 @@ bool to_string_impl(JSContext* cx, JS::HandleObject byte_array,
return true;
// This was the UTF-8 optimized path, so we explicitly pass the encoding
- return to_string_impl_slow(cx, current_data, current_len, "UTF-8", rval);
+ return gjs_decode_from_uint8array_slow(cx, current_data, current_len,
+ "UTF-8", fatal, rval);
}
+// JS-callable decode(byteArray[, fatal[, encoding]]): decodes the bytes of a
+// Uint8Array into a string through gjs_decode_from_uint8array(). `fatal`
+// stays false and `encoding` stays null when the caller omits them.
 GJS_JSAPI_RETURN_CONVENTION
-static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
+static bool gjs_decode(JSContext* cx, unsigned argc, JS::Value* vp) {
     JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
     JS::RootedObject byte_array(cx);
+    bool fatal = false;
     JS::UniqueChars encoding;
-    if (!gjs_parse_call_args(cx, "toString", args, "o|s", "byteArray",
-                             &byte_array, "encoding", &encoding))
+    // Argument errors are reported under "decode", the name this function
+    // is registered with in the module's function table.
+    if (!gjs_parse_call_args(cx, "decode", args, "o|bs", "byteArray",
+                             &byte_array, "fatal", &fatal, "encoding",
+                             &encoding))
         return false;
-    return to_string_impl(cx, byte_array, encoding.get(), args.rval());
+    return gjs_decode_from_uint8array(cx, byte_array, encoding.get(), fatal,
+                                      args.rval());
 }
// fromString() function implementation
-GJS_JSAPI_RETURN_CONVENTION
-static bool from_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
- JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
-
- JS::UniqueChars utf8;
- JS::UniqueChars encoding;
- if (!gjs_parse_call_args(cx, "fromString", args, "s|s", "string", &utf8,
- "encoding", &encoding))
- return false;
-
- bool encoding_is_utf8;
- if (argc > 1) {
- // Maybe we should be smarter about utf8 synonyms here. Doesn't matter
- // much though. encoding_is_utf8 is just an optimization anyway.
- encoding_is_utf8 = (strcmp(encoding.get(), "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
- }
-
+[[nodiscard]] bool gjs_encode_to_uint8array(JSContext* cx, JS::HandleString str,
+ const char* encoding,
+ JS::MutableHandleValue rval) {
JS::RootedObject array_buffer(cx);
+
+ bool encoding_is_utf8 = is_utf8_label(encoding);
if (encoding_is_utf8) {
// optimization? avoids iconv overhead and runs libmozjs hardwired
// utf16-to-utf8.
+ JS::UniqueChars utf8 = JS_EncodeStringToUTF8(cx, str);
size_t len = strlen(utf8.get());
array_buffer = JS::NewArrayBufferWithContents(cx, len, utf8.release());
} else {
- JSString* str = args[0].toString(); // Rooted by args
GError* error = nullptr;
char* encoded = nullptr;
size_t bytes_written;
@@ -200,7 +250,7 @@ static bool from_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
return false;
encoded = g_convert(reinterpret_cast<const char*>(chars), len,
- /* to_encoding = */ encoding.get(),
+ /* to_encoding = */ encoding,
/* from_encoding = */ "LATIN1",
/* bytes_read = */ nullptr, &bytes_written,
&error);
@@ -212,7 +262,7 @@ static bool from_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
encoded = g_convert(
reinterpret_cast<const char*>(chars), len * 2,
- /* to_encoding = */ encoding.get(),
+ /* to_encoding = */ encoding,
/* from_encoding = */ "UTF-16",
/* bytes_read = */ nullptr, &bytes_written, &error);
}
@@ -231,14 +281,188 @@ static bool from_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
JS::RootedObject obj(cx,
JS_NewUint8ArrayWithBuffer(cx, array_buffer, 0, -1));
- args.rval().setObject(*obj);
+ rval.setObject(*obj);
+ return true;
+}
+
+// Non-fatal ("lossy") conversion of `len` bytes at `data` from `from_codeset`
+// to `to_codeset`. It is called after a one-shot conversion has rejected the
+// input as an illegal sequence: every byte the converter refuses is skipped
+// and replaced by U+FFFD encoded in `to_codeset`.
+//
+// On success returns true and stores a newly g_malloc'ed buffer in
+// `*converted`; the caller owns it and releases it with g_free(). The
+// converted bytes are followed by zero bytes so the buffer can be read as a
+// terminated string. On failure a JS exception is thrown from the GError and
+// false is returned.
+//
+// NOTE(review): the converted length is not reported back. The visible
+// caller keeps the bytes_written value from its failed
+// g_convert_with_fallback() call -- confirm how the length of this result is
+// derived there.
+static bool gjs_convert_invalid_input(JSContext* cx, uint8_t* data, size_t len,
+                                      const char* to_codeset,
+                                      const char* from_codeset,
+                                      char** converted) {
+    GError* error = nullptr;
+    GjsAutoUnref<GCharsetConverter> converter(
+        g_charset_converter_new(to_codeset, from_codeset, &error));
+
+    // This should only throw if an encoding is not available.
+    if (error)
+        return gjs_throw_gerror_message(cx, error);
+
+    // FALLBACK holds UTF-8 bytes, but the output buffer holds `to_codeset`
+    // bytes, so the replacement character must be re-encoded before it can
+    // be spliced into the output.
+    size_t replacement_len = 0;
+    char* encoded_fallback =
+        g_convert(FALLBACK, FALLBACK_LEN, to_codeset, "UTF-8",
+                  /* bytes_read = */ nullptr, &replacement_len, &error);
+    if (!encoded_fallback)
+        return gjs_throw_gerror_message(cx, error);
+    const std::vector<char> replacement(encoded_fallback,
+                                        encoded_fallback + replacement_len);
+    g_free(encoded_fallback);
+
+    const char* input = reinterpret_cast<const char*>(data);
+    size_t input_len = len;
+    char buffer[1024];
+
+    // Use a vector for the output for easy resizing.
+    std::vector<char> output;
+
+    do {
+        // Zeroed every round: the out-parameters are only documented to be
+        // set on success, and they are used unconditionally below.
+        size_t bytes_read = 0, bytes_written = 0;
+
+        g_converter_convert(G_CONVERTER(converter.get()), input, input_len,
+                            buffer, sizeof(buffer), G_CONVERTER_INPUT_AT_END,
+                            &bytes_read, &bytes_written, &error);
+
+        input += bytes_read;
+        input_len -= bytes_read;
+        output.insert(output.end(), buffer, buffer + bytes_written);
+
+        if (!error)
+            continue;
+
+        if (g_error_matches(error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
+            // Skip the offending byte and emit a replacement character.
+            if (input_len > 0) {
+                input += 1;
+                input_len -= 1;
+            }
+            output.insert(output.end(), replacement.begin(),
+                          replacement.end());
+            g_clear_error(&error);
+        } else if (g_error_matches(error, G_IO_ERROR,
+                                   G_IO_ERROR_PARTIAL_INPUT)) {
+            // All input was supplied (G_CONVERTER_INPUT_AT_END), so this
+            // means the data ends in a truncated sequence. In a non-fatal
+            // conversion that is one more replacement character, after
+            // which there is nothing left to convert.
+            output.insert(output.end(), replacement.begin(),
+                          replacement.end());
+            g_clear_error(&error);
+            input_len = 0;
+        } else if (g_error_matches(error, G_IO_ERROR,
+                                   G_IO_ERROR_NO_SPACE)) {
+            // If the buffer was full, clear the error and continue
+            // converting.
+            g_clear_error(&error);
+        }
+        // Any other error is left set; it ends the loop and is thrown below.
+    } while (input_len && !error);
+
+    if (error)
+        return gjs_throw_gerror_message(cx, error);
+
+    // The output is expected to be UTF-16, so the total size (not just the
+    // size of the last chunk) should be a multiple of 2.
+    g_assert((output.size() % 2) == 0);
+
+    // Zero bytes after the data act as the string terminator; four of them
+    // cover code units up to 32 bits wide.
+    constexpr size_t terminator_len = 4;
+    char* result =
+        static_cast<char*>(g_malloc0(output.size() + terminator_len));
+    std::copy(output.begin(), output.end(), result);
+
+    *converted = result;
+    return true;
+}
+
+// Encodes `str` as UTF-8 directly into the existing storage of `uint8array`,
+// writing at most as many bytes as the array holds, and sets `rval` to a
+// plain object { read, written }. As the tests in this change show, `read`
+// counts UTF-16 code units consumed from `str` and `written` counts bytes
+// stored in the array.
+// Throws and returns false if `uint8array` is not a Uint8Array, is backed by
+// shared memory, or if encoding or object creation fails.
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_encode_into_uint8array(JSContext* cx, JS::HandleString str,
+                                JS::HandleObject uint8array,
+                                JS::MutableHandleValue rval) {
+    if (!JS_IsUint8Array(uint8array)) {
+        gjs_throw(
+            cx, "Argument to gjs_encode_into_uint8array must be a Uint8Array");
+        return false;
+    }
+
+    const auto len = JS_GetTypedArrayByteLength(uint8array);
+    size_t read = 0, written = 0;
+    bool shared = false, encoded = false;
+
+    {
+        // The raw data pointer is only valid while no GC can happen. A named
+        // guard keeps that check alive for the whole scope; a temporary
+        // would be gone before the pointer is even used. Exceptions are
+        // thrown only after the scope has ended.
+        JS::AutoCheckCannotGC nogc(cx);
+        auto* data = JS_GetUint8ArrayData(uint8array, &shared, nogc);
+
+        if (!shared) {
+            auto maybe = JS_EncodeStringToUTF8BufferPartial(
+                cx, str, mozilla::AsWritableChars(mozilla::Span(data, len)));
+            if (maybe) {
+                mozilla::Tie(read, written) = *maybe;
+                encoded = true;
+            }
+        }
+    }
+
+    if (shared) {
+        gjs_throw(cx, "Cannot encode data into shared memory.");
+        return false;
+    }
+
+    if (!encoded) {
+        JS_ReportOutOfMemory(cx);
+        return false;
+    }
+
+    g_assert(written <= len);
+
+    JS::RootedObject result(cx, JS_NewPlainObject(cx));
+    // Object creation can fail; do not set properties on a null object.
+    if (!result)
+        return false;
+
+    JS::RootedValue readv(cx, JS::NumberValue(read)),
+        writtenv(cx, JS::NumberValue(written));
+
+    if (!JS_SetProperty(cx, result, "read", readv) ||
+        !JS_SetProperty(cx, result, "written", writtenv)) {
+        return false;
+    }
+
+    rval.setObject(*result);
     return true;
 }
+// JS-callable encode(string[, encoding]): hands the string argument and the
+// optional encoding label to gjs_encode_to_uint8array(), which places the
+// resulting Uint8Array in the call's return value.
+// NOTE(review): the "s" format makes gjs_parse_call_args() produce a UTF-8
+// copy of the string that is never read here; only the argument validation
+// is used. Consider a format that avoids the copy.
+[[nodiscard]] static bool gjs_encode(JSContext* cx, unsigned argc,
+                                     JS::Value* vp) {
+    JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+
+    JS::UniqueChars encoding_label;
+    JS::UniqueChars unused_utf8;
+    const bool parsed =
+        gjs_parse_call_args(cx, "encode", args, "s|s", "string", &unused_utf8,
+                            "encoding", &encoding_label);
+    if (!parsed)
+        return false;
+
+    if (args[0].isString()) {
+        JS::RootedString input(cx, args[0].toString());
+        return gjs_encode_to_uint8array(cx, input, encoding_label.get(),
+                                        args.rval());
+    }
+
+    gjs_throw(cx, "First argument to encode() must be a string.");
+    return false;
+}
+
+// JS-callable encodeInto(string, uint8array): forwards both arguments to
+// gjs_encode_into_uint8array(), which writes the UTF-8 form of the string
+// into the array and places a { read, written } object in the call's return
+// value.
+[[nodiscard]] static bool gjs_encode_into(JSContext* cx, unsigned argc,
+                                          JS::Value* vp) {
+    JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+
+    // `utf8` is only a by-product of validating the first argument; the
+    // encoder below works on the JS string itself.
+    JS::UniqueChars utf8;
+    JS::RootedObject uint8array(cx);
+    if (!gjs_parse_call_args(cx, "encodeInto", args, "so", "string", &utf8,
+                             "uint8array", &uint8array))
+        return false;
+
+    if (!args[0].isString()) {
+        // Name the function the caller actually invoked.
+        gjs_throw(cx, "First argument to encodeInto() must be a string.");
+        return false;
+    }
+
+    JS::RootedString str(cx, args[0].toString());
+
+    return gjs_encode_into_uint8array(cx, str, uint8array, args.rval());
+}
+
// clang-format off
+// Native functions this module exposes to JS: "decode", "encodeInto" and
+// "encode" replace the former "fromString"/"toString" pair. The number in
+// each entry is presumably the declared argument count (decode takes up to
+// three arguments) -- confirm against the JS_FN documentation.
static JSFunctionSpec gjs_text_encoding_module_funcs[] = {
- JS_FN("fromString", from_string_func, 2, 0),
- JS_FN("toString", to_string_func, 2, 0),
+ JS_FN("decode", gjs_decode, 3, 0),
+ JS_FN("encodeInto", gjs_encode_into, 2, 0),
+ JS_FN("encode", gjs_encode, 2, 0),
JS_FS_END};
// clang-format on
diff --git a/gjs/text-encoding.h b/gjs/text-encoding.h
index 7524a723..096df8ac 100644
--- a/gjs/text-encoding.h
+++ b/gjs/text-encoding.h
@@ -15,9 +15,14 @@
#include "gjs/macros.h"
-GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl(JSContext* cx, JS::HandleObject uint8array,
- const char* encoding, JS::MutableHandleValue rval);
+[[nodiscard]] bool gjs_decode_from_uint8array(JSContext* cx,
+ JS::HandleObject uint8array,
+ const char* encoding, bool fatal,
+ JS::MutableHandleValue rval);
+
+[[nodiscard]] bool gjs_encode_to_uint8array(JSContext* cx, JS::HandleString str,
+ const char* encoding,
+ JS::MutableHandleValue rval);
GJS_JSAPI_RETURN_CONVENTION
bool gjs_define_text_encoding_stuff(JSContext* cx,
diff --git a/installed-tests/js/meson.build b/installed-tests/js/meson.build
index 97f9cd07..85371e3a 100644
--- a/installed-tests/js/meson.build
+++ b/installed-tests/js/meson.build
@@ -94,6 +94,7 @@ subdir('libgjstesttools')
jasmine_tests = [
'self',
'ByteArray',
+ 'Encoding',
'Exceptions',
'Format',
'Fundamental',
diff --git a/installed-tests/js/testEncoding.js b/installed-tests/js/testEncoding.js
new file mode 100644
index 00000000..3bd510f4
--- /dev/null
+++ b/installed-tests/js/testEncoding.js
@@ -0,0 +1,1051 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Copyright 2018-2020 the Deno authors. All rights reserved.
+
+// Modified from https://github.com/denoland/deno/blob/923214c53725651792f6d55c5401bf6b475622ea/op_crates/web/08_text_encoding.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+describe('Text Encoding', function () {
+ it('textDecoder', function () {
+ const fixture = new Uint8Array([
+ 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+ ]);
+ const decoder = new TextDecoder();
+ expect(decoder.decode(fixture)).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderIgnoreBOM', function () {
+ const fixture = new Uint8Array([
+ 0xef, 0xbb, 0xbf, 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+ ]);
+ const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+ expect(decoder.decode(fixture)).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderNotBOM', function () {
+ const fixture = new Uint8Array([
+ 0xef, 0xbb, 0x89, 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+ ]);
+ const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+ expect(decoder.decode(fixture)).toBe('ﻉ𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderASCII', function () {
+ const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
+ const decoder = new TextDecoder('ascii');
+ expect(decoder.decode(fixture)).toBe('‰•Ÿ¿');
+ });
+
+ it('textDecoderErrorEncoding', function () {
+ expect(() => new TextDecoder('Foo')).toThrowError("Invalid encoding label: 'Foo'");
+ });
+
+ it('textDecoderHandlesUndefined', function () {
+ const fixture = undefined;
+ const decoder = new TextDecoder();
+ expect(decoder.decode(fixture)).toBe('');
+ });
+
+ it('textDecoderThrowsOnEmpty', function () {
+ const fixture = '';
+ const decoder = new TextDecoder();
+
+ expect(() => decoder.decode(fixture))
+ .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+ });
+
+ it('textDecoderThrowsOnNull', function () {
+ const fixture = null;
+ const decoder = new TextDecoder();
+
+ expect(() => decoder.decode(fixture))
+ .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+ });
+
+ it('textEncoder', function () {
+ const fixture = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+
+ expect(Array.from(encoder.encode(fixture))).toEqual([
+ 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+ ]);
+ });
+
+ it('textEncodeInto', function () {
+ const fixture = 'text';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(5);
+ const result = encoder.encodeInto(fixture, bytes);
+ expect(result.read).toBe(4);
+ expect(result.written).toBe(4);
+
+ expect(Array.from(bytes)).toEqual([0x74, 0x65, 0x78, 0x74, 0x00]);
+ });
+
+ it('textEncodeInto2', function () {
+ const fixture = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(17);
+ const result = encoder.encodeInto(fixture, bytes);
+ expect(result.read).toBe(8);
+ expect(result.written).toBe(16);
+
+ expect(Array.from(bytes)).toEqual([
+ 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd, 0x00,
+ ]);
+ });
+
+ it('textEncodeInto3', function () {
+ const fixture = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(5);
+ const result = encoder.encodeInto(fixture, bytes);
+ expect(result.read).toBe(2);
+ expect(result.written).toBe(4);
+
+ expect(Array.from(bytes)).toEqual([0xf0, 0x9d, 0x93, 0xbd, 0x00]);
+ });
+
+ xit('textDecoderSharedUint8Array', function () {
+ const ab = new SharedArrayBuffer(6);
+ const dataView = new DataView(ab);
+ const charCodeA = 'A'.charCodeAt(0);
+ for (let i = 0; i < ab.byteLength; i++)
+ dataView.setUint8(i, charCodeA + i);
+
+ const ui8 = new Uint8Array(ab);
+ const decoder = new TextDecoder();
+ const actual = decoder.decode(ui8);
+ expect(actual).toBe('ABCDEF');
+ });
+
+ xit('textDecoderSharedInt32Array', function () {
+ const ab = new SharedArrayBuffer(8);
+ const dataView = new DataView(ab);
+ const charCodeA = 'A'.charCodeAt(0);
+ for (let i = 0; i < ab.byteLength; i++)
+ dataView.setUint8(i, charCodeA + i);
+
+ const i32 = new Int32Array(ab);
+ const decoder = new TextDecoder();
+ const actual = decoder.decode(i32);
+ expect(actual).toBe('ABCDEFGH');
+ });
+
+ it('toStringShouldBeWebCompatibility', function () {
+ const encoder = new TextEncoder();
+
+ expect(encoder.toString()).toBe('[object TextEncoder]');
+
+ const decoder = new TextDecoder();
+ expect(decoder.toString()).toBe('[object TextDecoder]');
+ });
+
+ it('singleByteEncodings', function () {
+ // Straight from https://encoding.spec.whatwg.org/encodings.json
+ const encodingsTable = [
+ {
+ encodings: [
+ {
+ labels: [
+ 'unicode-1-1-utf-8',
+ 'unicode11utf8',
+ 'unicode20utf8',
+ 'utf-8',
+ 'utf8',
+ 'x-unicode20utf8',
+ ],
+ name: 'UTF-8',
+ },
+ ],
+ heading: 'The Encoding',
+ },
+ {
+ encodings: [
+ {
+ labels: ['866', 'cp866', 'csibm866', 'ibm866'],
+ name: 'IBM866',
+ },
+ {
+ labels: [
+ 'csisolatin2',
+ 'iso-8859-2',
+ 'iso-ir-101',
+ 'iso8859-2',
+ 'iso88592',
+ 'iso_8859-2',
+ 'iso_8859-2:1987',
+ 'l2',
+ 'latin2',
+ ],
+ name: 'ISO-8859-2',
+ },
+ {
+ labels: [
+ 'csisolatin3',
+ 'iso-8859-3',
+ 'iso-ir-109',
+ 'iso8859-3',
+ 'iso88593',
+ 'iso_8859-3',
+ 'iso_8859-3:1988',
+ 'l3',
+ 'latin3',
+ ],
+ name: 'ISO-8859-3',
+ },
+ {
+ labels: [
+ 'csisolatin4',
+ 'iso-8859-4',
+ 'iso-ir-110',
+ 'iso8859-4',
+ 'iso88594',
+ 'iso_8859-4',
+ 'iso_8859-4:1988',
+ 'l4',
+ 'latin4',
+ ],
+ name: 'ISO-8859-4',
+ },
+ {
+ labels: [
+ 'csisolatincyrillic',
+ 'cyrillic',
+ 'iso-8859-5',
+ 'iso-ir-144',
+ 'iso8859-5',
+ 'iso88595',
+ 'iso_8859-5',
+ 'iso_8859-5:1988',
+ ],
+ name: 'ISO-8859-5',
+ },
+ {
+ labels: [
+ 'arabic',
+ 'asmo-708',
+ 'csiso88596e',
+ 'csiso88596i',
+ 'csisolatinarabic',
+ 'ecma-114',
+ 'iso-8859-6',
+ 'iso-8859-6-e',
+ 'iso-8859-6-i',
+ 'iso-ir-127',
+ 'iso8859-6',
+ 'iso88596',
+ 'iso_8859-6',
+ 'iso_8859-6:1987',
+ ],
+ name: 'ISO-8859-6',
+ },
+ {
+ labels: [
+ 'csisolatingreek',
+ 'ecma-118',
+ 'elot_928',
+ 'greek',
+ 'greek8',
+ 'iso-8859-7',
+ 'iso-ir-126',
+ 'iso8859-7',
+ 'iso88597',
+ 'iso_8859-7',
+ 'iso_8859-7:1987',
+ 'sun_eu_greek',
+ ],
+ name: 'ISO-8859-7',
+ },
+ {
+ labels: [
+ 'csiso88598e',
+ 'csisolatinhebrew',
+ 'hebrew',
+ 'iso-8859-8',
+ 'iso-8859-8-e',
+ 'iso-ir-138',
+ 'iso8859-8',
+ 'iso88598',
+ 'iso_8859-8',
+ 'iso_8859-8:1988',
+ 'visual',
+ ],
+ name: 'ISO-8859-8',
+ },
+ {
+ labels: ['csiso88598i', 'iso-8859-8-i', 'logical'],
+ name: 'ISO-8859-8-I',
+ },
+ {
+ labels: [
+ 'csisolatin6',
+ 'iso-8859-10',
+ 'iso-ir-157',
+ 'iso8859-10',
+ 'iso885910',
+ 'l6',
+ 'latin6',
+ ],
+ name: 'ISO-8859-10',
+ },
+ {
+ labels: ['iso-8859-13', 'iso8859-13', 'iso885913'],
+ name: 'ISO-8859-13',
+ },
+ {
+ labels: ['iso-8859-14', 'iso8859-14', 'iso885914'],
+ name: 'ISO-8859-14',
+ },
+ {
+ labels: [
+ 'csisolatin9',
+ 'iso-8859-15',
+ 'iso8859-15',
+ 'iso885915',
+ 'iso_8859-15',
+ 'l9',
+ ],
+ name: 'ISO-8859-15',
+ },
+ {
+ labels: ['iso-8859-16'],
+ name: 'ISO-8859-16',
+ },
+ {
+ labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'],
+ name: 'KOI8-R',
+ },
+ {
+ labels: ['koi8-ru', 'koi8-u'],
+ name: 'KOI8-U',
+ },
+ {
+ labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'],
+ name: 'macintosh',
+ },
+ {
+ labels: [
+ 'dos-874',
+ 'iso-8859-11',
+ 'iso8859-11',
+ 'iso885911',
+ 'tis-620',
+ 'windows-874',
+ ],
+ name: 'windows-874',
+ },
+ {
+ labels: ['cp1250', 'windows-1250', 'x-cp1250'],
+ name: 'windows-1250',
+ },
+ {
+ labels: ['cp1251', 'windows-1251', 'x-cp1251'],
+ name: 'windows-1251',
+ },
+ {
+ labels: [
+ 'ansi_x3.4-1968',
+ 'ascii',
+ 'cp1252',
+ 'cp819',
+ 'csisolatin1',
+ 'ibm819',
+ 'iso-8859-1',
+ 'iso-ir-100',
+ 'iso8859-1',
+ 'iso88591',
+ 'iso_8859-1',
+ 'iso_8859-1:1987',
+ 'l1',
+ 'latin1',
+ 'us-ascii',
+ 'windows-1252',
+ 'x-cp1252',
+ ],
+ name: 'windows-1252',
+ },
+ {
+ labels: ['cp1253', 'windows-1253', 'x-cp1253'],
+ name: 'windows-1253',
+ },
+ {
+ labels: [
+ 'cp1254',
+ 'csisolatin5',
+ 'iso-8859-9',
+ 'iso-ir-148',
+ 'iso8859-9',
+ 'iso88599',
+ 'iso_8859-9',
+ 'iso_8859-9:1989',
+ 'l5',
+ 'latin5',
+ 'windows-1254',
+ 'x-cp1254',
+ ],
+ name: 'windows-1254',
+ },
+ {
+ labels: ['cp1255', 'windows-1255', 'x-cp1255'],
+ name: 'windows-1255',
+ },
+ {
+ labels: ['cp1256', 'windows-1256', 'x-cp1256'],
+ name: 'windows-1256',
+ },
+ {
+ labels: ['cp1257', 'windows-1257', 'x-cp1257'],
+ name: 'windows-1257',
+ },
+ {
+ labels: ['cp1258', 'windows-1258', 'x-cp1258'],
+ name: 'windows-1258',
+ },
+ {
+ labels: ['x-mac-cyrillic', 'x-mac-ukrainian'],
+ name: 'x-mac-cyrillic',
+ },
+ ],
+ heading: 'Legacy single-byte encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'chinese',
+ 'csgb2312',
+ 'csiso58gb231280',
+ 'gb2312',
+ 'gb_2312',
+ 'gb_2312-80',
+ 'gbk',
+ 'iso-ir-58',
+ 'x-gbk',
+ ],
+ name: 'GBK',
+ },
+ {
+ labels: ['gb18030'],
+ name: 'gb18030',
+ },
+ ],
+ heading: 'Legacy multi-byte Chinese (simplified) encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
+ name: 'Big5',
+ },
+ ],
+ heading: 'Legacy multi-byte Chinese (traditional) encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'],
+ name: 'EUC-JP',
+ },
+ {
+ labels: ['csiso2022jp', 'iso-2022-jp'],
+ name: 'ISO-2022-JP',
+ },
+ {
+ labels: [
+ 'csshiftjis',
+ 'ms932',
+ 'ms_kanji',
+ 'shift-jis',
+ 'shift_jis',
+ 'sjis',
+ 'windows-31j',
+ 'x-sjis',
+ ],
+ name: 'Shift_JIS',
+ },
+ ],
+ heading: 'Legacy multi-byte Japanese encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'cseuckr',
+ 'csksc56011987',
+ 'euc-kr',
+ 'iso-ir-149',
+ 'korean',
+ 'ks_c_5601-1987',
+ 'ks_c_5601-1989',
+ 'ksc5601',
+ 'ksc_5601',
+ 'windows-949',
+ ],
+ name: 'EUC-KR',
+ },
+ ],
+ heading: 'Legacy multi-byte Korean encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'csiso2022kr',
+ 'hz-gb-2312',
+ 'iso-2022-cn',
+ 'iso-2022-cn-ext',
+ 'iso-2022-kr',
+ 'replacement',
+ ],
+ name: 'replacement',
+ },
+ {
+ labels: ['unicodefffe', 'utf-16be'],
+ name: 'UTF-16BE',
+ },
+ {
+ labels: [
+ 'csunicode',
+ 'iso-10646-ucs-2',
+ 'ucs-2',
+ 'unicode',
+ 'unicodefeff',
+ 'utf-16',
+ 'utf-16le',
+ ],
+ name: 'UTF-16LE',
+ },
+ {
+ labels: ['x-user-defined'],
+ name: 'x-user-defined',
+ },
+ ],
+ heading: 'Legacy miscellaneous encodings',
+ },
+ ];
+
+ const singleByteEncodings = encodingsTable.filter(group => {
+ return group.heading === 'Legacy single-byte encodings';
+ })[0].encodings;
+
+ // https://encoding.spec.whatwg.org/indexes.json
+ const singleByteIndexes = {
+ 'IBM866': [
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558,
+ 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488,
+ 9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567,
+ 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575,
+ 9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579,
+ 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103,
+ 1025, 1105, 1028, 1108, 1031, 1111, 1038, 1118,
+ 176, 8729, 183, 8730, 8470, 164, 9632, 160,
+ ],
+ 'ISO-8859-2': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 728, 321, 164, 317, 346, 167,
+ 168, 352, 350, 356, 377, 173, 381, 379,
+ 176, 261, 731, 322, 180, 318, 347, 711,
+ 184, 353, 351, 357, 378, 733, 382, 380,
+ 340, 193, 194, 258, 196, 313, 262, 199,
+ 268, 201, 280, 203, 282, 205, 206, 270,
+ 272, 323, 327, 211, 212, 336, 214, 215,
+ 344, 366, 218, 368, 220, 221, 354, 223,
+ 341, 225, 226, 259, 228, 314, 263, 231,
+ 269, 233, 281, 235, 283, 237, 238, 271,
+ 273, 324, 328, 243, 244, 337, 246, 247,
+ 345, 367, 250, 369, 252, 253, 355, 729,
+ ],
+ 'ISO-8859-3': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 294, 728, 163, 164, null, 292, 167,
+ 168, 304, 350, 286, 308, 173, null, 379,
+ 176, 295, 178, 179, 180, 181, 293, 183,
+ 184, 305, 351, 287, 309, 189, null, 380,
+ 192, 193, 194, null, 196, 266, 264, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ null, 209, 210, 211, 212, 288, 214, 215,
+ 284, 217, 218, 219, 220, 364, 348, 223,
+ 224, 225, 226, null, 228, 267, 265, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ null, 241, 242, 243, 244, 289, 246, 247,
+ 285, 249, 250, 251, 252, 365, 349, 729,
+ ],
+ 'ISO-8859-4': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 312, 342, 164, 296, 315, 167,
+ 168, 352, 274, 290, 358, 173, 381, 175,
+ 176, 261, 731, 343, 180, 297, 316, 711,
+ 184, 353, 275, 291, 359, 330, 382, 331,
+ 256, 193, 194, 195, 196, 197, 198, 302,
+ 268, 201, 280, 203, 278, 205, 206, 298,
+ 272, 325, 332, 310, 212, 213, 214, 215,
+ 216, 370, 218, 219, 220, 360, 362, 223,
+ 257, 225, 226, 227, 228, 229, 230, 303,
+ 269, 233, 281, 235, 279, 237, 238, 299,
+ 273, 326, 333, 311, 244, 245, 246, 247,
+ 248, 371, 250, 251, 252, 361, 363, 729,
+ ],
+ 'ISO-8859-5': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
+ 1032, 1033, 1034, 1035, 1036, 173, 1038, 1039,
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103,
+ 8470, 1105, 1106, 1107, 1108, 1109, 1110, 1111,
+ 1112, 1113, 1114, 1115, 1116, 167, 1118, 1119,
+ ],
+ 'ISO-8859-6': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, null, null, null, 164, null, null, null,
+ null, null, null, null, 1548, 173, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, 1563, null, null, null, 1567,
+ null, 1569, 1570, 1571, 1572, 1573, 1574, 1575,
+ 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583,
+ 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591,
+ 1592, 1593, 1594, null, null, null, null, null,
+ 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607,
+ 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615,
+ 1616, 1617, 1618, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ ],
+ 'ISO-8859-7': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 8216, 8217, 163, 8364, 8367, 166, 167,
+ 168, 169, 890, 171, 172, 173, null, 8213,
+ 176, 177, 178, 179, 900, 901, 902, 183,
+ 904, 905, 906, 187, 908, 189, 910, 911,
+ 912, 913, 914, 915, 916, 917, 918, 919,
+ 920, 921, 922, 923, 924, 925, 926, 927,
+ 928, 929, null, 931, 932, 933, 934, 935,
+ 936, 937, 938, 939, 940, 941, 942, 943,
+ 944, 945, 946, 947, 948, 949, 950, 951,
+ 952, 953, 954, 955, 956, 957, 958, 959,
+ 960, 961, 962, 963, 964, 965, 966, 967,
+ 968, 969, 970, 971, 972, 973, 974, null,
+ ],
+ 'ISO-8859-8': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, null, 162, 163, 164, 165, 166, 167,
+ 168, 169, 215, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 247, 187, 188, 189, 190, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, 8215,
+ 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495,
+ 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
+ 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511,
+ 1512, 1513, 1514, null, null, 8206, 8207, null,
+ ],
+ 'ISO-8859-10': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 274, 290, 298, 296, 310, 167,
+ 315, 272, 352, 358, 381, 173, 362, 330,
+ 176, 261, 275, 291, 299, 297, 311, 183,
+ 316, 273, 353, 359, 382, 8213, 363, 331,
+ 256, 193, 194, 195, 196, 197, 198, 302,
+ 268, 201, 280, 203, 278, 205, 206, 207,
+ 208, 325, 332, 211, 212, 213, 214, 360,
+ 216, 370, 218, 219, 220, 221, 222, 223,
+ 257, 225, 226, 227, 228, 229, 230, 303,
+ 269, 233, 281, 235, 279, 237, 238, 239,
+ 240, 326, 333, 243, 244, 245, 246, 361,
+ 248, 371, 250, 251, 252, 253, 254, 312,
+ ],
+ 'ISO-8859-13': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 8221, 162, 163, 164, 8222, 166, 167,
+ 216, 169, 342, 171, 172, 173, 174, 198,
+ 176, 177, 178, 179, 8220, 181, 182, 183,
+ 248, 185, 343, 187, 188, 189, 190, 230,
+ 260, 302, 256, 262, 196, 197, 280, 274,
+ 268, 201, 377, 278, 290, 310, 298, 315,
+ 352, 323, 325, 211, 332, 213, 214, 215,
+ 370, 321, 346, 362, 220, 379, 381, 223,
+ 261, 303, 257, 263, 228, 229, 281, 275,
+ 269, 233, 378, 279, 291, 311, 299, 316,
+ 353, 324, 326, 243, 333, 245, 246, 247,
+ 371, 322, 347, 363, 252, 380, 382, 8217,
+ ],
+ 'ISO-8859-14': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 7682, 7683, 163, 266, 267, 7690, 167,
+ 7808, 169, 7810, 7691, 7922, 173, 174, 376,
+ 7710, 7711, 288, 289, 7744, 7745, 182, 7766,
+ 7809, 7767, 7811, 7776, 7923, 7812, 7813, 7777,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 372, 209, 210, 211, 212, 213, 214, 7786,
+ 216, 217, 218, 219, 220, 221, 374, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 373, 241, 242, 243, 244, 245, 246, 7787,
+ 248, 249, 250, 251, 252, 253, 375, 255,
+ ],
+ 'ISO-8859-15': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 8364, 165, 352, 167,
+ 353, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 381, 181, 182, 183,
+ 382, 185, 186, 187, 338, 339, 376, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ ],
+ 'ISO-8859-16': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 261, 321, 8364, 8222, 352, 167,
+ 353, 169, 536, 171, 377, 173, 378, 379,
+ 176, 177, 268, 322, 381, 8221, 182, 183,
+ 382, 269, 537, 187, 338, 339, 376, 380,
+ 192, 193, 194, 258, 196, 262, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 272, 323, 210, 211, 212, 336, 214, 346,
+ 368, 217, 218, 219, 220, 280, 538, 223,
+ 224, 225, 226, 259, 228, 263, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 273, 324, 242, 243, 244, 337, 246, 347,
+ 369, 249, 250, 251, 252, 281, 539, 255,
+ ],
+ 'KOI8-R': [
+ 9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508,
+ 9516, 9524, 9532, 9600, 9604, 9608, 9612, 9616,
+ 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776,
+ 8804, 8805, 160, 8993, 176, 178, 183, 247,
+ 9552, 9553, 9554, 1105, 9555, 9556, 9557, 9558,
+ 9559, 9560, 9561, 9562, 9563, 9564, 9565, 9566,
+ 9567, 9568, 9569, 1025, 9570, 9571, 9572, 9573,
+ 9574, 9575, 9576, 9577, 9578, 9579, 9580, 169,
+ 1102, 1072, 1073, 1094, 1076, 1077, 1092, 1075,
+ 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086,
+ 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
+ 1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098,
+ 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043,
+ 1061, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
+ 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042,
+ 1068, 1067, 1047, 1064, 1069, 1065, 1063, 1066,
+ ],
+ 'KOI8-U': [
+ 9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508,
+ 9516, 9524, 9532, 9600, 9604, 9608, 9612, 9616,
+ 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776,
+ 8804, 8805, 160, 8993, 176, 178, 183, 247,
+ 9552, 9553, 9554, 1105, 1108, 9556, 1110, 1111,
+ 9559, 9560, 9561, 9562, 9563, 1169, 1118, 9566,
+ 9567, 9568, 9569, 1025, 1028, 9571, 1030, 1031,
+ 9574, 9575, 9576, 9577, 9578, 1168, 1038, 169,
+ 1102, 1072, 1073, 1094, 1076, 1077, 1092, 1075,
+ 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086,
+ 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
+ 1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098,
+ 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043,
+ 1061, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
+ 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042,
+ 1068, 1067, 1047, 1064, 1069, 1065, 1063, 1066,
+ ],
+ 'macintosh': [
+ 196, 197, 199, 201, 209, 214, 220, 225,
+ 224, 226, 228, 227, 229, 231, 233, 232,
+ 234, 235, 237, 236, 238, 239, 241, 243,
+ 242, 244, 246, 245, 250, 249, 251, 252,
+ 8224, 176, 162, 163, 167, 8226, 182, 223,
+ 174, 169, 8482, 180, 168, 8800, 198, 216,
+ 8734, 177, 8804, 8805, 165, 181, 8706, 8721,
+ 8719, 960, 8747, 170, 186, 937, 230, 248,
+ 191, 161, 172, 8730, 402, 8776, 8710, 171,
+ 187, 8230, 160, 192, 195, 213, 338, 339,
+ 8211, 8212, 8220, 8221, 8216, 8217, 247, 9674,
+ 255, 376, 8260, 8364, 8249, 8250, 64257, 64258,
+ 8225, 183, 8218, 8222, 8240, 194, 202, 193,
+ 203, 200, 205, 206, 207, 204, 211, 212,
+ 63743, 210, 218, 219, 217, 305, 710, 732,
+ 175, 728, 729, 730, 184, 733, 731, 711,
+ ],
+ 'windows-874': [
+ 8364, 129, 130, 131, 132, 8230, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 3585, 3586, 3587, 3588, 3589, 3590, 3591,
+ 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599,
+ 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607,
+ 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615,
+ 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623,
+ 3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631,
+ 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639,
+ 3640, 3641, 3642, null, null, null, null, 3647,
+ 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655,
+ 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663,
+ 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671,
+ 3672, 3673, 3674, 3675, null, null, null, null,
+ ],
+ 'windows-1250': [
+ 8364, 129, 8218, 131, 8222, 8230, 8224, 8225,
+ 136, 8240, 352, 8249, 346, 356, 381, 377,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 8482, 353, 8250, 347, 357, 382, 378,
+ 160, 711, 728, 321, 164, 260, 166, 167,
+ 168, 169, 350, 171, 172, 173, 174, 379,
+ 176, 177, 731, 322, 180, 181, 182, 183,
+ 184, 261, 351, 187, 317, 733, 318, 380,
+ 340, 193, 194, 258, 196, 313, 262, 199,
+ 268, 201, 280, 203, 282, 205, 206, 270,
+ 272, 323, 327, 211, 212, 336, 214, 215,
+ 344, 366, 218, 368, 220, 221, 354, 223,
+ 341, 225, 226, 259, 228, 314, 263, 231,
+ 269, 233, 281, 235, 283, 237, 238, 271,
+ 273, 324, 328, 243, 244, 337, 246, 247,
+ 345, 367, 250, 369, 252, 253, 355, 729,
+ ],
+ 'windows-1251': [
+ 1026, 1027, 8218, 1107, 8222, 8230, 8224, 8225,
+ 8364, 8240, 1033, 8249, 1034, 1036, 1035, 1039,
+ 1106, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 8482, 1113, 8250, 1114, 1116, 1115, 1119,
+ 160, 1038, 1118, 1032, 164, 1168, 166, 167,
+ 1025, 169, 1028, 171, 172, 173, 174, 1031,
+ 176, 177, 1030, 1110, 1169, 181, 182, 183,
+ 1105, 8470, 1108, 187, 1112, 1029, 1109, 1111,
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103,
+ ],
+ 'windows-1252': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 352, 8249, 338, 141, 381, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 353, 8250, 339, 157, 382, 376,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ ],
+ 'windows-1253': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 136, 8240, 138, 8249, 140, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 8482, 154, 8250, 156, 157, 158, 159,
+ 160, 901, 902, 163, 164, 165, 166, 167,
+ 168, 169, null, 171, 172, 173, 174, 8213,
+ 176, 177, 178, 179, 900, 181, 182, 183,
+ 904, 905, 906, 187, 908, 189, 910, 911,
+ 912, 913, 914, 915, 916, 917, 918, 919,
+ 920, 921, 922, 923, 924, 925, 926, 927,
+ 928, 929, null, 931, 932, 933, 934, 935,
+ 936, 937, 938, 939, 940, 941, 942, 943,
+ 944, 945, 946, 947, 948, 949, 950, 951,
+ 952, 953, 954, 955, 956, 957, 958, 959,
+ 960, 961, 962, 963, 964, 965, 966, 967,
+ 968, 969, 970, 971, 972, 973, 974, null,
+ ],
+ 'windows-1254': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 352, 8249, 338, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 353, 8250, 339, 157, 158, 376,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 286, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 304, 350, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 287, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 305, 351, 255,
+ ],
+ 'windows-1255': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 138, 8249, 140, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 154, 8250, 156, 157, 158, 159,
+ 160, 161, 162, 163, 8362, 165, 166, 167,
+ 168, 169, 215, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 247, 187, 188, 189, 190, 191,
+ 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463,
+ 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471,
+ 1472, 1473, 1474, 1475, 1520, 1521, 1522, 1523,
+ 1524, null, null, null, null, null, null, null,
+ 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495,
+ 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
+ 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511,
+ 1512, 1513, 1514, null, null, 8206, 8207, null,
+ ],
+ 'windows-1256': [
+ 8364, 1662, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 1657, 8249, 338, 1670, 1688, 1672,
+ 1711, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 1705, 8482, 1681, 8250, 339, 8204, 8205, 1722,
+ 160, 1548, 162, 163, 164, 165, 166, 167,
+ 168, 169, 1726, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 1563, 187, 188, 189, 190, 1567,
+ 1729, 1569, 1570, 1571, 1572, 1573, 1574, 1575,
+ 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583,
+ 1584, 1585, 1586, 1587, 1588, 1589, 1590, 215,
+ 1591, 1592, 1593, 1594, 1600, 1601, 1602, 1603,
+ 224, 1604, 226, 1605, 1606, 1607, 1608, 231,
+ 232, 233, 234, 235, 1609, 1610, 238, 239,
+ 1611, 1612, 1613, 1614, 244, 1615, 1616, 247,
+ 1617, 249, 1618, 251, 252, 8206, 8207, 1746,
+ ],
+ 'windows-1257': [
+ 8364, 129, 8218, 131, 8222, 8230, 8224, 8225,
+ 136, 8240, 138, 8249, 140, 168, 711, 184, 144,
+ 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152,
+ 8482, 154, 8250, 156, 175, 731, 159, 160,
+ null, 162, 163, 164, null, 166, 167, 216,
+ 169, 342, 171, 172, 173, 174, 198, 176,
+ 177, 178, 179, 180, 181, 182, 183, 248,
+ 185, 343, 187, 188, 189, 190, 230, 260,
+ 302, 256, 262, 196, 197, 280, 274, 268,
+ 201, 377, 278, 290, 310, 298, 315, 352,
+ 323, 325, 211, 332, 213, 214, 215, 370,
+ 321, 346, 362, 220, 379, 381, 223, 261,
+ 303, 257, 263, 228, 229, 281, 275, 269,
+ 233, 378, 279, 291, 311, 299, 316, 353,
+ 324, 326, 243, 333, 245, 246, 247, 371,
+ 322, 347, 363, 252, 380, 382, 729,
+ ],
+ 'windows-1258': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 138, 8249, 338, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 154, 8250, 339, 157, 158, 376,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 258, 196, 197, 198, 199,
+ 200, 201, 202, 203, 768, 205, 206, 207,
+ 272, 209, 777, 211, 212, 416, 214, 215,
+ 216, 217, 218, 219, 220, 431, 771, 223,
+ 224, 225, 226, 259, 228, 229, 230, 231,
+ 232, 233, 234, 235, 769, 237, 238, 239,
+ 273, 241, 803, 243, 244, 417, 246, 247,
+ 248, 249, 250, 251, 252, 432, 8363, 255,
+ ],
+ 'x-mac-cyrillic': [
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 8224, 176, 1168, 163, 167, 8226, 182, 1030,
+ 174, 169, 8482, 1026, 1106, 8800, 1027, 1107,
+ 8734, 177, 8804, 8805, 1110, 181, 1169, 1032,
+ 1028, 1108, 1031, 1111, 1033, 1113, 1034, 1114,
+ 1112, 1029, 172, 8730, 402, 8776, 8710, 171,
+ 187, 8230, 160, 1035, 1115, 1036, 1116, 1109,
+ 8211, 8212, 8220, 8221, 8216, 8217, 247, 8222,
+ 1038, 1118, 1039, 1119, 8470, 1025, 1105, 1103,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 8364,
+ ],
+ };
+
+ function assertDecode(data, encoding) {
+ for (let i = 0, l = data.length; i < l; i++) {
+ const cp = data.charCodeAt(i);
+ let expectedCp = i < 0x80 ? i : singleByteIndexes[encoding][i - 0x80];
+ if (expectedCp === null)
+ expectedCp = 0xfffd;
+
+ expect(cp).toBe(expectedCp);
+ }
+ }
+ const buffer = new ArrayBuffer(255);
+ const view = new Uint8Array(buffer);
+
+ for (let i = 0, l = view.byteLength; i < l; i++)
+ view[i] = i;
+
+
+ for (let i = 0, l = singleByteEncodings.length; i < l; i++) {
+ const encoding = singleByteEncodings[i];
+ for (let i2 = 0, l2 = encoding.labels.length; i2 < l2; i2++) {
+ const label = encoding.labels[i2];
+ const decoder = new TextDecoder(label);
+
+ const data = decoder.decode(view);
+
+ expect(decoder.encoding).toBe(encoding.name.toLowerCase());
+ assertDecode(data, encoding.name);
+ }
+ }
+ });
+});
diff --git a/js.gresource.xml b/js.gresource.xml
index fc55e597..a0b37730 100644
--- a/js.gresource.xml
+++ b/js.gresource.xml
@@ -42,8 +42,10 @@
<file>modules/core/_cairo.js</file>
<file>modules/core/_common.js</file>
+ <file>modules/core/_encodings.js</file>
<file>modules/core/_format.js</file>
<file>modules/core/_gettext.js</file>
<file>modules/core/_signals.js</file>
+ <file>modules/core/_text.js</file>
</gresource>
</gresources>
diff --git a/modules/core/_encodings.js b/modules/core/_encodings.js
new file mode 100644
index 00000000..dbeeb6d6
--- /dev/null
+++ b/modules/core/_encodings.js
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Node.js contributors. All rights reserved.
+
+// Modified from https://github.com/nodejs/node/blob/78680c1cbc8b0c435963bc512e826b2a6227c315/lib/internal/encoding.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+/* exported getEncodingFromLabel */
+
+const encodings = new Map([
+ ['unicode-1-1-utf-8', 'utf-8'],
+ ['utf8', 'utf-8'],
+ ['utf-8', 'utf-8'],
+ ['866', 'ibm866'],
+ ['cp866', 'ibm866'],
+ ['csibm866', 'ibm866'],
+ ['ibm866', 'ibm866'],
+ ['csisolatin2', 'iso-8859-2'],
+ ['iso-8859-2', 'iso-8859-2'],
+ ['iso-ir-101', 'iso-8859-2'],
+ ['iso8859-2', 'iso-8859-2'],
+ ['iso88592', 'iso-8859-2'],
+ ['iso_8859-2', 'iso-8859-2'],
+ ['iso_8859-2:1987', 'iso-8859-2'],
+ ['l2', 'iso-8859-2'],
+ ['latin2', 'iso-8859-2'],
+ ['csisolatin3', 'iso-8859-3'],
+ ['iso-8859-3', 'iso-8859-3'],
+ ['iso-ir-109', 'iso-8859-3'],
+ ['iso8859-3', 'iso-8859-3'],
+ ['iso88593', 'iso-8859-3'],
+ ['iso_8859-3', 'iso-8859-3'],
+ ['iso_8859-3:1988', 'iso-8859-3'],
+ ['l3', 'iso-8859-3'],
+ ['latin3', 'iso-8859-3'],
+ ['csisolatin4', 'iso-8859-4'],
+ ['iso-8859-4', 'iso-8859-4'],
+ ['iso-ir-110', 'iso-8859-4'],
+ ['iso8859-4', 'iso-8859-4'],
+ ['iso88594', 'iso-8859-4'],
+ ['iso_8859-4', 'iso-8859-4'],
+ ['iso_8859-4:1988', 'iso-8859-4'],
+ ['l4', 'iso-8859-4'],
+ ['latin4', 'iso-8859-4'],
+ ['csisolatincyrillic', 'iso-8859-5'],
+ ['cyrillic', 'iso-8859-5'],
+ ['iso-8859-5', 'iso-8859-5'],
+ ['iso-ir-144', 'iso-8859-5'],
+ ['iso8859-5', 'iso-8859-5'],
+ ['iso88595', 'iso-8859-5'],
+ ['iso_8859-5', 'iso-8859-5'],
+ ['iso_8859-5:1988', 'iso-8859-5'],
+ ['arabic', 'iso-8859-6'],
+ ['asmo-708', 'iso-8859-6'],
+ ['csiso88596e', 'iso-8859-6'],
+ ['csiso88596i', 'iso-8859-6'],
+ ['csisolatinarabic', 'iso-8859-6'],
+ ['ecma-114', 'iso-8859-6'],
+ ['iso-8859-6', 'iso-8859-6'],
+ ['iso-8859-6-e', 'iso-8859-6'],
+ ['iso-8859-6-i', 'iso-8859-6'],
+ ['iso-ir-127', 'iso-8859-6'],
+ ['iso8859-6', 'iso-8859-6'],
+ ['iso88596', 'iso-8859-6'],
+ ['iso_8859-6', 'iso-8859-6'],
+ ['iso_8859-6:1987', 'iso-8859-6'],
+ ['csisolatingreek', 'iso-8859-7'],
+ ['ecma-118', 'iso-8859-7'],
+ ['elot_928', 'iso-8859-7'],
+ ['greek', 'iso-8859-7'],
+ ['greek8', 'iso-8859-7'],
+ ['iso-8859-7', 'iso-8859-7'],
+ ['iso-ir-126', 'iso-8859-7'],
+ ['iso8859-7', 'iso-8859-7'],
+ ['iso88597', 'iso-8859-7'],
+ ['iso_8859-7', 'iso-8859-7'],
+ ['iso_8859-7:1987', 'iso-8859-7'],
+ ['sun_eu_greek', 'iso-8859-7'],
+ ['csiso88598e', 'iso-8859-8'],
+ ['csisolatinhebrew', 'iso-8859-8'],
+ ['hebrew', 'iso-8859-8'],
+ ['iso-8859-8', 'iso-8859-8'],
+ ['iso-8859-8-e', 'iso-8859-8'],
+ ['iso-ir-138', 'iso-8859-8'],
+ ['iso8859-8', 'iso-8859-8'],
+ ['iso88598', 'iso-8859-8'],
+ ['iso_8859-8', 'iso-8859-8'],
+ ['iso_8859-8:1988', 'iso-8859-8'],
+ ['visual', 'iso-8859-8'],
+ ['csiso88598i', 'iso-8859-8-i'],
+ ['iso-8859-8-i', 'iso-8859-8-i'],
+ ['logical', 'iso-8859-8-i'],
+ ['csisolatin6', 'iso-8859-10'],
+ ['iso-8859-10', 'iso-8859-10'],
+ ['iso-ir-157', 'iso-8859-10'],
+ ['iso8859-10', 'iso-8859-10'],
+ ['iso885910', 'iso-8859-10'],
+ ['l6', 'iso-8859-10'],
+ ['latin6', 'iso-8859-10'],
+ ['iso-8859-13', 'iso-8859-13'],
+ ['iso8859-13', 'iso-8859-13'],
+ ['iso885913', 'iso-8859-13'],
+ ['iso-8859-14', 'iso-8859-14'],
+ ['iso8859-14', 'iso-8859-14'],
+ ['iso885914', 'iso-8859-14'],
+ ['csisolatin9', 'iso-8859-15'],
+ ['iso-8859-15', 'iso-8859-15'],
+ ['iso8859-15', 'iso-8859-15'],
+ ['iso885915', 'iso-8859-15'],
+ ['iso_8859-15', 'iso-8859-15'],
+ ['iso-8859-16', 'iso-8859-16'],
+ ['ISO-8859-16', 'iso-8859-16'],
+ ['l9', 'iso-8859-15'],
+ ['cskoi8r', 'koi8-r'],
+ ['koi', 'koi8-r'],
+ ['koi8', 'koi8-r'],
+ ['koi8-r', 'koi8-r'],
+ ['koi8_r', 'koi8-r'],
+ ['koi8-ru', 'koi8-u'],
+ ['koi8-u', 'koi8-u'],
+ ['csmacintosh', 'macintosh'],
+ ['mac', 'macintosh'],
+ ['macintosh', 'macintosh'],
+ ['x-mac-roman', 'macintosh'],
+ ['dos-874', 'windows-874'],
+ ['iso-8859-11', 'windows-874'],
+ ['iso8859-11', 'windows-874'],
+ ['iso885911', 'windows-874'],
+ ['tis-620', 'windows-874'],
+ ['windows-874', 'windows-874'],
+ ['cp1250', 'windows-1250'],
+ ['windows-1250', 'windows-1250'],
+ ['x-cp1250', 'windows-1250'],
+ ['cp1251', 'windows-1251'],
+ ['windows-1251', 'windows-1251'],
+ ['x-cp1251', 'windows-1251'],
+ ['ansi_x3.4-1968', 'windows-1252'],
+ ['ascii', 'windows-1252'],
+ ['cp1252', 'windows-1252'],
+ ['cp819', 'windows-1252'],
+ ['csisolatin1', 'windows-1252'],
+ ['ibm819', 'windows-1252'],
+ ['iso-8859-1', 'windows-1252'],
+ ['iso-ir-100', 'windows-1252'],
+ ['iso8859-1', 'windows-1252'],
+ ['iso88591', 'windows-1252'],
+ ['iso_8859-1', 'windows-1252'],
+ ['iso_8859-1:1987', 'windows-1252'],
+ ['l1', 'windows-1252'],
+ ['latin1', 'windows-1252'],
+ ['us-ascii', 'windows-1252'],
+ ['windows-1252', 'windows-1252'],
+ ['x-cp1252', 'windows-1252'],
+ ['cp1253', 'windows-1253'],
+ ['windows-1253', 'windows-1253'],
+ ['x-cp1253', 'windows-1253'],
+ ['cp1254', 'windows-1254'],
+ ['csisolatin5', 'windows-1254'],
+ ['iso-8859-9', 'windows-1254'],
+ ['iso-ir-148', 'windows-1254'],
+ ['iso8859-9', 'windows-1254'],
+ ['iso88599', 'windows-1254'],
+ ['iso_8859-9', 'windows-1254'],
+ ['iso_8859-9:1989', 'windows-1254'],
+ ['l5', 'windows-1254'],
+ ['latin5', 'windows-1254'],
+ ['windows-1254', 'windows-1254'],
+ ['x-cp1254', 'windows-1254'],
+ ['cp1255', 'windows-1255'],
+ ['windows-1255', 'windows-1255'],
+ ['x-cp1255', 'windows-1255'],
+ ['cp1256', 'windows-1256'],
+ ['windows-1256', 'windows-1256'],
+ ['x-cp1256', 'windows-1256'],
+ ['cp1257', 'windows-1257'],
+ ['windows-1257', 'windows-1257'],
+ ['x-cp1257', 'windows-1257'],
+ ['cp1258', 'windows-1258'],
+ ['windows-1258', 'windows-1258'],
+ ['x-cp1258', 'windows-1258'],
+ ['x-mac-cyrillic', 'x-mac-cyrillic'],
+ ['x-mac-ukrainian', 'x-mac-cyrillic'],
+ ['chinese', 'gbk'],
+ ['csgb2312', 'gbk'],
+ ['csiso58gb231280', 'gbk'],
+ ['gb2312', 'gbk'],
+ ['gb_2312', 'gbk'],
+ ['gb_2312-80', 'gbk'],
+ ['gbk', 'gbk'],
+ ['iso-ir-58', 'gbk'],
+ ['x-gbk', 'gbk'],
+ ['gb18030', 'gb18030'],
+ ['big5', 'big5'],
+ ['big5-hkscs', 'big5'],
+ ['cn-big5', 'big5'],
+ ['csbig5', 'big5'],
+ ['x-x-big5', 'big5'],
+ ['cseucpkdfmtjapanese', 'euc-jp'],
+ ['euc-jp', 'euc-jp'],
+ ['x-euc-jp', 'euc-jp'],
+ ['csiso2022jp', 'iso-2022-jp'],
+ ['iso-2022-jp', 'iso-2022-jp'],
+ ['csshiftjis', 'shift_jis'],
+ ['ms932', 'shift_jis'],
+ ['ms_kanji', 'shift_jis'],
+ ['shift-jis', 'shift_jis'],
+ ['shift_jis', 'shift_jis'],
+ ['sjis', 'shift_jis'],
+ ['windows-31j', 'shift_jis'],
+ ['x-sjis', 'shift_jis'],
+ ['cseuckr', 'euc-kr'],
+ ['csksc56011987', 'euc-kr'],
+ ['euc-kr', 'euc-kr'],
+ ['iso-ir-149', 'euc-kr'],
+ ['korean', 'euc-kr'],
+ ['ks_c_5601-1987', 'euc-kr'],
+ ['ks_c_5601-1989', 'euc-kr'],
+ ['ksc5601', 'euc-kr'],
+ ['ksc_5601', 'euc-kr'],
+ ['windows-949', 'euc-kr'],
+ ['utf-16be', 'utf-16be'],
+ ['utf-16le', 'utf-16le'],
+ ['utf-16', 'utf-16le'],
+]);
+
+ // Some of the web-specified encoding names are not recognized by iconv.
+ // This maps such a name (a value of the `encodings` table) to the name
+ // that is used internally instead.
+ const internalEncodings = new Map([
+ ['x-mac-cyrillic', 'MacCyrillic'],
+ // For our purposes iso-8859-8-i can be treated as iso-8859-8
+ ['iso-8859-8-i', 'iso-8859-8'],
+]);
+
+/**
+ * Trims ASCII whitespace from a string.
+ * `String.prototype.trim` removes non-ASCII whitespace.
+ *
+ * @param {string} label the label to trim
+ * @returns {string}
+ */
+const trimAsciiWhitespace = label => {
+ let s = 0;
+ let e = label.length;
+ while (s < e && (
+ label[s] === '\u0009' ||
+ label[s] === '\u000a' ||
+ label[s] === '\u000c' ||
+ label[s] === '\u000d' ||
+ label[s] === '\u0020'))
+ s++;
+
+ while (e > s && (
+ label[e - 1] === '\u0009' ||
+ label[e - 1] === '\u000a' ||
+ label[e - 1] === '\u000c' ||
+ label[e - 1] === '\u000d' ||
+ label[e - 1] === '\u0020'))
+ e--;
+
+ return label.slice(s, e);
+};
+
+/**
+ * @param {string} label the encoding label
+ * @returns {string | undefined}
+ */
+function getEncodingFromLabel(label) {
+ const enc = encodings.get(label);
+
+ if (enc !== undefined) {
+ return {
+ internal: internalEncodings.get(enc),
+ external: enc,
+ };
+ }
+
+
+ const trimmed = encodings.get(trimAsciiWhitespace(label.toLowerCase()));
+
+ return {internal: internalEncodings.get(trimmed), external: trimmed};
+}
diff --git a/modules/core/_text.js b/modules/core/_text.js
new file mode 100644
index 00000000..9bdc7ef2
--- /dev/null
+++ b/modules/core/_text.js
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+const Encoding = imports._encodingNative;
+
+const { getEncodingFromLabel } = imports._encodings;
+
+var TextDecoder = class TextDecoder {
+ /**
+ * @type {string}
+ */
+ encoding;
+
+ /**
+ * @type {boolean}
+ */
+ ignoreBOM;
+
+ /**
+ * @type {boolean}
+ */
+ fatal;
+
+ get [Symbol.toStringTag]() {
+ return 'TextDecoder';
+ }
+
+ /**
+ * @param {string} encoding
+ * @param {object} [options]
+ * @param {boolean=} options.fatal
+ * @param {boolean=} options.ignoreBOM
+ */
+ constructor(encoding = 'utf-8', options = {}) {
+ const { fatal = false, ignoreBOM = false } = options;
+
+ const encodings = getEncodingFromLabel(encoding);
+ const enc = encodings.internal ?? encodings.external;
+
+ if (enc === undefined) {
+ throw new Error(`Invalid encoding label: '${encoding}'`);
+ }
+
+ Object.defineProperty(this, '_internalEncoding', {
+ value: enc,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'encoding', {
+ value: encodings.external,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'ignoreBOM', {
+ value: ignoreBOM,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'fatal', {
+ value: fatal,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+ }
+
+ decode(bytes, options = {}) {
+ const { stream = false } = options;
+
+ if (stream) {
+ throw new Error(`TextDecoder does not implement the 'stream' option.`);
+ }
+
+ /** @type {Uint8Array} */
+ let input;
+
+ if (bytes instanceof ArrayBuffer) {
+ input = new Uint8Array(bytes);
+ } else if (bytes instanceof Uint8Array) {
+ input = bytes;
+ } else if (bytes instanceof Object.getPrototypeOf(Uint8Array)) {
+ let { buffer, byteLength, byteOffset } = /** @type {Uint32Array} */ (bytes);
+ input = new Uint8Array(buffer, byteOffset, byteLength);
+ } else if (bytes === undefined) {
+ input = new Uint8Array(0);
+ } else {
+ throw new Error(`Provided input cannot be converted to ArrayBufferView or ArrayBuffer`);
+ }
+
+ if (this.ignoreBOM && input.length > 2 && input[0] === 0xEF && input[1] === 0xBB && input[2] === 0xBF) {
+ if (this.encoding !== 'utf-8') {
+ throw new Error(`Cannot ignore BOM for non-UTF8 encoding.`);
+ }
+
+ let { buffer, byteLength, byteOffset } = input;
+ input = new Uint8Array(buffer, byteOffset + 3, byteLength - 3);
+ }
+
+ return Encoding.decode(input, this.fatal, this._internalEncoding);
+ }
+}
+
+var TextEncoder = class TextEncoder {
+ get [Symbol.toStringTag]() {
+ return 'TextEncoder';
+ }
+
+ get encoding() {
+ return 'utf-8';
+ }
+
+ encode(input = '') {
+ // The TextEncoder specification only allows for UTF-8 encoding.
+ return Encoding.encode(`${input}`, 'UTF-8');
+ }
+
+ encodeInto(input = '', output = new Uint8Array()) {
+ // The TextEncoder specification only allows for UTF-8 encoding.
+ return Encoding.encodeInto(`${input}`, output);
+ }
+} \ No newline at end of file
diff --git a/modules/core/overrides/GLib.js b/modules/core/overrides/GLib.js
index 5e3800a9..e4dca1a1 100644
--- a/modules/core/overrides/GLib.js
+++ b/modules/core/overrides/GLib.js
@@ -50,13 +50,6 @@ function _readSingleType(signature, forceSimple) {
return [char];
}
-function _makeBytes(byteArray) {
- if (byteArray instanceof Uint8Array || byteArray instanceof ByteArray.ByteArray)
- return ByteArray.toGBytes(byteArray);
- else
- return new GLib.Bytes(byteArray);
-}
-
function _packVariant(signature, value) {
if (signature.length === 0)
throw new TypeError('GVariant signature cannot be empty');
@@ -113,7 +106,7 @@ function _packVariant(signature, value) {
byteArray = Uint8Array.of(...byteArray, 0);
bytes = ByteArray.toGBytes(byteArray);
} else {
- bytes = _makeBytes(value);
+ bytes = new GLib.Bytes(value);
}
return GLib.Variant.new_from_bytes(new GLib.VariantType('ay'),
bytes, true);
diff --git a/modules/script/_bootstrap/default.js b/modules/script/_bootstrap/default.js
index 952d7fe3..fe354a02 100644
--- a/modules/script/_bootstrap/default.js
+++ b/modules/script/_bootstrap/default.js
@@ -6,6 +6,7 @@
'use strict';
const {print, printerr, log, logError} = imports._print;
+ const {TextEncoder, TextDecoder} = imports._text;
Object.defineProperties(exports, {
ARGV: {
@@ -16,6 +17,18 @@
return imports.system.programArgs;
},
},
+ TextEncoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextEncoder,
+ },
+ TextDecoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextDecoder,
+ },
print: {
configurable: false,
enumerable: true,
diff --git a/modules/script/byteArray.js b/modules/script/byteArray.js
index e0b650ac..e127a9c6 100644
--- a/modules/script/byteArray.js
+++ b/modules/script/byteArray.js
@@ -41,7 +41,7 @@ function toString(array, encoding = 'utf-8') {
if (!(array instanceof Uint8Array))
throw new Error('Argument to ByteArray.toString() must be a Uint8Array');
- return Encoding.toString(array, encoding);
+ return Encoding.decode(array, true, encoding);
}
/**
@@ -50,7 +50,7 @@ function toString(array, encoding = 'utf-8') {
* @returns {Uint8Array}
*/
function fromString(str, encoding = 'utf-8') {
- const array = Encoding.fromString(str, encoding);
+ const array = Encoding.encode(str, encoding);
defineToString(array);