summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- benchmark/util/text-decoder.js 11
-rw-r--r-- lib/internal/encoding.js 12
-rw-r--r-- src/node_buffer.cc 11
3 files changed, 25 insertions, 9 deletions
diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js
index 3d1ccc34bb..a669502860 100644
--- a/benchmark/util/text-decoder.js
+++ b/benchmark/util/text-decoder.js
@@ -5,13 +5,14 @@ const common = require('../common.js');
const bench = common.createBenchmark(main, {
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
ignoreBOM: [0, 1],
+ fatal: [0, 1],
len: [256, 1024 * 16, 1024 * 512],
n: [1e2],
type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer']
});
-function main({ encoding, len, n, ignoreBOM, type }) {
- const decoder = new TextDecoder(encoding, { ignoreBOM });
+function main({ encoding, len, n, ignoreBOM, type, fatal }) {
+ const decoder = new TextDecoder(encoding, { ignoreBOM, fatal });
let buf;
switch (type) {
@@ -31,7 +32,11 @@ function main({ encoding, len, n, ignoreBOM, type }) {
bench.start();
for (let i = 0; i < n; i++) {
- decoder.decode(buf);
+ try {
+ decoder.decode(buf);
+ } catch {
+ // Intentionally ignored: decode() can throw when `fatal` is enabled.
+ }
}
bench.end(n);
}
diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index 5cf20ea04c..e14a8c7491 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -29,6 +29,7 @@ const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
+const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');
@@ -396,17 +397,16 @@ function makeTextDecoderICU() {
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}
- // Only support fast path for UTF-8 without FATAL flag
- const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
-
this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
- this[kUTF8FastPath] = fastPathAvailable;
+ this[kFatal] = Boolean(options?.fatal);
+ // Only support fast path for UTF-8.
+ this[kUTF8FastPath] = enc === 'utf-8';
this[kHandle] = undefined;
- if (!fastPathAvailable) {
+ if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}
@@ -425,7 +425,7 @@ function makeTextDecoderICU() {
this[kUTF8FastPath] &&= !(options?.stream);
if (this[kUTF8FastPath]) {
- return decodeUTF8(input, this[kIgnoreBOM]);
+ return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}
this.#prepareConverter();
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index 0f3b048272..4a0ffbbca3 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -28,6 +28,7 @@
#include "node_internals.h"
#include "env-inl.h"
+#include "simdutf.h"
#include "string_bytes.h"
#include "string_search.h"
#include "util-inl.h"
@@ -583,10 +584,20 @@ void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
ArrayBufferViewContents<char> buffer(args[0]);
bool ignore_bom = args[1]->IsTrue();
+ bool has_fatal = args[2]->IsTrue();
const char* data = buffer.data();
size_t length = buffer.length();
+ if (has_fatal) {
+ auto result = simdutf::validate_utf8_with_errors(data, length);
+
+ if (result.error) {
+ return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
+ env->isolate(), "The encoded data was not valid for encoding utf-8");
+ }
+ }
+
if (!ignore_bom && length >= 3) {
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
data += 3;