summaryrefslogtreecommitdiff
path: root/deps/v8/src/strings/unicode.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/strings/unicode.cc')
-rw-r--r--deps/v8/src/strings/unicode.cc31
1 files changed, 31 insertions, 0 deletions
diff --git a/deps/v8/src/strings/unicode.cc b/deps/v8/src/strings/unicode.cc
index 0a9b3bbb3d..d98d946f16 100644
--- a/deps/v8/src/strings/unicode.cc
+++ b/deps/v8/src/strings/unicode.cc
@@ -239,6 +239,37 @@ bool Utf8::ValidateEncoding(const byte* bytes, size_t length) {
return state == State::kAccept;
}
+// static
+void Utf16::ReplaceUnpairedSurrogates(const uint16_t* source_code_units,
+                                      uint16_t* dest_code_units,
+                                      size_t length) {
+  // Written in place of every surrogate that lacks a partner: U+FFFD.
+  constexpr uint16_t kReplacementChar = 0xFFFD;
+
+  size_t pos = 0;
+  while (pos < length) {
+    const size_t unit_pos = pos++;
+    const uint16_t unit = source_code_units[unit_pos];
+    if (!IsLeadSurrogate(unit)) {
+      // A trailing surrogate seen here has no lead in front of it (paired
+      // ones are consumed below); any other code unit is copied verbatim.
+      dest_code_units[unit_pos] =
+          IsTrailSurrogate(unit) ? kReplacementChar : unit;
+      continue;
+    }
+    // Leading surrogate: it is kept only when a trailing surrogate follows
+    // it immediately and still lies inside the buffer.
+    if (pos < length && IsTrailSurrogate(source_code_units[pos])) {
+      // Copy the trailing half first, then the lead, and step past both.
+      dest_code_units[pos] = source_code_units[pos];
+      dest_code_units[unit_pos] = unit;
+      ++pos;
+    } else {
+      dest_code_units[unit_pos] = kReplacementChar;
+    }
+  }
+}
+
#if V8_ENABLE_WEBASSEMBLY
bool Wtf8::ValidateEncoding(const byte* bytes, size_t length) {
using State = GeneralizedUtf8DfaDecoder::State;