diff options
Diffstat (limited to 'deps/v8/src/strings/unicode.cc')
-rw-r--r-- | deps/v8/src/strings/unicode.cc | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/deps/v8/src/strings/unicode.cc b/deps/v8/src/strings/unicode.cc index 0a9b3bbb3d..d98d946f16 100644 --- a/deps/v8/src/strings/unicode.cc +++ b/deps/v8/src/strings/unicode.cc @@ -239,6 +239,37 @@ bool Utf8::ValidateEncoding(const byte* bytes, size_t length) { return state == State::kAccept; } +// static +void Utf16::ReplaceUnpairedSurrogates(const uint16_t* source_code_units, + uint16_t* dest_code_units, + size_t length) { + // U+FFFD (REPLACEMENT CHARACTER) + constexpr uint16_t kReplacement = 0xFFFD; + + for (size_t i = 0; i < length; i++) { + const uint16_t source_code_unit = source_code_units[i]; + const size_t copy_index = i; + uint16_t dest_code_unit = source_code_unit; + if (IsLeadSurrogate(source_code_unit)) { + // The current code unit is a leading surrogate. If it's not followed by a + // trailing surrogate, replace it with the replacement character. + if (i == length - 1 || !IsTrailSurrogate(source_code_units[i + 1])) { + dest_code_unit = kReplacement; + } else { + // Copy the paired trailing surrogate. The paired leading surrogate will + // be copied below. + ++i; + dest_code_units[i] = source_code_units[i]; + } + } else if (IsTrailSurrogate(source_code_unit)) { + // All paired trailing surrogates are skipped above, so this branch is + // only for those that are unpaired. + dest_code_unit = kReplacement; + } + dest_code_units[copy_index] = dest_code_unit; + } +} + #if V8_ENABLE_WEBASSEMBLY bool Wtf8::ValidateEncoding(const byte* bytes, size_t length) { using State = GeneralizedUtf8DfaDecoder::State; |