diff options
Diffstat (limited to 'deps/v8/src/third_party/utf8-decoder')
-rw-r--r-- | deps/v8/src/third_party/utf8-decoder/LICENSE | 19 | ||||
-rw-r--r-- | deps/v8/src/third_party/utf8-decoder/README.v8 | 18 | ||||
-rw-r--r-- | deps/v8/src/third_party/utf8-decoder/utf8-decoder.h | 78 |
3 files changed, 115 insertions, 0 deletions
diff --git a/deps/v8/src/third_party/utf8-decoder/LICENSE b/deps/v8/src/third_party/utf8-decoder/LICENSE new file mode 100644 index 0000000000..b59bef2fb6 --- /dev/null +++ b/deps/v8/src/third_party/utf8-decoder/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/deps/v8/src/third_party/utf8-decoder/README.v8 b/deps/v8/src/third_party/utf8-decoder/README.v8 new file mode 100644 index 0000000000..e1e13ce53f --- /dev/null +++ b/deps/v8/src/third_party/utf8-decoder/README.v8 @@ -0,0 +1,18 @@ +Name: DFA UTF-8 Decoder +Short Name: utf8-decoder +URL: http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ +Version: 0 +License: MIT +License File: NOT_SHIPPED +Security Critical: no + +Description: +Decodes UTF-8 bytes using a fast and simple deterministic finite automaton. 
+ +Local modifications: +- Rejection state has been mapped to row 0 (instead of row 1) of the DFA, + saving some 50 bytes and making the table easier to reason about. +- The transitions have been remapped to represent both a state transition and a + bit mask for the incoming byte. +- The caller must now zero out the code point buffer after successful or + unsuccessful state transitions. diff --git a/deps/v8/src/third_party/utf8-decoder/utf8-decoder.h b/deps/v8/src/third_party/utf8-decoder/utf8-decoder.h new file mode 100644 index 0000000000..5668e5ad9e --- /dev/null +++ b/deps/v8/src/third_party/utf8-decoder/utf8-decoder.h @@ -0,0 +1,78 @@ +// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. +// The remapped transition table is justified at +// https://docs.google.com/spreadsheets/d/1AZcQwuEL93HmNCljJWUwFMGqf7JAQ0puawZaUgP0E14 + +#include <stdint.h> + +#ifndef __UTF8_DFA_DECODER_H +#define __UTF8_DFA_DECODER_H + +namespace Utf8DfaDecoder { + +enum State : uint8_t { + kReject = 0, + kAccept = 12, + kTwoByte = 24, + kThreeByte = 36, + kThreeByteLowMid = 48, + kFourByte = 60, + kFourByteLow = 72, + kThreeByteHigh = 84, + kFourByteMidHigh = 96, +}; + +static inline void Decode(uint8_t byte, State* state, uint32_t* buffer) { + // This first table maps each input byte to the type of transition it triggers. 
+ static constexpr uint8_t transitions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00-0F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10-1F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-2F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30-3F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-5F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60-6F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-7F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80-8F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 90-9F + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // A0-AF + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // B0-BF + 9, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // C0-CF + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // D0-DF + 10, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, // E0-EF + 11, 7, 7, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // F0-FF + }; + + // This second table maps a state plus a transition type to the next state. 
+ // 00-7F + // | 80-8F + // | | 90-9F + // | | | A0-BF + // | | | | C2-DF + // | | | | | E1-EC, EE, EF + // | | | | | | ED + // | | | | | | | F1-F3 + // | | | | | | | | F4 + // | | | | | | | | | C0, C1, F5-FF + // | | | | | | | | | | E0 + // | | | | | | | | | | | F0 + static constexpr uint8_t states[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // REJECT = 0 + 12, 0, 0, 0, 24, 36, 48, 60, 72, 0, 84, 96, // ACCEPT = 12 + 0, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, // 2-byte = 24 + 0, 24, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, // 3-byte = 36 + 0, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3-byte low/mid = 48 + 0, 36, 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, // 4-byte = 60 + 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4-byte low = 72 + 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, // 3-byte high = 84 + 0, 0, 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, // 4-byte mid/high = 96 + }; + + DCHECK_NE(*state, State::kReject); + uint8_t type = transitions[byte]; + *state = static_cast<State>(states[*state + type]); + *buffer = (*buffer << 6) | (byte & (0x7F >> (type >> 1))); +} + +} // namespace Utf8DfaDecoder + +#endif /* __UTF8_DFA_DECODER_H */ |