summaryrefslogtreecommitdiff
path: root/deps/v8/src/third_party/utf8-decoder
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/third_party/utf8-decoder')
-rw-r--r--deps/v8/src/third_party/utf8-decoder/LICENSE19
-rw-r--r--deps/v8/src/third_party/utf8-decoder/README.v818
-rw-r--r--deps/v8/src/third_party/utf8-decoder/utf8-decoder.h78
3 files changed, 115 insertions, 0 deletions
diff --git a/deps/v8/src/third_party/utf8-decoder/LICENSE b/deps/v8/src/third_party/utf8-decoder/LICENSE
new file mode 100644
index 0000000000..b59bef2fb6
--- /dev/null
+++ b/deps/v8/src/third_party/utf8-decoder/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/deps/v8/src/third_party/utf8-decoder/README.v8 b/deps/v8/src/third_party/utf8-decoder/README.v8
new file mode 100644
index 0000000000..e1e13ce53f
--- /dev/null
+++ b/deps/v8/src/third_party/utf8-decoder/README.v8
@@ -0,0 +1,18 @@
+Name: DFA UTF-8 Decoder
+Short Name: utf8-decoder
+URL: http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+Version: 0
+License: MIT
+License File: NOT_SHIPPED
+Security Critical: no
+
+Description:
+Decodes UTF-8 bytes using a fast and simple deterministic finite automaton (DFA).
+
+Local modifications:
+- Rejection state has been mapped to row 0 (instead of row 1) of the DFA,
+ saving some 50 bytes and making the table easier to reason about.
+- The transitions have been remapped to represent both a state transition and a
+ bit mask for the incoming byte.
+- The caller must now zero out the code point buffer once the decoder reaches
+ the accept or reject state, i.e. after each complete or invalid sequence.
diff --git a/deps/v8/src/third_party/utf8-decoder/utf8-decoder.h b/deps/v8/src/third_party/utf8-decoder/utf8-decoder.h
new file mode 100644
index 0000000000..5668e5ad9e
--- /dev/null
+++ b/deps/v8/src/third_party/utf8-decoder/utf8-decoder.h
@@ -0,0 +1,78 @@
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+// The remapped transition table is justified at
+// https://docs.google.com/spreadsheets/d/1AZcQwuEL93HmNCljJWUwFMGqf7JAQ0puawZaUgP0E14
+
+#include <stdint.h>
+
+#ifndef __UTF8_DFA_DECODER_H
+#define __UTF8_DFA_DECODER_H
+
+namespace Utf8DfaDecoder {
+
+enum State : uint8_t {  // Each value is the offset of that state's 12-entry row in Decode's `states` table.
+ kReject = 0,  // Invalid input was seen; every transition out of this row leads back to kReject.
+ kAccept = 12,  // Between code points: 00-7F stays here, a valid lead byte selects one of the states below.
+ kTwoByte = 24,  // Entered after C2-DF: one continuation byte (80-BF) leads to kAccept.
+ kThreeByte = 36,  // Entered after E1-EC, EE, EF: any continuation byte (80-BF) leads to kTwoByte.
+ kThreeByteLowMid = 48,  // Entered after ED: only 80-9F is allowed next (leads to kTwoByte).
+ kFourByte = 60,  // Entered after F1-F3: any continuation byte (80-BF) leads to kThreeByte.
+ kFourByteLow = 72,  // Entered after F4: only 80-8F is allowed next (leads to kThreeByte).
+ kThreeByteHigh = 84,  // Entered after E0: only A0-BF is allowed next (leads to kTwoByte).
+ kFourByteMidHigh = 96,  // Entered after F0: only 90-BF is allowed next (leads to kThreeByte).
+};
+
+static inline void Decode(uint8_t byte, State* state, uint32_t* buffer) {  // Feeds one input byte to the DFA, updating *state and *buffer in place.
+ // This first table maps each byte value (the index) to a transition type in the range 0-11.
+ static constexpr uint8_t transitions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00-0F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10-1F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20-2F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30-3F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40-4F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50-5F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60-6F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70-7F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80-8F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 90-9F
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // A0-AF
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // B0-BF
+ 9, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // C0-CF
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // D0-DF
+ 10, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, // E0-EF
+ 11, 7, 7, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // F0-FF
+ };
+ // Layout of the table below: one 12-entry row per state (row offsets equal the State enum values), one column per transition type 0-11.
+ // This second table maps a state to a new state when adding a transition.
+ // 00-7F
+ // | 80-8F
+ // | | 90-9F
+ // | | | A0-BF
+ // | | | | C2-DF
+ // | | | | | E1-EC, EE, EF
+ // | | | | | | ED
+ // | | | | | | | F1-F3
+ // | | | | | | | | F4
+ // | | | | | | | | | C0, C1, F5-FF
+ // | | | | | | | | | | E0
+ // | | | | | | | | | | | F0
+ static constexpr uint8_t states[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // REJECT = 0
+ 12, 0, 0, 0, 24, 36, 48, 60, 72, 0, 84, 96, // ACCEPT = 12
+ 0, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, // 2-byte = 24
+ 0, 24, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, // 3-byte = 36
+ 0, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3-byte low/mid = 48
+ 0, 36, 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, // 4-byte = 60
+ 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4-byte low = 72
+ 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, // 3-byte high = 84
+ 0, 0, 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, // 4-byte mid/high = 96
+ };
+ // *buffer is only ever shifted and OR-ed below, never cleared, so the caller has to reset it between code points.
+ DCHECK_NE(*state, State::kReject);  // NOTE(review): DCHECK_NE is not declared in this header; presumably supplied by the including file. Confirm.
+ uint8_t type = transitions[byte];  // Transition type (0-11) of the incoming byte.
+ *state = static_cast<State>(states[*state + type]);  // Row offset (*state) plus column (type) selects the next state.
+ *buffer = (*buffer << 6) | (byte & (0x7F >> (type >> 1)));  // Make room for 6 bits, then add the byte with its marker bits masked off; the mask width depends on the type.
+}
+
+} // namespace Utf8DfaDecoder
+
+#endif /* __UTF8_DFA_DECODER_H */