summaryrefslogtreecommitdiff
path: root/deps/v8/src/strings/char-predicates-inl.h
blob: 4c43172ff4008e147e565c15cbc23c2419bd8d34 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_
#define V8_STRINGS_CHAR_PREDICATES_INL_H_

#include "src/base/bounds.h"
#include "src/strings/char-predicates.h"
#include "src/utils/utils.h"

namespace v8 {
namespace internal {

// Folds an ASCII letter to lower case by forcing the 0x20 bit on.
// For c in 'A'-'Z' or 'a'-'z' the result is the lower-case letter. For any
// other c the result is never an ASCII letter, so callers may range-check the
// result against 'a'-'z' directly.
// Note: it ignores LOCALE.
inline constexpr int AsciiAlphaToLower(uc32 c) {
  constexpr int kAsciiCaseBit = 0x20;
  return c | kAsciiCaseBit;
}

// True iff c is U+000D (carriage return).
inline constexpr bool IsCarriageReturn(uc32 c) { return c == '\r'; }

// True iff c is U+000A (line feed).
inline constexpr bool IsLineFeed(uc32 c) { return c == '\n'; }

// True if c is '$', '_', or satisfies IsAlphaNumeric().
inline constexpr bool IsAsciiIdentifier(uc32 c) {
  if (c == '$' || c == '_') return true;
  return IsAlphaNumeric(c);
}

// True if c is a decimal digit or an ASCII letter of either case.
inline constexpr bool IsAlphaNumeric(uc32 c) {
  if (IsDecimalDigit(c)) return true;
  // Fold case first so a single range check covers 'A'-'Z' and 'a'-'z'.
  return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z');
}

// Range check for the decimal digits '0'-'9'.
inline constexpr bool IsDecimalDigit(uc32 c) {
  // ECMA-262, 3rd, 7.8.3 (p 16)
  constexpr char kFirstDigit = '0';
  constexpr char kLastDigit = '9';
  return base::IsInRange(c, kFirstDigit, kLastDigit);
}

// True if c is a decimal digit or a letter 'a'-'f' in either case.
inline constexpr bool IsHexDigit(uc32 c) {
  // ECMA-262, 3rd, 7.6 (p 15)
  if (IsDecimalDigit(c)) return true;
  // Fold case so that 'A'-'F' and 'a'-'f' share one range check.
  return base::IsInRange(AsciiAlphaToLower(c), 'a', 'f');
}

// Range check for the octal digits '0'-'7'.
inline constexpr bool IsOctalDigit(uc32 c) {
  // ECMA-262, 6th, 7.8.3
  constexpr char kFirstOctal = '0';
  constexpr char kLastOctal = '7';
  return base::IsInRange(c, kFirstOctal, kLastOctal);
}

// Range check for the decimal digits that are not octal digits: '8'-'9'.
inline constexpr bool IsNonOctalDecimalDigit(uc32 c) {
  constexpr char kFirstNonOctal = '8';
  constexpr char kLastNonOctal = '9';
  return base::IsInRange(c, kFirstNonOctal, kLastNonOctal);
}

// True iff c is '0' or '1'.
inline constexpr bool IsBinaryDigit(uc32 c) {
  // ECMA-262, 6th, 7.8.3
  const bool is_zero = c == '0';
  const bool is_one = c == '1';
  return is_zero || is_one;
}

// Range check for the lower-case ASCII letters 'a'-'z'.
inline constexpr bool IsAsciiLower(uc32 c) {
  constexpr char kFirstLower = 'a';
  constexpr char kLastLower = 'z';
  return base::IsInRange(c, kFirstLower, kLastLower);
}

// Range check for the upper-case ASCII letters 'A'-'Z'.
inline constexpr bool IsAsciiUpper(uc32 c) {
  constexpr char kFirstUpper = 'A';
  constexpr char kLastUpper = 'Z';
  return base::IsInRange(c, kFirstUpper, kLastUpper);
}

// Maps 'a'-'z' to the corresponding upper-case letter; every other value is
// returned unchanged. Written without a conditional: the IsAsciiLower()
// result (0 or 1) is shifted into the 0x20 case bit, which is then cleared.
inline constexpr uc32 ToAsciiUpper(uc32 c) {
  const int case_bit = IsAsciiLower(c) << 5;
  return c & ~case_bit;
}

// Maps 'A'-'Z' to the corresponding lower-case letter; every other value is
// returned unchanged. Written without a conditional: the IsAsciiUpper()
// result (0 or 1) is shifted into the 0x20 case bit, which is then set.
inline constexpr uc32 ToAsciiLower(uc32 c) {
  const int case_bit = IsAsciiUpper(c) << 5;
  return c | case_bit;
}

// True if c is '_' or satisfies IsAlphaNumeric().
inline constexpr bool IsRegExpWord(uc32 c) {
  if (c == '_') return true;
  return IsAlphaNumeric(c);
}

// Constexpr cache table for character flags.
// Each enumerator is a distinct bit so several can be OR-ed into one byte.
enum OneByteCharFlags {
  // Character may begin an identifier.
  kIsIdentifierStart = 0x01,
  // Character may appear inside an identifier.
  kIsIdentifierPart = 0x02,
  // Character is white space (line terminators excluded).
  kIsWhiteSpace = 0x04,
  // Character is white space or '\r' / '\n'.
  kIsWhiteSpaceOrLineTerminator = 0x08,
  // Character is '\r', '\n', or equals the low byte of 0x2028 / 0x2029.
  kMaybeLineEnd = 0x10,
};

// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
// ID_Start. Additionally includes '_' and '$'.
constexpr bool IsOneByteIDStart(uc32 c) {
  // '$' and '_'.
  if (c == 0x0024 || c == 0x005F) return true;
  // 'A'-'Z'.
  if (c >= 0x0041 && c <= 0x005A) return true;
  // 'a'-'z'.
  if (c >= 0x0061 && c <= 0x007A) return true;
  // Isolated Latin-1 entries: U+00AA, U+00B5, U+00BA.
  if (c == 0x00AA || c == 0x00B5 || c == 0x00BA) return true;
  // U+00C0-U+00FF with the two gaps U+00D7 and U+00F7 removed.
  return c >= 0x00C0 && c <= 0x00FF && c != 0x00D7 && c != 0x00F7;
}

// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
// ID_Continue. Additionally includes '_' and '$'.
constexpr bool IsOneByteIDContinue(uc32 c) {
  // '$' and '_'.
  if (c == 0x0024 || c == 0x005F) return true;
  // '0'-'9'.
  if (c >= 0x0030 && c <= 0x0039) return true;
  // 'A'-'Z'.
  if (c >= 0x0041 && c <= 0x005A) return true;
  // 'a'-'z'.
  if (c >= 0x0061 && c <= 0x007A) return true;
  // Isolated Latin-1 entries: U+00AA, U+00B5, U+00B7, U+00BA.
  if (c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA) return true;
  // U+00C0-U+00FF with the two gaps U+00D7 and U+00F7 removed.
  return c >= 0x00C0 && c <= 0x00FF && c != 0x00D7 && c != 0x00F7;
}

// True for the white-space characters recognised here: tab, vertical tab,
// form feed, space and 0xA0. Line terminators are not included.
constexpr bool IsOneByteWhitespace(uc32 c) {
  switch (c) {
    case '\t':
    case '\v':
    case '\f':
    case ' ':
    case u'\xa0':
      return true;
    default:
      return false;
  }
}

// Computes the OneByteCharFlags bit set for a single character value.
// Returns the OR of every flag that applies to c (0 if none does).
constexpr uint8_t BuildOneByteCharFlags(uc32 c) {
  // A backslash is flagged as both identifier start and identifier part.
  // NOTE(review): presumably because it can introduce an escape sequence
  // inside an identifier -- confirm against the scanner.
  const bool is_backslash = c == '\\';
  const bool is_cr_or_lf = c == '\r' || c == '\n';
  // Markers to identify 0x2028 and 0x2029: the values those code points
  // become when truncated to uint8_t.
  const bool is_truncated_separator = c == static_cast<uint8_t>(0x2028) ||
                                      c == static_cast<uint8_t>(0x2029);

  uint8_t flags = 0;
  if (is_backslash || IsOneByteIDStart(c)) flags |= kIsIdentifierStart;
  if (is_backslash || IsOneByteIDContinue(c)) flags |= kIsIdentifierPart;
  if (IsOneByteWhitespace(c)) {
    flags |= kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator;
  }
  if (is_cr_or_lf) {
    flags |= kIsWhiteSpaceOrLineTerminator | kMaybeLineEnd;
  }
  if (is_truncated_separator) flags |= kMaybeLineEnd;
  return flags;
}
const constexpr uint8_t kOneByteCharFlags[256] = {
#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N),
    INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
#undef BUILD_CHAR_FLAGS
#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128),
        INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
#undef BUILD_CHAR_FLAGS
};

// Returns whether c may start an identifier.
// Values in the 0-255 range are answered from kOneByteCharFlags; anything
// else is delegated to IsIdentifierStartSlow().
// Marked inline explicitly: this is a function definition in a header, and
// the other definitions at the top of this file spell `inline` out as well.
inline bool IsIdentifierStart(uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsIdentifierStartSlow(c);
  const bool cached = (kOneByteCharFlags[c] & kIsIdentifierStart) != 0;
  // Cross-check the table entry against the slow path.
  DCHECK_EQ(IsIdentifierStartSlow(c), cached);
  return cached;
}

// Returns whether c may appear inside an identifier.
// Values in the 0-255 range are answered from kOneByteCharFlags; anything
// else is delegated to IsIdentifierPartSlow().
// Marked inline explicitly: this is a function definition in a header, and
// the other definitions at the top of this file spell `inline` out as well.
inline bool IsIdentifierPart(uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsIdentifierPartSlow(c);
  const bool cached = (kOneByteCharFlags[c] & kIsIdentifierPart) != 0;
  // Cross-check the table entry against the slow path.
  DCHECK_EQ(IsIdentifierPartSlow(c), cached);
  return cached;
}

// Returns whether c is white space.
// Values in the 0-255 range are answered from kOneByteCharFlags; anything
// else is delegated to IsWhiteSpaceSlow().
// Marked inline explicitly: this is a function definition in a header, and
// the other definitions at the top of this file spell `inline` out as well.
inline bool IsWhiteSpace(uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceSlow(c);
  const bool cached = (kOneByteCharFlags[c] & kIsWhiteSpace) != 0;
  // Cross-check the table entry against the slow path.
  DCHECK_EQ(IsWhiteSpaceSlow(c), cached);
  return cached;
}

// Returns whether c is white space or a line terminator.
// Values in the 0-255 range are answered from kOneByteCharFlags; anything
// else is delegated to IsWhiteSpaceOrLineTerminatorSlow().
// Marked inline explicitly: this is a function definition in a header, and
// the other definitions at the top of this file spell `inline` out as well.
inline bool IsWhiteSpaceOrLineTerminator(uc32 c) {
  if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceOrLineTerminatorSlow(c);
  const bool cached =
      (kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator) != 0;
  // Cross-check the table entry against the slow path.
  DCHECK_EQ(IsWhiteSpaceOrLineTerminatorSlow(c), cached);
  return cached;
}

// Returns true if c ends a line, given the character that follows it:
//   - '\n' always does;
//   - '\r' does unless next is '\n' (so a "\r\n" pair is reported once, at
//     the '\n');
//   - 0x2028 and 0x2029 do.
// Marked inline explicitly: this is a function definition in a header, and
// the other definitions at the top of this file spell `inline` out as well.
inline bool IsLineTerminatorSequence(uc32 c, uc32 next) {
  // Pre-filter on the low byte of c: only bytes carrying kMaybeLineEnd can
  // belong to one of the characters handled below.
  const uint8_t low_byte = static_cast<uint8_t>(c);
  if ((kOneByteCharFlags[low_byte] & kMaybeLineEnd) == 0) return false;

  if (c == '\n') return true;
  if (c == '\r') return next != '\n';
  // The low byte matched 0x28 / 0x29; confirm the full value.
  return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u);
}

}  // namespace internal

}  // namespace v8

#endif  // V8_STRINGS_CHAR_PREDICATES_INL_H_