summaryrefslogtreecommitdiff
path: root/compiler/GHC/Utils/Lexeme.hs
blob: 6df962a54b3bb4d6155ec2d7224f9da6e4767f3f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
-- (c) The GHC Team
--
-- Functions to evaluate whether or not a string is a valid identifier.
-- There is considerable overlap between the logic here and the logic
-- in GHC.Parser.Lexer, but sadly there seems to be no way to merge them.

module GHC.Utils.Lexeme (
          -- * Lexical characteristics of Haskell names

          -- | Use these functions to figure out what kind of name a 'FastString'
          -- represents; these functions do /not/ check that the identifier
          -- is valid.

        isLexCon, isLexVar, isLexId, isLexSym,
        isLexConId, isLexConSym, isLexVarId, isLexVarSym,
        startsVarSym, startsVarId, startsConSym, startsConId,

          -- * Validating identifiers

          -- | These functions (working over plain old 'String's) check
          -- to make sure that the identifier is valid.
        okVarOcc, okConOcc, okTcOcc,
        okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc

        -- Some of the exports above are not used within GHC, but may
        -- be of value to GHC API users.

  ) where

import GHC.Prelude

import GHC.Data.FastString

import Data.Char
import qualified Data.Set as Set

import GHC.Lexeme

{-

************************************************************************
*                                                                      *
    Lexical categories
*                                                                      *
************************************************************************

These functions test strings to see if they fit the lexical categories
defined in the Haskell report.

Note [Classification of generated names]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Some names generated for internal use can show up in debugging output,
e.g.  when using -ddump-simpl. These generated names start with a $
but should still be pretty-printed using prefix notation. We make sure
this is the case in isLexVarSym by only classifying a name as a symbol
if all its characters are symbols, not just its first one.
-}

-- Lexical classification of names.  Each of the four coarse predicates
-- defined here is the disjunction of two of the finer ones (isLexConId,
-- isLexConSym, isLexVarId, isLexVarSym).
isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

-- A constructor name, whether prefix or infix.
isLexCon name
  | isLexConId name = True
  | otherwise       = isLexConSym name

-- A variable name, whether prefix or infix.
isLexVar name
  | isLexVarId name = True
  | otherwise       = isLexVarSym name

-- A prefix name, whether constructor or variable.
isLexId name
  | isLexConId name = True
  | otherwise       = isLexVarId name

-- An infix name, whether constructor or variable.
isLexSym name
  | isLexConSym name = True
  | otherwise        = isLexVarSym name

-------------
-- Prefix type or data constructors, e.g. "Foo", "[]", "(,)".
-- The empty name is rejected first, so headFS only ever sees a non-empty
-- FastString; "[]" is accepted explicitly rather than by its first character.
isLexConId name
  | nullFS name = False
  | otherwise   = name == fsLit "[]" || startsConId (headFS name)

-- Ordinary prefix identifiers, e.g. "x", "_x".
-- The emptiness test comes first so that headFS is never applied to an
-- empty FastString.
isLexVarId name = not (nullFS name) && startsVarId (headFS name)

-- Infix type or data constructors, e.g. ":-:", ":", "->".
-- "->" is accepted explicitly; every other non-empty name is judged by its
-- first character alone.
isLexConSym name =
  not (nullFS name) &&
  (name == fsLit "->" || startsConSym (headFS name))

-- Infix identifiers, e.g. "+".
-- "~R#" is accepted explicitly.  Otherwise the name must be non-empty, start
-- with a character that can begin a symbolic variable, and consist of symbol
-- characters throughout.
-- See Note [Classification of generated names]
isLexVarSym name = name == fsLit "~R#" || allSymbolic chars
  where
    -- Only unpack the FastString when there is something to look at.
    chars
      | nullFS name = []
      | otherwise   = unpackFS name

    allSymbolic []            = False
    allSymbolic (hd : others) = startsVarSym hd && all isVarSymChar others

{-

************************************************************************
*                                                                      *
    Detecting valid names for Template Haskell
*                                                                      *
************************************************************************

-}

----------------------
-- External interface
----------------------

-- | Is this an acceptable variable name?
--
-- The first character selects which rules apply: alphanumeric
-- ('okVarIdOcc') or symbolic ('okVarSymOcc').  The empty string, and any
-- string whose first character starts neither kind of variable, is rejected.
okVarOcc :: String -> Bool
okVarOcc name = case name of
  hd : _
    | startsVarId hd  -> okVarIdOcc name
    | startsVarSym hd -> okVarSymOcc name
  _                   -> False

-- | Is this an acceptable constructor name?
--
-- The first character selects which rules apply: alphanumeric
-- ('okConIdOcc') or symbolic ('okConSymOcc').  If it selects neither, the
-- only name still accepted is the literal @"[]"@.
okConOcc :: String -> Bool
okConOcc name = case name of
  hd : _
    | startsConId hd  -> okConIdOcc name
    | startsConSym hd -> okConSymOcc name
  "[]"                -> True
  _                   -> False

-- | Is this an acceptable type name?
--
-- A few names are accepted outright.  Anything else is dispatched on its
-- first character to the constructor-identifier, constructor-symbol or
-- variable-symbol check, and is rejected if none of those applies.
okTcOcc :: String -> Bool
okTcOcc name
  | name `elem` alwaysOk = True
  | otherwise            = case name of
      hd : _
        | startsConId hd  -> okConIdOcc name
        | startsConSym hd -> okConSymOcc name
        | startsVarSym hd -> okVarSymOcc name
      _                   -> False
  where
    -- Accepted before any of the general checks are consulted.
    alwaysOk = ["[]", "->", "~"]

-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
--
-- The name must pass 'okIdOcc' and must not be a reserved identifier.
okVarIdOcc :: String -> Bool
okVarIdOcc name
  | not (okIdOcc name) = False
    -- Admit "_" as a valid identifier even though it is listed in
    -- 'reservedIds'.  Required to support typed holes in Template Haskell.
    -- See #10267
  | name == "_"        = True
  | otherwise          = name `Set.notMember` reservedIds

-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
--
-- Every character must be a symbol character, the name must not be a
-- reserved operator, and it must not be a run of two or more dashes.
okVarSymOcc :: String -> Bool
okVarSymOcc name
  | not (all okSymChar name)      = False
  | name `Set.member` reservedOps = False
  | otherwise                     = not (isDashes name)

-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
--
-- Besides ordinary identifiers (see 'okIdOcc') this admits the bracketed
-- names of boxed tuples, unboxed tuples (#12407) and unboxed sums (#12514).
okConIdOcc :: String -> Bool
okConIdOcc name =
  okIdOcc name || isBoxedTuple || isUnboxedTuple || isUnboxedSum
  where
    -- "(" followed by a boxed-tuple tail.
    isBoxedTuple = case name of
      '(' : body -> tupleTail ")" body
      _          -> False

    -- "(#" followed by an unboxed-tuple tail.
    isUnboxedTuple = case name of
      '(' : '#' : body -> tupleTail "#)" body
      _                -> False

    -- "(#" followed by an unboxed-sum tail; no underscore has been seen yet.
    isUnboxedSum = case name of
      '(' : '#' : body -> sumTail False body
      _                -> False

    -- Tuple tail: any mix of commas and whitespace, ending in exactly the
    -- closing bracket passed as the first argument.
    tupleTail closer rest
      | rest == closer          = True
    tupleTail closer (ch : more)
      | ch == ',' || isSpace ch = tupleTail closer more
    tupleTail _ _               = False

    -- Sum tail: any mix of bars and whitespace, with at most one underscore,
    -- ending in exactly "#)".  The flag records whether the underscore has
    -- already been consumed.
    sumTail seenUnderscore rest = case rest of
      "#)"                   -> True
      '|' : more             -> sumTail seenUnderscore more
      '_' : more
        | not seenUnderscore -> sumTail True more
      ch : more
        | isSpace ch         -> sumTail seenUnderscore more
      _                      -> False

-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
okConSymOcc :: String -> Bool
okConSymOcc name
    -- ":" is listed in 'reservedOps', so it has to be admitted up front.
  | name == ":" = True
  | otherwise   = all okSymChar name && name `Set.notMember` reservedOps

----------------------
-- Internal functions
----------------------

-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
--
-- Strip the longest prefix of identifier characters; whatever is left must
-- consist solely of @#@ (-XMagicHash allows a suffix of hashes).  An empty
-- remainder is fine, since 'all' holds vacuously on the empty list.
okIdOcc :: String -> Bool
okIdOcc = all (== '#') . dropWhile okIdChar

-- | Is this character acceptable in an identifier (after the first letter)?
-- See alexGetByte in GHC.Parser.Lexer
--
-- A character qualifies if it is a prime, an underscore, or belongs to one
-- of the Unicode general categories listed below.
okIdChar :: Char -> Bool
okIdChar c
  | c == '\'' || c == '_' = True
  | otherwise             = generalCategory c `elem` identCategories
  where
    identCategories =
      [ UppercaseLetter
      , LowercaseLetter
      , TitlecaseLetter
      , ModifierLetter   -- See #10196
      , OtherLetter      -- See #1103
      , NonSpacingMark   -- See #7650
      , DecimalNumber
      , OtherNumber      -- See #4373
      ]

-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
--
-- The set holds the keywords listed below together with the lone
-- underscore @"_"@.
reservedIds :: Set.Set String
reservedIds = Set.insert "_" (Set.fromList keywords)
  where
    keywords =
      [ "case", "class", "data", "default", "deriving"
      , "do", "else", "foreign", "if", "import", "in"
      , "infix", "infixl", "infixr", "instance", "let"
      , "module", "newtype", "of", "then", "type", "where"
      ]

-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
reservedOps :: Set.Set String
reservedOps = Set.fromList (colonOps ++ arrowOps ++ otherOps)
  where
    colonOps = [ ":", "::" ]
    arrowOps = [ "<-", "->", "=>" ]
    otherOps = [ "..", "=", "\\", "|", "@", "~" ]

-- | Does this string contain only dashes and has at least 2 of them?
isDashes :: String -> Bool
isDashes str = all (== '-') str && atLeastTwo str
  where
    -- True exactly when the list has two or more elements.
    atLeastTwo (_ : _ : _) = True
    atLeastTwo _           = False