summaryrefslogtreecommitdiff
path: root/compiler/GHC/Types/SourceText.hs
blob: 72c77dec95d30c3feb7a132192e05eab10a9a674 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
{-# LANGUAGE DeriveDataTypeable #-}

-- | Source text
--
-- Keeping Source Text for source to source conversions
--
module GHC.Types.SourceText
   ( SourceText (..)
   , pprWithSourceText

   -- * Literals
   , IntegralLit(..)
   , FractionalLit(..)
   , StringLiteral(..)
   , negateIntegralLit
   , negateFractionalLit
   , mkIntegralLit
   , mkTHFractionalLit, rationalFromFractionalLit
   , integralFractionalLit, mkSourceFractionalLit
   , FractionalExponentBase(..)

   -- Used by the pm checker.
   , fractionalLitFromRational
   , mkFractionalLit

   )
where

import GHC.Prelude

import GHC.Data.FastString

import GHC.Utils.Outputable
import GHC.Utils.Binary
import GHC.Utils.Panic

import Data.Function (on)
import Data.Data
import GHC.Real ( Ratio(..) )
import GHC.Types.SrcLoc

{-
Note [Pragma source text]
~~~~~~~~~~~~~~~~~~~~~~~~~
The lexer does a case-insensitive match for pragmas, as well as
accepting both UK and US spelling variants.

So

  {-# SPECIALISE #-}
  {-# SPECIALIZE #-}
  {-# Specialize #-}

will all generate an ITspec_prag token for the start of the pragma.

In order to be able to do source to source conversions, the original
source text for the token needs to be preserved, hence the
`SourceText` field.

So the lexer will then generate

  ITspec_prag "{ -# SPECIALISE"
  ITspec_prag "{ -# SPECIALIZE"
  ITspec_prag "{ -# Specialize"

for the cases above.
 [without the space between '{' and '-', otherwise this comment won't parse]


Note [Literal source text]
~~~~~~~~~~~~~~~~~~~~~~~~~~
The lexer/parser converts literals from their original source text
versions to an appropriate internal representation. This is a problem
for tools doing source to source conversions, so the original source
text is stored in literals where this can occur.

Motivating examples for HsLit

  HsChar          '\n'       == '\x0a'
  HsCharPrim      '\x41'#    == 'A'#
  HsString        "\x20\x41" == " A"
  HsStringPrim    "\x20"#    == " "#
  HsInt           001        == 1
  HsIntPrim       002#       == 2#
  HsWordPrim      003##      == 3##
  HsInt64Prim     004#Int64  == 4#Int64
  HsWord64Prim    005#Word64 == 5#Word64
  HsInteger       006        == 6

For OverLitVal

  HsIntegral      003      == 0x003
  HsIsString      "\x41nd" == "And"
-}

 -- Note [Literal source text],[Pragma source text]
-- | The original source text of a token, or a marker that none is available.
-- See Note [Literal source text] and Note [Pragma source text].
data SourceText
   = SourceText String
      -- ^ The text as it was written in the source file.
   | NoSourceText
      -- ^ For when code is generated, e.g. TH,
      -- deriving. The pretty printer will then make
      -- its own representation of the item.
   deriving (Data, Show, Eq )

-- | Renders the constructor name, followed by the raw text when there is one.
instance Outputable SourceText where
  ppr st = case st of
    NoSourceText   -> text "NoSourceText"
    SourceText raw -> text "SourceText" <+> text raw

-- | Serialised as a one-byte tag: 0 for 'NoSourceText', 1 for 'SourceText'
-- followed by the string payload. Any other tag panics on read.
instance Binary SourceText where
  put_ bh st = case st of
    NoSourceText   -> putByte bh 0
    SourceText raw -> putByte bh 1 >> put_ bh raw

  get bh = getByte bh >>= fromTag
    where
      -- Decode the remainder of the value according to the leading tag byte.
      fromTag 0 = return NoSourceText
      fromTag 1 = SourceText <$> get bh
      fromTag t = panic $ "Binary SourceText:" ++ show t

-- | Special combinator for showing string literals: print the original
-- source text when it is available, otherwise fall back to the supplied
-- document.
pprWithSourceText :: SourceText -> SDoc -> SDoc
pprWithSourceText st fallback = case st of
  SourceText src -> text src
  NoSourceText   -> fallback

------------------------------------------------
-- Literals
------------------------------------------------

-- | Integral Literal
--
-- Used (instead of Integer) to represent negative zero which is
-- required for NegativeLiterals extension to correctly parse `-0::Double`
-- as negative zero. See also #13211.
data IntegralLit = IL
   { il_text  :: SourceText -- ^ How the literal was written, if known;
                            -- printed verbatim by the 'Outputable' instance
   , il_neg   :: Bool -- See Note [Negative zero] in GHC.Rename.Pat
   , il_value :: Integer    -- ^ The numeric value; the only field the
                            -- 'Eq' and 'Ord' instances look at
   }
   deriving (Data, Show)

-- | Build an 'IntegralLit' from any integral value. The source text is the
-- 'show' of the value converted to 'Integer', and the negative flag is set
-- exactly when the input is below zero.
mkIntegralLit :: Integral a => a -> IntegralLit
mkIntegralLit i = IL (SourceText (show asInteger)) (i < 0) asInteger
  where
    asInteger :: Integer
    asInteger = toInteger i

-- | Negate an integral literal, keeping source text, sign flag and value in
-- step: a leading @-@ in the text is stripped (flag cleared), otherwise one
-- is prepended (flag set). Without source text the flag is simply inverted.
negateIntegralLit :: IntegralLit -> IntegralLit
negateIntegralLit (IL text neg value) = case text of
    SourceText ('-':rest) -> rebuild (SourceText rest)           False
    SourceText unsigned   -> rebuild (SourceText ('-':unsigned)) True
    NoSourceText          -> rebuild NoSourceText                (not neg)
  where
    -- The value is negated in every case; only text and flag vary.
    rebuild txt isNeg = IL txt isNeg (negate value)

-- | Fractional Literal
--
-- Used (instead of Rational) to represent exactly the floating point literal that we
-- encountered in the user's source program. This allows us to pretty-print exactly what
-- the user wrote, which is important e.g. for floating point numbers that can't be
-- represented as Doubles (we used to go via Double for pretty-printing). See also #2245.
-- Note [FractionalLit representation] in GHC.HsToCore.Match.Literal
-- The actual value then is: sign * fl_signi * (fl_exp_base^fl_exp)
--                             where sign = if fl_neg then (-1) else 1
--
-- For example FL { fl_neg = True, fl_signi = 5.3, fl_exp = 4, fl_exp_base = Base10 }
-- denotes  -53000
--
-- NOTE(review): 'rationalFromFractionalLit' in this module computes
-- @fl_signi * (fl_exp_base ^^ fl_exp)@ without consulting 'fl_neg', and the
-- helpers here ('negateFractionalLit', 'mkTHFractionalLit',
-- 'fractionalLitFromRational') keep the sign inside 'fl_signi'. The formula
-- above looks out of step with that -- confirm which convention is intended.

data FractionalLit = FL
    { fl_text :: SourceText     -- ^ How the value was written in the source
    , fl_neg :: Bool                        -- See Note [Negative zero]
    , fl_signi :: Rational                  -- The significand component of the literal
    , fl_exp :: Integer                     -- The exponent component of the literal
    , fl_exp_base :: FractionalExponentBase -- See Note [fractional exponent bases]
    }
    deriving (Data, Show)
  -- The Show instance is required for the derived GHC.Parser.Lexer.Token instance when DEBUG is on

-- | The base that the exponent of a 'FractionalLit' is raised over.
-- See Note [FractionalLit representation] in GHC.HsToCore.Match.Literal
-- and Note [fractional exponent bases].
data FractionalExponentBase
  = Base2 -- Used in hex fractional literals
  | Base10 -- Exponent is a power of ten
  deriving (Eq, Ord, Data, Show)

-- | Plain constructor wrapper: assemble a 'FractionalLit' from its five
-- components without any adjustment.
mkFractionalLit :: SourceText -> Bool -> Rational -> Integer -> FractionalExponentBase
                -> FractionalLit
mkFractionalLit txt isNeg signi ex base = FL
  { fl_text     = txt
  , fl_neg      = isNeg
  , fl_signi    = signi
  , fl_exp      = ex
  , fl_exp_base = base
  }

-- | Scale a significand by @base ^^ exponent@, where the base is 2 or 10
-- according to the given 'FractionalExponentBase'.
mkRationalWithExponentBase :: Rational -> Integer -> FractionalExponentBase -> Rational
mkRationalWithExponentBase i e feb = i * (radix ^^ e)
  where
    radix :: Rational
    radix = case feb of
      Base2  -> 2
      Base10 -> 10

-- | Wrap a 'Rational' as a literal with no source text: the rational is the
-- significand, the exponent is zero (base 10), and the negative flag is set
-- when the value is below zero.
fractionalLitFromRational :: Rational -> FractionalLit
fractionalLitFromRational r = FL NoSourceText (r < 0) r 0 Base10

-- | The numeric value of a literal: its significand scaled by the exponent
-- in the literal's exponent base. The 'fl_neg' flag is not consulted.
rationalFromFractionalLit :: FractionalLit -> Rational
rationalFromFractionalLit fl =
  mkRationalWithExponentBase (fl_signi fl) (fl_exp fl) (fl_exp_base fl)

-- | Build a literal from a 'Rational', with source text obtained by showing
-- the value as a 'Double'. The exact rational is kept as the significand
-- (exponent zero, base 10).
mkTHFractionalLit :: Rational -> FractionalLit
mkTHFractionalLit r = FL (SourceText shown) (r < 0) r 0 Base10
  where
    -- Going through Double may technically lose precision (see #15502).
    -- Showing the exact rational instead would be the most accurate, but it
    -- would cause Floats and Doubles to be displayed strangely, so we opt
    -- not to do this. (In contrast to mkIntegralLit, where we always
    -- convert to an Integer for the highest accuracy.)
    shown = show (realToFrac r :: Double)

-- | Negate a fractional literal. The significand is negated; a leading @-@
-- in the source text is stripped (flag cleared), otherwise one is prepended
-- (flag set). Without source text the flag is inverted. Exponent and base
-- are left untouched.
negateFractionalLit :: FractionalLit -> FractionalLit
negateFractionalLit (FL text neg i e eb) = case text of
    SourceText ('-':rest) -> flipped (SourceText rest)           False
    SourceText unsigned   -> flipped (SourceText ('-':unsigned)) True
    NoSourceText          -> flipped NoSourceText                (not neg)
  where
    -- Only the text and the flag differ between the cases.
    flipped txt isNeg = FL txt isNeg (negate i) e eb

-- | Turn an integer into a fractional literal with exponent zero (base 10).
-- The integer should already be negated if it's negative; the flag is
-- stored as given and the source text is the 'show' of the integer.
integralFractionalLit :: Bool -> Integer -> FractionalLit
integralFractionalLit neg i = FL (SourceText (show i)) neg (i :% 1) 0 Base10

-- | Build a literal from lexed components: source text, negative flag,
-- integral significand (wrapped as a ratio with denominator 1), exponent
-- and exponent base. The arguments should already be negated if they are
-- negative. All arguments are forced by the bang patterns.
mkSourceFractionalLit :: String -> Bool -> Integer -> Integer
                      -> FractionalExponentBase
                      -> FractionalLit
mkSourceFractionalLit !src !isNeg !signi !ex !base = FL
  { fl_text     = SourceText src
  , fl_neg      = isNeg
  , fl_signi    = signi :% 1
  , fl_exp      = ex
  , fl_exp_base = base
  }

{- Note [fractional exponent bases]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For hexadecimal rationals of
the form 0x0.3p10 the exponent is given on base 2 rather than
base 10. These are the only options, hence the sum type. See also #15646.
-}


-- Comparison operations are needed when grouping literals
-- for compiling pattern-matching (module GHC.HsToCore.Match.Literal)

-- | Equality looks only at the numeric value; source text and the negative
-- flag are ignored.
instance Eq IntegralLit where
  a == b = il_value a == il_value b

-- | Ordering looks only at the numeric value; source text and the negative
-- flag are ignored.
instance Ord IntegralLit where
  compare a b = compare (il_value a) (il_value b)

-- | Prints the original source text when present, otherwise the shown value.
instance Outputable IntegralLit where
  ppr lit = case il_text lit of
    SourceText src -> text src
    NoSourceText   -> text (show (il_value lit))


-- | Compare fractional lits by value when both exponents lie in the window
-- @-100 <= fl_exp < 100@; otherwise compare them syntactically, i.e. by the
-- triple of significand, exponent and exponent base.
compareFractionalLit :: FractionalLit -> FractionalLit -> Ordering
compareFractionalLit fl1 fl2
  | smallExp fl1 && smallExp fl2
    = compare (rationalFromFractionalLit fl1) (rationalFromFractionalLit fl2)
  | otherwise
    = compare (syntacticKey fl1) (syntacticKey fl2)
  where
    -- Exponent is within the window where the value is computed outright.
    smallExp fl = fl_exp fl >= -100 && fl_exp fl < 100
    -- Components compared when at least one exponent is outside the window.
    syntacticKey fl = (fl_signi fl, fl_exp fl, fl_exp_base fl)

-- | Be wary of using this instance to compare for equal *values* when exponents are
-- large in magnitude. The same value expressed in different syntactic form won't
-- compare as equal when any of the exponents is >= 100 or < -100.
instance Eq FractionalLit where
  fl1 == fl2 = compare fl1 fl2 == EQ

-- | Be wary of using this instance to compare *values* when exponents are large
-- in magnitude. Literals are ordered syntactically rather than by value when any
-- of the exponents is >= 100 or < -100.
instance Ord FractionalLit where
  compare fl1 fl2 = compareFractionalLit fl1 fl2

-- | Prints the original source text when present; otherwise prints the
-- rational obtained by scaling the significand by the exponent.
instance Outputable FractionalLit where
  ppr fl = pprWithSourceText (fl_text fl) (rational value)
    where
      value = mkRationalWithExponentBase (fl_signi fl) (fl_exp fl) (fl_exp_base fl)

-- | A String Literal in the source, including its original raw format for use by
-- source to source manipulation tools.
--
-- Only 'sl_fs' takes part in the 'Eq' instance, and the 'Binary' instance
-- does not serialise 'sl_tc' (it is read back as 'Nothing').
data StringLiteral = StringLiteral
                       { sl_st :: SourceText, -- literal raw source.
                                              -- See Note [Literal source text]
                         sl_fs :: FastString, -- literal string value
                         sl_tc :: Maybe RealSrcSpan -- Location of
                                                    -- possible
                                                    -- trailing comma
                       -- AZ: if we could have a LocatedA
                       -- StringLiteral we would not need sl_tc, but
                       -- that would cause import loops.

                       -- AZ:2: sl_tc should be an EpaAnchor, to allow
                       -- editing and reprinting the AST. Need a more
                       -- robust solution.

                       } deriving Data

-- | Two string literals are equal when their string values are equal; the
-- raw source text and the trailing-comma location are ignored.
instance Eq StringLiteral where
  lhs == rhs = sl_fs lhs == sl_fs rhs

-- | Prints the raw source text when present, otherwise the string value.
instance Outputable StringLiteral where
  ppr (StringLiteral st fs _) = pprWithSourceText st (ftext fs)

-- | Serialises the source text followed by the string value. The
-- trailing-comma location is not written and is restored as 'Nothing'.
instance Binary StringLiteral where
  put_ bh (StringLiteral st fs _) = put_ bh st >> put_ bh fs
  get bh = withoutComma <$> get bh <*> get bh
    where
      withoutComma st fs = StringLiteral st fs Nothing