diff options
Diffstat (limited to 'compiler/GHC/Parser/Annotation.hs')
-rw-r--r-- | compiler/GHC/Parser/Annotation.hs | 201 |
1 files changed, 82 insertions, 119 deletions
diff --git a/compiler/GHC/Parser/Annotation.hs b/compiler/GHC/Parser/Annotation.hs index 8dc12555a0..986ee8c197 100644 --- a/compiler/GHC/Parser/Annotation.hs +++ b/compiler/GHC/Parser/Annotation.hs @@ -28,8 +28,8 @@ module GHC.Parser.Annotation ( -- ** Annotations in 'GenLocated' LocatedA, LocatedL, LocatedC, LocatedN, LocatedAn, LocatedP, - SrcSpanAnnA, SrcSpanAnnL, SrcSpanAnnP, SrcSpanAnnC, SrcSpanAnnN, SrcSpanAnn'(..), - SrcAnn, + SrcSpanAnnA, SrcSpanAnnL, SrcSpanAnnP, SrcSpanAnnC, SrcSpanAnnN, + SrcSpanAnn'(..), SrcAnn, -- ** Annotation data types used in 'GenLocated' @@ -69,7 +69,8 @@ module GHC.Parser.Annotation ( combineSrcSpansA, addCLocA, addCLocAA, - -- ** Constructing 'GenLocated' annotation types when we do not care about annotations. + -- ** Constructing 'GenLocated' annotation types when we do not care + -- about annotations. noLocA, getLocA, noSrcSpanA, noAnnSrcSpan, @@ -184,11 +185,8 @@ https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations -- | Exact print annotations exist so that tools can perform source to -- source conversions of Haskell code. They are used to keep track of --- the various syntactic keywords that are not captured in the --- existing AST. --- --- The annotations, together with original source comments are made available in --- the @'pm_parsed_source@ field of @'GHC.Driver.Env.HsParsedModule'@. +-- the various syntactic keywords that are not otherwise captured in the +-- AST. -- -- The wiki page describing this feature is -- https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations @@ -311,49 +309,6 @@ data AnnKeywordId instance Outputable AnnKeywordId where ppr x = text (show x) --- --------------------------------------------------------------------- - -data EpaComment = - EpaComment - { ac_tok :: EpaCommentTok - , ac_prior_tok :: RealSrcSpan - -- ^ The location of the prior token, used in exact printing. The - -- 'EpaComment' appears as an 'LEpaComment' containing its - -- location. 
The difference between the end of the prior token - -- and the start of this location is used for the spacing when - -- exact printing the comment. - } - deriving (Eq, Ord, Data, Show) - -data EpaCommentTok = - -- Documentation annotations - EpaDocCommentNext String -- ^ something beginning '-- |' - | EpaDocCommentPrev String -- ^ something beginning '-- ^' - | EpaDocCommentNamed String -- ^ something beginning '-- $' - | EpaDocSection Int String -- ^ a section heading - | EpaDocOptions String -- ^ doc options (prune, ignore-exports, etc) - | EpaLineComment String -- ^ comment starting by "--" - | EpaBlockComment String -- ^ comment in {- -} - | EpaEofComment -- ^ empty comment, capturing - -- location of EOF - - -- See #19697 for a discussion of its use and how it should be - -- removed in favour of capturing it in the location for - -- 'Located HsModule' in the parser. - - deriving (Eq, Ord, Data, Show) --- Note: these are based on the Token versions, but the Token type is --- defined in GHC.Parser.Lexer and bringing it in here would create a loop - -instance Outputable EpaComment where - ppr x = text (show x) - --- | - 'GHC.Parser.Annotation.AnnKeywordId' : 'GHC.Parser.Annotation.AnnOpen', --- 'GHC.Parser.Annotation.AnnClose','GHC.Parser.Annotation.AnnComma', --- 'GHC.Parser.Annotation.AnnRarrow' --- 'GHC.Parser.Annotation.AnnTilde' --- - May have 'GHC.Parser.Annotation.AnnComma' when in a list - -- | Certain tokens can have alternate representations when unicode syntax is -- enabled. This flag is attached to those tokens in the lexer so that the -- original source representation can be reproduced in the corresponding @@ -391,6 +346,43 @@ data HasE = HasE | NoE -- --------------------------------------------------------------------- +data EpaComment = + EpaComment + { ac_tok :: EpaCommentTok + , ac_prior_tok :: RealSrcSpan + -- ^ The location of the prior token, used in exact printing. 
The + -- 'EpaComment' appears as an 'LEpaComment' containing its + -- location. The difference between the end of the prior token + -- and the start of this location is used for the spacing when + -- exact printing the comment. + } + deriving (Eq, Ord, Data, Show) + +data EpaCommentTok = + -- Documentation annotations + EpaDocCommentNext String -- ^ something beginning '-- |' + | EpaDocCommentPrev String -- ^ something beginning '-- ^' + | EpaDocCommentNamed String -- ^ something beginning '-- $' + | EpaDocSection Int String -- ^ a section heading + | EpaDocOptions String -- ^ doc options (prune, ignore-exports, etc) + | EpaLineComment String -- ^ comment starting by "--" + | EpaBlockComment String -- ^ comment in {- -} + | EpaEofComment -- ^ empty comment, capturing + -- location of EOF + + -- See #19697 for a discussion of EpaEofComment's use and how it + -- should be removed in favour of capturing it in the location for + -- 'Located HsModule' in the parser. + + deriving (Eq, Ord, Data, Show) +-- Note: these are based on the Token versions, but the Token type is +-- defined in GHC.Parser.Lexer and bringing it in here would create a loop + +instance Outputable EpaComment where + ppr x = text (show x) + +-- --------------------------------------------------------------------- + -- | Captures an annotation, storing the @'AnnKeywordId'@ and its -- location. The parser only ever inserts @'EpaLocation'@ fields with a -- RealSrcSpan being the original location of the annotation in the @@ -412,12 +404,16 @@ data EpaLocation = EpaSpan RealSrcSpan | EpaDelta DeltaPos deriving (Data,Show,Eq,Ord) --- | Relative position, line then column. If 'deltaLine' is zero then --- 'deltaColumn' gives the number of spaces between the end of the --- preceding output element and the start of the one this is attached --- to, on the same line. If 'deltaLine' is > 0, then it is the number --- of lines to advance, and 'deltaColumn' is the start column on the --- new line. 
+-- | Spacing between output items when exact printing. It captures +-- the spacing from the current print position on the page to the +-- position required for the thing about to be printed. This is +-- either on the same line in which case it is simply the number of +-- spaces to emit, or it is some number of lines down, with a given +-- column offset. The exact printing algorithm keeps track of the +-- column offset pertaining to the current anchor position, so the +-- `deltaColumn` is the additional spaces to add in this case. See +-- https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations for +-- details. data DeltaPos = SameLine { deltaColumn :: !Int } | DifferentLine @@ -425,6 +421,8 @@ data DeltaPos deltaColumn :: !Int } deriving (Show,Eq,Ord,Data) +-- | Smart constructor for a 'DeltaPos'. It preserves the invariant +-- that for the 'DifferentLine' constructor 'deltaLine' is always > 0. deltaPos :: Int -> Int -> DeltaPos deltaPos l c = case l of 0 -> SameLine c @@ -450,40 +448,10 @@ instance Outputable AddEpAnn where -- --------------------------------------------------------------------- -{- -Note [In-tree Api annotations] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -GHC 7.10 brought in the concept of API Annotations, -https://gitlab.haskell.org/ghc/ghc/-/wikis/api-annotations: - - The hsSyn AST does not directly capture the locations of certain - keywords and punctuation, such as 'let', 'in', 'do', etc. - - These locations are required by any tools wanting to parse a haskell - file, transform the AST in some way, and then regenerate the - original layout for the unchaged parts." - -These were returned in a separate data structure, linked to the main -AST via a combination of SrcSpan and constructor name. - -This indirect linkage kept the AST uncluttered, but made working with -the annotations complex, as two separate data structures had to be -changed at the same time in a coherent way.
- -From GHC 9.2.1, these annotations are captured directly in the AST, -using the types in this file, and the Trees That Grow (TTG) extension -points for GhcPs. - -See https://gitlab.haskell.org/ghc/ghc/wikis/api-annotations - -See Note [XRec and Anno in the AST] for details of how this is done. --} - --- | The API Annotations are now kept in the HsSyn AST for the GhcPs --- phase. We do not always have API Annotations though, only for --- parsed code. This type captures that, and allows the --- representation decision to be easily revisited as it evolves. +-- | The exact print annotations (EPAs) are kept in the HsSyn AST for +-- the GhcPs phase. We do not always have EPAs though, only for code +-- that has been parsed as they do not exist for generated +-- code. This type captures that they may be missing. -- -- A goal of the annotations is that an AST can be edited, including -- moving subtrees from one place to another, duplicating them, and so @@ -496,8 +464,8 @@ See Note [XRec and Anno in the AST] for details of how this is done. -- fragment are also captured here. -- -- The 'ann' type parameter allows this general structure to be --- specialised to the specific set of locations of original API --- Annotation elements. So for 'HsLet' we have +-- specialised to the specific set of locations of original exact +-- print annotation elements. So for 'HsLet' we have -- -- type instance XLet GhcPs = EpAnn AnnsLet -- data AnnsLet @@ -507,11 +475,12 @@ See Note [XRec and Anno in the AST] for details of how this is done. -- } deriving Data -- -- The spacing between the items under the scope of a given EpAnn is --- derived from the original 'Anchor'. But there is no requirement --- that the items included in the sub-element have a "matching" --- location in their relative anchors. This allows us to freely move --- elements around, and stitch together new AST fragments out of old --- ones, and have them still printed out in a reasonable way. 
+-- normally derived from the original 'Anchor'. But if a sub-element +-- is not in its original position, the required spacing can be +-- directly captured in the 'anchor_op' field of the 'entry' Anchor. +-- This allows us to freely move elements around, and stitch together +-- new AST fragments out of old ones, and have them still printed out +-- in a precise way. data EpAnn ann = EpAnn { entry :: Anchor -- ^ Base location for the start of the syntactic element @@ -528,8 +497,11 @@ data EpAnn ann -- | An 'Anchor' records the base location for the start of the -- syntactic element holding the annotations, and is used as the point -- of reference for calculating delta positions for contained --- annotations. If an AST element is moved or deleted, the original --- location is also tracked, for printing the source without gaps. +-- annotations. +-- It is also normally used as the reference point for the spacing of +-- the element relative to its container. If it is moved, that +-- relationship is tracked in the 'anchor_op' instead. + data Anchor = Anchor { anchor :: RealSrcSpan -- ^ Base location for the start of -- the syntactic element holding @@ -557,8 +529,8 @@ realSpanAsAnchor s = Anchor s UnchangedAnchor -- | When we are parsing we add comments that belong a particular AST -- element, and print them together with the element, interleaving --- them into the output stream. But when editin the AST, to move --- fragments around, it is useful to be able to first separate the +-- them into the output stream. But when editing the AST to move +-- fragments around it is useful to be able to first separate the -- comments into those occuring before the AST element and those -- following it. The 'EpaCommentsBalanced' constructor is used to do -- this. The GHC parser will only insert the 'EpaComments' form. 
@@ -587,14 +559,7 @@ data SrcSpanAnn' a = SrcSpanAnn { ann :: a, locA :: SrcSpan } -- | We mostly use 'SrcSpanAnn\'' with an 'EpAnn\'' type SrcAnn ann = SrcSpanAnn' (EpAnn ann) --- AZ: is SrcAnn the right abbreviation here? Any better suggestions? - --- AZ: should we rename LocatedA to LocatedL? The name comes from --- this being the most common usage, and hence being the default --- annotation. It also has a matching set if utility functions such as --- locA, noLocA, etc. LocatedL would then need a new name, but it is --- relatively rare, and captures a list having an openinc and closing --- adorment, such as parens, braces, etc. + type LocatedA = GenLocated SrcSpanAnnA type LocatedN = GenLocated SrcSpanAnnN @@ -617,7 +582,7 @@ type LocatedAn an = GenLocated (SrcAnn an) Note [XRec and Anno in the AST] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The API annotations are now captured directly inside the AST, using +The exact print annotations are captured directly inside the AST, using TTG extension points. However certain annotations need to be captured on the Located versions too. While there is a general form for these, captured in the type SrcSpanAnn', there are also specific usages in @@ -683,8 +648,6 @@ data AnnListItem -- keywords such as 'where'. data AnnList = AnnList { - -- TODO:AZ: should we distinguish AnnList variants for lists - -- with layout and without? 
al_anchor :: Maybe Anchor, -- ^ start point of a list having layout al_open :: Maybe AddEpAnn, al_close :: Maybe AddEpAnn, @@ -696,7 +659,7 @@ data AnnList -- Annotations for parenthesised elements, such as tuples, lists -- --------------------------------------------------------------------- --- | API Annotation for an item having surrounding "brackets", such as +-- | Exact print annotation for an item having surrounding "brackets", such as -- tuples or lists data AnnParen = AnnParen { @@ -705,7 +668,7 @@ data AnnParen ap_close :: EpaLocation } deriving (Data) --- | Detail of the "brackets" used in an 'AnnParen' API Annotation. +-- | Detail of the "brackets" used in an 'AnnParen' exact print annotation. data ParenType = AnnParens -- ^ '(', ')' | AnnParensHash -- ^ '(#', '#)' @@ -721,7 +684,7 @@ parenTypeKws AnnParensSquare = (AnnOpenS, AnnCloseS) -- --------------------------------------------------------------------- --- | API Annotation for the 'Context' data type. +-- | Exact print annotation for the 'Context' data type. data AnnContext = AnnContext { ac_darrow :: Maybe (IsUnicodeSyntax, EpaLocation), @@ -735,7 +698,7 @@ data AnnContext -- Annotations for names -- --------------------------------------------------------------------- --- | API Annotations for a 'RdrName'. There are many kinds of +-- | Exact print annotations for a 'RdrName'. There are many kinds of -- adornment that can be attached to a given 'RdrName'. This type -- captures them, as detailed on the individual constructors. data NameAnn @@ -793,8 +756,8 @@ data NameAdornment -- --------------------------------------------------------------------- --- | API Annotation used for capturing the locations of annotations in --- pragmas. +-- | Exact print annotation used for capturing the locations of +-- annotations in pragmas. data AnnPragma = AnnPragma { apr_open :: AddEpAnn, |