diff options
author | Zubin Duggal <zubin.duggal@gmail.com> | 2021-10-12 18:25:41 +0530 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-10-26 12:36:24 -0400 |
commit | 0255ef38b1bb0d4f3608bf92ebc8a93955ccb30a (patch) | |
tree | 718653d95e2d43388f6e20f7fa2057aee1282baf | |
parent | 0f7541dc37d25d8a1056586bbeb57bf0dd2826a0 (diff) | |
download | haskell-0255ef38b1bb0d4f3608bf92ebc8a93955ccb30a.tar.gz |
Warn if unicode bidirectional formatting characters are found in the source (#20263)
-rw-r--r-- | compiler/GHC/Data/StringBuffer.hs | 58 | ||||
-rw-r--r-- | compiler/GHC/Driver/Flags.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/Driver/Main.hs | 37 | ||||
-rw-r--r-- | compiler/GHC/Driver/Session.hs | 3 | ||||
-rw-r--r-- | compiler/GHC/Parser/Errors/Ppr.hs | 17 | ||||
-rw-r--r-- | compiler/GHC/Parser/Errors/Types.hs | 10 | ||||
-rw-r--r-- | docs/users_guide/using-warnings.rst | 18 | ||||
-rw-r--r-- | testsuite/tests/warnings/should_fail/T20263.hs | 2 | ||||
-rw-r--r-- | testsuite/tests/warnings/should_fail/T20263.stderr | 8 | ||||
-rw-r--r-- | testsuite/tests/warnings/should_fail/all.T | 1 |
10 files changed, 156 insertions, 3 deletions
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs index 749f64b09e..03d720eb37 100644 --- a/compiler/GHC/Data/StringBuffer.hs +++ b/compiler/GHC/Data/StringBuffer.hs @@ -47,7 +47,11 @@ module GHC.Data.StringBuffer -- * Parsing integers parseUnsignedInteger, - ) where + + -- * Checking for bi-directional format characters + containsBidirectionalFormatChar, + bidirectionalFormatChars + ) where import GHC.Prelude @@ -214,6 +218,58 @@ nextChar (StringBuffer buf len (I# cur#)) = let cur' = I# (cur# +# nBytes#) in return (C# c#, StringBuffer buf len cur') + +bidirectionalFormatChars :: [(Char,String)] +bidirectionalFormatChars = + [ ('\x202a' , "U+202A LEFT-TO-RIGHT EMBEDDING (LRE)") + , ('\x202b' , "U+202B RIGHT-TO-LEFT EMBEDDING (RLE)") + , ('\x202c' , "U+202C POP DIRECTIONAL FORMATTING (PDF)") + , ('\x202d' , "U+202D LEFT-TO-RIGHT OVERRIDE (LRO)") + , ('\x202e' , "U+202E RIGHT-TO-LEFT OVERRIDE (RLO)") + , ('\x2066' , "U+2066 LEFT-TO-RIGHT ISOLATE (LRI)") + , ('\x2067' , "U+2067 RIGHT-TO-LEFT ISOLATE (RLI)") + , ('\x2068' , "U+2068 FIRST STRONG ISOLATE (FSI)") + , ('\x2069' , "U+2069 POP DIRECTIONAL ISOLATE (PDI)") + ] + +{-| Returns true if the buffer contains Unicode bi-directional formatting +characters. + +https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types + +Bidirectional format characters are one of +'\x202a' : "U+202A LEFT-TO-RIGHT EMBEDDING (LRE)" +'\x202b' : "U+202B RIGHT-TO-LEFT EMBEDDING (RLE)" +'\x202c' : "U+202C POP DIRECTIONAL FORMATTING (PDF)" +'\x202d' : "U+202D LEFT-TO-RIGHT OVERRIDE (LRO)" +'\x202e' : "U+202E RIGHT-TO-LEFT OVERRIDE (RLO)" +'\x2066' : "U+2066 LEFT-TO-RIGHT ISOLATE (LRI)" +'\x2067' : "U+2067 RIGHT-TO-LEFT ISOLATE (RLI)" +'\x2068' : "U+2068 FIRST STRONG ISOLATE (FSI)" +'\x2069' : "U+2069 POP DIRECTIONAL ISOLATE (PDI)" + +This list is encoded in 'bidirectionalFormatChars' + +-} +{-# INLINE containsBidirectionalFormatChar #-} +containsBidirectionalFormatChar :: StringBuffer -> Bool +containsBidirectionalFormatChar (StringBuffer buf (I# len#) (I# cur#)) + = inlinePerformIO $ unsafeWithForeignPtr buf $ \(Ptr a#) -> do + let go :: Int# -> Bool + go i | isTrue# (i >=# len#) = False + | otherwise = case utf8DecodeCharAddr# a# i of + (# '\x202a'# , _ #) -> True + (# '\x202b'# , _ #) -> True + (# '\x202c'# , _ #) -> True + (# '\x202d'# , _ #) -> True + (# '\x202e'# , _ #) -> True + (# '\x2066'# , _ #) -> True + (# '\x2067'# , _ #) -> True + (# '\x2068'# , _ #) -> True + (# '\x2069'# , _ #) -> True + (# _, bytes #) -> go (i +# bytes) + pure $! go cur# + -- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous -- to 'Data.List.head'). __Warning:__ The behavior is undefined if the -- 'StringBuffer' is empty. Similar to 'utf8DecodeChar', if the character diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs index 05733c88e4..d7f72fcf2e 100644 --- a/compiler/GHC/Driver/Flags.hs +++ b/compiler/GHC/Driver/Flags.hs @@ -536,6 +536,7 @@ data WarningFlag = | Opt_WarnMissingKindSignatures -- Since 9.2 | Opt_WarnMissingExportedPatternSynonymSignatures -- since 9.2 | Opt_WarnRedundantStrictnessFlags -- Since 9.4 + | Opt_WarnUnicodeBidirectionalFormatCharacters -- Since 9.0.2 deriving (Eq, Ord, Show, Enum) -- | Return the names of a WarningFlag @@ -635,6 +636,7 @@ warnFlagNames wflag = case wflag of Opt_WarnOperatorWhitespace -> "operator-whitespace" :| [] Opt_WarnImplicitLift -> "implicit-lift" :| [] Opt_WarnMissingExportedPatternSynonymSignatures -> "missing-exported-pattern-synonym-signatures" :| [] + Opt_WarnUnicodeBidirectionalFormatCharacters -> "unicode-bidirectional-format-characters" :| [] -- ----------------------------------------------------------------------------- -- Standard sets of warning options @@ -725,7 +727,8 @@ standardWarnings -- see Note [Documenting warning flags] Opt_WarnSpaceAfterBang, Opt_WarnNonCanonicalMonadInstances, Opt_WarnNonCanonicalMonoidInstances, - Opt_WarnOperatorWhitespaceExtConflict + Opt_WarnOperatorWhitespaceExtConflict, + Opt_WarnUnicodeBidirectionalFormatCharacters ] -- | Things you get with -W diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs index 80d50a4589..26647df369 100644 --- a/compiler/GHC/Driver/Main.hs +++ b/compiler/GHC/Driver/Main.hs @@ -239,6 +239,7 @@ import GHC.Data.Maybe import GHC.Driver.Env.KnotVars import GHC.Types.Name.Set (NonCaffySet) import GHC.Driver.GenerateCgIPEStub (generateCgIPEStub) +import Data.List.NonEmpty (NonEmpty ((:|))) {- ********************************************************************** @@ -411,6 +412,17 @@ hscParse' mod_summary Nothing -> liftIO $ hGetStringBuffer src_filename let loc = mkRealSrcLoc (mkFastString src_filename) 1 1 + + let diag_opts = initDiagOpts dflags + when (wopt Opt_WarnUnicodeBidirectionalFormatCharacters dflags) $ do + case checkBidirectionFormatChars (PsLoc loc (BufPos 0)) buf of + Nothing -> pure () + Just chars@((eloc,chr,_) :| _) -> + let span = mkSrcSpanPs $ mkPsSpan eloc (advancePsLoc eloc chr) + in logDiagnostics $ singleMessage $ + mkPlainMsgEnvelope diag_opts span $ + GhcPsMessage $ PsWarnBidirectionalFormatChars chars + let parseMod | HsigFile == ms_hsc_src mod_summary = parseSignature | otherwise = parseModule @@ -469,9 +481,34 @@ hscParse' mod_summary hsc_env <- getHscEnv withPlugins hsc_env applyPluginAction res +checkBidirectionFormatChars :: PsLoc -> StringBuffer -> Maybe (NonEmpty (PsLoc, Char, String)) +checkBidirectionFormatChars start_loc sb + | containsBidirectionalFormatChar sb = Just $ go start_loc sb + | otherwise = Nothing + where + go :: PsLoc -> StringBuffer -> NonEmpty (PsLoc, Char, String) + go loc sb + | atEnd sb = panic "checkBidirectionFormatChars: no char found" + | otherwise = case nextChar sb of + (chr, sb) + | Just desc <- lookup chr bidirectionalFormatChars -> + (loc, chr, desc) :| go1 (advancePsLoc loc chr) sb + | otherwise -> go (advancePsLoc loc chr) sb + + go1 :: PsLoc -> StringBuffer -> [(PsLoc, Char, String)] + go1 loc sb + | atEnd sb = [] + | otherwise = case nextChar sb of + (chr, sb) + | Just desc <- lookup chr bidirectionalFormatChars -> + (loc, chr, desc) : go1 (advancePsLoc loc chr) sb + | otherwise -> go1 (advancePsLoc loc chr) sb + -- ----------------------------------------------------------------------------- -- | If the renamed source has been kept, extract it. Dump it if requested. + + extract_renamed_stuff :: ModSummary -> TcGblEnv -> Hsc RenamedStuff extract_renamed_stuff mod_summary tc_result = do let rn_info = getRenamedStuff tc_result diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index 478a0f7737..67fa5c0103 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -3209,7 +3209,8 @@ wWarningFlagsDeps = mconcat [ warnSpec Opt_WarnOperatorWhitespaceExtConflict, warnSpec Opt_WarnOperatorWhitespace, warnSpec Opt_WarnImplicitLift, - warnSpec Opt_WarnMissingExportedPatternSynonymSignatures + warnSpec Opt_WarnMissingExportedPatternSynonymSignatures, + warnSpec Opt_WarnUnicodeBidirectionalFormatCharacters ] -- | These @-\<blah\>@ flags can all be reversed with @-no-\<blah\>@ diff --git a/compiler/GHC/Parser/Errors/Ppr.hs b/compiler/GHC/Parser/Errors/Ppr.hs index 9396961cab..138a24ccd5 100644 --- a/compiler/GHC/Parser/Errors/Ppr.hs +++ b/compiler/GHC/Parser/Errors/Ppr.hs @@ -28,6 +28,7 @@ import GHC.Hs.Type (pprLHsContext) import GHC.Builtin.Names (allNameStrings) import GHC.Builtin.Types (filterCTuple) import qualified GHC.LanguageExtensions as LangExt +import Data.List.NonEmpty (NonEmpty((:|))) instance Diagnostic PsMessage where @@ -44,6 +45,20 @@ instance Diagnostic PsMessage where -> mkSimpleDecorated $ text "Multiple Haddock comments for a single entity are not allowed." $$ text "The extraneous comment will be ignored." + PsWarnBidirectionalFormatChars ((loc,_,desc) :| xs) + -> mkSimpleDecorated $ + text "A unicode bidirectional formatting character" <+> parens (text desc) + $$ text "was found at offset" <+> ppr (bufPos (psBufPos loc)) <+> text "in the file" + $$ (case xs of + [] -> empty + xs -> text "along with further bidirectional formatting characters at" <+> pprChars xs + where + pprChars [] = empty + pprChars ((loc,_,desc):xs) = text "offset" <+> ppr (bufPos (psBufPos loc)) <> text ":" <+> text desc + $$ pprChars xs + ) + $$ text "Bidirectional formatting characters may be rendered misleadingly in certain editors" + PsWarnTab tc -> mkSimpleDecorated $ text "Tab character found here" @@ -474,6 +489,7 @@ instance Diagnostic PsMessage where diagnosticReason = \case PsUnknownMessage m -> diagnosticReason m PsHeaderMessage m -> psHeaderMessageReason m + PsWarnBidirectionalFormatChars{} -> WarningWithFlag Opt_WarnUnicodeBidirectionalFormatCharacters PsWarnTab{} -> WarningWithFlag Opt_WarnTabs PsWarnTransitionalLayout{} -> WarningWithFlag Opt_WarnAlternativeLayoutRuleTransitional PsWarnOperatorWhitespaceExtConflict{} -> WarningWithFlag Opt_WarnOperatorWhitespaceExtConflict @@ -586,6 +602,7 @@ instance Diagnostic PsMessage where diagnosticHints = \case PsUnknownMessage m -> diagnosticHints m PsHeaderMessage m -> psHeaderMessageHints m + PsWarnBidirectionalFormatChars{} -> noHints PsWarnTab{} -> [SuggestUseSpaces] PsWarnTransitionalLayout{} -> noHints PsWarnOperatorWhitespaceExtConflict sym -> [SuggestUseWhitespaceAfter sym] diff --git a/compiler/GHC/Parser/Errors/Types.hs b/compiler/GHC/Parser/Errors/Types.hs index 181f793741..d39048c441 100644 --- a/compiler/GHC/Parser/Errors/Types.hs +++ b/compiler/GHC/Parser/Errors/Types.hs @@ -16,6 +16,8 @@ import GHC.Types.Name.Occurrence (OccName) import GHC.Types.Name.Reader import GHC.Unit.Module.Name import GHC.Utils.Outputable +import Data.List.NonEmpty (NonEmpty) +import GHC.Types.SrcLoc (PsLoc) -- The type aliases below are useful to make some type signatures a bit more -- descriptive, like 'handleWarningsThrowErrors' in 'GHC.Driver.Main'. @@ -72,6 +74,14 @@ data PsMessage -} | PsHeaderMessage !PsHeaderMessage + {-| PsWarnBidirectionalFormatChars is a warning (controlled by the -Wwarn-bidirectional-format-characters flag) + that occurs when unicode bi-directional format characters are found within in a file + + The 'PsLoc' contains the exact position in the buffer the character occured, and the + string contains a description of the character. + -} + | PsWarnBidirectionalFormatChars (NonEmpty (PsLoc, Char, String)) + {-| PsWarnTab is a warning (controlled by the -Wwarn-tabs flag) that occurs when tabulations (tabs) are found within a file. diff --git a/docs/users_guide/using-warnings.rst b/docs/users_guide/using-warnings.rst index 4cae76ec5a..8cdd5677fe 100644 --- a/docs/users_guide/using-warnings.rst +++ b/docs/users_guide/using-warnings.rst @@ -55,6 +55,7 @@ To reverse ``-Werror``, which makes all warnings into errors, use ``-Wwarn``. * :ghc-flag:`-Wstar-binder` * :ghc-flag:`-Woperator-whitespace-ext-conflict` * :ghc-flag:`-Wambiguous-fields` + * :ghc-flag:`-Wunicode-bidirectional-format-characters` The following flags are simple ways to select standard "packages" of warnings: @@ -2169,6 +2170,23 @@ of ``-W(no-)*``. This warning has no effect when :extension:`DuplicateRecordFields` is disabled. +.. ghc-flag:: -Wunicode-bidirectional-format-characters + :shortdesc: warn about the usage of unicode bidirectional layout override characters + :type: dynamic + :category: + + Explicit unicode bidirectional formatting characters can cause source code + to be rendered misleadingly in many viewers. We warn if any such character + is present in the source. + + Specifically, the characters disallowed by this warning + are those which are a part of the 'Explicit Formatting` + category of the `Unicode Bidirectional Character Type Listing + <https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types>`_ + + :since: 9.0.2 + + If you're feeling really paranoid, the :ghc-flag:`-dcore-lint` option is a good choice. It turns on heavyweight intra-pass sanity-checking within GHC. (It checks GHC's sanity, not yours.) diff --git a/testsuite/tests/warnings/should_fail/T20263.hs b/testsuite/tests/warnings/should_fail/T20263.hs new file mode 100644 index 0000000000..c04928e9a1 --- /dev/null +++ b/testsuite/tests/warnings/should_fail/T20263.hs @@ -0,0 +1,2 @@ +main :: IO () +main = print {- "Hello" -} "Goodbye" diff --git a/testsuite/tests/warnings/should_fail/T20263.stderr b/testsuite/tests/warnings/should_fail/T20263.stderr new file mode 100644 index 0000000000..5d7b260959 --- /dev/null +++ b/testsuite/tests/warnings/should_fail/T20263.stderr @@ -0,0 +1,8 @@ + +T20263.hs:2:17: error: [-Wunicode-bidirectional-format-characters (in -Wdefault), -Werror=unicode-bidirectional-format-characters] + A unicode bidirectional formatting character (U+2067 RIGHT-TO-LEFT ISOLATE (RLI)) + was found at offset 30 in the file + along with further bidirectional formatting characters at offset 31: U+2066 LEFT-TO-RIGHT ISOLATE (LRI) + offset 40: U+2069 POP DIRECTIONAL ISOLATE (PDI) + offset 41: U+2066 LEFT-TO-RIGHT ISOLATE (LRI) + Bidirectional formatting characters may be rendered misleadingly in certain editors diff --git a/testsuite/tests/warnings/should_fail/all.T b/testsuite/tests/warnings/should_fail/all.T index 075c790222..6eaf7af6fd 100644 --- a/testsuite/tests/warnings/should_fail/all.T +++ b/testsuite/tests/warnings/should_fail/all.T @@ -16,3 +16,4 @@ test('CaretDiagnostics1', ['-fdiagnostics-show-caret -ferror-spans']) test('CaretDiagnostics2', normal, compile_fail, ['-fdiagnostics-show-caret']) test('Colour', normal, compile_fail, ['-fdiagnostics-color=always']) +test('T20263', normal, compile_fail, ['-Wunicode-bidirectional-format-characters -Werror']) |