summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZubin Duggal <zubin.duggal@gmail.com>2021-10-12 18:25:41 +0530
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-10-26 12:36:24 -0400
commit0255ef38b1bb0d4f3608bf92ebc8a93955ccb30a (patch)
tree718653d95e2d43388f6e20f7fa2057aee1282baf
parent0f7541dc37d25d8a1056586bbeb57bf0dd2826a0 (diff)
downloadhaskell-0255ef38b1bb0d4f3608bf92ebc8a93955ccb30a.tar.gz
Warn if unicode bidirectional formatting characters are found in the source (#20263)
-rw-r--r--compiler/GHC/Data/StringBuffer.hs58
-rw-r--r--compiler/GHC/Driver/Flags.hs5
-rw-r--r--compiler/GHC/Driver/Main.hs37
-rw-r--r--compiler/GHC/Driver/Session.hs3
-rw-r--r--compiler/GHC/Parser/Errors/Ppr.hs17
-rw-r--r--compiler/GHC/Parser/Errors/Types.hs10
-rw-r--r--docs/users_guide/using-warnings.rst18
-rw-r--r--testsuite/tests/warnings/should_fail/T20263.hs2
-rw-r--r--testsuite/tests/warnings/should_fail/T20263.stderr8
-rw-r--r--testsuite/tests/warnings/should_fail/all.T1
10 files changed, 156 insertions, 3 deletions
diff --git a/compiler/GHC/Data/StringBuffer.hs b/compiler/GHC/Data/StringBuffer.hs
index 749f64b09e..03d720eb37 100644
--- a/compiler/GHC/Data/StringBuffer.hs
+++ b/compiler/GHC/Data/StringBuffer.hs
@@ -47,7 +47,11 @@ module GHC.Data.StringBuffer
-- * Parsing integers
parseUnsignedInteger,
- ) where
+
+ -- * Checking for bi-directional format characters
+ containsBidirectionalFormatChar,
+ bidirectionalFormatChars
+ ) where
import GHC.Prelude
@@ -214,6 +218,58 @@ nextChar (StringBuffer buf len (I# cur#)) =
let cur' = I# (cur# +# nBytes#) in
return (C# c#, StringBuffer buf len cur')
+
+bidirectionalFormatChars :: [(Char,String)]
+bidirectionalFormatChars =
+ [ ('\x202a' , "U+202A LEFT-TO-RIGHT EMBEDDING (LRE)")
+ , ('\x202b' , "U+202B RIGHT-TO-LEFT EMBEDDING (RLE)")
+ , ('\x202c' , "U+202C POP DIRECTIONAL FORMATTING (PDF)")
+ , ('\x202d' , "U+202D LEFT-TO-RIGHT OVERRIDE (LRO)")
+ , ('\x202e' , "U+202E RIGHT-TO-LEFT OVERRIDE (RLO)")
+ , ('\x2066' , "U+2066 LEFT-TO-RIGHT ISOLATE (LRI)")
+ , ('\x2067' , "U+2067 RIGHT-TO-LEFT ISOLATE (RLI)")
+ , ('\x2068' , "U+2068 FIRST STRONG ISOLATE (FSI)")
+ , ('\x2069' , "U+2069 POP DIRECTIONAL ISOLATE (PDI)")
+ ]
+
+{-| Returns true if the buffer contains Unicode bi-directional formatting
+characters.
+
+https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types
+
+Bidirectional format characters are one of
+'\x202a' : "U+202A LEFT-TO-RIGHT EMBEDDING (LRE)"
+'\x202b' : "U+202B RIGHT-TO-LEFT EMBEDDING (RLE)"
+'\x202c' : "U+202C POP DIRECTIONAL FORMATTING (PDF)"
+'\x202d' : "U+202D LEFT-TO-RIGHT OVERRIDE (LRO)"
+'\x202e' : "U+202E RIGHT-TO-LEFT OVERRIDE (RLO)"
+'\x2066' : "U+2066 LEFT-TO-RIGHT ISOLATE (LRI)"
+'\x2067' : "U+2067 RIGHT-TO-LEFT ISOLATE (RLI)"
+'\x2068' : "U+2068 FIRST STRONG ISOLATE (FSI)"
+'\x2069' : "U+2069 POP DIRECTIONAL ISOLATE (PDI)"
+
+This list is encoded in 'bidirectionalFormatChars'
+
+-}
+{-# INLINE containsBidirectionalFormatChar #-}
+containsBidirectionalFormatChar :: StringBuffer -> Bool
+containsBidirectionalFormatChar (StringBuffer buf (I# len#) (I# cur#))
+ = inlinePerformIO $ unsafeWithForeignPtr buf $ \(Ptr a#) -> do
+ let go :: Int# -> Bool
+ go i | isTrue# (i >=# len#) = False
+ | otherwise = case utf8DecodeCharAddr# a# i of
+ (# '\x202a'# , _ #) -> True
+ (# '\x202b'# , _ #) -> True
+ (# '\x202c'# , _ #) -> True
+ (# '\x202d'# , _ #) -> True
+ (# '\x202e'# , _ #) -> True
+ (# '\x2066'# , _ #) -> True
+ (# '\x2067'# , _ #) -> True
+ (# '\x2068'# , _ #) -> True
+ (# '\x2069'# , _ #) -> True
+ (# _, bytes #) -> go (i +# bytes)
+ pure $! go cur#
+
-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
-- to 'Data.List.head'). __Warning:__ The behavior is undefined if the
-- 'StringBuffer' is empty. Similar to 'utf8DecodeChar', if the character
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index 05733c88e4..d7f72fcf2e 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -536,6 +536,7 @@ data WarningFlag =
| Opt_WarnMissingKindSignatures -- Since 9.2
| Opt_WarnMissingExportedPatternSynonymSignatures -- since 9.2
| Opt_WarnRedundantStrictnessFlags -- Since 9.4
+ | Opt_WarnUnicodeBidirectionalFormatCharacters -- Since 9.0.2
deriving (Eq, Ord, Show, Enum)
-- | Return the names of a WarningFlag
@@ -635,6 +636,7 @@ warnFlagNames wflag = case wflag of
Opt_WarnOperatorWhitespace -> "operator-whitespace" :| []
Opt_WarnImplicitLift -> "implicit-lift" :| []
Opt_WarnMissingExportedPatternSynonymSignatures -> "missing-exported-pattern-synonym-signatures" :| []
+ Opt_WarnUnicodeBidirectionalFormatCharacters -> "unicode-bidirectional-format-characters" :| []
-- -----------------------------------------------------------------------------
-- Standard sets of warning options
@@ -725,7 +727,8 @@ standardWarnings -- see Note [Documenting warning flags]
Opt_WarnSpaceAfterBang,
Opt_WarnNonCanonicalMonadInstances,
Opt_WarnNonCanonicalMonoidInstances,
- Opt_WarnOperatorWhitespaceExtConflict
+ Opt_WarnOperatorWhitespaceExtConflict,
+ Opt_WarnUnicodeBidirectionalFormatCharacters
]
-- | Things you get with -W
diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs
index 80d50a4589..26647df369 100644
--- a/compiler/GHC/Driver/Main.hs
+++ b/compiler/GHC/Driver/Main.hs
@@ -239,6 +239,7 @@ import GHC.Data.Maybe
import GHC.Driver.Env.KnotVars
import GHC.Types.Name.Set (NonCaffySet)
import GHC.Driver.GenerateCgIPEStub (generateCgIPEStub)
+import Data.List.NonEmpty (NonEmpty ((:|)))
{- **********************************************************************
@@ -411,6 +412,17 @@ hscParse' mod_summary
Nothing -> liftIO $ hGetStringBuffer src_filename
let loc = mkRealSrcLoc (mkFastString src_filename) 1 1
+
+ let diag_opts = initDiagOpts dflags
+ when (wopt Opt_WarnUnicodeBidirectionalFormatCharacters dflags) $ do
+ case checkBidirectionFormatChars (PsLoc loc (BufPos 0)) buf of
+ Nothing -> pure ()
+ Just chars@((eloc,chr,_) :| _) ->
+ let span = mkSrcSpanPs $ mkPsSpan eloc (advancePsLoc eloc chr)
+ in logDiagnostics $ singleMessage $
+ mkPlainMsgEnvelope diag_opts span $
+ GhcPsMessage $ PsWarnBidirectionalFormatChars chars
+
let parseMod | HsigFile == ms_hsc_src mod_summary
= parseSignature
| otherwise = parseModule
@@ -469,9 +481,34 @@ hscParse' mod_summary
hsc_env <- getHscEnv
withPlugins hsc_env applyPluginAction res
+checkBidirectionFormatChars :: PsLoc -> StringBuffer -> Maybe (NonEmpty (PsLoc, Char, String))
+checkBidirectionFormatChars start_loc sb
+ | containsBidirectionalFormatChar sb = Just $ go start_loc sb
+ | otherwise = Nothing
+ where
+ go :: PsLoc -> StringBuffer -> NonEmpty (PsLoc, Char, String)
+ go loc sb
+ | atEnd sb = panic "checkBidirectionFormatChars: no char found"
+ | otherwise = case nextChar sb of
+ (chr, sb)
+ | Just desc <- lookup chr bidirectionalFormatChars ->
+ (loc, chr, desc) :| go1 (advancePsLoc loc chr) sb
+ | otherwise -> go (advancePsLoc loc chr) sb
+
+ go1 :: PsLoc -> StringBuffer -> [(PsLoc, Char, String)]
+ go1 loc sb
+ | atEnd sb = []
+ | otherwise = case nextChar sb of
+ (chr, sb)
+ | Just desc <- lookup chr bidirectionalFormatChars ->
+ (loc, chr, desc) : go1 (advancePsLoc loc chr) sb
+ | otherwise -> go1 (advancePsLoc loc chr) sb
+
-- -----------------------------------------------------------------------------
-- | If the renamed source has been kept, extract it. Dump it if requested.
+
+
extract_renamed_stuff :: ModSummary -> TcGblEnv -> Hsc RenamedStuff
extract_renamed_stuff mod_summary tc_result = do
let rn_info = getRenamedStuff tc_result
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 478a0f7737..67fa5c0103 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -3209,7 +3209,8 @@ wWarningFlagsDeps = mconcat [
warnSpec Opt_WarnOperatorWhitespaceExtConflict,
warnSpec Opt_WarnOperatorWhitespace,
warnSpec Opt_WarnImplicitLift,
- warnSpec Opt_WarnMissingExportedPatternSynonymSignatures
+ warnSpec Opt_WarnMissingExportedPatternSynonymSignatures,
+ warnSpec Opt_WarnUnicodeBidirectionalFormatCharacters
]
-- | These @-\<blah\>@ flags can all be reversed with @-no-\<blah\>@
diff --git a/compiler/GHC/Parser/Errors/Ppr.hs b/compiler/GHC/Parser/Errors/Ppr.hs
index 9396961cab..138a24ccd5 100644
--- a/compiler/GHC/Parser/Errors/Ppr.hs
+++ b/compiler/GHC/Parser/Errors/Ppr.hs
@@ -28,6 +28,7 @@ import GHC.Hs.Type (pprLHsContext)
import GHC.Builtin.Names (allNameStrings)
import GHC.Builtin.Types (filterCTuple)
import qualified GHC.LanguageExtensions as LangExt
+import Data.List.NonEmpty (NonEmpty((:|)))
instance Diagnostic PsMessage where
@@ -44,6 +45,20 @@ instance Diagnostic PsMessage where
-> mkSimpleDecorated $
text "Multiple Haddock comments for a single entity are not allowed." $$
text "The extraneous comment will be ignored."
+ PsWarnBidirectionalFormatChars ((loc,_,desc) :| xs)
+ -> mkSimpleDecorated $
+ text "A unicode bidirectional formatting character" <+> parens (text desc)
+ $$ text "was found at offset" <+> ppr (bufPos (psBufPos loc)) <+> text "in the file"
+ $$ (case xs of
+ [] -> empty
+ xs -> text "along with further bidirectional formatting characters at" <+> pprChars xs
+ where
+ pprChars [] = empty
+ pprChars ((loc,_,desc):xs) = text "offset" <+> ppr (bufPos (psBufPos loc)) <> text ":" <+> text desc
+ $$ pprChars xs
+ )
+ $$ text "Bidirectional formatting characters may be rendered misleadingly in certain editors"
+
PsWarnTab tc
-> mkSimpleDecorated $
text "Tab character found here"
@@ -474,6 +489,7 @@ instance Diagnostic PsMessage where
diagnosticReason = \case
PsUnknownMessage m -> diagnosticReason m
PsHeaderMessage m -> psHeaderMessageReason m
+ PsWarnBidirectionalFormatChars{} -> WarningWithFlag Opt_WarnUnicodeBidirectionalFormatCharacters
PsWarnTab{} -> WarningWithFlag Opt_WarnTabs
PsWarnTransitionalLayout{} -> WarningWithFlag Opt_WarnAlternativeLayoutRuleTransitional
PsWarnOperatorWhitespaceExtConflict{} -> WarningWithFlag Opt_WarnOperatorWhitespaceExtConflict
@@ -586,6 +602,7 @@ instance Diagnostic PsMessage where
diagnosticHints = \case
PsUnknownMessage m -> diagnosticHints m
PsHeaderMessage m -> psHeaderMessageHints m
+ PsWarnBidirectionalFormatChars{} -> noHints
PsWarnTab{} -> [SuggestUseSpaces]
PsWarnTransitionalLayout{} -> noHints
PsWarnOperatorWhitespaceExtConflict sym -> [SuggestUseWhitespaceAfter sym]
diff --git a/compiler/GHC/Parser/Errors/Types.hs b/compiler/GHC/Parser/Errors/Types.hs
index 181f793741..d39048c441 100644
--- a/compiler/GHC/Parser/Errors/Types.hs
+++ b/compiler/GHC/Parser/Errors/Types.hs
@@ -16,6 +16,8 @@ import GHC.Types.Name.Occurrence (OccName)
import GHC.Types.Name.Reader
import GHC.Unit.Module.Name
import GHC.Utils.Outputable
+import Data.List.NonEmpty (NonEmpty)
+import GHC.Types.SrcLoc (PsLoc)
-- The type aliases below are useful to make some type signatures a bit more
-- descriptive, like 'handleWarningsThrowErrors' in 'GHC.Driver.Main'.
@@ -72,6 +74,14 @@ data PsMessage
-}
| PsHeaderMessage !PsHeaderMessage
+ {-| PsWarnBidirectionalFormatChars is a warning (controlled by the -Wwarn-bidirectional-format-characters flag)
+ that occurs when unicode bi-directional format characters are found within in a file
+
+ The 'PsLoc' contains the exact position in the buffer the character occured, and the
+ string contains a description of the character.
+ -}
+ | PsWarnBidirectionalFormatChars (NonEmpty (PsLoc, Char, String))
+
{-| PsWarnTab is a warning (controlled by the -Wwarn-tabs flag) that occurs
when tabulations (tabs) are found within a file.
diff --git a/docs/users_guide/using-warnings.rst b/docs/users_guide/using-warnings.rst
index 4cae76ec5a..8cdd5677fe 100644
--- a/docs/users_guide/using-warnings.rst
+++ b/docs/users_guide/using-warnings.rst
@@ -55,6 +55,7 @@ To reverse ``-Werror``, which makes all warnings into errors, use ``-Wwarn``.
* :ghc-flag:`-Wstar-binder`
* :ghc-flag:`-Woperator-whitespace-ext-conflict`
* :ghc-flag:`-Wambiguous-fields`
+ * :ghc-flag:`-Wunicode-bidirectional-format-characters`
The following flags are simple ways to select standard "packages" of warnings:
@@ -2169,6 +2170,23 @@ of ``-W(no-)*``.
This warning has no effect when :extension:`DuplicateRecordFields` is
disabled.
+.. ghc-flag:: -Wunicode-bidirectional-format-characters
+ :shortdesc: warn about the usage of unicode bidirectional layout override characters
+ :type: dynamic
+ :category:
+
+ Explicit unicode bidirectional formatting characters can cause source code
+ to be rendered misleadingly in many viewers. We warn if any such character
+ is present in the source.
+
+ Specifically, the characters disallowed by this warning
+ are those which are a part of the 'Explicit Formatting`
+ category of the `Unicode Bidirectional Character Type Listing
+ <https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types>`_
+
+ :since: 9.0.2
+
+
If you're feeling really paranoid, the :ghc-flag:`-dcore-lint` option is a good choice.
It turns on heavyweight intra-pass sanity-checking within GHC. (It checks GHC's
sanity, not yours.)
diff --git a/testsuite/tests/warnings/should_fail/T20263.hs b/testsuite/tests/warnings/should_fail/T20263.hs
new file mode 100644
index 0000000000..c04928e9a1
--- /dev/null
+++ b/testsuite/tests/warnings/should_fail/T20263.hs
@@ -0,0 +1,2 @@
+main :: IO ()
+main = print {- ⁧⁦"Hello" ⁩⁦-} "Goodbye"
diff --git a/testsuite/tests/warnings/should_fail/T20263.stderr b/testsuite/tests/warnings/should_fail/T20263.stderr
new file mode 100644
index 0000000000..5d7b260959
--- /dev/null
+++ b/testsuite/tests/warnings/should_fail/T20263.stderr
@@ -0,0 +1,8 @@
+
+T20263.hs:2:17: error: [-Wunicode-bidirectional-format-characters (in -Wdefault), -Werror=unicode-bidirectional-format-characters]
+ A unicode bidirectional formatting character (U+2067 RIGHT-TO-LEFT ISOLATE (RLI))
+ was found at offset 30 in the file
+ along with further bidirectional formatting characters at offset 31: U+2066 LEFT-TO-RIGHT ISOLATE (LRI)
+ offset 40: U+2069 POP DIRECTIONAL ISOLATE (PDI)
+ offset 41: U+2066 LEFT-TO-RIGHT ISOLATE (LRI)
+ Bidirectional formatting characters may be rendered misleadingly in certain editors
diff --git a/testsuite/tests/warnings/should_fail/all.T b/testsuite/tests/warnings/should_fail/all.T
index 075c790222..6eaf7af6fd 100644
--- a/testsuite/tests/warnings/should_fail/all.T
+++ b/testsuite/tests/warnings/should_fail/all.T
@@ -16,3 +16,4 @@ test('CaretDiagnostics1',
['-fdiagnostics-show-caret -ferror-spans'])
test('CaretDiagnostics2', normal, compile_fail, ['-fdiagnostics-show-caret'])
test('Colour', normal, compile_fail, ['-fdiagnostics-color=always'])
+test('T20263', normal, compile_fail, ['-Wunicode-bidirectional-format-characters -Werror'])