diff options
author | Matthew Pickering <matthewtpickering@gmail.com> | 2022-02-18 17:27:32 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-02-24 20:25:06 -0500 |
commit | 6555b68ca0678827b89c5624db071f5a485d18b7 (patch) | |
tree | 9dbcd231add48a179d7751606523865029d2fc1a /linters | |
parent | 06c18990fb6f10aaf1907ba8f0fe3f1a138da159 (diff) | |
download | haskell-6555b68ca0678827b89c5624db071f5a485d18b7.tar.gz |
Move linters into the tree
This MR moves the GHC linters into the tree, so that they can be run directly using Hadrian.
* Query all files tracked by Git instead of using changed files, so that we can run the exact same linting step locally and in a merge request.
* Only check that the changelogs don't contain TBA when RELEASE=YES.
* Add hadrian/lint script, which runs all the linting steps.
* Ensure the hlint job exits with a failure if hlint is not installed (otherwise we were ignoring the failure). Given that hlint doesn't seem to be available in CI at the moment, I've temporarily allowed failure in the hlint job.
* Run all linting tests in CI using hadrian.
Diffstat (limited to 'linters')
-rw-r--r-- | linters/lint-commit-msg/Main.hs | 160 | ||||
-rw-r--r-- | linters/lint-commit-msg/cabal.project | 2 | ||||
-rw-r--r-- | linters/lint-commit-msg/lint-commit-msg.cabal | 29 | ||||
-rw-r--r-- | linters/lint-notes/Main.hs | 62 | ||||
-rw-r--r-- | linters/lint-notes/Makefile | 17 | ||||
-rw-r--r-- | linters/lint-notes/Notes.hs | 186 | ||||
-rw-r--r-- | linters/lint-notes/check.sh | 30 | ||||
-rw-r--r-- | linters/lint-notes/ghc.mk | 18 | ||||
-rw-r--r-- | linters/lint-notes/lint-notes.cabal | 21 | ||||
-rw-r--r-- | linters/lint-notes/test | 25 | ||||
-rw-r--r-- | linters/lint-submodule-refs/Main.hs | 77 | ||||
-rw-r--r-- | linters/lint-submodule-refs/cabal.project | 2 | ||||
-rw-r--r-- | linters/lint-submodule-refs/lint-submodule-refs.cabal | 28 | ||||
-rw-r--r-- | linters/lint-whitespace/Main.hs | 265 | ||||
-rw-r--r-- | linters/lint-whitespace/cabal.project | 2 | ||||
-rw-r--r-- | linters/lint-whitespace/ghc.mk | 18 | ||||
-rw-r--r-- | linters/lint-whitespace/lint-whitespace.cabal | 31 | ||||
-rw-r--r-- | linters/linters-common/Linters/Common.hs | 197 | ||||
-rw-r--r-- | linters/linters-common/ghc.mk | 17 | ||||
-rw-r--r-- | linters/linters-common/linters-common.cabal | 30 |
20 files changed, 1217 insertions, 0 deletions
diff --git a/linters/lint-commit-msg/Main.hs b/linters/lint-commit-msg/Main.hs new file mode 100644 index 0000000000..cf12b19ba5 --- /dev/null +++ b/linters/lint-commit-msg/Main.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE OverloadedStrings #-} + +{-# OPTIONS_GHC -Wno-missing-local-signatures #-} + +module Main where + +-- base +import Control.Monad + ( forM, forM_, unless, when ) +import Data.Maybe + ( isJust ) +import System.Environment + ( getArgs ) +import System.Exit + ( ExitCode(..), exitWith ) + +-- mtl +import Control.Monad.Writer + ( liftIO, execWriter, tell ) + +-- text +import Data.Text + ( Text ) +import qualified Data.Text as T +import qualified Data.Text.IO as T + ( putStrLn ) + +-- linters-common +import Linters.Common + ( LintMsg(..), LintLvl(..) + , gitCatCommit, gitNormCid, tshow + ) + +-------------------------------------------------------------------------------- + +main :: IO () +main = do + dir:refs <- getArgs >>= \case + [] -> fail "usage: lint-commit-msg <git-repo> [<commit-id>+]" + x -> return x + + stats <- forM (map T.pack refs) $ \ref -> do + cid <- gitNormCid dir ref + (_, msg) <- gitCatCommit dir cid + + let cmsgs = lintMsg msg + + liftIO $ do + -- putStrLn (T.unpack cid) + -- forM_ (zip [1::Int ..] (T.lines msg)) $ \(lno,l) -> do + -- putStrLn (show lno <> "\t" <> show l) + -- putStrLn "--" + + let status = maximum (Nothing : [ Just lvl | LintMsg lvl _ _ _ <- cmsgs ]) + ok = status < Just LintLvlErr + + unless (null cmsgs) $ do + putStrLn "=====================================================================================" + putStrLn ("commit " <> T.unpack cid <> " has linter issues:") + putStrLn "" + forM_ cmsgs $ \(LintMsg lvl lno l m) -> do + let lvls = case lvl of + LintLvlErr -> "*ERROR*" + LintLvlWarn -> "Warning" + putStrLn (" " <> lvls <> " on line " <> show lno <> ": " <> T.unpack m) + putStrLn (" > " <> show l) + putStrLn "" + return () + + unless ok $ + putStrLn ("Validation FAILED for " <> T.unpack cid) + + return status + + unless (null $ filter isJust stats) $ + T.putStrLn "=====================================================================================" + + let stats1 = maximum (Nothing : stats) + + unless (stats1 == Nothing) $ do + T.putStrLn "There were commit message linter issues! For more information see" + T.putStrLn " http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html" + T.putStrLn "" + + unless (stats1 < Just LintLvlErr) $ do + T.putStrLn "Validation FAILED because at least one commit had linter errors!" + exitWith (ExitFailure 1) + + T.putStrLn "Commit message validation passed!" + +-- | Commit message linter +lintMsg :: Text -> [LintMsg] +lintMsg msg0 = execWriter $ do + -- subject-line validations + if | T.null (T.strip subj) -> errSubj "empty subject line" + | otherwise -> do + when (T.stripStart subj /= subj) $ + errSubj "subject line with leading whitespace" + + when (T.stripEnd subj /= subj) $ + warnSubj "subject line with trailing whitespace" + + when (T.any (== '\t') subj) $ + errSubj "subject line contains TAB" + + if | slen > 80 -> errSubj ("subject line longer than 80 characters (was " <> tshow slen <> " characters)" + <> " -- , ideally subject line is at most 50 characters long") + | slen > 50 -> warnSubj ("subject line longer than 50 characters (was " <> tshow slen <> " characters)") + | slen < 8 -> errSubj ("subject line shorter than 8 characters (was " <> tshow slen <> " characters)") + | otherwise -> return () + + -- 2nd-line & body validations + case lns of + [] -> return () -- empty commit msg -- will have caused already an LintLvlErr + [_] -> return () -- single-line commit msg + (_:line2:body) -> do + -- 2nd line validations + if | not (T.null line2) + -> tell [LintMsg LintLvlErr 2 line2 "2nd line must be empty"] + | null body + -> tell [LintMsg LintLvlWarn 2 line2 "2nd line exists, but no commit msg body found"] + | otherwise -> return () + + -- body validations + forM_ (zip [3..] body) $ \(lineno,l) -> do + let llen = T.length l + warnBody m = tell [LintMsg LintLvlWarn lineno l m] + errBody m = tell [LintMsg LintLvlErr lineno l m] + + when (T.stripEnd l /= l) $ warnBody "trailing whitespace" + + when (T.any (== '\t') l) $ warnBody "contains TAB character" + + when (T.isPrefixOf "Summary:" l) $ + warnBody "redundant Phabricator 'Summary:' tag detected -- please trim your commit message" + + when (T.isPrefixOf "Summary: Signed-off-by:" l) $ + errBody "'Signed-Off-by:'-marker not starting on first column" + + if | llen > 100 -> errBody ("body line longer than 100 characters (was " + <> tshow llen <> " characters) -- " + <> "ideally body lines are at most 72 characters long") + | llen > 72 -> warnBody ("body line longer than 72 characters (was " + <> tshow llen <> " characters)") + | otherwise -> return () + + return () + where + warnSubj m = tell [LintMsg LintLvlWarn 1 subj m] + errSubj m = tell [LintMsg LintLvlErr 1 subj m] + + lns = T.lines msg0 + + subj | (l0:_) <- lns = l0 + | otherwise = "" + + slen = T.length subj diff --git a/linters/lint-commit-msg/cabal.project b/linters/lint-commit-msg/cabal.project new file mode 100644 index 0000000000..444b636706 --- /dev/null +++ b/linters/lint-commit-msg/cabal.project @@ -0,0 +1,2 @@ +packages: ., + ../linters-common diff --git a/linters/lint-commit-msg/lint-commit-msg.cabal b/linters/lint-commit-msg/lint-commit-msg.cabal new file mode 100644 index 0000000000..8020a925a4 --- /dev/null +++ b/linters/lint-commit-msg/lint-commit-msg.cabal @@ -0,0 +1,29 @@ +cabal-version: 3.0 +name: lint-commit-msg +version: 0.1.0.0 +synopsis: Lint a commit message +license: GPL-3.0-only +author: The GHC team +build-type: Simple + +executable lint-commit-msg + ghc-options: + -Wall + + default-language: + Haskell2010 + + hs-source-dirs: + . + + main-is: + Main.hs + + build-depends: + linters-common, + mtl + >=2.1 && <2.3, + base + >= 4.14 && < 5, + text + >= 1.2 && < 2.1 diff --git a/linters/lint-notes/Main.hs b/linters/lint-notes/Main.hs new file mode 100644 index 0000000000..02ee3f11d4 --- /dev/null +++ b/linters/lint-notes/Main.hs @@ -0,0 +1,62 @@ +import qualified Data.Set as S +import System.Process +import System.Environment + +import Notes + +usage :: IO a +usage = do + putStrLn $ unlines + [ "usage:" + , " lint-notes <mode>" + , " lint-notes <mode> @<response-file>" + , " lint-notes <mode> <file>" + , "" + , "valid modes:" + , " dump dump all Note definitions and references" + , " defs dump all Note definitions" + , " refs dump all Note references" + , " unreferenced dump all unreferenced Note definitions" + , " broken-refs dump all references to missing Notes" + ] + fail "invalid usage" + +main :: IO () +main = do + args <- getArgs + + let printNoteDefs = putStrLn . unlines . map showNoteDef + printNoteRefs = putStrLn . unlines . map showNoteRef + printNoteRefsSugg (bad, sugg) = do + putStrLn . showNoteRef $ bad + putStrLn $ " >" ++ showNoteDef sugg + + + parseMode :: String -> Maybe (NoteDb -> IO ()) + parseMode "dump" = Just $ putStrLn . showNoteDb + parseMode "unreferenced" = Just $ printNoteDefs . S.toList . unreferencedNotes + parseMode "defs" = Just $ printNoteDefs . allNoteDefs + parseMode "refs" = Just $ printNoteRefs . allNoteRefs + parseMode "broken-refs" = Just $ printNoteRefs . map fst . brokenNoteRefs + parseMode "broken-refs-suggest" = Just $ mapM_ printNoteRefsSugg . brokenNoteRefs + parseMode _ = Nothing + + (mode, files) <- case args of + [mode, "@-"] -> do + files <- lines <$> getContents + return (parseMode mode, files) + [mode, '@':respFile] -> do + files <- lines <$> readFile respFile + return (parseMode mode, files) + [mode] -> do + let excludeList = + [ "testsuite/tests/linters/notes.stdout" + , "linters/lint-notes/test" ] + files <- lines <$> readProcess "git" ["ls-tree", "--name-only", "-r", "HEAD"] "" + return (parseMode mode, filter (`notElem` excludeList) files) + _ -> usage + + case mode of + Just run -> filesNotes files >>= run + Nothing -> return () + diff --git a/linters/lint-notes/Makefile b/linters/lint-notes/Makefile new file mode 100644 index 0000000000..71500f0147 --- /dev/null +++ b/linters/lint-notes/Makefile @@ -0,0 +1,17 @@ +# ----------------------------------------------------------------------------- +# +# (c) 2009 The University of Glasgow +# +# This file is part of the GHC build system. +# +# To understand how the build system works and how to modify it, see +# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture +# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying +# +# ----------------------------------------------------------------------------- + +dir = linters/lint-notes +TOP = ../.. +include $(TOP)/mk/sub-makefile.mk + +FAST_MAKE_OPTS += stage=none diff --git a/linters/lint-notes/Notes.hs b/linters/lint-notes/Notes.hs new file mode 100644 index 0000000000..cf267d8d67 --- /dev/null +++ b/linters/lint-notes/Notes.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE BangPatterns #-} + +module Notes where + +import Data.Either +import Data.Foldable +import qualified Data.ByteString as BS +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Map.Strict as M +import qualified Data.Set as S +import System.Directory (doesFileExist) +import Data.Array +import Data.List (sortBy) +import Data.Ord + +data SrcLoc = SrcLoc { fileName :: FilePath + , row :: !Int + , column :: !Int + } + deriving (Eq, Ord, Show) + +showSrcLoc :: SrcLoc -> String +showSrcLoc loc = + concat [fileName loc, ":", show (row loc), ":", show (column loc), ":"] + +newtype NoteName = NoteName T.Text + deriving (Eq, Ord, Show) + +showNoteName :: NoteName -> String +showNoteName (NoteName x) = "Note [" <> T.unpack x <> "]" + +data NoteDef = NoteDef { noteDefSrcLoc :: !SrcLoc + , noteDefName :: !NoteName + } + deriving (Eq, Ord, Show) + +showNoteDef :: NoteDef -> String +showNoteDef (NoteDef{noteDefSrcLoc=loc, noteDefName=name}) = + "def " <> showSrcLoc loc <> " " <> showNoteName name + +data NoteRef = NoteRef { noteRefSrcLoc :: !SrcLoc + , noteRefName :: !NoteName + } + deriving (Eq, Ord, Show) + +showNoteRef :: NoteRef -> String +showNoteRef (NoteRef{noteRefSrcLoc=loc, noteRefName=name}) = + "ref " <> showSrcLoc loc <> " " <> showNoteName name + +findNotes :: FilePath -> T.Text -> [Either NoteRef NoteDef] +findNotes fname t = + go 1 (T.lines t) + where + go :: Int -> [T.Text] -> [Either NoteRef NoteDef] + -- Note definitions: + -- We look for a "Note [" token with a "~~~" rule beneath it. + go !lineNo (l0 : l1 : ls) + | hasRule = Right (NoteDef srcLoc name) : go (lineNo+2) ls + where + (prefix, rest) = T.breakOn "Note [" l0 + startCol = T.length prefix + hasRule = T.take 3 (T.drop startCol l1) == "~~~" + srcLoc = SrcLoc fname lineNo startCol + name = NoteName $ T.takeWhile (/= ']') $ T.drop 6 rest + + -- Note references: + -- We look for a "Note [...]", strip away any beginning-of-line + -- comment symbols, and merge whitespace. + go lineNo (l0 : ls) = + [ Left (NoteRef srcLoc (NoteName name)) + | (prefix, rest) <- T.breakOnAll "Note [" l0 + , let startCol = T.length prefix + srcLoc = SrcLoc fname lineNo startCol + (name, suffix) = T.breakOn "]" (T.drop 6 rest<>" "<>T.concat (map stripBeginningOfLineComment $ take 1 ls)) + , "]" `T.isPrefixOf` suffix + ] ++ go (lineNo+1) ls + + go _lineNo [] = [] + +stripBeginningOfLineComment :: T.Text -> T.Text +stripBeginningOfLineComment = T.pack . go . T.unpack + where + -- This implements the following regular expression substitution: + -- + -- s/$ *[(\-\- )\#( \* )] */ / + + go :: String -> String + go ('#':rest) = finish rest + go ('-':'-':rest) = finish rest + go (' ':'*':rest) = finish rest + go ('/':'/':rest) = finish rest + go (' ':rest) = go rest + go xs = finish xs + + finish :: String -> String + finish = dropWhile (==' ') + +data NoteDb = NoteDb { noteRefs :: M.Map FilePath (S.Set NoteRef) + , noteDefs :: M.Map NoteName (S.Set NoteDef) + } + +instance Monoid NoteDb where + mempty = NoteDb M.empty M.empty + +instance Semigroup NoteDb where + NoteDb a b <> NoteDb c d = + NoteDb (M.unionWith (<>) a c) (M.unionWith (<>) b d) + +allNoteDefs :: NoteDb -> [NoteDef] +allNoteDefs db = + [ def + | defs <- M.elems (noteDefs db) + , def <- S.toList defs + ] + +allNoteRefs :: NoteDb -> [NoteRef] +allNoteRefs db = + [ ref + | (_fname, refs) <- M.toList (noteRefs db) + , ref <- S.toList refs + ] + +showNoteDb :: NoteDb -> String +showNoteDb db = unlines $ + map showNoteRef (allNoteRefs db) + ++ + map showNoteDef (allNoteDefs db) + +filesNotes :: [FilePath] + -> IO NoteDb +filesNotes = fmap mconcat . mapM fileNotes + +fileNotes :: FilePath -> IO NoteDb +fileNotes fname = do + is_file <- doesFileExist fname + if is_file + then do + bs <- BS.readFile fname + return $ case T.decodeUtf8' bs of + Left _ -> mempty + Right t -> + let (refs, defs) = partitionEithers (findNotes fname t) + in NoteDb + { noteRefs = M.singleton fname (S.fromList refs) + , noteDefs = M.fromList + [ (noteDefName def, S.singleton def) + | def <- defs + ] + } + else return mempty + +brokenNoteRefs :: NoteDb -> [(NoteRef, NoteDef)] +brokenNoteRefs db = + [ (ref, best_match) + | (_fname, refs) <- M.toList (noteRefs db) + , ref <- S.toList refs + , Nothing <- pure $ M.lookup (noteRefName ref) (noteDefs db) + , let best_match = bestLev (show (noteRefName ref)) (concatMap S.toList (M.elems (noteDefs db))) + ] + +bestLev :: String -> [NoteDef] -> NoteDef +bestLev x ds = head $ sortBy (comparing (\d -> lev x (show (noteDefName d)))) ds + + +lev:: (Eq a) => [a] -> [a] -> Int +lev xs ys = levMemo ! (n, m) + where levMemo = array ((0,0),(n,m)) [((i,j),lev' i j) | i <- [0..n], j <- [0..m]] + n = length xs + m = length ys + xa = listArray (1, n) xs + ya = listArray (1, m) ys + lev' 0 v = v + lev' u 0 = u + lev' u v + | xa ! u == ya ! v = levMemo ! (u-1, v-1) + | otherwise = 1 + minimum [levMemo ! (u, v-1), + levMemo ! (u-1, v), + levMemo ! (u-1, v-1)] + +unreferencedNotes :: NoteDb -> S.Set NoteDef +unreferencedNotes db = + fold $ noteDefs db `M.withoutKeys` referencedNotes + where + referencedNotes = S.fromList $ map noteRefName (allNoteRefs db) diff --git a/linters/lint-notes/check.sh b/linters/lint-notes/check.sh new file mode 100644 index 0000000000..cd87900f9b --- /dev/null +++ b/linters/lint-notes/check.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -e + +CABAL_INSTALL="${CABAL_INSTALL:-cabal}" +GHC="${GHC:-ghc}" + +cd "$(dirname $0)" +"$CABAL_INSTALL" build -w "$GHC" +bin="$("$CABAL_INSTALL" list-bin -w "$GHC" lint-notes)" +cd "$(git rev-parse --show-toplevel)" +"$bin" broken-refs \ + | grep -v "linters/lint-notes/expected-broken-note-refs:" \ + | sed 's/:[0-9]\+:[0-9]\+:/:/' \ + > broken-note-refs + +if diff -q linters/lint-notes/expected-broken-note-refs broken-note-refs; then + printf "No unexpected broken note references" +else + printf "Found unexpected broken note references:\n\n" + diff -u linters/lint-notes/expected-broken-note-refs broken-note-refs || true + if [[ "$1" == "-a" ]]; then + cp broken-note-refs linters/lint-notes/expected-broken-note-refs + printf "\n" + printf "Accepted new broken note references." + else + exit 1 + fi +fi + diff --git a/linters/lint-notes/ghc.mk b/linters/lint-notes/ghc.mk new file mode 100644 index 0000000000..bd4e97a85f --- /dev/null +++ b/linters/lint-notes/ghc.mk @@ -0,0 +1,18 @@ +# ----------------------------------------------------------------------------- +# +# (c) 2009 The University of Glasgow +# +# This file is part of the GHC build system. +# +# To understand how the build system works and how to modify it, see +# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture +# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying +# +# ----------------------------------------------------------------------------- + +linters/lint-notes_USES_CABAL = YES +linters/lint-notes_PACKAGE = lint-notes +linters/lint-notes_dist-install_PROGNAME = lint-notes +linters/lint-notes_dist-install_INSTALL = NO +linters/lint-notes_dist-install_INSTALL_INPLACE = YES +$(eval $(call build-prog,linters/lint-notes,dist-install,1)) diff --git a/linters/lint-notes/lint-notes.cabal b/linters/lint-notes/lint-notes.cabal new file mode 100644 index 0000000000..9896e74766 --- /dev/null +++ b/linters/lint-notes/lint-notes.cabal @@ -0,0 +1,21 @@ +cabal-version: 2.4 +name: lint-notes +version: 0.1.0.0 +synopsis: A tool for querying and checking GHC Notes +bug-reports: https://gitlab.haskell.org/ghc/ghc +license: BSD-3-Clause +author: Ben Gamari +maintainer: ben@smart-cactus.org +copyright: (c) 2022 Ben Gamari + +executable lint-notes + main-is: Main.hs + other-modules: Notes + build-depends: base >= 4 && < 5 , + bytestring, + containers, + directory, + text, + process, + array + default-language: Haskell2010 diff --git a/linters/lint-notes/test b/linters/lint-notes/test new file mode 100644 index 0000000000..3eb1e0466e --- /dev/null +++ b/linters/lint-notes/test @@ -0,0 +1,25 @@ +Note [Turtles] +~~~~~~~~~~~~~~~ +This is note 1. See Note [Wombats are toes]. + +/* Note [Wombats are toes] + ~~~~~~~~~~~~~~~~~~~~~~~ + This is another Note. + */ + +See Note [This is a +Note reference broken across lines]. + + // This is another Note [Broken + // across a line in a C++ comment]. + + /* + * This is another Note [Broken + * across a line in a C comment]. + */ + + -- This is another Note [Broken + -- across a line in a Haskell comment]. + + # This is another Note [Broken + # across a line in a Python comment]. diff --git a/linters/lint-submodule-refs/Main.hs b/linters/lint-submodule-refs/Main.hs new file mode 100644 index 0000000000..f99f066da0 --- /dev/null +++ b/linters/lint-submodule-refs/Main.hs @@ -0,0 +1,77 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +-- base +import Control.Monad + ( forM, forM_, unless, when ) +import Data.List + ( partition ) +import Data.Maybe + ( mapMaybe ) +import System.Environment + ( getArgs ) +import System.Exit + ( ExitCode(..), exitWith ) + +-- text +import qualified Data.Text as T +import qualified Data.Text.IO as T + ( putStrLn ) + +-- linters-common +import Linters.Common + ( GitType(..) + , gitBranchesContain, gitCatCommit, gitDiffTree, gitNormCid + ) + +-------------------------------------------------------------------------------- + +main :: IO () +main = do + dir:refs <- getArgs >>= \case + [] -> fail "usage: lint-submodule-refs <git-repo> [<commit-id>+]" + x -> return x + + forM_ (map T.pack refs) $ \ref -> do + (cid,deltas) <- gitDiffTree dir ref + + let smDeltas = [ (smPath, smCid) | (_, (GitTypeGitLink, smCid), smPath) <- deltas ] + + unless (null smDeltas) $ do + T.putStrLn $ "Submodule update(s) detected in " <> cid <> ":" + + (_, msg) <- gitCatCommit dir cid + + unless ("submodule" `T.isInfixOf` msg) $ do + T.putStrLn "*FAIL* commit message does not contain magic 'submodule' word." + T.putStrLn "This lint avoids accidental changes to git submodules." + T.putStrLn "Include the word 'submodule' in your commit message to silence this warning, e.g. 'Update submodule'." + exitWith (ExitFailure 1) + + bad <- fmap or $ forM smDeltas $ \(smPath,smCid) -> do + T.putStrLn $ " - " <> smPath <> " => " <> smCid + + let smAbsPath = dir ++ "/" ++ T.unpack smPath + remoteBranches <- gitBranchesContain smAbsPath smCid + + let (wip, nonWip) = partition ("wip/" `T.isPrefixOf`) originBranches + originBranches = mapMaybe isOriginTracking remoteBranches + isOriginTracking = T.stripPrefix "origin/" + let bad = null nonWip + when bad $ do + T.putStrLn $ " *FAIL* commit not found in submodule repo" + T.putStrLn " or not reachable from persistent branches" + T.putStrLn "" + when (not $ null wip) $ do + T.putStrLn " Found the following non-mirrored WIP branches:" + forM_ wip $ \branch -> do + commit <- gitNormCid smAbsPath branch + T.putStrLn $ " - " <> branch <> " -> " <> commit + T.putStrLn "" + pure bad + + if bad + then exitWith (ExitFailure 1) + else T.putStrLn " OK" diff --git a/linters/lint-submodule-refs/cabal.project b/linters/lint-submodule-refs/cabal.project new file mode 100644 index 0000000000..444b636706 --- /dev/null +++ b/linters/lint-submodule-refs/cabal.project @@ -0,0 +1,2 @@ +packages: ., + ../linters-common diff --git a/linters/lint-submodule-refs/lint-submodule-refs.cabal b/linters/lint-submodule-refs/lint-submodule-refs.cabal new file mode 100644 index 0000000000..ce4012adfc --- /dev/null +++ b/linters/lint-submodule-refs/lint-submodule-refs.cabal @@ -0,0 +1,28 @@ +cabal-version: 3.0 +name: lint-submodule-refs +version: 0.1.0.0 +synopsis: Lint submodule references +license: GPL-3.0-only +author: The GHC team +build-type: Simple + +executable lint-submodule-refs + + default-language: + Haskell2010 + + build-depends: + base + >= 4.14 && < 5, + text + >= 1.2 && < 2.1, + linters-common + + ghc-options: + -Wall + + hs-source-dirs: + . + + main-is: + Main.hs diff --git a/linters/lint-whitespace/Main.hs b/linters/lint-whitespace/Main.hs new file mode 100644 index 0000000000..f1657c4f40 --- /dev/null +++ b/linters/lint-whitespace/Main.hs @@ -0,0 +1,265 @@ +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} + + +module Main where + +-- base +import Control.Exception + ( IOException, handle ) +import Control.Monad + ( forM, forM_, unless, when ) +import Data.Maybe + ( isJust, mapMaybe ) +import System.Environment + ( getArgs ) +import System.Exit + ( ExitCode(..), exitWith ) + +-- containers +import Data.Map.Strict + ( Map ) +import qualified Data.Map.Strict as Map + ( alter, empty, keys, findWithDefault ) + +-- mtl +import Control.Monad.Writer + ( liftIO, execWriter, tell ) + +-- process +import System.Process + ( readProcess ) + +-- text +import Data.Text + ( Text ) +import qualified Data.Text as T +import qualified Data.Text.IO as T + ( readFile ) + +-- linters-common +import Linters.Common + ( LintMsg(..), LintLvl(..) + , GitType(..) + , gitCatBlob, gitDiffTree + , z40 + ) + +-------------------------------------------------------------------------------- + +main :: IO () +main = do + getArgs >>= \case + ("commits":dir:refs) -> mainCommits dir refs + ("commits":_) -> fail "usage: lint-whitespace commits <git-repo> [<commit-id>+]" + ("files":fs) -> mainFiles fs + ("tracked":args) -> mainTracked args + _ -> fail "usage: lint-whitespace <files|commits|tracked> ..." + +mainCommits :: FilePath -> [String] -> IO () +mainCommits dir refs = do + + stats <- forM (map T.pack refs) $ \ref -> do + (cid,deltas) <- gitDiffTree dir ref + + lintMsgs0 <- forM deltas $ \(origs, (gt, blobId), fname) -> if (gt == GitTypeRegFile && hasSuffix fname) + then do + let blobIds0 = [ b0 | (GitTypeRegFile, b0, _) <- origs, b0 /= z40 ] + blob1 <- gitCatBlob dir blobId + blobs0 <- mapM (gitCatBlob dir) blobIds0 + + -- blobs0 will be empty in case in case of newly added files as well as renames/copies + -- blobs0 will contain more than one entry for merge-commits + + return [ (fname, msg) | msg <- lintBlob blobs0 blob1 ] + else return [] + + checkLintMsgs ("commit " <> T.unpack cid) lintMsgs0 + + finalReport stats + +finalReport :: [Maybe LintLvl] -> IO () +finalReport stats = do + unless (null $ filter isJust stats) $ + putStrLn "=====================================================================================" + + let stats1 = maximum (Nothing : stats) + + -- unless (stats1 == Nothing) $ do + -- putStrLn "There were commit message linter issues! For more information see" + -- putStrLn "" + + unless (stats1 < Just LintLvlErr) $ do + putStrLn "Validation FAILED because at least one commit had linter errors!" + exitWith (ExitFailure 1) + + putStrLn "whitespace validation passed!" + +checkLintMsgs :: String -> [[(Text, LintMsg)]] -> IO (Maybe LintLvl) +checkLintMsgs herald lintMsgs0 = do + let lintMsgs = concat lintMsgs0 + status = maximum (Nothing : [ Just lvl | (_, LintMsg lvl _ _ _) <- lintMsgs ]) + ok = status < Just LintLvlErr + + unless (null lintMsgs) $ liftIO $ do + putStrLn "=====================================================================================" + putStrLn (herald <> " has whitespace linter issues:") + putStrLn "" + forM_ lintMsgs $ \(fn, LintMsg lvl lno l m) -> do + let lvls = case lvl of + LintLvlErr -> "*ERROR*" + LintLvlWarn -> "Warning" + putStrLn (" " <> lvls <> " " <> T.unpack fn <> ":" <> show lno <> ": " <> T.unpack m) + putStrLn (" > " <> show l) + putStrLn "" + return () + + unless ok $ liftIO $ + putStrLn ("Validation FAILED for " <> herald) + + return status + +mainFiles :: [FilePath] -> IO () +mainFiles fs = do + stats <- forM fs $ \f -> do + lintMsgs0 <- handle (\(_ :: IOException) -> return []) (lintFile <$> T.readFile f) + checkLintMsgs ("file " <> f) [[(T.pack f, err) | err <- lintMsgs0 ]] + finalReport stats + +mainTracked :: [String] -> IO () +mainTracked args = do + (ignoredFiles, ignoredDirs) <- parseTrackedArgs args + allFiles <- lines <$> readProcess "git" ["ls-tree", "--name-only", "-r", "HEAD"] "" + let files = filter (isTracked ignoredFiles ignoredDirs) allFiles + mainFiles files + +-- Check a file for trailing whitespace and tabs +lintFile :: Text -> [LintMsg] +lintFile blob1 = execWriter $ do + when (hasTabs blob1) $ do + tell [ LintMsg LintLvlErr lno l "introduces TAB" + | (lno,l) <- zip [1..] lns + , "\t" `T.isInfixOf` l + ] + + when (hasTrail blob1) $ do + tell [ LintMsg LintLvlErr lno l "introduces trailing whitespace" + | (lno,l) <- zip [1..] lns, hasTrail l ] + where + lns = T.lines blob1 + +lintBlob :: [Text] -> Text -> [LintMsg] +lintBlob blobs0 blob1 = execWriter $ do + -- Perform simple invariant-preservation checks + when (hasTabs blob1 && not (any hasTabs blobs0)) $ do + tell [ LintMsg LintLvlErr lno l "introduces TAB" + | (lno,l) <- zip [1..] lns + , "\t" `T.isInfixOf` l + ] + + when (hasTrail blob1 && not (any hasTrail blobs0)) $ do + tell [ LintMsg LintLvlErr lno l "introduces trailing whitespace" + | (lno,l) <- zip [1..] lns, hasTrail l ] + + when (missingFinalEOL blob1) $ if not (any missingFinalEOL blobs0) + then tell [LintMsg LintLvlErr llno lln "lacking final EOL"] + else tell [LintMsg LintLvlWarn llno lln "lacking final EOL"] + + where + lns = T.lines blob1 + llno = length lns + lln = case lns of + [] -> "" + _ -> last lns + +hasTabs :: Text -> Bool +hasTabs = T.any (=='\t') + +hasTrail :: Text -> Bool +hasTrail t = or [ " \n" `T.isInfixOf` t + , " \r\n" `T.isInfixOf` t + , "\t\r\n" `T.isInfixOf` t + , " " `T.isSuffixOf` t + , "\t" `T.isSuffixOf` t + ] + +missingFinalEOL :: Text -> Bool +missingFinalEOL = not . T.isSuffixOf "\n" + +-------------------------------------------------- + +data Flag + = IgnoreFiles + | IgnoreDirs + | UnknownFlag Text + deriving stock ( Eq, Ord, Show ) + +parseTrackedArgs :: [String] -> IO ([Text], [Text]) +parseTrackedArgs args = do + unless (null bareArgs) $ + fail "usage: lint-whitespace tracked --ignore-files ... --ignore-dirs ..." + unless (null unknownFlags) $ + fail $ "lint-whitespace tracked: unknown flags " ++ show unknownFlags ++ "\n\ + \supported flags are --ignore-files and --ignore-dirs" + return (ignoredFiles, ignoredDirs) + where + (bareArgs, flagArgs) = splitOn flagMaybe (map T.pack args) + ignoredFiles = Map.findWithDefault [] IgnoreFiles flagArgs + ignoredDirs = Map.findWithDefault [] IgnoreDirs flagArgs + unknownFlags = mapMaybe (\case { UnknownFlag unk -> Just unk ; _ -> Nothing}) + $ Map.keys flagArgs + +-- Assumes the input string has no whitespace. +flagMaybe :: Text -> Maybe Flag +flagMaybe "-f" = Just IgnoreFiles +flagMaybe "--ignore-files" = Just IgnoreFiles +flagMaybe "-d" = Just IgnoreDirs +flagMaybe "--ignore-dirs" = Just IgnoreDirs +flagMaybe str + | let (hyphens, rest) = T.span ((==) '-') str + , not (T.null hyphens) + = Just (UnknownFlag rest) + | otherwise + = Nothing + +splitOn :: forall a b. Ord b => (a -> Maybe b) -> [a] -> ([a], Map b [a]) +splitOn f = go Nothing + where + go :: Maybe b -> [a] -> ([a], Map b [a]) + go _ [] = ([], Map.empty) + go mb_b (a:as) = case f a of + Nothing -> + case go mb_b as of + (xs, yxs) -> + case mb_b of + Nothing -> (a:xs, yxs) + Just b -> (xs, Map.alter (alter_fn a) b yxs) + Just b -> go (Just b) as + alter_fn :: a -> Maybe [a] -> Maybe [a] + alter_fn a Nothing = Just [a] + alter_fn a (Just as) = Just (a:as) + +-------------------------------------------------- +-- Predicates used to filter which files we lint. + +hasSuffix :: Text -> Bool +hasSuffix fn = any (`T.isSuffixOf` fn) suffixes + where + suffixes = T.words ".hs .hsc .lhs .cabal .c .h .lhs-boot .hs-boot .x .y" + +autogenFiles :: [ Text ] +autogenFiles = [ "WCsubst.c", "iconv.c", "Table.hs" ] + +ignoredPrefixes :: [Text] +ignoredPrefixes = [ "testsuite/", "libraries/base/tests" + , "utils/hp2ps", "utils/hpc", "utils/unlit" + ] + +isTracked :: [Text] -> [Text] -> FilePath -> Bool +isTracked ignoredFiles ignoredDirs (T.pack -> fn) + = hasSuffix fn + && not (fn `elem` ignoredFiles) + && not (any (`T.isPrefixOf` fn) ignoredDirs) diff --git a/linters/lint-whitespace/cabal.project b/linters/lint-whitespace/cabal.project new file mode 100644 index 0000000000..444b636706 --- /dev/null +++ b/linters/lint-whitespace/cabal.project @@ -0,0 +1,2 @@ +packages: ., + ../linters-common diff --git a/linters/lint-whitespace/ghc.mk b/linters/lint-whitespace/ghc.mk new file mode 100644 index 0000000000..6b844911e5 --- /dev/null +++ b/linters/lint-whitespace/ghc.mk @@ -0,0 +1,18 @@ +# ----------------------------------------------------------------------------- +# +# (c) 2009 The University of Glasgow +# +# This file is part of the GHC build system. +# +# To understand how the build system works and how to modify it, see +# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture +# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying +# +# ----------------------------------------------------------------------------- + +linters/lint-whitespace_USES_CABAL = YES +linters/lint-whitespace_PACKAGE = lint-whitespace +linters/lint-whitespace_dist-install_PROGNAME = lint-whitespace +linters/lint-whitespace_dist-install_INSTALL = NO +linters/lint-whitespace_dist-install_INSTALL_INPLACE = YES +$(eval $(call build-prog,linters/lint-whitespace,dist-install,1)) diff --git a/linters/lint-whitespace/lint-whitespace.cabal b/linters/lint-whitespace/lint-whitespace.cabal new file mode 100644 index 0000000000..e3d2aa3a06 --- /dev/null +++ b/linters/lint-whitespace/lint-whitespace.cabal @@ -0,0 +1,31 @@ +cabal-version: 3.0 +name: lint-whitespace +version: 0.1.0.0 +synopsis: Lint whitespace +license: GPL-3.0-only +author: The GHC team +build-type: Simple + +executable lint-whitespace + + default-language: + Haskell2010 + + hs-source-dirs: + . + + main-is: + Main.hs + + build-depends: + linters-common, + mtl + >=2.1 && <2.3, + process + ^>= 1.6, + containers + ^>= 0.6, + base + >= 4.14 && < 5, + text + >= 1.2 && < 2.1, diff --git a/linters/linters-common/Linters/Common.hs b/linters/linters-common/Linters/Common.hs new file mode 100644 index 0000000000..8d92a87787 --- /dev/null +++ b/linters/linters-common/Linters/Common.hs @@ -0,0 +1,197 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE OverloadedStrings #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +module Linters.Common where + +-- base +import Control.Monad + ( liftM, unless ) +import Data.Function + ( on ) +import Data.List + ( groupBy ) +import Data.Maybe + ( fromMaybe ) +import GHC.IO.Encoding + ( utf8, setLocaleEncoding, getLocaleEncoding, textEncodingName ) + +-- deepseq +import Control.DeepSeq + ( NFData(rnf), force, ($!!) ) + +-- process +import System.Process + ( readProcess ) + +-- text +import Data.Text + ( Text ) +import qualified Data.Text as T + +-------------------------------------------------------------------------------- + +data LintMsg = LintMsg !LintLvl !Int !Text !Text + deriving stock Show + +data LintLvl = LintLvlWarn | LintLvlErr + deriving stock ( Show, Eq, Ord ) + +type GitRef = Text + +type Sh = IO + +silently :: a -> a +silently = id + +runGit :: FilePath -> Text -> [Text] -> Sh Text +runGit fp t ts = runGitStdin fp t ts "" + +-- | Run @git@ operation +runGitStdin :: FilePath -> Text -> [Text] -> Text -> Sh Text +runGitStdin d op args std_in = do + d' <- return $ T.pack d + out <- withUtf8 $ silently $ readProcess "git" (map T.unpack ("-C" : d' : op : args)) (T.unpack std_in) + return (T.pack out) + +-- | WARNING: non-reentrant Hack! +withUtf8 :: Sh a -> Sh a +withUtf8 act = do + oldloc <- getLocaleEncoding + if (textEncodingName oldloc == textEncodingName utf8) + then act + else do + setLocaleEncoding utf8 + r <- act + setLocaleEncoding oldloc + return r + +-- | wrapper around @git cat-file commit@ +-- +-- Returns (commit-header, commit-body) +gitCatCommit :: FilePath -> GitRef -> Sh (Text,Text) +gitCatCommit d ref = do + tmp <- runGit d "cat-file" ["commit", ref ] + return (fmap (T.drop 2) $ T.breakOn "\n\n" tmp) + +-- | wrapper around @git cat-file commit@ +gitCatBlob :: FilePath -> GitRef -> Sh Text +gitCatBlob d ref = do + tmpl <- liftM tread $ runGit d "cat-file" ["-s", ref] -- workaround shelly adding EOLs + tmp <- runGit d "cat-file" ["blob", ref] + return (T.take tmpl tmp) + where + tread = read . T.unpack + +-- | Wrapper around @git rev-parse --verify@ +-- +-- Normalise git ref to commit sha1 +gitNormCid :: FilePath -> GitRef -> Sh GitRef +gitNormCid d ref = do + tmp <- runGit d "rev-parse" ["-q", "--verify", ref <> "^{commit}" ] + return (T.strip tmp) + +-- | wrapper around @git branch --contains@ +gitBranchesContain :: FilePath -> GitRef -> Sh [Text] +gitBranchesContain d ref = do + tmp <- liftM T.lines $ + --errExit False $ print_stderr False $ + runGit d "branch" ["--contains", ref, "-r"] + + unless (all (\s -> T.take 2 s `elem` [" ","* "]) tmp) $ + fail "gitBranchesContain: internal error" + + return $!! map (T.drop 2) tmp + +-- | returns @[(path, (url, key))]@ +-- +-- may throw exception +getModules :: FilePath -> GitRef -> Sh [(Text, (Text, Text))] +getModules d ref = do + tmp <- runGit d "show" [ref <> ":.gitmodules"] + + res <- liftM T.lines $ runGitStdin d "config" [ "--file", "/dev/stdin", "-l" ] tmp + + let ms = [ (T.tail key1,(key2, T.tail val)) + | r <- res, "submodule." `T.isPrefixOf` r + , let (key,val) = T.break (=='=') r + , let (key',key2) = T.breakOnEnd "." key + , let (_,key1) = T.break (=='.') (T.init key') + ] + + ms' = [ (path', (url, k)) + | es@((k,_):_) <- groupBy ((==) `on` fst) ms + , let props = map snd es + , let url = fromMaybe (error "getModules1") (lookup "url" props) + , let path' = fromMaybe (error "getModules2") (lookup "path" props) + ] + + return $!! ms' + + +{- | + +Possible meanings of the 'Char' value: + + * Added (A), + * Copied (C), + * Deleted (D), + * Modified (M), + * Renamed (R), + * have their type (i.e. regular file, symlink, submodule, ...) changed (T), + * are Unmerged (U), + * are Unknown (X), + * or have had their pairing Broken (B). + +-} +gitDiffTree :: FilePath -> GitRef -> Sh (Text, [([(GitType, Text, Char)], (GitType, Text), Text)]) +gitDiffTree d ref = do + tmp <- liftM T.lines $ runGit d "diff-tree" ["--root","-c", "-r", ref] + case tmp of + cid:deltas -> return $!! (cid, map parseDtLine deltas) + [] -> return ("", []) + + where + parseDtLine :: Text -> ([(GitType, Text, Char)], (GitType, Text), Text) + parseDtLine l + | sanityCheck = force (zip3 (map cvtMode mode') oid' (T.unpack k),(cvtMode mode,oid),fp) + | otherwise = error "in parseDtLine" + where + sanityCheck = n > 0 && T.length k == n + + n = T.length cols + (mode',mode:tmp') = splitAt n $ T.split (==' ') l'' + (oid',[oid,k]) = splitAt n tmp' + [l'',fp] = T.split (=='\t') l' + (cols,l') = T.span (==':') l + +gitDiffTreePatch :: FilePath -> GitRef -> Text -> Sh Text +gitDiffTreePatch d ref fname = runGit d "diff-tree" ["--root", "--cc", "-r", ref, "--", fname] + +z40 :: GitRef +z40 = T.pack (replicate 40 '0') + +data GitType + = GitTypeVoid + | GitTypeRegFile + | GitTypeExeFile + | GitTypeTree + | GitTypeSymLink + | GitTypeGitLink + deriving stock (Show,Eq,Ord,Enum) + +instance NFData GitType where rnf !_ = () + +cvtMode :: Text -> GitType +cvtMode "000000" = GitTypeVoid +cvtMode "040000" = GitTypeSymLink +cvtMode "100644" = GitTypeRegFile +cvtMode "100755" = GitTypeExeFile +cvtMode "120000" = GitTypeSymLink +cvtMode "160000" = GitTypeGitLink +cvtMode x = error ("cvtMode: " ++ show x) + +tshow :: Show a => a -> Text +tshow = T.pack . show diff --git a/linters/linters-common/ghc.mk b/linters/linters-common/ghc.mk new file mode 100644 index 0000000000..409e3c9cc8 --- /dev/null +++ b/linters/linters-common/ghc.mk @@ -0,0 +1,17 @@ +# ----------------------------------------------------------------------------- +# +# (c) 2009 The University of Glasgow +# +# This file is part of the GHC build system. +# +# To understand how the build system works and how to modify it, see +# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture +# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying +# +# ----------------------------------------------------------------------------- + +linters/linters-common_USES_CABAL = YES +linters/linters-common_PACKAGE = linters-common +linters/linters-common_dist-install_INSTALL = YES +linters/linters-common_dist-install_INSTALL_INPLACE = YES +$(eval $(call build-package,linters/linters-common,dist-install,1)) diff --git a/linters/linters-common/linters-common.cabal b/linters/linters-common/linters-common.cabal new file mode 100644 index 0000000000..02245750dd --- /dev/null +++ b/linters/linters-common/linters-common.cabal @@ -0,0 +1,30 @@ +cabal-version: 3.0 +name: linters-common +version: 0.1.0.0 +synopsis: Common library for GHC linting scripts +license: GPL-3.0-only +author: The GHC team +build-type: Simple + +library + default-language: + Haskell2010 + + build-depends: + process + ^>= 1.6, + base + >= 4.14 && < 5, + text + >= 1.2 && < 2.1, + deepseq + >= 1.1, + + hs-source-dirs: + . + + ghc-options: + -Wall + + exposed-modules: + Linters.Common |