summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiklas Hambüchen <mail@nh2.me>2013-08-22 11:05:56 +0900
committerAustin Seipp <aseipp@pobox.com>2013-08-22 16:25:02 -0500
commit677820ee3a3aadbf2ed414deb3926381d94b13a8 (patch)
treed0e19887f49fca9b339310d4a395bebf396f2afb
parent16ae2f0c3ff45e0c78b90ae0761a0f86c70188bd (diff)
downloadhaskell-677820ee3a3aadbf2ed414deb3926381d94b13a8.tar.gz
Fix interface hashes including time stamp of dependent files.
Fixes #8144. Before, the modification time of e.g. #included files (and everything that ends up as a UsageFile, e.g. via addDependentFile) was taken as input for the interface hash of a module. This lead to different hashes for identical inputs on every compilation. We now use file content hashes instead. This changes the interface file format. You will get "Binary.get(Usage): 50" when you try to do an incremental using .hi files that were created with a GHC 7.7 (only) older than this commit. To calculate the md5 hash (`Fingerprint`) of a file in constant space, there now is GHC.Fingerprint.getFileHash, and a fallback version for older GHCs that needs to load the file into memory completely (only used when compiling stage1 with an older GHC). Signed-off-by: Austin Seipp <aseipp@pobox.com>
-rw-r--r--compiler/iface/MkIface.lhs18
-rw-r--r--compiler/main/HscTypes.lhs10
-rw-r--r--compiler/utils/Fingerprint.hsc31
3 files changed, 43 insertions, 16 deletions
diff --git a/compiler/iface/MkIface.lhs b/compiler/iface/MkIface.lhs
index 5819964f2e..4c78955f31 100644
--- a/compiler/iface/MkIface.lhs
+++ b/compiler/iface/MkIface.lhs
@@ -883,16 +883,16 @@ mkUsageInfo :: HscEnv -> Module -> ImportedMods -> NameSet -> [FilePath] -> IO [
mkUsageInfo hsc_env this_mod dir_imp_mods used_names dependent_files
= do
eps <- hscEPS hsc_env
- mtimes <- mapM getModificationUTCTime dependent_files
+ hashes <- mapM getFileHash dependent_files
let mod_usages = mk_mod_usage_info (eps_PIT eps) hsc_env this_mod
dir_imp_mods used_names
- let usages = mod_usages ++ map to_file_usage (zip dependent_files mtimes)
- usages `seqList` return usages
+ let usages = mod_usages ++ [ UsageFile { usg_file_path = f
+ , usg_file_hash = hash }
+ | (f, hash) <- zip dependent_files hashes ]
+ usages `seqList` return usages
-- seq the list of Usages returned: occasionally these
-- don't get evaluated for a while and we can end up hanging on to
-- the entire collection of Ifaces.
- where
- to_file_usage (f, mtime) = UsageFile { usg_file_path = f, usg_mtime = mtime }
mk_mod_usage_info :: PackageIfaceTable
-> HscEnv
@@ -1343,15 +1343,15 @@ checkModUsage this_pkg UsageHomeModule{
checkModUsage _this_pkg UsageFile{ usg_file_path = file,
- usg_mtime = old_mtime } =
+ usg_file_hash = old_hash } =
liftIO $
handleIO handle $ do
- new_mtime <- getModificationUTCTime file
- if (old_mtime /= new_mtime)
+ new_hash <- getFileHash file
+ if (old_hash /= new_hash)
then return recomp
else return UpToDate
where
- recomp = RecompBecause (file ++ " time stamp changed")
+ recomp = RecompBecause (file ++ " changed")
handle =
#ifdef DEBUG
\e -> pprTrace "UsageFile" (text (show e)) $ return recomp
diff --git a/compiler/main/HscTypes.lhs b/compiler/main/HscTypes.lhs
index e022ae3eae..07e78f3883 100644
--- a/compiler/main/HscTypes.lhs
+++ b/compiler/main/HscTypes.lhs
@@ -1796,7 +1796,7 @@ data Usage
} -- ^ Module from the current package
| UsageFile {
usg_file_path :: FilePath,
- usg_mtime :: UTCTime
+ usg_file_hash :: Fingerprint
-- ^ External file dependency. From a CPP #include or TH addDependentFile. Should be absolute.
}
deriving( Eq )
@@ -1831,7 +1831,7 @@ instance Binary Usage where
put_ bh usg@UsageFile{} = do
putByte bh 2
put_ bh (usg_file_path usg)
- put_ bh (usg_mtime usg)
+ put_ bh (usg_file_hash usg)
get bh = do
h <- getByte bh
@@ -1850,9 +1850,9 @@ instance Binary Usage where
return UsageHomeModule { usg_mod_name = nm, usg_mod_hash = mod,
usg_exports = exps, usg_entities = ents, usg_safe = safe }
2 -> do
- fp <- get bh
- mtime <- get bh
- return UsageFile { usg_file_path = fp, usg_mtime = mtime }
+ fp <- get bh
+ hash <- get bh
+ return UsageFile { usg_file_path = fp, usg_file_hash = hash }
i -> error ("Binary.get(Usage): " ++ show i)
\end{code}
diff --git a/compiler/utils/Fingerprint.hsc b/compiler/utils/Fingerprint.hsc
index 95f31c08bb..895906a3c6 100644
--- a/compiler/utils/Fingerprint.hsc
+++ b/compiler/utils/Fingerprint.hsc
@@ -8,18 +8,22 @@
--
-- ----------------------------------------------------------------------------
-{-# OPTIONS_GHC -fno-warn-orphans #-}
module Fingerprint (
Fingerprint(..), fingerprint0,
readHexFingerprint,
fingerprintData,
- fingerprintString
+ fingerprintString,
+ -- Re-exported rom GHC.Fingerprint for GHC >= 7.7, local otherwise
+ getFileHash
) where
#include "md5.h"
##include "HsVersions.h"
import Numeric ( readHex )
+import Foreign
+import Panic
+import System.IO
import GHC.Fingerprint
@@ -30,3 +34,26 @@ readHexFingerprint s = Fingerprint w1 w2
[(w1,"")] = readHex s1
[(w2,"")] = readHex (take 16 s2)
+
+#if __GLASGOW_HASKELL__ < 707
+-- Only use this if we're smaller than GHC 7.7, otherwise
+-- GHC.Fingerprint exports a better version of this function.
+
+-- | Computes the hash of a given file.
+-- It loads the full file into memory an does not work with files bigger than
+-- MAXINT.
+getFileHash :: FilePath -> IO Fingerprint
+getFileHash path = withBinaryFile path ReadMode $ \h -> do
+
+ fileSize <- toIntFileSize `fmap` hFileSize h
+
+ allocaBytes fileSize (\bufPtr -> fingerprintData bufPtr fileSize)
+
+ where
+ toIntFileSize :: Integer -> Int
+ toIntFileSize size
+ | size > fromIntegral (maxBound :: Int) = throwGhcException $
+ Sorry $ "Fingerprint.getFileHash: Tried to calculate hash of file "
+ ++ path ++ " with size > maxBound :: Int. This is not supported."
+ | otherwise = fromIntegral size
+#endif