summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTobias Dammers <tdammers@gmail.com>2017-10-25 19:09:03 -0400
committerBen Gamari <ben@smart-cactus.org>2017-10-25 19:18:42 -0400
commitdbd81f7e86514498218572b9d978373b1699cc5b (patch)
tree5c4527f5405e67d0de0006fc07dd132162c3b96d
parent4c06ccb71737b77a8165e888ad75417a425549dd (diff)
downloadhaskell-dbd81f7e86514498218572b9d978373b1699cc5b.tar.gz
Factor out readField (#14364)
Improves compiler performance of deriving Read instances, as suggested in the issue. Additionally, we introduce `readSymField`, a companion to `readField` that parses symbol-type fields (where the field name is a symbol, e.g. `(#)`, rather than an alphanumeric identifier. The decision between these two functions is made a compile time, because we already know which one we need based on the field name. Reviewers: austin, hvr, bgamari, RyanGlScott Reviewed By: bgamari Subscribers: RyanGlScott, rwbarton, thomie Differential Revision: https://phabricator.haskell.org/D4108
-rw-r--r--compiler/prelude/PrelNames.hs4
-rw-r--r--compiler/typecheck/TcGenDeriv.hs41
-rw-r--r--libraries/base/GHC/Read.hs46
-rw-r--r--testsuite/tests/perf/compiler/all.T3
4 files changed, 75 insertions, 19 deletions
diff --git a/compiler/prelude/PrelNames.hs b/compiler/prelude/PrelNames.hs
index 760aea5e95..ae695d40e1 100644
--- a/compiler/prelude/PrelNames.hs
+++ b/compiler/prelude/PrelNames.hs
@@ -742,6 +742,10 @@ choose_RDR = varQual_RDR gHC_READ (fsLit "choose")
lexP_RDR = varQual_RDR gHC_READ (fsLit "lexP")
expectP_RDR = varQual_RDR gHC_READ (fsLit "expectP")
+readField_RDR, readSymField_RDR :: RdrName
+readField_RDR = varQual_RDR gHC_READ (fsLit "readField")
+readSymField_RDR = varQual_RDR gHC_READ (fsLit "readSymField")
+
punc_RDR, ident_RDR, symbol_RDR :: RdrName
punc_RDR = dataQual_RDR lEX (fsLit "Punc")
ident_RDR = dataQual_RDR lEX (fsLit "Ident")
diff --git a/compiler/typecheck/TcGenDeriv.hs b/compiler/typecheck/TcGenDeriv.hs
index 9e27ad5ca1..70ceb30305 100644
--- a/compiler/typecheck/TcGenDeriv.hs
+++ b/compiler/typecheck/TcGenDeriv.hs
@@ -900,9 +900,7 @@ instance Read T where
-- Record construction binds even more tightly than application
do expectP (Ident "T1")
expectP (Punc '{')
- expectP (Ident "f1")
- expectP (Punc '=')
- x <- ReadP.reset Read.readPrec
+ x <- Read.readField "f1" (ReadP.reset readPrec)
expectP (Punc '}')
return (T1 { f1 = x }))
+++
@@ -1068,21 +1066,28 @@ gen_Read_binds get_fixity loc tycon
read_arg a ty = ASSERT( not (isUnliftedType ty) )
noLoc (mkBindStmt (nlVarPat a) (nlHsVarApps step_RDR [readPrec_RDR]))
- read_field lbl a = read_lbl lbl ++
- [read_punc "=",
- noLoc (mkBindStmt (nlVarPat a) (nlHsVarApps reset_RDR [readPrec_RDR]))]
-
- -- When reading field labels we might encounter
- -- a = 3
- -- _a = 3
- -- or (#) = 4
- -- Note the parens!
- read_lbl lbl | isSym lbl_str
- = [read_punc "(", symbol_pat lbl_str, read_punc ")"]
- | otherwise
- = ident_h_pat lbl_str
- where
- lbl_str = unpackFS lbl
+ -- When reading field labels we might encounter
+ -- a = 3
+ -- _a = 3
+ -- or (#) = 4
+ -- Note the parens!
+ read_field lbl a =
+ [noLoc
+ (mkBindStmt
+ (nlVarPat a)
+ (nlHsApps
+ read_field
+ [ nlHsLit (mkHsString lbl_str)
+ , nlHsVarApps reset_RDR [readPrec_RDR]
+ ]
+ )
+ )
+ ]
+ where
+ lbl_str = unpackFS lbl
+ read_field
+ | isSym lbl_str = readSymField_RDR
+ | otherwise = readField_RDR
{-
************************************************************************
diff --git a/libraries/base/GHC/Read.hs b/libraries/base/GHC/Read.hs
index ad29cc5c40..2d8ee3de51 100644
--- a/libraries/base/GHC/Read.hs
+++ b/libraries/base/GHC/Read.hs
@@ -36,6 +36,8 @@ module GHC.Read
, choose
, readListDefault, readListPrecDefault
, readNumber
+ , readField
+ , readSymField
-- Temporary
, readParen
@@ -359,6 +361,50 @@ choose sps = foldr ((+++) . try_one) pfail sps
L.Symbol s' | s==s' -> p
_other -> pfail }
+-- See Note [Why readField]
+
+-- | 'Read' parser for a record field, of the form @fieldName=value@. The
+-- @fieldName@ must be an alphanumeric identifier; for symbols (operator-style)
+-- field names, e.g. @(#)@, use 'readSymField'). The second argument is a
+-- parser for the field value.
+readField :: String -> ReadPrec a -> ReadPrec a
+readField fieldName readVal = do
+ expectP (L.Ident fieldName)
+ expectP (L.Punc "=")
+ readVal
+{-# NOINLINE readField #-}
+
+-- See Note [Why readField]
+
+-- | 'Read' parser for a symbol record field, of the form @(###)=value@ (where
+-- @###@ is the field name). The field name must be a symbol (operator-style),
+-- e.g. @(#)@. For regular (alphanumeric) field names, use 'readField'. The
+-- second argument is a parser for the field value.
+readSymField :: String -> ReadPrec a -> ReadPrec a
+readSymField fieldName readVal = do
+ expectP (L.Punc "(")
+ expectP (L.Symbol fieldName)
+ expectP (L.Punc ")")
+ expectP (L.Punc "=")
+ readVal
+{-# NOINLINE readSymField #-}
+
+
+-- Note [Why readField]
+--
+-- Previousy, the code for automatically deriving Read instance (in
+-- typecheck/TcGenDeriv.hs) would generate inline code for parsing fields;
+-- this, however, turned out to produce massive amounts of intermediate code,
+-- and produced a considerable performance hit in the code generator.
+-- Since Read instances are not generally supposed to be perfomance critical,
+-- the readField and readSymField functions have been factored out, and the
+-- code generator now just generates calls rather than manually inlining the
+-- parsers. For large record types (e.g. 500 fields), this produces a
+-- significant performance boost.
+--
+-- See also Trac #14364.
+
+
--------------------------------------------------------------
-- Simple instances of Read
--------------------------------------------------------------
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 41b2af8df9..aa53d989ce 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -197,7 +197,7 @@ test('T3294',
# 2013-11-13: 1478325844 (x86/Windows, 64bit machine)
# 2014-01-12: 1565185140 (x86/Linux)
# 2013-04-04: 1377050640 (x86/Windows, 64bit machine)
- (wordsize(64), 2253557280, 5)]),
+ (wordsize(64), 1858491504, 5)]),
# old: 1357587088 (amd64/Linux)
# 29/08/2012: 2961778696 (amd64/Linux)
# (^ increase due to new codegen, see #7198)
@@ -212,6 +212,7 @@ test('T3294',
# 2016-07-11: 2739731144 (Windows) after fix for #12227 (ignoring)
# 2017-02-17: 2758641264 (amd64/Linux) (Type indexed Typeable)
# 2017-05-14: 2253557280 (amd64/Linux) Two-pass CmmLayoutStack
+ # 2017-10-24: 1858491504 (amd64/Linux) Improved linear regAlloc
conf_3294,
# Use `+RTS -G1` for more stable residency measurements. Note [residency].