Skip to content

Commit 4bcd0c6

Browse files
authored
Faster ModSummary fingerprints (#1485)
* Faster ModSummary fingerprints The computation of these fingerprints was very suboptimal. This change: - Avoids calling Hdr.getOptions twice - Shares the relevant part of the fingerprint between GetModSummary and GetModSummaryWihoutTimestamps - skips the timestamps altogether, since we already fingerprint the preprocessed - buffer. - Avoids show in the fingerprint computation - Uses efficient fingerprint primitives * remove 64 bits assumption
1 parent d397ef4 commit 4bcd0c6

File tree

12 files changed

+106
-82
lines changed

12 files changed

+106
-82
lines changed

ghcide/src/Development/IDE/Core/Compile.hs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,17 @@ import TcEnv (tcLookup)
113113
import Control.Concurrent.Extra
114114
import Control.Concurrent.STM hiding (orElse)
115115
import Data.Aeson (toJSON)
116+
import Data.Binary
117+
import Data.Binary.Put
118+
import Data.Bits (shiftR)
119+
import qualified Data.ByteString.Lazy as LBS
116120
import Data.Coerce
117121
import Data.Functor
118122
import qualified Data.HashMap.Strict as HashMap
119123
import Data.Tuple.Extra (dupe)
120124
import Data.Unique
125+
import Data.Word
126+
import Foreign.Marshal.Array (withArrayLen)
121127
import GHC.Fingerprint
122128
import qualified Language.LSP.Server as LSP
123129
import qualified Language.LSP.Types as LSP
@@ -691,9 +697,9 @@ getModSummaryFromImports
691697
-> FilePath
692698
-> UTCTime
693699
-> Maybe SB.StringBuffer
694-
-> ExceptT [FileDiagnostic] IO (ModSummary,[LImportDecl GhcPs])
700+
-> ExceptT [FileDiagnostic] IO ModSummaryResult
695701
getModSummaryFromImports env fp modTime contents = do
696-
(contents, dflags) <- preprocessor env fp contents
702+
(contents, opts, dflags) <- preprocessor env fp contents
697703

698704
-- The warns will hopefully be reported when we actually parse the module
699705
(_warns, L main_loc hsmod) <- parseHeader dflags fp contents
@@ -720,7 +726,7 @@ getModSummaryFromImports env fp modTime contents = do
720726
srcImports = map convImport src_idecls
721727
textualImports = map convImport (implicit_imports ++ ordinary_imps)
722728

723-
allImps = implicit_imports ++ imps
729+
msrImports = implicit_imports ++ imps
724730

725731
-- Force bits that might keep the string buffer and DynFlags alive unnecessarily
726732
liftIO $ evaluate $ rnf srcImports
@@ -730,7 +736,7 @@ getModSummaryFromImports env fp modTime contents = do
730736

731737
let modl = mkModule (thisPackage dflags) mod
732738
sourceType = if "-boot" `isSuffixOf` takeExtension fp then HsBootFile else HsSrcFile
733-
summary =
739+
msrModSummary =
734740
ModSummary
735741
{ ms_mod = modl
736742
#if MIN_GHC_API_VERSION(8,8,0)
@@ -749,7 +755,24 @@ getModSummaryFromImports env fp modTime contents = do
749755
, ms_srcimps = srcImports
750756
, ms_textual_imps = textualImports
751757
}
752-
return (summary, allImps)
758+
759+
msrFingerprint <- liftIO $ computeFingerprint opts msrModSummary
760+
return ModSummaryResult{..}
761+
where
762+
-- Compute a fingerprint from the contents of `ModSummary`,
763+
-- eliding the timestamps, the preprocessed source and other non relevant fields
764+
computeFingerprint opts ModSummary{..} = do
765+
let moduleUniques = runPut $ do
766+
put $ uniq $ moduleNameFS $ moduleName ms_mod
767+
forM_ (ms_srcimps ++ ms_textual_imps) $ \(mb_p, m) -> do
768+
put $ uniq $ moduleNameFS $ unLoc m
769+
whenJust mb_p $ put . uniq
770+
fingerPrintImports <- fingerprintFromByteString $ LBS.toStrict moduleUniques
771+
return $ fingerprintFingerprints $
772+
[ fingerprintString fp
773+
, fingerPrintImports
774+
] ++ map fingerprintString opts
775+
753776

754777
-- | Parse only the module header
755778
parseHeader

ghcide/src/Development/IDE/Core/Preprocessor.hs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import System.IO.Extra
3737

3838
-- | Given a file and some contents, apply any necessary preprocessors,
3939
-- e.g. unlit/cpp. Return the resulting buffer and the DynFlags it implies.
40-
preprocessor :: HscEnv -> FilePath -> Maybe StringBuffer -> ExceptT [FileDiagnostic] IO (StringBuffer, DynFlags)
40+
preprocessor :: HscEnv -> FilePath -> Maybe StringBuffer -> ExceptT [FileDiagnostic] IO (StringBuffer, [String], DynFlags)
4141
preprocessor env filename mbContents = do
4242
-- Perform unlit
4343
(isOnDisk, contents) <-
@@ -51,10 +51,10 @@ preprocessor env filename mbContents = do
5151
return (isOnDisk, contents)
5252

5353
-- Perform cpp
54-
dflags <- ExceptT $ parsePragmasIntoDynFlags env filename contents
55-
(isOnDisk, contents, dflags) <-
54+
(opts, dflags) <- ExceptT $ parsePragmasIntoDynFlags env filename contents
55+
(isOnDisk, contents, opts, dflags) <-
5656
if not $ xopt LangExt.Cpp dflags then
57-
return (isOnDisk, contents, dflags)
57+
return (isOnDisk, contents, opts, dflags)
5858
else do
5959
cppLogs <- liftIO $ newIORef []
6060
contents <- ExceptT
@@ -67,16 +67,16 @@ preprocessor env filename mbContents = do
6767
[] -> throw e
6868
diags -> return $ Left diags
6969
)
70-
dflags <- ExceptT $ parsePragmasIntoDynFlags env filename contents
71-
return (False, contents, dflags)
70+
(opts, dflags) <- ExceptT $ parsePragmasIntoDynFlags env filename contents
71+
return (False, contents, opts, dflags)
7272

7373
-- Perform preprocessor
7474
if not $ gopt Opt_Pp dflags then
75-
return (contents, dflags)
75+
return (contents, opts, dflags)
7676
else do
7777
contents <- liftIO $ runPreprocessor dflags filename $ if isOnDisk then Nothing else Just contents
78-
dflags <- ExceptT $ parsePragmasIntoDynFlags env filename contents
79-
return (contents, dflags)
78+
(opts, dflags) <- ExceptT $ parsePragmasIntoDynFlags env filename contents
79+
return (contents, opts, dflags)
8080
where
8181
logAction :: IORef [CPPLog] -> LogAction
8282
logAction cppLogs dflags _reason severity srcSpan _style msg = do
@@ -135,7 +135,7 @@ parsePragmasIntoDynFlags
135135
:: HscEnv
136136
-> FilePath
137137
-> SB.StringBuffer
138-
-> IO (Either [FileDiagnostic] DynFlags)
138+
-> IO (Either [FileDiagnostic] ([String], DynFlags))
139139
parsePragmasIntoDynFlags env fp contents = catchSrcErrors dflags0 "pragmas" $ do
140140
let opts = Hdr.getOptions dflags0 contents fp
141141

@@ -144,7 +144,7 @@ parsePragmasIntoDynFlags env fp contents = catchSrcErrors dflags0 "pragmas" $ do
144144

145145
(dflags, _, _) <- parseDynamicFilePragma dflags0 opts
146146
dflags' <- initializePlugins env dflags
147-
return $ disableWarningsAsErrors dflags'
147+
return (map unLoc opts, disableWarningsAsErrors dflags')
148148
where dflags0 = hsc_dflags env
149149

150150
-- | Run (unlit) literate haskell preprocessor on a file, or buffer if set

ghcide/src/Development/IDE/Core/RuleTypes.hs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import Development.IDE.Import.FindImports (ArtifactsLocation
4444
import Development.IDE.Spans.Common
4545
import Development.IDE.Spans.LocalBindings
4646
import Development.IDE.Types.Options (IdeGhcSession)
47+
import Fingerprint
4748
import GHC.Serialized (Serialized)
4849
import Language.LSP.Types (NormalizedFilePath)
4950
import TcRnMonad (TcGblEnv)
@@ -316,13 +317,24 @@ instance Binary IsFileOfInterestResult
316317

317318
type instance RuleResult IsFileOfInterest = IsFileOfInterestResult
318319

320+
data ModSummaryResult = ModSummaryResult
321+
{ msrModSummary :: !ModSummary
322+
, msrImports :: [LImportDecl GhcPs]
323+
, msrFingerprint :: !Fingerprint
324+
}
325+
326+
instance Show ModSummaryResult where
327+
show _ = "<ModSummaryResult>"
328+
instance NFData ModSummaryResult where
329+
rnf ModSummaryResult{..} =
330+
rnf msrModSummary `seq` rnf msrImports `seq` rnf msrFingerprint
331+
319332
-- | Generate a ModSummary that has enough information to be used to get .hi and .hie files.
320333
-- without needing to parse the entire source
321-
type instance RuleResult GetModSummary = (ModSummary,[LImportDecl GhcPs])
334+
type instance RuleResult GetModSummary = ModSummaryResult
322335

323-
-- | Generate a ModSummary with the timestamps elided,
324-
-- for more successful early cutoff
325-
type instance RuleResult GetModSummaryWithoutTimestamps = (ModSummary,[LImportDecl GhcPs])
336+
-- | Generate a ModSummary with the timestamps and preprocessed content elided, for more successful early cutoff
337+
type instance RuleResult GetModSummaryWithoutTimestamps = ModSummaryResult
326338

327339
data GetParsedModule = GetParsedModule
328340
deriving (Eq, Show, Typeable, Generic)

ghcide/src/Development/IDE/Core/Rules.hs

Lines changed: 22 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,7 @@ import Data.Hashable
143143
import Data.IORef
144144
import qualified Data.Rope.UTF16 as Rope
145145
import Data.Time (UTCTime (..))
146-
import FastString (FastString (uniq))
147146
import GHC.IO.Encoding
148-
import qualified HeaderInfo as Hdr
149147
import Module
150148
import TcRnMonad (tcg_dependent_files)
151149

@@ -311,7 +309,7 @@ priorityFilesOfInterest = Priority (-2)
311309
-- GHC wiki about: https://gitlab.haskell.org/ghc/ghc/-/wikis/api-annotations
312310
getParsedModuleRule :: Rules ()
313311
getParsedModuleRule = defineEarlyCutoff $ \GetParsedModule file -> do
314-
(ms, _) <- use_ GetModSummary file
312+
ModSummaryResult{msrModSummary = ms} <- use_ GetModSummary file
315313
sess <- use_ GhcSession file
316314
let hsc = hscEnv sess
317315
opt <- getIdeOptions
@@ -376,7 +374,7 @@ mergeParseErrorsHaddock normal haddock = normal ++
376374
-- So it is suitable for use cases where you need a perfect edit.
377375
getParsedModuleWithCommentsRule :: Rules ()
378376
getParsedModuleWithCommentsRule = defineEarlyCutoff $ \GetParsedModuleWithComments file -> do
379-
(ms, _) <- use_ GetModSummary file
377+
ModSummaryResult{msrModSummary = ms} <- use_ GetModSummary file
380378
sess <- use_ GhcSession file
381379
opt <- getIdeOptions
382380

@@ -397,7 +395,7 @@ getParsedModuleDefinition packageState opt file ms = do
397395
getLocatedImportsRule :: Rules ()
398396
getLocatedImportsRule =
399397
define $ \GetLocatedImports file -> do
400-
(ms,_) <- use_ GetModSummaryWithoutTimestamps file
398+
ModSummaryResult{msrModSummary = ms} <- use_ GetModSummaryWithoutTimestamps file
401399
targets <- useNoFile_ GetKnownTargets
402400
let imports = [(False, imp) | imp <- ms_textual_imps ms] ++ [(True, imp) | imp <- ms_srcimps ms]
403401
env_eq <- use_ GhcSession file
@@ -442,7 +440,7 @@ rawDependencyInformation fs = do
442440
return (rdi { rawBootMap = bm })
443441
where
444442
goPlural ff = do
445-
mss <- lift $ (fmap.fmap) fst <$> uses GetModSummaryWithoutTimestamps ff
443+
mss <- lift $ (fmap.fmap) msrModSummary <$> uses GetModSummaryWithoutTimestamps ff
446444
zipWithM go ff mss
447445

448446
go :: NormalizedFilePath -- ^ Current module being processed
@@ -563,7 +561,7 @@ reportImportCyclesRule =
563561
where rng = fromMaybe noRange $ srcSpanToRange (getLoc imp)
564562
fp = toNormalizedFilePath' $ fromMaybe noFilePath $ srcSpanToFilename (getLoc imp)
565563
getModuleName file = do
566-
ms <- fst <$> use_ GetModSummaryWithoutTimestamps file
564+
ms <- msrModSummary <$> use_ GetModSummaryWithoutTimestamps file
567565
pure (moduleNameString . moduleName . ms_mod $ ms)
568566
showCycle mods = T.intercalate ", " (map T.pack mods)
569567

@@ -769,7 +767,7 @@ ghcSessionDepsDefinition :: NormalizedFilePath -> Action (IdeResult HscEnvEq)
769767
ghcSessionDepsDefinition file = do
770768
env <- use_ GhcSession file
771769
let hsc = hscEnv env
772-
(ms,_) <- use_ GetModSummaryWithoutTimestamps file
770+
ms <- msrModSummary <$> use_ GetModSummaryWithoutTimestamps file
773771
deps <- use_ GetDependencies file
774772
let tdeps = transitiveModuleDeps deps
775773
uses_th_qq =
@@ -793,7 +791,7 @@ ghcSessionDepsDefinition file = do
793791
-- This rule also ensures that the `.hie` and `.o` (if needed) files are written out.
794792
getModIfaceFromDiskRule :: Rules ()
795793
getModIfaceFromDiskRule = defineEarlyCutoff $ \GetModIfaceFromDisk f -> do
796-
(ms,_) <- use_ GetModSummary f
794+
ms <- msrModSummary <$> use_ GetModSummary f
797795
(diags_session, mb_session) <- ghcSessionDepsDefinition f
798796
case mb_session of
799797
Nothing -> return (Nothing, (diags_session, Nothing))
@@ -850,7 +848,7 @@ getModIfaceFromDiskAndIndexRule = defineEarlyCutoff $ \GetModIfaceFromDiskAndInd
850848

851849
isHiFileStableRule :: Rules ()
852850
isHiFileStableRule = defineEarlyCutoff $ \IsHiFileStable f -> do
853-
(ms,_) <- use_ GetModSummaryWithoutTimestamps f
851+
ms <- msrModSummary <$> use_ GetModSummaryWithoutTimestamps f
854852
let hiFile = toNormalizedFilePath'
855853
$ ml_hi_file $ ms_location ms
856854
mbHiVersion <- use GetModificationTime_{missingFileDiagnostics=False} hiFile
@@ -873,47 +871,30 @@ getModSummaryRule :: Rules ()
873871
getModSummaryRule = do
874872
defineEarlyCutoff $ \GetModSummary f -> do
875873
session <- hscEnv <$> use_ GhcSession f
876-
let dflags = hsc_dflags session
877874
(modTime, mFileContent) <- getFileContents f
878875
let fp = fromNormalizedFilePath f
879876
modS <- liftIO $ runExceptT $
880877
getModSummaryFromImports session fp modTime (textToStringBuffer <$> mFileContent)
881878
case modS of
882-
Right res@(ms,_) -> do
883-
let fingerPrint = hash (computeFingerprint f (fromJust $ ms_hspp_buf ms) dflags ms, hashUTC modTime)
884-
return ( Just (BS.pack $ show fingerPrint) , ([], Just res))
879+
Right res -> do
880+
bufFingerPrint <- liftIO $
881+
fingerprintFromStringBuffer $ fromJust $ ms_hspp_buf $ msrModSummary res
882+
let fingerPrint = fingerprintFingerprints
883+
[ msrFingerprint res, bufFingerPrint ]
884+
return ( Just (fingerprintToBS fingerPrint) , ([], Just res))
885885
Left diags -> return (Nothing, (diags, Nothing))
886886

887887
defineEarlyCutoff $ \GetModSummaryWithoutTimestamps f -> do
888888
ms <- use GetModSummary f
889889
case ms of
890-
Just res@(msWithTimestamps,_) -> do
891-
let ms = msWithTimestamps {
890+
Just res@ModSummaryResult{..} -> do
891+
let ms = msrModSummary {
892892
ms_hs_date = error "use GetModSummary instead of GetModSummaryWithoutTimestamps",
893893
ms_hspp_buf = error "use GetModSummary instead of GetModSummaryWithoutTimestamps"
894894
}
895-
dflags <- hsc_dflags . hscEnv <$> use_ GhcSession f
896-
let fp = BS.pack $ show $ hash (computeFingerprint f (fromJust $ ms_hspp_buf msWithTimestamps) dflags ms)
897-
return (Just fp, ([], Just res))
895+
fp = fingerprintToBS msrFingerprint
896+
return (Just fp, ([], Just res{msrModSummary = ms}))
898897
Nothing -> return (Nothing, ([], Nothing))
899-
where
900-
-- Compute a fingerprint from the contents of `ModSummary`,
901-
-- eliding the timestamps and other non relevant fields.
902-
computeFingerprint f sb dflags ModSummary{..} =
903-
let fingerPrint =
904-
( moduleNameString (moduleName ms_mod)
905-
, ms_hspp_file
906-
, map unLoc opts
907-
, ml_hs_file ms_location
908-
, fingerPrintImports ms_srcimps
909-
, fingerPrintImports ms_textual_imps
910-
)
911-
fingerPrintImports = map (fmap uniq *** (moduleNameString . unLoc))
912-
opts = Hdr.getOptions dflags sb (fromNormalizedFilePath f)
913-
in fingerPrint
914-
915-
hashUTC UTCTime{..} = (fromEnum utctDay, fromEnum utctDayTime)
916-
917898

918899
generateCore :: RunSimplifier -> NormalizedFilePath -> Action (IdeResult ModGuts)
919900
generateCore runSimplifier file = do
@@ -1074,9 +1055,10 @@ needsCompilationRule = defineEarlyCutoff $ \NeedsCompilation file -> do
10741055
-- that we just threw away, and thus have to recompile all dependencies once
10751056
-- again, this time keeping the object code.
10761057
-- A file needs to be compiled if any file that depends on it uses TemplateHaskell or needs to be compiled
1077-
(ms,_) <- fst <$> useWithStale_ GetModSummaryWithoutTimestamps file
1078-
(modsums,needsComps) <- par (map (fmap (fst . fst)) <$> usesWithStale GetModSummaryWithoutTimestamps revdeps)
1079-
(uses NeedsCompilation revdeps)
1058+
ms <- msrModSummary . fst <$> useWithStale_ GetModSummaryWithoutTimestamps file
1059+
(modsums,needsComps) <-
1060+
par (map (fmap (msrModSummary . fst)) <$> usesWithStale GetModSummaryWithoutTimestamps revdeps)
1061+
(uses NeedsCompilation revdeps)
10801062
pure $ computeLinkableType ms modsums (map join needsComps)
10811063

10821064
pure (Just $ BS.pack $ show $ hash res, ([], Just res))

ghcide/src/Development/IDE/GHC/Util.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ module Development.IDE.GHC.Util(
1919
moduleImportPath,
2020
cgGutsToCoreModule,
2121
fingerprintToBS,
22+
fingerprintFromByteString,
2223
fingerprintFromStringBuffer,
2324
-- * General utilities
2425
readFileUtf8,
@@ -200,6 +201,11 @@ fingerprintFromStringBuffer :: StringBuffer -> IO Fingerprint
200201
fingerprintFromStringBuffer (StringBuffer buf len cur) =
201202
withForeignPtr buf $ \ptr -> fingerprintData (ptr `plusPtr` cur) len
202203

204+
fingerprintFromByteString :: ByteString -> IO Fingerprint
205+
fingerprintFromByteString bs = do
206+
let (fptr, offset, len) = BS.toForeignPtr bs
207+
withForeignPtr fptr $ \ptr ->
208+
fingerprintData (ptr `plusPtr` offset) len
203209

204210
-- | A slightly modified version of 'hDuplicateTo' from GHC.
205211
-- Importantly, it avoids the bug listed in https://gitlab.haskell.org/ghc/ghc/merge_requests/2318.

ghcide/src/Development/IDE/Plugin/Completions.hs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,14 @@ produceCompletions = do
6767
sess <- fmap fst <$> useWithStale GhcSessionDeps file
6868

6969
case (ms, sess) of
70-
(Just (ms,imps), Just sess) -> do
70+
(Just ModSummaryResult{..}, Just sess) -> do
7171
let env = hscEnv sess
7272
-- We do this to be able to provide completions of items that are not restricted to the explicit list
73-
(global, inScope) <- liftIO $ tcRnImportDecls env (dropListFromImportDecl <$> imps) `concurrently` tcRnImportDecls env imps
73+
(global, inScope) <- liftIO $ tcRnImportDecls env (dropListFromImportDecl <$> msrImports) `concurrently` tcRnImportDecls env msrImports
7474
case (global, inScope) of
7575
((_, Just globalEnv), (_, Just inScopeEnv)) -> do
7676
let uri = fromNormalizedUri $ normalizedFilePathToUri file
77-
cdata <- liftIO $ cacheDataProducer uri sess (ms_mod ms) globalEnv inScopeEnv imps
77+
cdata <- liftIO $ cacheDataProducer uri sess (ms_mod msrModSummary) globalEnv inScopeEnv msrImports
7878
return ([], Just cdata)
7979
(_diag, _) ->
8080
return ([], Nothing)
@@ -172,17 +172,17 @@ extendImportHandler' ideState ExtendImport {..}
172172
| Just fp <- uriToFilePath doc,
173173
nfp <- toNormalizedFilePath' fp =
174174
do
175-
(ms, ps, imps) <- MaybeT $ liftIO $
175+
(ModSummaryResult {..}, ps) <- MaybeT $ liftIO $
176176
runAction "extend import" ideState $
177177
runMaybeT $ do
178178
-- We want accurate edits, so do not use stale data here
179-
(ms, imps) <- MaybeT $ use GetModSummaryWithoutTimestamps nfp
179+
msr <- MaybeT $ use GetModSummaryWithoutTimestamps nfp
180180
ps <- MaybeT $ use GetAnnotatedParsedSource nfp
181-
return (ms, ps, imps)
182-
let df = ms_hspp_opts ms
181+
return (msr, ps)
182+
let df = ms_hspp_opts msrModSummary
183183
wantedModule = mkModuleName (T.unpack importName)
184184
wantedQual = mkModuleName . T.unpack <$> importQual
185-
imp <- liftMaybe $ find (isWantedModule wantedModule wantedQual) imps
185+
imp <- liftMaybe $ find (isWantedModule wantedModule wantedQual) msrImports
186186
fmap (nfp,) $ liftEither $
187187
rewriteToWEdit df doc (annsA ps) $
188188
extendImport (T.unpack <$> thingParent) (T.unpack newThing) imp

plugins/default/src/Ide/Plugin/Brittany.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ provider ide typ contents nfp opts = liftIO $ do
3838
let (range, selectedContents) = case typ of
3939
FormatText -> (fullRange contents, contents)
4040
FormatRange r -> (normalize r, extractRange r contents)
41-
(modsum, _) <- runAction "brittany" ide $ use_ GetModSummary nfp
41+
modsum <- fmap msrModSummary $ runAction "brittany" ide $ use_ GetModSummaryWithoutTimestamps nfp
4242
let dflags = ms_hspp_opts modsum
4343
let withRuntimeLibdir = bracket_ (setEnv key $ topDir dflags) (unsetEnv key)
4444
where key = "GHC_EXACTPRINT_GHC_LIBDIR"

0 commit comments

Comments
 (0)