Skip to content

Commit e1f73a4

Browse files
authored
Merge pull request #10112 from mpickering/wip/10110
perf: Group together packages by repo when verifying tarballs
2 parents 3169b87 + 7d46115 commit e1f73a4

File tree

4 files changed

+85
-52
lines changed

4 files changed

+85
-52
lines changed

.hlint.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
- ignore: {name: "Use unwords"} # 8 hints
9393
- ignore: {name: "Use void"} # 22 hints
9494
- ignore: {name: "Use when"} # 1 hint
95+
- ignore: {name: "Use uncurry"} # 1 hint
9596

9697
- arguments:
9798
- --ignore-glob=Cabal-syntax/src/Distribution/Fields/Lexer.hs

cabal-install/src/Distribution/Client/FetchUtils.hs

Lines changed: 62 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ module Distribution.Client.FetchUtils
2525
-- ** specifically for repo packages
2626
, checkRepoTarballFetched
2727
, fetchRepoTarball
28-
, verifyFetchedTarball
28+
, verifyFetchedTarballs
2929

3030
-- ** fetching packages asynchronously
3131
, asyncFetchPackages
@@ -98,6 +98,7 @@ import System.IO
9898
, openTempFile
9999
)
100100

101+
import Control.Monad (forM)
101102
import Distribution.Client.Errors
102103
import qualified Hackage.Security.Client as Sec
103104
import qualified Hackage.Security.Util.Checked as Sec
@@ -152,40 +153,66 @@ checkRepoTarballFetched repo pkgid = do
152153
then return (Just file)
153154
else return Nothing
154155

155-
verifyFetchedTarball :: Verbosity -> RepoContext -> Repo -> PackageId -> IO Bool
156-
verifyFetchedTarball verbosity repoCtxt repo pkgid =
157-
let file = packageFile repo pkgid
158-
handleError :: IO Bool -> IO Bool
159-
handleError act = do
160-
res <- Safe.try act
161-
case res of
162-
Left e -> warn verbosity ("Error verifying fetched tarball " ++ file ++ ", will redownload: " ++ show (e :: SomeException)) >> pure False
163-
Right b -> pure b
164-
in handleError $ do
165-
exists <- doesFileExist file
166-
if not exists
167-
then return True -- if the file does not exist, it vacuously passes validation, since it will be downloaded as necessary with what we will then check is a valid hash.
168-
else case repo of
169-
-- a secure repo has hashes we can compare against to confirm this is the correct file.
170-
RepoSecure{} ->
171-
repoContextWithSecureRepo repoCtxt repo $ \repoSecure ->
172-
Sec.withIndex repoSecure $ \callbacks ->
173-
let warnAndFail s = warn verbosity ("Fetched tarball " ++ file ++ " does not match server, will redownload: " ++ s) >> return False
174-
in -- the do block in parens is due to dealing with the checked exceptions mechanism.
175-
( do
176-
fileInfo <- Sec.indexLookupFileInfo callbacks pkgid
177-
sz <- Sec.FileLength . fromInteger <$> getFileSize file
178-
if sz /= Sec.fileInfoLength (Sec.trusted fileInfo)
179-
then warnAndFail "file length mismatch"
180-
else do
181-
res <- Sec.compareTrustedFileInfo (Sec.trusted fileInfo) <$> Sec.computeFileInfo (Sec.Path file :: Sec.Path Sec.Absolute)
182-
if res
183-
then pure True
184-
else warnAndFail "file hash mismatch"
185-
)
186-
`Sec.catchChecked` (\(e :: Sec.InvalidPackageException) -> warnAndFail (show e))
187-
`Sec.catchChecked` (\(e :: Sec.VerificationError) -> warnAndFail (show e))
188-
_ -> pure True
156+
verifyFetchedTarballs
157+
:: Verbosity
158+
-> RepoContext
159+
-> Repo
160+
-> [PackageId]
161+
-> IO
162+
( [ Either
163+
(Repo, PackageId) -- Verified
164+
(Repo, PackageId) -- unverified)
165+
]
166+
)
167+
verifyFetchedTarballs verbosity repoCtxt repo pkgids =
168+
-- Establish the context once per repo (see #10110), this codepath is important
169+
-- to be fast as it can happen when no other building happens.
170+
let establishContext k =
171+
case repo of
172+
RepoSecure{} ->
173+
repoContextWithSecureRepo repoCtxt repo $ \repoSecure ->
174+
Sec.withIndex repoSecure $ \callbacks -> k (Just callbacks)
175+
_ -> k Nothing
176+
in do
177+
establishContext $ \mCallbacks ->
178+
forM pkgids $ \pkgid -> do
179+
let file = packageFile repo pkgid
180+
res <- verifyFetchedTarball verbosity file mCallbacks pkgid
181+
return $ if res then Left (repo, pkgid) else Right (repo, pkgid)
182+
183+
verifyFetchedTarball :: Verbosity -> FilePath -> Maybe Sec.IndexCallbacks -> PackageId -> IO Bool
184+
verifyFetchedTarball verbosity file mCallbacks pkgid =
185+
let
186+
handleError :: IO Bool -> IO Bool
187+
handleError act = do
188+
res <- Safe.try act
189+
case res of
190+
Left e -> warn verbosity ("Error verifying fetched tarball " ++ file ++ ", will redownload: " ++ show (e :: SomeException)) >> pure False
191+
Right b -> pure b
192+
in
193+
handleError $ do
194+
exists <- doesFileExist file
195+
if not exists
196+
then return True -- if the file does not exist, it vacuously passes validation, since it will be downloaded as necessary with what we will then check is a valid hash.
197+
else case mCallbacks of
198+
-- a secure repo has hashes we can compare against to confirm this is the correct file.
199+
Just callbacks ->
200+
let warnAndFail s = warn verbosity ("Fetched tarball " ++ file ++ " does not match server, will redownload: " ++ s) >> return False
201+
in -- the do block in parens is due to dealing with the checked exceptions mechanism.
202+
( do
203+
fileInfo <- Sec.indexLookupFileInfo callbacks pkgid
204+
sz <- Sec.FileLength . fromInteger <$> getFileSize file
205+
if sz /= Sec.fileInfoLength (Sec.trusted fileInfo)
206+
then warnAndFail "file length mismatch"
207+
else do
208+
res <- Sec.compareTrustedFileInfo (Sec.trusted fileInfo) <$> Sec.computeFileInfo (Sec.Path file :: Sec.Path Sec.Absolute)
209+
if res
210+
then pure True
211+
else warnAndFail "file hash mismatch"
212+
)
213+
`Sec.catchChecked` (\(e :: Sec.InvalidPackageException) -> warnAndFail (show e))
214+
`Sec.catchChecked` (\(e :: Sec.VerificationError) -> warnAndFail (show e))
215+
_ -> pure True
189216

190217
-- | Fetch a package if we don't have it already.
191218
fetchPackage

cabal-install/src/Distribution/Client/ProjectPlanning.hs

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ import Distribution.Client.SetupWrapper
127127
import Distribution.Client.Store
128128
import Distribution.Client.Targets (userToPackageConstraint)
129129
import Distribution.Client.Types
130-
import Distribution.Client.Utils (incVersion)
130+
import Distribution.Client.Utils (concatMapM, incVersion)
131131

132132
import qualified Distribution.Client.BuildReports.Storage as BuildReports
133133
import qualified Distribution.Client.IndexUtils as IndexUtils
@@ -206,7 +206,7 @@ import qualified Distribution.Solver.Types.ComponentDeps as CD
206206
import qualified Distribution.Compat.Graph as Graph
207207

208208
import Control.Exception (assert)
209-
import Control.Monad (forM, sequence)
209+
import Control.Monad (sequence)
210210
import Control.Monad.IO.Class (liftIO)
211211
import Control.Monad.State as State (State, execState, runState, state)
212212
import Data.Foldable (fold)
@@ -1069,25 +1069,29 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
10691069
-- Tarballs from repositories, either where the repository provides
10701070
-- hashes as part of the repo metadata, or where we will have to
10711071
-- download and hash the tarball.
1072-
repoTarballPkgsWithMetadataUnvalidated :: [(PackageId, Repo)]
1073-
repoTarballPkgsWithoutMetadata :: [(PackageId, Repo)]
1072+
repoTarballPkgsWithMetadataUnvalidated :: [(Repo, [PackageId])]
1073+
repoTarballPkgsWithoutMetadata :: [(Repo, PackageId)]
10741074
( repoTarballPkgsWithMetadataUnvalidated
10751075
, repoTarballPkgsWithoutMetadata
10761076
) =
10771077
partitionEithers
10781078
[ case repo of
1079-
RepoSecure{} -> Left (pkgid, repo)
1080-
_ -> Right (pkgid, repo)
1079+
RepoSecure{} -> Left (repo, [pkgid])
1080+
_ -> Right (repo, pkgid)
10811081
| (pkgid, RepoTarballPackage repo _ _) <- allPkgLocations
10821082
]
10831083

1084+
-- Group up the unvalidated packages by repo so we only read the remote
1085+
-- index once per repo (see #10110). The packages are ungrouped here and then regrouped
1086+
-- below, it would be better in future to refactor this whole code path so that we don't
1087+
-- repeatedly group and ungroup.
1088+
repoTarballPkgsWithMetadataUnvalidatedMap = Map.fromListWith (++) repoTarballPkgsWithMetadataUnvalidated
1089+
10841090
(repoTarballPkgsWithMetadata, repoTarballPkgsToDownloadWithMeta) <- fmap partitionEithers $
10851091
liftIO $
1086-
withRepoCtx $ \repoctx -> forM repoTarballPkgsWithMetadataUnvalidated $
1087-
\x@(pkg, repo) ->
1088-
verifyFetchedTarball verbosity repoctx repo pkg >>= \b -> case b of
1089-
True -> return $ Left x
1090-
False -> return $ Right x
1092+
withRepoCtx $ \repoctx -> flip concatMapM (Map.toList repoTarballPkgsWithMetadataUnvalidatedMap) $
1093+
\(repo, pkgids) ->
1094+
verifyFetchedTarballs verbosity repoctx repo pkgids
10911095

10921096
-- For tarballs from repos that do not have hashes available we now have
10931097
-- to check if the packages were downloaded already.
@@ -1101,9 +1105,9 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
11011105
[ do
11021106
mtarball <- checkRepoTarballFetched repo pkgid
11031107
case mtarball of
1104-
Nothing -> return (Left (pkgid, repo))
1108+
Nothing -> return (Left (repo, pkgid))
11051109
Just tarball -> return (Right (pkgid, tarball))
1106-
| (pkgid, repo) <- repoTarballPkgsWithoutMetadata
1110+
| (repo, pkgid) <- repoTarballPkgsWithoutMetadata
11071111
]
11081112

11091113
let repoTarballPkgsToDownload = repoTarballPkgsToDownloadWithMeta ++ repoTarballPkgsToDownloadWithNoMeta
@@ -1139,9 +1143,9 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
11391143
| pkgid <- pkgids
11401144
]
11411145
| (repo, pkgids) <-
1142-
map (\grp@((_, repo) :| _) -> (repo, map fst (NE.toList grp)))
1143-
. NE.groupBy ((==) `on` (remoteRepoName . repoRemote . snd))
1144-
. sortBy (compare `on` (remoteRepoName . repoRemote . snd))
1146+
map (\grp@((repo, _) :| _) -> (repo, map snd (NE.toList grp)))
1147+
. NE.groupBy ((==) `on` (remoteRepoName . repoRemote . fst))
1148+
. sortBy (compare `on` (remoteRepoName . repoRemote . fst))
11451149
$ repoTarballPkgsWithMetadata
11461150
]
11471151

@@ -1153,7 +1157,7 @@ getPackageSourceHashes verbosity withRepoCtx solverPlan = do
11531157
[ do
11541158
tarball <- fetchRepoTarball verbosity repoctx repo pkgid
11551159
return (pkgid, tarball)
1156-
| (pkgid, repo) <- repoTarballPkgsToDownload
1160+
| (repo, pkgid) <- repoTarballPkgsToDownload
11571161
]
11581162

11591163
return

cabal-install/src/Distribution/Client/Utils.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ module Distribution.Client.Utils
3838
, listFilesInside
3939
, safeRead
4040
, hasElem
41+
, concatMapM
4142
, occursOnlyOrBefore
4243
, giveRTSWarning
4344
) where

0 commit comments

Comments
 (0)