11{-# LANGUAGE RecordWildCards, NamedFieldPuns, RecursiveDo #-}
2+ {-# LANGUAGE TupleSections #-}
23
34module Distribution.Server.Features.Sitemap (
45 SitemapFeature (.. )
@@ -28,6 +29,10 @@ import Network.URI
2829import Control.DeepSeq
2930import Text.Read
3031import Data.List.Split
32+ import Distribution.Server.Framework.BlobStorage
33+ import Distribution.Server.Features.TarIndexCache
34+ import qualified Data.TarIndex as Tar
35+ import System.FilePath (takeExtension )
3136
3237data Sitemap
3338 = Sitemap
@@ -52,6 +57,7 @@ initSitemapFeature :: ServerEnv
5257 -> IO ( CoreFeature
5358 -> DocumentationFeature
5459 -> TagsFeature
60+ -> TarIndexCacheFeature
5561 -> IO SitemapFeature )
5662
5763initSitemapFeature env@ ServerEnv { serverCacheDelay,
@@ -60,10 +66,11 @@ initSitemapFeature env@ServerEnv{ serverCacheDelay,
6066
6167 return $ \ coref@ CoreFeature {.. }
6268 docsCore@ DocumentationFeature {.. }
63- tagsf@ TagsFeature {.. } -> do
69+ tagsf@ TagsFeature {.. }
70+ tarf@ TarIndexCacheFeature {.. } -> do
6471
6572 rec let (feature, updateSitemapCache) =
66- sitemapFeature env coref docsCore tagsf
73+ sitemapFeature env coref docsCore tagsf tarf
6774 initTime sitemapCache
6875
6976 sitemapCache <- newAsyncCacheNF updateSitemapCache
@@ -80,13 +87,15 @@ sitemapFeature :: ServerEnv
8087 -> CoreFeature
8188 -> DocumentationFeature
8289 -> TagsFeature
90+ -> TarIndexCacheFeature
8391 -> UTCTime
8492 -> AsyncCache Sitemap
8593 -> (SitemapFeature , IO Sitemap )
8694sitemapFeature ServerEnv {.. }
8795 CoreFeature {.. }
8896 DocumentationFeature {.. }
8997 TagsFeature {.. }
98+ TarIndexCacheFeature {cachedTarIndex}
9099 initTime
91100 sitemapCache
92101 = (SitemapFeature {.. }, updateSitemapCache)
@@ -151,10 +160,10 @@ sitemapFeature ServerEnv{..}
151160 pkgIndex <- queryGetPackageIndex
152161 docIndex <- queryDocumentationIndex
153162
154- let sitemaps = generateSitemap serverBaseURI pageBuildDate
163+ sitemaps <- generateSitemap serverBaseURI pageBuildDate
155164 (map fst alltags)
156- pkgIndex docIndex
157- uriScheme i = " /sitemap/" <> show i <> " .xml"
165+ pkgIndex docIndex cachedTarIndex
166+ let uriScheme i = " /sitemap/" <> show i <> " .xml"
158167 sitemapIndex = renderSitemapIndex serverBaseURI (map uriScheme [0 .. (length sitemaps - 1 )])
159168 return $ Sitemap (XMLResponse sitemapIndex) (map XMLResponse sitemaps)
160169
@@ -165,19 +174,21 @@ generateSitemap :: URI
165174 -> T. Text
166175 -> [Tag ]
167176 -> PackageIndex. PackageIndex PkgInfo
168- -> Map. Map PackageId a
169- -> [ByteString ]
170- generateSitemap serverBaseURI pageBuildDate alltags pkgIndex docIndex =
171- renderSitemap serverBaseURI <$> chunksOf 50000 allEntries
177+ -> Map. Map PackageId BlobId
178+ -> (BlobId -> IO Tar. TarIndex )
179+ -> IO [ByteString ]
180+ generateSitemap serverBaseURI pageBuildDate alltags pkgIndex docIndex cachedTarIndex = do
181+ versionedDocSubEntries <- versionedDocSubEntriesIO
182+ let -- Combine and build sitemap
183+ allEntries = miscEntries
184+ ++ tagEntries
185+ ++ nameEntries
186+ ++ nameVersEntries
187+ ++ baseDocEntries
188+ ++ versionedDocEntries
189+ ++ versionedDocSubEntries
190+ pure $ renderSitemap serverBaseURI <$> chunksOf 50000 allEntries
172191 where
173- -- Combine and build sitemap
174- allEntries = miscEntries
175- ++ tagEntries
176- ++ nameEntries
177- ++ nameVersEntries
178- ++ baseDocEntries
179- ++ versionedDocEntries
180-
181192 -- Misc. pages
182193 -- e.g. ["http://myhackage.com/index", ...]
183194 miscEntries = urlsToSitemapEntries miscPages pageBuildDate Weekly 0.75
@@ -258,3 +269,26 @@ generateSitemap serverBaseURI pageBuildDate alltags pkgIndex docIndex =
258269 , Map. member (packageId pkg) docIndex
259270 ]
260271 pageBuildDate Monthly 0.25
272+
273+ -- Versioned doc pages in subdirectories
274+ -- versionedSubDocURIs :: [path :: String]
275+ -- e.g. ["http://myhackage.com/packages/mypackage-1.0.2/docs/Lib.html", ...]
276+ versionedDocSubEntriesIO = do
277+ let pkgs = [ (pkg , blob)
278+ | pkg <- concat pkgss
279+ , Just blob <- [Map. lookup (packageId pkg) docIndex]
280+ ]
281+ pkgIndices <- traverse (\ (pkg, blob) -> (pkg,) <$> cachedTarIndex blob) pkgs
282+ pure $ urlsToSitemapEntries
283+ [ prefixPkgURI ++ display (packageId pkg) ++ " /docs" ++ fp
284+ | (pkg, tarIndex) <- pkgIndices
285+ , Just tar <- [Tar. lookup tarIndex " " ]
286+ , fp <- entryToPaths " /" tar
287+ , takeExtension fp == " .html"
288+ ]
289+ pageBuildDate Monthly 0.25
290+
291+ entryToPaths :: FilePath -> Tar. TarIndexEntry -> [FilePath ]
292+ entryToPaths _ (Tar. TarFileEntry _) = []
293+ entryToPaths base (Tar. TarDir content) = map ((base </> ) . fst ) content ++
294+ [ file | (folder, entry) <- content, file <- entryToPaths (base </> folder) entry ]
0 commit comments