
Commit 1c4d326

Author: Peter Bengtsson
Make the staging files, for deployments, as lean as possible (#23049)
* Experiment with making the tarball smaller. Part of #1248
* try this
* stop debugging
* delete translations too
* delete heavy search indexes too
* push and popd
* try this hack
* delete but leave directory
* debug more
* faster delete of translations
* less loud
* async await
* async await
* no tree
* simplify
* experimenting more
* unfinished
* only the large files
* change order
* brotli with level 6
* cope better with decorated rest json files
* tidying
* keep images
* cleaning
* cleaning up
* refactored function
* try this
* better comment
* remove console logging
* more important changes
* improved fs.accessSync check
1 parent 7c2739e commit 1c4d326

File tree

7 files changed: +167 -28 lines
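Taken together, the changes below implement one round trip: a deploy-time script Brotli-compresses the large static `.json` files and deletes the originals, and the runtime transparently reads the `.json.br` twin when the plain `.json` file is gone. A minimal sketch of that round trip using only Node's built-in zlib (the file name example.json is hypothetical, not part of this commit):

import fs from 'fs'
import zlib from 'zlib'

// Deploy time: compress a large JSON file with Brotli and delete the original.
const original = fs.readFileSync('example.json')
const compressed = zlib.brotliCompressSync(original, {
  params: { [zlib.constants.BROTLI_PARAM_QUALITY]: 6 },
})
fs.writeFileSync('example.json.br', compressed)
fs.unlinkSync('example.json')

// Run time: prefer the compressed twin, fall back to the plain file.
function readJsonPreferringBrotli(xpath) {
  try {
    return JSON.parse(zlib.brotliDecompressSync(fs.readFileSync(xpath + '.br')))
  } catch (err) {
    if (err.code !== 'ENOENT') throw err
    return JSON.parse(fs.readFileSync(xpath, 'utf8'))
  }
}

console.log(readJsonPreferringBrotli('example.json'))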
.github/actions-scripts/compress-large-files.js

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
#!/usr/bin/env node

import path from 'path'
import fs from 'fs'
import zlib from 'zlib'
import walk from 'walk-sync'

const DRY_RUN = Boolean(JSON.parse(process.env.DRY_RUN || 'false'))
// Roughly 100KiB means about 25 files at the moment.
// Set this too low and the overheads will be more than the disk and
// network I/O that this intends to serve.
const MIN_GZIP_SIZE = Number(process.env.MIN_GZIP_SIZE || 1024 * 100)

const BROTLI_OPTIONS = {
  params: {
    [zlib.constants.BROTLI_PARAM_MODE]: zlib.constants.BROTLI_MODE_TEXT,
    [zlib.constants.BROTLI_PARAM_QUALITY]: 6,
  },
}
main()

async function main() {
  compressFromPattern('lib/**/static/**/*.json')
}

async function compressFromPattern(pattern) {
  const glob = pattern.includes('*') ? pattern.split(path.sep).slice(1).join(path.sep) : undefined
  const walkOptions = {
    globs: glob ? [glob] : undefined,
    directories: false,
    includeBasePath: true,
  }
  const root = path.resolve(pattern.includes('*') ? pattern.split(path.sep)[0] : pattern)
  const filePaths = walk(root, walkOptions).filter((filePath) => {
    return fs.statSync(filePath).size > MIN_GZIP_SIZE
  })

  if (!DRY_RUN) {
    console.time(`Compress ${filePaths.length} files`)
    const compressed = await Promise.all(filePaths.map(compressFile))
    console.timeEnd(`Compress ${filePaths.length} files`)

    console.time(`Delete ${compressed.length} files`)
    compressed.forEach((filePath) => fs.unlinkSync(filePath))
    console.timeEnd(`Delete ${compressed.length} files`)
  }
}

function compressFile(filePath) {
  return new Promise((resolve, reject) => {
    const contentStream = fs.createReadStream(filePath)
    const newFilePath = `${filePath}.br`
    const writeStream = fs.createWriteStream(newFilePath)
    const compressor = zlib.createBrotliCompress(BROTLI_OPTIONS)
    contentStream
      .pipe(compressor)
      .pipe(writeStream)
      .on('finish', (err) => {
        if (err) return reject(err)
        resolve(filePath)
      })
  })
}
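A side note on the stream wiring above: a writable stream's 'finish' event handler is not passed an error, so the err argument in compressFile() will always be undefined, and failures on the read, compress, or write streams surface as separate 'error' events that .pipe() does not forward. A purely illustrative variant (the name compressFileSafely is hypothetical; it reuses the same fs, zlib, and BROTLI_OPTIONS as the script above) that also rejects on stream errors:

function compressFileSafely(filePath) {
  return new Promise((resolve, reject) => {
    const compressor = zlib.createBrotliCompress(BROTLI_OPTIONS)
    fs.createReadStream(filePath)
      .on('error', reject) // read failures
      .pipe(compressor)
      .on('error', reject) // compression failures
      .pipe(fs.createWriteStream(`${filePath}.br`))
      .on('error', reject) // write failures
      .on('finish', () => resolve(filePath))
  })
}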

.github/workflows/staging-build-pr.yml

Lines changed: 32 additions & 9 deletions
@@ -30,15 +30,6 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  debug:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Dump full context for debugging
-        run: |
-          cat << EOF
-          ${{ toJSON(github) }}
-          EOF
-
   build-pr:
     if: ${{ github.repository == 'github/docs-internal' || github.repository == 'github/docs' }}
     runs-on: ${{ fromJSON('["ubuntu-latest", "self-hosted"]')[github.repository == 'github/docs-internal'] }}
@@ -117,11 +108,43 @@ jobs:
         run: npm set-script heroku-postbuild "echo 'Application was pre-built!'"
 
       - name: Delete heavy things we won't need deployed
+        if: ${{ github.repository == 'github/docs-internal' }}
         run: |
+
           # The dereferenced file is not used in runtime once the
           # decorated file has been created from it.
           rm -fr lib/rest/static/dereferenced
 
+          # Translations are never tested in Staging builds
+          # but let's keep the empty directory.
+          rm -fr translations
+          mkdir translations
+
+          # Delete all the big search indexes that are NOT English (`*-en-*`)
+          pushd lib/search/indexes
+          ls | grep -v '\-en\-' | xargs rm
+          popd
+
+          # Note! Some day it would be nice to be able to delete
+          # all the heavy assets because they bloat the tarball.
+          # But it's not obvious how to test it then. For now, we'll have
+          # to accept that every staging build has a copy of the images.
+
+          # The assumption here is that a staging build will not
+          # need these legacy redirects. Only the redirects from
+          # front-matter will be at play.
+          # These static redirects json files are notoriously large
+          # and they make the tarball unnecessarily large.
+          echo '[]' > lib/redirects/static/archived-frontmatter-fallbacks.json
+          echo '{}' > lib/redirects/static/developer.json
+          echo '{}' > lib/redirects/static/archived-redirects-from-213-to-217.json
+
+          # This will turn every `lib/**/static/*.json` into
+          # an equivalent `lib/**/static/*.json.br` file.
+          # Once the server starts, it'll know to fall back to reading
+          # the `.br` equivalent if the `.json` file isn't present.
+          node .github/actions-scripts/compress-large-files.js
+
       - name: Create an archive
         # Only bother if this is actually a pull request
         if: ${{ github.event.pull_request.number }}

lib/read-json-file.js

Lines changed: 28 additions & 2 deletions
@@ -1,6 +1,32 @@
 import fs from 'fs'
-import path from 'path'
+import { brotliDecompressSync } from 'zlib'
 
 export default function readJsonFile(xpath) {
-  return JSON.parse(fs.readFileSync(path.join(process.cwd(), xpath), 'utf8'))
+  return JSON.parse(fs.readFileSync(xpath, 'utf8'))
+}
+
+export function readCompressedJsonFile(xpath) {
+  if (!xpath.endsWith('.br')) {
+    xpath += '.br'
+  }
+  return JSON.parse(brotliDecompressSync(fs.readFileSync(xpath)))
+}
+
+// Ask it to read a `foo.json` file and it will automatically
+// first check whether there's a `foo.json.br`, and only if there isn't
+// will it fall back to reading the `foo.json` file.
+// The reason for this is that staging builds need to be as small as
+// possible (in terms of disk) for them to deploy faster. So the
+// staging deployment process will compress a bunch of large
+// `.json` files before packaging it up.
+export function readCompressedJsonFileFallback(xpath) {
+  try {
+    return readCompressedJsonFile(xpath)
+  } catch (err) {
+    if (err.code === 'ENOENT') {
+      return readJsonFile(xpath)
+    } else {
+      throw err
+    }
+  }
 }
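A short usage sketch of the new fallback helper (the /tmp paths are hypothetical, and the import path assumes the sketch lives at the repo root): whether only the plain file or only the compressed twin exists on disk, the same call returns the parsed data.

import fs from 'fs'
import { brotliCompressSync } from 'zlib'
import { readCompressedJsonFileFallback } from './lib/read-json-file.js'

const data = { hello: 'world' }
fs.writeFileSync('/tmp/example.json', JSON.stringify(data))

// Only the plain .json exists: the .br read throws ENOENT and we fall back.
console.log(readCompressedJsonFileFallback('/tmp/example.json')) // { hello: 'world' }

// Simulate a staging deploy: keep only the compressed twin.
fs.writeFileSync('/tmp/example.json.br', brotliCompressSync(JSON.stringify(data)))
fs.unlinkSync('/tmp/example.json')

// Same call, same result, now served from the .br file.
console.log(readCompressedJsonFileFallback('/tmp/example.json')) // { hello: 'world' }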

lib/redirects/precompile.js

Lines changed: 2 additions & 2 deletions
@@ -4,7 +4,7 @@ import path from 'path'
 import { isPromise } from 'util/types'
 import { fileURLToPath } from 'url'
 
-import readJsonFile from '../read-json-file.js'
+import { readCompressedJsonFileFallback } from '../read-json-file.js'
 import { latest } from '../../lib/enterprise-server-releases.js'
 import getExceptionRedirects from './exception-redirects.js'
 import { languageKeys } from '../languages.js'
@@ -45,7 +45,7 @@ const DISK_CACHE_FILEPATH = path.join(__dirname, `.redirects-cache_${languageKey
 // This function runs at server warmup and precompiles possible redirect routes.
 // It outputs them in key-value pairs within a neat Javascript object: { oldPath: newPath }
 const precompileRedirects = diskMemoize(DISK_CACHE_FILEPATH, async (pageList) => {
-  const allRedirects = readJsonFile('./lib/redirects/static/developer.json')
+  const allRedirects = readCompressedJsonFileFallback('./lib/redirects/static/developer.json')
 
   // Replace hardcoded 'latest' with real value in the redirected path
   Object.entries(allRedirects).forEach(([oldPath, newPath]) => {

lib/rest/index.js

Lines changed: 8 additions & 2 deletions
@@ -3,12 +3,18 @@ import path from 'path'
 import fs from 'fs'
 import { chain, get, groupBy } from 'lodash-es'
 import { allVersions, allVersionKeys } from '../all-versions.js'
+import { readCompressedJsonFileFallback } from '../read-json-file.js'
+
 const __dirname = path.dirname(fileURLToPath(import.meta.url))
 const schemasPath = path.join(__dirname, 'static/decorated')
+
 export const operations = {}
 fs.readdirSync(schemasPath).forEach((filename) => {
-  const key = filename.replace('.json', '')
-  const value = JSON.parse(fs.readFileSync(path.join(schemasPath, filename)))
+  // In staging deploys, the `.json` files might have been converted
+  // to `.json.br`. The `readCompressedJsonFileFallback()` function
+  // can handle both, but you need to call it with the `.json` filename.
+  const key = path.parse(filename).name
+  const value = readCompressedJsonFileFallback(path.join(schemasPath, filename))
   operations[key] = value
 })
 
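For reference, path.parse().name strips only the final extension, so the derived key depends on whether the directory listing contains `.json` or `.json.br` names; a quick illustration (plain Node behavior, not code from this commit):

import path from 'path'

path.parse('commits.json').name // 'commits'
path.parse('commits.json.br').name // 'commits.json'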

middleware/archived-enterprise-versions.js

Lines changed: 22 additions & 3 deletions
@@ -9,15 +9,34 @@ import patterns from '../lib/patterns.js'
 import versionSatisfiesRange from '../lib/version-satisfies-range.js'
 import isArchivedVersion from '../lib/is-archived-version.js'
 import got from 'got'
-import readJsonFile from '../lib/read-json-file.js'
+import { readCompressedJsonFileFallback } from '../lib/read-json-file.js'
 import { cacheControlFactory } from './cache-control.js'
 
 function readJsonFileLazily(xpath) {
   const cache = new Map()
   // This will throw if the file isn't accessible at all, e.g. ENOENT
-  fs.accessSync(xpath)
+  // But, the file might have been replaced by one called `SAMENAME.json.br`
+  // because in staging, we ship these files compressed to make the
+  // deployment faster. So, in our file-presence check, we need to
+  // account for that.
+  try {
+    fs.accessSync(xpath)
+  } catch (err) {
+    if (err.code === 'ENOENT') {
+      try {
+        fs.accessSync(xpath + '.br')
+      } catch (err) {
+        if (err.code === 'ENOENT') {
+          throw new Error(`Neither ${xpath} nor ${xpath}.br is accessible`)
+        }
+        throw err
+      }
+    } else {
+      throw err
+    }
+  }
   return () => {
-    if (!cache.has(xpath)) cache.set(xpath, readJsonFile(xpath))
+    if (!cache.has(xpath)) cache.set(xpath, readCompressedJsonFileFallback(xpath))
     return cache.get(xpath)
   }
 }
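The nested try/catch above is only a presence probe for either the plain file or its compressed twin. A more compact sketch of the same check (the helper name is hypothetical), assuming fs.existsSync is acceptable; note it treats any inaccessible file as missing, whereas the committed accessSync version re-throws non-ENOENT errors:

function assertJsonOrBrotliExists(xpath) {
  // Accept either the plain file or the `.br` twin shipped in staging deploys.
  if (!fs.existsSync(xpath) && !fs.existsSync(`${xpath}.br`)) {
    throw new Error(`Neither ${xpath} nor ${xpath}.br is accessible`)
  }
}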

middleware/contextualizers/graphql.js

Lines changed: 12 additions & 10 deletions
@@ -1,12 +1,14 @@
-import fs from 'fs'
-import path from 'path'
-import readJsonFile from '../../lib/read-json-file.js'
+import { readCompressedJsonFileFallback } from '../../lib/read-json-file.js'
 import { allVersions } from '../../lib/all-versions.js'
-const previews = readJsonFile('./lib/graphql/static/previews.json')
-const upcomingChanges = readJsonFile('./lib/graphql/static/upcoming-changes.json')
-const changelog = readJsonFile('./lib/graphql/static/changelog.json')
-const prerenderedObjects = readJsonFile('./lib/graphql/static/prerendered-objects.json')
-const prerenderedInputObjects = readJsonFile('./lib/graphql/static/prerendered-input-objects.json')
+const previews = readCompressedJsonFileFallback('./lib/graphql/static/previews.json')
+const upcomingChanges = readCompressedJsonFileFallback('./lib/graphql/static/upcoming-changes.json')
+const changelog = readCompressedJsonFileFallback('./lib/graphql/static/changelog.json')
+const prerenderedObjects = readCompressedJsonFileFallback(
+  './lib/graphql/static/prerendered-objects.json'
+)
+const prerenderedInputObjects = readCompressedJsonFileFallback(
+  './lib/graphql/static/prerendered-input-objects.json'
+)
 
 const explorerUrl =
   process.env.NODE_ENV === 'production'
@@ -27,8 +29,8 @@ export default function graphqlContext(req, res, next) {
   const graphqlVersion = currentVersionObj.miscVersionName
 
   req.context.graphql = {
-    schemaForCurrentVersion: JSON.parse(
-      fs.readFileSync(path.join(process.cwd(), `lib/graphql/static/schema-${graphqlVersion}.json`))
+    schemaForCurrentVersion: readCompressedJsonFileFallback(
+      `lib/graphql/static/schema-${graphqlVersion}.json`
     ),
     previewsForCurrentVersion: previews[graphqlVersion],
     upcomingChangesForCurrentVersion: upcomingChanges[graphqlVersion],
