From d562087c45d87190126f111343973e0b82ecc189 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Tue, 16 May 2023 14:01:12 -0400 Subject: [PATCH] Redirect all requests with too many query string keys (#37062) --- middleware/handle-invalid-querystrings.js | 89 +++++++++++++++++++ middleware/index.js | 2 + .../invalid-querystrings.js | 47 ++++++++++ 3 files changed, 138 insertions(+) create mode 100644 middleware/handle-invalid-querystrings.js create mode 100644 tests/rendering-fixtures/invalid-querystrings.js diff --git a/middleware/handle-invalid-querystrings.js b/middleware/handle-invalid-querystrings.js new file mode 100644 index 000000000000..5eba2c68141b --- /dev/null +++ b/middleware/handle-invalid-querystrings.js @@ -0,0 +1,89 @@ +import statsd from '../lib/statsd.js' +import { noCacheControl, defaultCacheControl } from './cache-control.js' + +const STATSD_KEY = 'middleware.handle_invalid_querystrings' + +// Exported for the sake of end-to-end tests +export const MAX_UNFAMILIAR_KEYS_BAD_REQUEST = 15 +export const MAX_UNFAMILIAR_KEYS_REDIRECT = 3 + +const RECOGNIZED_KEYS_BY_PREFIX = { + '/_next/data/': ['versionId', 'productId', 'restPage', 'apiVersion', 'category', 'subcategory'], + '/api/search': ['query', 'language', 'version', 'page', 'product', 'autocomplete', 'limit'], + '/api/anchor-redirect': ['hash', 'path'], + '/api/webhooks': ['category', 'version'], + '/api/pageinfo': ['pathname'], +} + +const RECOGNIZED_KEYS_BY_ANY = new Set([ + // Learning track pages + 'learn', + 'learnProduct', + // Platform picker + 'platform', + // Tool picker + 'tool', + // When apiVersion isn't the only one. E.g. ?apiVersion=XXX&tool=vscode + 'apiVersion', + // Search + 'query', + // The drop-downs on "Webhook events and payloads" + 'actionType', +]) + +export default function handleInvalidQuerystrings(req, res, next) { + const { method, query, path } = req + if (method === 'GET' || method === 'HEAD') { + const originalKeys = Object.keys(query) + let keys = originalKeys.filter((key) => !RECOGNIZED_KEYS_BY_ANY.has(key)) + if (keys.length > 0) { + // Before we judge the number of query strings, strip out all the ones + // we're familiar with. + for (const [prefix, recognizedKeys] of Object.entries(RECOGNIZED_KEYS_BY_PREFIX)) { + if (path.startsWith(prefix)) { + keys = keys.filter((key) => !recognizedKeys.includes(key)) + } + } + } + + if (keys.length >= MAX_UNFAMILIAR_KEYS_BAD_REQUEST) { + noCacheControl(res) + + res.status(400).send('Too many unrecognized query string parameters') + + const tags = [ + 'response:400', + `url:${req.url}`, + `ip:${req.ip}`, + `path:${req.path}`, + `keys:${originalKeys.length}`, + ] + statsd.increment(STATSD_KEY, 1, tags) + + return + } + + if (keys.length >= MAX_UNFAMILIAR_KEYS_REDIRECT) { + defaultCacheControl(res) + const sp = new URLSearchParams(query) + keys.forEach((key) => sp.delete(key)) + let newURL = req.path + if (sp.toString()) newURL += `?${sp}` + + res.redirect(302, newURL) + + const tags = [ + 'response:302', + `url:${req.url}`, + `ip:${req.ip}`, + `path:${req.path}`, + `keys:${originalKeys.length}`, + ] + statsd.increment(STATSD_KEY, 1, tags) + + return + } + } + + return next() +} diff --git a/middleware/index.js b/middleware/index.js index c62edb5c3e42..899088724a77 100644 --- a/middleware/index.js +++ b/middleware/index.js @@ -64,6 +64,7 @@ import fastlyBehavior from './fastly-behavior.js' import mockVaPortal from './mock-va-portal.js' import dynamicAssets from './dynamic-assets.js' import rateLimit from './rate-limit.js' +import handleInvalidQuerystrings from './handle-invalid-querystrings.js' const { DEPLOYMENT_ENV, NODE_ENV } = process.env const isTest = NODE_ENV === 'test' || process.env.GITHUB_ACTIONS === 'true' @@ -198,6 +199,7 @@ export default function (app) { // *** Early exits *** app.use(rateLimit) + app.use(instrument(handleInvalidQuerystrings, './handle-invalid-querystrings')) app.use(instrument(handleInvalidPaths, './handle-invalid-paths')) app.use(instrument(handleNextDataPath, './handle-next-data-path')) diff --git a/tests/rendering-fixtures/invalid-querystrings.js b/tests/rendering-fixtures/invalid-querystrings.js new file mode 100644 index 000000000000..e6419d0cbcac --- /dev/null +++ b/tests/rendering-fixtures/invalid-querystrings.js @@ -0,0 +1,47 @@ +import { describe } from '@jest/globals' + +import { get } from '../helpers/e2etest.js' +import { + MAX_UNFAMILIAR_KEYS_BAD_REQUEST, + MAX_UNFAMILIAR_KEYS_REDIRECT, +} from '../../middleware/handle-invalid-querystrings.js' + +const alpha = Array.from(Array(26)).map((e, i) => i + 65) +const alphabet = alpha.map((x) => String.fromCharCode(x)) + +describe('invalid query strings', () => { + test('400 for too many unrecognized query strings', async () => { + // This test depends on knowing exactly the number + // of unrecognized query strings that will trigger a 400. + const sp = new URLSearchParams() + alphabet.slice(0, MAX_UNFAMILIAR_KEYS_BAD_REQUEST).forEach((letter) => sp.set(letter, '1')) + const url = `/?${sp}` + const res = await get(url) + expect(res.statusCode).toBe(400) + expect(res.headers['cache-control']).toMatch('no-store') + expect(res.headers['cache-control']).toMatch('private') + }) + + test('302 redirect for many unrecognized query strings', async () => { + // This test depends on knowing exactly the number + // of unrecognized query strings that will trigger a 400. + const sp = new URLSearchParams() + alphabet.slice(0, MAX_UNFAMILIAR_KEYS_REDIRECT).forEach((letter) => sp.set(letter, '1')) + const url = `/?${sp}` + const res = await get(url) + expect(res.statusCode).toBe(302) + expect(res.headers.location).toBe('/') + expect(res.headers['cache-control']).toMatch(/max-age=[1-9]/) + expect(res.headers['cache-control']).toMatch('public') + }) + + test('302 redirect but keeping recognized query strings', async () => { + const sp = new URLSearchParams() + alphabet.slice(0, MAX_UNFAMILIAR_KEYS_REDIRECT).forEach((letter) => sp.set(letter, '1')) + sp.set('platform', 'concrete') + const url = `/en/pages?${sp}` + const res = await get(url) + expect(res.statusCode).toBe(302) + expect(res.headers.location).toBe('/en/pages?platform=concrete') + }) +})