From 18bc3df6f4b5eebe5bad5f65fa22568e6f8e9bf4 Mon Sep 17 00:00:00 2001 From: Matchu Date: Fri, 12 Nov 2021 23:35:05 -0800 Subject: [PATCH] Use browser pooling for /api/assetImage I tried running a pressure test against assetImage on prod with the open-source tool `wrk`: ``` wrk -t12 -c20 -d20s --timeout 20s 'https://impress-2020-box.openneo.net/api/assetImage?libraryUrl=https%3A%2F%2Fimages.neopets.com%2Fcp%2Fitems%2Fdata%2F000%2F000%2F522%2F522756_2bde0443ae%2F522756.js&size=600' ``` I found that, unsurprisingly, we run a lot of concurrent requests, which fill up memory with a lot of Chromium instances! In this change, we declare a small pool of 2 browser contexts, to allow a bit of concurrency but still very strictly limit how many browser instances can actually get created. We might tune this number depending on the actual performance characteristics! --- package.json | 1 + pages/api/assetImage.js | 74 +++++++++++++++++++++++++++++++++-------- yarn.lock | 5 +++ 3 files changed, 66 insertions(+), 14 deletions(-) diff --git a/package.json b/package.json index 28078d6..36510fd 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "easeljs": "^1.0.2", "escape-html": "^1.0.3", "framer-motion": "^4.1.11", + "generic-pool": "^3.8.2", "graphql": "^15.5.0", "honeycomb-beeline": "^2.7.4", "immer": "^9.0.6", diff --git a/pages/api/assetImage.js b/pages/api/assetImage.js index bc77c5e..ee9e2a0 100644 --- a/pages/api/assetImage.js +++ b/pages/api/assetImage.js @@ -23,22 +23,68 @@ const beeline = require("honeycomb-beeline")({ }); const playwright = require("playwright"); +const genericPool = require("generic-pool"); -// We share one browser instance, but create a new independent "context" for -// each request, as a security hedge. (The intent is for the user to request -// very little from the browser, so it shouldn't matter, but it's just an extra -// layer to reduce the risk of what an attack could do!) -// -// TODO: We're probably going to need to limit the number of concurrent browser -// sessions here, right? I don't actually know how the Next.js server -// handles concurrency though, let's pressure-test and find out before -// building a solution. -let SHARED_BROWSER = null; +// Share a single browser instance for all requests, to help perf a lot. +// We implement it as a "pool" of 1, because the pool is better than we are at +// lifecycle management and timeouts! +const browserPool = genericPool.createPool( + { + create: async () => { + console.info(`Starting shared browser instance`); + return await playwright.chromium.launch({ headless: true }); + }, + destroy: (browser) => { + console.info(`Closing shared browser instance`); + browser.close(); + }, + validate: (browser) => browser.isConnected(), + }, + { min: 1, max: 1, testOnBorrow: true, acquireTimeoutMillis: 15000 } +); +browserPool.on("factoryCreateError", (error) => console.error(error)); +browserPool.on("factoryDestroyError", (error) => console.error(error)); +async function getBrowser() { + // HACK: We have the pool *managing* our browser's lifecycle, but we don't + // actually need to *lock* it. So, we "acquire" the browser, then + // immediately release the lock for other `getBrowser` calls. + const browser = await browserPool.acquire(); + browserPool.release(browser); + browser.on("disconnected", () => browserPool.destroy(browser)); + return browser; +} + +// We maintain a small pool of shared browser sessions ("contexts"), to manage +// memory usage. If all the sessions are already in use, a request will wait +// for one of them to become available. +const contextPool = genericPool.createPool( + { + create: async () => { + console.info(`Creating a browser context`); + const browser = await getBrowser(); + return await browser.newContext(); + }, + destroy: (context) => { + console.info(`Closing a browser context`); + context.close(); + }, + validate: (context) => context.browser().isConnected(), + }, + { min: 1, max: 2, testOnBorrow: true, acquireTimeoutMillis: 15000 } +); +contextPool.on("factoryCreateError", (error) => console.error(error)); +contextPool.on("factoryDestroyError", (error) => console.error(error)); async function getBrowserContext() { - if (SHARED_BROWSER == null) { - SHARED_BROWSER = await playwright.chromium.launch({ headless: true }); - } - return await SHARED_BROWSER.newContext(); + const context = await contextPool.acquire(); + + // When the caller closes the context, we don't just release it back to the + // pool; we actually destroy it altogether, to help further isolate requests + // as a safe default for security purposes. (I'm not aware of an attack + // vector, but it feels like a good default, esp when contexts seem fast to + // create!) + context.on("close", () => contextPool.destroy(context)); + + return context; } async function handle(req, res) { diff --git a/yarn.lock b/yarn.lock index 4f3013e..d75fcff 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6776,6 +6776,11 @@ generate-function@^2.3.1: dependencies: is-property "^1.0.2" +generic-pool@^3.8.2: + version "3.8.2" + resolved "https://registry.yarnpkg.com/generic-pool/-/generic-pool-3.8.2.tgz#aab4f280adb522fdfbdc5e5b64d718d3683f04e9" + integrity sha512-nGToKy6p3PAbYQ7p1UlWl6vSPwfwU6TMSWK7TTu+WUY4ZjyZQGniGGt2oNVvyNSpyZYSB43zMXVLcBm08MTMkg== + gensync@^1.0.0-beta.1: version "1.0.0-beta.1" resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.1.tgz#58f4361ff987e5ff6e1e7a210827aa371eaac269"