Use browser pooling for /api/assetImage

I tried running a pressure test against assetImage on prod with the open-source tool `wrk`:

```
wrk -t12 -c20 -d20s --timeout 20s 'https://impress-2020-box.openneo.net/api/assetImage?libraryUrl=https%3A%2F%2Fimages.neopets.com%2Fcp%2Fitems%2Fdata%2F000%2F000%2F522%2F522756_2bde0443ae%2F522756.js&size=600'
```

I found that, unsurprisingly, we run a lot of concurrent requests, which fill up memory with a lot of Chromium instances!

In this change, we declare a small pool of 2 browser contexts, to allow a bit of concurrency but still very strictly limit how many browser instances can actually get created. We might tune this number depending on the actual performance characteristics!
This commit is contained in:
Emi Matchu 2021-11-12 23:35:05 -08:00
parent 3ec0ae7557
commit 18bc3df6f4
3 changed files with 66 additions and 14 deletions

View file

@ -33,6 +33,7 @@
"easeljs": "^1.0.2", "easeljs": "^1.0.2",
"escape-html": "^1.0.3", "escape-html": "^1.0.3",
"framer-motion": "^4.1.11", "framer-motion": "^4.1.11",
"generic-pool": "^3.8.2",
"graphql": "^15.5.0", "graphql": "^15.5.0",
"honeycomb-beeline": "^2.7.4", "honeycomb-beeline": "^2.7.4",
"immer": "^9.0.6", "immer": "^9.0.6",

View file

@ -23,22 +23,68 @@ const beeline = require("honeycomb-beeline")({
}); });
const playwright = require("playwright"); const playwright = require("playwright");
const genericPool = require("generic-pool");
// We share one browser instance, but create a new independent "context" for // Share a single browser instance for all requests, to help perf a lot.
// each request, as a security hedge. (The intent is for the user to request // We implement it as a "pool" of 1, because the pool is better than we are at
// very little from the browser, so it shouldn't matter, but it's just an extra // lifecycle management and timeouts!
// layer to reduce the risk of what an attack could do!) const browserPool = genericPool.createPool(
// {
// TODO: We're probably going to need to limit the number of concurrent browser create: async () => {
// sessions here, right? I don't actually know how the Next.js server console.info(`Starting shared browser instance`);
// handles concurrency though, let's pressure-test and find out before return await playwright.chromium.launch({ headless: true });
// building a solution. },
let SHARED_BROWSER = null; destroy: (browser) => {
async function getBrowserContext() { console.info(`Closing shared browser instance`);
if (SHARED_BROWSER == null) { browser.close();
SHARED_BROWSER = await playwright.chromium.launch({ headless: true }); },
validate: (browser) => browser.isConnected(),
},
{ min: 1, max: 1, testOnBorrow: true, acquireTimeoutMillis: 15000 }
);
browserPool.on("factoryCreateError", (error) => console.error(error));
browserPool.on("factoryDestroyError", (error) => console.error(error));
async function getBrowser() {
// HACK: We have the pool *managing* our browser's lifecycle, but we don't
// actually need to *lock* it. So, we "acquire" the browser, then
// immediately release the lock for other `getBrowser` calls.
const browser = await browserPool.acquire();
browserPool.release(browser);
browser.on("disconnected", () => browserPool.destroy(browser));
return browser;
} }
return await SHARED_BROWSER.newContext();
// We maintain a small pool of shared browser sessions ("contexts"), to manage
// memory usage. If all the sessions are already in use, a request will wait
// for one of them to become available.
const contextPool = genericPool.createPool(
{
create: async () => {
console.info(`Creating a browser context`);
const browser = await getBrowser();
return await browser.newContext();
},
destroy: (context) => {
console.info(`Closing a browser context`);
context.close();
},
validate: (context) => context.browser().isConnected(),
},
{ min: 1, max: 2, testOnBorrow: true, acquireTimeoutMillis: 15000 }
);
contextPool.on("factoryCreateError", (error) => console.error(error));
contextPool.on("factoryDestroyError", (error) => console.error(error));
async function getBrowserContext() {
const context = await contextPool.acquire();
// When the caller closes the context, we don't just release it back to the
// pool; we actually destroy it altogether, to help further isolate requests
// as a safe default for security purposes. (I'm not aware of an attack
// vector, but it feels like a good default, esp when contexts seem fast to
// create!)
context.on("close", () => contextPool.destroy(context));
return context;
} }
async function handle(req, res) { async function handle(req, res) {

View file

@ -6776,6 +6776,11 @@ generate-function@^2.3.1:
dependencies: dependencies:
is-property "^1.0.2" is-property "^1.0.2"
generic-pool@^3.8.2:
version "3.8.2"
resolved "https://registry.yarnpkg.com/generic-pool/-/generic-pool-3.8.2.tgz#aab4f280adb522fdfbdc5e5b64d718d3683f04e9"
integrity sha512-nGToKy6p3PAbYQ7p1UlWl6vSPwfwU6TMSWK7TTu+WUY4ZjyZQGniGGt2oNVvyNSpyZYSB43zMXVLcBm08MTMkg==
gensync@^1.0.0-beta.1: gensync@^1.0.0-beta.1:
version "1.0.0-beta.1" version "1.0.0-beta.1"
resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.1.tgz#58f4361ff987e5ff6e1e7a210827aa371eaac269" resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.1.tgz#58f4361ff987e5ff6e1e7a210827aa371eaac269"