Use browser pooling for /api/assetImage

I tried running a pressure test against assetImage on prod with the open-source tool `wrk`: ``` wrk -t12 -c20 -d20s --timeout 20s 'https://impress-2020-box.openneo.net/api/assetImage?libraryUrl=https%3A%2F%2Fimages.neopets.com%2Fcp%2Fitems%2Fdata%2F000%2F000%2F522%2F522756_2bde0443ae%2F522756.js&size=600' ``` I found that, unsurprisingly, we run a lot of concurrent requests, which fill up memory with a lot of Chromium instances! In this change, we declare a small pool of 2 browser contexts, to allow a bit of concurrency but still very strictly limit how many browser instances can actually get created. We might tune this number depending on the actual performance characteristics!
2021-11-12 23:35:05 -08:00 · 2021-11-12 23:35:05 -08:00 · 18bc3df6f4
commit 18bc3df6f4
parent 3ec0ae7557
3 changed files with 66 additions and 14 deletions
--- a/package.json
+++ b/package.json
@ -33,6 +33,7 @@
    "easeljs": "^1.0.2",
    "escape-html": "^1.0.3",
    "framer-motion": "^4.1.11",
+    "generic-pool": "^3.8.2",
    "graphql": "^15.5.0",
    "honeycomb-beeline": "^2.7.4",
    "immer": "^9.0.6",
--- a/pages/api/assetImage.js
+++ b/pages/api/assetImage.js
@ -23,22 +23,68 @@ const beeline = require("honeycomb-beeline")({
 });

 const playwright = require("playwright");
+const genericPool = require("generic-pool");

-// We share one browser instance, but create a new independent "context" for
-// each request, as a security hedge. (The intent is for the user to request
-// very little from the browser, so it shouldn't matter, but it's just an extra
-// layer to reduce the risk of what an attack could do!)
-//
-// TODO: We're probably going to need to limit the number of concurrent browser
-//       sessions here, right? I don't actually know how the Next.js server
-//       handles concurrency though, let's pressure-test and find out before
-//       building a solution.
-let SHARED_BROWSER = null;
+// Share a single browser instance for all requests, to help perf a lot.
+// We implement it as a "pool" of 1, because the pool is better than we are at
+// lifecycle management and timeouts!
+const browserPool = genericPool.createPool(
+  {
+    create: async () => {
+      console.info(`Starting shared browser instance`);
+      return await playwright.chromium.launch({ headless: true });
+    },
+    destroy: (browser) => {
+      console.info(`Closing shared browser instance`);
+      browser.close();
+    },
+    validate: (browser) => browser.isConnected(),
+  },
+  { min: 1, max: 1, testOnBorrow: true, acquireTimeoutMillis: 15000 }
+);
+browserPool.on("factoryCreateError", (error) => console.error(error));
+browserPool.on("factoryDestroyError", (error) => console.error(error));
+async function getBrowser() {
+  // HACK: We have the pool *managing* our browser's lifecycle, but we don't
+  //       actually need to *lock* it. So, we "acquire" the browser, then
+  //       immediately release the lock for other `getBrowser` calls.
+  const browser = await browserPool.acquire();
+  browserPool.release(browser);
+  browser.on("disconnected", () => browserPool.destroy(browser));
+  return browser;
+}
+
+// We maintain a small pool of shared browser sessions ("contexts"), to manage
+// memory usage. If all the sessions are already in use, a request will wait
+// for one of them to become available.
+const contextPool = genericPool.createPool(
+  {
+    create: async () => {
+      console.info(`Creating a browser context`);
+      const browser = await getBrowser();
+      return await browser.newContext();
+    },
+    destroy: (context) => {
+      console.info(`Closing a browser context`);
+      context.close();
+    },
+    validate: (context) => context.browser().isConnected(),
+  },
+  { min: 1, max: 2, testOnBorrow: true, acquireTimeoutMillis: 15000 }
+);
+contextPool.on("factoryCreateError", (error) => console.error(error));
+contextPool.on("factoryDestroyError", (error) => console.error(error));
 async function getBrowserContext() {
-  if (SHARED_BROWSER == null) {
-    SHARED_BROWSER = await playwright.chromium.launch({ headless: true });
-  }
-  return await SHARED_BROWSER.newContext();
+  const context = await contextPool.acquire();
+
+  // When the caller closes the context, we don't just release it back to the
+  // pool; we actually destroy it altogether, to help further isolate requests
+  // as a safe default for security purposes. (I'm not aware of an attack
+  // vector, but it feels like a good default, esp when contexts seem fast to
+  // create!)
+  context.on("close", () => contextPool.destroy(context));
+
+  return context;
 }

 async function handle(req, res) {
--- a/yarn.lock
+++ b/yarn.lock
@ -6776,6 +6776,11 @@ generate-function@^2.3.1:
  dependencies:
    is-property "^1.0.2"

+generic-pool@^3.8.2:
+  version "3.8.2"
+  resolved "https://registry.yarnpkg.com/generic-pool/-/generic-pool-3.8.2.tgz#aab4f280adb522fdfbdc5e5b64d718d3683f04e9"
+  integrity sha512-nGToKy6p3PAbYQ7p1UlWl6vSPwfwU6TMSWK7TTu+WUY4ZjyZQGniGGt2oNVvyNSpyZYSB43zMXVLcBm08MTMkg==
+
 gensync@^1.0.0-beta.1:
  version "1.0.0-beta.1"
  resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.1.tgz#58f4361ff987e5ff6e1e7a210827aa371eaac269"