Simplify the page pool

Yeah ok, let's just run one browser instance and one pool.

I feel like I observed that, when I killed chromium in prod, pm2 noticed the abrupt loss of a child process and restarted the whole app process? which is rad? so maybe let's just try relying on that and see how it goes
This commit is contained in:
Emi Matchu 2021-11-13 02:27:24 -08:00
parent 0c2939dfe4
commit 5039371a1d

View file

@ -25,20 +25,18 @@ const beeline = require("honeycomb-beeline")({
const puppeteer = require("puppeteer"); const puppeteer = require("puppeteer");
const genericPool = require("generic-pool"); const genericPool = require("generic-pool");
console.info(`Creating new browser instance`);
const browserPromise = puppeteer.launch({ headless: true });
// We maintain a small pool of browser pages, to manage memory usage. If all // We maintain a small pool of browser pages, to manage memory usage. If all
// the pages are already in use, a request will wait for one of them to become // the pages are already in use, a request will wait for one of them to become
// available. // available.
// //
// NOTE: I picked 4 because that seemed to be a good number for avoiding maxing // NOTE: 4 pages is about where our 1-cpu prod environment maxes out. We might
// out our CPU. I also noticed that maxing CPU seemed to be a weird // want to upgrade to the 2-cpu box as we add more pressure though, and
// threshold where Chromium processes started behaving poorly after? I'm // then maybe we can afford more pages in the pool?
// not sure I'm diagnosing that correctly though, and I'm worried about
// the sysadmin implications of not having that locked down, y'know?
function createPagePool() {
console.info(`Creating new browser instance`);
const browserPromise = puppeteer.launch({ headless: true });
const pagePool = genericPool.createPool( const PAGE_POOL = genericPool.createPool(
{ {
create: async () => { create: async () => {
console.debug(`Creating a browser page`); console.debug(`Creating a browser page`);
@ -52,25 +50,9 @@ function createPagePool() {
validate: (page) => page.browser().isConnected(), validate: (page) => page.browser().isConnected(),
}, },
{ min: 4, max: 4, testOnBorrow: true, acquireTimeoutMillis: 15000 } { min: 4, max: 4, testOnBorrow: true, acquireTimeoutMillis: 15000 }
); );
pagePool.on("factoryCreateError", (error) => console.error(error)); PAGE_POOL.on("factoryCreateError", (error) => console.error(error));
pagePool.on("factoryDestroyError", (error) => console.error(error)); PAGE_POOL.on("factoryDestroyError", (error) => console.error(error));
pagePool.browserPromise = browserPromise; // we use this during reset
// If the browser terminates unexpectedly, and this is still the current
// page pool, I guess something went wrong! Reset!
browserPromise.then((browser) =>
browser.on("disconnected", () => {
if (PAGE_POOL === pagePool) {
resetPagePool();
}
})
);
return pagePool;
}
let PAGE_POOL = createPagePool(); // TODO: simplify..
async function handle(req, res) { async function handle(req, res) {
const { libraryUrl, size } = req.query; const { libraryUrl, size } = req.query;
@ -124,8 +106,7 @@ async function loadAndScreenshotImage(libraryUrl, size) {
}).toString(); }).toString();
console.debug("Getting browser page"); console.debug("Getting browser page");
const currentPagePool = PAGE_POOL; const page = await PAGE_POOL.acquire();
const page = await currentPagePool.acquire();
try { try {
console.debug("Page ready, navigating to: " + assetImagePageUrl.toString()); console.debug("Page ready, navigating to: " + assetImagePageUrl.toString());
@ -163,7 +144,9 @@ async function loadAndScreenshotImage(libraryUrl, size) {
); );
} }
} finally { } finally {
currentPagePool.release(page); // To avoid memory leaks, we destroy the page when we're done with it.
// The pool will replace it with a fresh one!
PAGE_POOL.destroy(page);
} }
} }