Switch to a small page pool
Hmm I am really not managing to keep the processes under control… maybe I'll try Puppeteer and see if it's just a bit more reliable??
This commit is contained in:
parent
18bc3df6f4
commit
20fff261ef
1 changed files with 86 additions and 75 deletions
|
@ -25,68 +25,77 @@ const beeline = require("honeycomb-beeline")({
|
||||||
const playwright = require("playwright");
|
const playwright = require("playwright");
|
||||||
const genericPool = require("generic-pool");
|
const genericPool = require("generic-pool");
|
||||||
|
|
||||||
// Share a single browser instance for all requests, to help perf a lot.
|
// We maintain a small pool of browser pages, to manage memory usage. If all
|
||||||
// We implement it as a "pool" of 1, because the pool is better than we are at
|
// the pages are already in use, a request will wait for one of them to become
|
||||||
// lifecycle management and timeouts!
|
// available.
|
||||||
const browserPool = genericPool.createPool(
|
//
|
||||||
|
// NOTE: I picked 4 because that seemed to be a good number for avoiding maxing
|
||||||
|
// out our CPU. I also noticed that maxing CPU seemed to be a weird
|
||||||
|
// threshold where Chromium processes started behaving poorly after? I'm
|
||||||
|
// not sure I'm diagnosing that correctly though, and I'm worried about
|
||||||
|
// the sysadmin implications of not having that locked down, y'know?
|
||||||
|
function createPagePool() {
|
||||||
|
console.info(`Creating new browser instance`);
|
||||||
|
const browserPromise = playwright.chromium.launch({ headless: true });
|
||||||
|
|
||||||
|
const pagePool = genericPool.createPool(
|
||||||
{
|
{
|
||||||
create: async () => {
|
create: async () => {
|
||||||
console.info(`Starting shared browser instance`);
|
console.debug(`Creating a browser page`);
|
||||||
return await playwright.chromium.launch({ headless: true });
|
const browser = await browserPromise;
|
||||||
|
return await browser.newPage();
|
||||||
},
|
},
|
||||||
destroy: (browser) => {
|
destroy: (page) => {
|
||||||
console.info(`Closing shared browser instance`);
|
console.debug(`Closing a browser page`);
|
||||||
browser.close();
|
page.close();
|
||||||
},
|
},
|
||||||
validate: (browser) => browser.isConnected(),
|
validate: (page) => page.context().browser().isConnected(),
|
||||||
},
|
},
|
||||||
{ min: 1, max: 1, testOnBorrow: true, acquireTimeoutMillis: 15000 }
|
{ min: 4, max: 4, testOnBorrow: true, acquireTimeoutMillis: 15000 }
|
||||||
);
|
);
|
||||||
browserPool.on("factoryCreateError", (error) => console.error(error));
|
pagePool.on("factoryCreateError", (error) => console.error(error));
|
||||||
browserPool.on("factoryDestroyError", (error) => console.error(error));
|
pagePool.on("factoryDestroyError", (error) => console.error(error));
|
||||||
async function getBrowser() {
|
pagePool.browserPromise = browserPromise; // we use this during reset
|
||||||
// HACK: We have the pool *managing* our browser's lifecycle, but we don't
|
|
||||||
// actually need to *lock* it. So, we "acquire" the browser, then
|
// If the browser terminates unexpectedly, and this is still the current
|
||||||
// immediately release the lock for other `getBrowser` calls.
|
// page pool, I guess something went wrong! Reset!
|
||||||
const browser = await browserPool.acquire();
|
browserPromise.then((browser) =>
|
||||||
browserPool.release(browser);
|
browser.on("disconnected", () => {
|
||||||
browser.on("disconnected", () => browserPool.destroy(browser));
|
if (PAGE_POOL === pagePool) {
|
||||||
return browser;
|
resetPagePool();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
return pagePool;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We maintain a small pool of shared browser sessions ("contexts"), to manage
|
let PAGE_POOL = createPagePool();
|
||||||
// memory usage. If all the sessions are already in use, a request will wait
|
|
||||||
// for one of them to become available.
|
|
||||||
const contextPool = genericPool.createPool(
|
|
||||||
{
|
|
||||||
create: async () => {
|
|
||||||
console.info(`Creating a browser context`);
|
|
||||||
const browser = await getBrowser();
|
|
||||||
return await browser.newContext();
|
|
||||||
},
|
|
||||||
destroy: (context) => {
|
|
||||||
console.info(`Closing a browser context`);
|
|
||||||
context.close();
|
|
||||||
},
|
|
||||||
validate: (context) => context.browser().isConnected(),
|
|
||||||
},
|
|
||||||
{ min: 1, max: 2, testOnBorrow: true, acquireTimeoutMillis: 15000 }
|
|
||||||
);
|
|
||||||
contextPool.on("factoryCreateError", (error) => console.error(error));
|
|
||||||
contextPool.on("factoryDestroyError", (error) => console.error(error));
|
|
||||||
async function getBrowserContext() {
|
|
||||||
const context = await contextPool.acquire();
|
|
||||||
|
|
||||||
// When the caller closes the context, we don't just release it back to the
|
// Every minute, we stop the current browser instance, to clear memory leaks.
|
||||||
// pool; we actually destroy it altogether, to help further isolate requests
|
// (I don't think this endpoint leaks pages, though maybe it does? But I
|
||||||
// as a safe default for security purposes. (I'm not aware of an attack
|
// definitely saw weird trailing memory and CPU usage after lots of requests...)
|
||||||
// vector, but it feels like a good default, esp when contexts seem fast to
|
async function resetPagePool() {
|
||||||
// create!)
|
console.info(`Resetting page pool`);
|
||||||
context.on("close", () => contextPool.destroy(context));
|
const prevPagePool = PAGE_POOL;
|
||||||
|
if (prevPagePool) {
|
||||||
return context;
|
// First, wait for the previous pages to finish. This is called
|
||||||
|
// "draining" the pool: waiting for it to empty. Cute!
|
||||||
|
console.debug(`Draining previous page pool`);
|
||||||
|
prevPagePool.drain().then(async () => {
|
||||||
|
// Then, terminate the browser instance.
|
||||||
|
console.debug(`Previous page pool drained, closing browser`);
|
||||||
|
const browser = await prevPagePool.browserPromise;
|
||||||
|
await browser.close();
|
||||||
|
console.info(`Previous browser closed`);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const newPagePool = createPagePool();
|
||||||
|
PAGE_POOL = newPagePool;
|
||||||
|
}
|
||||||
|
setInterval(resetPagePool, 60000);
|
||||||
|
|
||||||
async function handle(req, res) {
|
async function handle(req, res) {
|
||||||
const { libraryUrl, size } = req.query;
|
const { libraryUrl, size } = req.query;
|
||||||
if (!libraryUrl) {
|
if (!libraryUrl) {
|
||||||
|
@ -109,6 +118,9 @@ async function handle(req, res) {
|
||||||
imageBuffer = await loadAndScreenshotImage(libraryUrl, size);
|
imageBuffer = await loadAndScreenshotImage(libraryUrl, size);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(e);
|
console.error(e);
|
||||||
|
if (e.name === "TimeoutError") {
|
||||||
|
return reject(res, `Could not load image: Server under heavy load`, 503);
|
||||||
|
}
|
||||||
return reject(res, `Could not load image: ${e.message}`, 500);
|
return reject(res, `Could not load image: ${e.message}`, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -135,12 +147,12 @@ async function loadAndScreenshotImage(libraryUrl, size) {
|
||||||
size,
|
size,
|
||||||
}).toString();
|
}).toString();
|
||||||
|
|
||||||
console.debug("Opening browser page");
|
console.debug("Getting browser page");
|
||||||
const context = await getBrowserContext();
|
const currentPagePool = PAGE_POOL;
|
||||||
const page = await context.newPage();
|
const page = await currentPagePool.acquire();
|
||||||
console.debug("Page opened, navigating to: " + assetImagePageUrl.toString());
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
console.debug("Page ready, navigating to: " + assetImagePageUrl.toString());
|
||||||
await page.goto(assetImagePageUrl.toString());
|
await page.goto(assetImagePageUrl.toString());
|
||||||
console.debug("Page loaded, awaiting image");
|
console.debug("Page loaded, awaiting image");
|
||||||
|
|
||||||
|
@ -149,10 +161,20 @@ async function loadAndScreenshotImage(libraryUrl, size) {
|
||||||
// present, or raising the error if present.
|
// present, or raising the error if present.
|
||||||
const imageBufferPromise = screenshotImageFromPage(page);
|
const imageBufferPromise = screenshotImageFromPage(page);
|
||||||
const errorMessagePromise = readErrorMessageFromPage(page);
|
const errorMessagePromise = readErrorMessageFromPage(page);
|
||||||
const firstResultFromPage = await Promise.any([
|
let firstResultFromPage;
|
||||||
|
try {
|
||||||
|
firstResultFromPage = await Promise.any([
|
||||||
imageBufferPromise.then((imageBuffer) => ({ imageBuffer })),
|
imageBufferPromise.then((imageBuffer) => ({ imageBuffer })),
|
||||||
errorMessagePromise.then((errorMessage) => ({ errorMessage })),
|
errorMessagePromise.then((errorMessage) => ({ errorMessage })),
|
||||||
]);
|
]);
|
||||||
|
} catch (error) {
|
||||||
|
if (error.errors) {
|
||||||
|
// If both promises failed, show all error messages.
|
||||||
|
throw new Error(error.errors.map((e) => e.message).join(", "));
|
||||||
|
} else {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (firstResultFromPage.errorMessage) {
|
if (firstResultFromPage.errorMessage) {
|
||||||
throw new Error(firstResultFromPage.errorMessage);
|
throw new Error(firstResultFromPage.errorMessage);
|
||||||
|
@ -165,18 +187,7 @@ async function loadAndScreenshotImage(libraryUrl, size) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
// Tear down our resources when we're done! If it fails, log the error, but
|
currentPagePool.release(page);
|
||||||
// don't block the success of the image.
|
|
||||||
try {
|
|
||||||
await page.close();
|
|
||||||
} catch (e) {
|
|
||||||
console.warn("Error closing page after image finished", e);
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
await context.close();
|
|
||||||
} catch (e) {
|
|
||||||
console.warn("Error closing browser after image finished", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue