impress-2020/pages/api/assetImage.js
Matchu 18bc3df6f4 Use browser pooling for /api/assetImage
I tried running a pressure test against assetImage on prod with the open-source tool `wrk`:

```
wrk -t12 -c20 -d20s --timeout 20s 'https://impress-2020-box.openneo.net/api/assetImage?libraryUrl=https%3A%2F%2Fimages.neopets.com%2Fcp%2Fitems%2Fdata%2F000%2F000%2F522%2F522756_2bde0443ae%2F522756.js&size=600'
```

I found that, unsurprisingly, we run a lot of concurrent requests, which fill up memory with a lot of Chromium instances!

In this change, we declare a small pool of 2 browser contexts, to allow a bit of concurrency but still very strictly limit how many browser instances can actually get created. We might tune this number depending on the actual performance characteristics!
2021-11-12 23:35:30 -08:00

244 lines
8.1 KiB
JavaScript

/**
* /api/assetImage renders a canvas movie to PNG! To do this, we use a headless
* Chromium browser, which renders a special page in the webapp and screenshots
* the displayed canvas.
*
* This is, of course, a relatively heavyweight operation: it's always gonna be
* a bit slow, and consume significant RAM. So, caching is going to be
* important, so that we're not calling this all the time and overloading the
* endpoint!
*
* Parameters:
* - libraryUrl: A https://images.neopets.com/ URL to a JS movie library
* - size: 600, 300, or 150. Determines the output image size.
*/
const beeline = require("honeycomb-beeline")({
writeKey: process.env["HONEYCOMB_WRITE_KEY"],
dataset:
process.env["NODE_ENV"] === "production"
? "Dress to Impress (2020)"
: "Dress to Impress (2020, dev)",
serviceName: "impress-2020-gql-server",
disableInstrumentationOnLoad: true,
});
const playwright = require("playwright");
const genericPool = require("generic-pool");
// Share a single browser instance for all requests, to help perf a lot.
// We implement it as a "pool" of 1, because the pool is better than we are at
// lifecycle management and timeouts!
const browserPool = genericPool.createPool(
  {
    // Launch the one shared headless Chromium instance.
    create: async () => {
      console.info(`Starting shared browser instance`);
      return await playwright.chromium.launch({ headless: true });
    },
    // Return the promise from `close()`, so the pool can wait for the browser
    // to actually shut down, and so a failed close is reported through the
    // `factoryDestroyError` handler below instead of becoming an unhandled
    // rejection.
    destroy: (browser) => {
      console.info(`Closing shared browser instance`);
      return browser.close();
    },
    // Used with `testOnBorrow`: a disconnected browser fails validation when
    // it is borrowed.
    validate: (browser) => browser.isConnected(),
  },
  { min: 1, max: 1, testOnBorrow: true, acquireTimeoutMillis: 15000 }
);
// Log factory failures; otherwise they are only visible as events on the pool.
browserPool.on("factoryCreateError", (error) => console.error(error));
browserPool.on("factoryDestroyError", (error) => console.error(error));
// Browsers that already have our "disconnected" listener attached. The same
// browser object is handed out on every `getBrowser` call, so without this
// we'd stack up one extra listener per call.
const browsersWithDisconnectListener = new WeakSet();

/**
 * Get the shared browser instance from `browserPool`.
 *
 * @returns {Promise<object>} The browser handed out by the pool.
 * @throws If the pool cannot provide a browser (e.g. the acquire times out).
 */
async function getBrowser() {
  // HACK: We have the pool *managing* our browser's lifecycle, but we don't
  //       actually need to *lock* it. So, we "acquire" the browser, then
  //       immediately release the lock for other `getBrowser` calls.
  const browser = await browserPool.acquire();
  browserPool.release(browser);

  if (!browsersWithDisconnectListener.has(browser)) {
    browsersWithDisconnectListener.add(browser);
    browser.on("disconnected", () => {
      // `destroy` returns a promise, and it can reject (for example, if the
      // pool doesn't consider this browser to be on loan anymore). Log that
      // instead of leaving an unhandled rejection behind.
      browserPool.destroy(browser).catch((error) => console.error(error));
    });
  }

  return browser;
}
// We maintain a small pool of shared browser sessions ("contexts"), to manage
// memory usage. If all the sessions are already in use, a request will wait
// for one of them to become available.
const contextPool = genericPool.createPool(
  {
    // Open a fresh context in the shared browser.
    create: async () => {
      console.info(`Creating a browser context`);
      const browser = await getBrowser();
      return await browser.newContext();
    },
    // Return the promise from `close()`, so the pool can wait for the context
    // to finish closing, and so a failed close is reported through the
    // `factoryDestroyError` handler below instead of becoming an unhandled
    // rejection.
    destroy: (context) => {
      console.info(`Closing a browser context`);
      return context.close();
    },
    // Used with `testOnBorrow`: a context whose browser has disconnected
    // fails validation when it is borrowed.
    validate: (context) => context.browser().isConnected(),
  },
  { min: 1, max: 2, testOnBorrow: true, acquireTimeoutMillis: 15000 }
);
// Log factory failures; otherwise they are only visible as events on the pool.
contextPool.on("factoryCreateError", (error) => console.error(error));
contextPool.on("factoryDestroyError", (error) => console.error(error));
/**
 * Borrow a browser context from `contextPool`, waiting if none is available.
 *
 * The caller is responsible for closing the returned context when done; that
 * is what hands the pool slot back.
 *
 * @returns {Promise<object>} The borrowed browser context.
 * @throws If the pool cannot provide a context (e.g. the acquire times out).
 */
async function getBrowserContext() {
  const context = await contextPool.acquire();

  // When the caller closes the context, we don't just release it back to the
  // pool; we actually destroy it altogether, to help further isolate requests
  // as a safe default for security purposes. (I'm not aware of an attack
  // vector, but it feels like a good default, esp when contexts seem fast to
  // create!)
  context.on("close", () => {
    // `destroy` returns a promise; log a failure rather than leaving an
    // unhandled rejection behind.
    contextPool.destroy(context).catch((error) => console.error(error));
  });

  return context;
}
/**
 * Validate the request's query parameters, render the requested movie to a
 * PNG, and send it back with a long-lived cache header.
 *
 * Responds with a plain-text 400 when a parameter is missing or invalid, and
 * a plain-text 500 when rendering fails.
 *
 * @param req - Incoming request; reads `req.query.libraryUrl` and
 *     `req.query.size`.
 * @param res - Outgoing response.
 */
async function handle(req, res) {
  const { libraryUrl, size } = req.query;

  if (!libraryUrl) {
    return reject(res, "libraryUrl is required");
  }

  if (!isNeopetsUrl(libraryUrl)) {
    const message = `libraryUrl must be an HTTPS Neopets URL, but was: ${libraryUrl}`;
    return reject(res, message);
  }

  const supportedSizes = ["600", "300", "150"];
  if (!supportedSizes.includes(size)) {
    return reject(res, `size must be 600, 300, or 150, but was: ${size}`);
  }

  let imageBuffer;
  try {
    imageBuffer = await loadAndScreenshotImage(libraryUrl, size);
  } catch (error) {
    console.error(error);
    return reject(res, `Could not load image: ${error.message}`, 500);
  }

  // TODO: Compress the image?

  // Rendering is expensive, so ask caches to hold onto the result for as long
  // as possible. If we make a big change, we'll flush the cache or add a
  // version param.
  res.setHeader("Cache-Control", "public, max-age=31536000, immutable");
  res.setHeader("Content-Type", "image/png");
  return res.send(imageBuffer);
}
/**
 * Render the movie at `libraryUrl` in a browser page, and capture the
 * resulting canvas as an image.
 *
 * @param {string} libraryUrl - URL of the JS movie library to render.
 * @param {string} size - Output size, passed through to the internal page.
 * @returns {Promise<Buffer>} The screenshot of the rendered canvas.
 * @throws {Error} If the page displays an error message, or if neither the
 *     canvas nor an error message shows up before the waits give up.
 */
async function loadAndScreenshotImage(libraryUrl, size) {
  // To render the image, we load the /internal/assetImage page in the web app,
  // a simple page specifically designed for this API endpoint!
  //
  // NOTE: If we deploy to a host where localhost:3000 won't work, make this
  //       configurable with an env var, e.g. process.env.LOCAL_APP_HOST
  const assetImagePageUrl = new URL(
    "http://localhost:3000/internal/assetImage"
  );
  assetImagePageUrl.search = new URLSearchParams({
    libraryUrl,
    size,
  }).toString();

  console.debug("Opening browser page");
  const context = await getBrowserContext();

  // The page is opened *inside* the `try`, so that a failure in `newPage()`
  // still reaches the `finally` and closes the context. Otherwise the context
  // would stay checked out of the (very small) pool forever.
  let page = null;
  try {
    page = await context.newPage();
    console.debug(
      "Page opened, navigating to: " + assetImagePageUrl.toString()
    );

    await page.goto(assetImagePageUrl.toString());
    console.debug("Page loaded, awaiting image");

    // Start looking for the loaded canvas, *and* for an error message.
    // When either one displays, we proceed, either by returning the image if
    // present, or raising the error if present.
    const imageBufferPromise = screenshotImageFromPage(page);
    const errorMessagePromise = readErrorMessageFromPage(page);
    const firstResultFromPage = await Promise.any([
      imageBufferPromise.then((imageBuffer) => ({ imageBuffer })),
      errorMessagePromise.then((errorMessage) => ({ errorMessage })),
    ]);

    if (firstResultFromPage.errorMessage) {
      throw new Error(firstResultFromPage.errorMessage);
    } else if (firstResultFromPage.imageBuffer) {
      return firstResultFromPage.imageBuffer;
    } else {
      throw new Error(
        `Assertion error: Promise.any did not return an errorMessage or an imageBuffer: ` +
          `${JSON.stringify(Object.keys(firstResultFromPage))}`
      );
    }
  } finally {
    // Tear down our resources when we're done! If it fails, log the error, but
    // don't block the success of the image.
    if (page !== null) {
      try {
        await page.close();
      } catch (e) {
        console.warn("Error closing page after image finished", e);
      }
    }
    try {
      await context.close();
    } catch (e) {
      console.warn("Error closing browser context after image finished", e);
    }
  }
}
/**
 * Wait for the page's canvas to be marked as loaded, then screenshot it.
 *
 * @param page - The browser page that is rendering the asset image.
 * @returns {Promise<Buffer>} The screenshot, with the page background omitted.
 * @throws If the loaded canvas doesn't show up within 10 seconds.
 */
async function screenshotImageFromPage(page) {
  // Use the element handle that `waitForSelector` resolves to, rather than
  // querying the page a second time: a second lookup costs another round trip
  // and could come back empty if the element changed in between.
  const canvas = await page.waitForSelector(
    "#asset-image-canvas[data-is-loaded=true]",
    { timeout: 10000 }
  );

  console.debug("Image loaded, taking screenshot");
  const imageBuffer = await canvas.screenshot({
    omitBackground: true,
  });
  console.debug(`Screenshot captured, size: ${imageBuffer.length}`);

  return imageBuffer;
}
/**
 * Wait for the page to display an error message, then read its text.
 *
 * @param page - The browser page that is rendering the asset image.
 * @returns {Promise<string>} The text of the error message element.
 * @throws If no error message element shows up within 10 seconds.
 */
async function readErrorMessageFromPage(page) {
  // Use the element handle that `waitForSelector` resolves to, rather than
  // querying the page a second time: a second lookup costs another round trip
  // and could come back empty if the element changed in between.
  const errorMessageContainer = await page.waitForSelector(
    "#asset-image-error-message",
    { timeout: 10000 }
  );
  return await errorMessageContainer.innerText();
}
/**
 * Check whether a string is a URL on the https://images.neopets.com origin.
 *
 * @param urlString - The candidate URL.
 * @returns {boolean} `true` only when the value parses as a URL whose origin
 *     is exactly `https://images.neopets.com`; `false` otherwise, including
 *     when it doesn't parse at all.
 */
function isNeopetsUrl(urlString) {
  try {
    const { origin } = new URL(urlString);
    return origin === "https://images.neopets.com";
  } catch (parseError) {
    // Not a parseable URL, so certainly not a Neopets one.
    return false;
  }
}
/**
 * Send a plain-text error response.
 *
 * @param res - The response to write to.
 * @param message - The response body.
 * @param {number} [status=400] - The HTTP status code to send.
 * @returns Whatever `send` returns.
 */
function reject(res, message, status = 400) {
  res.setHeader("Content-Type", "text/plain; charset=utf8");
  const responseWithStatus = res.status(status);
  return responseWithStatus.send(message);
}
// Polyfill Promise.any for older Node. Based on
// https://github.com/ungap/promise-any, written out longhand, with three
// adjustments to behave more like the native method:
//   - It rejects with a real Error (named "AggregateError", carrying the
//     individual reasons in `.errors`) rather than a plain object, so that
//     callers reading `error.message` get something meaningful.
//   - It rejects right away for an empty input, instead of never settling.
//   - It accepts any iterable, not just arrays.
Promise.any =
  Promise.any ||
  function (promises) {
    return new Promise((resolve, reject) => {
      const entries = Array.from(promises);
      const errors = [];
      let remainingCount = entries.length;

      const rejectWithAllErrors = () => {
        const aggregateError = new Error("All promises were rejected");
        aggregateError.name = "AggregateError";
        aggregateError.errors = errors;
        reject(aggregateError);
      };

      if (remainingCount === 0) {
        rejectWithAllErrors();
        return;
      }

      entries.forEach((entry, index) => {
        // The first fulfillment wins. Rejections are collected by position,
        // and only reported once every entry has rejected.
        Promise.resolve(entry).then(resolve, (error) => {
          errors[index] = error;
          remainingCount -= 1;
          if (remainingCount === 0) {
            rejectWithAllErrors();
          }
        });
      });
    });
  };
/**
 * The API route's entry point: runs `handle` inside a Honeycomb trace named
 * "api/assetImage".
 *
 * @param req - Incoming request, passed through to `handle`.
 * @param res - Outgoing response, passed through to `handle`.
 * @returns Whatever `beeline.withTrace` returns.
 */
async function handleWithBeeline(req, res) {
  // Return the traced call, so that this function's promise doesn't resolve
  // until `handle` has finished, and so a rejection from `handle` reaches our
  // caller instead of going unhandled.
  // NOTE(review): this relies on `withTrace` returning the callback's return
  // value (here, `handle`'s promise) — confirm for the installed beeline
  // version.
  return beeline.withTrace(
    { name: "api/assetImage", operation_name: "api/assetImage" },
    () => handle(req, res)
  );
}

export default handleWithBeeline;