Matchu
832747b7a8
Before, we were using the ContentType from the S3 object, which was unreliable. This helps us behave better for query-string files! We also add a filename via Content-Disposition for the files that auto-download and for the Save As case. Idk if this is super important exactly, but I feel like it'll be a lifesaver for anyone using this to get at a specific file for their own reference at any point! And it just seems polite lol
137 lines
5.1 KiB
JavaScript
/**
 * /api/readFromArchive reads a file from our images.neopets.com archive.
 *
 * Parameters:
 * - url: The original canonical URL of the image,
 *        e.g. "https://images.neopets.com/items/acbg_virtupets.gif".
 *
 * One might wonder, why have this proxy at all? Why not just use the storage
 * service's own lil HTTP server? My main reason is to have one more lil gate
 * to control access, if it turns out someone tries to burn our egress costs
 * and we need to shut them off! And just in general, I don't like relying on
 * "public ACL" default behavior, it's scary :p
 *
 * For *actual* sharing of the whole archive, we'd want to like, put a .zip
 * file somewhere for people to download, from a service optimized for large
 * downloads.
 *
 * We don't check the incoming request very closely, because like, anything in
 * the archive bucket *is* meant to be read, there aren't secrets in it. And
 * this endpoint doesn't have any dangerous permissions, just read access to
 * the bucket. So even if someone did something clever with a URL to trick us
 * into loading the "wrong" thing, I don't see what they'd even *want*, y'know?
 */
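
// A quick usage sketch (illustrative, not part of this module): a client
// passes the canonical URL as an encoded query parameter, and gets back the
// archived bytes with an inferred Content-Type. Something like:
//
//   const res = await fetch(
//     "/api/readFromArchive?url=" +
//       encodeURIComponent("https://images.neopets.com/items/acbg_virtupets.gif")
//   );
//   const blob = await res.blob(); // arrives as image/gif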

const beeline = require("honeycomb-beeline")({
  writeKey: process.env["HONEYCOMB_WRITE_KEY"],
  dataset:
    process.env["NODE_ENV"] === "production"
      ? "Dress to Impress (2020)"
      : "Dress to Impress (2020, dev)",
  serviceName: "impress-2020-gql-server",
});

import * as path from "node:path";
import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3";

async function handle(req, res) {
  const { url } = req.query;

  let parsedUrl;
  try {
    parsedUrl = new URL(url);
  } catch (error) {
    res.status(400);
    res.end(`Error parsing URL parameter: ${error.message}`);
    return;
  }

  // An archive key is like: "images.neopets.com/cp/data/1/2/3/atlas.png?v=123".
  // So basically, just the host, path, and query string; no protocol or hash.
  // (Most archive keys don't have a query string, but some do! And don't
  // worry, `parsedUrl.search` will just be empty if there's no query string.)
  const archiveKey = `${parsedUrl.host}${parsedUrl.pathname}${parsedUrl.search}`;
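
  // To trace an example through (values from the comment above):
  //   url        = "https://images.neopets.com/cp/data/1/2/3/atlas.png?v=123"
  //   host       → "images.neopets.com"
  //   pathname   → "/cp/data/1/2/3/atlas.png"
  //   search     → "?v=123"
  //   archiveKey → "images.neopets.com/cp/data/1/2/3/atlas.png?v=123"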

  let archiveRes;
  try {
    archiveRes = await fetchFromArchive(archiveKey);
  } catch (error) {
    if (error.Code === "NoSuchKey") {
      res.status(404);
      res.end();
      return;
    }

    res.status(500);
    res.end(`Error loading file from archive: ${error.message}`);
    return;
  }

  // Set the Content-Type so the browser interprets it correctly, and also
  // tell it the filename via Content-Disposition, in case the browser chooses
  // to download it immediately (like with SWFs), or in case the user chooses
  // Save As later.
  const filename = path.basename(archiveKey.split("?")[0]);
  const contentType = inferContentType(filename);
  res.setHeader("Content-Type", contentType);
  res.setHeader("Content-Disposition", `inline; filename=${filename}`);
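
  // So, for the canonical URL
  // "https://images.neopets.com/items/acbg_virtupets.gif", the response
  // should go out with (sketching just these two headers):
  //   Content-Type: image/gif
  //   Content-Disposition: inline; filename=acbg_virtupets.gif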

  // Send a long-term cache header, like images.neopets.com does! We assume
  // that, if they change an asset, they'll change the query string to bust the
  // cache, and so we'll get to see that change in the updated archive too.
  res.setHeader("Cache-Control", "public, max-age=31536000, immutable");
  res.send(archiveRes.Body);
}

async function fetchFromArchive(archiveKey) {
  const storage = new S3Client({
    region: process.env.ARCHIVE_STORAGE_REGION,
    endpoint: `https://${process.env.ARCHIVE_STORAGE_HOST}`,
    credentials: {
      accessKeyId: process.env.ARCHIVE_STORAGE_READ_ACCESS_KEY,
      secretAccessKey: process.env.ARCHIVE_STORAGE_READ_SECRET_KEY,
    },
  });

  const res = await storage.send(
    new GetObjectCommand({
      Bucket: process.env.ARCHIVE_STORAGE_BUCKET,
      Key: archiveKey,
    })
  );
  return res;
}
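
// Note: we construct a fresh S3Client per request above, which keeps the
// handler self-contained but redoes a bit of setup each time. Hoisting the
// client to module scope would be a reasonable alternative; a sketch (same
// config as above, not what this file actually does):
//
//   const storage = new S3Client({ ... });  // created once, at module load
//   const fetchFromArchive = (archiveKey) =>
//     storage.send(
//       new GetObjectCommand({
//         Bucket: process.env.ARCHIVE_STORAGE_BUCKET,
//         Key: archiveKey,
//       })
//     );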

function inferContentType(filename) {
  // So, S3 objects *do* have a ContentType field, but our upload script
  // doesn't infer it correctly for the filenames that include query strings.
  // Rather than write our own uploader or something to fix the ContentType in
  // S3 after the fact (the latter of which wouldn't be unreasonable and might
  // be a better approach), we just take advantage of the fact that the archive
  // only *really* has a small handful of filetypes that we would need to serve
  // to DTI, and that serving as `application/octet-stream` isn't unreasonable
  // for other kinds of files we might have.
  const fileExtension = path.extname(filename);
  if (fileExtension === ".json") {
    return "application/json";
  } else if (fileExtension === ".js") {
    return "text/javascript";
  } else if (fileExtension === ".png") {
    return "image/png";
  } else if (fileExtension === ".gif") {
    return "image/gif";
  } else if (fileExtension === ".svg") {
    return "image/svg+xml";
  } else if (fileExtension === ".jpeg" || fileExtension === ".jpg") {
    return "image/jpeg";
  } else if (fileExtension === ".swf") {
    return "application/x-shockwave-flash";
  } else if (fileExtension === ".mp3") {
    return "audio/mpeg";
  } else {
    return "application/octet-stream";
  }
}
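
// A few sample mappings, following directly from the chain above:
//   inferContentType("atlas.png")   → "image/png"
//   inferContentType("movie.swf")   → "application/x-shockwave-flash"
//   inferContentType("mystery.dat") → "application/octet-stream" (the fallback)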

async function handleWithBeeline(req, res) {
  beeline.withTrace(
    { name: "api/readFromArchive", operation_name: "api/readFromArchive" },
    () => handle(req, res)
  );
}

export default handleWithBeeline;