in-memory cache for modeling query

I'm using my first ever MySQL Store Procedure for clever cleverness in caching the modeling query!

I realized that checking for the latest contribution timestamp is a pretty reliable way of deciding when modeling data was last updated at all. If that timestamp hasn't changed, we can reuse the results!

I figured that, because query roundtrips are a bottleneck in this environment, I didn't want to make that query separately. So, I built a MySQL procedure to do the check on the database side!
This commit is contained in:
Emi Matchu 2020-09-06 15:49:08 -07:00
parent f73211a50e
commit 5a91dd2f2a
7 changed files with 209 additions and 57 deletions

View file

@ -40,7 +40,7 @@
"build": "yarn build-cached-data && react-app-rewired build",
"test": "react-app-rewired test --env=jsdom",
"eject": "react-scripts eject",
"setup-mysql-user": "mysql -h impress.openneo.net -u matchu -p < setup-mysql-user.sql",
"setup-mysql": "mysql -h impress.openneo.net -u matchu -p < setup-mysql.sql",
"mysql": "mysql --host=impress.openneo.net --user=$(dotenv -p IMPRESS_MYSQL_USER) --password=$(dotenv -p IMPRESS_MYSQL_PASSWORD) --database=openneo_impress",
"mysql-admin": "mysql --host=impress.openneo.net --user=matchu --password --database=openneo_impress",
"build-cached-data": "node -r dotenv/config scripts/build-cached-data.js",

View file

@ -1,25 +0,0 @@
-- Public data tables: read
GRANT SELECT ON openneo_impress.colors TO impress2020;
GRANT SELECT ON openneo_impress.color_translations TO impress2020;
GRANT SELECT ON openneo_impress.items TO impress2020;
GRANT SELECT ON openneo_impress.item_translations TO impress2020;
GRANT SELECT ON openneo_impress.parents_swf_assets TO impress2020;
GRANT SELECT ON openneo_impress.pet_types TO impress2020;
GRANT SELECT ON openneo_impress.pet_states TO impress2020;
GRANT SELECT ON openneo_impress.species TO impress2020;
GRANT SELECT ON openneo_impress.species_translations TO impress2020;
GRANT SELECT ON openneo_impress.swf_assets TO impress2020;
GRANT SELECT ON openneo_impress.zones TO impress2020;
GRANT SELECT ON openneo_impress.zone_translations TO impress2020;
-- Public data tables: write
GRANT UPDATE ON openneo_impress.items TO impress2020;
GRANT DELETE ON openneo_impress.parents_swf_assets TO impress2020;
GRANT UPDATE ON openneo_impress.pet_states TO impress2020;
GRANT UPDATE ON openneo_impress.swf_assets TO impress2020;
-- User data tables
GRANT SELECT ON openneo_impress.closet_hangers TO impress2020;
GRANT SELECT ON openneo_impress.item_outfit_relationships TO impress2020;
GRANT SELECT ON openneo_impress.outfits TO impress2020;
GRANT SELECT ON openneo_impress.users TO impress2020;

90
setup-mysql.sql Normal file
View file

@ -0,0 +1,90 @@
USE openneo_impress;
-- Public data tables: read
GRANT SELECT ON colors TO impress2020;
GRANT SELECT ON color_translations TO impress2020;
GRANT SELECT ON items TO impress2020;
GRANT SELECT ON item_translations TO impress2020;
GRANT SELECT ON parents_swf_assets TO impress2020;
GRANT SELECT ON pet_types TO impress2020;
GRANT SELECT ON pet_states TO impress2020;
GRANT SELECT ON species TO impress2020;
GRANT SELECT ON species_translations TO impress2020;
GRANT SELECT ON swf_assets TO impress2020;
GRANT SELECT ON zones TO impress2020;
GRANT SELECT ON zone_translations TO impress2020;
-- Public data tables: write
GRANT UPDATE ON items TO impress2020;
GRANT DELETE ON parents_swf_assets TO impress2020;
GRANT UPDATE ON pet_states TO impress2020;
GRANT UPDATE ON swf_assets TO impress2020;
-- User data tables
GRANT SELECT ON closet_hangers TO impress2020;
GRANT SELECT ON item_outfit_relationships TO impress2020;
GRANT SELECT ON outfits TO impress2020;
GRANT SELECT ON users TO impress2020;
-- Procedures used in the application
DELIMITER $$
DROP PROCEDURE IF EXISTS GetItemsThatNeedModels$$
CREATE PROCEDURE GetItemsThatNeedModels()
BEGIN
SELECT items.id,
GROUP_CONCAT(DISTINCT pet_types.species_id ORDER BY pet_types.species_id)
AS modeled_species_ids,
-- Vandagyre was added on 2014-11-14, so we add some buffer here.
-- TODO: Some later Dyeworks items don't support Vandagyre.
-- Add a manual db flag?
items.created_at >= "2014-12-01" AS supports_vandagyre
FROM items
INNER JOIN parents_swf_assets psa
ON psa.parent_type = "Item" AND psa.parent_id = items.id
INNER JOIN swf_assets
ON swf_assets.id = psa.swf_asset_id
INNER JOIN pet_types
ON pet_types.body_id = swf_assets.body_id
WHERE
pet_types.color_id = "8"
GROUP BY items.id
HAVING
NOT (
-- Single species (probably just their item)
count(DISTINCT pet_types.species_id) = 1
-- All species modeled
OR modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55"
-- All species modeled except Vandagyre, for items that don't support it
OR (NOT supports_vandagyre AND modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54")
-- No species (either an All Bodies item, or a Capsule type thing)
OR modeled_species_ids = ""
)
ORDER BY items.id;
END$$
GRANT EXECUTE ON PROCEDURE GetItemsThatNeedModels TO impress2020$$
-- This procedure is a performance optimization! We want the page to always be
-- up-to-date, but we want to avoid re-running this query if modeling data
-- hasn't changed. So, we use the last contribution timestamp as a cache hint
-- for whether the data has expired. But in this environment, sequential
-- queries are a bottleneck, so I bundled up that logic into this single
-- procedure that can run on the database! That way, it's just one network
-- round-trip instead of two, which makes a noticeable difference in our stack.
DROP PROCEDURE IF EXISTS GetItemsThatNeedModelsIfNotCached$$
CREATE PROCEDURE GetItemsThatNeedModelsIfNotCached(
IN last_known_update TIMESTAMP,
OUT last_actual_update TIMESTAMP
)
BEGIN
SET last_actual_update =
(SELECT created_at FROM contributions ORDER BY id DESC LIMIT 1);
IF last_known_update < last_actual_update THEN
CALL GetItemsThatNeedModels();
END IF;
END$$
GRANT EXECUTE ON PROCEDURE GetItemsThatNeedModelsIfNotCached TO impress2020$$
DELIMITER ;

View file

@ -16,6 +16,7 @@ async function connectToDb({
user,
password,
database: "openneo_impress",
multipleStatements: true,
})
// We upgrade to promises here, instead of using the mysql2/promise import,
// for compatibility with Honeycomb's automatic tracing.

View file

@ -239,6 +239,8 @@ const buildItemSearchToFitLoader = (db, loaders) =>
return responses;
});
let lastKnownUpdate = "1970-01-01"; // start it out very old!
let lastResult = [];
const buildItemsThatNeedModelsLoader = (db) =>
new DataLoader(async (keys) => {
// Essentially, I want to take easy advantage of DataLoader's caching, for
@ -248,39 +250,37 @@ const buildItemsThatNeedModelsLoader = (db) =>
throw new Error(`this loader can only be loaded with the key "all"`);
}
const [rows, _] = await db.execute(`
SELECT items.id,
GROUP_CONCAT(DISTINCT pet_types.species_id ORDER BY pet_types.species_id)
AS modeled_species_ids,
-- Vandagyre was added on 2014-11-14, so we add some buffer here.
-- TODO: Some later Dyeworks items don't support Vandagyre.
-- Add a manual db flag?
items.created_at >= "2014-12-01" AS supports_vandagyre
FROM items
INNER JOIN parents_swf_assets psa
ON psa.parent_type = "Item" AND psa.parent_id = items.id
INNER JOIN swf_assets
ON swf_assets.id = psa.swf_asset_id
INNER JOIN pet_types
ON pet_types.body_id = swf_assets.body_id
WHERE
pet_types.color_id = "8"
GROUP BY items.id
HAVING
NOT (
-- Single species (probably just their item)
count(DISTINCT pet_types.species_id) = 1
-- All species modeled
OR modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55"
-- All species modeled except Vandagyre, for items that don't support it
OR (NOT supports_vandagyre AND modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54")
-- No species (either an All Bodies item, or a Capsule type thing)
OR modeled_species_ids = ""
)
ORDER BY items.id
`);
// Call the query as a procedure, defined in `setup-mysql.sql`. It will
// only run the query if modeling data has been changed since the timestamp
// we provide; otherwise, it skips the query and returns no rows, which is
// much faster! (The query takes a few seconds to run.)
const [results, _] = await db.query(
`
CALL GetItemsThatNeedModelsIfNotCached(?, @LastActualUpdate);
SELECT @LastActualUpdate;
`,
[lastKnownUpdate]
);
// The query will return 2 or 3 results.
// Result 1 (optional): The rows produced by the CALL, if it ran the query.
// Or, if it skipped the query, this is omitted.
// Result 2 (required): The MySQL summary of the effects of the CALL.
// Result 3 (required): The 1-row table contianing @LastActualUpdate.
//
// So, check the number of results. If it's 2, then there was no change,
// and we should return our cached value. Or, if it's 3, then we should
// update our cache.
if (results.length === 2) {
return [lastResult];
}
const [rows, __, varRows] = results;
const entities = rows.map(normalizeRow);
lastKnownUpdate = varRows[0]["@LastActualUpdate"];
lastResult = entities;
return [entities];
});

View file

@ -291,6 +291,11 @@ describe("Item", () => {
it("loads items that need models", async () => {
jest.setTimeout(20000);
const buildLoaders = require("../loaders");
const db = await require("../db")();
const { itemsThatNeedModelsLoader } = buildLoaders(db);
await itemsThatNeedModelsLoader.load("all");
const res = await query({
query: gql`
query {

View file

@ -7766,6 +7766,87 @@ Object {
}
`;
exports[`Item returns empty appearance for incompatible items 1`] = `
Object {
"items": Array [
Object {
"appearanceOn": Object {
"layers": Array [
Object {
"id": "37128",
"imageUrl": "https://impress-asset-images.s3.amazonaws.com/object/000/000/014/14856/600x600.png?v2-1587653266000",
"remoteId": "14856",
"svgUrl": null,
"zone": Object {
"depth": 30,
"id": "26",
"label": "Jacket",
},
},
],
"restrictedZones": Array [
Object {
"id": "20",
},
Object {
"id": "22",
},
],
},
"id": "38912",
"name": "Zafara Agent Robe",
},
Object {
"appearanceOn": Object {
"layers": Array [
Object {
"id": "37129",
"imageUrl": "https://impress-asset-images.s3.amazonaws.com/object/000/000/014/14857/600x600.png?v2-0",
"remoteId": "14857",
"svgUrl": null,
"zone": Object {
"depth": 44,
"id": "40",
"label": "Hat",
},
},
],
"restrictedZones": Array [
Object {
"id": "37",
},
Object {
"id": "38",
},
],
},
"id": "38911",
"name": "Zafara Agent Hood",
},
Object {
"appearanceOn": Object {
"layers": Array [
Object {
"id": "30203",
"imageUrl": "https://impress-asset-images.s3.amazonaws.com/object/000/000/006/6829/600x600.png?v2-1598519675000",
"remoteId": "6829",
"svgUrl": null,
"zone": Object {
"depth": 3,
"id": "3",
"label": "Background",
},
},
],
"restrictedZones": Array [],
},
"id": "37375",
"name": "Moon and Stars Background",
},
],
}
`;
exports[`Item skips appearance data for audio assets 1`] = `
Object {
"items": Array [