Simpler, faster modeling query
I narrowed down the problem to the fact that we were joining in pet types against assets, and *then* running GROUP and DISTINCT and everything. Assets x compatible species/color pairs is a LOT of rows! Here, we instead get all the relevant body IDs first, and *then* match them against pet types—which we fetch in one batch to match body to canonical species/color. I'm also trashing the weird caching mechanism we did here, because in practice it doesn't seem reliable anyway. If anything, I'd want to look at stronger CDN caching. (I made a small improvement to the caching annotation, but ultimately it still doesn't matter, because this query uses logged-in stuff and always comes out max-age=0 anyway.)
This commit is contained in:
parent
1f103989a3
commit
0e8e50b054
3 changed files with 53 additions and 107 deletions
|
@ -35,72 +35,3 @@ GRANT SELECT, UPDATE ON openneo_id.users TO impress2020;
|
|||
|
||||
-- mysqldump
|
||||
GRANT LOCK TABLES ON * TO impress2020;
|
||||
|
||||
-- Procedures used in the application
|
||||
|
||||
DELIMITER $$
|
||||
|
||||
DROP PROCEDURE IF EXISTS GetItemsThatNeedModelsV2$$
|
||||
CREATE PROCEDURE GetItemsThatNeedModelsV2()
|
||||
BEGIN
|
||||
SELECT pet_types.color_id AS color_id, items.id AS item_id,
|
||||
GROUP_CONCAT(DISTINCT pet_types.species_id ORDER BY pet_types.species_id)
|
||||
AS modeled_species_ids,
|
||||
-- Vandagyre was added on 2014-11-14, so we add some buffer here.
|
||||
-- TODO: Some later Dyeworks items don't support Vandagyre.
|
||||
-- Add a manual db flag?
|
||||
items.created_at >= "2014-12-01" AS supports_vandagyre
|
||||
FROM items
|
||||
INNER JOIN parents_swf_assets psa
|
||||
ON psa.parent_type = "Item" AND psa.parent_id = items.id
|
||||
INNER JOIN swf_assets
|
||||
ON swf_assets.id = psa.swf_asset_id
|
||||
INNER JOIN pet_types
|
||||
ON pet_types.body_id = swf_assets.body_id
|
||||
WHERE
|
||||
pet_types.color_id IN (
|
||||
8, -- Blue (Standard)
|
||||
6, -- Baby
|
||||
44, -- Maraquan
|
||||
46 -- Mutant
|
||||
)
|
||||
AND items.modeling_status_hint IS NULL
|
||||
GROUP BY color_id, item_id
|
||||
HAVING
|
||||
NOT (
|
||||
-- No species (either an All Bodies item, or a Capsule type thing)
|
||||
count(DISTINCT pet_types.species_id) = 0
|
||||
-- Single species (probably just their item)
|
||||
OR count(DISTINCT pet_types.species_id) = 1
|
||||
-- All species modeled
|
||||
OR count(DISTINCT pet_types.species_id) = 55
|
||||
-- All species modeled except Vandagyre, for items that don't support it
|
||||
OR (NOT supports_vandagyre AND modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54")
|
||||
)
|
||||
ORDER BY color_id, item_id;
|
||||
END$$
|
||||
GRANT EXECUTE ON PROCEDURE GetItemsThatNeedModelsV2 TO impress2020$$
|
||||
|
||||
-- This procedure is a performance optimization! We want the page to always be
|
||||
-- up-to-date, but we want to avoid re-running this query if modeling data
|
||||
-- hasn't changed. So, we use the last contribution timestamp as a cache hint
|
||||
-- for whether the data has expired. But in this environment, sequential
|
||||
-- queries are a bottleneck, so I bundled up that logic into this single
|
||||
-- procedure that can run on the database! That way, it's just one network
|
||||
-- round-trip instead of two, which makes a noticeable difference in our stack.
|
||||
DROP PROCEDURE IF EXISTS GetItemsThatNeedModelsIfNotCachedV2$$
|
||||
CREATE PROCEDURE GetItemsThatNeedModelsIfNotCachedV2(
|
||||
IN last_known_update TIMESTAMP,
|
||||
OUT last_actual_update TIMESTAMP
|
||||
)
|
||||
BEGIN
|
||||
SET last_actual_update =
|
||||
(SELECT created_at FROM contributions ORDER BY id DESC LIMIT 1);
|
||||
|
||||
IF last_known_update < last_actual_update THEN
|
||||
CALL GetItemsThatNeedModelsV2();
|
||||
END IF;
|
||||
END$$
|
||||
GRANT EXECUTE ON PROCEDURE GetItemsThatNeedModelsIfNotCachedV2 TO impress2020$$
|
||||
|
||||
DELIMITER ;
|
||||
|
|
|
@ -410,8 +410,6 @@ const buildNewestItemsLoader = (db, loaders) =>
|
|||
return [entities];
|
||||
});
|
||||
|
||||
let lastKnownUpdate = "1970-01-01"; // start it out very old!
|
||||
let lastResult = new Map();
|
||||
const buildItemsThatNeedModelsLoader = (db) =>
|
||||
new DataLoader(async (keys) => {
|
||||
// Essentially, I want to take easy advantage of DataLoader's caching, for
|
||||
|
@ -421,48 +419,65 @@ const buildItemsThatNeedModelsLoader = (db) =>
|
|||
throw new Error(`this loader can only be loaded with the key "all"`);
|
||||
}
|
||||
|
||||
// Call the query as a procedure, defined in `setup-mysql.sql`. It will
|
||||
// only run the query if modeling data has been changed since the timestamp
|
||||
// we provide; otherwise, it skips the query and returns no rows, which is
|
||||
// much faster! (The query takes a few seconds to run.)
|
||||
//
|
||||
// NOTE: This query has the colors hardcoded, we always fetch all of them!
|
||||
// And then we look up the specific colors
|
||||
const [results, _] = await db.query(
|
||||
const [rows, _] = await db.query(
|
||||
`
|
||||
SELECT T_ITEMS.item_id,
|
||||
T_BODIES.color_id,
|
||||
T_ITEMS.supports_vandagyre,
|
||||
COUNT(*) AS modeled_species_count,
|
||||
GROUP_CONCAT(
|
||||
T_BODIES.species_id
|
||||
ORDER BY T_BODIES.species_id
|
||||
) AS modeled_species_ids
|
||||
FROM (
|
||||
-- NOTE: I found that extracting this as a separate query that runs
|
||||
-- first made things WAAAY faster. Less to join/group, I guess?
|
||||
SELECT DISTINCT items.id AS item_id,
|
||||
swf_assets.body_id AS body_id,
|
||||
-- Vandagyre was added on 2014-11-14, so we add some buffer here.
|
||||
-- TODO: Some later Dyeworks items don't support Vandagyre.
|
||||
-- Add a manual db flag?
|
||||
items.created_at >= "2014-12-01" AS supports_vandagyre
|
||||
FROM items
|
||||
INNER JOIN parents_swf_assets psa ON psa.parent_type = "Item"
|
||||
AND psa.parent_id = items.id
|
||||
INNER JOIN swf_assets ON swf_assets.id = psa.swf_asset_id
|
||||
INNER JOIN item_translations it ON it.item_id = items.id AND it.locale = "en"
|
||||
WHERE items.modeling_status_hint IS NULL AND it.name NOT LIKE "%MME%"
|
||||
ORDER BY item_id
|
||||
) T_ITEMS
|
||||
INNER JOIN (
|
||||
SELECT DISTINCT body_id, species_id, color_id
|
||||
FROM pet_types
|
||||
WHERE color_id IN (6, 8, 44, 46)
|
||||
ORDER BY body_id, species_id
|
||||
) T_BODIES ON T_ITEMS.body_id = T_BODIES.body_id
|
||||
GROUP BY T_ITEMS.item_id, T_BODIES.color_id
|
||||
HAVING NOT (
|
||||
-- No species (either an All Bodies item, or a Capsule type thing)
|
||||
modeled_species_count = 0
|
||||
-- Single species (probably just their item)
|
||||
OR modeled_species_count = 1
|
||||
-- All species modeled
|
||||
OR modeled_species_count = 55
|
||||
-- All species modeled except Vandagyre, for items that don't support it
|
||||
OR (NOT T_ITEMS.supports_vandagyre AND modeled_species_count = 54 AND modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54")
|
||||
)
|
||||
ORDER BY T_ITEMS.item_id;
|
||||
`
|
||||
CALL GetItemsThatNeedModelsIfNotCachedV2(?, @LastActualUpdate);
|
||||
SELECT @LastActualUpdate;
|
||||
`,
|
||||
[lastKnownUpdate]
|
||||
);
|
||||
|
||||
// The query will return 2 or 3 results.
|
||||
// Result 1 (optional): The rows produced by the CALL, if it ran the query.
|
||||
// Or, if it skipped the query, this is omitted.
|
||||
// Result 2 (required): The MySQL summary of the effects of the CALL.
|
||||
// Result 3 (required): The 1-row table contianing @LastActualUpdate.
|
||||
//
|
||||
// So, check the number of results. If it's 3, then we should update our
|
||||
// cache. Or, if it's 2, then there was no change and we can continue with
|
||||
// the existing cached value.
|
||||
if (results.length === 3) {
|
||||
const [rawRows, __, varRows] = results;
|
||||
const rows = rawRows.map(normalizeRow);
|
||||
const entities = rows.map(normalizeRow);
|
||||
|
||||
lastKnownUpdate = varRows[0]["@LastActualUpdate"];
|
||||
|
||||
// We build lastResult into a Map up-front, to speed up the many lookups
|
||||
// that the GQL resolvers will do as we group this data into GQL nodes!
|
||||
lastResult = new Map();
|
||||
for (const { colorId, itemId, ...row } of rows) {
|
||||
if (!lastResult.has(colorId)) {
|
||||
lastResult.set(colorId, new Map());
|
||||
}
|
||||
lastResult.get(colorId).set(itemId, row);
|
||||
const result = new Map();
|
||||
for (const { colorId, itemId, ...entity } of entities) {
|
||||
if (!result.has(colorId)) {
|
||||
result.set(colorId, new Map());
|
||||
}
|
||||
result.get(colorId).set(itemId, entity);
|
||||
}
|
||||
|
||||
return [lastResult];
|
||||
return [result];
|
||||
});
|
||||
|
||||
const buildItemBodiesWithAppearanceDataLoader = (db) =>
|
||||
|
|
|
@ -192,7 +192,7 @@ const typeDefs = gql`
|
|||
# NOTE: Most color IDs won't be accepted here. Either pass the ID of a
|
||||
# major special color like Baby (#6), or leave it blank for standard
|
||||
# bodies like Blue, Green, Red, etc.
|
||||
itemsThatNeedModels(colorId: ID): [Item!]!
|
||||
itemsThatNeedModels(colorId: ID): [Item!]! @cacheControl(maxAge: 1, staleWhileRevalidate: ${oneHour})
|
||||
}
|
||||
|
||||
extend type Mutation {
|
||||
|
|
Loading…
Reference in a new issue