Modeling page performance fix

Ok so, I kinda assumed that the query engine would only compute `all_species_ids_for_this_color` on the rows we actually returned, and it's a fast subquery so it's fine. But that was wrong! I think the query engine computing that for _every_ item, and _then_ filter out stuff with `HAVING`. Which makes sense, because the `HAVING` clause references it, so computing it makes sense!

In this change, we inline the subquery, so it only gets called if the other conditions in the `HAVING` clause don't fail first. That way, it only gets run when needed, and the query runs like 2x faster (~30sec instead of ~60sec), which gets us back inside some timeouts that were triggering around 1 minute and making the page fail.

However, this meant we no longer return `all_species_ids_for_this_color`, which we actually use to determine which species are _left_ to model for! So now, we have a loader that also basically runs the same query as that condition subquery.

A reasonable question would be, at this point, is the `HAVING` clause a good idea? would it be simpler to do the filtering in JS?

and I think it might be simpler, but I would guess noticeably worse performance, because I think we really do filter out a _lot_ of results with that `HAVING` clause—like basically all items, right? So to filter on the JS side, we'd be transferring data for all items over the wire, which… like, that's not even the worst dealbreaker, but it would certainly be noticed. This hypothesis could be wrong, but it's enough of a reason for me to not bother pursuring the refactor!
This commit is contained in:
Emi Matchu 2022-10-10 20:15:16 -07:00
parent b9b0db8b3a
commit 052cc242e4
2 changed files with 59 additions and 23 deletions

View file

@ -544,25 +544,7 @@ async function runItemModelingQuery(db, filterToItemIds) {
GROUP_CONCAT(
T_BODIES.species_id
ORDER BY T_BODIES.species_id
) AS modeled_species_ids,
(
SELECT GROUP_CONCAT(DISTINCT species_id ORDER BY species_id)
FROM pet_types T_PT1
WHERE color_id = T_BODIES.color_id
AND (
-- For non-standard colors, ignore the species that have the same
-- body ID as standard pets. Otherwise, a lot of items will be
-- like "oh, we fit the Maraquan Koi and Maraquan Mynci, where
-- are all the other Maraquans??", which is incorrect!
color_id = 8
OR
(
SELECT count(*) FROM pet_types T_PT2
WHERE T_PT1.body_id = T_PT2.body_id
AND T_PT1.color_id != T_PT2.color_id
) = 0
)
) AS all_species_ids_for_this_color
) AS modeled_species_ids
FROM (
-- NOTE: I found that extracting this as a separate query that runs
-- first made things WAAAY faster. Less to join/group, I guess?
@ -607,7 +589,24 @@ async function runItemModelingQuery(db, filterToItemIds) {
-- Single species (probably just their item)
OR modeled_species_count = 1
-- All species modeled (that are compatible with this color)
OR modeled_species_ids = all_species_ids_for_this_color
OR modeled_species_ids = (
SELECT GROUP_CONCAT(DISTINCT species_id ORDER BY species_id)
FROM pet_types T_PT1
WHERE color_id = T_BODIES.color_id
AND (
-- For non-standard colors, ignore the species that have the same
-- body ID as standard pets. Otherwise, a lot of items will be
-- like "oh, we fit the Maraquan Koi and Maraquan Mynci, where
-- are all the other Maraquans??", which is incorrect!
color_id = 8
OR
(
SELECT count(*) FROM pet_types T_PT2
WHERE T_PT1.body_id = T_PT2.body_id
AND T_PT1.color_id != T_PT2.color_id
) = 0
)
)
-- All species modeled except Vandagyre, for items that don't support it
OR (NOT T_ITEMS.supports_vandagyre AND modeled_species_count = 54 AND modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54")
)
@ -674,6 +673,42 @@ const buildItemsThatNeedModelsLoader = (db, loaders) =>
return [result];
});
const buildAllSpeciesIdsForColorLoader = (db) =>
new DataLoader(async (colorIds) => {
const qs = colorIds.map((_) => "?").join(", ");
const [rows] = await db.execute(
`
SELECT color_id,
GROUP_CONCAT(DISTINCT species_id ORDER BY species_id) AS species_ids
FROM pet_types T_PT1
WHERE color_id IN (${qs})
AND (
-- For non-standard colors, ignore the species that have the same
-- body ID as standard pets. Otherwise, a lot of items will be
-- like "oh, we fit the Maraquan Koi and Maraquan Mynci, where
-- are all the other Maraquans??", which is incorrect!
color_id = 8
OR
(
SELECT count(*) FROM pet_types T_PT2
WHERE T_PT1.body_id = T_PT2.body_id
AND T_PT1.color_id != T_PT2.color_id
) = 0
)
GROUP BY color_id;
`,
colorIds
);
const entities = rows.map(normalizeRow);
return colorIds.map(
(colorId) =>
entities.find((e) => e.colorId === colorId)?.speciesIds?.split(",") ||
[]
);
});
const buildItemBodiesWithAppearanceDataLoader = (db) =>
new DataLoader(async (itemIds) => {
const qs = itemIds.map((_) => "?").join(",");
@ -1487,6 +1522,7 @@ function buildLoaders(db) {
db,
loaders
);
loaders.allSpeciesIdsForColorLoader = buildAllSpeciesIdsForColorLoader(db);
loaders.itemBodiesWithAppearanceDataLoader = buildItemBodiesWithAppearanceDataLoader(
db
);

View file

@ -553,7 +553,7 @@ const resolvers = {
speciesThatNeedModels: async (
{ id },
{ colorId = "8" }, // Blue
{ speciesThatNeedModelsForItemLoader }
{ speciesThatNeedModelsForItemLoader, allSpeciesIdsForColorLoader }
) => {
// NOTE: If we're running this in the context of `itemsThatNeedModels`,
// this loader should already be primed, no extra query!
@ -566,8 +566,8 @@ const resolvers = {
}
const modeledSpeciesIds = row.modeledSpeciesIds.split(",");
const allSpeciesIdsForThisColor = row.allSpeciesIdsForThisColor.split(
","
const allSpeciesIdsForThisColor = await allSpeciesIdsForColorLoader.load(
colorId
);
let allModelableSpeciesIds = allSpeciesIdsForThisColor;