// impress-2020/src/server/loaders.js

import DataLoader from "dataloader";
import { normalizeRow } from "./util";
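// All loaders in this file follow the same DataLoader batching pattern:
// callers invoke `.load(key)` many times within one tick, DataLoader
// coalesces those keys into a single batched SQL query, and the batch
// function must return exactly one result per key, in key order. A
// hypothetical usage sketch (the resolver context is illustrative):
//
//   const list = await loaders.closetListLoader.load("123");
//   const hangers = await loaders.closetHangersForListLoader.load(list.id);
//
// Parallel `load` calls in the same tick share one round-trip per loader.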
const buildClosetListLoader = (db) =>
new DataLoader(async (ids) => {
const qs = ids.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM closet_lists WHERE id IN (${qs})`,
ids,
);
const entities = rows.map(normalizeRow);
return ids.map((id) => entities.find((e) => e.id === id));
});
const buildClosetHangersForListLoader = (db) =>
new DataLoader(async (closetListIds) => {
const qs = closetListIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM closet_hangers WHERE list_id IN (${qs})`,
closetListIds,
);
const entities = rows.map(normalizeRow);
return closetListIds.map((closetListId) =>
entities.filter((e) => e.listId === closetListId),
);
});
const buildClosetHangersForDefaultListLoader = (db) =>
new DataLoader(async (userIdAndOwnsOrWantsItemsPairs) => {
const conditions = userIdAndOwnsOrWantsItemsPairs
.map((_) => `(user_id = ? AND owned = ? AND list_id IS NULL)`)
.join(" OR ");
const values = userIdAndOwnsOrWantsItemsPairs
.map(({ userId, ownsOrWantsItems }) => [
userId,
ownsOrWantsItems === "OWNS",
])
.flat();
const [rows] = await db.execute(
`SELECT * FROM closet_hangers WHERE ${conditions}`,
values,
);
const entities = rows.map(normalizeRow);
return userIdAndOwnsOrWantsItemsPairs.map(({ userId, ownsOrWantsItems }) =>
entities.filter(
(e) =>
e.userId === userId &&
Boolean(e.owned) === (ownsOrWantsItems === "OWNS"),
),
);
});
const buildColorLoader = (db) => {
const colorLoader = new DataLoader(async (colorIds) => {
const qs = colorIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM colors WHERE id IN (${qs}) AND prank = 0`,
colorIds,
);
const entities = rows.map(normalizeRow);
const entitiesByColorId = new Map(entities.map((e) => [e.id, e]));
return colorIds.map(
(colorId) =>
entitiesByColorId.get(String(colorId)) ||
new Error(`could not find color ${colorId}`),
);
});
colorLoader.loadAll = async () => {
const [rows] = await db.execute(`SELECT * FROM colors WHERE prank = 0`);
const entities = rows.map(normalizeRow);
for (const color of entities) {
colorLoader.prime(color.id, color);
}
return entities;
};
return colorLoader;
};
const buildSpeciesLoader = (db) => {
const speciesLoader = new DataLoader(async (speciesIds) => {
const qs = speciesIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM species WHERE id IN (${qs})`,
speciesIds,
);
const entities = rows.map(normalizeRow);
const entitiesBySpeciesId = new Map(entities.map((e) => [e.id, e]));
return speciesIds.map(
(speciesId) =>
entitiesBySpeciesId.get(String(speciesId)) ||
new Error(`could not find species ${speciesId}`),
);
});
speciesLoader.loadAll = async () => {
const [rows] = await db.execute(`SELECT * FROM species`);
const entities = rows.map(normalizeRow);
for (const species of entities) {
speciesLoader.prime(species.id, species);
}
return entities;
};
return speciesLoader;
};
const buildTradeMatchesLoader = (db) =>
new DataLoader(
async (userPairs) => {
const conditions = userPairs
.map(
(_) =>
`(public_user_hangers.user_id = ? AND current_user_hangers.user_id = ? AND public_user_hangers.owned = ? AND current_user_hangers.owned = ?)`,
)
.join(" OR ");
const conditionValues = userPairs
.map(({ publicUserId, currentUserId, direction }) => {
if (direction === "public-owns-current-wants") {
return [publicUserId, currentUserId, true, false];
} else if (direction === "public-wants-current-owns") {
return [publicUserId, currentUserId, false, true];
} else {
throw new Error(
`unexpected user pair direction: ${JSON.stringify(direction)}`,
);
}
})
.flat();
const [rows] = await db.query(
`
SET SESSION group_concat_max_len = 4096;
SELECT
public_user_hangers.user_id AS public_user_id,
current_user_hangers.user_id AS current_user_id,
IF(
public_user_hangers.owned,
"public-owns-current-wants",
"public-wants-current-owns"
) AS direction,
GROUP_CONCAT(public_user_hangers.item_id) AS item_ids
FROM closet_hangers AS public_user_hangers
INNER JOIN users AS public_users ON public_users.id = public_user_hangers.user_id
LEFT JOIN closet_lists AS public_user_lists
ON public_user_lists.id = public_user_hangers.list_id
INNER JOIN closet_hangers AS current_user_hangers
ON public_user_hangers.item_id = current_user_hangers.item_id
WHERE (
(${conditions})
AND (
-- For the public user (but not the current), the hanger must be
-- marked Trading.
(public_user_hangers.list_id IS NOT NULL AND public_user_lists.visibility >= 2)
OR (
public_user_hangers.list_id IS NULL AND public_user_hangers.owned = 1
AND public_users.owned_closet_hangers_visibility >= 2
)
OR (
public_user_hangers.list_id IS NULL AND public_user_hangers.owned = 0
AND public_users.wanted_closet_hangers_visibility >= 2
)
)
)
GROUP BY public_user_id, current_user_id;
`,
conditionValues,
);
const entities = rows.map(normalizeRow);
return userPairs.map(({ publicUserId, currentUserId, direction }) => {
const entity = entities.find(
(e) =>
e.publicUserId === publicUserId &&
e.currentUserId === currentUserId &&
e.direction === direction,
);
return entity ? entity.itemIds.split(",") : [];
});
},
{
cacheKeyFn: ({ publicUserId, currentUserId, direction }) =>
`${publicUserId}-${currentUserId}-${direction}`,
},
);
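// Hypothetical usage sketch: `direction` names which side owns vs. wants, so
// a trade page could ask both questions for the same user pair (the loader
// property name here is assumed, not shown in this excerpt):
//
//   const itemIdsTheyOwn = await loaders.tradeMatchesLoader.load({
//     publicUserId: "123",
//     currentUserId: "456",
//     direction: "public-owns-current-wants",
//   });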
const loadAllPetTypes = (db) => async () => {
const [rows] = await db.execute(`SELECT species_id, color_id FROM pet_types`);
const entities = rows.map(normalizeRow);
return entities;
};
const buildItemLoader = (db) =>
new DataLoader(async (ids) => {
const qs = ids.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM items WHERE id IN (${qs})`,
ids,
);
const entities = rows.map(normalizeRow);
const entitiesById = new Map(entities.map((e) => [e.id, e]));
return ids.map(
(id) =>
entitiesById.get(String(id)) ||
new Error(`could not find item with ID: ${id}`),
);
});
const buildItemTranslationLoader = (db) =>
new DataLoader(async (itemIds) => {
const qs = itemIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM item_translations WHERE item_id IN (${qs}) AND locale = "en"`,
itemIds,
);
const entities = rows.map(normalizeRow);
const entitiesByItemId = new Map(entities.map((e) => [e.itemId, e]));
return itemIds.map(
(itemId) =>
entitiesByItemId.get(String(itemId)) ||
new Error(`could not find translation for item ${itemId}`),
);
});
const buildItemByNameLoader = (db, loaders) =>
new DataLoader(
async (names) => {
const qs = names.map((_) => "?").join(", ");
const normalizedNames = names.map((name) => name.trim().toLowerCase());
const [rows] = await db.execute(
{
// NOTE: In our MySQL schema, this is a case-insensitive exact search.
sql: `SELECT items.*, item_translations.* FROM item_translations
INNER JOIN items ON items.id = item_translations.item_id
WHERE name IN (${qs}) AND locale = "en"`,
nestTables: true,
},
normalizedNames,
);
const entitiesByName = new Map();
for (const row of rows) {
const item = normalizeRow(row.items);
const itemTranslation = normalizeRow(row.item_translations);
loaders.itemLoader.prime(item.id, item);
loaders.itemTranslationLoader.prime(item.id, itemTranslation);
const normalizedName = itemTranslation.name.trim().toLowerCase();
entitiesByName.set(normalizedName, { item, itemTranslation });
}
return normalizedNames.map(
(name) =>
entitiesByName.get(name) || { item: null, itemTranslation: null },
);
},
{ cacheKeyFn: (name) => name.trim().toLowerCase() },
);
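// Because `cacheKeyFn` applies the same trim-and-lowercase normalization as
// the batch function, differently-formatted lookups share one cache entry.
// A hypothetical sketch:
//
//   const a = await loaders.itemByNameLoader.load("Blue Hat");
//   const b = await loaders.itemByNameLoader.load("  blue hat ");
//   // The second call is a cache hit: same entry, no extra query.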
const itemSearchKindConditions = {
// NOTE: We assume that items cannot have NC rarity and the PB description,
// so we don't bother to filter out PB items in the NC filter, for perf.
NC: `rarity_index IN (0, 500) OR is_manually_nc = 1`,
NP: `rarity_index NOT IN (0, 500) AND is_manually_nc = 0 AND description NOT LIKE "%This item is part of a deluxe paint brush set!%"`,
PB: `description LIKE "%This item is part of a deluxe paint brush set!%"`,
};
function buildItemSearchConditions({
query,
bodyId,
itemKind,
currentUserOwnsOrWants,
currentUserId,
zoneIds,
}) {
// Split the query into words, and search for each word as a substring
// of the name.
const words = query.split(/\s+/);
const wordMatchConditions = [];
const wordMatchValues = [];
for (let word of words) {
// If the word starts with `-`, remove `-` and treat the filter as negative.
const isNegative = word.startsWith("-");
if (isNegative) {
word = word.substr(1);
}
if (!word) {
continue;
}
const condition = isNegative ? "t.name NOT LIKE ?" : "t.name LIKE ?";
// Escape the SQL LIKE wildcard characters in the word itself.
const matcher = "%" + word.replace(/[_%]/g, "\\$&") + "%";
wordMatchConditions.push(condition);
wordMatchValues.push(matcher);
}
const wordMatchCondition = wordMatchConditions.join(" AND ") || "1";
const itemKindCondition = itemSearchKindConditions[itemKind] || "1";
const bodyIdCondition = bodyId
? "(swf_assets.body_id = ? OR swf_assets.body_id = 0)"
: "1";
const bodyIdValues = bodyId ? [bodyId] : [];
const zoneIdsCondition =
zoneIds.length > 0
? `swf_assets.zone_id IN (${zoneIds.map((_) => "?").join(", ")})`
: "1";
const currentUserJoin = currentUserOwnsOrWants
? `INNER JOIN closet_hangers ch ON ch.item_id = items.id`
: "";
const currentUserCondition = currentUserOwnsOrWants
? `ch.user_id = ? AND ch.owned = ?`
: "1";
const currentUserValues = currentUserOwnsOrWants
? [currentUserId, currentUserOwnsOrWants === "OWNS" ? "1" : "0"]
: [];
const queryJoins = `
INNER JOIN item_translations t ON t.item_id = items.id
INNER JOIN parents_swf_assets rel
ON rel.parent_type = "Item" AND rel.parent_id = items.id
INNER JOIN swf_assets ON rel.swf_asset_id = swf_assets.id
${currentUserJoin}
`;
const queryConditions = `
(${wordMatchCondition}) AND (${bodyIdCondition}) AND
(${zoneIdsCondition}) AND (${itemKindCondition}) AND
(${currentUserCondition}) AND t.locale = "en"
`;
const queryConditionValues = [
...wordMatchValues,
...bodyIdValues,
...zoneIds,
...currentUserValues,
];
return { queryJoins, queryConditions, queryConditionValues };
}
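// A minimal sketch of how the pieces above compose (illustrative values, not
// from this file): for `query: "blue -hat"`, `zoneIds: [3]`, and no body,
// kind, or user filters, the generated WHERE clause is roughly:
//
//   (t.name LIKE ? AND t.name NOT LIKE ?) AND (1) AND
//   (swf_assets.zone_id IN (?)) AND (1) AND (1) AND t.locale = "en"
//
// with queryConditionValues = ["%blue%", "%hat%", 3]. The literal `1`
// conditions keep the SQL valid when a filter is unused.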
const buildItemSearchNumTotalItemsLoader = (db) =>
new DataLoader(async (queries) => {
// This isn't actually optimized as a batch query; we're just using a
// DataLoader for API consistency with our other loaders!
const queryPromises = queries.map(
async ({
query,
bodyId,
itemKind,
currentUserOwnsOrWants,
currentUserId,
zoneIds = [],
}) => {
const { queryJoins, queryConditions, queryConditionValues } =
buildItemSearchConditions({
query,
bodyId,
itemKind,
currentUserOwnsOrWants,
currentUserId,
zoneIds,
});
const [totalRows] = await db.execute(
`
SELECT count(DISTINCT items.id) AS numTotalItems FROM items
${queryJoins}
WHERE ${queryConditions}
`,
queryConditionValues,
);
const { numTotalItems } = totalRows[0];
return numTotalItems;
},
);
const responses = await Promise.all(queryPromises);
return responses;
});
const buildItemSearchItemsLoader = (db, loaders) =>
new DataLoader(async (queries) => {
// This isn't actually optimized as a batch query; we're just using a
// DataLoader for API consistency with our other loaders!
const queryPromises = queries.map(
async ({
query,
bodyId,
itemKind,
currentUserOwnsOrWants,
currentUserId,
zoneIds = [],
offset,
limit,
}) => {
const actualOffset = offset || 0;
const actualLimit = Math.min(limit || 30, 30);
const { queryJoins, queryConditions, queryConditionValues } =
buildItemSearchConditions({
query,
bodyId,
itemKind,
currentUserOwnsOrWants,
currentUserId,
zoneIds,
});
const [rows] = await db.execute(
`
SELECT DISTINCT items.*, t.name FROM items
${queryJoins}
WHERE ${queryConditions}
ORDER BY t.name
LIMIT ? OFFSET ?
`,
[...queryConditionValues, actualLimit, actualOffset],
);
const entities = rows.map(normalizeRow);
for (const item of entities) {
loaders.itemLoader.prime(item.id, item);
}
return entities;
},
);
const responses = await Promise.all(queryPromises);
return responses;
});
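// Hypothetical usage sketch: `limit` is clamped to 30 server-side, so a
// caller asking for more still gets at most 30 rows per page:
//
//   const page2 = await loaders.itemSearchItemsLoader.load({
//     query: "hat", zoneIds: [], offset: 30, limit: 100, // runs with LIMIT 30
//   });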
const buildNewestItemsLoader = (db, loaders) =>
new DataLoader(async (keys) => {
// Essentially, I want to provide the loader-like API, and populate other
// loaders, even though there's only one query to run.
if (keys.length !== 1 && keys[0] !== "all-newest") {
throw new Error(
`this loader can only be loaded with the key "all-newest"`,
);
}
const [rows] = await db.execute(
`SELECT * FROM items ORDER BY created_at DESC LIMIT 20;`,
);
const entities = rows.map(normalizeRow);
for (const entity of entities) {
loaders.itemLoader.prime(entity.id, entity);
}
return [entities];
});
async function runItemModelingQuery(db, filterToItemIds) {
let itemIdsCondition;
let itemIdsValues;
if (filterToItemIds === "all") {
// For all items, we use the condition `1`, which matches everything.
itemIdsCondition = "1";
itemIdsValues = [];
} else {
// Or, to filter to certain items, we add their IDs to the WHERE clause.
const qs = filterToItemIds.map((_) => "?").join(", ");
itemIdsCondition = `(item_id IN (${qs}))`;
itemIdsValues = filterToItemIds;
}
return await db.execute(
`
SELECT T_ITEMS.item_id,
T_BODIES.color_id,
T_ITEMS.supports_vandagyre,
COUNT(*) AS modeled_species_count,
GROUP_CONCAT(
T_BODIES.species_id
ORDER BY T_BODIES.species_id
) AS modeled_species_ids
FROM (
-- NOTE: I found that extracting this as a separate query that runs
-- first made things WAAAY faster. Less to join/group, I guess?
SELECT DISTINCT items.id AS item_id,
swf_assets.body_id AS body_id,
-- Vandagyre was added on 2014-11-14, so we add some buffer here.
-- TODO: Some later Dyeworks items don't support Vandagyre.
-- Add a manual db flag?
items.created_at >= "2014-12-01" AS supports_vandagyre
FROM items
INNER JOIN parents_swf_assets psa ON psa.parent_type = "Item"
AND psa.parent_id = items.id
INNER JOIN swf_assets ON swf_assets.id = psa.swf_asset_id
INNER JOIN item_translations it ON it.item_id = items.id AND it.locale = "en"
WHERE items.modeling_status_hint IS NULL AND it.name NOT LIKE "%MME%"
AND ${itemIdsCondition}
ORDER BY item_id
) T_ITEMS
INNER JOIN (
SELECT DISTINCT body_id, species_id, color_id
FROM pet_types T_PT1
WHERE color_id IN (6, 8, 44, 46)
AND (
-- For non-standard colors, ignore the species that have the same
-- body ID as standard pets. Otherwise, a lot of items will be
-- like "oh, we fit the Maraquan Koi and Maraquan Mynci, where
-- are all the other Maraquans??", which is incorrect!
color_id = 8
OR
(
SELECT count(*) FROM pet_types T_PT2
WHERE T_PT1.body_id = T_PT2.body_id
AND T_PT1.color_id != T_PT2.color_id
) = 0
)
ORDER BY body_id, species_id
) T_BODIES ON T_ITEMS.body_id = T_BODIES.body_id
GROUP BY T_ITEMS.item_id, T_BODIES.color_id
HAVING NOT (
-- No species (either an All Bodies item, or a Capsule type thing)
modeled_species_count = 0
-- Single species (probably just their item)
OR modeled_species_count = 1
-- All species modeled (that are compatible with this color)
OR modeled_species_ids = (
SELECT GROUP_CONCAT(DISTINCT species_id ORDER BY species_id)
FROM pet_types T_PT1
WHERE color_id = T_BODIES.color_id
AND (
-- For non-standard colors, ignore the species that have the same
-- body ID as standard pets. Otherwise, a lot of items will be
-- like "oh, we fit the Maraquan Koi and Maraquan Mynci, where
-- are all the other Maraquans??", which is incorrect!
color_id = 8
OR
(
SELECT count(*) FROM pet_types T_PT2
WHERE T_PT1.body_id = T_PT2.body_id
AND T_PT1.color_id != T_PT2.color_id
) = 0
)
)
-- All species modeled except Vandagyre, for items that don't support it
OR (NOT T_ITEMS.supports_vandagyre AND modeled_species_count = 54 AND modeled_species_ids = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54")
)
ORDER BY T_ITEMS.item_id
-- We limit the result set a bit, because if there's a bug or something
-- that causes too many records to return, it seems to have a tendency to
-- take up a bunch of resources and crash the site?
LIMIT 200;
`,
[...itemIdsValues],
);
}
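// Callers destructure the raw `db.execute` result, as the loaders below do
// (the item IDs here are hypothetical):
//
//   const [rows] = await runItemModelingQuery(db, ["12345", "67890"]);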
const buildSpeciesThatNeedModelsForItemLoader = (db) =>
new DataLoader(
async (colorIdAndItemIdPairs) => {
// Get the requested item IDs, ignoring color for now. Remove duplicates.
let itemIds = colorIdAndItemIdPairs.map(({ itemId }) => itemId);
itemIds = [...new Set(itemIds)];
// Run the big modeling query, but filtered to specifically these items.
// The filter happens very early in the query, so it runs way faster than
// the full modeling query.
const [rows] = await runItemModelingQuery(db, itemIds);
const entities = rows.map(normalizeRow);
// Finally, the query returned a row for each item combined with each
// color built into the query (well, no row when no models needed!). So,
// find the right row for each color/item pair, or possibly null!
return colorIdAndItemIdPairs.map(({ colorId, itemId }) =>
entities.find((e) => e.itemId === itemId && e.colorId === colorId),
);
},
{ cacheKeyFn: ({ colorId, itemId }) => `${colorId}-${itemId}` },
);
const buildItemsThatNeedModelsLoader = (db, loaders) =>
new DataLoader(async (keys) => {
// Essentially, I want to take easy advantage of DataLoader's caching, for
// this query that can only run one way ^_^` There might be a better way to
// do this!
if (keys.length !== 1 && keys[0] !== "all") {
throw new Error(`this loader can only be loaded with the key "all"`);
}
const [rows] = await runItemModelingQuery(db, "all");
const entities = rows.map(normalizeRow);
const result = new Map();
for (const { colorId, itemId, ...entity } of entities) {
loaders.speciesThatNeedModelsForItemLoader.prime(
{ colorId, itemId },
entity,
);
if (!result.has(colorId)) {
result.set(colorId, new Map());
}
result.get(colorId).set(itemId, entity);
}
return [result];
});
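// The single returned value is a Map keyed by color ID, whose values are
// Maps keyed by item ID. A hypothetical read (IDs illustrative; color 8 is
// Maraquan in the query above):
//
//   const byColor = await loaders.itemsThatNeedModelsLoader.load("all");
//   const entity = byColor.get("8")?.get("12345"); // undefined if fully modeled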
const buildAllSpeciesIdsForColorLoader = (db) =>
new DataLoader(async (colorIds) => {
const qs = colorIds.map((_) => "?").join(", ");
const [rows] = await db.execute(
`
SELECT color_id,
GROUP_CONCAT(DISTINCT species_id ORDER BY species_id) AS species_ids
FROM pet_types T_PT1
WHERE color_id IN (${qs})
AND (
-- For non-standard colors, ignore the species that have the same
-- body ID as standard pets. Otherwise, a lot of items will be
-- like "oh, we fit the Maraquan Koi and Maraquan Mynci, where
-- are all the other Maraquans??", which is incorrect!
color_id = 8
OR
(
SELECT count(*) FROM pet_types T_PT2
WHERE T_PT1.body_id = T_PT2.body_id
AND T_PT1.color_id != T_PT2.color_id
) = 0
)
GROUP BY color_id;
`,
colorIds,
);
const entities = rows.map(normalizeRow);
return colorIds.map(
(colorId) =>
entities.find((e) => e.colorId === colorId)?.speciesIds?.split(",") ||
[],
);
});
const buildItemBodiesWithAppearanceDataLoader = (db) =>
new DataLoader(async (itemIds) => {
const qs = itemIds.map((_) => "?").join(",");
const [rows] = await db.execute(
// TODO: I'm not sure this ORDER BY clause will reliably get standard
// bodies to the top, it seems like it depends how DISTINCT works?
`SELECT pet_types.body_id, pet_types.species_id, items.id AS item_id
FROM items
INNER JOIN parents_swf_assets ON
items.id = parents_swf_assets.parent_id AND
parents_swf_assets.parent_type = "Item"
INNER JOIN swf_assets ON
parents_swf_assets.swf_asset_id = swf_assets.id
INNER JOIN pet_types ON
pet_types.body_id = swf_assets.body_id OR swf_assets.body_id = 0
INNER JOIN colors ON
pet_types.color_id = colors.id
WHERE items.id IN (${qs})
GROUP BY pet_types.body_id
ORDER BY
pet_types.species_id,
colors.standard DESC`,
itemIds,
);
const entities = rows.map(normalizeRow);
return itemIds.map((itemId) => entities.filter((e) => e.itemId === itemId));
});
const buildItemAllOccupiedZonesLoader = (db) =>
new DataLoader(async (itemIds) => {
const qs = itemIds.map((_) => "?").join(", ");
const [rows] = await db.execute(
`SELECT items.id, GROUP_CONCAT(DISTINCT sa.zone_id) AS zone_ids FROM items
INNER JOIN parents_swf_assets psa
ON psa.parent_type = "Item" AND psa.parent_id = items.id
INNER JOIN swf_assets sa ON sa.id = psa.swf_asset_id
WHERE items.id IN (${qs})
GROUP BY items.id;`,
itemIds,
);
const entities = rows.map(normalizeRow);
return itemIds.map((itemId) => {
const item = entities.find((e) => e.id === itemId);
if (!item) {
return [];
}
return item.zoneIds.split(",");
});
});
const buildItemCompatibleBodiesAndTheirZonesLoader = (db) =>
new DataLoader(async (itemIds) => {
const qs = itemIds.map((_) => "?").join(", ");
const [rows] = await db.query(
`
SELECT
items.id as itemId,
swf_assets.body_id AS bodyId,
(SELECT species_id FROM pet_types WHERE body_id = bodyId LIMIT 1)
AS speciesId,
GROUP_CONCAT(DISTINCT swf_assets.zone_id) AS zoneIds
FROM items
INNER JOIN parents_swf_assets ON
items.id = parents_swf_assets.parent_id AND
parents_swf_assets.parent_type = "Item"
INNER JOIN swf_assets ON
parents_swf_assets.swf_asset_id = swf_assets.id
WHERE items.id IN (${qs})
GROUP BY items.id, swf_assets.body_id
-- We have some invalid data where the asset has a body ID that
-- matches no pet type. Huh! Well, ignore those bodies!
HAVING speciesId IS NOT NULL OR bodyId = 0;
`,
itemIds,
);
const entities = rows.map(normalizeRow);
return itemIds.map((itemId) => entities.filter((e) => e.itemId === itemId));
});
const buildItemTradesLoader = (db, loaders) =>
new DataLoader(
async (itemIdOwnedPairs) => {
const qs = itemIdOwnedPairs
.map((_) => "(closet_hangers.item_id = ? AND closet_hangers.owned = ?)")
.join(" OR ");
const values = itemIdOwnedPairs
.map(({ itemId, isOwned }) => [itemId, isOwned])
.flat();
const [rows] = await db.execute(
{
sql: `
SELECT
closet_hangers.*, closet_lists.*, users.*
FROM closet_hangers
INNER JOIN users ON users.id = closet_hangers.user_id
LEFT JOIN closet_lists ON closet_lists.id = closet_hangers.list_id
WHERE (
(${qs})
AND (
(closet_hangers.list_id IS NOT NULL AND closet_lists.visibility >= 2)
OR (
closet_hangers.list_id IS NULL AND closet_hangers.owned = 1
AND users.owned_closet_hangers_visibility >= 2
)
OR (
closet_hangers.list_id IS NULL AND closet_hangers.owned = 0
AND users.wanted_closet_hangers_visibility >= 2
)
)
);
`,
nestTables: true,
},
values,
);
const entities = rows.map((row) => ({
closetHanger: normalizeRow(row.closet_hangers),
closetList: normalizeRow(row.closet_lists),
user: normalizeRow(row.users),
}));
for (const entity of entities) {
loaders.userLoader.prime(entity.user.id, entity.user);
loaders.closetListLoader.prime(entity.closetList.id, entity.closetList);
}
return itemIdOwnedPairs.map(({ itemId, isOwned }) =>
entities
.filter(
(e) =>
e.closetHanger.itemId === itemId &&
Boolean(e.closetHanger.owned) === isOwned,
)
.map((e) => ({
id: e.closetHanger.id,
closetList: e.closetList.id ? e.closetList : null,
user: e.user,
})),
);
},
{ cacheKeyFn: ({ itemId, isOwned }) => `${itemId}-${isOwned}` },
);
const buildPetTypeLoader = (db, loaders) =>
new DataLoader(async (petTypeIds) => {
const qs = petTypeIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM pet_types WHERE id IN (${qs})`,
petTypeIds,
);
const entities = rows.map(normalizeRow);
for (const petType of entities) {
loaders.petTypeBySpeciesAndColorLoader.prime(
{ speciesId: petType.speciesId, colorId: petType.colorId },
petType,
);
}
return petTypeIds.map((petTypeId) =>
entities.find((e) => e.id === petTypeId),
);
});
const buildPetTypeBySpeciesAndColorLoader = (db, loaders) =>
new DataLoader(
async (speciesAndColorPairs) => {
const conditions = [];
const values = [];
for (const { speciesId, colorId } of speciesAndColorPairs) {
conditions.push("(species_id = ? AND color_id = ?)");
values.push(speciesId, colorId);
}
const [rows] = await db.execute(
`SELECT * FROM pet_types WHERE ${conditions.join(" OR ")}`,
values,
);
const entities = rows.map(normalizeRow);
const entitiesBySpeciesAndColorPair = new Map(
entities.map((e) => [`${e.speciesId},${e.colorId}`, e]),
);
for (const petType of entities) {
loaders.petTypeLoader.prime(petType.id, petType);
}
return speciesAndColorPairs.map(({ speciesId, colorId }) =>
entitiesBySpeciesAndColorPair.get(`${speciesId},${colorId}`),
);
},
{ cacheKeyFn: ({ speciesId, colorId }) => `${speciesId},${colorId}` },
);
const buildPetTypesForColorLoader = (db, loaders) =>
new DataLoader(async (colorIds) => {
const qs = colorIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM pet_types WHERE color_id IN (${qs})`,
colorIds,
);
const entities = rows.map(normalizeRow);
for (const petType of entities) {
loaders.petTypeLoader.prime(petType.id, petType);
loaders.petTypeBySpeciesAndColorLoader.prime(
{ speciesId: petType.speciesId, colorId: petType.colorId },
petType,
);
}
return colorIds.map((colorId) =>
entities.filter((e) => e.colorId === colorId),
);
});
const buildAltStyleLoader = (db) =>
new DataLoader(async (altStyleIds) => {
const qs = altStyleIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM alt_styles WHERE id IN (${qs})`,
altStyleIds,
);
const entities = rows.map(normalizeRow);
return altStyleIds.map((altStyleId) =>
entities.find((e) => e.id === altStyleId),
);
});
const buildSwfAssetLoader = (db) =>
new DataLoader(async (swfAssetIds) => {
const qs = swfAssetIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM swf_assets WHERE id IN (${qs})`,
swfAssetIds,
);
const entities = rows.map(normalizeRow);
return swfAssetIds.map((swfAssetId) =>
entities.find((e) => e.id === swfAssetId),
);
});
const buildSwfAssetCountLoader = (db) =>
new DataLoader(
async (requests) => {
const [rows] = await db.execute(
`
SELECT count(*) AS count, type,
(manifest IS NOT NULL AND manifest != "") AS is_converted
FROM swf_assets
GROUP BY type, is_converted;
`,
);
const entities = rows.map(normalizeRow);
return requests.map(({ type, isConverted }) => {
// Find the returned rows that match this count request.
let matchingEntities = entities;
if (type != null) {
matchingEntities = matchingEntities.filter((e) => e.type === type);
}
if (isConverted != null) {
matchingEntities = matchingEntities.filter(
(e) => Boolean(e.isConverted) === isConverted,
);
}
// Add their counts together, and return the total.
return matchingEntities.map((e) => e.count).reduce((a, b) => a + b, 0);
});
},
{
cacheKeyFn: ({ type, isConverted }) => `${type},${isConverted}`,
},
);
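// Each request filters the small GROUP BY result set in memory and sums the
// matching counts, so passing null for a field means "all". A hypothetical
// sketch:
//
//   const numConverted = await loaders.swfAssetCountLoader.load({
//     type: null, // all types
//     isConverted: true,
//   });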
const buildSwfAssetByRemoteIdLoader = (db) =>
new DataLoader(
async (typeAndRemoteIdPairs) => {
const qs = typeAndRemoteIdPairs
.map((_) => "(type = ? AND remote_id = ?)")
.join(" OR ");
const values = typeAndRemoteIdPairs
.map(({ type, remoteId }) => [type, remoteId])
.flat();
const [rows] = await db.execute(
`SELECT * FROM swf_assets WHERE ${qs}`,
values,
);
const entities = rows.map(normalizeRow);
return typeAndRemoteIdPairs.map(({ type, remoteId }) =>
entities.find((e) => e.type === type && e.remoteId === remoteId),
);
},
{ cacheKeyFn: ({ type, remoteId }) => `${type},${remoteId}` },
);
const buildItemSwfAssetLoader = (db, loaders) =>
new DataLoader(
async (itemAndBodyPairs) => {
const conditions = [];
const values = [];
for (const { itemId, bodyId } of itemAndBodyPairs) {
conditions.push(
"(rel.parent_id = ? AND (sa.body_id = ? OR sa.body_id = 0))",
);
values.push(itemId, bodyId);
}
const [rows] = await db.execute(
`SELECT sa.*, rel.parent_id FROM swf_assets sa
INNER JOIN parents_swf_assets rel ON
rel.parent_type = "Item" AND
rel.swf_asset_id = sa.id
WHERE ${conditions.join(" OR ")}`,
values,
);
const entities = rows.map(normalizeRow);
for (const swfAsset of entities) {
loaders.swfAssetLoader.prime(swfAsset.id, swfAsset);
}
return itemAndBodyPairs.map(({ itemId, bodyId }) =>
entities.filter(
(e) =>
e.parentId === itemId && (e.bodyId === bodyId || e.bodyId === "0"),
),
);
},
{ cacheKeyFn: ({ itemId, bodyId }) => `${itemId},${bodyId}` },
);
const buildPetSwfAssetLoader = (db, loaders) =>
new DataLoader(async (petStateIds) => {
const qs = petStateIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT sa.*, rel.parent_id FROM swf_assets sa
INNER JOIN parents_swf_assets rel ON
rel.parent_type = "PetState" AND
rel.swf_asset_id = sa.id
WHERE rel.parent_id IN (${qs})`,
petStateIds,
);
const entities = rows.map(normalizeRow);
for (const swfAsset of entities) {
loaders.swfAssetLoader.prime(swfAsset.id, swfAsset);
}
return petStateIds.map((petStateId) =>
entities.filter((e) => e.parentId === petStateId),
);
});
const buildAltStyleSwfAssetLoader = (db, loaders) =>
new DataLoader(async (altStyleIds) => {
const qs = altStyleIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT sa.*, rel.parent_id FROM swf_assets sa
INNER JOIN parents_swf_assets rel ON
rel.parent_type = "AltStyle" AND
rel.swf_asset_id = sa.id
WHERE rel.parent_id IN (${qs})`,
altStyleIds,
);
const entities = rows.map(normalizeRow);
for (const swfAsset of entities) {
loaders.swfAssetLoader.prime(swfAsset.id, swfAsset);
}
return altStyleIds.map((altStyleId) =>
entities.filter((e) => e.parentId === altStyleId),
);
});
const buildNeopetsConnectionLoader = (db) =>
new DataLoader(async (ids) => {
const qs = ids.map((_) => "?").join(", ");
const [rows] = await db.execute(
`SELECT * FROM neopets_connections WHERE id IN (${qs})`,
ids,
);
const entities = rows.map(normalizeRow);
return ids.map((id) => entities.find((e) => e.id === id));
});
const buildOutfitLoader = (db) =>
new DataLoader(async (outfitIds) => {
const qs = outfitIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM outfits WHERE id IN (${qs})`,
outfitIds,
);
const entities = rows.map(normalizeRow);
return outfitIds.map((outfitId) => entities.find((e) => e.id === outfitId));
});
const buildItemOutfitRelationshipsLoader = (db) =>
new DataLoader(async (outfitIds) => {
const qs = outfitIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM item_outfit_relationships WHERE outfit_id IN (${qs})`,
outfitIds,
);
const entities = rows.map(normalizeRow);
return outfitIds.map((outfitId) =>
entities.filter((e) => e.outfitId === outfitId),
);
});
const buildPetStateLoader = (db) =>
new DataLoader(async (petStateIds) => {
const qs = petStateIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM pet_states WHERE id IN (${qs})`,
petStateIds,
);
const entities = rows.map(normalizeRow);
return petStateIds.map((petStateId) =>
entities.find((e) => e.id === petStateId),
);
});
const buildPetStatesForPetTypeLoader = (db, loaders) =>
new DataLoader(async (petTypeIds) => {
const qs = petTypeIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM pet_states
WHERE pet_type_id IN (${qs})
ORDER BY (mood_id IS NULL) ASC, mood_id ASC, female DESC,
unconverted DESC, glitched ASC, id DESC`,
petTypeIds,
);
const entities = rows.map(normalizeRow);
for (const petState of entities) {
loaders.petStateLoader.prime(petState.id, petState);
}
return petTypeIds.map((petTypeId) =>
entities.filter((e) => e.petTypeId === petTypeId),
);
});
/** Given a bodyId, loads the canonical PetState to show as an example. */
const buildCanonicalPetStateForBodyLoader = (db, loaders) =>
new DataLoader(
async (requests) => {
// I don't know how to do this query in bulk, so we'll just do it in
// parallel!
return await Promise.all(
requests.map(async ({ bodyId, preferredColorId, fallbackColorId }) => {
// Randomly-ish choose which gender presentation to prefer, based on
// body ID. This makes the outcome stable, which is nice for caching
// and testing and just generally not being surprised, but still
// creates an even distribution.
const gender = bodyId % 2 === 0 ? "masc" : "fem";
const bodyCondition = bodyId !== "0" ? `pet_types.body_id = ?` : `1`;
const bodyValues = bodyId !== "0" ? [bodyId] : [];
const [rows] = await db.execute(
{
sql: `
SELECT pet_states.*, pet_types.* FROM pet_states
INNER JOIN pet_types ON pet_types.id = pet_states.pet_type_id
WHERE ${bodyCondition}
ORDER BY
pet_types.color_id = ? DESC, -- Prefer preferredColorId
pet_types.color_id = ? DESC, -- Prefer fallbackColorId
pet_states.mood_id = 1 DESC, -- Prefer Happy
pet_states.female = ? DESC, -- Prefer given gender
pet_states.id DESC, -- Prefer recent models (like in the app)
pet_states.glitched ASC -- Prefer not glitched (like in the app)
LIMIT 1`,
nestTables: true,
},
[
...bodyValues,
preferredColorId || "<ignore>",
fallbackColorId,
gender === "fem",
],
);
const petState = normalizeRow(rows[0].pet_states);
const petType = normalizeRow(rows[0].pet_types);
if (!petState || !petType) {
return null;
}
loaders.petStateLoader.prime(petState.id, petState);
loaders.petTypeLoader.prime(petType.id, petType);
return petState;
}),
);
},
{
cacheKeyFn: ({ bodyId, preferredColorId, fallbackColorId }) =>
`${bodyId}-${preferredColorId}-${fallbackColorId}`,
},
);
const buildPetStateByPetTypeAndAssetsLoader = (db, loaders) =>
new DataLoader(
async (petTypeIdAndAssetIdsPairs) => {
const qs = petTypeIdAndAssetIdsPairs
.map((_) => "(pet_type_id = ? AND swf_asset_ids = ?)")
.join(" OR ");
const values = petTypeIdAndAssetIdsPairs
.map(({ petTypeId, swfAssetIds }) => [petTypeId, swfAssetIds])
.flat();
const [rows] = await db.execute(
`SELECT * FROM pet_states WHERE ${qs}`,
values,
);
const entities = rows.map(normalizeRow);
for (const petState of entities) {
loaders.petStateLoader.prime(petState.id, petState);
}
return petTypeIdAndAssetIdsPairs.map(({ petTypeId, swfAssetIds }) =>
entities.find(
(e) => e.petTypeId === petTypeId && e.swfAssetIds === swfAssetIds,
),
);
},
{
cacheKeyFn: ({ petTypeId, swfAssetIds }) => `${petTypeId}-${swfAssetIds}`,
},
);
const buildUserLoader = (db) =>
new DataLoader(async (ids) => {
const qs = ids.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM users WHERE id IN (${qs})`,
ids,
);
const entities = rows.map(normalizeRow);
const entitiesById = new Map(entities.map((e) => [e.id, e]));
return ids.map(
(id) =>
entitiesById.get(String(id)) ||
new Error(`could not find user with ID: ${id}`),
);
});
const buildUserByNameLoader = (db) =>
new DataLoader(async (names) => {
const qs = names.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM users WHERE name IN (${qs})`,
names,
);
const entities = rows.map(normalizeRow);
return names.map((name) =>
entities.find((e) => e.name.toLowerCase() === name.toLowerCase()),
);
});
const buildUserByEmailLoader = (db) =>
new DataLoader(async (emails) => {
const qs = emails.map((_) => "?").join(",");
const [rows] = await db.execute(
{
sql: `
SELECT users.*, id_users.email FROM users
INNER JOIN openneo_id.users id_users ON id_users.id = users.remote_id
WHERE id_users.email IN (${qs})
`,
nestTables: true,
},
emails,
);
const entities = rows.map((row) => ({
user: normalizeRow(row.users),
email: row.id_users.email,
}));
return emails.map((email) => entities.find((e) => e.email === email)?.user);
});
const buildUserClosetHangersLoader = (db) =>
new DataLoader(async (userIds) => {
const qs = userIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT closet_hangers.*, item_translations.name as item_name FROM closet_hangers
INNER JOIN items ON items.id = closet_hangers.item_id
INNER JOIN item_translations ON
item_translations.item_id = items.id AND locale = "en"
WHERE user_id IN (${qs})
ORDER BY item_name`,
userIds,
);
const entities = rows.map(normalizeRow);
return userIds.map((userId) =>
entities.filter((e) => e.userId === String(userId)),
);
});
const buildUserItemClosetHangersLoader = (db) =>
new DataLoader(async (userIdAndItemIdPairs) => {
const conditions = userIdAndItemIdPairs
.map((_) => `(user_id = ? AND item_id = ?)`)
.join(` OR `);
const params = userIdAndItemIdPairs
.map(({ userId, itemId }) => [userId, itemId])
.flat();
const [rows] = await db.execute(
`SELECT * FROM closet_hangers WHERE ${conditions};`,
params,
);
const entities = rows.map(normalizeRow);
return userIdAndItemIdPairs.map(({ userId, itemId }) =>
entities.filter((e) => e.userId === userId && e.itemId === itemId),
);
});
const buildUserClosetListsLoader = (db, loaders) =>
new DataLoader(async (userIds) => {
const qs = userIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM closet_lists
WHERE user_id IN (${qs})
ORDER BY name`,
userIds,
);
const entities = rows.map(normalizeRow);
for (const entity of entities) {
loaders.closetListLoader.prime(entity.id, entity);
}
return userIds.map((userId) =>
entities.filter((e) => e.userId === String(userId)),
);
});
const buildUserOutfitsLoader = (db, loaders) =>
new DataLoader(async (queries) => {
// This isn't actually optimized as a batch query; we're just using a
// DataLoader for API consistency with our other loaders!
return queries.map(async ({ userId, limit, offset }) => {
const actualLimit = Math.min(limit || 30, 30);
const actualOffset = offset || 0;
const [rows] = await db.execute(
`SELECT * FROM outfits
WHERE user_id = ?
ORDER BY name
LIMIT ? OFFSET ?`,
[userId, actualLimit, actualOffset],
);
const entities = rows.map(normalizeRow);
for (const entity of entities) {
loaders.outfitLoader.prime(entity.id, entity);
}
return entities;
});
});
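// A hypothetical pagination sketch, pairing this loader with
// `buildUserNumTotalOutfitsLoader` below (loader property names assumed):
//
//   const outfits = await loaders.userOutfitsLoader.load({
//     userId: "123", offset: 60, limit: 30, // page 3
//   });
//   const numTotalOutfits = await loaders.userNumTotalOutfitsLoader.load("123");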
const buildUserNumTotalOutfitsLoader = (db) =>
new DataLoader(async (userIds) => {
const qs = userIds.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT user_id, COUNT(*) as num_total_outfits FROM outfits
WHERE user_id IN (${qs})
GROUP BY user_id`,
userIds,
);
const entities = rows.map(normalizeRow);
return userIds
.map((userId) => entities.find((e) => e.userId === String(userId)))
.map((e) => (e ? e.numTotalOutfits : 0));
});
const buildUserLastTradeActivityLoader = (db) =>
new DataLoader(async (userIds) => {
const qs = userIds.map((_) => "?").join(",");
const [rows] = await db.execute(
// This query has a custom index: index_closet_hangers_for_last_trade_activity.
// It's on (user_id, owned, list_id, updated_at). The intent is that this
// will enable the query planner to find the max updated_at for each
// user/owned/list_id tuple, and then use the filter conditions later to
// remove non-Trading lists and choose the overall _Trading_ max for the
// user.
//
// I'm not 100% sure that this is exactly what the query planner does,
// but it seems _very_ happy when it has this index: the Butterfly Shower
// item had ~850 users offering it, and this brought the query from
// 10-15sec to 1-2sec. An earlier version of the index, without the
// `owned` field, and forced with `USE INDEX`, was more like 4-5 sec - so
// I'm guessing what happened there is that forcing the index forced a
// better query plan, but that it still held all the hangers, instead of
// deriving intermediate maxes. (With this better index, the query
// planner jumps at it without a hint!)
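      //
      // For reference, a sketch of what that index definition would look
      // like (assumed from the description above, not copied from an actual
      // migration):
      //
      //   CREATE INDEX index_closet_hangers_for_last_trade_activity
      //     ON closet_hangers (user_id, owned, list_id, updated_at);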
`
SELECT
closet_hangers.user_id AS user_id,
MAX(closet_hangers.updated_at) AS last_trade_activity
FROM closet_hangers
INNER JOIN users ON users.id = closet_hangers.user_id
LEFT JOIN closet_lists ON closet_lists.id = closet_hangers.list_id
WHERE (
closet_hangers.user_id IN (${qs})
AND (
(closet_hangers.list_id IS NOT NULL AND closet_lists.visibility >= 2)
OR (
closet_hangers.list_id IS NULL AND closet_hangers.owned = 1
AND users.owned_closet_hangers_visibility >= 2
)
OR (
closet_hangers.list_id IS NULL AND closet_hangers.owned = 0
AND users.wanted_closet_hangers_visibility >= 2
)
)
)
GROUP BY closet_hangers.user_id
`,
userIds,
);
const entities = rows.map(normalizeRow);
return userIds.map((userId) => {
const entity = entities.find((e) => e.userId === String(userId));
return entity ? entity.lastTradeActivity : null;
});
});
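
// Batches zone lookups by ID. Like the color and species loaders, this one
// also exposes a `loadAll` helper that fetches every zone in one query and
// primes the loader's cache, so later `load(id)` calls resolve from cache.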
const buildZoneLoader = (db) => {
const zoneLoader = new DataLoader(async (ids) => {
const qs = ids.map((_) => "?").join(",");
const [rows] = await db.execute(
`SELECT * FROM zones WHERE id IN (${qs})`,
ids,
);
const entities = rows.map(normalizeRow);
const entitiesById = new Map(entities.map((e) => [e.id, e]));
return ids.map(
(id) =>
entitiesById.get(String(id)) ||
new Error(`could not find zone with ID: ${id}`),
);
});
zoneLoader.loadAll = async () => {
const [rows] = await db.execute(`SELECT * FROM zones`);
const entities = rows.map(normalizeRow);
for (const zone of entities) {
zoneLoader.prime(zone.id, zone);
}
return entities;
};
return zoneLoader;
};
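
// Usage sketch (hypothetical caller, for illustration):
//
//   const zone = await loaders.zoneLoader.load(zoneId); // batched with other loads
//   const allZones = await loaders.zoneLoader.loadAll(); // fetches and primes all
//
// `buildLoaders` assembles every loader for one `db` connection into a single
// object, which callers can pass around (e.g. as GraphQL context).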
function buildLoaders(db) {
const loaders = {};
loaders.loadAllPetTypes = loadAllPetTypes(db);
loaders.closetListLoader = buildClosetListLoader(db);
loaders.closetHangersForListLoader = buildClosetHangersForListLoader(db);
loaders.closetHangersForDefaultListLoader =
buildClosetHangersForDefaultListLoader(db);
loaders.colorLoader = buildColorLoader(db);
loaders.itemLoader = buildItemLoader(db);
loaders.itemTranslationLoader = buildItemTranslationLoader(db);
loaders.itemByNameLoader = buildItemByNameLoader(db, loaders);
loaders.itemSearchNumTotalItemsLoader =
buildItemSearchNumTotalItemsLoader(db);
loaders.itemSearchItemsLoader = buildItemSearchItemsLoader(db, loaders);
loaders.newestItemsLoader = buildNewestItemsLoader(db, loaders);
loaders.speciesThatNeedModelsForItemLoader =
buildSpeciesThatNeedModelsForItemLoader(db);
loaders.itemsThatNeedModelsLoader = buildItemsThatNeedModelsLoader(
db,
loaders,
);
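  // Note (carried over from the modeling-page performance fix): the modeling
  // query inlines its species-IDs subquery so the `HAVING` clause can
  // short-circuit before computing it, which means it no longer returns
  // `all_species_ids_for_this_color`; this dedicated loader fills that gap.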
loaders.allSpeciesIdsForColorLoader = buildAllSpeciesIdsForColorLoader(db);
loaders.itemBodiesWithAppearanceDataLoader =
buildItemBodiesWithAppearanceDataLoader(db);
loaders.itemAllOccupiedZonesLoader = buildItemAllOccupiedZonesLoader(db);
loaders.itemCompatibleBodiesAndTheirZonesLoader =
buildItemCompatibleBodiesAndTheirZonesLoader(db);
loaders.itemTradesLoader = buildItemTradesLoader(db, loaders);
loaders.petTypeLoader = buildPetTypeLoader(db, loaders);
loaders.petTypeBySpeciesAndColorLoader = buildPetTypeBySpeciesAndColorLoader(
db,
loaders,
);
loaders.petTypesForColorLoader = buildPetTypesForColorLoader(db, loaders);
loaders.altStyleLoader = buildAltStyleLoader(db);
loaders.swfAssetLoader = buildSwfAssetLoader(db);
loaders.swfAssetCountLoader = buildSwfAssetCountLoader(db);
loaders.swfAssetByRemoteIdLoader = buildSwfAssetByRemoteIdLoader(db);
loaders.itemSwfAssetLoader = buildItemSwfAssetLoader(db, loaders);
loaders.petSwfAssetLoader = buildPetSwfAssetLoader(db, loaders);
loaders.altStyleSwfAssetLoader = buildAltStyleSwfAssetLoader(db, loaders);
loaders.neopetsConnectionLoader = buildNeopetsConnectionLoader(db);
loaders.outfitLoader = buildOutfitLoader(db);
loaders.itemOutfitRelationshipsLoader =
buildItemOutfitRelationshipsLoader(db);
loaders.petStateLoader = buildPetStateLoader(db);
loaders.petStatesForPetTypeLoader = buildPetStatesForPetTypeLoader(
db,
loaders,
);
loaders.canonicalPetStateForBodyLoader = buildCanonicalPetStateForBodyLoader(
db,
loaders,
);
loaders.petStateByPetTypeAndAssetsLoader =
buildPetStateByPetTypeAndAssetsLoader(db, loaders);
loaders.speciesLoader = buildSpeciesLoader(db);
loaders.tradeMatchesLoader = buildTradeMatchesLoader(db);
loaders.userLoader = buildUserLoader(db);
loaders.userByNameLoader = buildUserByNameLoader(db);
loaders.userByEmailLoader = buildUserByEmailLoader(db);
loaders.userClosetHangersLoader = buildUserClosetHangersLoader(db);
loaders.userItemClosetHangersLoader = buildUserItemClosetHangersLoader(db);
loaders.userClosetListsLoader = buildUserClosetListsLoader(db, loaders);
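  // Note: the user outfits page is paginated, so "how many outfits in total"
  // (for the pagination toolbar) is loaded separately from each page of
  // outfits.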
loaders.userNumTotalOutfitsLoader = buildUserNumTotalOutfitsLoader(db);
loaders.userOutfitsLoader = buildUserOutfitsLoader(db, loaders);
loaders.userLastTradeActivityLoader = buildUserLastTradeActivityLoader(db);
loaders.zoneLoader = buildZoneLoader(db);
return loaders;
}
module.exports = buildLoaders;
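
// Wiring sketch (hypothetical; the real call site is wherever the GraphQL
// context gets built):
//
//   const loaders = buildLoaders(db);
//   const color = await loaders.colorLoader.load(colorId);
//   const zones = await loaders.zoneLoader.loadAll();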