item searches by word, not phrase

Been bothered by this for a while!

My hope is that this isn't a notable marginal performance hit—we were already walking the table and doing string ops anyway, I can't imagine adding to that is actually that much of a marginal lift, when the main bottleneck was probably reads. And the perf should be identical for simple single-word queries anyway. But we'll see how it feels!
This commit is contained in:
Emi Matchu 2020-09-01 17:35:41 -07:00
parent 900f055b1b
commit bf465d802e
2 changed files with 136 additions and 10 deletions

View file

@ -152,14 +152,22 @@ const buildItemSearchLoader = (db, loaders) =>
// This isn't actually optimized as a batch query, we're just using a // This isn't actually optimized as a batch query, we're just using a
// DataLoader API consistency with our other loaders! // DataLoader API consistency with our other loaders!
const queryPromises = queries.map(async (query) => { const queryPromises = queries.map(async (query) => {
const queryForMysql = "%" + query.replace(/_%/g, "\\$0") + "%"; // Split the query into words, and search for each word as a substring
// of the name.
const words = query.split(/\s+/);
const wordMatchersForMysql = words.map(
(word) => "%" + word.replace(/_%/g, "\\$0") + "%"
);
const matcherPlaceholders = words
.map((_) => "t.name LIKE ?")
.join(" AND ");
const [rows, _] = await db.execute( const [rows, _] = await db.execute(
`SELECT items.*, t.name FROM items `SELECT items.*, t.name FROM items
INNER JOIN item_translations t ON t.item_id = items.id INNER JOIN item_translations t ON t.item_id = items.id
WHERE t.name LIKE ? AND t.locale="en" WHERE ${matcherPlaceholders} AND t.locale="en"
ORDER BY t.name ORDER BY t.name
LIMIT 30`, LIMIT 30`,
[queryForMysql] [...wordMatchersForMysql]
); );
const entities = rows.map(normalizeRow); const entities = rows.map(normalizeRow);
@ -185,18 +193,24 @@ const buildItemSearchToFitLoader = (db, loaders) =>
const actualOffset = offset || 0; const actualOffset = offset || 0;
const actualLimit = Math.min(limit || 30, 30); const actualLimit = Math.min(limit || 30, 30);
const queryForMysql = "%" + query.replace(/_%/g, "\\$0") + "%"; const words = query.split(/\s+/);
const wordMatchersForMysql = words.map(
(word) => "%" + word.replace(/_%/g, "\\$0") + "%"
);
const matcherPlaceholders = words
.map((_) => "t.name LIKE ?")
.join(" AND ");
const [rows, _] = await db.execute( const [rows, _] = await db.execute(
`SELECT DISTINCT items.*, t.name FROM items `SELECT DISTINCT items.*, t.name FROM items
INNER JOIN item_translations t ON t.item_id = items.id INNER JOIN item_translations t ON t.item_id = items.id
INNER JOIN parents_swf_assets rel INNER JOIN parents_swf_assets rel
ON rel.parent_type = "Item" AND rel.parent_id = items.id ON rel.parent_type = "Item" AND rel.parent_id = items.id
INNER JOIN swf_assets ON rel.swf_asset_id = swf_assets.id INNER JOIN swf_assets ON rel.swf_asset_id = swf_assets.id
WHERE t.name LIKE ? AND t.locale="en" AND WHERE ${matcherPlaceholders} AND t.locale="en" AND
(swf_assets.body_id = ? OR swf_assets.body_id = 0) (swf_assets.body_id = ? OR swf_assets.body_id = 0)
ORDER BY t.name ORDER BY t.name
LIMIT ? OFFSET ?`, LIMIT ? OFFSET ?`,
[queryForMysql, bodyId, actualLimit, actualOffset] [...wordMatchersForMysql, bodyId, actualLimit, actualOffset]
); );
const entities = rows.map(normalizeRow); const entities = rows.map(normalizeRow);

View file

@ -24,11 +24,58 @@ describe("ItemSearch", () => {
Array [ Array [
"SELECT items.*, t.name FROM items "SELECT items.*, t.name FROM items
INNER JOIN item_translations t ON t.item_id = items.id INNER JOIN item_translations t ON t.item_id = items.id
WHERE t.name LIKE ? AND t.locale=\\"en\\" WHERE t.name LIKE ? AND t.name LIKE ? AND t.locale=\\"en\\"
ORDER BY t.name ORDER BY t.name
LIMIT 30", LIMIT 30",
Array [ Array [
"%Neopian Times%", "%Neopian%",
"%Times%",
],
],
]
`);
});
it("searches for each word separately", async () => {
const res = await query({
query: gql`
query {
itemSearch(query: "Tarla Workshop") {
query
items {
id
name
}
}
}
`,
});
expect(res).toHaveNoErrors();
expect(res.data).toMatchInlineSnapshot(`
Object {
"itemSearch": Object {
"items": Array [
Object {
"id": "50377",
"name": "Tarlas Underground Workshop Background",
},
],
"query": "Tarla Workshop",
},
}
`);
expect(getDbCalls()).toMatchInlineSnapshot(`
Array [
Array [
"SELECT items.*, t.name FROM items
INNER JOIN item_translations t ON t.item_id = items.id
WHERE t.name LIKE ? AND t.name LIKE ? AND t.locale=\\"en\\"
ORDER BY t.name
LIMIT 30",
Array [
"%Tarla%",
"%Workshop%",
], ],
], ],
] ]
@ -71,12 +118,77 @@ describe("ItemSearch", () => {
INNER JOIN parents_swf_assets rel INNER JOIN parents_swf_assets rel
ON rel.parent_type = \\"Item\\" AND rel.parent_id = items.id ON rel.parent_type = \\"Item\\" AND rel.parent_id = items.id
INNER JOIN swf_assets ON rel.swf_asset_id = swf_assets.id INNER JOIN swf_assets ON rel.swf_asset_id = swf_assets.id
WHERE t.name LIKE ? AND t.locale=\\"en\\" AND WHERE t.name LIKE ? AND t.name LIKE ? AND t.locale=\\"en\\" AND
(swf_assets.body_id = ? OR swf_assets.body_id = 0) (swf_assets.body_id = ? OR swf_assets.body_id = 0)
ORDER BY t.name ORDER BY t.name
LIMIT ? OFFSET ?", LIMIT ? OFFSET ?",
Array [ Array [
"%Neopian Times%", "%Neopian%",
"%Times%",
"180",
30,
0,
],
],
]
`);
});
it("searches for each word separately (fit mode)", async () => {
const res = await query({
query: gql`
query {
itemSearchToFit(
query: "Tarla Workshop"
speciesId: "54"
colorId: "75"
) {
query
items {
id
name
}
}
}
`,
});
expect(res).toHaveNoErrors();
expect(res.data).toMatchInlineSnapshot(`
Object {
"itemSearchToFit": Object {
"items": Array [
Object {
"id": "50377",
"name": "Tarlas Underground Workshop Background",
},
],
"query": "Tarla Workshop",
},
}
`);
expect(getDbCalls()).toMatchInlineSnapshot(`
Array [
Array [
"SELECT * FROM pet_types WHERE (species_id = ? AND color_id = ?)",
Array [
"54",
"75",
],
],
Array [
"SELECT DISTINCT items.*, t.name FROM items
INNER JOIN item_translations t ON t.item_id = items.id
INNER JOIN parents_swf_assets rel
ON rel.parent_type = \\"Item\\" AND rel.parent_id = items.id
INNER JOIN swf_assets ON rel.swf_asset_id = swf_assets.id
WHERE t.name LIKE ? AND t.name LIKE ? AND t.locale=\\"en\\" AND
(swf_assets.body_id = ? OR swf_assets.body_id = 0)
ORDER BY t.name
LIMIT ? OFFSET ?",
Array [
"%Tarla%",
"%Workshop%",
"180", "180",
30, 30,
0, 0,