Compare commits

..

2 commits

Author SHA1 Message Date
26add4577c Use cached fields for item searches, instead of big joins
This is the second part of the previous change `efda6d74`, in which we
switch out the item search query conditions!

This was a two-parter to ease deployment: first deploy the change with
the migration, then *run* the migration (because it's an unusually slow
one), then deploy this change that actually uses it.

Another way to approach this would've been to deploy it all in one
commit, but not set it as `current` until we had run the migration.
That would have been a reasonable approach too!
2024-09-30 23:16:03 -07:00
efda6d74ab Add cached fields to Item model for searching, but don't use them yet
This is the first part of a change to improve search performance, by
caching occupied zone IDs and supported body IDs onto the Item record
itself, instead of always doing joins with `SwfAsset`.

It's unfortunate, because part of the power of SQL is joins! But doing
joins with big tables, in ways that can't take advantage of indexes in
the same ways as we often want to, is… slow.

It's possible there's something I'm misunderstanding about SQL
optimization, and this _could_ be done with query optimization or
indexes instead of duplicating data like this? This complexity carries
the risk of data getting out of sync in unforeseen ways. But this is
what I know how to do, and it seems to be working, so! Okay!
2024-09-30 23:10:44 -07:00
4 changed files with 51 additions and 59 deletions

View file

@ -11,10 +11,10 @@ class Item < ApplicationRecord
SwfAssetType = 'object' SwfAssetType = 'object'
has_many :closet_hangers has_many :closet_hangers
has_one :contribution, :as => :contributed, :inverse_of => :contributed has_one :contribution, as: :contributed, inverse_of: :contributed
has_one :nc_mall_record has_one :nc_mall_record
has_many :parent_swf_asset_relationships, :as => :parent has_many :parent_swf_asset_relationships, as: :parent
has_many :swf_assets, :through => :parent_swf_asset_relationships has_many :swf_assets, through: :parent_swf_asset_relationships
belongs_to :dyeworks_base_item, class_name: "Item", belongs_to :dyeworks_base_item, class_name: "Item",
default: -> { inferred_dyeworks_base_item }, optional: true default: -> { inferred_dyeworks_base_item }, optional: true
has_many :dyeworks_variants, class_name: "Item", has_many :dyeworks_variants, class_name: "Item",
@ -61,38 +61,18 @@ class Item < ApplicationRecord
'%' + sanitize_sql_like(PAINTBRUSH_SET_DESCRIPTION) + '%') '%' + sanitize_sql_like(PAINTBRUSH_SET_DESCRIPTION) + '%')
} }
scope :occupies, ->(zone_label) { scope :occupies, ->(zone_label) {
zone_ids = Zone.matching_label(zone_label).map(&:id) Zone.matching_label(zone_label).
map { |z| occupies_zone_id(z.id) }.reduce(none, &:or)
# NOTE: In searches, this query performs much better using a subquery
# instead of joins! This is because, in the joins case, filtering by an
# `swf_assets` field but sorting by an `items` field causes the query
# planner to only be able to use an index for *one* of them. In this case,
# MySQL can use the `swf_assets`.`zone_id` index to get the item IDs for
# the subquery, then use the `items`.`name` index to sort them.
i = arel_table
psa = ParentSwfAssetRelationship.arel_table
sa = SwfAsset.arel_table
where(
ParentSwfAssetRelationship.joins(:swf_asset).
where(sa[:zone_id].in(zone_ids)).
where(psa[:parent_type].eq("Item")).
where(psa[:parent_id].eq(i[:id])).
arel.exists
)
} }
scope :not_occupies, ->(zone_label) { scope :not_occupies, ->(zone_label) {
zone_ids = Zone.matching_label(zone_label).map(&:id) Zone.matching_label(zone_label).
i = Item.arel_table map { |z| not_occupies_zone_id(z.id) }.reduce(all, &:and)
sa = SwfAsset.arel_table }
# Querying for "has NO swf_assets matching these zone IDs" is trickier than scope :occupies_zone_id, ->(zone_id) {
# the positive case! To do it, we GROUP_CONCAT the zone_ids together for where("FIND_IN_SET(?, cached_occupied_zone_ids) > 0", zone_id)
# each item, then use FIND_IN_SET to search the result for each zone ID, }
# and assert that it must not find a match. (This is uhh, not exactly fast, scope :not_occupies_zone_id, ->(zone_id) {
# so it helps to have other tighter conditions applied first!) where.not("FIND_IN_SET(?, cached_occupied_zone_ids) > 0", zone_id)
# TODO: I feel like this could also be solved with a LEFT JOIN, idk if that
# performs any better? In Rails 5+ `left_outer_joins` is built in so!
condition = zone_ids.map { 'FIND_IN_SET(?, GROUP_CONCAT(zone_id)) = 0' }.join(' AND ')
joins(:swf_assets).group(i[:id]).having(condition, *zone_ids).distinct
} }
scope :restricts, ->(zone_label) { scope :restricts, ->(zone_label) {
zone_ids = Zone.matching_label(zone_label).map(&:id) zone_ids = Zone.matching_label(zone_label).map(&:id)
@ -105,31 +85,12 @@ class Item < ApplicationRecord
where("NOT (#{condition})", *zone_ids) where("NOT (#{condition})", *zone_ids)
} }
scope :fits, ->(body_id) { scope :fits, ->(body_id) {
joins(:swf_assets).where(swf_assets: {body_id: [body_id, 0]}).distinct where("FIND_IN_SET(?, cached_compatible_body_ids) > 0", body_id).
or(where("FIND_IN_SET('0', cached_compatible_body_ids) > 0"))
} }
scope :not_fits, ->(body_id) { scope :not_fits, ->(body_id) {
i = Item.arel_table where.not("FIND_IN_SET(?, cached_compatible_body_ids) > 0", body_id).
sa = SwfAsset.arel_table and(where.not("FIND_IN_SET('0', cached_compatible_body_ids) > 0"))
# Querying for "has NO swf_assets matching these body IDs" is trickier than
# the positive case! To do it, we GROUP_CONCAT the body_ids together for
# each item, then use FIND_IN_SET to search the result for the body ID,
# and assert that it must not find a match. (This is uhh, not exactly fast,
# so it helps to have other tighter conditions applied first!)
#
# TODO: I feel like this could also be solved with a LEFT JOIN, idk if that
# performs any better? In Rails 5+ `left_outer_joins` is built in so!
#
# NOTE: The `fits` and `not_fits` counts don't perfectly add up to the
# total number of items, 5 items aren't accounted for? I'm not going to
# bother looking into this, but one thing I notice is items with no assets
# somehow would not match either scope in this impl (but LEFT JOIN would!)
joins(:swf_assets).group(i[:id]).
having(
"FIND_IN_SET(?, GROUP_CONCAT(body_id)) = 0 AND " +
"FIND_IN_SET(0, GROUP_CONCAT(body_id)) = 0",
body_id
).
distinct
} }
def nc_trade_value def nc_trade_value
@ -296,6 +257,12 @@ class Item < ApplicationRecord
restricted_zones + occupied_zones restricted_zones + occupied_zones
end end
def update_cached_fields
self.cached_occupied_zone_ids = occupied_zone_ids.sort.join(",")
self.cached_compatible_body_ids = compatible_body_ids.sort.join(",")
self.save!
end
def species_support_ids def species_support_ids
@species_support_ids_array ||= read_attribute('species_support_ids').split(',').map(&:to_i) rescue nil @species_support_ids_array ||= read_attribute('species_support_ids').split(',').map(&:to_i) rescue nil
end end

View file

@ -5,6 +5,9 @@ class ParentSwfAssetRelationship < ApplicationRecord
belongs_to :swf_asset belongs_to :swf_asset
after_save :update_parent_cached_fields
after_destroy :update_parent_cached_fields
def item=(replacement) def item=(replacement)
self.parent = replacement self.parent = replacement
end end
@ -16,4 +19,8 @@ class ParentSwfAssetRelationship < ApplicationRecord
def pet_state=(replacement) def pet_state=(replacement)
self.parent = replacement self.parent = replacement
end end
def update_parent_cached_fields
parent.try(:update_cached_fields)
end
end end

View file

@ -0,0 +1,16 @@
class AddCachedFieldsToItems < ActiveRecord::Migration[7.2]
def change
add_column :items, :cached_occupied_zone_ids, :string, null: false, default: ""
add_column :items, :cached_compatible_body_ids, :text, null: false, default: ""
reversible do |direction|
direction.up do
puts "Updating cached item fields for all items…"
Item.includes(:swf_assets).find_in_batches.with_index do |items, batch|
puts "Updating item batch ##{batch+1}"
items.each(&:update_cached_fields)
end
end
end
end
end

View file

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.2].define(version: 2024_09_28_022359) do ActiveRecord::Schema[7.2].define(version: 2024_10_01_052510) do
create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_520_ci", force: :cascade do |t| create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_520_ci", force: :cascade do |t|
t.integer "species_id", null: false t.integer "species_id", null: false
t.integer "color_id", null: false t.integer "color_id", null: false
@ -137,6 +137,8 @@ ActiveRecord::Schema[7.2].define(version: 2024_09_28_022359) do
t.text "description", size: :medium, null: false t.text "description", size: :medium, null: false
t.string "rarity", default: "", null: false t.string "rarity", default: "", null: false
t.integer "dyeworks_base_item_id" t.integer "dyeworks_base_item_id"
t.string "cached_occupied_zone_ids", default: "", null: false
t.text "cached_compatible_body_ids", default: "", null: false
t.index ["dyeworks_base_item_id"], name: "index_items_on_dyeworks_base_item_id" t.index ["dyeworks_base_item_id"], name: "index_items_on_dyeworks_base_item_id"
t.index ["modeling_status_hint", "created_at", "id"], name: "items_modeling_status_hint_and_created_at_and_id" t.index ["modeling_status_hint", "created_at", "id"], name: "items_modeling_status_hint_and_created_at_and_id"
t.index ["modeling_status_hint", "created_at"], name: "items_modeling_status_hint_and_created_at" t.index ["modeling_status_hint", "created_at"], name: "items_modeling_status_hint_and_created_at"
@ -154,7 +156,7 @@ ActiveRecord::Schema[7.2].define(version: 2024_09_28_022359) do
end end
create_table "modeling_logs", id: :integer, charset: "utf8mb4", collation: "utf8mb4_unicode_520_ci", force: :cascade do |t| create_table "modeling_logs", id: :integer, charset: "utf8mb4", collation: "utf8mb4_unicode_520_ci", force: :cascade do |t|
t.datetime "created_at", precision: nil, default: -> { "CURRENT_TIMESTAMP" }, null: false t.datetime "created_at", precision: nil, default: -> { "current_timestamp()" }, null: false
t.text "log_json", size: :long, null: false t.text "log_json", size: :long, null: false
t.string "pet_name", limit: 128, null: false t.string "pet_name", limit: 128, null: false
end end