Save manifest load info when preloading them, too

This was a bit tricky! When I initially turned it on, running
`rails swf_assets:manifests:load` would trigger database errors of "oh
no we can't get a connection from the pool!", because too many records
were trying to concurrently save at once.

So now, we give ourselves the ability to say `save_changes: false`, and
then save them all in one batch after! That way, we're still saving by
default in the edge cases where we're downloading and saving a manifest
on the fly, but batching them in cases where we're likely to be dealing
with a lot of them!
This commit is contained in:
Emi Matchu 2024-02-25 16:02:36 -08:00
parent cc33ce1d6e
commit 2cac048158
3 changed files with 22 additions and 17 deletions

View file

@ -51,11 +51,16 @@ class SwfAsset < ApplicationRecord
end end
def manifest def manifest
raise "manifest_url is blank" if manifest_url.blank?
@manifest ||= load_manifest @manifest ||= load_manifest
end end
def load_manifest def preload_manifest(save_changes: true)
load_manifest(return_content: false, save_changes:)
end
def load_manifest(return_content: true, save_changes: true)
raise "manifest_url is blank" if manifest_url.blank?
# If we recently tried loading the manifest and got a 4xx HTTP status code # If we recently tried loading the manifest and got a 4xx HTTP status code
# (e.g. a 404, there's a surprising amount of these!), don't try again. But # (e.g. a 404, there's a surprising amount of these!), don't try again. But
# after enough time passes, if this is called again, we will! # after enough time passes, if this is called again, we will!
@ -74,22 +79,25 @@ class SwfAsset < ApplicationRecord
end end
begin begin
NeopetsMediaArchive.load_file(manifest_url) => {content:, source:} NeopetsMediaArchive.load_file(manifest_url, return_content:) =>
{content:, source:}
rescue NeopetsMediaArchive::ResponseNotOK => error rescue NeopetsMediaArchive::ResponseNotOK => error
Rails.logger.warn "Failed to load manifest for asset #{id}: " + Rails.logger.warn "Failed to load manifest for asset #{id}: " +
error.message error.message
self.manifest_loaded_at = Time.now self.manifest_loaded_at = Time.now
self.manifest_status_code = error.status self.manifest_status_code = error.status
save! save! if save_changes
return nil return nil
end end
if source == "network" || manifest_loaded_at.blank? if source == "network" || manifest_loaded_at.blank?
self.manifest_loaded_at = Time.now self.manifest_loaded_at = Time.now
self.manifest_status_code = 200 self.manifest_status_code = 200
save! save! if save_changes
end end
return nil unless return_content
begin begin
JSON.parse(content) JSON.parse(content)
rescue JSON::ParserError => error rescue JSON::ParserError => error
@ -99,11 +107,6 @@ class SwfAsset < ApplicationRecord
end end
end end
def preload_manifest
raise "manifest_url is blank" if manifest_url.blank?
NeopetsMediaArchive.preload_file(manifest_url)
end
MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com") MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com")
def manifest_asset_urls def manifest_asset_urls
return {} if manifest_url.nil? return {} if manifest_url.nil?
@ -306,7 +309,9 @@ class SwfAsset < ApplicationRecord
swf_assets.map do |swf_asset| swf_assets.map do |swf_asset|
semaphore.async do semaphore.async do
begin begin
swf_asset.preload_manifest # Don't save changes in this big async situation; we'll do it all
# in one batch after, to avoid too much database concurrency!
swf_asset.preload_manifest(save_changes: false)
rescue StandardError => error rescue StandardError => error
Rails.logger.error "Could not preload manifest for asset " + Rails.logger.error "Could not preload manifest for asset " +
"#{swf_asset.id} (#{swf_asset.manifest_url}): #{error.message}" "#{swf_asset.id} (#{swf_asset.manifest_url}): #{error.message}"
@ -319,6 +324,10 @@ class SwfAsset < ApplicationRecord
ensure ensure
barrier.stop # If something goes wrong, clean up all tasks. barrier.stop # If something goes wrong, clean up all tasks.
end end
SwfAsset.transaction do
swf_assets.each(&:save!)
end
end end
before_save do before_save do

View file

@ -37,7 +37,7 @@ module NeopetsMediaArchive
# we're not ready to use yet.) # we're not ready to use yet.)
if File.exist?(local_path) if File.exist?(local_path)
debug "Source file is already loaded, skipping: #{local_path}" debug "Source file is already loaded, skipping: #{local_path}"
return return {content: nil, source: "filesystem"}
end end
end end

View file

@ -56,11 +56,7 @@ namespace :swf_assets do
Sync do Sync do
saved_count = 0 saved_count = 0
swf_assets.find_in_batches(batch_size: 1000) do |swf_assets| swf_assets.find_in_batches(batch_size: 1000) do |swf_assets|
# NOTE: Loading the manifests can both write to the filesystem *and*
# to the database, because we track timestamp and status in the db!
SwfAsset.transaction do
SwfAsset.preload_manifests(swf_assets) SwfAsset.preload_manifests(swf_assets)
end
saved_count += swf_assets.size saved_count += swf_assets.size
puts "Loaded #{saved_count} of #{total_count} manifests" puts "Loaded #{saved_count} of #{total_count} manifests"
end end