From 2cac048158cdd030ed973c9d8a592f1c9399d1d1 Mon Sep 17 00:00:00 2001 From: Emi Matchu Date: Sun, 25 Feb 2024 16:02:36 -0800 Subject: [PATCH] Save manifest load info when preloading them, too This was a bit tricky! When I initially turned it on, running `rails swf_assets:manifests:load` would trigger database errors of "oh no we can't get a connection from the pool!", because too many records were trying to concurrently save at once. So now, we give ourselves the ability to say `save_changes: false`, and then save them all in one batch after! That way, we're still saving by default in the edge cases where we're downloading and saving a manifest on the fly, but batching them in cases where we're likely to be dealing with a lot of them! --- app/models/swf_asset.rb | 31 +++++++++++++++++---------- app/services/neopets_media_archive.rb | 2 +- lib/tasks/swf_assets.rake | 6 +----- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/app/models/swf_asset.rb b/app/models/swf_asset.rb index a7bf739f..91b36ba7 100644 --- a/app/models/swf_asset.rb +++ b/app/models/swf_asset.rb @@ -51,11 +51,16 @@ class SwfAsset < ApplicationRecord end def manifest - raise "manifest_url is blank" if manifest_url.blank? @manifest ||= load_manifest end - def load_manifest + def preload_manifest(save_changes: true) + load_manifest(return_content: false, save_changes:) + end + + def load_manifest(return_content: true, save_changes: true) + raise "manifest_url is blank" if manifest_url.blank? + # If we recently tried loading the manifest and got a 4xx HTTP status code # (e.g. a 404, there's a surprising amount of these!), don't try again. But # after enough time passes, if this is called again, we will! @@ -74,22 +79,25 @@ class SwfAsset < ApplicationRecord end begin - NeopetsMediaArchive.load_file(manifest_url) => {content:, source:} + NeopetsMediaArchive.load_file(manifest_url, return_content:) => + {content:, source:} rescue NeopetsMediaArchive::ResponseNotOK => error Rails.logger.warn "Failed to load manifest for asset #{id}: " + error.message self.manifest_loaded_at = Time.now self.manifest_status_code = error.status - save! + save! if save_changes return nil end if source == "network" || manifest_loaded_at.blank? self.manifest_loaded_at = Time.now self.manifest_status_code = 200 - save! + save! if save_changes end + return nil unless return_content + begin JSON.parse(content) rescue JSON::ParserError => error @@ -99,11 +107,6 @@ class SwfAsset < ApplicationRecord end end - def preload_manifest - raise "manifest_url is blank" if manifest_url.blank? - NeopetsMediaArchive.preload_file(manifest_url) - end - MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com") def manifest_asset_urls return {} if manifest_url.nil? @@ -306,7 +309,9 @@ class SwfAsset < ApplicationRecord swf_assets.map do |swf_asset| semaphore.async do begin - swf_asset.preload_manifest + # Don't save changes in this big async situation; we'll do it all + # in one batch after, to avoid too much database concurrency! + swf_asset.preload_manifest(save_changes: false) rescue StandardError => error Rails.logger.error "Could not preload manifest for asset " + "#{swf_asset.id} (#{swf_asset.manifest_url}): #{error.message}" @@ -319,6 +324,10 @@ class SwfAsset < ApplicationRecord ensure barrier.stop # If something goes wrong, clean up all tasks. end + + SwfAsset.transaction do + swf_assets.each(&:save!) + end end before_save do diff --git a/app/services/neopets_media_archive.rb b/app/services/neopets_media_archive.rb index b84e2b48..a4ac1236 100644 --- a/app/services/neopets_media_archive.rb +++ b/app/services/neopets_media_archive.rb @@ -37,7 +37,7 @@ module NeopetsMediaArchive # we're not ready to use yet.) if File.exist?(local_path) debug "Source file is already loaded, skipping: #{local_path}" - return + return {content: nil, source: "filesystem"} end end diff --git a/lib/tasks/swf_assets.rake b/lib/tasks/swf_assets.rake index d44166a1..a5bb4b66 100644 --- a/lib/tasks/swf_assets.rake +++ b/lib/tasks/swf_assets.rake @@ -56,11 +56,7 @@ namespace :swf_assets do Sync do saved_count = 0 swf_assets.find_in_batches(batch_size: 1000) do |swf_assets| - # NOTE: Loading the manifests can both write to the filesystem *and* - # to the database, because we track timestamp and status in the db! - SwfAsset.transaction do - SwfAsset.preload_manifests(swf_assets) - end + SwfAsset.preload_manifests(swf_assets) saved_count += swf_assets.size puts "Loaded #{saved_count} of #{total_count} manifests" end