From a684c915a9c6e7a27567dae5eb39cf0bd3eb2570 Mon Sep 17 00:00:00 2001 From: Emi Matchu Date: Sun, 25 Feb 2024 15:35:04 -0800 Subject: [PATCH] Track when manifest was last loaded, and what status it returned Now we're *really* duplicating with Impress 2020's system lol, but I need a way to not keep trying to load manifests that are actually 404, which are surprisingly plentiful! This doesn't actually stop us from loading anything yet, it just tracks the timestamps and the HTTP status! But next I'll add logic to skip when it was 4xx recently. --- app/models/swf_asset.rb | 29 ++++++++++++++++++- app/services/neopets_media_archive.rb | 24 +++++++-------- ..._and_manifest_status_code_to_swf_assets.rb | 6 ++++ db/schema.rb | 4 ++- lib/tasks/swf_assets.rake | 6 +++- 5 files changed, 54 insertions(+), 15 deletions(-) create mode 100644 db/migrate/20240225231346_add_manifest_loaded_at_and_manifest_status_code_to_swf_assets.rb diff --git a/app/models/swf_asset.rb b/app/models/swf_asset.rb index 858303b3..dcde8273 100644 --- a/app/models/swf_asset.rb +++ b/app/models/swf_asset.rb @@ -52,7 +52,34 @@ class SwfAsset < ApplicationRecord def manifest raise "manifest_url is blank" if manifest_url.blank? - @manifest ||= NeopetsMediaArchive.load_json(manifest_url) + @manifest ||= load_manifest + end + + def load_manifest + begin + NeopetsMediaArchive.load_file(manifest_url) => {content:, source:} + rescue NeopetsMediaArchive::ResponseNotOK => error + Rails.logger.warn "Failed to load manifest for asset #{id}: " + + error.message + self.manifest_loaded_at = DateTime.now + self.manifest_status_code = error.status + save! + return nil + end + + if source == "network" || manifest_loaded_at.blank? + self.manifest_loaded_at = DateTime.now + self.manifest_status_code = 200 + save! + end + + begin + JSON.parse(content) + rescue JSON::ParserError => error + Rails.logger.warn "Failed to parse manifest for asset #{id}: " + + error.message + return nil + end end def preload_manifest diff --git a/app/services/neopets_media_archive.rb b/app/services/neopets_media_archive.rb index 1b3ff17d..b84e2b48 100644 --- a/app/services/neopets_media_archive.rb +++ b/app/services/neopets_media_archive.rb @@ -17,11 +17,6 @@ module NeopetsMediaArchive ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root) - # Load the file from the given `images.neopets.com` URI, as JSON. - def self.load_json(uri) - JSON.parse(load_file(uri)) - end - # Load the file from the given `images.neopets.com` URI. def self.load_file(uri, return_content: true) local_path = local_file_path(uri) @@ -31,7 +26,7 @@ module NeopetsMediaArchive begin content = File.read(local_path) debug "Loaded source file from filesystem: #{local_path}" - return content + return {content: content, source: "filesystem"} rescue Errno::ENOENT # If it doesn't exist, that's fine: just move on and download it. end @@ -53,7 +48,7 @@ module NeopetsMediaArchive File.write(local_path, content) info "Wrote source file to filesystem: #{local_path}" - return_content ? content : nil + {content: return_content ? content : nil, source: "network"} end # Load the file from the given `images.neopets.com` URI, but don't return its @@ -78,10 +73,9 @@ module NeopetsMediaArchive # requests in parallel! Sync do response = INTERNET.get(uri) - if response.status == 404 - raise NotFound, "origin server returned 404: #{uri}" - elsif response.status != 200 - raise "expected status 200 but got #{response.status} (#{uri})" + if response.status != 200 + raise ResponseNotOK.new(response.status), + "expected status 200 but got #{response.status} (#{uri})" end response.body.read end @@ -106,7 +100,13 @@ module NeopetsMediaArchive ROOT_PATH + path_within_archive(uri) end - class NotFound < StandardError; end + class ResponseNotOK < StandardError + attr_reader :status + def initialize(status) + super + @status = status + end + end private diff --git a/db/migrate/20240225231346_add_manifest_loaded_at_and_manifest_status_code_to_swf_assets.rb b/db/migrate/20240225231346_add_manifest_loaded_at_and_manifest_status_code_to_swf_assets.rb new file mode 100644 index 00000000..728834fe --- /dev/null +++ b/db/migrate/20240225231346_add_manifest_loaded_at_and_manifest_status_code_to_swf_assets.rb @@ -0,0 +1,6 @@ +class AddManifestLoadedAtAndManifestStatusCodeToSwfAssets < ActiveRecord::Migration[7.1] + def change + add_column :swf_assets, :manifest_loaded_at, :datetime + add_column :swf_assets, :manifest_status_code, :integer + end +end diff --git a/db/schema.rb b/db/schema.rb index 5f7ba0d8..cab8166e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do +ActiveRecord::Schema[7.1].define(version: 2024_02_25_231346) do create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| t.integer "species_id", null: false t.integer "color_id", null: false @@ -245,6 +245,8 @@ ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do t.timestamp "manifest_cached_at" t.string "known_glitches", limit: 128, default: "" t.string "manifest_url" + t.datetime "manifest_loaded_at" + t.integer "manifest_status_code" t.index ["body_id"], name: "swf_assets_body_id_and_object_id" t.index ["type", "remote_id"], name: "swf_assets_type_and_id" t.index ["zone_id"], name: "idx_swf_assets_zone_id" diff --git a/lib/tasks/swf_assets.rake b/lib/tasks/swf_assets.rake index a5bb4b66..d44166a1 100644 --- a/lib/tasks/swf_assets.rake +++ b/lib/tasks/swf_assets.rake @@ -56,7 +56,11 @@ namespace :swf_assets do Sync do saved_count = 0 swf_assets.find_in_batches(batch_size: 1000) do |swf_assets| - SwfAsset.preload_manifests(swf_assets) + # NOTE: Loading the manifests can both write to the filesystem *and* + # to the database, because we track timestamp and status in the db! + SwfAsset.transaction do + SwfAsset.preload_manifests(swf_assets) + end saved_count += swf_assets.size puts "Loaded #{saved_count} of #{total_count} manifests" end