Track when manifest was last loaded, and what status it returned

Now we're *really* duplicating Impress 2020's system lol, but I
need a way to stop repeatedly trying to load manifests that actually
return 404, which are surprisingly plentiful!

This doesn't actually stop us from loading anything yet, it just tracks
the timestamps and the HTTP status! But next I'll add logic to skip
loading a manifest when its last recorded status was 4xx recently.
This commit is contained in:
Emi Matchu 2024-02-25 15:35:04 -08:00
parent 067cee2d41
commit a684c915a9
5 changed files with 54 additions and 15 deletions

View file

@ -52,7 +52,34 @@ class SwfAsset < ApplicationRecord
def manifest def manifest
raise "manifest_url is blank" if manifest_url.blank? raise "manifest_url is blank" if manifest_url.blank?
@manifest ||= NeopetsMediaArchive.load_json(manifest_url) @manifest ||= load_manifest
end
# Load and parse this asset's manifest, recording when it was loaded and
# which HTTP status the load produced.
#
# The file is fetched through NeopetsMediaArchive.load_file, which reports
# whether the content came from the "filesystem" or from the "network".
#
# Returns the parsed JSON, or nil when the origin server answered with a
# non-200 status or when the content is not valid JSON.
def load_manifest
  begin
    NeopetsMediaArchive.load_file(manifest_url) => {content:, source:}
  rescue NeopetsMediaArchive::ResponseNotOK => error
    Rails.logger.warn "Failed to load manifest for asset #{id}: #{error.message}"
    # Remember the failing status, so it's known when and how this manifest
    # last failed to load.
    update!(manifest_loaded_at: Time.current, manifest_status_code: error.status)
    return nil
  end

  # Only touch the record when the file was freshly downloaded, or when no
  # load has ever been recorded for it; a repeat read from the filesystem
  # leaves the existing timestamp and status alone.
  if source == "network" || manifest_loaded_at.blank?
    update!(manifest_loaded_at: Time.current, manifest_status_code: 200)
  end

  begin
    JSON.parse(content)
  rescue JSON::ParserError => error
    Rails.logger.warn "Failed to parse manifest for asset #{id}: #{error.message}"
    nil
  end
end
def preload_manifest def preload_manifest

View file

@ -17,11 +17,6 @@ module NeopetsMediaArchive
ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root) ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)
# Load the file from the given `images.neopets.com` URI, as JSON.
def self.load_json(uri)
JSON.parse(load_file(uri))
end
# Load the file from the given `images.neopets.com` URI. # Load the file from the given `images.neopets.com` URI.
def self.load_file(uri, return_content: true) def self.load_file(uri, return_content: true)
local_path = local_file_path(uri) local_path = local_file_path(uri)
@ -31,7 +26,7 @@ module NeopetsMediaArchive
begin begin
content = File.read(local_path) content = File.read(local_path)
debug "Loaded source file from filesystem: #{local_path}" debug "Loaded source file from filesystem: #{local_path}"
return content return {content: content, source: "filesystem"}
rescue Errno::ENOENT rescue Errno::ENOENT
# If it doesn't exist, that's fine: just move on and download it. # If it doesn't exist, that's fine: just move on and download it.
end end
@ -53,7 +48,7 @@ module NeopetsMediaArchive
File.write(local_path, content) File.write(local_path, content)
info "Wrote source file to filesystem: #{local_path}" info "Wrote source file to filesystem: #{local_path}"
return_content ? content : nil {content: return_content ? content : nil, source: "network"}
end end
# Load the file from the given `images.neopets.com` URI, but don't return its # Load the file from the given `images.neopets.com` URI, but don't return its
@ -78,10 +73,9 @@ module NeopetsMediaArchive
# requests in parallel! # requests in parallel!
Sync do Sync do
response = INTERNET.get(uri) response = INTERNET.get(uri)
if response.status == 404 if response.status != 200
raise NotFound, "origin server returned 404: #{uri}" raise ResponseNotOK.new(response.status),
elsif response.status != 200 "expected status 200 but got #{response.status} (#{uri})"
raise "expected status 200 but got #{response.status} (#{uri})"
end end
response.body.read response.body.read
end end
@ -106,7 +100,13 @@ module NeopetsMediaArchive
ROOT_PATH + path_within_archive(uri) ROOT_PATH + path_within_archive(uri)
end end
# Raised when the origin server answers with any status other than 200.
# The offending HTTP status is exposed via `status`, so callers can record
# it or branch on it.
class ResponseNotOK < StandardError
  attr_reader :status

  # status  - the HTTP status code the server returned.
  # message - optional error message; defaults to a description of the
  #           status. A message given to `raise` still takes precedence,
  #           e.g. `raise ResponseNotOK.new(404), "custom message"`.
  def initialize(status, message = nil)
    # NOTE: Pass the message explicitly. A bare `super` would forward
    # `status` itself as the exception message.
    super(message || "expected status 200 but got #{status}")
    @status = status
  end
end
private private

View file

@ -0,0 +1,6 @@
# Adds two bookkeeping columns to `swf_assets`: when the asset's manifest was
# last loaded, and the HTTP status code that load returned.
class AddManifestLoadedAtAndManifestStatusCodeToSwfAssets < ActiveRecord::Migration[7.1]
  def change
    change_table :swf_assets do |t|
      t.datetime :manifest_loaded_at
      t.integer :manifest_status_code
    end
  end
end

View file

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do ActiveRecord::Schema[7.1].define(version: 2024_02_25_231346) do
create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t| create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
t.integer "species_id", null: false t.integer "species_id", null: false
t.integer "color_id", null: false t.integer "color_id", null: false
@ -245,6 +245,8 @@ ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do
t.timestamp "manifest_cached_at" t.timestamp "manifest_cached_at"
t.string "known_glitches", limit: 128, default: "" t.string "known_glitches", limit: 128, default: ""
t.string "manifest_url" t.string "manifest_url"
t.datetime "manifest_loaded_at"
t.integer "manifest_status_code"
t.index ["body_id"], name: "swf_assets_body_id_and_object_id" t.index ["body_id"], name: "swf_assets_body_id_and_object_id"
t.index ["type", "remote_id"], name: "swf_assets_type_and_id" t.index ["type", "remote_id"], name: "swf_assets_type_and_id"
t.index ["zone_id"], name: "idx_swf_assets_zone_id" t.index ["zone_id"], name: "idx_swf_assets_zone_id"

View file

@ -56,7 +56,11 @@ namespace :swf_assets do
Sync do Sync do
saved_count = 0 saved_count = 0
swf_assets.find_in_batches(batch_size: 1000) do |swf_assets| swf_assets.find_in_batches(batch_size: 1000) do |swf_assets|
# NOTE: Loading the manifests can both write to the filesystem *and*
# to the database, because we track timestamp and status in the db!
SwfAsset.transaction do
SwfAsset.preload_manifests(swf_assets) SwfAsset.preload_manifests(swf_assets)
end
saved_count += swf_assets.size saved_count += swf_assets.size
puts "Loaded #{saved_count} of #{total_count} manifests" puts "Loaded #{saved_count} of #{total_count} manifests"
end end