Track when manifest was last loaded, and what status it returned

Now we're *really* duplicating with Impress 2020's system lol, but I
need a way to not keep trying to load manifests that are actually 404,
which are surprisingly plentiful!

This doesn't actually stop us from loading anything yet; it just records
the load timestamp and the HTTP status. Next, I'll add logic to skip
loading a manifest when its recorded status was 4xx recently.
This commit is contained in:
Emi Matchu 2024-02-25 15:35:04 -08:00
parent 067cee2d41
commit a684c915a9
5 changed files with 54 additions and 15 deletions

View file

@ -52,7 +52,34 @@ class SwfAsset < ApplicationRecord
def manifest
raise "manifest_url is blank" if manifest_url.blank?
@manifest ||= NeopetsMediaArchive.load_json(manifest_url)
@manifest ||= load_manifest
end
# Load this asset's manifest through NeopetsMediaArchive and parse it as JSON,
# recording the outcome of the load on this record.
#
# Side effects: sets `manifest_loaded_at` and `manifest_status_code` and calls
# `save!` when the archive raises ResponseNotOK, when the content came from
# the network, or when no load time has been recorded yet.
#
# Returns the parsed manifest, or nil when the archive raised ResponseNotOK or
# the content is not valid JSON. Both failures are logged as warnings.
def load_manifest
  begin
    NeopetsMediaArchive.load_file(manifest_url) => {content:, source:}
  rescue NeopetsMediaArchive::ResponseNotOK => error
    Rails.logger.warn "Failed to load manifest for asset #{id}: #{error.message}"

    # Remember the failing status together with the time of the attempt.
    self.manifest_loaded_at = Time.current
    self.manifest_status_code = error.status
    save!
    return nil
  end

  # A network load always refreshes the tracking fields; a filesystem load
  # only fills them in when no load time has been recorded so far.
  if source == "network" || manifest_loaded_at.blank?
    self.manifest_loaded_at = Time.current
    self.manifest_status_code = 200
    save!
  end

  begin
    JSON.parse(content)
  rescue JSON::ParserError => error
    Rails.logger.warn "Failed to parse manifest for asset #{id}: #{error.message}"
    nil
  end
end
def preload_manifest

View file

@ -17,11 +17,6 @@ module NeopetsMediaArchive
ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)
# Load the file from the given `images.neopets.com` URI, as JSON.
def self.load_json(uri)
JSON.parse(load_file(uri))
end
# Load the file from the given `images.neopets.com` URI.
def self.load_file(uri, return_content: true)
local_path = local_file_path(uri)
@ -31,7 +26,7 @@ module NeopetsMediaArchive
begin
content = File.read(local_path)
debug "Loaded source file from filesystem: #{local_path}"
return content
return {content: content, source: "filesystem"}
rescue Errno::ENOENT
# If it doesn't exist, that's fine: just move on and download it.
end
@ -53,7 +48,7 @@ module NeopetsMediaArchive
File.write(local_path, content)
info "Wrote source file to filesystem: #{local_path}"
return_content ? content : nil
{content: return_content ? content : nil, source: "network"}
end
# Load the file from the given `images.neopets.com` URI, but don't return its
@ -78,10 +73,9 @@ module NeopetsMediaArchive
# requests in parallel!
Sync do
response = INTERNET.get(uri)
if response.status == 404
raise NotFound, "origin server returned 404: #{uri}"
elsif response.status != 200
raise "expected status 200 but got #{response.status} (#{uri})"
if response.status != 200
raise ResponseNotOK.new(response.status),
"expected status 200 but got #{response.status} (#{uri})"
end
response.body.read
end
@ -106,7 +100,13 @@ module NeopetsMediaArchive
ROOT_PATH + path_within_archive(uri)
end
class NotFound < StandardError; end
# Raised when the origin server answers with a status other than 200.
#
# `status` holds the HTTP status code that was received. A message may be
# supplied either as the second constructor argument or through
# `raise ResponseNotOK.new(status), "message"`; when neither is given, a
# descriptive default is used instead of the bare status number.
class ResponseNotOK < StandardError
  attr_reader :status

  # status  - the HTTP status code returned by the server.
  # message - optional exception message; defaults to a description of the
  #           unexpected status.
  def initialize(status, message = nil)
    super(message || "expected status 200 but got #{status}")
    @status = status
  end
end
private

View file

@ -0,0 +1,6 @@
# Adds two columns to `swf_assets`: `manifest_loaded_at` (datetime) and
# `manifest_status_code` (integer).
class AddManifestLoadedAtAndManifestStatusCodeToSwfAssets < ActiveRecord::Migration[7.1]
  def change
    change_table :swf_assets do |t|
      t.datetime :manifest_loaded_at
      t.integer :manifest_status_code
    end
  end
end

View file

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do
ActiveRecord::Schema[7.1].define(version: 2024_02_25_231346) do
create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
t.integer "species_id", null: false
t.integer "color_id", null: false
@ -245,6 +245,8 @@ ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do
t.timestamp "manifest_cached_at"
t.string "known_glitches", limit: 128, default: ""
t.string "manifest_url"
t.datetime "manifest_loaded_at"
t.integer "manifest_status_code"
t.index ["body_id"], name: "swf_assets_body_id_and_object_id"
t.index ["type", "remote_id"], name: "swf_assets_type_and_id"
t.index ["zone_id"], name: "idx_swf_assets_zone_id"

View file

@ -56,7 +56,11 @@ namespace :swf_assets do
Sync do
saved_count = 0
swf_assets.find_in_batches(batch_size: 1000) do |swf_assets|
SwfAsset.preload_manifests(swf_assets)
# NOTE: Loading the manifests can both write to the filesystem *and*
# to the database, because we track timestamp and status in the db!
SwfAsset.transaction do
SwfAsset.preload_manifests(swf_assets)
end
saved_count += swf_assets.size
puts "Loaded #{saved_count} of #{total_count} manifests"
end