Track when each manifest was last loaded, and what HTTP status it returned
Now we're *really* duplicating Impress 2020's system lol, but I need a way to avoid repeatedly trying to load manifests that actually return 404, which are surprisingly plentiful! This doesn't stop us from loading anything yet; it just tracks the timestamp and the HTTP status of each load. Next, I'll add logic to skip loading when the last attempt returned a 4xx recently.
This commit is contained in:
parent
067cee2d41
commit
a684c915a9
5 changed files with 54 additions and 15 deletions
|
@ -52,7 +52,34 @@ class SwfAsset < ApplicationRecord
|
||||||
|
|
||||||
def manifest
|
def manifest
|
||||||
raise "manifest_url is blank" if manifest_url.blank?
|
raise "manifest_url is blank" if manifest_url.blank?
|
||||||
@manifest ||= NeopetsMediaArchive.load_json(manifest_url)
|
@manifest ||= load_manifest
|
||||||
|
end
|
||||||
|
|
||||||
|
# Load and parse this asset's manifest, recording when it was loaded and which
# HTTP status the load produced.
#
# The raw file is requested from `NeopetsMediaArchive.load_file`, which
# returns a hash with the file `content` and a `source` of either
# "filesystem" or "network".
#
# Side effects: writes `manifest_loaded_at` and `manifest_status_code` to the
# database (via `update!`) when the archive raises `ResponseNotOK`, when the
# content was freshly fetched from the network, or when no load has been
# recorded for this asset yet.
#
# Returns the parsed JSON manifest, or nil if the archive raised
# `ResponseNotOK` or the content was not valid JSON. Both of those failures
# are logged as warnings instead of being raised.
def load_manifest
  begin
    NeopetsMediaArchive.load_file(manifest_url) => {content:, source:}
  rescue NeopetsMediaArchive::ResponseNotOK => error
    Rails.logger.warn "Failed to load manifest for asset #{id}: #{error.message}"
    # Remember the failing status, so the failure is visible on the record.
    update!(manifest_loaded_at: Time.current, manifest_status_code: error.status)
    return nil
  end

  # A filesystem hit doesn't tell us anything new about the origin server, so
  # only stamp the record on a network load, or when it has no stamp at all.
  if source == "network" || manifest_loaded_at.blank?
    update!(manifest_loaded_at: Time.current, manifest_status_code: 200)
  end

  begin
    JSON.parse(content)
  rescue JSON::ParserError => error
    Rails.logger.warn "Failed to parse manifest for asset #{id}: #{error.message}"
    nil
  end
end
|
end
|
||||||
|
|
||||||
def preload_manifest
|
def preload_manifest
|
||||||
|
|
|
@ -17,11 +17,6 @@ module NeopetsMediaArchive
|
||||||
|
|
||||||
ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)
|
ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)
|
||||||
|
|
||||||
# Load the file from the given `images.neopets.com` URI, as JSON.
|
|
||||||
def self.load_json(uri)
|
|
||||||
JSON.parse(load_file(uri))
|
|
||||||
end
|
|
||||||
|
|
||||||
# Load the file from the given `images.neopets.com` URI.
|
# Load the file from the given `images.neopets.com` URI.
|
||||||
def self.load_file(uri, return_content: true)
|
def self.load_file(uri, return_content: true)
|
||||||
local_path = local_file_path(uri)
|
local_path = local_file_path(uri)
|
||||||
|
@ -31,7 +26,7 @@ module NeopetsMediaArchive
|
||||||
begin
|
begin
|
||||||
content = File.read(local_path)
|
content = File.read(local_path)
|
||||||
debug "Loaded source file from filesystem: #{local_path}"
|
debug "Loaded source file from filesystem: #{local_path}"
|
||||||
return content
|
return {content: content, source: "filesystem"}
|
||||||
rescue Errno::ENOENT
|
rescue Errno::ENOENT
|
||||||
# If it doesn't exist, that's fine: just move on and download it.
|
# If it doesn't exist, that's fine: just move on and download it.
|
||||||
end
|
end
|
||||||
|
@ -53,7 +48,7 @@ module NeopetsMediaArchive
|
||||||
File.write(local_path, content)
|
File.write(local_path, content)
|
||||||
info "Wrote source file to filesystem: #{local_path}"
|
info "Wrote source file to filesystem: #{local_path}"
|
||||||
|
|
||||||
return_content ? content : nil
|
{content: return_content ? content : nil, source: "network"}
|
||||||
end
|
end
|
||||||
|
|
||||||
# Load the file from the given `images.neopets.com` URI, but don't return its
|
# Load the file from the given `images.neopets.com` URI, but don't return its
|
||||||
|
@ -78,10 +73,9 @@ module NeopetsMediaArchive
|
||||||
# requests in parallel!
|
# requests in parallel!
|
||||||
Sync do
|
Sync do
|
||||||
response = INTERNET.get(uri)
|
response = INTERNET.get(uri)
|
||||||
if response.status == 404
|
if response.status != 200
|
||||||
raise NotFound, "origin server returned 404: #{uri}"
|
raise ResponseNotOK.new(response.status),
|
||||||
elsif response.status != 200
|
"expected status 200 but got #{response.status} (#{uri})"
|
||||||
raise "expected status 200 but got #{response.status} (#{uri})"
|
|
||||||
end
|
end
|
||||||
response.body.read
|
response.body.read
|
||||||
end
|
end
|
||||||
|
@ -106,7 +100,13 @@ module NeopetsMediaArchive
|
||||||
ROOT_PATH + path_within_archive(uri)
|
ROOT_PATH + path_within_archive(uri)
|
||||||
end
|
end
|
||||||
|
|
||||||
class NotFound < StandardError; end
|
# Raised when the origin server answers with a status other than 200.
#
# The offending HTTP status is kept on the exception as `status`, so callers
# can rescue this error and inspect or record the status code.
class ResponseNotOK < StandardError
  # The HTTP status code of the response that triggered this error.
  attr_reader :status

  # status  - the HTTP status code the server returned.
  # message - optional human-readable description. Defaults to a sentence
  #           naming the status, instead of the bare status number that a
  #           zero-argument `super` would have used as the message.
  #
  # `raise ResponseNotOK.new(status), "custom message"` keeps working: `raise`
  # swaps in the custom message while preserving `status`.
  def initialize(status, message = "expected status 200 but got #{status}")
    super(message)
    @status = status
  end
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Adds two columns to `swf_assets`: a `manifest_loaded_at` datetime and a
# `manifest_status_code` integer.
class AddManifestLoadedAtAndManifestStatusCodeToSwfAssets < ActiveRecord::Migration[7.1]
  def change
    change_table :swf_assets do |t|
      t.datetime :manifest_loaded_at
      t.integer :manifest_status_code
    end
  end
end
|
|
@ -10,7 +10,7 @@
|
||||||
#
|
#
|
||||||
# It's strongly recommended that you check this file into your version control system.
|
# It's strongly recommended that you check this file into your version control system.
|
||||||
|
|
||||||
ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do
|
ActiveRecord::Schema[7.1].define(version: 2024_02_25_231346) do
|
||||||
create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
|
create_table "alt_styles", charset: "utf8mb4", collation: "utf8mb4_unicode_ci", force: :cascade do |t|
|
||||||
t.integer "species_id", null: false
|
t.integer "species_id", null: false
|
||||||
t.integer "color_id", null: false
|
t.integer "color_id", null: false
|
||||||
|
@ -245,6 +245,8 @@ ActiveRecord::Schema[7.1].define(version: 2024_02_21_005949) do
|
||||||
t.timestamp "manifest_cached_at"
|
t.timestamp "manifest_cached_at"
|
||||||
t.string "known_glitches", limit: 128, default: ""
|
t.string "known_glitches", limit: 128, default: ""
|
||||||
t.string "manifest_url"
|
t.string "manifest_url"
|
||||||
|
t.datetime "manifest_loaded_at"
|
||||||
|
t.integer "manifest_status_code"
|
||||||
t.index ["body_id"], name: "swf_assets_body_id_and_object_id"
|
t.index ["body_id"], name: "swf_assets_body_id_and_object_id"
|
||||||
t.index ["type", "remote_id"], name: "swf_assets_type_and_id"
|
t.index ["type", "remote_id"], name: "swf_assets_type_and_id"
|
||||||
t.index ["zone_id"], name: "idx_swf_assets_zone_id"
|
t.index ["zone_id"], name: "idx_swf_assets_zone_id"
|
||||||
|
|
|
@ -56,7 +56,11 @@ namespace :swf_assets do
|
||||||
Sync do
|
Sync do
|
||||||
saved_count = 0
|
saved_count = 0
|
||||||
swf_assets.find_in_batches(batch_size: 1000) do |swf_assets|
|
swf_assets.find_in_batches(batch_size: 1000) do |swf_assets|
|
||||||
|
# NOTE: Loading the manifests can both write to the filesystem *and*
|
||||||
|
# to the database, because we track timestamp and status in the db!
|
||||||
|
SwfAsset.transaction do
|
||||||
SwfAsset.preload_manifests(swf_assets)
|
SwfAsset.preload_manifests(swf_assets)
|
||||||
|
end
|
||||||
saved_count += swf_assets.size
|
saved_count += swf_assets.size
|
||||||
puts "Loaded #{saved_count} of #{total_count} manifests"
|
puts "Loaded #{saved_count} of #{total_count} manifests"
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue