Create swf_assets:manifests:load task to save all manifest files
Doing that sweet, sweet backfill!! It's not exactly *fast*, since there are about 570k records to work through, but it's pretty good all things considered! Thanks, surprisingly-reusable async code!
This commit is contained in:
parent
9a3b33ea2f
commit
992954ce89
2 changed files with 98 additions and 60 deletions
|
@ -117,10 +117,12 @@ class SwfAsset < ApplicationRecord
|
|||
end
|
||||
|
||||
# Loads the manifest for this asset from `manifest_url` via
# NeopetsMediaArchive.load_json, and returns whatever that call returns
# (presumably the parsed JSON document; confirm against NeopetsMediaArchive).
#
# Raises a RuntimeError ("manifest_url is blank") when no manifest URL is set.
def manifest
  if manifest_url.blank?
    raise "manifest_url is blank"
  end

  NeopetsMediaArchive.load_json(manifest_url)
end
|
||||
|
||||
# Asks NeopetsMediaArchive to preload the file at this asset's
# `manifest_url`, returning whatever NeopetsMediaArchive.preload_file returns.
#
# Raises a RuntimeError ("manifest_url is blank") when no manifest URL is set.
def preload_manifest
  if manifest_url.blank?
    raise "manifest_url is blank"
  end

  NeopetsMediaArchive.preload_file(manifest_url)
end
|
||||
|
||||
|
@ -251,7 +253,14 @@ class SwfAsset < ApplicationRecord
|
|||
# NeopetsMediaArchive will share a pool of persistent connections for
|
||||
# them.
|
||||
swf_assets.map do |swf_asset|
|
||||
semaphore.async { swf_asset.preload_manifest }
|
||||
semaphore.async do
|
||||
begin
|
||||
swf_asset.preload_manifest
|
||||
rescue StandardError => error
|
||||
Rails.logger.error "Could not preload manifest for asset " +
|
||||
"#{swf_asset.id} (#{swf_asset.manifest_url}): #{error.message}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Wait until all tasks are done.
|
||||
|
|
|
@ -35,15 +35,43 @@ namespace :swf_assets do
|
|||
end
|
||||
end
|
||||
|
||||
namespace :manifests do
|
||||
desc "Save all known manifests to the Neopets Media Archive"
task load: [:environment] do
  # Only errors go to STDOUT; the info-level messages about successful
  # saves would just be noise for a run this large.
  Rails.logger = Logger.new(STDOUT, level: :error)

  # Select every asset whose manifest URL is known. (There's no database
  # filter for "manifest already downloaded", but that's okay: the preload
  # method will quickly check that for us.)
  assets_with_manifest = SwfAsset.where.not(manifest_url: nil)
  total = assets_with_manifest.count
  puts "Found #{total} assets with manifests"

  # Work through the assets 1000 at a time, loading each batch's manifests
  # concurrently. The outer Sync wraps the whole run so that a keyboard
  # interrupt propagates all the way up to here, instead of only cancelling
  # the batch that is currently in flight.
  Sync do
    loaded = 0
    assets_with_manifest.find_in_batches(batch_size: 1000) do |batch|
      SwfAsset.preload_manifests(batch)
      loaded += batch.size
      puts "Loaded #{loaded} of #{total} manifests"
    end
  end
end
|
||||
|
||||
desc "Backfill manifest_url for SwfAsset models"
|
||||
task manifests: [:environment] do
|
||||
task urls: [:environment] do
|
||||
timeout = ENV.fetch("TIMEOUT", "5").to_i
|
||||
|
||||
assets = SwfAsset.where(manifest_url: nil)
|
||||
count = assets.count
|
||||
puts "Found #{count} assets without manifests"
|
||||
|
||||
Async do
|
||||
Sync do
|
||||
# Share a pool of persistent connections, rather than reconnecting on
|
||||
# each request. (This library does that automatically!)
|
||||
internet = Async::HTTP::Internet.instance
|
||||
|
@ -109,6 +137,7 @@ namespace :swf_assets do
|
|||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Matches SWF URLs under //images.neopets.com/cp/{bio,items}/swf/, with an
# optional "http:" / "https:" scheme prefix. Captures:
#   1. "bio" or "items"
#   2. the path portion before the final underscore
#   3. the lowercase alphanumeric token between that underscore and ".swf"
#
# Anchored with \A and \z so the *entire* string must be the URL. Ruby's ^ and
# $ are per-line anchors, which would also accept a URL sitting on one line of
# a longer multi-line string.
SWF_URL_PATTERN = %r{\A(?:https?:)?//images\.neopets\.com/cp/(bio|items)/swf/(.+?)_([a-z0-9]+)\.swf\z}
|
||||
def infer_manifest_url(swf_url, internet)
|
||||
|
|
Loading…
Reference in a new issue