Preload many manifests concurrently for the Alt Styles page

I'm gonna also use this for a task to try to warm up *all* the
manifests in the database! But to start, just a simple one, to prepare
the alt styles page quickly on first run. (This doesn't really matter
in production now that I've already visited the page once, but it helps
when resetting things in dev, and I think it's more about establishing
the pattern!)
This commit is contained in:
Emi Matchu 2024-02-23 13:45:12 -08:00
parent f6cece9a59
commit 9a3b33ea2f
3 changed files with 54 additions and 11 deletions

View file

@ -8,6 +8,10 @@ class AltStylesController < ApplicationController
@alt_styles = @alt_styles.merge(@species.alt_styles)
end
# We're going to link to the HTML5 image URL, so make sure we have all the
# manifests ready!
SwfAsset.preload_manifests @alt_styles.map(&:swf_assets).flatten
respond_to do |format|
format.html { render }
format.json {

View file

@ -1,3 +1,6 @@
require 'async'
require 'async/barrier'
require 'async/semaphore'
require 'fileutils'
require 'uri'
@ -117,6 +120,10 @@ class SwfAsset < ApplicationRecord
NeopetsMediaArchive.load_json(manifest_url)
end
# Ask the NeopetsMediaArchive to preload this asset's manifest file.
#
# Assets with no manifest URL are skipped and `nil` is returned, matching how
# `manifest_asset_urls` treats a nil `manifest_url`. This keeps one
# manifest-less asset from failing a whole batch preload.
#
# Returns whatever `NeopetsMediaArchive.preload_file` returns, or nil if
# there is no manifest URL.
def preload_manifest
  return if manifest_url.nil?

  NeopetsMediaArchive.preload_file(manifest_url)
end
MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com")
def manifest_asset_urls
return {} if manifest_url.nil?
@ -229,6 +236,31 @@ class SwfAsset < ApplicationRecord
))
end
# Given a list of SWF assets, ensure all of their manifests are loaded, with
# fast concurrent execution!
#
# swf_assets  - an enumerable of objects responding to `preload_manifest`.
# concurrency - the maximum number of manifests to load at the same time
#               (defaults to 10).
#
# Returns the result of waiting on the barrier. If any task raises, the
# remaining tasks are stopped before the error propagates to the caller.
def self.preload_manifests(swf_assets, concurrency: 10)
  # The barrier tracks every task we start, so we can wait on (or stop) them
  # all as a group.
  barrier = Async::Barrier.new

  Sync do
    # The semaphore caps how many tasks run at once; its tasks are created
    # under the barrier so the barrier knows about all of them.
    semaphore = Async::Semaphore.new(concurrency, parent: barrier)

    # Start one task per asset. Because of the semaphore, they run at most
    # `concurrency` at a time rather than all at once, and the
    # NeopetsMediaArchive will share a pool of persistent connections for
    # them.
    swf_assets.each do |swf_asset|
      semaphore.async { swf_asset.preload_manifest }
    end

    # Wait until all tasks are done.
    barrier.wait
  ensure
    barrier.stop # If something goes wrong, clean up all tasks.
  end
end
before_save do
# If an asset body ID changes, that means more than one body ID has been
# linked to it, meaning that it's probably wearable by all bodies.

View file

@ -1,5 +1,5 @@
require "addressable/uri"
require "httparty"
require "async/http/internet/instance"
require "json"
# The Neopets Media Archive is a service that mirrors images.neopets.com files
@ -11,8 +11,9 @@ require "json"
# long-term archive, not dependent on their services having 100% uptime in
# order for us to operate. We never discard old files, we just keep going!
module NeopetsMediaArchive
include HTTParty
base_uri "https://images.neopets.com/"
# Share a pool of persistent connections, rather than reconnecting on
# each request. (This library does that automatically!)
INTERNET = Async::HTTP::Internet.instance
ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)
@ -46,9 +47,8 @@ module NeopetsMediaArchive
end
# Download the file from the origin, then save a copy for next time.
response = load_file_from_origin(uri)
content = load_file_from_origin(uri)
info "Loaded source file from origin: #{uri}"
content = response.body
local_path.dirname.mkpath
File.write(local_path, content)
info "Wrote source file to filesystem: #{local_path}"
@ -71,13 +71,20 @@ module NeopetsMediaArchive
"https://images.neopets.com, but got #{uri}"
end
response = get(uri)
if response.code == 404
# By running this request in a `Sync` block, we make this method look
# synchronous to the caller—but if run in the context of an async task, it
# will pause execution and move onto other work until the request is done.
# We use this in the `swf_assets:manifests:load` task to perform many
# requests in parallel!
Sync do
response = INTERNET.get(uri)
if response.status == 404
raise NotFound, "origin server returned 404: #{uri}"
elsif response.code != 200
raise "expected status 200 but got #{response.code} (#{uri})"
elsif response.status != 200
raise "expected status 200 but got #{response.status} (#{uri})"
end
response.body.read
end
response
end
def self.path_within_archive(uri)