Preload many manifests concurrently for the Alt Styles page

I'm gonna also use this for a task to try to warm up *all* the
manifests in the database! But to start, just a simple one, to prepare
the alt styles page quickly on first run. (This doesn't really matter
in production now that I've already visited the page once, but it helps
when resetting things in dev, and I think it's more about establishing
the pattern!)
This commit is contained in:
Emi Matchu 2024-02-23 13:45:12 -08:00
parent f6cece9a59
commit 9a3b33ea2f
3 changed files with 54 additions and 11 deletions

View file

@ -8,6 +8,10 @@ class AltStylesController < ApplicationController
@alt_styles = @alt_styles.merge(@species.alt_styles) @alt_styles = @alt_styles.merge(@species.alt_styles)
end end
# We're going to link to the HTML5 image URL, so make sure we have all the
# manifests ready!
SwfAsset.preload_manifests @alt_styles.map(&:swf_assets).flatten
respond_to do |format| respond_to do |format|
format.html { render } format.html { render }
format.json { format.json {

View file

@ -1,3 +1,6 @@
require 'async'
require 'async/barrier'
require 'async/semaphore'
require 'fileutils' require 'fileutils'
require 'uri' require 'uri'
@ -117,6 +120,10 @@ class SwfAsset < ApplicationRecord
NeopetsMediaArchive.load_json(manifest_url) NeopetsMediaArchive.load_json(manifest_url)
end end
# Ensure this asset's manifest has been fetched into the Neopets Media
# Archive, by delegating to `NeopetsMediaArchive.preload_file`.
#
# Assets without a manifest URL have nothing to fetch, so they are skipped
# (returning nil) instead of handing a nil URL to the archive. This matches
# `manifest_asset_urls`, which also treats a nil manifest URL as "no data".
def preload_manifest
  url = manifest_url
  return if url.nil?

  NeopetsMediaArchive.preload_file(url)
end
MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com") MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com")
def manifest_asset_urls def manifest_asset_urls
return {} if manifest_url.nil? return {} if manifest_url.nil?
@ -229,6 +236,31 @@ class SwfAsset < ApplicationRecord
)) ))
end end
# Given a list of SWF assets, ensure all of their manifests are loaded, with
# fast concurrent execution!
#
# Assets with no manifest URL are skipped, and when several assets share a
# manifest URL only the first one is scheduled, so the same manifest isn't
# requested more than once in a batch.
#
# swf_assets      - an enumerable of objects responding to `manifest_url`
#                   and `preload_manifest`.
# max_concurrency - how many manifests may be loading at the same time
#                   (defaults to 10).
#
# If any task raises, the remaining tasks are stopped via the barrier before
# this method exits.
def self.preload_manifests(swf_assets, max_concurrency: 10)
  # Nothing to fetch for a nil URL, and no point fetching the same URL twice.
  assets_to_load = swf_assets
    .reject { |swf_asset| swf_asset.manifest_url.nil? }
    .uniq(&:manifest_url)

  # Blocks all tasks beneath it.
  barrier = Async::Barrier.new

  Sync do
    # Only allow `max_concurrency` manifests to be loaded at a time.
    semaphore = Async::Semaphore.new(max_concurrency, parent: barrier)

    # Load all the manifests in async tasks. The semaphore keeps this to
    # `max_concurrency` at a time rather than all at once. We only want the
    # side effect of scheduling each task, so `each` rather than `map`.
    assets_to_load.each do |swf_asset|
      semaphore.async { swf_asset.preload_manifest }
    end

    # Wait until all tasks are done.
    barrier.wait
  ensure
    barrier.stop # If something goes wrong, clean up all tasks.
  end
end
before_save do before_save do
# If an asset body ID changes, that means more than one body ID has been # If an asset body ID changes, that means more than one body ID has been
# linked to it, meaning that it's probably wearable by all bodies. # linked to it, meaning that it's probably wearable by all bodies.

View file

@ -1,5 +1,5 @@
require "addressable/uri" require "addressable/uri"
require "httparty" require "async/http/internet/instance"
require "json" require "json"
# The Neopets Media Archive is a service that mirrors images.neopets.com files # The Neopets Media Archive is a service that mirrors images.neopets.com files
@ -11,8 +11,9 @@ require "json"
# long-term archive, not dependent on their services having 100% uptime in # long-term archive, not dependent on their services having 100% uptime in
# order for us to operate. We never discard old files, we just keep going! # order for us to operate. We never discard old files, we just keep going!
module NeopetsMediaArchive module NeopetsMediaArchive
include HTTParty # Share a pool of persistent connections, rather than reconnecting on
base_uri "https://images.neopets.com/" # each request. (This library does that automatically!)
INTERNET = Async::HTTP::Internet.instance
ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root) ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)
@ -46,9 +47,8 @@ module NeopetsMediaArchive
end end
# Download the file from the origin, then save a copy for next time. # Download the file from the origin, then save a copy for next time.
response = load_file_from_origin(uri) content = load_file_from_origin(uri)
info "Loaded source file from origin: #{uri}" info "Loaded source file from origin: #{uri}"
content = response.body
local_path.dirname.mkpath local_path.dirname.mkpath
File.write(local_path, content) File.write(local_path, content)
info "Wrote source file to filesystem: #{local_path}" info "Wrote source file to filesystem: #{local_path}"
@ -71,13 +71,20 @@ module NeopetsMediaArchive
"https://images.neopets.com, but got #{uri}" "https://images.neopets.com, but got #{uri}"
end end
response = get(uri) # By running this request in a `Sync` block, we make this method look
if response.code == 404 # synchronous to the caller—but if run in the context of an async task, it
# will pause execution and move onto other work until the request is done.
# We use this in the `swf_assets:manifests:load` task to perform many
# requests in parallel!
Sync do
response = INTERNET.get(uri)
if response.status == 404
raise NotFound, "origin server returned 404: #{uri}" raise NotFound, "origin server returned 404: #{uri}"
elsif response.code != 200 elsif response.status != 200
raise "expected status 200 but got #{response.code} (#{uri})" raise "expected status 200 but got #{response.status} (#{uri})"
end
response.body.read
end end
response
end end
def self.path_within_archive(uri) def self.path_within_archive(uri)