Emi Matchu
86205c5e44
Instead of using `Async::HTTP::Internet` directly, and always applying our `User-Agent` header manually, let's build a helper for it!
116 lines
4.2 KiB
Ruby
116 lines
4.2 KiB
Ruby
require "addressable/uri"
|
|
require "json"
|
|
|
|
# The Neopets Media Archive is a service that mirrors images.neopets.com files
# locally. You can request a file from it, and we'll serve it from disk if we
# have it, or request and save it if not.
#
# This is a bit different than a cache, because the intention is not just
# optimization but that we *want* to be saving images.neopets.com as a
# long-term archive, not dependent on their services having 100% uptime in
# order for us to operate. We never discard old files, we just keep going!
module NeopetsMediaArchive
  # The directory under which every archived file is stored, taken from the
  # Rails configuration.
  ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)

  # Load the file from the given `images.neopets.com` URI.
  #
  # Returns a hash with two keys:
  #   - `:content`: the file's content, or `nil` if `return_content` is false.
  #   - `:source`: `"filesystem"` if we already had the file on disk, or
  #     `"network"` if we had to download it from the origin.
  #
  # Raises `ArgumentError` if the file isn't on disk and the URI's origin
  # isn't `https://images.neopets.com`, and `ResponseNotOK` if the origin
  # responds with a status other than 200.
  def self.load_file(uri, return_content: true)
    local_path = local_file_path(uri)

    if return_content
      # Read the file locally if we have it.
      begin
        content = File.read(local_path)
        debug "Loaded source file from filesystem: #{local_path}"
        return {content: content, source: "filesystem"}
      rescue Errno::ENOENT
        # If it doesn't exist, that's fine: just move on and download it.
      end
    elsif File.exist?(local_path)
      # When we don't need the content, "loading" the file is just ensuring
      # it exists. If it doesn't, we'll move on and load it from source.
      # (We use this when preloading files, to save the cost of reading files
      # we're not ready to use yet.)
      debug "Source file is already loaded, skipping: #{local_path}"
      return {content: nil, source: "filesystem"}
    end

    # Download the file from the origin, then save a copy for next time.
    content = load_file_from_origin(uri)
    info "Loaded source file from origin: #{uri}"
    write_file_atomically(local_path, content)
    info "Wrote source file to filesystem: #{local_path}"

    {content: return_content ? content : nil, source: "network"}
  end

  # Load the file from the given `images.neopets.com` URI, but don't return its
  # content. This can be faster in cases where the file's content isn't
  # relevant to us, and we just want to ensure it exists.
  #
  # Returns the same hash as `load_file`, with `:content` always `nil`.
  def self.preload_file(uri)
    load_file(uri, return_content: false)
  end

  # Load the file from the given `images.neopets.com` URI, directly from the
  # source, without checking the local filesystem.
  #
  # Raises `ArgumentError` if the URI's origin isn't exactly
  # `https://images.neopets.com`, and `ResponseNotOK` if the response status
  # isn't 200.
  def self.load_file_from_origin(uri)
    origin = Addressable::URI.parse(uri).origin
    if origin != "https://images.neopets.com"
      raise ArgumentError, "NeopetsMediaArchive can only load from " \
        "https://images.neopets.com, but got #{uri}"
    end

    # By running this request in a `Sync` block, we make this method look
    # synchronous to the caller—but if run in the context of an async task, it
    # will pause execution and move onto other work until the request is done.
    # We use this in the `swf_assets:manifests:load` task to perform many
    # requests in parallel!
    Sync do
      # NOTE(review): this relies on `DTIRequests.get` returning the block's
      # value (the response body) — confirm against that helper.
      DTIRequests.get(uri) do |response|
        if response.status != 200
          raise ResponseNotOK.new(response.status),
            "expected status 200 but got #{response.status} (#{uri})"
        end
        response.read
      end
    end
  end

  # The path where the given URI's file lives, relative to the archive root:
  # the URI's host, followed by its path, followed by `?` and the query string
  # if there is a non-empty one.
  def self.path_within_archive(uri)
    uri = Addressable::URI.parse(uri)
    path = uri.host + uri.path

    # We include the query string as part of the file path, which is a bit odd!
    # But Neopets often uses this for cache-busting, so we do need a mechanism
    # for knowing whether we're holding the right version of the file. We could
    # also consider storing the file by just its normal path, but with some
    # metadata to track versioning information (e.g. a sqlite db, or a metadata
    # file in the same directory).
    path += "?" + uri.query unless uri.query.nil? || uri.query.empty?

    # NOTE(review): nothing here normalizes `..` segments in the path or query,
    # so a crafted URI could presumably resolve outside `ROOT_PATH` — confirm
    # that callers only pass trusted URIs.
    path
  end

  # The full path on disk (a `Pathname` under `ROOT_PATH`) for the given URI.
  def self.local_file_path(uri)
    ROOT_PATH + path_within_archive(uri)
  end

  # Raised when the origin responds with a status other than 200. The status
  # that was actually received is available as `#status`.
  class ResponseNotOK < StandardError
    attr_reader :status

    def initialize(status)
      super
      @status = status
    end
  end

  # Write `content` to `path` (a `Pathname`), creating parent directories as
  # needed. We write to a temporary sibling file first and then rename it into
  # place, so a crash or a concurrent reader never sees a half-written file at
  # the final path. That matters here because we never re-download a file that
  # already exists on disk.
  def self.write_file_atomically(path, content)
    path.dirname.mkpath

    # Keep the temporary name short and in the same directory: renaming within
    # one directory stays on one filesystem, and a short name can't push us
    # past a filename length limit that the real name fits within.
    temp_path = path.dirname + ".tmp-#{Process.pid}-#{Fiber.current.object_id}"
    begin
      # Binary mode, because we're mirroring the origin's bytes verbatim.
      File.binwrite(temp_path, content)
      File.rename(temp_path, path)
    ensure
      # After a successful rename this is a no-op; after a failure it cleans up
      # the partial file.
      File.delete(temp_path) if File.exist?(temp_path)
    end
  end

  # Log an info-level message, tagged with this module's name.
  def self.info(message)
    Rails.logger.info "[NeopetsMediaArchive] #{message}"
  end

  # Log a debug-level message, tagged with this module's name.
  def self.debug(message)
    Rails.logger.debug "[NeopetsMediaArchive] #{message}"
  end

  # A bare `private` doesn't apply to methods defined with `def self.`, so we
  # mark the internal helpers private explicitly.
  private_class_method :write_file_atomically, :info, :debug
end
|