Compare commits


No commits in common. "992954ce89983fa96e35aff1da872d3c8406ea28" and "3f449310d667d7242482cc91320a3192fcd576df" have entirely different histories.

8 changed files with 100 additions and 318 deletions

View file

@@ -8,10 +8,6 @@ class AltStylesController < ApplicationController
@alt_styles = @alt_styles.merge(@species.alt_styles)
end
# We're going to link to the HTML5 image URL, so make sure we have all the
# manifests ready!
SwfAsset.preload_manifests @alt_styles.map(&:swf_assets).flatten
respond_to do |format|
format.html { render }
format.json {
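For context, a minimal usage sketch of the preload step above (hypothetical variable names, not part of the diff), assuming the SwfAsset.preload_manifests class method that appears later in this compare:

assets = @alt_styles.map(&:swf_assets).flatten
SwfAsset.preload_manifests(assets)         # fetch/cache the manifests, up to 10 at a time
image_urls = assets.map(&:html5_image_url) # in the manifest-reading version, these lookups can now hit local disk instead of the network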

View file

@@ -1,6 +1,3 @@
require 'async'
require 'async/barrier'
require 'async/semaphore'
require 'fileutils'
require 'uri'
@@ -116,39 +113,31 @@ class SwfAsset < ApplicationRecord
}
end
def manifest
raise "manifest_url is blank" if manifest_url.blank?
NeopetsMediaArchive.load_json(manifest_url)
end
def preload_manifest
raise "manifest_url is blank" if manifest_url.blank?
NeopetsMediaArchive.preload_file(manifest_url)
end
MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com")
def manifest_asset_urls
return {} if manifest_url.nil?
begin
# Organize the asset URLs by file extension, grab the ones we want, and
# convert them from paths to full URLs.
manifest["cpmanifest"]["assets"][0]["asset_data"].
to_h { |a| [a["file_ext"].to_sym, a] }.
slice(:png, :svg, :js)
.transform_values { |a| (MANIFEST_BASE_URL + a["url"]).to_s }
rescue StandardError => error
Rails.logger.error "Could not read URLs from manifest: #{error.full_message}"
return {}
end
end
MANIFEST_PATTERN = %r{^https://images.neopets.com/(?<prefix>.+)/(?<id>[0-9]+)(?<hash_part>_[^/]+)?/manifest\.json}
def html5_image_url
manifest_asset_urls[:png]
return nil if manifest_url.nil?
# HACK: Just assuming all of these were well-formed by the same process,
# and infer the image URL from the manifest URL! But strictly speaking we
# should be reading the manifest to check!
match = manifest_url.match(MANIFEST_PATTERN)
return nil if match.nil?
"https://images.neopets.com/#{match[:prefix]}/" +
"#{match[:id]}#{match[:hash_part]}/#{match[:id]}.png"
end
def html5_svg_url
manifest_asset_urls[:svg]
return nil if manifest_url.nil?
# HACK: Just assuming all of these were well-formed by the same process,
# and infer the image URL from the manifest URL! But strictly speaking we
# should be reading the manifest to check!
match = manifest_url.match(MANIFEST_PATTERN)
return nil if match.nil?
"https://images.neopets.com/#{match[:prefix]}/" +
"#{match[:id]}#{match[:hash_part]}/#{match[:id]}.svg"
end
def known_glitches
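To make the two strategies above concrete, here is a hedged worked example (the manifest URL, IDs, and hash are invented for illustration):

manifest_url = "https://images.neopets.com/cp/items/data/000/000/012/12345_abc123def/manifest.json"
match = manifest_url.match(SwfAsset::MANIFEST_PATTERN)
match[:prefix]    # => "cp/items/data/000/000/012"
match[:id]        # => "12345"
match[:hash_part] # => "_abc123def"
# Inferred URLs: .../12345_abc123def/12345.png and .../12345_abc123def/12345.svg

# The manifest-reading path, by contrast, returns a hash shaped roughly like:
# manifest_asset_urls
# # => { png: "https://images.neopets.com/.../12345.png",
# #      svg: "https://images.neopets.com/.../12345.svg",
# #      js:  "https://images.neopets.com/.../12345.js" }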
@@ -238,38 +227,6 @@ class SwfAsset < ApplicationRecord
))
end
# Given a list of SWF assets, ensure all of their manifests are loaded, with
# fast concurrent execution!
def self.preload_manifests(swf_assets)
# Blocks all tasks beneath it.
barrier = Async::Barrier.new
Sync do
# Only allow 10 manifests to be loaded at a time.
semaphore = Async::Semaphore.new(10, parent: barrier)
# Load all the manifests in async tasks. This will load them 10 at a time
# rather than all at once (because of the semaphore), and the
# NeopetsMediaArchive will share a pool of persistent connections for
# them.
swf_assets.map do |swf_asset|
semaphore.async do
begin
swf_asset.preload_manifest
rescue StandardError => error
Rails.logger.error "Could not preload manifest for asset " +
"#{swf_asset.id} (#{swf_asset.manifest_url}): #{error.message}"
end
end
end
# Wait until all tasks are done.
barrier.wait
ensure
barrier.stop # If something goes wrong, clean up all tasks.
end
end
before_save do
# If an asset body ID changes, that means more than one body ID has been
# linked to it, meaning that it's probably wearable by all bodies.
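The preload_manifests method above leans on the async gem's barrier/semaphore pattern. For readers unfamiliar with the library, here is a standalone sketch of the same pattern with toy data (nothing from this codebase):

require "async"
require "async/barrier"
require "async/semaphore"

barrier = Async::Barrier.new
Sync do
  # Allow at most 3 child tasks to run concurrently.
  semaphore = Async::Semaphore.new(3, parent: barrier)
  (1..20).each do |n|
    semaphore.async { puts "processing item #{n}" }
  end
  barrier.wait # block until every child task finishes
ensure
  barrier.stop # on error or interrupt, cancel whatever is still running
end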

View file

@@ -1,120 +0,0 @@
require "addressable/uri"
require "async/http/internet/instance"
require "json"
# The Neopets Media Archive is a service that mirrors images.neopets.com files
# locally. You can request a file from it, and we'll serve it from disk if we
# have it, or request and save it if not.
#
# This is a bit different than a cache, because the intention is not just
# optimization but that we *want* to be saving images.neopets.com as a
# long-term archive, not dependent on their services having 100% uptime in
# order for us to operate. We never discard old files, we just keep going!
module NeopetsMediaArchive
# Share a pool of persistent connections, rather than reconnecting on
# each request. (This library does that automatically!)
INTERNET = Async::HTTP::Internet.instance
ROOT_PATH = Pathname.new(Rails.configuration.neopets_media_archive_root)
# Load the file from the given `images.neopets.com` URI, as JSON.
def self.load_json(uri)
JSON.parse(load_file(uri))
end
# Load the file from the given `images.neopets.com` URI.
def self.load_file(uri, return_content: true)
local_path = local_file_path(uri)
# Read the file locally if we have it.
if return_content
begin
content = File.read(local_path)
debug "Loaded source file from filesystem: #{local_path}"
return content
rescue Errno::ENOENT
# If it doesn't exist, that's fine: just move on and download it.
end
else
# When we don't need the content, "loading" the file is just ensuring
# it exists. If it doesn't, we'll move on and load it from source.
# (We use this when preloading files, to save the cost of reading files
# we're not ready to use yet.)
if File.exist?(local_path)
debug "Source file is already loaded, skipping: #{local_path}"
return
end
end
# Download the file from the origin, then save a copy for next time.
content = load_file_from_origin(uri)
info "Loaded source file from origin: #{uri}"
local_path.dirname.mkpath
File.write(local_path, content)
info "Wrote source file to filesystem: #{local_path}"
return_content ? content : nil
end
# Load the file from the given `images.neopets.com` URI, but don't return its
# content. This can be faster in cases where the file's content isn't
# relevant to us, and we just want to ensure it exists.
def self.preload_file(uri)
load_file(uri, return_content: false)
end
# Load the file from the given `images.neopets.com` URI, directly from the
# source, without checking the local filesystem.
def self.load_file_from_origin(uri)
unless Addressable::URI.parse(uri).origin == "https://images.neopets.com"
raise ArgumentError, "NeopetsMediaArchive can only load from " +
"https://images.neopets.com, but got #{uri}"
end
# By running this request in a `Sync` block, we make this method look
# synchronous to the caller—but if run in the context of an async task, it
# will pause execution and move onto other work until the request is done.
# We use this in the `swf_assets:manifests:load` task to perform many
# requests in parallel!
Sync do
response = INTERNET.get(uri)
if response.status == 404
raise NotFound, "origin server returned 404: #{uri}"
elsif response.status != 200
raise "expected status 200 but got #{response.status} (#{uri})"
end
response.body.read
end
end
def self.path_within_archive(uri)
uri = Addressable::URI.parse(uri)
path = uri.host + uri.path
# We include the query string as part of the file path, which is a bit odd!
# But Neopets often uses this for cache-busting, so we do need a mechanism
# for knowing whether we're holding the right version of the file. We could
# also consider storing the file by just its normal path, but with some
# metadata to track versioning information (e.g. a sqlite db, or a metadata
# file in the same directory).
path += "?" + uri.query if !uri.query.nil? && !uri.query.empty?
path
end
def self.local_file_path(uri)
ROOT_PATH + path_within_archive(uri)
end
class NotFound < StandardError; end
private
def self.info(message)
Rails.logger.info "[NeopetsMediaArchive] #{message}"
end
def self.debug(message)
Rails.logger.debug "[NeopetsMediaArchive] #{message}"
end
end
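A short usage sketch of the module above (the URL is hypothetical), including how a query string ends up as part of the on-disk path:

url = "https://images.neopets.com/cp/items/data/000/000/012/12345/manifest.json?v=2"
NeopetsMediaArchive.preload_file(url)     # ensure the file exists on disk, without reading it
data = NeopetsMediaArchive.load_json(url) # parsed JSON, served from disk once archived
NeopetsMediaArchive.path_within_archive(url)
# => "images.neopets.com/cp/items/data/000/000/012/12345/manifest.json?v=2"
NeopetsMediaArchive.local_file_path(url)
# => ROOT_PATH joined with the path above (a Pathname under neopets_media_archive_root)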

View file

@@ -107,9 +107,4 @@ Rails.application.configure do
# override this with the IMPRESS_2020_ORIGIN environment variable!)
config.impress_2020_origin = ENV.fetch("IMPRESS_2020_ORIGIN",
"http://localhost:4000")
# Save the Neopets Media Archive in the local `tmp` folder. (In production,
# we keep this in a long-term location instead!)
config.neopets_media_archive_root = Rails.root / "tmp" /
"neopets_media_archive" / "development"
end
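For reference, this setting is what NeopetsMediaArchive uses as its ROOT_PATH; a quick check in a development console (illustrative output) might look like:

Rails.configuration.neopets_media_archive_root
# => #<Pathname:.../tmp/neopets_media_archive/development>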

View file

@@ -126,8 +126,4 @@ Rails.application.configure do
# IMPRESS_2020_ORIGIN environment variable!)
config.impress_2020_origin = ENV.fetch("IMPRESS_2020_ORIGIN",
"https://impress-2020.openneo.net")
# Save the Neopets Media Archive in `/var/lib/neopets-media-archive`, a
# long-term storage location.
config.neopets_media_archive_root = "/var/lib/neopets-media-archive"
end

View file

@@ -66,9 +66,4 @@ Rails.application.configure do
# override this with the IMPRESS_2020_ORIGIN environment variable!)
config.impress_2020_origin = ENV.fetch("IMPRESS_2020_ORIGIN",
"http://localhost:4000")
# Save the Neopets Media Archive in the local `tmp` folder. (In production,
# we keep this in a long-term location instead!)
config.neopets_media_archive_root = Rails.root / "tmp" /
"neopets_media_archive" / "test"
end

View file

@@ -402,14 +402,6 @@
password: "{{ mysql_user_password_2020 }}"
priv: "openneo_impress.*:ALL,openneo_id.*:ALL"
- name: Create the Neopets Media Archive data directory
file:
path: /var/lib/neopets-media-archive
owner: impress
group: impress
mode: "755"
state: directory
handlers:
- name: Reload nginx
systemd:

View file

@@ -35,43 +35,15 @@ namespace :swf_assets do
end
end
namespace :manifests do
desc "Save all known manifests to the Neopets Media Archive"
task load: [:environment] do
# Log errors to STDOUT, but we don't need the info messages about
# successful saves.
Rails.logger = Logger.new(STDOUT, level: :error)
# Find all the manifests with known URLs. (We don't have a database
# filter for "do we already have the manifest downloaded", but that's
# okay, the preload method will quickly check that for us!)
swf_assets = SwfAsset.where.not(manifest_url: nil)
total_count = swf_assets.count
puts "Found #{total_count} assets with manifests"
# For each batch of 1000 assets, load their manifests concurrently.
# Wrap everything in a top-level sync, so keyboard interrupts will
# propagate all the way up to here, instead of just cancelling the
# current batch.
Sync do
saved_count = 0
swf_assets.find_in_batches(batch_size: 1000) do |swf_assets|
SwfAsset.preload_manifests(swf_assets)
saved_count += swf_assets.size
puts "Loaded #{saved_count} of #{total_count} manifests"
end
end
end
desc "Backfill manifest_url for SwfAsset models"
task urls: [:environment] do
task manifests: [:environment] do
timeout = ENV.fetch("TIMEOUT", "5").to_i
assets = SwfAsset.where(manifest_url: nil)
count = assets.count
puts "Found #{count} assets without manifests"
Sync do
Async do
# Share a pool of persistent connections, rather than reconnecting on
# each request. (This library does that automatically!)
internet = Async::HTTP::Internet.instance
@@ -136,7 +108,6 @@ namespace :swf_assets do
end
end
end
end
end
SWF_URL_PATTERN = %r{^(?:https?:)?//images\.neopets\.com/cp/(bio|items)/swf/(.+?)_([a-z0-9]+)\.swf$}
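For completeness, a hedged note on running the tasks above from the shell (written as Ruby comments; exact task names depend on which side of this compare is checked out):

# bin/rails swf_assets:manifests:load            # the namespaced bulk-preload task defined above
# TIMEOUT=10 bin/rails swf_assets:manifests:urls # the manifest_url backfill, with TIMEOUT overriding its default of 5 (seconds, presumably)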