2024-02-24 16:12:02 -08:00
|
|
|
require 'addressable/template'
|
2024-02-23 13:45:12 -08:00
|
|
|
require 'async'
|
|
|
|
require 'async/barrier'
|
|
|
|
require 'async/semaphore'
|
2011-05-20 16:19:14 -07:00
|
|
|
require 'fileutils'
|
|
|
|
require 'uri'
|
|
|
|
|
2023-08-02 16:05:02 -07:00
|
|
|
class SwfAsset < ApplicationRecord
|
2023-07-22 12:23:14 -07:00
|
|
|
# We use the `type` column to mean something other than what Rails means!
# Setting the inheritance column to nil turns off Rails's single-table
# inheritance handling, so `type` behaves like a plain attribute.
self.inheritance_column = nil

# Used in `item_is_body_specific?`. (TODO: Could we refactor this out?)
attr_accessor :item

# NOTE: `parent_swf_asset_relationships` used to be declared twice in this
# list; one declaration is enough.
belongs_to :zone
has_many :parent_swf_asset_relationships
has_one :contribution, :as => :contributed, :inverse_of => :contributed

# Whenever a manifest URL is set, rewrite it to HTTPS before validating.
before_validation :normalize_manifest_url, if: :manifest_url?
|
Add manifest_url to swf_assets table
Ok so, impress-2020 guesses the manifest URL every time based on common
URL patterns. But the right way to do this is to read it from the
modeling data! But also, we don't have a great way to get the modeling
data directly. (Though as I write this, I guess we do have that
auto-modeling trick we use in the DTI 2020 codebase, I wonder if that
could work for this too?)
So anyway, in this change, we update the modeling code to save the
manifest URL, and also the migration includes a big block that attempts
to run impress-2020's manifest-guessing logic for every asset and save
the result!
It's uhh. Not fast. It runs at about 1 asset per second (a lot of these
aren't cache hits), and sometimes stalls out. And we have >600k assets,
so the estimated wall time is uhh. Seven days?
I think there's something we could do here around like, concurrent
execution? Though tbqh with the nature of the slowness being seemingly
about hitting the slow underlying images.neopets.com server, I don't
actually have a lot of faith that concurrency would actually be faster?
I also think it could be sensible to like… extract this from the
migration, and run it as a script to infer missing manifest URLs. That
would be easier to run in chunks and resume if something goes wrong.
Cuz like, I think my reasoning here was that backfilling this data was
part of the migration process… but the thing is, this migration can't
reliably get a manifest for everything (both cuz it depends on an
external service and cuz not everything has one), so it's a perfectly
valid migration to just leave the column as null for all the rows to
start, and fill this in later. I wish I'd written it like that!
But anyway, I'm just running this for now, and taking a break for the
night. Maybe later I'll come around and extract this into a separate
task to just try this on all assets missing manifests instead!
2023-11-09 21:42:51 -08:00
|
|
|
|
2024-02-24 15:22:14 -08:00
|
|
|
# `depth` is answered by the asset's zone.
delegate :depth, to: :zone

# Filter assets by the value stored in our custom `type` column.
scope :biology_assets, -> { where(type: PetState::SwfAssetType) }
scope :object_assets, -> { where(type: Item::SwfAssetType) }

# Impress 2020 endpoint that renders a PNG for a canvas movie library URL.
CANVAS_MOVIE_IMAGE_URL_TEMPLATE = Addressable::Template.new(
  Rails.configuration.impress_2020_origin +
    "/api/assetImage{?libraryUrl,size}"
)

# URL pattern for the legacy (Classic DTI) PNG images. See
# `legacy_image_url` for how the `id1`/`id2`/`id3` segments are filled in.
LEGACY_IMAGE_URL_TEMPLATE = Addressable::Template.new(
  "https://aws.impress-asset-images.openneo.net/{type}" \
    "/{id1}/{id2}/{id3}/{id}/{size}x{size}.png?v2-{time}"
)
|
2011-05-22 13:30:02 -07:00
|
|
|
|
2010-05-20 16:56:08 -07:00
|
|
|
# Serializes this asset for JSON output.
#
# By default only the `id` and `known_glitches` attributes are included, plus
# the results of the `zone`, `restricted_zones` and `urls` methods. Any keys
# given in `options` take precedence over these defaults.
def as_json(options={})
  defaults = {
    only: [:id, :known_glitches],
    methods: [:zone, :restricted_zones, :urls],
  }
  super(defaults.merge(options))
end
|
|
|
|
|
|
|
|
# Collects every URL we know for this asset into one hash, with the keys
# `:swf`, `:png`, `:svg`, `:canvas_library` and `:manifest`. Values are nil
# when the corresponding file isn't available.
def urls
  links = { swf: url, png: image_url }
  links[:svg] = manifest_asset_urls[:svg]
  links[:canvas_library] = manifest_asset_urls[:js]
  links[:manifest] = manifest_url
  links
end
|
|
|
|
|
Create NeopetsMediaArchive, read the actual manifests for Alt Styles
The Neopets Media Archive is a service that mirrors `images.neopets.com`
over time! Right now we're starting by just loading manifests, and
using them to replace the hacks we used for determining the Alt Style
PNG and SVG URLs; but with time, I want to load *all* customization
media files, to have our own secondary file source that isn't dependent
on Neopets to always be up.
Impress 2020 already caches manifest files, but this strategy is
different in two ways:
1. We're using the filesystem rather than a database column. (That is,
manifest data is kinda duplicated in the system right now!) This is
because I intend to go in a more file-y way long-term anyway, to
load more than just the manifests.
2. Impress 2020 guesses at the manifest URLs by pattern, and reloads
them on a regular basis. Instead, we use the modeling system: when
TNT changes the URL of a manifest by appending a new `?v=` query
string to it, this system will consider it a new URL, and will load
the new copy accordingly.
Fun fact, I actually have been prototyping some of this stuff in a side
project I'd named `impress-media-server`! It's a little Sinatra app
that indeed *does* save all the files needed for customization, and can
generate lightweight lil preview iframes and images pretty easily. I
had initially been planning this as a separate service, but after
thinking over the arch a bit, I think it'll go smoother to just give
the main app all the same access and awareness—and I wrote it all in
Ruby and plain HTML/JS/CSS, so it should be pretty easy to port over
bit-by-bit!
Anyway, only Alt Styles use this for now, but my motivation is to be
able to use more-correct asset URL logic to be able to finally swap
over wardrobe-2020's item search to impress.openneo.net's item search
API endpoint—which will get "Items You Own" searches working again, and
whittle down one of the last big things Impress 2020 can do that the
main app can't. Let's see how it goes!
2024-02-23 12:02:39 -08:00
|
|
|
# Returns the manifest JSON for this asset, loaded through the
# NeopetsMediaArchive and remembered on this instance after the first
# successful load.
#
# Raises a RuntimeError if `manifest_url` is blank.
def manifest
  if manifest_url.blank?
    raise "manifest_url is blank"
  end

  return @manifest if @manifest

  @manifest = NeopetsMediaArchive.load_json(manifest_url)
end
|
|
|
|
|
2024-02-23 13:45:12 -08:00
|
|
|
# Asks the NeopetsMediaArchive to preload this asset's manifest file.
#
# Raises a RuntimeError if `manifest_url` is blank.
def preload_manifest
  if manifest_url.blank?
    raise "manifest_url is blank"
  end

  NeopetsMediaArchive.preload_file(manifest_url)
end
|
|
|
|
|
Create NeopetsMediaArchive, read the actual manifests for Alt Styles
The Neopets Media Archive is a service that mirrors `images.neopets.com`
over time! Right now we're starting by just loading manifests, and
using them to replace the hacks we used for determining the Alt Style
PNG and SVG URLs; but with time, I want to load *all* customization
media files, to have our own secondary file source that isn't dependent
on Neopets to always be up.
Impress 2020 already caches manifest files, but this strategy is
different in two ways:
1. We're using the filesystem rather than a database column. (That is,
manifest data is kinda duplicated in the system right now!) This is
because I intend to go in a more file-y way long-term anyway, to
load more than just the manifests.
2. Impress 2020 guesses at the manifest URLs by pattern, and reloads
them on a regular basis. Instead, we use the modeling system: when
TNT changes the URL of a manifest by appending a new `?v=` query
string to it, this system will consider it a new URL, and will load
the new copy accordingly.
Fun fact, I actually have been prototyping some of this stuff in a side
project I'd named `impress-media-server`! It's a little Sinatra app
that indeed *does* save all the files needed for customization, and can
generate lightweight lil preview iframes and images pretty easily. I
had initially been planning this as a separate service, but after
thinking over the arch a bit, I think it'll go smoother to just give
the main app all the same access and awareness—and I wrote it all in
Ruby and plain HTML/JS/CSS, so it should be pretty easy to port over
bit-by-bit!
Anyway, only Alt Styles use this for now, but my motivation is to be
able to use more-correct asset URL logic to be able to finally swap
over wardrobe-2020's item search to impress.openneo.net's item search
API endpoint—which will get "Items You Own" searches working again, and
whittle down one of the last big things Impress 2020 can do that the
main app can't. Let's see how it goes!
2024-02-23 12:02:39 -08:00
|
|
|
# Base URL that the asset paths listed in a manifest are joined onto, to turn
# them into full URLs.
MANIFEST_BASE_URL = Addressable::URI.parse("https://images.neopets.com")
|
|
|
|
# Reads this asset's manifest and returns the URLs of the files we care
# about, as a hash.
#
# Returns `{ js: url }` when the manifest lists a JS library, otherwise
# `{ png: url, svg: url }` (either value is nil if the manifest has no file
# of that type). Returns `{}` if there's no manifest URL, or if the manifest
# can't be read; in that case the error is logged rather than raised.
def manifest_asset_urls
  # Blank (not just nil) matches what `manifest` rejects, so an empty string
  # is treated as "no manifest" instead of being logged as an error below.
  return {} if manifest_url.blank?

  begin
    # Organize the asset URLs by file extension, and convert them from paths
    # to full URLs.
    assets_by_ext = manifest["cpmanifest"]["assets"][0]["asset_data"]
      .group_by { |a| a["file_ext"].to_sym }
      .transform_values do |assets|
        assets.map { |a| (MANIFEST_BASE_URL + a["url"]).to_s }
      end

    if assets_by_ext[:js].present?
      # If a JS asset is present, assume any other assets are supporting
      # assets, and skip them. (e.g. if there's a PNG, it's likely to be an
      # "atlas" file used in the animation, rather than a thumbnail.)
      #
      # NOTE: We take the last one, because sometimes there are multiple JS
      # assets in the same manifest, and earlier ones are broken and later
      # ones are fixed. I don't know the logic exactly, but that's what we've
      # seen!
      { js: assets_by_ext[:js].last }
    else
      # Otherwise, return the last PNG and the last SVG, arbitrarily.
      # (There's probably only one of each! I'm just going by the same logic
      # we've seen in the JS library case, that later entries are more likely
      # to be correct.)
      #
      # A manifest may list only one of the two types, so a missing type
      # falls back to an empty list (giving nil) instead of raising and
      # discarding the URL we *do* have.
      {
        png: assets_by_ext.fetch(:png, []).last,
        svg: assets_by_ext.fetch(:svg, []).last,
      }
    end
  rescue StandardError => error
    Rails.logger.error "Could not read URLs from manifest: #{error.full_message}"
    {}
  end
end
|
2024-01-31 03:02:19 -08:00
|
|
|
|
2024-02-24 16:12:02 -08:00
|
|
|
# Picks the best PNG URL available for this asset, or nil if there is none.
# The sources are tried in order: the manifest's own PNG, an Impress 2020
# render of the canvas movie, then the legacy image storage.
def image_url
  if manifest_asset_urls[:png].present?
    # The manifest lists a PNG, so use it directly.
    manifest_asset_urls[:png]
  elsif manifest_asset_urls[:js].present?
    # This is a canvas movie: let Impress 2020 generate a PNG for us.
    canvas_movie_image_url
  elsif has_image?
    # We don't have the manifest, or it doesn't have the files we need, so
    # fall back to the Classic DTI image storage. Those images were generated
    # from the SWFs via an old version of gnash (or sometimes manually
    # overridden). They're less accurate, but well-tested to generally work
    # okay, and they're the only image we have for assets not yet converted
    # to HTML5.
    #
    # NOTE: We've stopped generating these images for new assets! This is
    # mainly for old assets not yet converted to HTML5.
    #
    # NOTE: If you're modeling from a fresh development database,
    # `has_image?` might be false even though we *do* have a saved copy of
    # the image available in production. But if you're using the public
    # modeling data exported from production, then this check should be fine!
    #
    # TODO: Rename `has_image?` to `has_legacy_image?`.
    legacy_image_url
  end
  # If no branch matched, there's no image URL, and the result is nil.
end
|
|
|
|
|
|
|
|
# Builds the Impress 2020 URL that renders this asset's canvas movie as a
# PNG with `size` 600. Returns nil when the manifest has no JS library.
def canvas_movie_image_url
  if manifest_asset_urls[:js]
    expanded = CANVAS_MOVIE_IMAGE_URL_TEMPLATE.expand(
      libraryUrl: manifest_asset_urls[:js],
      size: 600,
    )
    expanded.to_s
  end
end
|
|
|
|
|
|
|
|
# Builds the URL of this asset's legacy PNG, or returns nil when `has_image?`
# is false.
#
# The remote ID is left-padded with zeroes to 12 characters, and the first
# three 3-character groups of that padded ID become the `id1`/`id2`/`id3`
# path segments. `converted_at` is appended to the URL as an integer
# timestamp.
def legacy_image_url
  if has_image?
    padded_id = remote_id.to_s.rjust(12, "0")
    id1, id2, id3 = [0, 3, 6].map { |start| padded_id[start, 3] }

    LEGACY_IMAGE_URL_TEMPLATE.expand(
      type: type,
      id1: id1,
      id2: id2,
      id3: id3,
      id: remote_id,
      size: "600",
      time: converted_at.to_i,
    ).to_s
  end
end
|
|
|
|
|
2023-11-11 07:21:13 -08:00
|
|
|
# Returns the known glitches as an array of strings, split from the
# comma-separated value stored in the `known_glitches` attribute.
#
# A nil attribute is treated as "no glitches" and returns an empty array
# (the setter stores whatever non-Array value it is given, nil included).
def known_glitches
  self[:known_glitches].to_s.split(',')
end
|
|
|
|
|
|
|
|
# Stores the known glitches. An array is joined into a comma-separated
# string first; any other value is stored as given.
def known_glitches=(new_known_glitches)
  serialized =
    case new_known_glitches
    when Array then new_known_glitches.join(',')
    else new_known_glitches
    end

  self[:known_glitches] = serialized
end
|
|
|
|
|
2023-11-11 07:14:48 -08:00
|
|
|
# Decodes `zones_restrict` into a list of zone IDs: each character is one
# flag, and a "1" at (zero-based) position N means zone ID N + 1 is
# restricted.
def restricted_zone_ids
  bits = zones_restrict.chars
  bits.each_index.select { |index| bits[index] == "1" }.map { |index| index + 1 }
end
|
|
|
|
|
|
|
|
# Looks up the zones whose IDs are listed by `restricted_zone_ids`.
def restricted_zones
  zone_ids = restricted_zone_ids
  Zone.where(id: zone_ids)
end
|
2011-05-02 15:07:56 -07:00
|
|
|
|
2010-10-09 08:23:59 -07:00
|
|
|
# Whether this asset is body-specific: true when the zone's `type_id` is
# below 3; otherwise decided by `item_is_body_specific?`.
def body_specific?
  return true if zone.type_id < 3

  item_is_body_specific?
end
|
|
|
|
|
|
|
|
# Whether any item this asset belongs to is body-specific.
#
# This checks the items we're already bound to in the database, and also the
# one passed to us from the current modeling operation (`item`), if any.
# Checking every one of them ensures that we always respect the
# explicitly_body_specific flag!
#
# NOTE: I know this has perf impact... it would be better for modeling to
# preload this probably? But oh well!
def item_is_body_specific?
  candidate_items = parent_swf_asset_relationships.
    includes(:parent).
    where(parent_type: "Item").
    map(&:parent)
  candidate_items << item if item

  candidate_items.any?(&:body_specific?)
end
|
2011-05-02 15:07:56 -07:00
|
|
|
|
2010-10-07 07:46:23 -07:00
|
|
|
# Takes this asset's body ID from the given pet type.
def origin_pet_type=(pet_type)
  origin_body_id = pet_type.body_id
  self.body_id = origin_body_id
end
|
2011-05-02 15:07:56 -07:00
|
|
|
|
2010-10-07 07:46:23 -07:00
|
|
|
# Fills in this asset from a biology data hash: marks it as type "biology",
# and copies over `:zone_id` (converted to an integer), `:asset_url`,
# `:zones_restrict`, and `:manifest`. The incoming data is logged at debug
# level first.
def origin_biology_data=(data)
  inspected = data.inspect
  Rails.logger.debug("my biology data is: #{inspected}")

  self.type = "biology"
  self.zone_id = data[:zone_id].to_i
  self.url = data[:asset_url]
  self.zones_restrict = data[:zones_restrict]
  self.manifest_url = data[:manifest]
end
|
2011-05-02 15:07:56 -07:00
|
|
|
|
2010-10-07 07:46:23 -07:00
|
|
|
# Fills in this asset from an object data hash: marks it as type "object",
# copies over `:zone_id` (converted to an integer), `:asset_url`, and
# `:manifest`, and sets `zones_restrict` to an empty string. The incoming
# data is logged at debug level first.
def origin_object_data=(data)
  inspected = data.inspect
  Rails.logger.debug("my object data is: #{inspected}")

  self.type = "object"
  self.zone_id = data[:zone_id].to_i
  self.url = data[:asset_url]
  self.zones_restrict = ""
  self.manifest_url = data[:manifest]
end
|
2011-05-02 15:07:56 -07:00
|
|
|
|
Add manifest_url to swf_assets table
Ok so, impress-2020 guesses the manifest URL every time based on common
URL patterns. But the right way to do this is to read it from the
modeling data! But also, we don't have a great way to get the modeling
data directly. (Though as I write this, I guess we do have that
auto-modeling trick we use in the DTI 2020 codebase, I wonder if that
could work for this too?)
So anyway, in this change, we update the modeling code to save the
manifest URL, and also the migration includes a big block that attempts
to run impress-2020's manifest-guessing logic for every asset and save
the result!
It's uhh. Not fast. It runs at about 1 asset per second (a lot of these
aren't cache hits), and sometimes stalls out. And we have >600k assets,
so the estimated wall time is uhh. Seven days?
I think there's something we could do here around like, concurrent
execution? Though tbqh with the nature of the slowness being seemingly
about hitting the slow underlying images.neopets.com server, I don't
actually have a lot of faith that concurrency would actually be faster?
I also think it could be sensible to like… extract this from the
migration, and run it as a script to infer missing manifest URLs. That
would be easier to run in chunks and resume if something goes wrong.
Cuz like, I think my reasoning here was that backfilling this data was
part of the migration process… but the thing is, this migration can't
reliably get a manifest for everything (both cuz it depends on an
external service and cuz not everything has one), so it's a perfectly
valid migration to just leave the column as null for all the rows to
start, and fill this in later. I wish I'd written it like that!
But anyway, I'm just running this for now, and taking a break for the
night. Maybe later I'll come around and extract this into a separate
task to just try this on all assets missing manifests instead!
2023-11-09 21:42:51 -08:00
|
|
|
# Rewrites `manifest_url` so that its scheme is always "https", leaving the
# rest of the URL as parsed.
def normalize_manifest_url
  self.manifest_url = Addressable::URI.parse(manifest_url).
    tap { |parsed_url| parsed_url.scheme = "https" }.
    to_s
end
|
|
|
|
|
2024-02-24 15:24:24 -08:00
|
|
|
# Sets the body ID by hand, flagging this instance so that the `before_save`
# callback leaves the value alone instead of applying its usual reset to 0.
# (This is intended for use from the console.)
#
# Returns the new body ID.
def override_body_id(new_body_id)
  @body_id_overridden = true

  self.body_id = new_body_id
end
|
|
|
|
|
2024-01-24 03:25:23 -08:00
|
|
|
# Finds or initializes the biology asset whose remote ID is the integer form
# of `data[:part_id]`, then assigns it the given body ID and biology data.
# Returns the asset without saving it.
def self.from_biology_data(body_id, data)
  SwfAsset.find_or_initialize_by(
    type: 'biology',
    remote_id: data[:part_id].to_i,
  ).tap do |swf_asset|
    swf_asset.body_id = body_id
    swf_asset.origin_biology_data = data
  end
end
|
|
|
|
|
2015-05-03 14:57:42 -07:00
|
|
|
# Finds the assets referenced by wardrobe link params: biology assets whose
# remote ID is in `ids[:biology]`, or object assets whose remote ID is in
# `ids[:object]`.
def self.from_wardrobe_link_params(ids)
  remote_id = arel_table[:remote_id]
  asset_type = arel_table[:type]

  biology_match = remote_id.in(ids[:biology]).and(asset_type.eq('biology'))
  object_match = remote_id.in(ids[:object]).and(asset_type.eq('object'))

  where(biology_match.or(object_match))
end
|
|
|
|
|
2024-02-23 13:45:12 -08:00
|
|
|
# Given a list of SWF assets, ensure all of their manifests are loaded, with
# fast concurrent execution!
#
# A failure to preload one asset's manifest is logged and doesn't stop the
# others.
def self.preload_manifests(swf_assets)
  # Blocks all tasks beneath it.
  barrier = Async::Barrier.new

  Sync do
    # Only allow 10 manifests to be loaded at a time.
    semaphore = Async::Semaphore.new(10, parent: barrier)

    # Load all the manifests in async tasks. This will load them 10 at a time
    # rather than all at once (because of the semaphore), and the
    # NeopetsMediaArchive will share a pool of persistent connections for
    # them.
    swf_assets.each do |swf_asset|
      semaphore.async do
        swf_asset.preload_manifest
      rescue StandardError => error
        Rails.logger.error "Could not preload manifest for asset " +
          "#{swf_asset.id} (#{swf_asset.manifest_url}): #{error.message}"
      end
    end

    # Wait until all tasks are done.
    barrier.wait
  ensure
    barrier.stop # If something goes wrong, clean up all tasks.
  end
end
|
|
|
|
|
2010-10-10 11:43:01 -07:00
|
|
|
before_save do
  # Reset the body ID to 0 when the asset isn't body-specific at all, or when
  # the body ID of an already-saved asset has changed. If an asset body ID
  # changes, that means more than one body ID has been linked to it, meaning
  # that it's probably wearable by all bodies.
  #
  # Skipped entirely when the body ID was set via `override_body_id`.
  unless @body_id_overridden
    # Wrapped in a lambda so it's only evaluated when `body_specific?` is
    # true, same as a plain `||` chain would.
    relinked_to_another_body = -> { !new_record? && body_id_changed? }

    self.body_id = 0 if !body_specific? || relinked_to_another_body.call
  end
end
|
2011-05-02 15:07:56 -07:00
|
|
|
|
2011-02-19 19:09:12 -08:00
|
|
|
# Error type named for download failures. It inherits from StandardError
# (rather than Exception) so that a plain `rescue` catches it, like any other
# application error.
class DownloadError < StandardError; end
|
2010-05-16 12:01:38 -07:00
|
|
|
end
|