require 'fileutils'
require 'uri'

class SwfAsset < ApplicationRecord
  # We use the `type` column to mean something other than what Rails means!
  self.inheritance_column = nil

  IMAGE_SIZES = {
    :small => [150, 150],
    :medium => [300, 300],
    :large => [600, 600]
  }

  belongs_to :zone
  has_many :parent_swf_asset_relationships

  scope :includes_depth, -> { includes(:zone) }

  before_validation :normalize_manifest_url

  def swf_image_dir
    @swf_image_dir ||= Rails.root.join('tmp', 'asset_images_before_upload', self.id.to_s)
  end

  def swf_image_path(size)
    swf_image_dir.join("#{size.join('x')}.png")
  end
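
  # For example (hypothetical ID, not from the original code): for an asset
  # with id 123, swf_image_path([150, 150]) points at
  # tmp/asset_images_before_upload/123/150x150.png under Rails.root.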

  PARTITION_COUNT = 3
  PARTITION_DIGITS = 3
  PARTITION_ID_LENGTH = PARTITION_COUNT * PARTITION_DIGITS

  def partition_path
    (remote_id / 10**PARTITION_DIGITS).to_s.rjust(PARTITION_ID_LENGTH, '0').tap do |id_str|
      PARTITION_COUNT.times do |n|
        id_str.insert(PARTITION_ID_LENGTH - (n * PARTITION_DIGITS), '/')
      end
    end
  end
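
  # A worked example (hypothetical ID, not from the original code): remote_id
  # 12345678 drops its last PARTITION_DIGITS digits (=> 12345), pads to
  # PARTITION_ID_LENGTH characters ("000012345"), then gets a '/' inserted
  # after each PARTITION_DIGITS-character group:
  #
  #   SwfAsset.new(remote_id: 12345678).partition_path
  #   # => "000/012/345/"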

  # Used as a cache-busting query string on image URLs, below.
  def image_version
    converted_at.to_i
  end

  def image_url(size=IMAGE_SIZES[:large])
    host = ASSET_HOSTS[:swf_asset_images]
    size_key = size.join('x')

    image_dir = "#{self['type']}/#{partition_path}#{self.remote_id}"
    "//#{host}/#{image_dir}/#{size_key}.png?#{image_version}"
  end
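
  # As a sketch (hypothetical host, ID, and timestamp, not from the original
  # code): an "object" asset with remote_id 12345678 converted at Unix time
  # 1700000000 would yield a protocol-relative URL like:
  #
  #   //<swf_asset_images host>/object/000/012/345/12345678/600x600.png?1700000000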

  def images
    IMAGE_SIZES.values.map { |size| {:size => size, :url => image_url(size)} }
  end

  # The item this asset is being modeled with right now, if any (not persisted).
  attr_accessor :item

  has_one :contribution, :as => :contributed, :inverse_of => :contributed

  delegate :depth, :to => :zone

  # Body ID 0 is the wildcard: an asset with body_id 0 fits all bodies.
  def self.body_ids_fitting_standard
    @body_ids_fitting_standard ||= PetType.standard_body_ids + [0]
  end

  scope :fitting_body_id, ->(body_id) {
    where(arel_table[:body_id].in([body_id, 0]))
  }

  scope :fitting_standard_body_ids, -> {
    where(arel_table[:body_id].in(body_ids_fitting_standard))
  }

  scope :fitting_color, ->(color) {
    body_ids = PetType.select(:body_id).where(:color_id => color.id).map(&:body_id)
    body_ids << 0
    where(arel_table[:body_id].in(body_ids))
  }
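
  # For example (hypothetical ID, not from the original code): these scopes
  # compose like any others, and always include the fits-everyone body ID 0:
  #
  #   SwfAsset.fitting_body_id(47)
  #   # => assets WHERE body_id IN (47, 0)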

  scope :biology_assets, -> { where(:type => PetState::SwfAssetType) }
  scope :object_assets, -> { where(:type => Item::SwfAssetType) }

  scope :for_item_ids, ->(item_ids) {
    joins(:parent_swf_asset_relationships).
      where(ParentSwfAssetRelationship.arel_table[:parent_id].in(item_ids))
  }

  scope :with_parent_ids, -> {
    select('swf_assets.*, parents_swf_assets.parent_id')
  }

  # To manually change the body ID without triggering the usual change to 0,
  # use this override method.
  def override_body_id(new_body_id)
    @body_id_overridden = true
    self.body_id = new_body_id
  end
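
  # For example (hypothetical usage): when manually correcting a record,
  #
  #   asset.override_body_id(93)
  #   asset.save!
  #
  # keeps body_id at 93, whereas a plain assignment could be reset to 0 by the
  # before_save hook below.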

  def as_json(options={})
    json = {
      :id => remote_id,
      :type => type,
      :depth => depth,
      :body_id => body_id,
      :zone_id => zone_id,
      :zones_restrict => zones_restrict,
      :is_body_specific => body_specific?,
      # Now that we don't proactively convert images anymore, let's just always
      # say `has_image: true` when sending data to the frontend, so it'll use
      # the new URLs anyway!
      :has_image => true,
      :images => images
    }
    json[:parent_id] = options[:parent_id] if options[:parent_id]
    json
  end
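
  # A sketch of the shape this returns (hypothetical values, not from the
  # original code):
  #
  #   {
  #     :id => 12345678, :type => "object", :depth => 34, :body_id => 0,
  #     :zone_id => 26, :zones_restrict => "", :is_body_specific => false,
  #     :has_image => true, :images => [...]
  #   }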

  def body_specific?
    self.zone.type_id < 3 || item_is_body_specific?
  end

  def item_is_body_specific?
    # Get items that we're already bound to in the database, and also the one
    # passed to us from the current modeling operation, if any.
    #
    # NOTE: I know this has perf impact... it would be better for modeling to
    # preload this probably? But oh well!
    items = parent_swf_asset_relationships.includes(:parent).
      where(parent_type: "Item").map { |r| r.parent }
    items << item if item

    # Return whether any of them is known to be body-specific. This ensures
    # that we always respect the explicitly_body_specific flag!
    items.any? { |i| i.body_specific? }
  end

  def origin_pet_type=(pet_type)
    self.body_id = pet_type.body_id
  end

  def origin_biology_data=(data)
    Rails.logger.debug("my biology data is: #{data.inspect}")
    self.type = 'biology'
    self.zone_id = data[:zone_id].to_i
    self.url = data[:asset_url]
    self.zones_restrict = data[:zones_restrict]
    self.manifest_url = data[:manifest]
  end

  def origin_object_data=(data)
    Rails.logger.debug("my object data is: #{data.inspect}")
    self.type = 'object'
    self.zone_id = data[:zone_id].to_i
    self.url = data[:asset_url]
    self.zones_restrict = ""
    self.manifest_url = data[:manifest]
  end
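
  # As a sketch of the modeling payload this expects (hypothetical values;
  # the real data comes from Neopets modeling responses), note the symbol keys:
  #
  #   asset.origin_object_data = {
  #     :zone_id => "26",
  #     :asset_url => "https://images.neopets.com/...",
  #     :manifest => "https://images.neopets.com/.../manifest.json"
  #   }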

  def mall_data=(data)
    self.zone_id = data['zone'].to_i
    # This is the *canonical* URL for the SWF, not a proxied one: we apply any
    # proxying later in the process, when actually downloading the asset.
    self.url = "https://images.neopets.com/#{data['url']}"
  end

  def normalize_manifest_url
    # Not every asset has a manifest URL yet, so skip nil/empty values rather
    # than crash on them.
    return if manifest_url.blank?
    parsed_manifest_url = Addressable::URI.parse(manifest_url)
    parsed_manifest_url.scheme = "https"
    self.manifest_url = parsed_manifest_url.to_s
  end
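
  # For example (hypothetical URL): a manifest_url of
  # "http://images.neopets.com/.../manifest.json" is rewritten to the same URL
  # with an "https" scheme before validation.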

  def self.from_wardrobe_link_params(ids)
    where((
      arel_table[:remote_id].in(ids[:biology]).and(arel_table[:type].eq('biology'))
    ).or(
      arel_table[:remote_id].in(ids[:object]).and(arel_table[:type].eq('object'))
    ))
  end
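
  # For example (hypothetical IDs, not from the original code):
  #
  #   SwfAsset.from_wardrobe_link_params(:biology => [1, 2], :object => [3])
  #   # => assets WHERE (remote_id IN (1, 2) AND type = 'biology')
  #   #           OR (remote_id IN (3) AND type = 'object')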

  before_save do
    # If an asset body ID changes, that means more than one body ID has been
    # linked to it, meaning that it's probably wearable by all bodies.
    self.body_id = 0 if !@body_id_overridden &&
      (!self.body_specific? || (!self.new_record? && self.body_id_changed?))
  end

  class DownloadError < Exception; end
end