# Add `rails rainbow_pool:import` task, to get clean image hashes for pets.
#
# (From the original commit message, by Emi Matchu, 2024-09-07:)
# Used to have something like this long ago, now here's the latest version!
# This task can't run autonomously, it needs the human user to provide a
# neologin cookie value. So, no cron for us! But we're cleaning up *years*
# of lil guys in one swoop now :3

require "io/console" # defines IO#getpass, used for the cookie prompt below
require "addressable/template"
require "async/http/internet/instance"

namespace :rainbow_pool do
  desc "Import all basic image hashes from the Rainbow Pool, onto PetTypes"
  task :import => :environment do
    # `getpass` hides the cookie value as it's typed, like a password prompt.
    neologin = STDIN.getpass("Neologin cookie: ")

    # Preload all pet types and colors up front, so the per-species loop can
    # do its lookups in memory instead of issuing a query per row.
    all_pet_types = PetType.all.to_a
    all_pet_types_by_species_id_and_color_id = all_pet_types.
      to_h { |pt| [[pt.species_id, pt.color_id], pt] }
    all_colors_by_name = Color.all.to_h { |c| [c.human_name.downcase, c] }

    # TODO: Do these in parallel? I set up the HTTP requests to be able to
    # handle it, and just didn't set up the rest of the code for it, lol
    Species.order(:name).each do |species|
      begin
        hashes_by_color_name = RainbowPool.load_hashes_for_species(
          species.id, neologin)
      rescue => error
        # Best-effort: one species's page failing (e.g. expired cookie,
        # transient network error) shouldn't abort the whole import.
        puts "Failed to load #{species.name} page, skipping: #{error.message}"
        next
      end

      changed_pet_types = []

      hashes_by_color_name.each do |color_name, image_hash|
        color = all_colors_by_name[color_name.downcase]
        if color.nil?
          puts "Skipping unrecognized color name: #{color_name}"
          next
        end

        pet_type = all_pet_types_by_species_id_and_color_id[
          [species.id, color.id]]
        if pet_type.nil?
          puts "Skipping unrecognized pet type: " +
            "#{color_name} #{species.human_name}"
          next
        end

        if pet_type.basic_image_hash.nil?
          puts "Found new image hash: #{image_hash} (#{pet_type.human_name})"
          pet_type.basic_image_hash = image_hash
          changed_pet_types << pet_type
        elsif pet_type.basic_image_hash != image_hash
          # NOTE: Fixed a typo here — the message previously printed a stray
          # `{` before the pet type name ("({Blue Acara)").
          puts "Updating image hash: #{image_hash} (#{pet_type.human_name})"
          pet_type.basic_image_hash = image_hash
          changed_pet_types << pet_type
        else
          # No need to do anything with image hashes that match!
        end
      end

      # Save this species's changes in one transaction, so a failure rolls
      # back the whole species rather than leaving it half-updated.
      PetType.transaction { changed_pet_types.each(&:save!) }
      puts "Saved #{changed_pet_types.size} image hashes for " +
        "#{species.human_name}"
    end
  end
end

module RainbowPool
  # Share a pool of persistent connections, rather than reconnecting on
  # each request. (This library does that automatically!)
  INTERNET = Async::HTTP::Internet.instance

  class << self
    SPECIES_PAGE_URL_TEMPLATE = Addressable::Template.new(
      "https://www.neopets.com/pool/all_pb.phtml{?f_species_id}"
    )

    # Fetch the Rainbow Pool page for one species (authenticated via the
    # given neologin cookie value) and return a hash of
    # {color name => image hash}, as parsed from the page's HTML.
    # Raises a RuntimeError on any non-200 response.
    def load_hashes_for_species(species_id, neologin)
      Sync do
        url = SPECIES_PAGE_URL_TEMPLATE.expand(f_species_id: species_id)
        INTERNET.get(url, [
          ["User-Agent", Rails.configuration.user_agent_for_neopets],
          ["Cookie", "neologin=#{neologin}"],
        ]) do |response|
          if response.status != 200
            raise "expected status 200 but got #{response.status} (#{url})"
          end

          parse_hashes_from_page response.read
        end
      end
    end

    private

    # Matches the page's `set_pet_img(...)` JS calls, capturing the image
    # hash out of the pet-image URL and the color name passed alongside it.
    # NOTE: Reconstructed the named-capture group names, which were garbled
    # in transit (`(?[0-9a-z]+)` is not valid regex syntax) — the scan
    # destructuring below expects (image_hash, color_name) in this order.
    IMAGE_HASH_PATTERN = %r{
      set_pet_img\(
      'https?://pets\.neopets\.com/cp/(?<image_hash>[0-9a-z]+)/[0-9]+/[0-9]+\.png',
      \s*
      '(?<color_name>.+?)'
      \)
    }x

    # Scan the page HTML for all pet images, and build
    # {color name => image hash}.
    def parse_hashes_from_page(html)
      html.scan(IMAGE_HASH_PATTERN).to_h do |(image_hash, color_name)|
        [color_name, image_hash]
      end
    end
  end
end